Skip to content

Commit 4d10c51

Browse files
committed
Add CLI functionality with tests and configuration
1 parent 2020a72 commit 4d10c51

15 files changed

+1006
-1
lines changed

.gitignore

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -184,3 +184,12 @@ AgentHistoryList.json
184184

185185
# For Docker
186186
data/
187+
188+
# cursor
189+
.cursorrules
190+
.cursorignore
191+
.backup.env
192+
.brain/** */
193+
194+
# Brain directory
195+
.brain/

.vscode/settings.json

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -7,5 +7,11 @@
77
"source.fixAll.ruff": "explicit",
88
"source.organizeImports.ruff": "explicit"
99
}
10+
},
11+
"dotenv.enableAutocloaking": false,
12+
"workbench.colorCustomizations": {
13+
"activityBar.background": "#452606",
14+
"titleBar.activeBackground": "#603608",
15+
"titleBar.activeForeground": "#FEFBF7"
1016
}
1117
}

cli/__init__.py

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
"""
2+
Command-line interface for browser-use.
3+
"""

cli/browser-tasks-example.ts

Lines changed: 181 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,181 @@
1+
/**
2+
* Browser Automation Task Sequences
3+
*
4+
* This file defines task sequences for browser automation using the browser-use command.
5+
* Each sequence represents a series of browser interactions that can be executed in order.
6+
*/
7+
8+
/**
 * Options for a single `browser-use` invocation.
 *
 * `executeTask` in this file turns one of these into a command-line string.
 */
export interface BrowserCommand {
9+
/** Natural-language instruction; emitted as the quoted first argument of `browser-use`. */
prompt: string;
10+
/** Model name; emitted as `--model <value>` when set, omitted otherwise. */
model?: 'deepseek-chat' | 'gemini' | 'gpt-4' | 'claude-3';
11+
/** When true, `--headless` is added to the command. */
headless?: boolean;
12+
/** When true, `--vision` is added to the command. */
vision?: boolean;
13+
/** When true, `--keep-browser-open` is added to the command. */
keepSessionAlive?: boolean;
14+
}
15+
16+
/**
 * One step of a task sequence: a human-readable label plus the command to run.
 */
export interface BrowserTask {
17+
/** Short label for the step; the example usage in this file prints it before the command. */
description: string;
18+
/** The `browser-use` invocation options for this step. */
command: BrowserCommand;
19+
/**
 * Optional nested steps. NOTE(review): the example usage in this file only
 * iterates top-level tasks and never descends into `subtasks`.
 */
subtasks?: BrowserTask[];
20+
}
21+
22+
/**
 * A named, ordered list of browser tasks.
 */
export interface BrowserTaskSequence {
23+
/** Display name of the sequence (e.g. "Product Research"). */
name: string;
24+
/** One-line summary of what the sequence does. */
description: string;
25+
/** The steps of the sequence, in array order. */
tasks: BrowserTask[];
26+
}
27+
28+
// Example task sequences
29+
export const browserTasks: BrowserTaskSequence[] = [
30+
{
31+
name: "Product Research",
32+
description: "Compare product prices across multiple e-commerce sites",
33+
tasks: [
34+
{
35+
description: "Search Amazon for wireless earbuds",
36+
command: {
37+
prompt: "go to amazon.com and search for 'wireless earbuds' and tell me the price of the top 3 results",
38+
model: "gemini",
39+
vision: true,
40+
keepSessionAlive: true
41+
}
42+
},
43+
{
44+
description: "Search Best Buy for comparison",
45+
command: {
46+
prompt: "go to bestbuy.com and search for 'wireless earbuds' and tell me the price of the top 3 results",
47+
model: "gemini",
48+
vision: true,
49+
keepSessionAlive: true
50+
}
51+
},
52+
{
53+
description: "Create price comparison",
54+
command: {
55+
prompt: "create a comparison table of the prices from both sites",
56+
keepSessionAlive: false
57+
}
58+
}
59+
]
60+
},
61+
{
62+
name: "Site Health Check",
63+
description: "Monitor website availability and performance",
64+
tasks: [
65+
{
66+
description: "Check main site",
67+
command: {
68+
prompt: "go to example.com and check if it loads properly",
69+
headless: true
70+
}
71+
},
72+
{
73+
description: "Verify API health",
74+
command: {
75+
prompt: "go to api.example.com/health and tell me the status",
76+
headless: true
77+
}
78+
},
79+
{
80+
description: "Test documentation site",
81+
command: {
82+
prompt: "go to docs.example.com and verify all navigation links are working",
83+
headless: true
84+
}
85+
}
86+
]
87+
},
88+
{
89+
name: "Content Analysis",
90+
description: "Analyze blog content and engagement",
91+
tasks: [
92+
{
93+
description: "List articles",
94+
command: {
95+
prompt: "go to blog.example.com and list all article titles from the homepage",
96+
model: "gemini",
97+
vision: true
98+
}
99+
},
100+
{
101+
description: "Analyze first article",
102+
command: {
103+
prompt: "click on the first article and summarize its main points"
104+
},
105+
subtasks: [
106+
{
107+
description: "Get metadata",
108+
command: {
109+
prompt: "tell me the author, publication date, and reading time"
110+
}
111+
},
112+
{
113+
description: "Analyze comments",
114+
command: {
115+
prompt: "scroll to the comments section and summarize the main discussion points",
116+
vision: true
117+
}
118+
}
119+
]
120+
}
121+
]
122+
},
123+
{
124+
name: "Advanced Content Analysis",
125+
description: "Analyze website content using different models for different tasks",
126+
tasks: [
127+
{
128+
description: "Initial navigation and basic text extraction",
129+
command: {
130+
prompt: "go to docs.github.com and navigate to the Actions documentation",
131+
model: "deepseek-chat", // Use DeepSeek for basic navigation
132+
keepSessionAlive: true
133+
}
134+
},
135+
{
136+
description: "Visual analysis of page structure",
137+
command: {
138+
prompt: "analyze the layout of the page and tell me how the documentation is structured, including sidebars, navigation, and content areas",
139+
model: "gemini", // Switch to Gemini for visual analysis
140+
vision: true,
141+
keepSessionAlive: true
142+
}
143+
},
144+
{
145+
description: "Complex content summarization",
146+
command: {
147+
prompt: "summarize the key concepts of GitHub Actions based on the documentation",
148+
model: "claude-3", // Switch to Claude for complex summarization
149+
keepSessionAlive: true
150+
}
151+
},
152+
{
153+
description: "Extract code examples",
154+
command: {
155+
prompt: "find and list all YAML workflow examples on the page",
156+
model: "deepseek-chat", // Back to DeepSeek for code extraction
157+
keepSessionAlive: false // Close browser after final task
158+
}
159+
}
160+
]
161+
}
162+
];
163+
164+
// Example of executing a task sequence
165+
/**
 * Builds the `browser-use` command line for a single command description.
 *
 * The prompt is placed inside a double-quoted shell argument, so the
 * characters that stay special inside double quotes (`"`, `\`, `$` and the
 * backtick) are backslash-escaped. Without this, a prompt such as
 * `say "hi"` would terminate the argument early and `$(...)` or backticks
 * would be expanded by the shell.
 *
 * @param task - The command options to render.
 * @returns The command string, e.g. `browser-use "go to example.com" --headless`.
 *          Flags appear in the fixed order: model, headless, vision, keep-browser-open.
 */
const executeTask = (task: BrowserCommand): string => {
  // Escape only what a POSIX shell still interprets inside double quotes.
  const quotedPrompt = task.prompt.replace(/["\\$`]/g, (ch) => '\\' + ch);

  const options: string[] = [];
  if (task.model) options.push(`--model ${task.model}`);
  if (task.headless) options.push('--headless');
  if (task.vision) options.push('--vision');
  if (task.keepSessionAlive) options.push('--keep-browser-open');

  // trim() removes the trailing space left when no options are present.
  return `browser-use "${quotedPrompt}" ${options.join(' ')}`.trim();
};
174+
175+
// Example usage:
176+
// Demo: print the command line for every top-level step of the first
// sequence in `browserTasks` ("Product Research"). Nothing is executed;
// the commands are only written to the console.
const sequence = browserTasks[0];
console.log(`Executing sequence: ${sequence.name}`);
for (const { description, command } of sequence.tasks) {
  console.log(`\n${description}:`);
  console.log(executeTask(command));
}

cli/browser-use

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
#!/bin/bash
#
# Launcher for browser-use-cli.py.
#
# Resolves the directory this script lives in, puts it on PYTHONPATH,
# activates the sibling virtual environment when one exists, runs the CLI
# with all arguments passed through, and exits with the CLI's exit status.

# Get the absolute directory of this script
SCRIPT_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd )"

# Add the script directory to PYTHONPATH. The existing value is appended only
# when it is non-empty, so an unset PYTHONPATH does not produce a trailing ':'
# (an empty path entry, which would make Python search the current directory).
export PYTHONPATH="$SCRIPT_DIR${PYTHONPATH:+:$PYTHONPATH}"

# Activate the virtual environment if it exists, and remember that we did so
# that we only deactivate an environment this script activated itself.
ACTIVATED_VENV=0
if [ -f "$SCRIPT_DIR/venv/bin/activate" ]; then
    source "$SCRIPT_DIR/venv/bin/activate"
    ACTIVATED_VENV=1
fi

# Prefer the virtual environment's interpreter; fall back to python3 on PATH
# when no venv is present instead of failing on a missing file.
if [ -x "$SCRIPT_DIR/venv/bin/python" ]; then
    PYTHON_BIN="$SCRIPT_DIR/venv/bin/python"
else
    PYTHON_BIN="python3"
fi

# Run the Python script with all arguments passed through
"$PYTHON_BIN" "$SCRIPT_DIR/browser-use-cli.py" "$@"
STATUS=$?

# Deactivate the virtual environment only if this script activated it and the
# deactivate function is actually defined in this shell.
if [ "$ACTIVATED_VENV" -eq 1 ] && declare -F deactivate > /dev/null; then
    deactivate
fi

# Propagate the CLI's exit status; without this the script would report the
# status of the cleanup block above instead.
exit "$STATUS"

cli/browser-use.toolchain.json

Lines changed: 50 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,50 @@
1+
{
2+
"name": "browser-use",
3+
"description": "Execute natural language browser automation commands",
4+
"type": "terminal_command",
5+
"functions": [
6+
{
7+
"name": "browser_command",
8+
"description": "Control a browser using natural language instructions",
9+
"parameters": {
10+
"properties": {
11+
"prompt": {
12+
"type": "string",
13+
"description": "The natural language instruction (e.g., 'go to google.com and search for OpenAI')"
14+
},
15+
"model": {
16+
"type": "string",
17+
"enum": ["deepseek-chat", "gemini", "gpt-4", "claude-3"],
18+
"default": "deepseek-chat",
19+
"description": "The LLM model to use (optional)"
20+
},
21+
"headless": {
22+
"type": "boolean",
23+
"default": false,
24+
"description": "Run browser in headless mode (optional)"
25+
},
26+
"vision": {
27+
"type": "boolean",
28+
"default": false,
29+
"description": "Enable vision capabilities for supported models (optional)"
30+
}
31+
},
32+
"required": ["prompt"]
33+
}
34+
}
35+
],
36+
"examples": [
37+
{
38+
"description": "Basic usage",
39+
"command": "browser-use \"go to google.com and search for OpenAI\""
40+
},
41+
{
42+
"description": "Using vision to analyze a webpage",
43+
"command": "browser-use \"go to openai.com and tell me what you see\" --model gemini --vision"
44+
},
45+
{
46+
"description": "Running a check in headless mode",
47+
"command": "browser-use \"check if github.com is up\" --headless"
48+
}
49+
]
50+
}

0 commit comments

Comments
 (0)