diff --git a/.gitignore b/.gitignore
index 2d83410f..142b465a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -184,3 +184,17 @@ AgentHistoryList.json
 
 # For Docker
 data/
+
+# cursor
+.cursorrules
+.cursorignore
+.backup.env
+.brain/**
+
+# Brain directory
+.brain/
+
+.env.google
+*.zip
+traces/
+debug-session/
\ No newline at end of file
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 8b09300d..58dcb3c6 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -7,5 +7,11 @@
       "source.fixAll.ruff": "explicit",
       "source.organizeImports.ruff": "explicit"
     }
+  },
+  "dotenv.enableAutocloaking": false,
+  "workbench.colorCustomizations": {
+    "activityBar.background": "#452606",
+    "titleBar.activeBackground": "#603608",
+    "titleBar.activeForeground": "#FEFBF7"
   }
 }
diff --git a/README.md b/README.md
index 184eeb93..698b00de 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,79 @@
+# Fork Purpose
+
+This fork of browser-use/web-ui adds CLI support designed for AI agents such as Cursor Agent. It enables direct command-line interaction with browser automation tasks, making it well suited to AI development environments and automated workflows.
+
+## CLI Documentation
+
+See the [CLI Guide](cli/README.md) for comprehensive documentation on:
+- Available LLM providers and models
+- Detailed command reference
+- Environment configuration
+- Example usage patterns
+
+### Quick Start
+
+```bash
+# Run a task (browser will auto-start if needed)
+browser-use run "create a report about the page structure" --url "https://example.com"
+
+# Run with a specific provider and vision capabilities
+browser-use run "analyze the layout and visual elements" --url "https://example.com" --provider Google --vision
+
+# Run with a specific model selection
+browser-use run "analyze the page" --url "https://example.com" --provider Anthropic --model-index 1
+
+# Explicitly start the browser with custom options (optional)
+browser-use start --headless --window-size 1920x1080
+
+# Close the browser when done
+browser-use close
+```
+
+### Supported LLM Providers
+
+- **OpenAI** (`gpt-4o`) - Vision-capable model for advanced analysis
+- **Anthropic** (`claude-3-5-sonnet-latest`, `claude-3-5-sonnet-20241022`) - Advanced language understanding
+- **Google** (`gemini-1.5-pro`, `gemini-2.0-flash`) - Fast and efficient processing
+- **DeepSeek** (`deepseek-chat`) - Cost-effective default option
+
+See the [CLI Guide](cli/README.md) for detailed provider configuration and usage examples.
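+
+Each provider reads its API key from the project's `.env` file. A minimal sketch (variable names follow the `.env` reference in the [CLI Guide](cli/README.md); only the provider you actually select needs a key):
+
+```env
+# Set the key for whichever provider you plan to use
+OPENAI_API_KEY=your_key_here
+ANTHROPIC_API_KEY=your_key_here
+GOOGLE_API_KEY=your_key_here
+# DeepSeek is the default provider
+DEEPSEEK_API_KEY=your_key_here
+```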
+
+### CLI Commands
+
+- `start` - (Optional) Initialize browser session with custom options:
+  - `--headless` - Run in headless mode
+  - `--window-size` - Set window dimensions (e.g., "1920x1080")
+  - `--disable-security` - Disable security features
+  - `--user-data-dir` - Use custom Chrome profile
+  - `--proxy` - Set proxy server
+
+- `run` - Execute tasks (auto-starts browser if needed):
+  - `--url` - Starting URL for the task (required)
+  - `--provider` - Choose the LLM provider (Deepseek, Google, OpenAI, Anthropic)
+  - `--model-index` - Select a specific model from the provider's list (0-based)
+  - `--vision` - Enable visual analysis
+  - `--record` - Record browser session
+  - `--trace-path` - Save debugging traces
+  - `--max-steps` - Limit task steps
+  - `--add-info` - Provide additional context
+
+- `close` - Clean up browser session
+
+### Example Tasks
+
+The [browser-tasks-example.ts](cli/browser-tasks-example.ts) file provides ready-to-use task sequences for:
+
+- Product research automation
+- Documentation analysis
+- Page structure analysis
+- Debug sessions with tracing
+
+### Configuration
+
+See [.env.example](.env.example) for all available configuration options, including:
+
+- API keys for different LLM providers
+- Browser settings
+- Session persistence options
+
 Browser Use Web UI
@@ -51,134 +127,4 @@ Then install playwright: ```bash playwright install -``` - -### Option 2: Docker Installation - -1. **Prerequisites:** - - Docker and Docker Compose installed on your system - - Git to clone the repository - -2. **Setup:** - ```bash - # Clone the repository - git clone https://github.com/browser-use/web-ui.git - cd web-ui - - # Copy and configure environment variables - cp .env.example .env - # Edit .env with your preferred text editor and add your API keys - ``` - -3. **Run with Docker:** - ```bash - # Build and start the container with default settings (browser closes after AI tasks) - docker compose up --build - - # Or run with persistent browser (browser stays open between AI tasks) - CHROME_PERSISTENT_SESSION=true docker compose up --build - ``` - -4. **Access the Application:** - - WebUI: `http://localhost:7788` - - VNC Viewer (to see browser interactions): `http://localhost:6080/vnc.html` - - Default VNC password is "vncpassword". You can change it by setting the `VNC_PASSWORD` environment variable in your `.env` file. - - -## Usage - -### Local Setup -1. Copy `.env.example` to `.env` and set your environment variables, including API keys for the LLM. `cp .env.example .env` -2. **Run the WebUI:** - ```bash - python webui.py --ip 127.0.0.1 --port 7788 - ``` -4. WebUI options: - - `--ip`: The IP address to bind the WebUI to. Default is `127.0.0.1`. - - `--port`: The port to bind the WebUI to. Default is `7788`. - - `--theme`: The theme for the user interface. Default is `Ocean`. - - **Default**: The standard theme with a balanced design. - - **Soft**: A gentle, muted color scheme for a relaxed viewing experience. - - **Monochrome**: A grayscale theme with minimal color for simplicity and focus. - - **Glass**: A sleek, semi-transparent design for a modern appearance. - - **Origin**: A classic, retro-inspired theme for a nostalgic feel. - - **Citrus**: A vibrant, citrus-inspired palette with bright and fresh colors. - - **Ocean** (default): A blue, ocean-inspired theme providing a calming effect. - - `--dark-mode`: Enables dark mode for the user interface. -3. **Access the WebUI:** Open your web browser and navigate to `http://127.0.0.1:7788`. -4. **Using Your Own Browser(Optional):** - - Set `CHROME_PATH` to the executable path of your browser and `CHROME_USER_DATA` to the user data directory of your browser. - - Windows - ```env - CHROME_PATH="C:\Program Files\Google\Chrome\Application\chrome.exe" - CHROME_USER_DATA="C:\Users\YourUsername\AppData\Local\Google\Chrome\User Data" - ``` - > Note: Replace `YourUsername` with your actual Windows username for Windows systems. - - Mac - ```env - CHROME_PATH="/Applications/Google Chrome.app/Contents/MacOS/Google Chrome" - CHROME_USER_DATA="~/Library/Application Support/Google/Chrome/Profile 1" - ``` - - Close all Chrome windows - - Open the WebUI in a non-Chrome browser, such as Firefox or Edge. This is important because the persistent browser context will use the Chrome data when running the agent. - - Check the "Use Own Browser" option within the Browser Settings. -5. **Keep Browser Open(Optional):** - - Set `CHROME_PERSISTENT_SESSION=true` in the `.env` file. - -### Docker Setup -1. 
**Environment Variables:** - - All configuration is done through the `.env` file - - Available environment variables: - ``` - # LLM API Keys - OPENAI_API_KEY=your_key_here - ANTHROPIC_API_KEY=your_key_here - GOOGLE_API_KEY=your_key_here - - # Browser Settings - CHROME_PERSISTENT_SESSION=true # Set to true to keep browser open between AI tasks - RESOLUTION=1920x1080x24 # Custom resolution format: WIDTHxHEIGHTxDEPTH - RESOLUTION_WIDTH=1920 # Custom width in pixels - RESOLUTION_HEIGHT=1080 # Custom height in pixels - - # VNC Settings - VNC_PASSWORD=your_vnc_password # Optional, defaults to "vncpassword" - ``` - -2. **Browser Persistence Modes:** - - **Default Mode (CHROME_PERSISTENT_SESSION=false):** - - Browser opens and closes with each AI task - - Clean state for each interaction - - Lower resource usage - - - **Persistent Mode (CHROME_PERSISTENT_SESSION=true):** - - Browser stays open between AI tasks - - Maintains history and state - - Allows viewing previous AI interactions - - Set in `.env` file or via environment variable when starting container - -3. **Viewing Browser Interactions:** - - Access the noVNC viewer at `http://localhost:6080/vnc.html` - - Enter the VNC password (default: "vncpassword" or what you set in VNC_PASSWORD) - - You can now see all browser interactions in real-time - -4. **Container Management:** - ```bash - # Start with persistent browser - CHROME_PERSISTENT_SESSION=true docker compose up -d - - # Start with default mode (browser closes after tasks) - docker compose up -d - - # View logs - docker compose logs -f - - # Stop the container - docker compose down - ``` - -## Changelog - -- [x] **2025/01/10:** Thanks to @casistack. Now we have Docker Setup option and also Support keep browser open between tasks.[Video tutorial demo](https://github.com/browser-use/web-ui/issues/1#issuecomment-2582511750). -- [x] **2025/01/06:** Thanks to @richard-devbot. A New and Well-Designed WebUI is released. [Video tutorial demo](https://github.com/warmshao/browser-use-webui/issues/1#issuecomment-2573393113). \ No newline at end of file +``` \ No newline at end of file diff --git a/analyze_trace.py b/analyze_trace.py new file mode 100644 index 00000000..a66a26b8 --- /dev/null +++ b/analyze_trace.py @@ -0,0 +1,11 @@ +from src.trace_analyzer import EnhancedTraceAnalyzer +import asyncio +import json + +async def main(): + analyzer = EnhancedTraceAnalyzer('traces/enhanced-test.json') + result = await analyzer.analyze_all() + print(json.dumps(result, indent=2)) + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/cli/README.md b/cli/README.md new file mode 100644 index 00000000..03e7466c --- /dev/null +++ b/cli/README.md @@ -0,0 +1,161 @@ +# Browser-Use CLI Guide + +This guide details the available models and commands for the browser-use CLI tool. 
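+
+The `browser-use` command used throughout this guide is the wrapper script at `cli/browser-use`, which activates the project's `venv` and then invokes `cli/browser_use_cli.py`. One way to make it callable from any directory is to symlink it onto your `PATH` (a sketch; the link location is only an example):
+
+```bash
+# From the repository root: expose the wrapper as a global `browser-use` command
+ln -s "$(pwd)/cli/browser-use" ~/.local/bin/browser-use
+```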
+ +## Available Models + +### OpenAI +- Model: `gpt-4o` (Vision-capable) +```bash +# Basic usage +browser-use run "analyze this webpage" --provider OpenAI + +# With vision capabilities +browser-use run "describe what you see on the page" --provider OpenAI --vision +``` + +### Anthropic +- Models: + - `claude-3-5-sonnet-latest` (Default) + - `claude-3-5-sonnet-20241022` +```bash +# Using default model +browser-use run "analyze this webpage" --provider Anthropic + +# Using specific model version +browser-use run "analyze this webpage" --provider Anthropic --model-index 1 +``` + +### Google (Gemini) +- Models: + - `gemini-1.5-pro` (Default) + - `gemini-2.0-flash` +```bash +# Using default model +browser-use run "analyze this webpage" --provider Google + +# Using flash model +browser-use run "analyze this webpage" --provider Google --model-index 1 +``` + +### DeepSeek +- Model: `deepseek-chat` +```bash +# DeepSeek is the default provider +browser-use run "analyze this webpage" + +# Explicitly specifying DeepSeek +browser-use run "analyze this webpage" --provider Deepseek +``` + +## CLI Commands + +### Start Browser Session +```bash +# Basic start +browser-use start + +# With custom window size +browser-use start --window-size 1920x1080 + +# Headless mode +browser-use start --headless + +# With custom Chrome profile +browser-use start --user-data-dir "/path/to/profile" + +# With proxy +browser-use start --proxy "localhost:8080" +``` + +### Run Tasks +```bash +# Basic task +browser-use run "analyze the page" --url "https://example.com" + +# With vision capabilities +browser-use run "describe the visual layout" --url "https://example.com" --vision + +# With specific provider and model +browser-use run "analyze this webpage" --url "https://example.com" --provider Google --model-index 1 + +# With recording +browser-use run "test the checkout flow" --url "https://example.com/checkout" --record --record-path ./recordings + +# With debugging traces +browser-use run "analyze form submission" --url "https://example.com/form" --trace-path ./traces + +# With step limits +browser-use run "complex task" --url "https://example.com" --max-steps 5 --max-actions 2 + +# With additional context +browser-use run "analyze pricing" --url "https://example.com/pricing" --add-info "Focus on enterprise plans" +``` + +### Close Browser +```bash +browser-use close +``` + +## Environment Variables + +Required API keys should be set in your `.env` file: +```env +# OpenAI +OPENAI_API_KEY=your_key_here +OPENAI_ENDPOINT=https://api.openai.com/v1 # Optional + +# Anthropic +ANTHROPIC_API_KEY=your_key_here + +# Google (Gemini) +GOOGLE_API_KEY=your_key_here + +# DeepSeek +DEEPSEEK_API_KEY=your_key_here +DEEPSEEK_ENDPOINT=your_endpoint # Optional +``` + +## Browser Settings + +Optional browser configuration in `.env`: +```env +# Custom Chrome settings +CHROME_PATH=/path/to/chrome +CHROME_USER_DATA=/path/to/user/data + +# Session persistence +CHROME_PERSISTENT_SESSION=true # Keep browser open between tasks +``` + +## Examples + +### Visual Analysis Task +```bash +browser-use run \ + "analyze the page layout" \ + --url "https://example.com" \ + --provider Google \ + --vision \ + --record \ + --record-path ./recordings +``` + +### Multi-Step Task +```bash +browser-use run \ + "fill the form and verify success" \ + --url "https://example.com/login" \ + --provider Anthropic \ + --max-steps 5 \ + --trace-path ./traces/login +``` + +### Research Task +```bash +browser-use run \ + "research pricing information for top 3 competitors" \ + 
--url "https://example.com" \ + --provider OpenAI \ + --add-info "Focus on enterprise features and annual pricing" +``` \ No newline at end of file diff --git a/cli/__init__.py b/cli/__init__.py new file mode 100644 index 00000000..d1f449a5 --- /dev/null +++ b/cli/__init__.py @@ -0,0 +1,3 @@ +""" +Command-line interface for browser-use. +""" \ No newline at end of file diff --git a/cli/browser-tasks-example.ts b/cli/browser-tasks-example.ts new file mode 100644 index 00000000..3f39f97a --- /dev/null +++ b/cli/browser-tasks-example.ts @@ -0,0 +1,287 @@ +/** + * Browser Automation Task Sequences + * + * This file defines task sequences for browser automation using the browser-use command. + * Each sequence represents a series of browser interactions that can be executed in order. + */ + +export interface BrowserCommand { + prompt: string; + url: string; + provider?: 'Deepseek' | 'Google' | 'OpenAI' | 'Anthropic'; + modelIndex?: number; + headless?: boolean; + vision?: boolean; + record?: boolean; + recordPath?: string; + tracePath?: string; + maxSteps?: number; + maxActions?: number; + addInfo?: string; + windowSize?: string; + userDataDir?: string; + proxy?: string; +} + +export interface BrowserTask { + description: string; + command: BrowserCommand; + subtasks?: BrowserTask[]; +} + +export interface BrowserTaskSequence { + name: string; + description: string; + tasks: BrowserTask[]; +} + +// Example task sequences +export const browserTasks: BrowserTaskSequence[] = [ + { + name: "Product Research", + description: "Compare product prices across multiple e-commerce sites", + tasks: [ + { + description: "Search Amazon for wireless earbuds", + command: { + prompt: "search for 'wireless earbuds' and tell me the price of the top 3 results", + url: "https://www.amazon.com", + provider: "Deepseek" + } + }, + { + description: "Search Best Buy for comparison", + command: { + prompt: "search for 'wireless earbuds' and tell me the price of the top 3 results", + url: "https://www.bestbuy.com", + provider: "Deepseek" + } + }, + { + description: "Create price comparison", + command: { + prompt: "create a comparison table of the prices from both sites", + url: "about:blank", + provider: "Deepseek" + } + } + ] + }, + { + name: "Site Health Check", + description: "Monitor website availability and performance", + tasks: [ + { + description: "Check main site", + command: { + prompt: "check if it loads properly", + url: "https://example.com", + provider: "Deepseek", + headless: true + } + }, + { + description: "Verify API health", + command: { + prompt: "check the API health status", + url: "https://api.example.com/health", + provider: "Deepseek", + headless: true + } + }, + { + description: "Test documentation site", + command: { + prompt: "verify all navigation links are working", + url: "https://docs.example.com", + provider: "Deepseek", + headless: true + } + } + ] + }, + { + name: "Content Analysis", + description: "Analyze blog content and engagement", + tasks: [ + { + description: "List articles", + command: { + prompt: "list all article titles from the homepage", + url: "https://blog.example.com", + provider: "Deepseek" + } + }, + { + description: "Analyze first article", + command: { + prompt: "click on the first article and summarize its main points", + url: "https://blog.example.com", + provider: "Deepseek" + }, + subtasks: [ + { + description: "Get metadata", + command: { + prompt: "tell me the author, publication date, and reading time", + url: "https://blog.example.com", + provider: "Deepseek" + 
} + }, + { + description: "Analyze comments", + command: { + prompt: "scroll to the comments section and summarize the main discussion points", + url: "https://blog.example.com", + provider: "Deepseek" + } + } + ] + } + ] + }, + { + name: "Advanced Content Analysis", + description: "Analyze website content using different models for different tasks", + tasks: [ + { + description: "Initial navigation and basic text extraction", + command: { + prompt: "navigate to the Actions documentation and extract basic text content", + url: "https://docs.github.com", + provider: "Deepseek" + } + }, + { + description: "Visual analysis of page structure", + command: { + prompt: "analyze the layout of the page and tell me how the documentation is structured, including sidebars, navigation, and content areas", + url: "https://docs.github.com", + provider: "Google", + vision: true, + modelIndex: 1, + addInfo: "Only using Google here because we need vision capabilities" + } + }, + { + description: "Complex content summarization", + command: { + prompt: "summarize the key concepts of GitHub Actions based on the documentation", + url: "https://docs.github.com", + provider: "Deepseek" + } + }, + { + description: "Extract code examples", + command: { + prompt: "find and list all YAML workflow examples on the page", + url: "https://docs.github.com", + provider: "Deepseek" + } + } + ] + }, + { + name: "Page Structure Analysis", + description: "Generate detailed reports about page structure and interactive elements", + tasks: [ + { + description: "Analyze homepage structure", + command: { + prompt: "create a report about the page structure, including the page title, headings, and any interactive elements found", + url: "https://example.com", + provider: "Deepseek" + } + }, + { + description: "Analyze navigation structure", + command: { + prompt: "focus on the navigation menu and create a detailed report of its structure and all available links", + url: "https://example.com", + provider: "Google", + vision: true, + addInfo: "Only using Google here because we need vision capabilities for complex layout analysis" + } + }, + { + description: "Document forms and inputs", + command: { + prompt: "find all forms on the page and document their inputs, buttons, and validation requirements", + url: "https://example.com", + provider: "Google", + vision: true, + addInfo: "Only using Google here because we need vision capabilities for form analysis" + } + } + ] + }, + { + name: "Debug Session", + description: "Record and analyze browser interactions for debugging", + tasks: [ + { + description: "Start debug session", + command: { + prompt: "attempt to log in with test credentials", + url: "https://example.com/login", + provider: "Deepseek", + headless: false, + tracePath: "./tmp/traces/login", + record: true, + recordPath: "./recordings/login" + } + }, + { + description: "Navigate complex workflow", + command: { + prompt: "complete the multi-step registration process", + url: "https://example.com/register", + provider: "Deepseek", + maxSteps: 5, + maxActions: 2, + tracePath: "./tmp/traces/registration" + } + }, + { + description: "Generate debug report", + command: { + prompt: "create a report of all actions taken and any errors encountered", + url: "about:blank", + provider: "Deepseek", + addInfo: "Focus on error patterns and user interaction points" + } + } + ] + } +]; + +// Updated execute task function to match CLI arguments +const executeTask = (task: BrowserCommand): string => { + const options: string[] = []; + + if 
(task.provider) options.push(`--provider ${task.provider}`); + if (task.modelIndex !== undefined) options.push(`--model-index ${task.modelIndex}`); + if (task.headless) options.push('--headless'); + if (task.vision) options.push('--vision'); + if (task.record) { + options.push('--record'); + if (task.recordPath) options.push(`--record-path ${task.recordPath}`); + } + if (task.tracePath) options.push(`--trace-path ${task.tracePath}`); + if (task.maxSteps) options.push(`--max-steps ${task.maxSteps}`); + if (task.maxActions) options.push(`--max-actions ${task.maxActions}`); + if (task.addInfo) options.push(`--add-info "${task.addInfo}"`); + if (task.windowSize) options.push(`--window-size ${task.windowSize}`); + if (task.userDataDir) options.push(`--user-data-dir "${task.userDataDir}"`); + if (task.proxy) options.push(`--proxy "${task.proxy}"`); + + return `browser-use run "${task.prompt}" --url "${task.url}" ${options.join(' ')}`.trim(); +}; + +// Example usage: +const sequence = browserTasks[0]; // Get Product Research sequence +console.log(`Executing sequence: ${sequence.name}`); +sequence.tasks.forEach(task => { + console.log(`\n${task.description}:`); + console.log(executeTask(task.command)); +}); \ No newline at end of file diff --git a/cli/browser-use b/cli/browser-use new file mode 100755 index 00000000..4c83fb85 --- /dev/null +++ b/cli/browser-use @@ -0,0 +1,76 @@ +#!/bin/bash + +# Get the absolute path of the script's real location (dereference symbolic link) +REAL_SCRIPT_PATH=$(readlink -f "${BASH_SOURCE[0]}") + +# Get the directory of the script +SCRIPT_DIR="$(dirname "$REAL_SCRIPT_PATH")" + +# Project root is one level up from the script's directory +PROJECT_ROOT="$SCRIPT_DIR/.." + +# Change to the project root directory +cd "$PROJECT_ROOT" + +# Activate the virtual environment +if [ -f "venv/bin/activate" ]; then + echo "Activating virtual environment" + source "venv/bin/activate" + echo "VIRTUAL_ENV: $VIRTUAL_ENV" +else + echo "Virtual environment activation script not found" +fi + +# Create a temporary file for state transfer +TEMP_STATE_FILE=$(mktemp) +echo "Created temporary state file: $TEMP_STATE_FILE" + +# Run the Python script and capture its output +echo "Running: venv/bin/python cli/browser_use_cli.py '$@'" +if ! "venv/bin/python" "cli/browser_use_cli.py" "$@" --temp-file "$TEMP_STATE_FILE"; then + echo "Error running command. Exiting." + echo "Cleaning up temp file: $TEMP_STATE_FILE" + rm -f "$TEMP_STATE_FILE" + exit 1 +fi + +# Check the exit code of the Python script +PYTHON_EXIT_CODE=$? 
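+# Note: the `if !` guard above already exits when the Python script fails, so this
+# point is only reached after a successful run; "$?" here reflects the completed
+# `if` statement rather than the Python process itself.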
+ +# If Python script exited with a non-zero code, exit with the same code +if [ $PYTHON_EXIT_CODE -ne 0 ]; then + echo "Python script exited with error code: $PYTHON_EXIT_CODE" + echo "Cleaning up temp file: $TEMP_STATE_FILE" + rm -f "$TEMP_STATE_FILE" + exit $PYTHON_EXIT_CODE +fi + +# Read the BROWSER_USE_RUNNING value from the temporary file +if [ -f "$TEMP_STATE_FILE" ]; then + BROWSER_USE_RUNNING=$(cat "$TEMP_STATE_FILE") + echo "Read BROWSER_USE_RUNNING from file: $BROWSER_USE_RUNNING" + echo "Cleaning up temp file: $TEMP_STATE_FILE" + rm -f "$TEMP_STATE_FILE" +else + BROWSER_USE_RUNNING="false" + echo "Warning: Temp file not found at: $TEMP_STATE_FILE" + echo "Defaulting BROWSER_USE_RUNNING to: false" +fi + +# Set the environment variable in the shell script based on captured value +export BROWSER_USE_RUNNING +echo "Environment variable BROWSER_USE_RUNNING set to: $BROWSER_USE_RUNNING" + +# Check if the BROWSER_USE_RUNNING environment variable is set to true +echo "BROWSER_USE_RUNNING: $BROWSER_USE_RUNNING" +if [ "$BROWSER_USE_RUNNING" = "true" ]; then + echo "Keeping virtual environment active for persistent session." +else + # Deactivate the virtual environment only if not running persistently + if [ -n "$VIRTUAL_ENV" ]; then + echo "Deactivating virtual environment" + deactivate + else + echo "Virtual environment was not active." + fi +fi \ No newline at end of file diff --git a/cli/browser-use.toolchain.json b/cli/browser-use.toolchain.json new file mode 100644 index 00000000..18ca2c0b --- /dev/null +++ b/cli/browser-use.toolchain.json @@ -0,0 +1,114 @@ +{ + "name": "browser-use", + "description": "Execute natural language browser automation commands", + "type": "terminal_command", + "functions": [ + { + "name": "browser_command", + "description": "Control a browser using natural language instructions", + "parameters": { + "properties": { + "prompt": { + "type": "string", + "description": "The natural language instruction (e.g., 'go to google.com and search for OpenAI'). **Ensure URLs are well-formed and include the protocol (e.g., https://).**" + }, + "url": { + "type": "string", + "description": "The starting URL for the browser automation task. Must include the protocol (e.g., https://example.com)." + }, + "provider": { + "type": "string", + "enum": [ + "Deepseek", + "Google", + "OpenAI", + "Anthropic" + ], + "default": "Deepseek", + "description": "The LLM provider to use. DeepSeek is recommended for most tasks due to its cost-effectiveness and performance. The system will automatically select the appropriate model based on your task requirements (e.g., vision capabilities)." + }, + "model_index": { + "type": "integer", + "description": "Optional index to select a specific model from the provider's available models (0-based). Available models per provider:\nDeepseek: [0: deepseek-chat]\nGoogle: [0: gemini-1.5-pro, 1: gemini-2.0-flash]\nOpenAI: [0: gpt-4o]\nAnthropic: [0: claude-3-5-sonnet-latest, 1: claude-3-5-sonnet-20241022]" + }, + "vision": { + "type": "boolean", + "default": false, + "description": "Enable vision capabilities (optional). **When enabled, the system will automatically select a vision-capable model from your chosen provider.**" + }, + "headless": { + "type": "boolean", + "default": false, + "description": "Run browser in headless mode (optional). 
**Headless mode might be necessary for certain environments or tasks but can limit interaction with visually-dependent elements.**" + }, + "record": { + "type": "boolean", + "default": false, + "description": "Enable session recording (optional). **Useful for debugging and understanding the agent's actions.**" + }, + "recordPath": { + "type": "string", + "default": "./tmp/record_videos", + "description": "Path to save recordings (optional). **Ensure the directory exists and is writable.**" + }, + "tracePath": { + "type": "string", + "description": "Path to save debugging traces (optional). **Traces can provide detailed information about the automation process.**" + }, + "maxSteps": { + "type": "integer", + "default": 10, + "description": "Maximum number of steps per task (optional). **Increase this for complex tasks, but be mindful of potential infinite loops.**" + }, + "maxActions": { + "type": "integer", + "default": 1, + "description": "Maximum actions per step (optional). **Adjust this based on the complexity of each step.**" + }, + "addInfo": { + "type": "string", + "description": "Additional context or instructions for the agent (optional). **Use this to provide specific details not covered in the main prompt.**" + }, + "tempFile": { + "type": "string", + "description": "Path to temporary file to store the browser session state (optional). **Used for resuming or closing specific sessions.**" + }, + "userDataDir": { + "type": "string", + "description": "Path to user data directory for a persistent browser session (optional). **Use this to maintain browser state across sessions (e.g., cookies, extensions).**" + } + }, + "required": [ + "prompt", + "url" + ] + } + } + ], + "examples": [ + { + "description": "Basic usage with default provider (DeepSeek)", + "command": "browser-use run \"search for OpenAI\" --url \"https://www.google.com\"" + }, + { + "description": "Using Google Gemini with vision for visual analysis", + "command": "browser-use run \"analyze the visual layout\" --url \"https://www.openai.com\" --provider Google --vision" + }, + { + "description": "Using OpenAI for complex analysis", + "command": "browser-use run \"analyze the layout and design\" --url \"https://www.example.com\" --provider OpenAI --vision" + }, + { + "description": "Using Anthropic with specific model version", + "command": "browser-use run \"analyze the documentation\" --url \"https://docs.example.com\" --provider Anthropic --model-index 1" + }, + { + "description": "Running a check in headless mode", + "command": "browser-use run \"check if site is up\" --url \"https://www.github.com\" --provider Deepseek --headless" + }, + { + "description": "Recording a debug session", + "command": "browser-use run \"test the login process\" --url \"https://example.com\" --provider Google --record --record-path ./debug_session" + } + ] +} \ No newline at end of file diff --git a/cli/browser_use_cli.py b/cli/browser_use_cli.py new file mode 100644 index 00000000..fde15355 --- /dev/null +++ b/cli/browser_use_cli.py @@ -0,0 +1,411 @@ +#!/usr/bin/env python3 +import asyncio +import argparse +import os +import sys +from pathlib import Path +import json +import tempfile + +# Add the project root to PYTHONPATH +project_root = str(Path(__file__).parent.parent) +sys.path.append(project_root) + +from browser_use.browser.browser import Browser, BrowserConfig +from browser_use.browser.context import BrowserContext, BrowserContextConfig, BrowserContextWindowSize +from src.agent.custom_agent import CustomAgent +from 
src.controller.custom_controller import CustomController +from src.agent.custom_prompts import CustomSystemPrompt +from src.utils import utils +from dotenv import load_dotenv +from src.trace_analyzer import analyze_trace + +# Load .env from the project root +load_dotenv(Path(project_root) / '.env') + +# Global variables for browser persistence +_global_browser = None +_global_browser_context = None + +def _get_browser_state(): + """Get browser state from temporary file.""" + temp_file = os.path.join(tempfile.gettempdir(), "browser_use_state") + try: + with open(temp_file, "r") as f: + return f.read().strip().lower() == "true" + except FileNotFoundError: + return False + +def _set_browser_state(running=True, temp_file_path=None): + """Set browser state in a temporary file.""" + value = str(running).lower() + if temp_file_path: + with open(temp_file_path, "w") as f: + f.write(value) + +async def initialize_browser( + headless=False, + window_size=(1920, 1080), + disable_security=False, + user_data_dir=None, + proxy=None +): + """Initialize a new browser instance with the given configuration.""" + global _global_browser, _global_browser_context + + # Check both environment and global variables + if _get_browser_state() or _global_browser is not None: + # Close any existing browser first + if _global_browser is not None: + await close_browser() + else: + _set_browser_state(False) + + window_w, window_h = window_size + + # Initialize browser with launch-time options + browser = Browser( + config=BrowserConfig( + headless=headless, + disable_security=disable_security, + chrome_instance_path=user_data_dir, + extra_chromium_args=[f"--window-size={window_w},{window_h}"], + proxy=proxy + ) + ) + + # Create initial browser context + browser_context = await browser.new_context( + config=BrowserContextConfig( + no_viewport=False, + browser_window_size=BrowserContextWindowSize( + width=window_w, + height=window_h + ), + disable_security=disable_security + ) + ) + + # Store globally + _global_browser = browser + _global_browser_context = browser_context + _set_browser_state(True) + return True + +async def close_browser(): + """Close the current browser instance if one exists.""" + global _global_browser, _global_browser_context + + if _global_browser_context is not None: + await _global_browser_context.close() + _global_browser_context = None + + if _global_browser is not None: + await _global_browser.close() + _global_browser = None + + _set_browser_state(False) + +async def run_browser_task( + prompt, + url=None, + provider="Deepseek", + model_index=None, + vision=False, + record=False, + record_path=None, + trace_path=None, + hide_trace=False, + max_steps=10, + max_actions=1, + add_info="", + on_init=None, + headless=False, + window_size=(1920, 1080), + disable_security=False, + user_data_dir=None, + proxy=None +): + """Execute a task using the current browser instance, auto-initializing if needed.""" + global _global_browser, _global_browser_context + + # Validate URL if provided + if url: + try: + from urllib.parse import urlparse + result = urlparse(url) + if not all([result.scheme, result.netloc]): + raise ValueError("Invalid URL format") + except Exception as e: + return f"Invalid URL provided: {str(e)}" + + # Store the trace file path if tracing is enabled + trace_file = None + + # Check if browser is running and initialize if needed + if not _get_browser_state(): + print("Browser not running. 
Starting browser session...") + if not await initialize_browser( + headless=headless, + window_size=window_size, + disable_security=disable_security, + user_data_dir=user_data_dir, + proxy=proxy + ): + return "Browser initialization failed" + + # Signal successful initialization if callback provided + if _get_browser_state() and on_init: + await on_init() + + # Verify browser state is consistent + if _global_browser is None or _global_browser_context is None: + print("Browser session state is inconsistent. Attempting to reinitialize...") + if not await initialize_browser( + headless=headless, + window_size=window_size, + disable_security=disable_security, + user_data_dir=user_data_dir, + proxy=proxy + ): + return "Browser reinitialization failed" + if _global_browser is None or _global_browser_context is None: + return "Browser session state remains inconsistent after reinitialization" + + # Initialize controller + controller = CustomController() + + # Normalize provider name to lowercase for consistency + provider = provider.lower() + + # Handle Deepseek + vision case + if provider == "deepseek" and vision: + print("WARNING: Deepseek does not support vision capabilities. Falling back to standard Deepseek model.") + vision = False + + # Select appropriate model based on provider, model_index, and vision requirement + provider_key = provider + if provider == "google": + provider_key = "gemini" + elif provider == "openai": + provider_key = "openai" + elif provider == "anthropic": + provider_key = "anthropic" + elif provider == "deepseek": + provider_key = "deepseek" + else: + raise ValueError(f"Unsupported provider: {provider}") + + if provider_key not in utils.model_names: + raise ValueError(f"No models found for provider: {provider}") + + available_models = utils.model_names[provider_key] + + if model_index is not None: + if not (0 <= model_index < len(available_models)): + raise ValueError(f"Invalid model_index {model_index} for provider {provider}. Available indices: 0-{len(available_models)-1}") + model_name = available_models[model_index] + else: + # Default model selection based on vision requirement + if provider_key == "deepseek": + model_name = available_models[0] # deepseek-chat + elif provider_key == "gemini": + model_name = available_models[0] # gemini-1.5-pro + elif provider_key == "openai": + model_name = available_models[0] # gpt-4o + elif provider_key == "anthropic": + model_name = available_models[0] # claude-3-5-sonnet-latest + + # Get LLM model + llm = utils.get_llm_model( + provider=provider_key, + model_name=model_name, + temperature=0.8, + vision=vision + ) + + # Create new context with tracing/recording enabled + if record or trace_path: + # Close existing context first + if _global_browser_context is not None: + await _global_browser_context.close() + + # Create new context with tracing/recording enabled + if trace_path: + trace_dir = Path(trace_path) + if not trace_path.endswith('.zip'): + trace_dir = trace_dir / 'trace.zip' + trace_dir.parent.mkdir(parents=True, exist_ok=True) + trace_file = str(trace_dir) + else: + trace_file = None + + _global_browser_context = await _global_browser.new_context( + config=BrowserContextConfig( + trace_path=trace_file, + save_recording_path=str(record_path) if record else None, + no_viewport=False, + browser_window_size=BrowserContextWindowSize( + width=1920, + height=1080 + ), + disable_security=False + ) + ) + + # Initialize agent with starting URL if provided + agent = CustomAgent( + task=f"First, navigate to {url}. 
Then, {prompt}" if url else prompt, + add_infos=add_info, + llm=llm, + browser=_global_browser, + browser_context=_global_browser_context, + controller=controller, + system_prompt_class=CustomSystemPrompt, + use_vision=vision, + tool_call_in_content=True, + max_actions_per_step=max_actions + ) + + # Run task + history = await agent.run(max_steps=max_steps) + result = history.final_result() + + # Close the context to ensure trace is saved + if _global_browser_context is not None: + await _global_browser_context.close() + _global_browser_context = None + + # Analyze and display trace if enabled + if trace_file and not hide_trace: + print("\nTrace Analysis:") + print("=" * 50) + try: + # Find the actual trace file in the nested directory + trace_files = list(Path(str(trace_path)).rglob('*.zip')) + if trace_files: + actual_trace = str(trace_files[0]) # Use the first trace file found + print("\nTrace Analysis:") + print("=" * 50) + try: + trace_analysis = await analyze_trace(actual_trace) + print(json.dumps(trace_analysis, indent=2)) + except Exception as e: + print(f"Failed to analyze trace: {e}") + else: + print("No trace file found") + except Exception as e: + print(f"Error finding trace file: {e}") + + return result + +def main(): + parser = argparse.ArgumentParser(description="Control a browser using natural language") + subparsers = parser.add_subparsers(dest="command", help="Commands") + + # Start command + start_parser = subparsers.add_parser("start", help="Start a new browser session") + start_parser.add_argument("--temp-file", help="Path to temporary file for storing browser state") + start_parser.add_argument("--headless", action="store_true", help="Run browser in headless mode") + start_parser.add_argument("--window-size", default="1920x1080", help="Browser window size (WxH)") + start_parser.add_argument("--disable-security", action="store_true", help="Disable browser security features") + start_parser.add_argument("--user-data-dir", help="Use custom Chrome profile directory") + start_parser.add_argument("--proxy", help="Proxy server URL") + + # Run command + run_parser = subparsers.add_parser("run", help="Run a task in the current browser session") + run_parser.add_argument("--temp-file", help="Path to temporary file for storing browser state") + run_parser.add_argument("prompt", help="The task to perform") + run_parser.add_argument("--url", required=True, help="The starting URL for the browser automation task") + run_parser.add_argument("--provider", "-p", choices=["Deepseek", "Google", "OpenAI", "Anthropic"], + default="Deepseek", help="The LLM provider to use (system will select appropriate model)") + run_parser.add_argument("--model-index", "-m", type=int, + help="Optional index to select a specific model from the provider's available models (0-based)") + run_parser.add_argument("--vision", action="store_true", help="Enable vision capabilities") + run_parser.add_argument("--record", action="store_true", help="Enable session recording") + run_parser.add_argument("--record-path", default="./tmp/record_videos", help="Path to save recordings") + run_parser.add_argument("--trace-path", default="./tmp/traces", help="Path to save debugging traces") + run_parser.add_argument("--hide-trace", action="store_true", help="Don't display trace analysis after task completion") + run_parser.add_argument("--max-steps", type=int, default=10, help="Maximum number of steps per task") + run_parser.add_argument("--max-actions", type=int, default=1, help="Maximum actions per step") + 
run_parser.add_argument("--add-info", help="Additional context for the agent") + + # Close command + close_parser = subparsers.add_parser("close", help="Close the current browser session") + close_parser.add_argument("--temp-file", help="Path to temporary file for storing browser state") + + # Analyze trace command + analyze_parser = subparsers.add_parser("analyze-trace", help="Analyze a Playwright trace file") + analyze_parser.add_argument("trace_path", help="Path to the trace file") + analyze_parser.add_argument("--output", "-o", help="Path to save the analysis output (default: print to stdout)") + + args = parser.parse_args() + + if args.command == "start": + # Parse window size + try: + window_w, window_h = map(int, args.window_size.split('x')) + except ValueError: + print(f"Invalid window size format: {args.window_size}. Using default 1920x1080") + window_w, window_h = 1920, 1080 + + # Start browser + success = asyncio.run(initialize_browser( + headless=args.headless, + window_size=(window_w, window_h), + disable_security=args.disable_security, + user_data_dir=args.user_data_dir, + proxy=args.proxy + )) + if success: + print("Browser session started successfully") + _set_browser_state(True, args.temp_file) + else: + print("Failed to start browser session") + _set_browser_state(False, args.temp_file) + + elif args.command == "run": + # Run task + result = asyncio.run(run_browser_task( + prompt=args.prompt, + url=args.url, + provider=args.provider, + model_index=args.model_index, + vision=args.vision, + record=args.record, + record_path=args.record_path if args.record else None, + trace_path=args.trace_path, + hide_trace=args.hide_trace, + max_steps=args.max_steps, + max_actions=args.max_actions, + add_info=args.add_info, + headless=False, + window_size=(1920, 1080), + disable_security=False, + user_data_dir=None, + proxy=None + )) + if result: + print(result) + + elif args.command == "close": + # Close browser + asyncio.run(close_browser()) + print("Browser session closed") + _set_browser_state(False, args.temp_file) + + elif args.command == "analyze-trace": + # Analyze trace + result = asyncio.run(analyze_trace(args.trace_path)) + if args.output: + with open(args.output, 'w') as f: + json.dump(result, f, indent=2) + print(f"Analysis saved to {args.output}") + else: + print(json.dumps(result, indent=2)) + + else: + parser.print_help() + +if __name__ == "__main__": + main() \ No newline at end of file diff --git a/cli/usage-guide.md b/cli/usage-guide.md new file mode 100644 index 00000000..8a26a61e --- /dev/null +++ b/cli/usage-guide.md @@ -0,0 +1,308 @@ +# Browser-Use API Usage Guide + +## Overview + +This guide explains how to use the browser-use API to automate browser interactions using different LLM models. The API provides a powerful way to control a browser programmatically through Python. 
+ +## Basic Setup + +```python +import asyncio +from browser_use.browser.browser import Browser, BrowserConfig +from browser_use.browser.context import BrowserContextConfig, BrowserContextWindowSize +from src.agent.custom_agent import CustomAgent +from src.controller.custom_controller import CustomController +from src.agent.custom_prompts import CustomSystemPrompt +from src.utils import utils +import os + +# Window size configuration +window_w, window_h = 1920, 1080 + +# Browser initialization +browser = Browser( + config=BrowserConfig( + headless=False, # Set to True for headless mode + disable_security=True, + extra_chromium_args=[f"--window-size={window_w},{window_h}"], + ) +) +``` + +## Browser Context Configuration + +```python +# Create a browser context with recording and tracing +browser_context = await browser.new_context( + config=BrowserContextConfig( + trace_path="./tmp/traces", # For debugging + save_recording_path="./tmp/record_videos", # For session recording + no_viewport=False, + browser_window_size=BrowserContextWindowSize( + width=window_w, height=window_h + ), + ) +) +``` + +## Model Configuration + +### DeepSeek (Default) + +```python +llm = utils.get_llm_model( + provider="deepseek", + model_name="deepseek-chat", # V2.5 model + temperature=0.8, + base_url="https://api.deepseek.com/v1", + api_key=os.getenv("DEEPSEEK_API_KEY", "") +) +``` + +### Gemini Pro + +```python +llm = utils.get_llm_model( + provider="gemini", + model_name="gemini-2.0-flash-exp", + temperature=1.0, + api_key=os.getenv("GOOGLE_API_KEY", "") +) +``` + +### GPT-4 Turbo + +```python +llm = utils.get_llm_model( + provider="openai", + model_name="gpt-4-turbo-preview", + temperature=0.8, + api_key=os.getenv("OPENAI_API_KEY", "") +) +``` + +### Claude-3 Opus + +```python +llm = utils.get_llm_model( + provider="anthropic", + model_name="claude-3-opus-20240229", + temperature=0.8, + api_key=os.getenv("ANTHROPIC_API_KEY", "") +) +``` + +## Agent Configuration + +```python +# Initialize controller +controller = CustomController() + +# Initialize agent +agent = CustomAgent( + task="your task description here", + add_infos="", # Optional hints for the LLM + llm=llm, # LLM model configured above + browser=browser, + browser_context=browser_context, + controller=controller, + system_prompt_class=CustomSystemPrompt, + use_vision=False, # Must be False for DeepSeek + tool_call_in_content=True, # Required for DeepSeek + max_actions_per_step=1 # Control action granularity +) +``` + +## Running Tasks + +```python +# Run the agent with a maximum number of steps +history = await agent.run(max_steps=10) + +# Access results +print("Final Result:", history.final_result()) +print("Errors:", history.errors()) +print("Model Actions:", history.model_actions()) +print("Thoughts:", history.model_thoughts()) +``` + +## Common Tasks + +### Navigation + +```python +task="go to google.com" +``` + +### Search + +```python +task="go to google.com and search for 'OpenAI'" +``` + +### Form Filling + +```python +task="go to example.com/login and fill in username 'user' and password 'pass'" +``` + +### Clicking Elements + +```python +task="click the 'Submit' button" +``` + +## Model-Specific Considerations + +1. **DeepSeek** + - Set `use_vision=False` + - Set `tool_call_in_content=True` + - Uses OpenAI-compatible API format + +2. **Gemini** + - Set `use_vision=True` + - Works well with visual tasks + +3. 
**GPT-4 & Claude-3** + - Support both vision and non-vision tasks + - Higher reasoning capabilities for complex tasks + +## Best Practices + +1. **Error Handling** + - Always check `history.errors()` for any issues + - Monitor `history.model_thoughts()` for debugging + +2. **Resource Management** + - Use async context managers for browser and context + - Close resources properly after use + +3. **Task Description** + - Be specific and clear in task descriptions + - Include necessary context in `add_infos` + +4. **Performance** + - Use `headless=True` for automated tasks + - Adjust `max_steps` and `max_actions_per_step` based on task complexity + +## Example Implementation + +```python +async def main(): + # Browser setup + browser = Browser(config=BrowserConfig(...)) + + async with await browser.new_context(...) as browser_context: + # Controller setup + controller = CustomController() + + # Agent setup + agent = CustomAgent( + task="your task", + llm=your_configured_llm, + browser=browser, + browser_context=browser_context, + controller=controller, + system_prompt_class=CustomSystemPrompt, + use_vision=False, + tool_call_in_content=True, + max_actions_per_step=1 + ) + + # Run task + history = await agent.run(max_steps=10) + + # Process results + print(history.final_result()) + +if __name__ == "__main__": + asyncio.run(main()) +``` + +## Troubleshooting + +1. **JSON Schema Errors with DeepSeek** + - Ensure using latest DeepSeek V2.5 endpoint + - Verify correct base URL and API key + - Use `tool_call_in_content=True` + +2. **Browser Connection Issues** + - Check browser configuration + - Verify Chrome/Chromium installation + - Ensure proper port access + +3. **Model Response Issues** + - Adjust temperature for more/less deterministic behavior + - Try different models for complex tasks + - Check API key validity and quotas + +## Tracing and Debugging + +### Enabling Tracing + +```python +# Enable tracing in browser context +browser_context = await browser.new_context( + config=BrowserContextConfig( + trace_path="./tmp/traces/trace.zip", # Must have .zip extension + no_viewport=False, + browser_window_size=BrowserContextWindowSize( + width=window_w, height=window_h + ), + ) +) +``` + +### Using Traces for Debugging + +1. **Recording Traces** + - Traces are automatically saved when `trace_path` is provided + - Files are saved with `.zip` extension + - Contains browser actions, network requests, and screenshots + +2. **Analyzing Traces** + - Use Playwright Trace Viewer to analyze traces + - View step-by-step browser actions + - Inspect network requests and responses + - Review page states at each step + +## Report Generation + +### Best Practices + +1. **Structure** + - Always include page title and headings + - List interactive elements with their types + - Provide clear hierarchy of content + - Include relevant metadata (URLs, timestamps) + +2. **Content** + - Focus on task-relevant information + - Include both static and dynamic content + - Document interactive elements and their states + - Note any errors or warnings + +3. 
**Format** + - Use clear section headings + - Include numbered or bulleted lists + - Add summary sections for complex pages + - Use markdown formatting for readability + +### Example Report Task + +```python +task = "create a report about the page structure, including any interactive elements found" +add_infos = "Focus on navigation elements and forms" + +agent = CustomAgent( + task=task, + add_infos=add_infos, + llm=llm, + browser=browser, + browser_context=browser_context, + controller=controller, + system_prompt_class=CustomSystemPrompt, + use_vision=True, # Enable vision for better structure analysis + max_actions_per_step=1 +) +``` diff --git a/demo_logging.py b/demo_logging.py new file mode 100644 index 00000000..f7c70093 --- /dev/null +++ b/demo_logging.py @@ -0,0 +1,99 @@ +import asyncio +from src.utils.task_logging import ( + TaskLogger, TaskStatus, ActionType, RetryConfig, + ColorScheme, SeparatorStyle +) + +async def demo_logging(): + # Initialize logger with custom styles + logger = TaskLogger( + "demo_task", + "Demonstrate all logging features", + color_scheme=ColorScheme(), + separator_style=SeparatorStyle( + task="★" * 40, + phase="•" * 30, + error="!" * 35 + ) + ) + + # Start navigation phase + logger.start_phase("Navigation Phase") + logger.update_step( + "Navigate to example.com", + TaskStatus.RUNNING, + action_type=ActionType.NAVIGATION, + context={"url": "https://example.com"} + ) + + # Update browser state + logger.update_browser_state( + url="https://example.com", + page_ready=True, + dynamic_content_loaded=True, + visible_elements=15, + page_title="Example Domain" + ) + + # Complete navigation + logger.update_step( + "Page loaded successfully", + TaskStatus.COMPLETE, + action_type=ActionType.NAVIGATION, + progress=0.25, + results={"status": 200, "load_time": 0.5} + ) + + # Start interaction phase + logger.start_phase("Interaction Phase") + logger.update_step( + "Click search button", + TaskStatus.RUNNING, + action_type=ActionType.INTERACTION, + context={"element": "search_button"} + ) + + # Simulate error and retry + async def failing_operation(): + raise ValueError("Search button not found") + + try: + await logger.execute_with_retry( + failing_operation, + "click_search", + RetryConfig(max_retries=2, base_delay=0.1) + ) + except ValueError: + pass + + # Start extraction phase + logger.start_phase("Data Extraction Phase") + logger.update_step( + "Extract search results", + TaskStatus.RUNNING, + action_type=ActionType.EXTRACTION, + progress=0.75 + ) + + # Complete extraction + logger.update_step( + "Data extracted successfully", + TaskStatus.COMPLETE, + action_type=ActionType.EXTRACTION, + progress=1.0, + results={"items_found": 10} + ) + + # Display log history + print("\nLog History:") + print("=" * 80) + for entry in logger.get_log_history(): + print(entry) + print("=" * 80) + + # Log final state + print("\nFinal State:") + logger.log_state() + +if __name__ == "__main__": + asyncio.run(demo_logging()) \ No newline at end of file diff --git a/docs/enhanced_tracing.md b/docs/enhanced_tracing.md new file mode 100644 index 00000000..b69aea07 --- /dev/null +++ b/docs/enhanced_tracing.md @@ -0,0 +1,297 @@ +# Enhanced Tracing Documentation + +## Overview + +The enhanced tracing system provides detailed insights into browser automation actions, decision-making processes, and error recovery strategies. This documentation covers all major components and their usage. + +## Components + +### 1. 
Action Context +Captures detailed information about element states and interactions. + +```json +{ + "action_context": { + "element_state_before": { + "visible": true, + "computed_styles": { + "pointer-events": "auto", + "opacity": "1", + "z-index": "100" + }, + "focus_state": "not-focused", + "accessibility": { + "aria-hidden": "false", + "aria-disabled": "false" + } + }, + "element_state_after": { + "visible": true, + "focus_state": "focused", + "triggered_events": ["click", "focus"] + } + } +} +``` + +**Key Features:** +- Before/after state tracking +- Computed style analysis +- Focus and accessibility state monitoring +- Event triggering information + +### 2. Decision Trail +Records the AI model's decision-making process and confidence levels. + +```json +{ + "decision_trail": { + "confidence_threshold": 0.8, + "attention_weights": { + "element_text": 0.6, + "aria_label": 0.3, + "position": 0.1 + }, + "alternative_paths": [ + { + "action": "click hamburger menu", + "rejected_reason": "settings directly visible", + "confidence": 0.4 + } + ] + } +} +``` + +**Key Features:** +- Confidence thresholds +- Attention weight distribution +- Alternative action consideration +- Rejection reasoning + +### 3. Element Identification +Provides comprehensive element location and relationship information. + +```json +{ + "element_identification": { + "relative_position": { + "from_top_nav": "20px from right", + "from_viewport": "top-right quadrant" + }, + "hierarchy": { + "parent": "nav.top-bar", + "siblings": ["button.new-template", "button.help"], + "children": ["span.icon", "span.text"] + } + } +} +``` + +**Key Features:** +- Relative positioning +- Element hierarchy +- Sibling relationships +- Visual landmarks + +### 4. Visual State Tracking +Monitors visual changes and layout shifts during automation. + +```json +{ + "visual_state": { + "screenshot_diffs": { + "before_click": "diff_1.png", + "after_click": "diff_2.png", + "changes_highlighted": true + }, + "layout_shifts": [ + { + "timestamp": "T+100ms", + "elements_moved": ["#settings-panel"], + "cumulative_layout_shift": 0.1 + } + ] + } +} +``` + +**Key Features:** +- Screenshot diffing +- Layout shift tracking +- Element visibility analysis +- Viewport position monitoring + +### 5. Error Recovery +Provides sophisticated error handling and recovery strategies. + +```json +{ + "error_recovery": { + "retry_strategy": { + "backoff": "exponential", + "max_attempts": 3, + "conditions": { + "network_stable": true, + "animations_complete": true + } + }, + "environment_factors": { + "network_conditions": { + "latency": "50ms", + "bandwidth": "10Mbps" + } + } + } +} +``` + +**Key Features:** +- Retry strategies +- Environmental monitoring +- Recovery checkpoints +- State restoration + +### 6. Performance Monitoring +Tracks timing and performance metrics. 
+ +```json +{ + "timing_analysis": { + "action_breakdown": { + "element_search": "150ms", + "interaction_delay": "50ms", + "animation_duration": "200ms" + }, + "performance_markers": { + "first_paint": "100ms", + "first_contentful_paint": "200ms" + } + } +} +``` + +**Key Features:** +- Action timing breakdown +- Performance markers +- Cumulative timing +- Resource utilization + +## Usage + +### Basic Usage +```python +analyzer = EnhancedTraceAnalyzer(trace_file_path) +result = await analyzer.analyze_all() +``` + +### Component-Specific Analysis +```python +# Analyze specific components +timing = await analyzer.analyze_timing() +visual = await analyzer.analyze_visual_state() +recovery = await analyzer.analyze_error_recovery() +``` + +### Error Recovery Integration +```python +recovery_info = await analyzer.analyze_recovery_info() +if recovery_info["retry_strategy"]["backoff"] == "exponential": + # Implement exponential backoff retry logic +``` + +## Best Practices + +1. **Performance Optimization** + - Monitor cumulative timing metrics + - Track resource utilization + - Optimize retry strategies + +2. **Error Recovery** + - Use exponential backoff for retries + - Monitor environmental factors + - Maintain state checkpoints + +3. **Visual Verification** + - Use screenshot diffs for validation + - Monitor layout shifts + - Track element visibility + +4. **Decision Making** + - Review confidence thresholds + - Analyze attention weights + - Consider alternative paths + +## Common Issues and Solutions + +### 1. Element Not Found +```json +{ + "error_recovery": { + "retry_strategy": { + "backoff": "exponential", + "conditions": { + "animations_complete": true + } + } + } +} +``` +**Solution:** Wait for animations to complete and retry with exponential backoff. + +### 2. Layout Shifts +```json +{ + "visual_state": { + "layout_shifts": [ + { + "cumulative_layout_shift": 0.1 + } + ] + } +} +``` +**Solution:** Monitor CLS and wait for layout stability before interactions. + +### 3. Network Issues +```json +{ + "environment_factors": { + "network_conditions": { + "stability": "unstable" + } + } +} +``` +**Solution:** Implement network condition checks in retry strategy. + +## API Reference + +### EnhancedTraceAnalyzer Methods + +#### analyze_action_context() +Returns detailed information about element states and interactions. + +#### analyze_decision_trail() +Returns the AI model's decision-making process and confidence levels. + +#### analyze_element_identification() +Returns comprehensive element location and relationship information. + +#### analyze_visual_state() +Returns visual changes and layout shift information. + +#### analyze_error_recovery() +Returns error handling and recovery strategies. + +#### analyze_timing() +Returns detailed timing and performance metrics. + +## Contributing + +When adding new tracing features: + +1. Follow the existing data structure pattern +2. Add comprehensive test coverage +3. Update documentation with examples +4. 
Include error handling cases \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml new file mode 100644 index 00000000..c260ce12 --- /dev/null +++ b/pyproject.toml @@ -0,0 +1,23 @@ +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "browser-use" +version = "0.1.19" +authors = [ + { name = "Your Name", email = "your.email@example.com" } +] +description = "A Python package for browser automation with AI" +readme = "README.md" +requires-python = ">=3.11" +classifiers = [ + "Programming Language :: Python :: 3", + "License :: OSI Approved :: MIT License", + "Operating System :: OS Independent", +] + +[tool.setuptools.packages.find] +where = ["."] +include = ["src*"] +namespaces = false \ No newline at end of file diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 00000000..88001be3 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,19 @@ +[pytest] +asyncio_mode = auto +asyncio_default_fixture_loop_scope = function + +# Test discovery +testpaths = tests +python_files = test_*.py +python_classes = Test* +python_functions = test_* + +# Output configuration +console_output_style = count +log_cli = True +log_cli_level = INFO + +# Warnings +filterwarnings = + ignore::DeprecationWarning + ignore::pytest.PytestDeprecationWarning \ No newline at end of file diff --git a/pytest_output.txt b/pytest_output.txt new file mode 100644 index 00000000..fe9b67ce --- /dev/null +++ b/pytest_output.txt @@ -0,0 +1,64 @@ +============================= test session starts ============================== +platform darwin -- Python 3.11.9, pytest-8.3.4, pluggy-1.5.0 -- /Users/dmieloch/Dev/experiments/web-ui/venv/bin/python +cachedir: .pytest_cache +rootdir: /Users/dmieloch/Dev/experiments/web-ui +configfile: pytest.ini +plugins: cov-6.0.0, asyncio-0.25.2, anyio-4.8.0, timeout-2.3.1 +asyncio: mode=Mode.AUTO, asyncio_default_fixture_loop_scope=function +collecting ... +----------------------------- live log collection ------------------------------ +INFO root:service.py:51 Anonymized telemetry enabled. See https://github.com/gregpr07/browser-use for more information. 
+INFO httpx:_client.py:1038 HTTP Request: GET https://api.gradio.app/gradio-messaging/en "HTTP/1.1 200 OK" +collected 28 items + +tests/test_browser_cli.py::TestBrowserInitialization::test_basic_initialization +-------------------------------- live log setup -------------------------------- +INFO tests.test_browser_cli:test_browser_cli.py:28 Cleanup start - Browser state: False +INFO tests.test_browser_cli:test_browser_cli.py:39 Globals and environment reset before test +PASSED [ 1/28] +------------------------------ live log teardown ------------------------------- +INFO tests.test_browser_cli:test_browser_cli.py:45 Cleanup finally - Browser state: False +INFO tests.test_browser_cli:test_browser_cli.py:65 Globals and environment reset after test + +tests/test_browser_cli.py::TestBrowserInitialization::test_window_size +-------------------------------- live log setup -------------------------------- +INFO tests.test_browser_cli:test_browser_cli.py:28 Cleanup start - Browser state: False +INFO tests.test_browser_cli:test_browser_cli.py:39 Globals and environment reset before test +-------------------------------- live log call --------------------------------- +INFO src.agent.custom_agent:custom_agent.py:356 🚀 Starting task: go to data:text/html, +INFO src.agent.custom_agent:custom_agent.py:196 +📍 Step 1 +INFO httpx:_client.py:1786 HTTP Request: POST https://api.deepseek.com/v1/chat/completions "HTTP/1.1 422 Unprocessable Entity" +INFO httpx:_client.py:1038 HTTP Request: POST https://api.deepseek.com/v1/chat/completions "HTTP/1.1 200 OK" +INFO src.agent.custom_agent:custom_agent.py:128 🤷 Eval: Unknown - No previous actions to evaluate. +INFO src.agent.custom_agent:custom_agent.py:129 🧠 New Memory: +INFO src.agent.custom_agent:custom_agent.py:130 ⏳ Task Progress: +INFO src.agent.custom_agent:custom_agent.py:131 🤔 Thought: The task requires navigating to a specific URL to display the window size. The current page is 'about:blank', and no actions have been taken yet. +INFO src.agent.custom_agent:custom_agent.py:132 🎯 Summary: Navigate to the specified URL to display the window size. +INFO src.agent.custom_agent:custom_agent.py:134 🛠️ Action 1/1: {"go_to_url":{"url":"data:text/html,"}} +INFO src.agent.custom_agent:custom_agent.py:207 🧠 All Memory: +INFO browser_use.controller.service:service.py:59 🔗 Navigated to data:text/html, +INFO src.agent.custom_agent:custom_agent.py:196 +📍 Step 2 +INFO httpx:_client.py:1786 HTTP Request: POST https://api.deepseek.com/v1/chat/completions "HTTP/1.1 422 Unprocessable Entity" +INFO httpx:_client.py:1038 HTTP Request: POST https://api.deepseek.com/v1/chat/completions "HTTP/1.1 200 OK" +INFO src.agent.custom_agent:custom_agent.py:128 ✅ Eval: Success - Successfully navigated to the specified URL and displayed the window size. +INFO src.agent.custom_agent:custom_agent.py:129 🧠 New Memory: Window size: 800x600 +INFO src.agent.custom_agent:custom_agent.py:130 ⏳ Task Progress: 1. Navigated to the specified URL to display the window size. +INFO src.agent.custom_agent:custom_agent.py:131 🤔 Thought: The task has been completed as the window size is now displayed on the page. No further actions are required. +INFO src.agent.custom_agent:custom_agent.py:132 🎯 Summary: The task is complete. The window size is displayed as 800x600. +INFO src.agent.custom_agent:custom_agent.py:134 🛠️ Action 1/1: {"done":{"text":"The task is complete. 
The window size is displayed as 800x600."}} +INFO src.agent.custom_agent:custom_agent.py:207 🧠 All Memory: Window size: 800x600 + +INFO src.agent.custom_agent:custom_agent.py:218 📄 Result: The task is complete. The window size is displayed as 800x600. +INFO src.agent.custom_agent:custom_agent.py:399 ✅ Task completed successfully +WARNING src.agent.custom_agent:custom_agent.py:260 No history or first screenshot to create GIF from +PASSED [ 2/28] +------------------------------ live log teardown ------------------------------- +INFO tests.test_browser_cli:test_browser_cli.py:45 Cleanup finally - Browser state: False +INFO tests.test_browser_cli:test_browser_cli.py:65 Globals and environment reset after test + +tests/test_browser_cli.py::TestBrowserInitialization::test_headless_mode +-------------------------------- live log setup -------------------------------- +INFO tests.test_browser_cli:test_browser_cli.py:28 Cleanup start - Browser state: False +INFO tests.test_browser_cli:test_browser_cli.py:39 Globals and environment reset before test diff --git a/src/__init__.py b/src/__init__.py index 93fbe7f8..0edfbf30 100644 --- a/src/__init__.py +++ b/src/__init__.py @@ -3,4 +3,23 @@ # @Author : wenshao # @Email : wenshaoguo1026@gmail.com # @Project : browser-use-webui -# @FileName: __init__.py.py +# @FileName: __init__.py + +from browser_use.browser.browser import Browser +from browser_use.browser.browser import BrowserConfig +from browser_use.browser.context import BrowserContextConfig, BrowserContextWindowSize +from .agent.custom_agent import CustomAgent +from .controller.custom_controller import CustomController +from .agent.custom_prompts import CustomSystemPrompt +from .utils import utils + +__all__ = [ + 'Browser', + 'BrowserConfig', + 'BrowserContextConfig', + 'BrowserContextWindowSize', + 'CustomAgent', + 'CustomController', + 'CustomSystemPrompt', + 'utils' +] diff --git a/src/agent/custom_agent.py b/src/agent/custom_agent.py index ff8908c8..0332067d 100644 --- a/src/agent/custom_agent.py +++ b/src/agent/custom_agent.py @@ -8,11 +8,12 @@ import logging import pdb import traceback -from typing import Optional, Type +from typing import Optional, Type, Any, Dict from PIL import Image, ImageDraw, ImageFont import os import base64 import io +import datetime from browser_use.agent.prompts import SystemPrompt from browser_use.agent.service import Agent @@ -37,11 +38,13 @@ BaseMessage, ) from src.utils.agent_state import AgentState +from src.utils.logging import BatchedEventLogger from .custom_massage_manager import CustomMassageManager from .custom_views import CustomAgentOutput, CustomAgentStepInfo logger = logging.getLogger(__name__) +batched_logger = BatchedEventLogger(logger) class CustomAgent(Agent): @@ -117,23 +120,41 @@ def _setup_action_models(self) -> None: self.AgentOutput = CustomAgentOutput.type_with_custom_actions(self.ActionModel) def _log_response(self, response: CustomAgentOutput) -> None: - """Log the model's response""" - if "Success" in response.current_state.prev_action_evaluation: - emoji = "✅" - elif "Failed" in response.current_state.prev_action_evaluation: - emoji = "❌" - else: - emoji = "🤷" - - logger.info(f"{emoji} Eval: {response.current_state.prev_action_evaluation}") - logger.info(f"🧠 New Memory: {response.current_state.important_contents}") - logger.info(f"⏳ Task Progress: {response.current_state.completed_contents}") - logger.info(f"🤔 Thought: {response.current_state.thought}") - logger.info(f"🎯 Summary: {response.current_state.summary}") + """Log the model's 
response in a structured format""" + evaluation_status = "success" if "Success" in response.current_state.prev_action_evaluation else "failed" + + log_data = { + "timestamp": datetime.datetime.now().isoformat(), + "action": "model_response", + "status": evaluation_status, + "state": { + "evaluation": response.current_state.prev_action_evaluation, + "memory": response.current_state.important_contents, + "progress": response.current_state.completed_contents, + "thought": response.current_state.thought, + "summary": response.current_state.summary + } + } + + logger.info( + f"Model Response: {evaluation_status}", + extra={ + "event_type": "model_response", + "event_data": log_data + } + ) + + # Batch action logging for i, action in enumerate(response.action): - logger.info( - f"🛠️ Action {i + 1}/{len(response.action)}: {action.model_dump_json(exclude_unset=True)}" + batched_logger.add_event( + "action", + { + "action_number": i + 1, + "total_actions": len(response.action), + "action_data": json.loads(action.model_dump_json(exclude_unset=True)) + } ) + batched_logger.flush() def update_step_info( self, model_output: CustomAgentOutput, step_info: CustomAgentStepInfo = None @@ -193,7 +214,19 @@ async def get_next_action(self, input_messages: list[BaseMessage]) -> AgentOutpu @time_execution_async("--step") async def step(self, step_info: Optional[CustomAgentStepInfo] = None) -> None: """Execute one step of the task""" - logger.info(f"\n📍 Step {self.n_steps}") + step_data = { + "step_number": self.n_steps, + "timestamp": datetime.datetime.now().isoformat() + } + + logger.info( + f"Starting step {self.n_steps}", + extra={ + "event_type": "step_start", + "event_data": step_data + } + ) + state = None model_output = None result: list[ActionResult] = [] @@ -204,9 +237,18 @@ async def step(self, step_info: Optional[CustomAgentStepInfo] = None) -> None: input_messages = self.message_manager.get_messages() model_output = await self.get_next_action(input_messages) self.update_step_info(model_output, step_info) - logger.info(f"🧠 All Memory: {step_info.memory}") + + if step_info: + logger.debug( + "Step memory updated", + extra={ + "event_type": "memory_update", + "event_data": {"memory": step_info.memory} + } + ) + self._save_conversation(input_messages, model_output) - self.message_manager._remove_last_state_message() # we dont want the whole state in the chat history + self.message_manager._remove_last_state_message() self.message_manager.add_model_output(model_output) result: list[ActionResult] = await self.controller.multi_act( @@ -215,17 +257,37 @@ async def step(self, step_info: Optional[CustomAgentStepInfo] = None) -> None: self._last_result = result if len(result) > 0 and result[-1].is_done: - logger.info(f"📄 Result: {result[-1].extracted_content}") + logger.info( + "Task completed", + extra={ + "event_type": "task_complete", + "event_data": { + "result": result[-1].extracted_content + } + } + ) self.consecutive_failures = 0 except Exception as e: result = self._handle_step_error(e) self._last_result = result + logger.error( + f"Step error: {str(e)}", + extra={ + "event_type": "step_error", + "event_data": { + "error": str(e), + "traceback": traceback.format_exc() + } + }, + exc_info=True + ) finally: if not result: return + for r in result: if r.error: self.telemetry.capture( @@ -234,8 +296,28 @@ async def step(self, step_info: Optional[CustomAgentStepInfo] = None) -> None: error=r.error, ) ) + logger.error( + f"Action error: {r.error}", + extra={ + "event_type": "action_error", + "event_data": { + 
"error": r.error + } + } + ) + if state: self._make_history_item(model_output, state, result) + + step_data["status"] = "completed" + logger.info( + f"Step {self.n_steps} completed", + extra={ + "event_type": "step_complete", + "event_data": step_data + } + ) + def create_history_gif( self, output_path: str = 'agent_history.gif', diff --git a/src/agent/custom_prompts.py b/src/agent/custom_prompts.py index 56aeb64b..b64b3b9f 100644 --- a/src/agent/custom_prompts.py +++ b/src/agent/custom_prompts.py @@ -66,9 +66,10 @@ def important_rules(self) -> str: - Use scroll to find elements you are looking for 5. TASK COMPLETION: - - If you think all the requirements of user\'s instruction have been completed and no further operation is required, output the done action to terminate the operation process. + - If you think all the requirements of user's instruction have been completed and no further operation is required, output the done action to terminate the operation process. - Don't hallucinate actions. - If the task requires specific information - make sure to include everything in the done function. This is what the user will see. + - When generating reports about page structure, always include the page title and headings. - If you are running out of steps (current step), think about speeding it up, and ALWAYS use the done action as the last action. 6. VISUAL CONTEXT: @@ -163,13 +164,13 @@ def __init__( def get_user_message(self) -> HumanMessage: state_description = f""" - 1. Task: {self.step_info.task} + 1. Task: {self.step_info.task if self.step_info else ""} 2. Hints(Optional): - {self.step_info.add_infos} + {self.step_info.add_infos if self.step_info else ""} 3. Memory: - {self.step_info.memory} + {self.step_info.memory if self.step_info else ""} 4. Task Progress: - {self.step_info.task_progress} + {self.step_info.task_progress if self.step_info else ""} 5. Current url: {self.state.url} 6. 
Available tabs: {self.state.tabs} diff --git a/src/browser/custom_context.py b/src/browser/custom_context.py index 6de991bf..c0aa1961 100644 --- a/src/browser/custom_context.py +++ b/src/browser/custom_context.py @@ -8,6 +8,7 @@ import json import logging import os +from pathlib import Path from browser_use.browser.browser import Browser from browser_use.browser.context import BrowserContext, BrowserContextConfig @@ -25,6 +26,7 @@ def __init__( config: BrowserContextConfig = BrowserContextConfig() ): super(CustomBrowserContext, self).__init__(browser=browser, config=config) + self._context = None async def _create_context(self, browser: PlaywrightBrowser) -> PlaywrightBrowserContext: """Creates a new browser context with anti-detection measures and loads cookies if available.""" @@ -93,4 +95,20 @@ async def _create_context(self, browser: PlaywrightBrowser) -> PlaywrightBrowser """ ) + self._context = context return context + + @property + def context(self) -> PlaywrightBrowserContext | None: + """Get the underlying Playwright browser context.""" + return self._context + + async def close(self): + """Close the browser context and stop tracing if enabled.""" + if self.config.trace_path and self._context: + trace_path = Path(self.config.trace_path) + trace_path.parent.mkdir(parents=True, exist_ok=True) + if not trace_path.suffix: + trace_path = trace_path / "trace.zip" + await self._context.tracing.stop(path=str(trace_path)) + await super().close() diff --git a/src/controller/custom_controller.py b/src/controller/custom_controller.py index 6e57dd4a..21a56b5a 100644 --- a/src/controller/custom_controller.py +++ b/src/controller/custom_controller.py @@ -8,6 +8,7 @@ from browser_use.agent.views import ActionResult from browser_use.browser.context import BrowserContext from browser_use.controller.service import Controller +from browser_use.browser.views import BrowserState class CustomController(Controller): @@ -31,3 +32,8 @@ async def paste_from_clipboard(browser: BrowserContext): await page.keyboard.type(text) return ActionResult(extracted_content=text) + + async def get_browser_state(self, browser_context: BrowserContext) -> BrowserState: + """Get the current state of the browser""" + state = await browser_context.get_state(use_vision=True) + return state diff --git a/src/trace_analyzer.py b/src/trace_analyzer.py new file mode 100644 index 00000000..0590dd5e --- /dev/null +++ b/src/trace_analyzer.py @@ -0,0 +1,644 @@ +import json +import zipfile +from pathlib import Path +from typing import Dict, List, Optional, Any +import asyncio + +class PlaywrightTrace: + def __init__(self, trace_path: str): + self.trace_path = Path(trace_path) + self.actions: List[Dict[str, Any]] = [] + self.network_requests: List[Dict[str, Any]] = [] + self.console_logs: List[str] = [] + self.errors: List[str] = [] + + @classmethod + async def parse(cls, trace_path: str) -> 'PlaywrightTrace': + """Parse a Playwright trace file and return a PlaywrightTrace instance.""" + trace = cls(trace_path) + await trace._parse_trace_file() + return trace + + async def _parse_trace_file(self): + """Parse the trace.zip file and extract relevant information.""" + if not self.trace_path.exists(): + raise FileNotFoundError(f"Trace file not found: {self.trace_path}") + + try: + with zipfile.ZipFile(self.trace_path, 'r') as zip_ref: + # List all files in the zip + files = zip_ref.namelist() + + # Parse trace files + for file in files: + if file.endswith('.trace'): + trace_data = zip_ref.read(file).decode('utf-8') + for line in 
trace_data.split('\n'): + if line.strip(): + try: + event = json.loads(line) + self._process_event(event) + except json.JSONDecodeError: + self.errors.append(f"Failed to parse trace event: {line}") + + # Parse network HAR if available + har_files = [f for f in files if f.endswith('.har')] + if har_files: + har_data = json.loads(zip_ref.read(har_files[0]).decode('utf-8')) + self._process_har(har_data) + + except zipfile.BadZipFile: + raise ValueError(f"Invalid trace file format: {self.trace_path}") + + def _process_event(self, event: Dict[str, Any]): + """Process a single trace event and categorize it.""" + if 'type' not in event: + return + + event_type = event['type'] + + if event_type == 'before' or event_type == 'after': + # Handle action events + if 'method' in event and 'params' in event: + self.actions.append({ + 'type': event['method'], + 'timestamp': event.get('timestamp', 0), + 'duration': event.get('duration', 0), + 'params': event['params'], + 'success': event_type == 'after' and 'error' not in event, + 'error': event.get('error') + }) + elif event_type == 'console': + # Handle console messages + if 'text' in event: + self.console_logs.append(event['text']) + elif event_type == 'error': + # Handle error events + if 'error' in event: + self.errors.append(event['error'].get('message', str(event['error']))) + + def _process_har(self, har_data: Dict[str, Any]): + """Process HAR data to extract network requests.""" + if 'log' in har_data and 'entries' in har_data['log']: + for entry in har_data['log']['entries']: + request = entry.get('request', {}) + response = entry.get('response', {}) + + self.network_requests.append({ + 'url': request.get('url'), + 'method': request.get('method'), + 'status': response.get('status'), + 'statusText': response.get('statusText'), + 'duration': entry.get('time'), # in milliseconds + 'failure': response.get('status', 0) >= 400 + }) + +async def analyze_trace(trace_path: str) -> dict: + """Parse a Playwright trace file and return structured data.""" + trace = await PlaywrightTrace.parse(trace_path) + return { + "actions": trace.actions, + "network_requests": trace.network_requests, + "console_logs": trace.console_logs, + "errors": trace.errors, + "summary": { + "total_actions": len(trace.actions), + "failed_actions": sum(1 for a in trace.actions if not a['success']), + "total_requests": len(trace.network_requests), + "failed_requests": sum(1 for r in trace.network_requests if r.get('failure')), + "total_errors": len(trace.errors), + "error_summary": "\n".join(trace.errors) if trace.errors else "No errors" + } + } + +if __name__ == "__main__": + # Example usage + async def main(): + result = await analyze_trace("path/to/trace.zip") + print(json.dumps(result, indent=2)) + + asyncio.run(main()) + +class EnhancedTraceAnalyzer: + """Enhanced trace analyzer for detailed browser automation insights. 
+ + This class provides comprehensive analysis of browser automation traces, including: + - Action context and element states + - Decision-making processes and confidence levels + - Element identification and relationships + - Visual state changes and layout shifts + - Error recovery strategies + - Performance metrics and timing analysis + + Example: + ```python + analyzer = EnhancedTraceAnalyzer("trace.zip") + result = await analyzer.analyze_all() + + # Component-specific analysis + timing = await analyzer.analyze_timing() + visual = await analyzer.analyze_visual_state() + ``` + """ + + def __init__(self, trace_file_path: str): + """Initialize the enhanced trace analyzer. + + Args: + trace_file_path: Path to the trace file (ZIP format) containing enhanced trace data. + """ + self.trace_file_path = trace_file_path + self._trace_data: Optional[Dict[str, Any]] = None + + async def _load_trace_data(self) -> Dict[str, Any]: + """Load and validate enhanced trace data from the trace file. + + Returns: + Dict containing the parsed trace data. + + Raises: + ValueError: If the trace file is invalid or cannot be parsed. + """ + if self._trace_data is None: + try: + trace_path = Path(self.trace_file_path) + + # Handle nested directory structure + if trace_path.is_dir(): + trace_zip = trace_path / 'trace.zip' + if trace_zip.is_dir(): + trace_files = list(trace_zip.glob('*.zip')) + if not trace_files: + raise ValueError("No trace files found") + trace_path = trace_files[0] + else: + raise ValueError("Invalid trace directory structure") + + # Parse Playwright trace + with zipfile.ZipFile(trace_path) as zf: + # Load trace data + with zf.open('trace.trace') as f: + trace_events = [] + for line in f.read().decode('utf-8').splitlines(): + if line.strip(): + trace_events.append(json.loads(line)) + + # Load network data + with zf.open('trace.network') as f: + network_events = [] + for line in f.read().decode('utf-8').splitlines(): + if line.strip(): + network_events.append(json.loads(line)) + + # Convert to enhanced trace format + self._trace_data = self._convert_playwright_trace(trace_events, network_events) + + except Exception as e: + raise ValueError(f"Failed to load trace data: {str(e)}") + + return self._trace_data + + def _convert_playwright_trace(self, trace_events: List[Dict[str, Any]], network_events: List[Dict[str, Any]]) -> Dict[str, Any]: + """Convert Playwright trace format to enhanced trace format.""" + # Extract metadata + metadata = { + "session_id": trace_events[0].get('sessionId', 'unknown'), + "timestamp": trace_events[0].get('timestamp', 0), + "browser_info": { + "viewport": next( + (e.get('params', {}).get('viewport') for e in trace_events + if e.get('method') == 'setViewportSize'), + {"width": 0, "height": 0} + ), + "user_agent": next( + (e.get('params', {}).get('userAgent') for e in trace_events + if e.get('method') == 'setUserAgent'), + "unknown" + ) + } + } + + # Extract steps + steps = [] + current_step = None + + for event in trace_events: + if event.get('type') == 'before': + if current_step: + steps.append(current_step) + current_step = { + "step_id": len(steps) + 1, + "action": event.get('method', 'unknown'), + "target": event.get('params', {}).get('selector', ''), + "timing": { + "start": event.get('timestamp', 0), + "end": None, + "duration": None + }, + "status": "pending", + "error_context": None, + "visual_state": { + "screenshot_diffs": {}, + "element_visibility": {}, + "layout_shifts": [] + }, + "action_context": { + "element_state": event.get('params', {}), + 
"viewport_state": metadata['browser_info']['viewport'] + } + } + elif event.get('type') == 'after' and current_step: + current_step['timing']['end'] = event.get('timestamp', 0) + current_step['timing']['duration'] = ( + current_step['timing']['end'] - current_step['timing']['start'] + ) + current_step['status'] = 'error' if 'error' in event else 'success' + if 'error' in event: + current_step['error_context'] = { + "error_type": event['error'].get('name', 'unknown'), + "message": event['error'].get('message', ''), + "stack": event['error'].get('stack', '') + } + + if current_step: + steps.append(current_step) + + # Add network information + network_info = { + "requests": [ + { + "url": event.get('params', {}).get('url'), + "method": event.get('params', {}).get('method'), + "status": event.get('params', {}).get('status'), + "timing": event.get('params', {}).get('timing') + } + for event in network_events + if event.get('method') == 'Network.responseReceived' + ] + } + + return { + "metadata": metadata, + "steps": steps, + "network": network_info, + "performance": { + "navigation_timing": { + "dom_complete": next( + (e.get('timestamp', 0) for e in trace_events + if e.get('method') == 'domcontentloaded'), + 0 + ), + "load_complete": next( + (e.get('timestamp', 0) for e in trace_events + if e.get('method') == 'load'), + 0 + ) + }, + "interaction_timing": { + "time_to_first_interaction": next( + (e.get('timestamp', 0) for e in trace_events + if e.get('type') == 'before' and e.get('method') in ['click', 'fill']), + 0 + ) - metadata['timestamp'], + "action_latency": sum( + step['timing']['duration'] for step in steps + if step['timing']['duration'] is not None + ) / len(steps) if steps else 0 + } + } + } + + async def analyze_action_context(self) -> Dict[str, Any]: + """Analyze the context of actions including before/after states.""" + trace_data = await self._load_trace_data() + steps = trace_data["steps"] + + return { + "steps": [ + { + "step_id": step["step_id"], + "action": step["action"], + "target": step["target"], + "element_state": step["action_context"]["element_state"], + "viewport_state": step["action_context"]["viewport_state"] + } + for step in steps + ] + } + + async def analyze_decision_trail(self) -> Dict[str, Any]: + """Analyze the decision making process and alternatives considered.""" + trace_data = await self._load_trace_data() + steps = trace_data["steps"] + + return { + "steps": [ + { + "step_id": step["step_id"], + "action": step["action"], + "confidence": step["action_context"]["element_state"].get("confidence", 1.0), + "alternatives": step["action_context"]["element_state"].get("alternatives", []), + "reasoning": step["action_context"]["element_state"].get("reasoning", []) + } + for step in steps + ] + } + + async def analyze_element_identification(self) -> Dict[str, Any]: + """Analyze methods used to identify elements.""" + trace_data = await self._load_trace_data() + steps = trace_data["steps"] + + return { + "steps": [ + { + "step_id": step["step_id"], + "target": step["target"], + "selector": step["action_context"]["element_state"].get("selector", ""), + "position": step["action_context"]["element_state"].get("position", {}), + "relationships": step["action_context"]["element_state"].get("relationships", {}) + } + for step in steps + ] + } + + async def analyze_failures(self) -> Dict[str, Any]: + """Analyze failure scenarios and recovery attempts.""" + trace_data = await self._load_trace_data() + steps = trace_data["steps"] + failed_steps = [step for step in steps if 
step["status"] == "error"] + + return { + "failed_steps": [ + { + "step_id": step["step_id"], + "action": step["action"], + "error": step["error_context"], + "recovery_attempts": step["action_context"]["element_state"].get("recovery_attempts", []) + } + for step in failed_steps + ], + "total_steps": len(steps), + "failed_steps_count": len(failed_steps) + } + + async def analyze_session_context(self) -> Dict[str, Any]: + """Analyze session-wide context including navigation and network activity.""" + trace_data = await self._load_trace_data() + + return { + "metadata": trace_data["metadata"], + "network": trace_data["network"], + "performance": trace_data["performance"] + } + + async def analyze_recovery_info(self) -> Dict[str, Any]: + """Analyze recovery information and checkpoints.""" + trace_data = await self._load_trace_data() + steps = trace_data["steps"] + recovery_steps = [ + step for step in steps + if step["status"] == "error" and step["action_context"]["element_state"].get("recovery_attempts") + ] + + return { + "recovery_steps": [ + { + "step_id": step["step_id"], + "action": step["action"], + "recovery_attempts": step["action_context"]["element_state"]["recovery_attempts"], + "final_status": "recovered" if any( + attempt.get("success") + for attempt in step["action_context"]["element_state"].get("recovery_attempts", []) + ) else "failed" + } + for step in recovery_steps + ] + } + + async def analyze_model_data(self) -> Dict[str, Any]: + """Analyze model-specific data including token usage and vision analysis.""" + trace_data = await self._load_trace_data() + steps = trace_data["steps"] + + return { + "steps": [ + { + "step_id": step["step_id"], + "action": step["action"], + "model_info": step["action_context"]["element_state"].get("model_info", {}), + "vision_analysis": step["action_context"]["element_state"].get("vision_analysis", {}) + } + for step in steps + ] + } + + async def analyze_temporal_context(self) -> Dict[str, Any]: + """Analyze temporal information including timing and wait conditions.""" + trace_data = await self._load_trace_data() + steps = trace_data["steps"] + + return { + "steps": [ + { + "step_id": step["step_id"], + "timing": step["timing"], + "wait_conditions": step["action_context"]["element_state"].get("wait_conditions", []) + } + for step in steps + ], + "total_duration": sum( + step["timing"]["duration"] for step in steps + if step["timing"]["duration"] is not None + ) + } + + async def analyze_element_reporting(self) -> Dict[str, Any]: + """Analyze enhanced element reporting with detailed selection context.""" + trace_data = await self._load_trace_data() + steps = trace_data["steps"] + + return { + "steps": [ + { + "step_id": step["step_id"], + "action": step["action"], + "target": step["target"], + "element_state": step["action_context"]["element_state"], + "status": step["status"] + } + for step in steps + ] + } + + async def analyze_error_context(self) -> Dict[str, Any]: + """Analyze error context and session state information.""" + trace_data = await self._load_trace_data() + steps = trace_data["steps"] + error_steps = [step for step in steps if step["status"] == "error"] + + return { + "error_steps": [ + { + "step_id": step["step_id"], + "action": step["action"], + "error_context": step["error_context"], + "session_state": { + "url": trace_data["metadata"]["browser_info"].get("url"), + "viewport": trace_data["metadata"]["browser_info"]["viewport"], + "network_status": any( + req["status"] >= 400 + for req in trace_data["network"]["requests"] + ) + 
} + } + for step in error_steps + ] + } + + async def analyze_timing(self) -> Dict[str, Any]: + """Analyze detailed interaction timing information.""" + trace_data = await self._load_trace_data() + steps = trace_data["steps"] + + return { + "steps": [ + { + "step_id": step["step_id"], + "action": step["action"], + "timing": { + "start": step["timing"]["start"], + "end": step["timing"]["end"], + "duration": step["timing"]["duration"] + } + } + for step in steps + if step["timing"]["duration"] is not None + ], + "performance": trace_data["performance"], + "summary": { + "total_duration": sum( + step["timing"]["duration"] for step in steps + if step["timing"]["duration"] is not None + ), + "average_step_duration": sum( + step["timing"]["duration"] for step in steps + if step["timing"]["duration"] is not None + ) / len([s for s in steps if s["timing"]["duration"] is not None]) + } + } + + async def analyze_visual_state(self) -> Dict[str, Any]: + """Analyze visual state changes with enhanced tracking.""" + trace_data = await self._load_trace_data() + steps = trace_data.get("steps", []) + + visual_analysis = [] + for step in steps: + visual_state = step.get("visual_state", {}) + visual_analysis.append({ + "step_id": step["step_id"], + "before_action": { + "screenshot": visual_state.get("screenshot_diffs", {}).get("before"), + "visible_elements": visual_state.get("element_visibility", {}).get("before", []) + }, + "after_action": { + "screenshot": visual_state.get("screenshot_diffs", {}).get("after"), + "visible_elements": visual_state.get("element_visibility", {}).get("after", []), + "added_elements": visual_state.get("element_visibility", {}).get("added", []), + "removed_elements": visual_state.get("element_visibility", {}).get("removed", []) + }, + "layout_shifts": visual_state.get("layout_shifts", []) + }) + + return { + "visual_changes": visual_analysis, + "cumulative_layout_shift": sum( + shift.get("cumulative_layout_shift", 0) + for step in visual_analysis + for shift in step.get("layout_shifts", []) + ) + } + + async def analyze_error_recovery(self) -> Dict[str, Any]: + """Analyze enhanced error recovery capabilities with improved context.""" + trace_data = await self._load_trace_data() + steps = trace_data.get("steps", []) + error_steps = [step for step in steps if step.get("status") == "error"] + + recovery_analysis = [] + for step in error_steps: + error_ctx = step.get("error_context", {}) + recovery_analysis.append({ + "step_id": step["step_id"], + "error_type": error_ctx.get("error_type", "unknown"), + "target_element": { + "selector": error_ctx.get("target_element", {}).get("selector"), + "visible_similar_elements": error_ctx.get("target_element", {}).get("visible_similar_elements", []) + }, + "recovery_attempts": error_ctx.get("recovery_attempts", []), + "environment_factors": error_ctx.get("environment_factors", {}) + }) + + return { + "error_steps": recovery_analysis, + "recovery_success_rate": len([r for r in recovery_analysis if any( + attempt["outcome"] == "success" for attempt in r["recovery_attempts"] + )]) / len(recovery_analysis) if recovery_analysis else 1.0 + } + + async def analyze_performance(self) -> Dict[str, Any]: + """Analyze performance metrics including navigation and interaction timing.""" + trace_data = await self._load_trace_data() + performance = trace_data.get("performance", {}) + + return { + "navigation_timing": performance.get("navigation_timing", {}), + "interaction_timing": performance.get("interaction_timing", {}), + "metrics_summary": { + 
"avg_action_latency": performance.get("interaction_timing", {}).get("action_latency", 0), + "total_interaction_time": sum( + step.get("timing", {}).get("duration", 0) + for step in trace_data.get("steps", []) + ) + } + } + + async def analyze_all(self) -> Dict[str, Any]: + """Perform comprehensive analysis of all trace components. + + Returns: + Dict containing analysis results from all components: + - action_context: Action and element state analysis + - decision_trail: Decision-making process analysis + - element_identification: Element location and relationships + - failure_analysis: Failure scenarios and recovery attempts + - session_context: Session-wide context and navigation + - recovery_info: Recovery strategies and checkpoints + - model_data: Model-specific data and vision analysis + - temporal_context: Timing and sequence information + - element_reporting: Enhanced element selection reporting + - error_context: Error handling and recovery context + - timing_analysis: Detailed timing breakdown + - visual_state: Visual changes and layout analysis + - error_recovery: Enhanced error recovery capabilities + - performance: Performance metrics and timing analysis + """ + trace_data = await self._load_trace_data() + + return { + "action_context": await self.analyze_action_context(), + "decision_trail": await self.analyze_decision_trail(), + "element_identification": await self.analyze_element_identification(), + "failure_analysis": await self.analyze_failures(), + "session_context": await self.analyze_session_context(), + "recovery_info": await self.analyze_recovery_info(), + "model_data": await self.analyze_model_data(), + "temporal_context": await self.analyze_temporal_context(), + "element_reporting": await self.analyze_element_reporting(), + "error_context": await self.analyze_error_context(), + "timing_analysis": await self.analyze_timing(), + "visual_state": await self.analyze_visual_state(), + "error_recovery": await self.analyze_error_recovery(), + "performance": await self.analyze_performance() + } \ No newline at end of file diff --git a/src/utils/browser_controller.py b/src/utils/browser_controller.py new file mode 100644 index 00000000..2171574b --- /dev/null +++ b/src/utils/browser_controller.py @@ -0,0 +1,141 @@ +from typing import Optional, Any +import asyncio +from playwright.async_api import async_playwright, Browser, Playwright +from .structured_logging import StructuredLogger, setup_structured_logging + +class BrowserController: + def __init__(self): + self.browser: Optional[Browser] = None + self.init_promise: Optional[asyncio.Task] = None + self.init_count: int = 0 + self._playwright: Optional[Playwright] = None + self.logger = StructuredLogger("browser_controller") + setup_structured_logging() + + async def initialize(self) -> None: + """Initialize the browser if not already initialized.""" + if self.init_promise is not None: + try: + await self.init_promise + except Exception as e: + # If the current initialization fails, reset state to allow retry + self.init_promise = None + self.browser = None + self.logger.log_browser_event("initialization_failed", { + "error": str(e), + "attempt": self.init_count + 1 + }) + raise + + if self.browser is not None: + return + + # Create new initialization task + self.logger.log_progress( + step="browser_init", + status="starting", + progress=0.0, + message="Starting browser initialization" + ) + self.init_promise = asyncio.create_task(self._do_browser_init()) + try: + await self.init_promise + self.logger.log_progress( + 
step="browser_init", + status="completed", + progress=1.0, + message="Browser initialization completed" + ) + except Exception as e: + # Reset state on failure + self.init_promise = None + self.browser = None + self.logger.log_progress( + step="browser_init", + status="failed", + progress=0.0, + message=f"Browser initialization failed: {str(e)}" + ) + raise + + async def _do_browser_init(self) -> None: + """Internal method to handle browser initialization.""" + if self.browser is not None: + return + + self.logger.log_progress( + step="browser_init", + status="launching", + progress=0.3, + message="Launching Playwright" + ) + playwright = await async_playwright().start() + self._playwright = playwright + + try: + self.logger.log_progress( + step="browser_init", + status="configuring", + progress=0.6, + message="Configuring browser" + ) + self.browser = await playwright.chromium.launch( + headless=True, + args=['--no-sandbox'] + ) + self.init_count += 1 + + self.logger.log_browser_event("browser_launched", { + "initialization_count": self.init_count, + "headless": True + }) + + except Exception as e: + await self._cleanup_playwright() + self.logger.log_browser_event("launch_failed", { + "error": str(e), + "initialization_count": self.init_count + }) + raise + + async def _cleanup_playwright(self) -> None: + """Clean up the playwright context.""" + if self._playwright: + self.logger.log_browser_event("cleanup_playwright", { + "status": "starting" + }) + await self._playwright.stop() + self._playwright = None + self.logger.log_browser_event("cleanup_playwright", { + "status": "completed" + }) + + async def cleanup(self) -> None: + """Clean up browser resources.""" + self.logger.log_progress( + step="cleanup", + status="starting", + progress=0.0, + message="Starting browser cleanup" + ) + + if self.browser: + self.logger.log_progress( + step="cleanup", + status="closing_browser", + progress=0.5, + message="Closing browser" + ) + await self.browser.close() + self.browser = None + + await self._cleanup_playwright() + self.init_promise = None + self.init_count = 0 + + self.logger.log_progress( + step="cleanup", + status="completed", + progress=1.0, + message="Browser cleanup completed" + ) \ No newline at end of file diff --git a/src/utils/error_handling.py b/src/utils/error_handling.py new file mode 100644 index 00000000..2a4f744c --- /dev/null +++ b/src/utils/error_handling.py @@ -0,0 +1,53 @@ +import asyncio +from datetime import datetime +from typing import Dict, Any, Optional +import re + +class MaxRetriesExceededError(Exception): + def __init__(self, operation: str, original_error: Exception): + self.operation = operation + self.original_error = original_error + super().__init__(f"Max retries exceeded for operation '{operation}': {str(original_error)}") + +class ErrorHandler: + MAX_RETRIES = 3 + + def __init__(self): + self._retry_counts: Dict[str, int] = {} + self._last_error: Optional[Dict[str, Any]] = None + + async def handle_error(self, error: Exception, operation: str) -> None: + retry_count = self._retry_counts.get(operation, 0) + + if retry_count >= self.MAX_RETRIES: + raise MaxRetriesExceededError(operation, error) + + self._retry_counts[operation] = retry_count + 1 + await self._log_error(error, operation, retry_count) + + # Exponential backoff: 2^retry_count seconds + await asyncio.sleep(2 ** retry_count) + + async def _log_error(self, error: Exception, operation: str, retry_count: int) -> None: + error_context = { + "operation": operation, + "attempt": retry_count + 1, + 
"timestamp": datetime.now().isoformat(), + "error": { + "name": error.__class__.__name__, + "message": str(error), + "code": self.extract_error_code(error) + } + } + + self._last_error = error_context + # In a real implementation, we would log to a file or logging service + print(f"Error: {error_context}") + + def extract_error_code(self, error: Exception) -> str: + error_message = str(error) + match = re.search(r'ERR_[A-Z_]+', error_message) + return match.group(0) if match else "UNKNOWN_ERROR" + + def get_last_error(self) -> Optional[Dict[str, Any]]: + return self._last_error \ No newline at end of file diff --git a/src/utils/logging.py b/src/utils/logging.py new file mode 100644 index 00000000..982ffdc2 --- /dev/null +++ b/src/utils/logging.py @@ -0,0 +1,158 @@ +import json +import logging +import datetime +from typing import Any, Dict, List, Optional +from enum import Enum +import traceback +import types + +class LogLevel(str, Enum): + CRITICAL = "CRITICAL" + ERROR = "ERROR" + WARNING = "WARNING" + INFO = "INFO" + DEBUG = "DEBUG" + TRACE = "TRACE" + +class LogJSONEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, Exception): + return { + 'type': obj.__class__.__name__, + 'message': str(obj), + 'traceback': traceback.format_exception(type(obj), obj, obj.__traceback__) + } + if isinstance(obj, type): + return obj.__name__ + if isinstance(obj, types.TracebackType): + return traceback.format_tb(obj) + return super().default(obj) + +class LogFormatter(logging.Formatter): + def __init__(self, use_json: bool = True): + super().__init__() + self.use_json = use_json + self._event_counter: Dict[str, int] = {} + + def _serialize_error(self, exc_info) -> Dict[str, str]: + """Serialize error information into a dictionary.""" + exc_type, exc_value, exc_tb = exc_info + return { + "type": exc_type.__name__ if exc_type else "Unknown", + "message": str(exc_value) if exc_value else "", + "stack_trace": self.formatException(exc_info) if exc_tb else "" + } + + def format(self, record: logging.LogRecord) -> str: + timestamp = datetime.datetime.fromtimestamp(record.created).strftime("%Y-%m-%dT%H:%M:%S") + + # Extract additional fields if they exist + extra_fields = {} + for key, value in vars(record).items(): + if key not in logging.LogRecord.__dict__ and not key.startswith('_'): + extra_fields[key] = value + + if self.use_json: + log_entry = { + "timestamp": timestamp, + "level": record.levelname, + "logger": record.name or "root", + "message": record.getMessage(), + **extra_fields + } + + if hasattr(record, 'event_type'): + log_entry["event_type"] = getattr(record, 'event_type') + + if hasattr(record, 'event_data'): + log_entry["data"] = getattr(record, 'event_data') + + if record.exc_info and record.levelno >= logging.ERROR: + log_entry["error"] = self._serialize_error(record.exc_info) + + return json.dumps(log_entry, cls=LogJSONEncoder) + else: + # Compact format for non-JSON logs + basic_msg = f"[{timestamp}] {record.levelname[0]}: {record.getMessage()}" + + if record.exc_info and record.levelno >= logging.ERROR: + return f"{basic_msg}\n{self.formatException(record.exc_info)}" + + return basic_msg + +class BatchedEventLogger: + def __init__(self, logger: logging.Logger): + self._logger = logger + self._batched_events: Dict[str, List[Dict[str, Any]]] = {} + + def add_event(self, event_type: str, event_data: Dict[str, Any]) -> None: + if event_type not in self._batched_events: + self._batched_events[event_type] = [] + self._batched_events[event_type].append(event_data) + + def 
flush(self) -> None: + for event_type, events in self._batched_events.items(): + if events: + self._logger.info( + f"Batch: {len(events)} {event_type} events", + extra={ + "event_type": f"batched_{event_type}", + "event_data": { + "count": len(events), + "events": events + } + } + ) + self._batched_events.clear() + +def setup_logging( + level: str = "INFO", + use_json: bool = True, + log_file: Optional[str] = None, + exclude_patterns: Optional[List[str]] = None +) -> None: + """ + Setup logging configuration with the improved formatter + + Args: + level: The logging level to use + use_json: Whether to use JSON formatting + log_file: Optional file to write logs to + exclude_patterns: Optional list of patterns to exclude from logging + """ + root_logger = logging.getLogger() + root_logger.setLevel(level) + + # Clear any existing handlers + root_logger.handlers.clear() + + # Create console handler + console_handler = logging.StreamHandler() + console_handler.setFormatter(LogFormatter(use_json=use_json)) + + if exclude_patterns: + class ExcludeFilter(logging.Filter): + def filter(self, record: logging.LogRecord) -> bool: + return not any(pattern in record.getMessage() for pattern in exclude_patterns) + + console_handler.addFilter(ExcludeFilter()) + + root_logger.addHandler(console_handler) + + # Add file handler if specified + if log_file: + file_handler = logging.FileHandler(log_file) + file_handler.setFormatter(LogFormatter(use_json=True)) # Always use JSON for file logging + if exclude_patterns: + file_handler.addFilter(ExcludeFilter()) + root_logger.addHandler(file_handler) + +# Production filter patterns +PRODUCTION_EXCLUDE_PATTERNS = [ + "deprecated", + "virtual environment", + "Activating virtual environment", + "✅ Eval: Success", + "🤔 Thought:", + "VIRTUAL_ENV:" +] \ No newline at end of file diff --git a/src/utils/structured_logging.py b/src/utils/structured_logging.py new file mode 100644 index 00000000..8568de23 --- /dev/null +++ b/src/utils/structured_logging.py @@ -0,0 +1,223 @@ +from typing import Optional, Dict, Any, List +import logging +import json +from dataclasses import dataclass, asdict +from datetime import datetime +from colorama import init, Fore, Style +import os + +# Initialize colorama +init() + +@dataclass +class ColorScheme: + """Color scheme for different log elements.""" + ERROR: str = Fore.RED + WARNING: str = Fore.YELLOW + INFO: str = Fore.CYAN + DEBUG: str = Style.DIM + TIMESTAMP: str = Fore.WHITE + SUCCESS: str = Fore.GREEN + STEP: str = Fore.BLUE + RESET: str = Style.RESET_ALL + +class ColorizedFormatter(logging.Formatter): + """Formatter that adds colors to log output.""" + + def __init__(self, use_colors: bool = True): + super().__init__() + self.use_colors = use_colors and not os.getenv('NO_COLOR') + self.colors = ColorScheme() + + def colorize(self, text: str, color: str) -> str: + """Add color to text if colors are enabled.""" + if self.use_colors: + return f"{color}{text}{self.colors.RESET}" + return text + + def format(self, record: logging.LogRecord) -> str: + """Format the log record with colors.""" + # Get the appropriate color for the log level + level_color = getattr(self.colors, record.levelname, self.colors.INFO) + + # Format timestamp + timestamp = self.colorize( + datetime.utcnow().strftime("%H:%M:%S"), + self.colors.TIMESTAMP + ) + + # Format level + level = self.colorize(record.levelname, level_color) + + # Format message and handle special keywords + msg = record.getMessage() + if "✓" in msg: + msg = msg.replace("✓", self.colorize("✓", 
self.colors.SUCCESS)) + if "×" in msg: + msg = msg.replace("×", self.colorize("×", self.colors.ERROR)) + if "STEP" in msg: + msg = msg.replace("STEP", self.colorize("STEP", self.colors.STEP)) + + # Build the basic log message + log_message = f"[{timestamp}] {level} {msg}" + + # Add structured data if available + if hasattr(record, 'event_type'): + event_type = self.colorize(record.event_type, self.colors.INFO) + if hasattr(record, 'data'): + # Format the data as JSON but don't colorize it + data_str = json.dumps(record.data, indent=2) + log_message = f"{log_message} | {event_type} | {data_str}" + + return log_message + +class JSONFormatter(logging.Formatter): + """Custom JSON formatter for structured logs.""" + + def format(self, record: logging.LogRecord) -> str: + """Format the log record as a JSON string.""" + output = { + "timestamp": datetime.utcnow().isoformat(), + "level": record.levelname, + "message": record.getMessage(), + "logger": record.name + } + + # Add extra fields from record.__dict__ to handle custom attributes + if hasattr(record, '__dict__'): + for key, value in record.__dict__.items(): + if key not in output and key not in ('args', 'exc_info', 'exc_text', 'msg'): + output[key] = value + + return json.dumps(output) + +def setup_structured_logging(level: int = logging.INFO, use_colors: bool = True, json_output: bool = False) -> None: + """Set up structured logging with optional colorized output.""" + root_logger = logging.getLogger() + root_logger.setLevel(level) + + # Remove existing handlers + for handler in root_logger.handlers[:]: + root_logger.removeHandler(handler) + + # Create console handler with appropriate formatter + handler = logging.StreamHandler() + if json_output: + handler.setFormatter(JSONFormatter()) + else: + handler.setFormatter(ColorizedFormatter(use_colors=use_colors)) + + root_logger.addHandler(handler) + +@dataclass +class ProgressEvent: + """Represents a progress update in the browser automation process.""" + step: str + status: str + progress: float # 0.0 to 1.0 + message: str + timestamp: Optional[str] = None + + def __post_init__(self): + if self.timestamp is None: + self.timestamp = datetime.utcnow().isoformat() + +@dataclass +class BrowserEvent: + """Represents a browser-related event.""" + event_type: str + details: Dict[str, Any] + timestamp: Optional[str] = None + + def __post_init__(self): + if self.timestamp is None: + self.timestamp = datetime.utcnow().isoformat() + +class StructuredLogger: + """Handles structured logging with progress reporting and feedback.""" + + def __init__(self, logger_name: str = "browser_automation"): + self.logger = logging.getLogger(logger_name) + self.progress_events: List[ProgressEvent] = [] + self.browser_events: List[BrowserEvent] = [] + self._current_progress: float = 0.0 + + def log_progress(self, step: str, status: str, progress: float, message: str) -> None: + """Log a progress update.""" + event = ProgressEvent(step=step, status=status, progress=progress, message=message) + self.progress_events.append(event) + self._current_progress = progress + + self.logger.info("Progress Update", extra={ + "event_type": "progress", + "data": asdict(event) + }) + + def log_browser_event(self, event_type: str, details: Dict[str, Any]) -> None: + """Log a browser-related event.""" + event = BrowserEvent(event_type=event_type, details=details) + self.browser_events.append(event) + + self.logger.info(f"Browser Event: {event_type}", extra={ + "event_type": "browser", + "data": asdict(event) + }) + + def 
get_current_progress(self) -> float: + """Get the current progress as a float between 0 and 1.""" + return self._current_progress + + def get_progress_history(self) -> List[Dict[str, Any]]: + """Get the history of progress events.""" + return [asdict(event) for event in self.progress_events] + + def get_browser_events(self) -> List[Dict[str, Any]]: + """Get all browser events.""" + return [asdict(event) for event in self.browser_events] + + def clear_history(self) -> None: + """Clear all stored events.""" + self.progress_events.clear() + self.browser_events.clear() + self._current_progress = 0.0 + +class EventBatcher: + def __init__(self, batch_size: int = 5): + self.events: List[BrowserEvent] = [] + self.batch_size = max(1, batch_size) # Ensure minimum batch size of 1 + + def add_event(self, event: BrowserEvent) -> Optional[Dict[str, Any]]: + self.events.append(event) + if len(self.events) >= self.batch_size: + return self.flush_events() + return None + + def flush_events(self) -> Dict[str, Any]: + if not self.events: + return { + "timestamp": datetime.now().isoformat(), + "total_events": 0, + "success_count": 0, + "error_count": 0, + "duration_ms": 0 + } + + summary = { + "timestamp": datetime.now().isoformat(), + "total_events": len(self.events), + "success_count": sum(1 for e in self.events if e.get_status() == "success"), + "error_count": sum(1 for e in self.events if e.get_status() == "failed"), + "duration_ms": self._calculate_total_duration() + } + self.events = [] + return summary + + def get_event_count(self) -> int: + return len(self.events) + + def _calculate_total_duration(self) -> int: + total_duration = 0 + for event in self.events: + if event.metrics and "duration_ms" in event.metrics: + total_duration += event.metrics["duration_ms"] + return total_duration \ No newline at end of file diff --git a/src/utils/task_logging.py b/src/utils/task_logging.py new file mode 100644 index 00000000..908774a2 --- /dev/null +++ b/src/utils/task_logging.py @@ -0,0 +1,562 @@ +from typing import Dict, Any, List, Literal, Optional, Union, Callable, TypeVar, Awaitable +from dataclasses import dataclass, asdict, field +from datetime import datetime +import json +from enum import Enum +import traceback +import asyncio +import random +import os +from colorama import init, Fore, Style + +# Initialize colorama for cross-platform color support +init() + +# Define generic type parameter at module level +T = TypeVar('T') + +class TaskStatus(str, Enum): + PENDING = "pending" + RUNNING = "running" + COMPLETE = "complete" + FAILED = "failed" + +class ActionType(str, Enum): + NAVIGATION = "navigation" + INTERACTION = "interaction" + EXTRACTION = "extraction" + VALIDATION = "validation" + RECOVERY = "recovery" + + @property + def emoji(self) -> str: + """Get the emoji representation of the action type.""" + return { + ActionType.NAVIGATION: "🌐", + ActionType.INTERACTION: "🖱️", + ActionType.EXTRACTION: "📑", + ActionType.VALIDATION: "✅", + ActionType.RECOVERY: "🔄" + }[self] + +@dataclass +class PerformanceMetrics: + """Performance metrics for task execution.""" + total_duration: float = 0.0 + step_breakdown: Dict[str, float] = field(default_factory=dict) + + def add_step_duration(self, step_type: str, duration: float) -> None: + """Add duration for a step type.""" + if step_type not in self.step_breakdown: + self.step_breakdown[step_type] = 0 + self.step_breakdown[step_type] += duration + self.total_duration += duration + + def to_dict(self) -> Dict[str, Any]: + """Convert metrics to a dictionary.""" + 
return { + "total_duration": self.total_duration, + "step_breakdown": self.step_breakdown + } + +@dataclass +class ErrorInfo: + """Information about an error that occurred.""" + type: str + message: str + step: int + action: str + traceback: Optional[str] = None + +@dataclass +class StepInfo: + """Information about the current step in a task.""" + number: int + description: str + started_at: str + status: Union[TaskStatus, str] + duration: Optional[float] = None + progress: Optional[float] = None + action_type: Optional[ActionType] = None + context: Optional[Dict[str, Any]] = None + results: Optional[Dict[str, Any]] = None + suppress_similar: bool = False + + def __post_init__(self): + if isinstance(self.status, str): + self.status = TaskStatus(self.status) + if isinstance(self.action_type, str): + self.action_type = ActionType(self.action_type) + + @property + def status_value(self) -> str: + """Get the string value of the status.""" + return self.status.value if isinstance(self.status, TaskStatus) else str(self.status) + +@dataclass +class BrowserState: + """Current state of the browser.""" + url: str + page_ready: bool + dynamic_content_loaded: bool + visible_elements: int + current_frame: Optional[str] = None + active_element: Optional[str] = None + page_title: Optional[str] = None + +@dataclass +class RetryConfig: + """Configuration for retry behavior.""" + max_retries: int = 3 + base_delay: float = 1.0 + max_delay: float = 10.0 + jitter: float = 0.1 + + def get_delay(self, attempt: int) -> float: + """Calculate delay for a given attempt using exponential backoff.""" + if attempt == 0: + return 0 + if attempt > self.max_retries: + return -1 + + # Calculate exponential delay + delay = self.base_delay * (2 ** (attempt - 1)) + delay = min(delay, self.max_delay) + + # Add jitter if configured + if self.jitter > 0: + jitter_range = delay * self.jitter + delay += random.uniform(-jitter_range/2, jitter_range/2) + + return max(0, delay) + +@dataclass +class RetryInfo: + """Information about retry attempts.""" + attempts: int = 0 + success: bool = False + history: List[Dict[str, Any]] = field(default_factory=list) + +@dataclass +class TaskContext: + """Context information for a task.""" + id: str + goal: str + current_step: StepInfo + browser_state: BrowserState + started_at: Optional[str] = None + error: Optional[ErrorInfo] = None + performance: Optional[PerformanceMetrics] = None + log_history: List[StepInfo] = field(default_factory=list) + retries: Optional[RetryInfo] = None + + def __post_init__(self): + if self.started_at is None: + self.started_at = datetime.utcnow().isoformat() + if self.performance is None: + self.performance = PerformanceMetrics() + if self.retries is None: + self.retries = RetryInfo() + + def to_dict(self) -> Dict[str, Any]: + """Convert the context to a dictionary for logging.""" + result = { + "timestamp": datetime.utcnow().isoformat(), + "task": { + "id": self.id, + "goal": self.goal, + "progress": self._format_progress(), + "elapsed_time": self._calculate_elapsed_time(), + "status": self.current_step.status_value + } + } + + # Add retry information if available + if self.retries and self.retries.attempts > 0: + result["task"]["retries"] = { + "attempts": self.retries.attempts, + "success": self.retries.success, + "history": self.retries.history + } + + # Add current action information + if self.current_step.action_type: + result["task"]["current_action"] = self.current_step.action_type.value + if self.current_step.context: + result["task"]["action_context"] = 
self.current_step.context + if self.current_step.results: + result["task"]["action_results"] = self.current_step.results + + # Add browser state + result["browser"] = { + "url": self.browser_state.url, + "state": "ready" if self.browser_state.page_ready else "loading", + "visible_elements": self.browser_state.visible_elements, + "dynamic_content": "loaded" if self.browser_state.dynamic_content_loaded else "loading" + } + + if self.browser_state.current_frame: + result["browser"]["current_frame"] = self.browser_state.current_frame + if self.browser_state.active_element: + result["browser"]["active_element"] = self.browser_state.active_element + if self.browser_state.page_title: + result["browser"]["page_title"] = self.browser_state.page_title + + if self.error: + result["error"] = { + "type": self.error.type, + "message": self.error.message, + "step": self.error.step, + "action": self.error.action + } + if self.error.traceback: + result["error"]["traceback"] = self.error.traceback + + if self.performance and self.performance.step_breakdown: + result["performance"] = self.performance.to_dict() + + return result + + def _format_progress(self) -> str: + """Format the progress information.""" + if self.current_step.progress is not None: + return f"{int(self.current_step.progress * 100)}%" + return f"{self.current_step.number}/unknown steps" + + def _calculate_elapsed_time(self) -> str: + """Calculate the elapsed time since task start.""" + if self.started_at is None: + return "0.0s" + start = datetime.fromisoformat(self.started_at) + elapsed = datetime.utcnow() - start + return f"{elapsed.total_seconds():.1f}s" + +@dataclass +class ColorScheme: + """Color scheme for log messages.""" + error: str = Fore.RED + warning: str = Fore.YELLOW + info: str = Fore.CYAN + success: str = Fore.GREEN + reset: str = Style.RESET_ALL + + @property + def enabled(self) -> bool: + """Check if colors should be enabled.""" + return not bool(os.getenv("NO_COLOR")) + + def apply(self, text: str, color: str) -> str: + """Apply color to text if colors are enabled.""" + if not self.enabled: + return text + return f"{color}{text}{self.reset}" + +class LogFormatter: + """Formatter for log messages with color support.""" + + def __init__(self, color_scheme: Optional[ColorScheme] = None): + self.colors = color_scheme or ColorScheme() + + def format(self, record: Any) -> str: + """Format a log record with appropriate colors.""" + level_colors = { + "ERROR": self.colors.error, + "WARNING": self.colors.warning, + "INFO": self.colors.info + } + + # Format timestamp + timestamp = datetime.fromtimestamp(record.created).strftime("%Y-%m-%d %H:%M:%S") + + # Color the level name + level_color = level_colors.get(record.levelname, self.colors.info) + colored_level = self.colors.apply(record.levelname, level_color) + + return f"[{timestamp}] {colored_level}: {record.msg}" + +@dataclass +class SeparatorStyle: + """Style configuration for visual separators.""" + task: str = "=" * 50 # Task separator (longer) + phase: str = "-" * 30 # Phase separator (medium) + error: str = "*" * 40 # Error separator (distinct) + +class TaskLogger: + """Advanced logger for task context and state tracking.""" + + def __init__( + self, + task_id: str, + goal: str, + color_scheme: Optional[ColorScheme] = None, + separator_style: Optional[SeparatorStyle] = None, + use_separators: bool = True + ): + self.context = TaskContext( + id=task_id, + goal=goal, + current_step=StepInfo( + number=1, + description="Task initialized", + 
started_at=datetime.utcnow().isoformat(), + status=TaskStatus.PENDING + ), + browser_state=BrowserState( + url="", + page_ready=False, + dynamic_content_loaded=False, + visible_elements=0 + ), + retries=RetryInfo() + ) + self._step_start_time: Optional[datetime] = None + self.colors = color_scheme or ColorScheme() + self.separators = separator_style or SeparatorStyle() + self.use_separators = use_separators + + # Add initial task separator and goal + if self.use_separators: + self._add_separator("task") + self._add_log_entry(f"TASK GOAL: {goal}") + + def start_phase(self, phase_name: str) -> None: + """Start a new phase in the task.""" + if self.use_separators: + self._add_separator("phase") + self._add_log_entry(f"PHASE: {phase_name}") + + def _add_separator(self, separator_type: Literal["task", "phase", "error"]) -> None: + """Add a visual separator to the log history.""" + if not self.use_separators: + return + + separator = getattr(self.separators, separator_type) + colored_separator = self.colors.apply( + separator, + self.colors.info if separator_type != "error" else self.colors.error + ) + self._add_log_entry(colored_separator) + + def _add_log_entry(self, entry: str) -> None: + """Add a raw log entry to the history.""" + step = StepInfo( + number=self.context.current_step.number, + description=entry, + started_at=datetime.utcnow().isoformat(), + status=TaskStatus.RUNNING + ) + self.context.log_history.append(step) + + def update_step(self, + description: str, + status: TaskStatus, + progress: Optional[float] = None, + action_type: Optional[ActionType] = None, + context: Optional[Dict[str, Any]] = None, + results: Optional[Dict[str, Any]] = None, + suppress_similar: bool = False) -> None: + """Update the current step information.""" + step_duration = None + if self._step_start_time: + step_duration = (datetime.utcnow() - self._step_start_time).total_seconds() + + new_step = StepInfo( + number=self.context.current_step.number + 1, + description=description, + started_at=datetime.utcnow().isoformat(), + status=status, + duration=step_duration, + progress=progress, + action_type=action_type, + context=context, + results=results, + suppress_similar=suppress_similar + ) + + # Check if we should suppress this step + if not suppress_similar or not self._is_similar_to_previous(new_step): + self.context.log_history.append(new_step) + self.context.current_step = new_step + self._step_start_time = datetime.utcnow() + else: + # Update the previous step with new status/results + prev_step = self.context.log_history[-1] + prev_step.status = status + if results: + prev_step.results = results + # Update current step to reflect changes + self.context.current_step = prev_step + + def _is_similar_to_previous(self, step: StepInfo) -> bool: + """Check if a step is similar to the previous one.""" + if not self.context.log_history: + return False + prev_step = self.context.log_history[-1] + return ( + prev_step.action_type == step.action_type and + prev_step.description.split()[0] == step.description.split()[0] # Compare first word + ) + + def get_log_history(self) -> List[str]: + """Get the formatted history of log entries.""" + return [self._format_step(step) for step in self.context.log_history] + + def _format_step(self, step: StepInfo) -> str: + """Format a step as a log entry with colors.""" + timestamp = datetime.fromisoformat(step.started_at).strftime("%Y-%m-%d %H:%M:%S") + duration = f"({step.duration:.1f}s)" if step.duration is not None else "" + + # Color-coded status symbols + if 
isinstance(step.status, TaskStatus): + status_symbol = { + TaskStatus.COMPLETE: self.colors.apply("✓", self.colors.success), + TaskStatus.FAILED: self.colors.apply("×", self.colors.error), + TaskStatus.RUNNING: self.colors.apply("→", self.colors.info), + TaskStatus.PENDING: self.colors.apply("→", self.colors.info) + }.get(step.status, self.colors.apply("→", self.colors.info)) + else: + status_symbol = self.colors.apply("→", self.colors.info) + + # Color-coded action emoji + action_emoji = step.action_type.emoji if step.action_type else "" + if action_emoji: + action_emoji = self.colors.apply(action_emoji, self.colors.info) + + # Format step number with info color + step_number = self.colors.apply(f"STEP {step.number}/?", self.colors.info) + + return f"[{timestamp}] {action_emoji} {step_number} {step.description} {status_symbol} {duration}" + + def format_log_entry(self) -> str: + """Format the current state as a log entry.""" + return self._format_step(self.context.current_step) + + def update_browser_state(self, + url: Optional[str] = None, + page_ready: Optional[bool] = None, + dynamic_content_loaded: Optional[bool] = None, + visible_elements: Optional[int] = None, + current_frame: Optional[str] = None, + active_element: Optional[str] = None, + page_title: Optional[str] = None) -> None: + """Update the browser state information.""" + if url is not None: + self.context.browser_state.url = url + if page_ready is not None: + self.context.browser_state.page_ready = page_ready + if dynamic_content_loaded is not None: + self.context.browser_state.dynamic_content_loaded = dynamic_content_loaded + if visible_elements is not None: + self.context.browser_state.visible_elements = visible_elements + if current_frame is not None: + self.context.browser_state.current_frame = current_frame + if active_element is not None: + self.context.browser_state.active_element = active_element + if page_title is not None: + self.context.browser_state.page_title = page_title + + def log_error(self, error: Exception, step_number: int, action: str) -> None: + """Log an error with context.""" + if self.use_separators: + self._add_separator("error") + + self.context.error = ErrorInfo( + type=error.__class__.__name__, + message=str(error), + step=step_number, + action=action, + traceback=traceback.format_exc() + ) + self.context.current_step.status = TaskStatus.FAILED + + if self.use_separators: + self._add_separator("error") + + def start_performance_tracking(self) -> None: + """Start tracking performance metrics.""" + self._step_start_time = datetime.utcnow() + + def track_step_duration(self, step_type: str, duration: float) -> None: + """Track the duration of a specific step type.""" + if self.context.performance is not None: + self.context.performance.add_step_duration(step_type, duration) + + def get_performance_metrics(self) -> Dict[str, Any]: + """Get the current performance metrics.""" + if self.context.performance is not None: + return self.context.performance.to_dict() + return {"total_duration": 0.0, "step_breakdown": {}} + + def get_context(self) -> Dict[str, Any]: + """Get the current context as a dictionary.""" + return self.context.to_dict() + + def log_state(self) -> None: + """Log the current state.""" + state = self.get_context() + print(json.dumps(state, indent=2)) + + async def execute_with_retry( + self, + operation: Callable[[], Awaitable[T]], + operation_name: str, + retry_config: Optional[RetryConfig] = None + ) -> T: + """Execute an operation with retry logic.""" + if retry_config is None: + 
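+            # No RetryConfig supplied: fall back to the defaults defined above (3 retries, 1 s base delay doubled per attempt and capped at 10 s, plus a 10% jitter window).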
retry_config = RetryConfig() + + attempt = 0 + last_error = None + + while True: + try: + # Calculate and apply delay if this is a retry + delay = retry_config.get_delay(attempt) + if delay == -1: # Max retries exceeded + if last_error: + raise last_error + raise Exception("Max retries exceeded") + + if delay > 0: + await asyncio.sleep(delay) + + # Attempt the operation + result = await operation() + + # Update retry info on success + if self.context.retries is not None: + self.context.retries.attempts = attempt + 1 + self.context.retries.success = True + + return result + + except Exception as e: + last_error = e + attempt += 1 + + # Log the retry attempt + if self.context.retries is not None: + self.context.retries.history.append({ + "attempt": attempt, + "timestamp": datetime.utcnow().isoformat(), + "error": f"{e.__class__.__name__}: {str(e)}", + "delay": retry_config.get_delay(attempt) + }) + + # Update the error context + self.log_error(e, self.context.current_step.number, operation_name) + + # Continue if we haven't exceeded max retries + if attempt <= retry_config.max_retries: + self.update_step( + f"Retrying {operation_name} (attempt {attempt + 1}/{retry_config.max_retries + 1})", + TaskStatus.RUNNING + ) + continue + + # Max retries exceeded + if self.context.retries is not None: + self.context.retries.attempts = attempt + self.context.retries.success = False + raise \ No newline at end of file diff --git a/src/utils/utils.py b/src/utils/utils.py index 3ab38977..9e202fa6 100644 --- a/src/utils/utils.py +++ b/src/utils/utils.py @@ -9,6 +9,7 @@ import time from pathlib import Path from typing import Dict, Optional +from pydantic import SecretStr from langchain_anthropic import ChatAnthropic from langchain_google_genai import ChatGoogleGenerativeAI @@ -30,15 +31,17 @@ def get_llm_model(provider: str, **kwargs): base_url = kwargs.get("base_url") if not kwargs.get("api_key", ""): - api_key = os.getenv("ANTHROPIC_API_KEY", "") + api_key = SecretStr(os.getenv("ANTHROPIC_API_KEY") or "") else: - api_key = kwargs.get("api_key") + api_key = SecretStr(kwargs.get("api_key") or "") return ChatAnthropic( model_name=kwargs.get("model_name", "claude-3-5-sonnet-20240620"), temperature=kwargs.get("temperature", 0.0), base_url=base_url, api_key=api_key, + timeout=kwargs.get("timeout", 60), + stop=kwargs.get("stop", None) ) elif provider == "openai": if not kwargs.get("base_url", ""): @@ -47,15 +50,16 @@ def get_llm_model(provider: str, **kwargs): base_url = kwargs.get("base_url") if not kwargs.get("api_key", ""): - api_key = os.getenv("OPENAI_API_KEY", "") + api_key = SecretStr(os.getenv("OPENAI_API_KEY") or "") else: - api_key = kwargs.get("api_key") + api_key = SecretStr(kwargs.get("api_key") or "") return ChatOpenAI( - model=kwargs.get("model_name", "gpt-4o"), + model=kwargs.get("model_name", "gpt-4"), temperature=kwargs.get("temperature", 0.0), base_url=base_url, api_key=api_key, + timeout=kwargs.get("timeout", 60), ) elif provider == "deepseek": if not kwargs.get("base_url", ""): @@ -64,25 +68,36 @@ def get_llm_model(provider: str, **kwargs): base_url = kwargs.get("base_url") if not kwargs.get("api_key", ""): - api_key = os.getenv("DEEPSEEK_API_KEY", "") + api_key = SecretStr(os.getenv("DEEPSEEK_API_KEY") or "") else: - api_key = kwargs.get("api_key") + api_key = SecretStr(kwargs.get("api_key") or "") return ChatOpenAI( model=kwargs.get("model_name", "deepseek-chat"), temperature=kwargs.get("temperature", 0.0), base_url=base_url, api_key=api_key, + timeout=kwargs.get("timeout", 60), ) elif 
provider == "gemini": if not kwargs.get("api_key", ""): - api_key = os.getenv("GOOGLE_API_KEY", "") + api_key = SecretStr(os.getenv("GOOGLE_API_KEY") or "") else: - api_key = kwargs.get("api_key") + api_key = SecretStr(kwargs.get("api_key") or "") + + # Get model name from environment or kwargs + model_name = kwargs.get("model_name") + if not model_name: + if kwargs.get("vision"): + model_name = os.getenv("GOOGLE_API_MODEL", "gemini-1.5-flash") + else: + model_name = os.getenv("GOOGLE_API_TYPE", "gemini-1.5-flash") + return ChatGoogleGenerativeAI( - model=kwargs.get("model_name", "gemini-2.0-flash-exp"), + model=model_name, temperature=kwargs.get("temperature", 0.0), - google_api_key=api_key, + api_key=api_key, + timeout=kwargs.get("timeout", 60) ) elif provider == "ollama": return ChatOllama( @@ -97,27 +112,28 @@ def get_llm_model(provider: str, **kwargs): else: base_url = kwargs.get("base_url") if not kwargs.get("api_key", ""): - api_key = os.getenv("AZURE_OPENAI_API_KEY", "") + api_key = SecretStr(os.getenv("AZURE_OPENAI_API_KEY") or "") else: - api_key = kwargs.get("api_key") + api_key = SecretStr(kwargs.get("api_key") or "") return AzureChatOpenAI( model=kwargs.get("model_name", "gpt-4o"), temperature=kwargs.get("temperature", 0.0), api_version="2024-05-01-preview", azure_endpoint=base_url, api_key=api_key, + timeout=kwargs.get("timeout", 60), ) else: raise ValueError(f"Unsupported provider: {provider}") # Predefined model names for common providers model_names = { - "anthropic": ["claude-3-5-sonnet-20240620", "claude-3-opus-20240229"], - "openai": ["gpt-4o", "gpt-4", "gpt-3.5-turbo"], + "anthropic": ["claude-3-5-sonnet-latest", "claude-3-5-sonnet-20241022"], + "openai": ["gpt-4o"], "deepseek": ["deepseek-chat"], - "gemini": ["gemini-2.0-flash-exp", "gemini-2.0-flash-thinking-exp", "gemini-1.5-flash-latest", "gemini-1.5-flash-8b-latest", "gemini-2.0-flash-thinking-exp-1219" ], + "gemini": ["gemini-1.5-pro", "gemini-2.0-flash"], "ollama": ["qwen2.5:7b", "llama2:7b"], - "azure_openai": ["gpt-4o", "gpt-4", "gpt-3.5-turbo"] + "azure_openai": ["gpt-4", "gpt-3.5-turbo"] } # Callback to update the model name dropdown based on the selected provider diff --git a/test_gemini_connection.py b/test_gemini_connection.py new file mode 100644 index 00000000..0feeecad --- /dev/null +++ b/test_gemini_connection.py @@ -0,0 +1,47 @@ +import google.generativeai as genai +import os +from dotenv import load_dotenv, find_dotenv + +# Force reload of environment variables +load_dotenv(find_dotenv(), override=True) + +api_key = os.environ.get("GOOGLE_API_KEY") +model_name = os.environ.get("GOOGLE_API_MODEL") + +if not api_key or not model_name: + raise ValueError("Missing required environment variables: GOOGLE_API_KEY or GOOGLE_API_MODEL") + +print(f"Using model: {model_name}") +genai.configure(api_key=api_key, transport="rest") + +# List all available models +print("\nAvailable models:") +for m in genai.list_models(): + print(f"- {m.name}") + +# Check that the model exists in the client +found_model = False +for m in genai.list_models(): + model_id = m.name.replace("models/", "") + if model_id == model_name: + found_model = True + print(f"\nFound model: {m.name}") + break + +if not found_model: + print("\nAvailable model IDs:") + for m in genai.list_models(): + print(f"- {m.name.replace('models/', '')}") + +assert found_model, f"Model not found: {model_name}" + +# Load the model +model = genai.GenerativeModel(model_name) + +# Perform a simple generation task +try: + response = model.generate_content("Hello, 
I'm testing the Gemini API connection. Please respond with a short greeting.") + print(f"\nResponse: {response.text}") +except Exception as e: + print(f"\nError generating content: {e}") + raise \ No newline at end of file diff --git a/test_results.txt b/test_results.txt new file mode 100644 index 00000000..86a61db1 --- /dev/null +++ b/test_results.txt @@ -0,0 +1,125 @@ +============================= test session starts ============================== +platform darwin -- Python 3.11.9, pytest-8.3.4, pluggy-1.5.0 -- /Users/dmieloch/Dev/experiments/web-ui/venv/bin/python +cachedir: .pytest_cache +rootdir: /Users/dmieloch/Dev/experiments/web-ui +configfile: pytest.ini +plugins: cov-6.0.0, asyncio-0.25.2, anyio-4.8.0, timeout-2.3.1 +asyncio: mode=Mode.AUTO, asyncio_default_fixture_loop_scope=function +collecting ... +----------------------------- live log collection ------------------------------ +INFO root:service.py:51 Anonymized telemetry enabled. See https://github.com/gregpr07/browser-use for more information. +INFO httpx:_client.py:1038 HTTP Request: GET https://api.gradio.app/gradio-messaging/en "HTTP/1.1 200 OK" +collected 133 items + +tests/test_browser_cli.py::TestBrowserInitialization::test_basic_initialization +-------------------------------- live log setup -------------------------------- +INFO tests.test_browser_cli:test_browser_cli.py:28 Cleanup start - Browser state: False +INFO tests.test_browser_cli:test_browser_cli.py:39 Globals and environment reset before test +PASSED [ 1/133] +------------------------------ live log teardown ------------------------------- +INFO tests.test_browser_cli:test_browser_cli.py:45 Cleanup finally - Browser state: False +INFO tests.test_browser_cli:test_browser_cli.py:65 Globals and environment reset after test + +tests/test_browser_cli.py::TestBrowserInitialization::test_window_size +-------------------------------- live log setup -------------------------------- +INFO tests.test_browser_cli:test_browser_cli.py:28 Cleanup start - Browser state: False +INFO tests.test_browser_cli:test_browser_cli.py:39 Globals and environment reset before test +FAILED [ 2/133] +------------------------------ live log teardown ------------------------------- +INFO tests.test_browser_cli:test_browser_cli.py:45 Cleanup finally - Browser state: False +INFO tests.test_browser_cli:test_browser_cli.py:65 Globals and environment reset after test + +tests/test_browser_cli.py::TestBrowserInitialization::test_headless_mode +-------------------------------- live log setup -------------------------------- +INFO tests.test_browser_cli:test_browser_cli.py:28 Cleanup start - Browser state: False +INFO tests.test_browser_cli:test_browser_cli.py:39 Globals and environment reset before test +FAILED [ 3/133] +------------------------------ live log teardown ------------------------------- +INFO tests.test_browser_cli:test_browser_cli.py:45 Cleanup finally - Browser state: False +INFO tests.test_browser_cli:test_browser_cli.py:65 Globals and environment reset after test + +tests/test_browser_cli.py::TestBrowserInitialization::test_user_data_dir +-------------------------------- live log setup -------------------------------- +INFO tests.test_browser_cli:test_browser_cli.py:28 Cleanup start - Browser state: False +INFO tests.test_browser_cli:test_browser_cli.py:39 Globals and environment reset before test +PASSED [ 4/133] +------------------------------ live log teardown ------------------------------- +INFO tests.test_browser_cli:test_browser_cli.py:45 Cleanup finally - Browser 
state: False +INFO tests.test_browser_cli:test_browser_cli.py:65 Globals and environment reset after test + +tests/test_browser_cli.py::TestBrowserInitialization::test_proxy_configuration +-------------------------------- live log setup -------------------------------- +INFO tests.test_browser_cli:test_browser_cli.py:28 Cleanup start - Browser state: False +INFO tests.test_browser_cli:test_browser_cli.py:39 Globals and environment reset before test +PASSED [ 5/133] +------------------------------ live log teardown ------------------------------- +INFO tests.test_browser_cli:test_browser_cli.py:45 Cleanup finally - Browser state: False +INFO tests.test_browser_cli:test_browser_cli.py:65 Globals and environment reset after test + +tests/test_browser_cli.py::TestBrowserInitialization::test_disable_security +-------------------------------- live log setup -------------------------------- +INFO tests.test_browser_cli:test_browser_cli.py:28 Cleanup start - Browser state: False +INFO tests.test_browser_cli:test_browser_cli.py:39 Globals and environment reset before test +FAILED [ 6/133] +------------------------------ live log teardown ------------------------------- +INFO tests.test_browser_cli:test_browser_cli.py:45 Cleanup finally - Browser state: False +INFO tests.test_browser_cli:test_browser_cli.py:65 Globals and environment reset after test + +tests/test_browser_cli.py::TestBrowserInitialization::test_multiple_initialization +-------------------------------- live log setup -------------------------------- +INFO tests.test_browser_cli:test_browser_cli.py:28 Cleanup start - Browser state: False +INFO tests.test_browser_cli:test_browser_cli.py:39 Globals and environment reset before test +FAILED [ 7/133] +------------------------------ live log teardown ------------------------------- +INFO tests.test_browser_cli:test_browser_cli.py:45 Cleanup finally - Browser state: False +INFO tests.test_browser_cli:test_browser_cli.py:65 Globals and environment reset after test + +tests/test_browser_cli.py::TestBrowserTasks::test_model_switching +-------------------------------- live log setup -------------------------------- +INFO tests.test_browser_cli:test_browser_cli.py:28 Cleanup start - Browser state: False +INFO tests.test_browser_cli:test_browser_cli.py:39 Globals and environment reset before test +FAILED [ 8/133] +------------------------------ live log teardown ------------------------------- +INFO tests.test_browser_cli:test_browser_cli.py:45 Cleanup finally - Browser state: False +INFO tests.test_browser_cli:test_browser_cli.py:65 Globals and environment reset after test + +tests/test_browser_cli.py::TestBrowserTasks::test_vision_capability +-------------------------------- live log setup -------------------------------- +INFO tests.test_browser_cli:test_browser_cli.py:28 Cleanup start - Browser state: False +INFO tests.test_browser_cli:test_browser_cli.py:39 Globals and environment reset before test +FAILED [ 9/133] +------------------------------ live log teardown ------------------------------- +INFO tests.test_browser_cli:test_browser_cli.py:45 Cleanup finally - Browser state: False +INFO tests.test_browser_cli:test_browser_cli.py:65 Globals and environment reset after test + +tests/test_browser_cli.py::TestBrowserTasks::test_recording +-------------------------------- live log setup -------------------------------- +INFO tests.test_browser_cli:test_browser_cli.py:28 Cleanup start - Browser state: False +INFO tests.test_browser_cli:test_browser_cli.py:39 Globals and environment reset 
before test +-------------------------------- live log call --------------------------------- +INFO src.agent.custom_agent:custom_agent.py:438 🚀 Starting task: go to example.com +INFO src.agent.custom_agent:custom_agent.py:222 Starting step 1 +INFO httpx:_client.py:1786 HTTP Request: POST https://api.deepseek.com/v1/chat/completions "HTTP/1.1 422 Unprocessable Entity" +INFO httpx:_client.py:1038 HTTP Request: POST https://api.deepseek.com/v1/chat/completions "HTTP/1.1 200 OK" +INFO src.agent.custom_agent:custom_agent.py:139 Model Response: failed +INFO src.agent.custom_agent:logging.py:96 Batch: 1 action events +INFO browser_use.controller.service:service.py:59 🔗 Navigated to https://example.com +INFO src.agent.custom_agent:custom_agent.py:313 Step 2 completed +INFO src.agent.custom_agent:custom_agent.py:222 Starting step 2 +INFO httpx:_client.py:1786 HTTP Request: POST https://api.deepseek.com/v1/chat/completions "HTTP/1.1 422 Unprocessable Entity" +INFO httpx:_client.py:1038 HTTP Request: POST https://api.deepseek.com/v1/chat/completions "HTTP/1.1 200 OK" +INFO src.agent.custom_agent:custom_agent.py:139 Model Response: success +INFO src.agent.custom_agent:logging.py:96 Batch: 1 action events +INFO src.agent.custom_agent:custom_agent.py:260 Task completed +INFO src.agent.custom_agent:custom_agent.py:313 Step 3 completed +INFO src.agent.custom_agent:custom_agent.py:481 ✅ Task completed successfully +WARNING src.agent.custom_agent:custom_agent.py:342 No history or first screenshot to create GIF from +PASSED [ 10/133] +------------------------------ live log teardown ------------------------------- +INFO tests.test_browser_cli:test_browser_cli.py:45 Cleanup finally - Browser state: False +INFO tests.test_browser_cli:test_browser_cli.py:65 Globals and environment reset after test + +tests/test_browser_cli.py::TestBrowserTasks::test_tracing +-------------------------------- live log setup -------------------------------- +INFO tests.test_browser_cli:test_browser_cli.py:28 Cleanup start - Browser state: False +INFO tests.test_browser_cli:test_browser_cli.py:39 Globals and environment reset before test diff --git a/tests/__init__.py b/tests/__init__.py new file mode 100644 index 00000000..c74cef78 --- /dev/null +++ b/tests/__init__.py @@ -0,0 +1,7 @@ +""" +Test suite for the browser-use project. 
+ +This package contains tests for: +- Browser automation (CLI, core functionality, Playwright) +- API integration (endpoints, LLM integration) +""" \ No newline at end of file diff --git a/tests/requirements-test.txt b/tests/requirements-test.txt new file mode 100644 index 00000000..bef705f9 --- /dev/null +++ b/tests/requirements-test.txt @@ -0,0 +1,3 @@ +pytest>=7.0.0 +pytest-asyncio>=0.21.0 +pytest-cov>=4.0.0 \ No newline at end of file diff --git a/tests/test_api.py b/tests/test_api.py new file mode 100644 index 00000000..5dc4fae7 --- /dev/null +++ b/tests/test_api.py @@ -0,0 +1,73 @@ +import asyncio +from browser_use.browser.browser import Browser, BrowserConfig +from browser_use.browser.context import BrowserContextConfig, BrowserContextWindowSize +from browser_use.agent.service import Agent +from src.utils import utils +from src.controller.custom_controller import CustomController +from src.agent.custom_agent import CustomAgent +from src.agent.custom_prompts import CustomSystemPrompt +import os + +async def main(): + window_w, window_h = 1920, 1080 + + # Initialize the browser + browser = Browser( + config=BrowserConfig( + headless=False, + disable_security=True, + extra_chromium_args=[f"--window-size={window_w},{window_h}"], + ) + ) + + # Create a browser context + async with await browser.new_context( + config=BrowserContextConfig( + trace_path="./tmp/traces", + save_recording_path="./tmp/record_videos", + no_viewport=False, + browser_window_size=BrowserContextWindowSize( + width=window_w, height=window_h + ), + ) + ) as browser_context: + # Initialize the controller + controller = CustomController() + + # Initialize the agent with a simple task using CustomAgent + agent = CustomAgent( + task="go to google.com and search for 'OpenAI'", + add_infos="", # hints for the LLM if needed + llm=utils.get_llm_model( + provider="deepseek", + model_name="deepseek-chat", # Using V2.5 via deepseek-chat endpoint + temperature=0.8, + base_url="https://api.deepseek.com/v1", + api_key=os.getenv("DEEPSEEK_API_KEY", "") + ), + browser=browser, + browser_context=browser_context, + controller=controller, + system_prompt_class=CustomSystemPrompt, + use_vision=False, # Must be False for DeepSeek + tool_call_in_content=True, # Required for DeepSeek as per test files + max_actions_per_step=1 # Control granularity of actions + ) + + # Run the agent + history = await agent.run(max_steps=10) + + print("Final Result:") + print(history.final_result()) + + print("\nErrors:") + print(history.errors()) + + print("\nModel Actions:") + print(history.model_actions()) + + print("\nThoughts:") + print(history.model_thoughts()) + +if __name__ == "__main__": + asyncio.run(main()) \ No newline at end of file diff --git a/tests/test_browser_cli.py b/tests/test_browser_cli.py new file mode 100644 index 00000000..7974ea62 --- /dev/null +++ b/tests/test_browser_cli.py @@ -0,0 +1,591 @@ +import sys +from pathlib import Path +import tempfile +import logging +from io import StringIO +import contextlib + +# Add project root to Python path +PROJECT_ROOT = Path(__file__).parent.parent +sys.path.insert(0, str(PROJECT_ROOT)) + +import pytest +import asyncio +import os +from cli.browser_use_cli import initialize_browser, run_browser_task, close_browser, main, _global_browser, _global_browser_context +from src.utils.utils import model_names # Import model names from utils + +# Configure logging for tests +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + +# Reset global state before each test 
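+# The autouse fixture below closes any leftover browser session, resets the module globals and the BROWSER_USE_RUNNING flag, and cancels pending asyncio tasks so each test starts from a clean state.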
+@pytest.fixture(autouse=True) +async def cleanup(): + """Ensure proper cleanup of browser and event loop between tests""" + global _global_browser, _global_browser_context + + logger.info(f"Cleanup start - Browser state: {_global_browser is not None}") + + # Reset globals and environment before test + if _global_browser is not None: + await close_browser() + logger.info("Browser closed") + + _global_browser = None + _global_browser_context = None + os.environ["BROWSER_USE_RUNNING"] = "false" + + logger.info("Globals and environment reset before test") + + try: + yield + finally: + try: + logger.info(f"Cleanup finally - Browser state: {_global_browser is not None}") + if _global_browser is not None: + await close_browser() + logger.info("Browser closed") + # Clean up any remaining event loop resources + loop = asyncio.get_event_loop() + tasks = [t for t in asyncio.all_tasks(loop=loop) if not t.done()] + if tasks: + logger.info(f"Found {len(tasks)} pending tasks") + for task in tasks: + task.cancel() + await asyncio.gather(*tasks, return_exceptions=True) + logger.info("Pending tasks cancelled") + except Exception as e: + logger.error(f"Error during cleanup: {e}") + raise + finally: + _global_browser = None + _global_browser_context = None + os.environ["BROWSER_USE_RUNNING"] = "false" + logger.info("Globals and environment reset after test") + +class TestBrowserInitialization: + """Test browser launch-time options""" + + async def test_basic_initialization(self): + """Test basic browser initialization with defaults""" + success = await initialize_browser() + assert success is True + + async def test_window_size(self): + """Test custom window size""" + success = await initialize_browser(window_size=(800, 600)) + assert success is True + + # Create a simple HTML page that displays window size + result = await run_browser_task( + "go to data:text/html,", + model="deepseek-chat" + ) + assert result is not None and "800" in result.lower() and "600" in result.lower() + + async def test_headless_mode(self): + """Test headless mode""" + success = await initialize_browser(headless=True) + assert success is True + # Verify we can still run tasks + result = await run_browser_task( + "go to example.com and tell me the title", + model="deepseek-chat" + ) + assert result is not None and "example" in result.lower() + + async def test_user_data_dir(self, tmp_path): + """Test custom user data directory""" + user_data = tmp_path / "chrome_data" + user_data.mkdir() + success = await initialize_browser(user_data_dir=str(user_data)) + assert success is True + assert user_data.exists() + + async def test_proxy_configuration(self): + """Test proxy configuration""" + # Using a test proxy - in practice you'd use a real proxy server + test_proxy = "localhost:8080" + success = await initialize_browser(proxy=test_proxy) + assert success is True + + @pytest.mark.timeout(30) # Add 30 second timeout + async def test_disable_security(self): + """Test security disable option""" + success = await initialize_browser(disable_security=True) + assert success is True + # Try accessing a cross-origin resource that would normally be blocked + result = await run_browser_task( + "go to a test page and try to access cross-origin content", + model="deepseek-chat", + max_steps=5 # Limit steps to prevent timeout + ) + assert result is not None and "error" not in result.lower() + + async def test_multiple_initialization(self): + """Test that second initialization fails while browser is running""" + success1 = await initialize_browser() + 
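+        # The first initialization should succeed; the second call below is expected to return False while this session is still active.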
assert success1 is True + success2 = await initialize_browser() + assert success2 is False + +class TestBrowserTasks: + """Test runtime task options""" + + @pytest.fixture(autouse=True) + async def setup_browser(self): + """Start browser before each test""" + await initialize_browser() + yield + + @pytest.fixture + def local_test_page(self): + """Create a local HTML file for testing""" + with tempfile.NamedTemporaryFile(mode='w', suffix='.html', delete=False) as f: + f.write(""" + + + + Test Page + + +

+                    <h1>Test Content</h1>
+                    <p>This is a test paragraph with specific content.</p>
+                    <button type="button">Click me</button>
+ + + """) + return f.name + + async def test_model_switching(self): + """Test switching between different LLM models""" + # Test DeepSeek - Note: 422 errors are expected but don't affect functionality + try: + result1 = await run_browser_task( + "go to example.com and summarize the page", + model="deepseek-chat" + ) + assert result1 is not None + except Exception as e: + if "422" not in str(e): # Only ignore 422 errors + raise + + # Test Gemini + os.environ["GOOGLE_API_MODEL"] = model_names["gemini"][0] # Set model via environment + result2 = await run_browser_task( + "what do you see on the page?", + model="gemini", + vision=True + ) + assert result2 is not None and len(result2) > 0 + assert result1 is not None and len(result1) > 0 + assert result1 != result2 # Different models should give different responses + + async def test_vision_capability(self): + """Test vision capabilities""" + # Set Gemini model via environment + os.environ["GOOGLE_API_MODEL"] = model_names["gemini"][0] + + # Without vision + result1 = await run_browser_task( + "what do you see on example.com?", + model="gemini", + vision=False + ) + + # With vision + result2 = await run_browser_task( + "what do you see on example.com?", + model="gemini", + vision=True + ) + + assert result1 is not None and result2 is not None and len(result2) > len(result1) # Vision should provide more details + + async def test_recording(self, tmp_path): + """Test session recording""" + record_path = tmp_path / "recordings" + record_path.mkdir() + + await run_browser_task( + "go to example.com", + record=True, + record_path=str(record_path) + ) + + # Check that recording file was created + recordings = list(record_path.glob("*.webm")) + assert len(recordings) > 0 + + async def test_tracing(self, tmp_path): + """Test debug tracing""" + trace_path = tmp_path / "traces" + trace_path.mkdir() + + await run_browser_task( + "go to example.com", + trace_path=str(trace_path) + ) + + # Wait a bit for the trace file to be written + await asyncio.sleep(1) + + # Check that trace file was created + traces = list(trace_path.glob("*.zip")) + assert len(traces) > 0 + + async def test_max_steps_limit(self): + """Test max steps limitation""" + with pytest.raises(Exception): + # This task would normally take more than 2 steps + await run_browser_task( + "go to google.com, search for 'OpenAI', click first result", + max_steps=2 + ) + + async def test_max_actions_limit(self): + """Test max actions per step limitation""" + with pytest.raises(Exception): + # This would require multiple actions in one step + await run_browser_task( + "go to google.com and click all search results", + max_actions=1 + ) + + async def test_additional_context(self): + """Test providing additional context""" + result = await run_browser_task( + "summarize the content", + add_info="Focus on technical details and pricing information" + ) + assert result is not None and ("technical" in result.lower() or "pricing" in result.lower()) + + async def test_report_generation(self, local_test_page): + """Test that the agent can analyze a page and return a report""" + logger.info("Starting report generation test") + + # Check initial state + logger.info(f"Initial browser state: {_global_browser is not None}") + + # Initialize browser + success = await initialize_browser() + logger.info(f"Browser initialization result: {success}") + + assert success is True, "Browser initialization failed" + + # Create the task prompt + prompt = f"Go to file://{local_test_page} and create a report about the page 
structure, including any interactive elements found" + + try: + result = await run_browser_task( + prompt=prompt, + model="deepseek-chat", + max_steps=3 + ) + + logger.info(f"Received report: {result}") + + # Verify the report contains expected information + assert result is not None + assert "Test Content" in result + assert "button" in result.lower() + assert "paragraph" in result.lower() + + logger.info("Report verification successful") + + except Exception as e: + logger.error(f"Error during report generation: {e}") + raise + finally: + # Cleanup + os.unlink(local_test_page) + logger.info("Test cleanup completed") + +class TestBrowserLifecycle: + """Test browser lifecycle management""" + + async def test_close_and_reopen(self): + """Test closing and reopening browser""" + # First session + success1 = await initialize_browser() + assert success1 is True + result1 = await run_browser_task("go to example.com") + assert result1 is not None + await close_browser() + + # Second session + success2 = await initialize_browser() + assert success2 is True + result2 = await run_browser_task("go to example.com") + assert result2 is not None + + async def test_error_handling(self): + """Test error handling in various scenarios""" + # Test running task without browser + with pytest.raises(Exception): + await run_browser_task("this should fail") + + # Test closing already closed browser + await close_browser() + await close_browser() # Should not raise error + + # Test recovery after error + success = await initialize_browser() + assert success is True + result = await run_browser_task("go to example.com") + assert result is not None + +class TestCLICommands: + """Comprehensive tests for CLI command functionality""" + + @pytest.fixture(autouse=True) + def setup_cli(self): + """Setup and cleanup for CLI tests""" + # Store original argv and stdout + self.original_argv = sys.argv.copy() + self.original_stdout = sys.stdout + + # Create StringIO buffer and redirect stdout + self.output = StringIO() + sys.stdout = self.output + + yield + + # Restore original argv and stdout + sys.argv = self.original_argv + sys.stdout = self.original_stdout + + # Close the StringIO buffer + self.output.close() + + def test_start_command_basic(self): + """Test basic browser start command""" + # Ensure output buffer is empty + self.output.truncate(0) + self.output.seek(0) + + sys.argv = ["browser-use", "start"] + with contextlib.redirect_stdout(self.output): + main() + output = self.output.getvalue() + assert "Browser session started successfully" in output + + def test_start_command_with_options(self): + """Test browser start with various options""" + # Ensure output buffer is empty + self.output.truncate(0) + self.output.seek(0) + + sys.argv = [ + "browser-use", "start", + "--window-size", "800x600", + "--headless", + "--disable-security" + ] + with contextlib.redirect_stdout(self.output): + main() + output = self.output.getvalue() + assert "Browser session started successfully" in output + + def test_run_command_basic(self): + """Test basic run command""" + # First start the browser + self.output.truncate(0) + self.output.seek(0) + sys.argv = ["browser-use", "start"] + with contextlib.redirect_stdout(self.output): + main() + + # Then run a task + self.output.truncate(0) + self.output.seek(0) + sys.argv = [ + "browser-use", "run", + "go to example.com", + "--model", "deepseek-chat" + ] + with contextlib.redirect_stdout(self.output): + main() + output = self.output.getvalue() + assert len(output) > 0 + + def 
test_run_command_with_options(self): + """Test run command with various options""" + # First start the browser + self.output.truncate(0) + self.output.seek(0) + sys.argv = ["browser-use", "start"] + with contextlib.redirect_stdout(self.output): + main() + + # Then run a task with multiple options + self.output.truncate(0) + self.output.seek(0) + sys.argv = [ + "browser-use", "run", + "go to example.com", + "--model", "gemini", + "--vision", + "--max-steps", "5", + "--max-actions", "2", + "--add-info", "Focus on the main content" + ] + with contextlib.redirect_stdout(self.output): + main() + output = self.output.getvalue() + assert len(output) > 0 + + def test_close_command(self): + """Test browser close command""" + # First start the browser + self.output.truncate(0) + self.output.seek(0) + sys.argv = ["browser-use", "start"] + with contextlib.redirect_stdout(self.output): + main() + + # Then close it + self.output.truncate(0) + self.output.seek(0) + sys.argv = ["browser-use", "close"] + with contextlib.redirect_stdout(self.output): + main() + output = self.output.getvalue() + assert "Browser session closed" in output + + def test_invalid_command(self): + """Test handling of invalid commands""" + self.output.truncate(0) + self.output.seek(0) + sys.argv = ["browser-use", "invalid-command"] + with pytest.raises(SystemExit): + with contextlib.redirect_stdout(self.output): + main() + + def test_missing_required_args(self): + """Test handling of missing required arguments""" + self.output.truncate(0) + self.output.seek(0) + sys.argv = ["browser-use", "run"] # Missing prompt + with pytest.raises(SystemExit): + with contextlib.redirect_stdout(self.output): + main() + + def test_invalid_window_size(self): + """Test handling of invalid window size format""" + self.output.truncate(0) + self.output.seek(0) + sys.argv = ["browser-use", "start", "--window-size", "invalid"] + with contextlib.redirect_stdout(self.output): + main() # Should use default size + output = self.output.getvalue() + assert "Browser session started successfully" in output + + def test_recording_options(self): + """Test recording functionality via CLI""" + with tempfile.TemporaryDirectory() as tmp_dir: + # First start the browser + self.output.truncate(0) + self.output.seek(0) + sys.argv = ["browser-use", "start"] + with contextlib.redirect_stdout(self.output): + main() + + # Then run with recording + self.output.truncate(0) + self.output.seek(0) + sys.argv = [ + "browser-use", "run", + "go to example.com", + "--record", + "--record-path", tmp_dir + ] + with contextlib.redirect_stdout(self.output): + main() + recordings = list(Path(tmp_dir).glob("*.webm")) + assert len(recordings) > 0 + + def test_tracing_options(self): + """Test tracing functionality via CLI""" + with tempfile.TemporaryDirectory() as tmp_dir: + # First start the browser + self.output.truncate(0) + self.output.seek(0) + sys.argv = ["browser-use", "start"] + with contextlib.redirect_stdout(self.output): + main() + + # Then run with tracing + self.output.truncate(0) + self.output.seek(0) + sys.argv = [ + "browser-use", "run", + "go to example.com", + "--trace-path", tmp_dir + ] + with contextlib.redirect_stdout(self.output): + main() + traces = list(Path(tmp_dir).glob("*.zip")) + assert len(traces) > 0 + + def test_model_switching_cli(self): + """Test switching between different models via CLI""" + # First start the browser + self.output.truncate(0) + self.output.seek(0) + sys.argv = ["browser-use", "start"] + with contextlib.redirect_stdout(self.output): + main() + 
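+        # Each CLI invocation below resets the captured stdout buffer (truncate/seek) before running, so the provider outputs can be compared at the end.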
+ # Test with DeepSeek + self.output.truncate(0) + self.output.seek(0) + sys.argv = [ + "browser-use", "run", + "go to example.com", + "--model", "deepseek-chat" + ] + with contextlib.redirect_stdout(self.output): + main() + deepseek_output = self.output.getvalue() + + # Close browser to clean up event loop + self.output.truncate(0) + self.output.seek(0) + sys.argv = ["browser-use", "close"] + with contextlib.redirect_stdout(self.output): + main() + + # Start new browser for Gemini test + self.output.truncate(0) + self.output.seek(0) + sys.argv = ["browser-use", "start"] + with contextlib.redirect_stdout(self.output): + main() + + # Test with Gemini + self.output.truncate(0) + self.output.seek(0) + os.environ["GOOGLE_API_MODEL"] = model_names["gemini"][0] + sys.argv = [ + "browser-use", "run", + "go to example.com", + "--model", "gemini", + "--vision" + ] + with contextlib.redirect_stdout(self.output): + main() + gemini_output = self.output.getvalue() + + # Close browser + self.output.truncate(0) + self.output.seek(0) + sys.argv = ["browser-use", "close"] + with contextlib.redirect_stdout(self.output): + main() + + assert len(deepseek_output) > 0 + assert len(gemini_output) > 0 + assert deepseek_output != gemini_output \ No newline at end of file diff --git a/tests/test_browser_controller.py b/tests/test_browser_controller.py new file mode 100644 index 00000000..409d8d33 --- /dev/null +++ b/tests/test_browser_controller.py @@ -0,0 +1,125 @@ +import pytest +from unittest.mock import AsyncMock, patch, MagicMock +import asyncio +from src.utils.browser_controller import BrowserController + +@pytest.fixture +async def browser_controller(): + controller = BrowserController() + yield controller + await controller.cleanup() + +@pytest.mark.asyncio +async def test_single_initialization(browser_controller): + mock_browser = AsyncMock() + mock_playwright = AsyncMock() + mock_playwright.chromium.launch = AsyncMock(return_value=mock_browser) + + with patch('src.utils.browser_controller.async_playwright', + return_value=AsyncMock(start=AsyncMock(return_value=mock_playwright))) as mock_async_playwright: + await browser_controller.initialize() + assert browser_controller.init_count == 1 + assert browser_controller.browser == mock_browser + + # Verify progress events + progress_history = browser_controller.logger.get_progress_history() + assert len(progress_history) >= 2 # At least start and complete events + assert progress_history[0]["status"] == "starting" + assert progress_history[-1]["status"] == "completed" + assert progress_history[-1]["progress"] == 1.0 + + # Second initialization should not create new browser + await browser_controller.initialize() + assert browser_controller.init_count == 1 + mock_async_playwright.assert_called_once() + +@pytest.mark.asyncio +async def test_concurrent_initialization(browser_controller): + mock_browser = AsyncMock() + mock_playwright = AsyncMock() + mock_playwright.chromium.launch = AsyncMock(return_value=mock_browser) + + with patch('src.utils.browser_controller.async_playwright', + return_value=AsyncMock(start=AsyncMock(return_value=mock_playwright))): + # Start multiple concurrent initializations + tasks = [browser_controller.initialize() for _ in range(3)] + await asyncio.gather(*tasks) + + # Should only initialize once + assert browser_controller.init_count == 1 + assert browser_controller.browser == mock_browser + + # Verify browser events + browser_events = browser_controller.logger.get_browser_events() + launch_events = [e for e in browser_events if 
e["event_type"] == "browser_launched"] + assert len(launch_events) == 1 + +@pytest.mark.asyncio +async def test_browser_launch_options(browser_controller): + mock_browser = AsyncMock() + mock_playwright = AsyncMock() + mock_playwright.chromium.launch = AsyncMock(return_value=mock_browser) + + with patch('src.utils.browser_controller.async_playwright', + return_value=AsyncMock(start=AsyncMock(return_value=mock_playwright))) as mock_async_playwright: + await browser_controller.initialize() + + # Verify launch options + mock_playwright.chromium.launch.assert_called_once_with( + headless=True, + args=['--no-sandbox'] + ) + + # Verify browser events + browser_events = browser_controller.logger.get_browser_events() + launch_event = next(e for e in browser_events if e["event_type"] == "browser_launched") + assert launch_event["details"]["headless"] is True + +@pytest.mark.asyncio +async def test_initialization_failure(browser_controller): + mock_playwright = AsyncMock() + mock_playwright.chromium.launch = AsyncMock(side_effect=Exception("Browser launch failed")) + + with patch('src.utils.browser_controller.async_playwright', + return_value=AsyncMock(start=AsyncMock(return_value=mock_playwright))), \ + pytest.raises(Exception, match="Browser launch failed"): + await browser_controller.initialize() + + assert browser_controller.browser is None + assert browser_controller.init_count == 0 + + # Verify error events + browser_events = browser_controller.logger.get_browser_events() + error_event = next(e for e in browser_events if e["event_type"] == "launch_failed") + assert "Browser launch failed" in error_event["details"]["error"] + + # Verify progress events show failure + progress_events = browser_controller.logger.get_progress_history() + final_event = progress_events[-1] + assert final_event["status"] == "failed" + assert final_event["progress"] == 0.0 + +@pytest.mark.asyncio +async def test_browser_cleanup(browser_controller): + mock_browser = AsyncMock() + mock_playwright = AsyncMock() + mock_playwright.chromium.launch = AsyncMock(return_value=mock_browser) + + with patch('src.utils.browser_controller.async_playwright', + return_value=AsyncMock(start=AsyncMock(return_value=mock_playwright))): + await browser_controller.initialize() + assert browser_controller.browser is not None + + await browser_controller.cleanup() + mock_browser.close.assert_called_once() + mock_playwright.stop.assert_called_once() + assert browser_controller.browser is None + assert browser_controller._playwright is None + + # Verify cleanup events + progress_events = browser_controller.logger.get_progress_history() + cleanup_events = [e for e in progress_events if e["step"] == "cleanup"] + assert len(cleanup_events) >= 2 # At least start and complete events + assert cleanup_events[0]["status"] == "starting" + assert cleanup_events[-1]["status"] == "completed" + assert cleanup_events[-1]["progress"] == 1.0 \ No newline at end of file diff --git a/tests/test_browser_use_cli.py b/tests/test_browser_use_cli.py new file mode 100644 index 00000000..1506d019 --- /dev/null +++ b/tests/test_browser_use_cli.py @@ -0,0 +1,117 @@ +import pytest +import asyncio +from pathlib import Path +from urllib.parse import urlparse +from cli.browser_use_cli import run_browser_task, initialize_browser, close_browser + +@pytest.fixture +async def browser_session(): + """Fixture to manage browser session for tests""" + await initialize_browser(headless=True) + yield + await close_browser() + +@pytest.mark.asyncio +async def test_url_validation(): + 
"""Test URL validation in run_browser_task""" + # Test invalid URLs + invalid_urls = [ + "not-a-url", + "http://", + "https://", + "ftp://example.com", # non-http(s) protocol + "", + None + ] + + for url in invalid_urls: + result = await run_browser_task( + prompt="test task", + url=url, + provider="Deepseek", + headless=True + ) + assert "Invalid URL provided" in result + + # Test valid URLs + valid_urls = [ + "https://example.com", + "http://localhost:8080", + "https://prompt-forge.replit.app/" + ] + + for url in valid_urls: + result = await run_browser_task( + prompt="test task", + url=url, + provider="Deepseek", + headless=True + ) + assert "Invalid URL provided" not in result + +@pytest.mark.asyncio +async def test_url_navigation(browser_session): + """Test that the browser actually navigates to the provided URL""" + url = "https://example.com" + result = await run_browser_task( + prompt="verify the page title contains 'Example'", + url=url, + provider="Deepseek", + headless=True, + max_steps=3 + ) + assert "success" in result.lower() or "verified" in result.lower() + +@pytest.mark.asyncio +async def test_url_in_prompt(): + """Test that the URL is correctly prepended to the task prompt""" + url = "https://example.com" + test_prompt = "click the button" + result = await run_browser_task( + prompt=test_prompt, + url=url, + provider="Deepseek", + headless=True + ) + + # The result should indicate navigation happened first + assert "navigated" in result.lower() or "loaded" in result.lower() + +@pytest.mark.asyncio +async def test_multiple_tasks_same_url(browser_session): + """Test running multiple tasks with the same starting URL""" + url = "https://example.com" + tasks = [ + "verify the page has loaded", + "check if there are any links on the page", + "look for a search box" + ] + + for task in tasks: + result = await run_browser_task( + prompt=task, + url=url, + provider="Deepseek", + headless=True, + max_steps=3 + ) + assert result is not None + assert isinstance(result, str) + +@pytest.mark.asyncio +async def test_url_with_different_providers(): + """Test URL handling with different providers""" + url = "https://example.com" + providers = ["Deepseek", "Google", "Anthropic"] + + for provider in providers: + result = await run_browser_task( + prompt="verify the page has loaded", + url=url, + provider=provider, + headless=True, + max_steps=3 + ) + assert result is not None + assert isinstance(result, str) + assert "Invalid URL provided" not in result \ No newline at end of file diff --git a/tests/test_browser_vision.py b/tests/test_browser_vision.py new file mode 100644 index 00000000..75b44c32 --- /dev/null +++ b/tests/test_browser_vision.py @@ -0,0 +1,94 @@ +import os +import pytest +from dotenv import load_dotenv +from src.utils import utils +from cli.browser_use_cli import run_browser_task + +# Load environment variables +load_dotenv() + +@pytest.mark.asyncio +class TestBrowserVision: + """Test browser automation with vision capabilities""" + + async def setup_method(self): + """Setup test environment""" + self.api_key = os.getenv("OPENAI_API_KEY") + if not self.api_key: + pytest.skip("OPENAI_API_KEY not set") + + async def test_vision_analysis_task(self): + """Test visual analysis of a webpage""" + result = await run_browser_task( + prompt="go to https://example.com and describe the visual layout of the page", + provider="OpenAI", + vision=True, + headless=True, # Run headless for CI/CD + record=True, # Record for debugging + record_path="./tmp/test_recordings" + ) + assert result 
is not None + assert "layout" in result.lower() or "design" in result.lower() + + async def test_vision_interaction_task(self): + """Test visual-guided interaction""" + result = await run_browser_task( + prompt="go to https://example.com and click on the most prominent link on the page", + provider="OpenAI", + vision=True, + headless=True, + record=True, + record_path="./tmp/test_recordings" + ) + assert result is not None + assert "clicked" in result.lower() or "selected" in result.lower() + + async def test_vision_verification_task(self): + """Test visual verification of page state""" + result = await run_browser_task( + prompt="go to https://example.com and verify that the main heading is visible and centered", + provider="OpenAI", + vision=True, + headless=True, + record=True, + record_path="./tmp/test_recordings" + ) + assert result is not None + assert "heading" in result.lower() and ("visible" in result.lower() or "centered" in result.lower()) + + async def test_vision_error_handling(self): + """Test error handling with vision tasks""" + # Test with a non-existent page to verify error handling + result = await run_browser_task( + prompt="go to https://nonexistent.example.com and describe what you see", + provider="OpenAI", + vision=True, + headless=True, + record=True, + record_path="./tmp/test_recordings" + ) + assert result is not None + assert "error" in result.lower() or "unable" in result.lower() or "failed" in result.lower() + + async def test_vision_with_different_models(self): + """Test vision capabilities with different providers""" + test_configs = [ + "OpenAI", # Will use gpt-4o + "Google", # Will use gemini-pro + "Anthropic" # Will use claude-3-5-sonnet-20241022 + ] + + for provider in test_configs: + result = await run_browser_task( + prompt="go to https://example.com and describe the page layout", + provider=provider, + vision=True, + headless=True, + record=True, + record_path=f"./tmp/test_recordings/{provider.lower()}" + ) + assert result is not None + assert len(result) > 0, f"Failed with provider {provider}" + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) \ No newline at end of file diff --git a/tests/test_enhanced_tracing.py b/tests/test_enhanced_tracing.py new file mode 100644 index 00000000..74b4d344 --- /dev/null +++ b/tests/test_enhanced_tracing.py @@ -0,0 +1,894 @@ +import pytest +import asyncio +import json +import zipfile +from pathlib import Path +import tempfile +from src.trace_analyzer import PlaywrightTrace, analyze_trace, EnhancedTraceAnalyzer + +# Sample enhanced trace data with new features +SAMPLE_ENHANCED_TRACE = { + "action_context": { + "before_state": { + "element": "#login-button", + "visible": True, + "enabled": True, + "text": "Log In" + }, + "after_state": { + "element": "#login-button", + "visible": True, + "enabled": True, + "clicked": True + }, + "interactive_elements": [ + { + "selector": "#login-button", + "confidence": 0.95, + "chosen": True, + "reason": "Primary login button with highest visibility" + }, + { + "selector": "#signup-button", + "confidence": 0.45, + "chosen": False, + "reason": "Not relevant for login action" + } + ], + "element_state_before": { + "visible": True, + "computed_styles": { + "pointer-events": "auto", + "opacity": "1", + "z-index": "100" + }, + "focus_state": "not-focused", + "accessibility": { + "aria-hidden": "false", + "aria-disabled": "false" + } + }, + "element_state_after": { + "visible": True, + "focus_state": "focused", + "triggered_events": ["click", "focus"], + "accessibility": { + 
"aria-hidden": "false", + "aria-disabled": "false" + } + } + }, + "decision_trail": { + "reasoning": [ + "Identified login form as primary authentication method", + "Located login button with high confidence", + "Verified button is enabled and visible" + ], + "alternatives": [ + { + "action": "click signup button", + "rejected_reason": "Not aligned with login task" + } + ], + "influential_features": ["button text", "aria-label", "position"], + "confidence_threshold": 0.8, + "attention_weights": { + "element_text": 0.6, + "aria_label": 0.3, + "position": 0.1 + }, + "alternative_paths": [ + { + "action": "click hamburger menu", + "rejected_reason": "settings directly visible", + "confidence": 0.4 + } + ] + }, + "element_identification": { + "selectors": { + "xpath": "//button[@id='login-button']", + "css": "#login-button", + "aria": "button[aria-label='Login']", + "text": "button:has-text('Log In')" + }, + "visual_position": { + "x": 100, + "y": 200, + "width": 80, + "height": 40 + }, + "relationships": { + "parent": "form#login-form", + "siblings": ["#username-input", "#password-input"] + }, + "relative_position": { + "from_top_nav": "20px from right", + "from_viewport": "top-right quadrant", + "nearest_landmarks": [ + {"element": "button.new-template", "distance": "40px left"}, + {"element": "div.user-menu", "distance": "60px right"} + ] + }, + "hierarchy": { + "parent": "nav.top-bar", + "siblings": ["button.new-template", "button.help"], + "children": ["span.icon", "span.text"] + } + }, + "failure_analysis": { + "state": "Element found but not clickable", + "attempts": [ + { + "strategy": "wait for visibility", + "outcome": "success", + "duration": 500 + } + ], + "dom_changes": [ + { + "timestamp": 1000, + "change": "overlay-removed" + } + ], + "dom_mutations": [ + { + "timestamp": "T+200ms", + "type": "attribute_change", + "element": "#settings-modal", + "attribute": "aria-hidden", + "old_value": "true", + "new_value": "false" + } + ], + "network_state": { + "requests_in_flight": 2, + "last_completed_request": "/api/settings", + "pending_requests": [ + { + "url": "/api/user/preferences", + "method": "GET", + "duration_so_far": "150ms" + } + ] + } + }, + "session_context": { + "url": "https://example.com/login", + "route_changes": [ + { + "from": "/", + "to": "/login", + "timestamp": 900 + } + ], + "network_requests": [ + { + "url": "/api/auth", + "method": "POST", + "status": 200 + } + ], + "viewport": { + "width": 1920, + "height": 1080, + "device_pixel_ratio": 2, + "orientation": "landscape" + }, + "performance_metrics": { + "memory_usage": "120MB", + "dom_node_count": 1250, + "frame_rate": "60fps", + "resource_timing": { + "dns_lookup": "10ms", + "connection": "50ms", + "ttfb": "200ms" + } + }, + "browser_state": { + "cookies_enabled": True, + "javascript_enabled": True, + "local_storage_used": "2.5MB", + "active_service_workers": 2 + } + }, + "recovery_info": { + "checkpoints": [ + { + "state": "pre-login", + "timestamp": 800, + "restorable": True + } + ], + "alternative_selectors": [ + "#login-button", + "button[aria-label='Login']" + ], + "state_restoration": { + "checkpoints": [ + { + "timestamp": "T+0", + "state": "initial_load", + "restorable": True, + "snapshot": { + "url": "https://example.com/login", + "scroll_position": {"x": 0, "y": 0}, + "form_data": {"username": "test", "password": "****"} + } + }, + { + "timestamp": "T+1500ms", + "state": "settings_clicked", + "restorable": True, + "snapshot": { + "url": "https://example.com/settings", + "modal_open": True, + 
"selected_tab": "general" + } + } + ] + }, + "fallback_sequences": [ + { + "condition": "settings_button_not_visible", + "actions": [ + { + "step": "check_viewport_scroll", + "max_attempts": 3, + "delay_between_attempts": "500ms" + }, + { + "step": "check_hamburger_menu", + "required_elements": ["button.menu", "div.dropdown"] + }, + { + "step": "refresh_page", + "clear_cache": True + } + ], + "success_criteria": { + "element_visible": True, + "element_clickable": True, + "no_overlays": True + } + } + ] + }, + "model_data": { + "input_tokens": 512, + "output_tokens": 128, + "vision_analysis": { + "button_detected": True, + "confidence": 0.98 + } + }, + "temporal_context": { + "action_start": 1000, + "action_complete": 1500, + "wait_conditions": [ + { + "type": "animation", + "duration": 200 + } + ] + }, + "element_reporting": { + "current_step": { + "number": 3, + "description": "Locating settings button", + "context": "Looking for interactive element with icon or label", + "viewport_state": "Fully loaded, no overlays" + }, + "element_selection": { + "chosen_element": { + "selector": "button.settings-icon", + "confidence": 0.95, + "action": "click", + "description": "Settings button in top-right corner" + }, + "alternative_candidates": [ + { + "selector": "div.menu-icon", + "confidence": 0.45, + "rejected_reason": "Not interactive element" + }, + { + "selector": "span.gear-icon", + "confidence": 0.30, + "rejected_reason": "Hidden by overlay" + } + ], + "selection_criteria": [ + "Visibility in viewport", + "Interactive element", + "Icon matching settings/gear pattern" + ] + } + }, + "error_context": { + "session_state": { + "status": "reset_required", + "reason": "No active session found", + "action": "Creating new session with fresh context", + "resolution": "Reinitialize successful" + }, + "recovery_steps": [ + { + "attempt": 1, + "strategy": "clear_session", + "outcome": "success" + }, + { + "attempt": 2, + "strategy": "reinitialize", + "outcome": "success" + } + ] + }, + "timing_analysis": { + "action_breakdown": { + "element_search": "150ms", + "interaction_delay": "50ms", + "animation_duration": "200ms", + "network_wait": "300ms" + }, + "cumulative_timing": { + "total_duration": "700ms", + "user_perceived_latency": "250ms" + }, + "performance_markers": { + "first_paint": "100ms", + "first_contentful_paint": "200ms", + "time_to_interactive": "450ms" + } + }, + "visual_state": { + "screenshot_diffs": { + "before_click": "diff_1.png", + "after_click": "diff_2.png", + "changes_highlighted": True + }, + "element_visibility": { + "before": { + "visible_area_percentage": 100, + "obscured_by": [], + "viewport_position": "center" + }, + "after": { + "visible_area_percentage": 100, + "obscured_by": [], + "viewport_position": "center" + } + }, + "layout_shifts": [ + { + "timestamp": "T+100ms", + "elements_moved": ["#settings-panel", "#main-content"], + "cumulative_layout_shift": 0.1 + } + ] + }, + "error_recovery": { + "retry_strategy": { + "backoff": "exponential", + "max_attempts": 3, + "conditions": { + "network_stable": True, + "animations_complete": True, + "viewport_stable": True + } + }, + "environment_factors": { + "network_conditions": { + "latency": "50ms", + "bandwidth": "10Mbps", + "stability": "stable" + }, + "system_resources": { + "cpu_utilization": "45%", + "memory_available": "2GB", + "gpu_utilization": "30%" + } + }, + "recovery_checkpoints": [ + { + "timestamp": "T+0", + "state": "pre_action", + "snapshot": { + "dom_state": "hash1234", + "scroll_position": {"x": 0, "y": 0} + } 
+ }, + { + "timestamp": "T+500ms", + "state": "post_action", + "snapshot": { + "dom_state": "hash5678", + "scroll_position": {"x": 0, "y": 100} + } + } + ] + } +} + +@pytest.fixture +def enhanced_trace_file(): + """Create a temporary trace file with enhanced sample data.""" + with tempfile.NamedTemporaryFile(suffix='.zip', delete=False) as temp_zip: + with zipfile.ZipFile(temp_zip.name, 'w') as zf: + zf.writestr('trace.enhanced', json.dumps(SAMPLE_ENHANCED_TRACE)) + yield temp_zip.name + Path(temp_zip.name).unlink() + +@pytest.mark.asyncio +async def test_action_context_analysis(enhanced_trace_file): + """Test analysis of action context including before/after states.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + context = await analyzer.analyze_action_context() + + assert context["interactive_elements_count"] == 2 + assert context["chosen_element"]["confidence"] > 0.9 + assert len(context["state_changes"]) > 0 + assert "clicked" in context["state_changes"][0]["after"] + +@pytest.mark.asyncio +async def test_decision_trail_analysis(enhanced_trace_file): + """Test analysis of decision making process.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + trail = await analyzer.analyze_decision_trail() + + assert len(trail["reasoning_steps"]) == 3 + assert len(trail["alternative_actions"]) > 0 + assert len(trail["key_features"]) > 0 + +@pytest.mark.asyncio +async def test_element_identification_analysis(enhanced_trace_file): + """Test analysis of element identification methods.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + identification = await analyzer.analyze_element_identification() + + assert len(identification["selectors"]) >= 4 + assert "visual_position" in identification + assert "element_relationships" in identification + +@pytest.mark.asyncio +async def test_failure_analysis(enhanced_trace_file): + """Test analysis of failure scenarios and recovery attempts.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + failure = await analyzer.analyze_failures() + + assert "failure_state" in failure + assert len(failure["recovery_attempts"]) > 0 + assert "dom_mutations" in failure + +@pytest.mark.asyncio +async def test_session_context_analysis(enhanced_trace_file): + """Test analysis of session-wide context.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + session = await analyzer.analyze_session_context() + + assert "current_url" in session + assert len(session["route_history"]) > 0 + assert len(session["network_activity"]) > 0 + +@pytest.mark.asyncio +async def test_recovery_info_analysis(enhanced_trace_file): + """Test analysis of recovery information.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + recovery = await analyzer.analyze_recovery_info() + + assert len(recovery["restore_points"]) > 0 + assert len(recovery["fallback_selectors"]) > 0 + +@pytest.mark.asyncio +async def test_model_data_analysis(enhanced_trace_file): + """Test analysis of model-specific data.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + model_data = await analyzer.analyze_model_data() + + assert "token_usage" in model_data + assert "vision_results" in model_data + assert model_data["token_usage"]["total"] == model_data["token_usage"]["input"] + model_data["token_usage"]["output"] + +@pytest.mark.asyncio +async def test_temporal_context_analysis(enhanced_trace_file): + """Test analysis of temporal information.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + temporal = await analyzer.analyze_temporal_context() + + assert 
"duration" in temporal + assert len(temporal["wait_events"]) > 0 + assert temporal["duration"] == temporal["end_time"] - temporal["start_time"] + +@pytest.mark.asyncio +async def test_comprehensive_trace_analysis(enhanced_trace_file): + """Test end-to-end analysis of enhanced trace data.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + result = await analyzer.analyze_all() + + # Verify all major components are present + assert "action_context" in result + assert "decision_trail" in result + assert "element_identification" in result + assert "failure_analysis" in result + assert "session_context" in result + assert "recovery_info" in result + assert "model_data" in result + assert "temporal_context" in result + + # Verify relationships between components + assert result["action_context"]["timestamp"] <= result["temporal_context"]["end_time"] + + # Debug prints + print("\nFallback selectors:", result["recovery_info"]["fallback_selectors"]) + print("Element selectors:", result["element_identification"]["selectors"].values()) + + # Verify that at least one selector is in the fallback selectors + assert any(selector in result["recovery_info"]["fallback_selectors"] + for selector in result["element_identification"]["selectors"].values()) + +@pytest.mark.asyncio +async def test_enhanced_element_reporting(enhanced_trace_file): + """Test enhanced element reporting with detailed selection context.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + reporting = await analyzer.analyze_element_reporting() + + # Verify step context + assert reporting["current_step"]["number"] == 3 + assert "description" in reporting["current_step"] + assert "context" in reporting["current_step"] + assert "viewport_state" in reporting["current_step"] + + # Verify element selection details + selection = reporting["element_selection"] + assert selection["chosen_element"]["confidence"] > 0.9 + assert len(selection["alternative_candidates"]) >= 2 + assert len(selection["selection_criteria"]) >= 3 + + # Verify detailed element information + chosen = selection["chosen_element"] + assert "selector" in chosen + assert "description" in chosen + assert "action" in chosen + +@pytest.mark.asyncio +async def test_enhanced_error_context(enhanced_trace_file): + """Test enhanced error context and session state reporting.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + error_context = await analyzer.analyze_error_context() + + # Verify session state information + assert "status" in error_context["session_state"] + assert "reason" in error_context["session_state"] + assert "action" in error_context["session_state"] + assert "resolution" in error_context["session_state"] + + # Verify recovery steps + assert len(error_context["recovery_steps"]) >= 2 + for step in error_context["recovery_steps"]: + assert "attempt" in step + assert "strategy" in step + assert "outcome" in step + +@pytest.mark.asyncio +async def test_comprehensive_analysis_with_enhancements(enhanced_trace_file): + """Test comprehensive analysis including new enhanced features.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + result = await analyzer.analyze_all() + + # Verify new components are present + assert "element_reporting" in result + assert "error_context" in result + + # Verify element reporting structure + reporting = result["element_reporting"] + assert reporting["current_step"]["description"] == "Locating settings button" + assert reporting["element_selection"]["chosen_element"]["selector"] == "button.settings-icon" + 
+ # Verify error context structure + error = result["error_context"] + assert error["session_state"]["status"] == "reset_required" + assert len(error["recovery_steps"]) == 2 + +@pytest.mark.asyncio +async def test_enhanced_action_context_state(enhanced_trace_file): + """Test enhanced action context with detailed element state tracking.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + context = await analyzer.analyze_action_context() + + # Verify element state before action + before_state = context["element_state_before"] + assert before_state["visible"] is True + assert "pointer-events" in before_state["computed_styles"] + assert before_state["focus_state"] == "not-focused" + assert "aria-hidden" in before_state["accessibility"] + + # Verify element state after action + after_state = context["element_state_after"] + assert "focus_state" in after_state + assert len(after_state["triggered_events"]) >= 2 + assert after_state["accessibility"]["aria-hidden"] == "false" + +@pytest.mark.asyncio +async def test_enhanced_decision_trail(enhanced_trace_file): + """Test enhanced decision trail with confidence and attention weights.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + trail = await analyzer.analyze_decision_trail() + + # Verify confidence threshold + assert trail["confidence_threshold"] > 0.7 + + # Verify attention weights + weights = trail["attention_weights"] + assert abs(sum(weights.values()) - 1.0) < 0.01 # Should sum to approximately 1 + assert weights["element_text"] > weights["position"] # Text should have higher weight + + # Verify alternative paths + alternatives = trail["alternative_paths"] + assert len(alternatives) > 0 + assert all("confidence" in path for path in alternatives) + assert all("rejected_reason" in path for path in alternatives) + +@pytest.mark.asyncio +async def test_comprehensive_analysis_with_state_tracking(enhanced_trace_file): + """Test comprehensive analysis including state tracking enhancements.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + result = await analyzer.analyze_all() + + # Verify enhanced action context + context = result["action_context"] + assert "element_state_before" in context + assert "element_state_after" in context + assert "computed_styles" in context["element_state_before"] + + # Verify enhanced decision trail + trail = result["decision_trail"] + assert "confidence_threshold" in trail + assert "attention_weights" in trail + assert "alternative_paths" in trail + +@pytest.mark.asyncio +async def test_enhanced_element_identification(enhanced_trace_file): + """Test enhanced element identification with relative positioning and hierarchy.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + identification = await analyzer.analyze_element_identification() + + # Verify relative positioning + position = identification["relative_position"] + assert "from_top_nav" in position + assert "from_viewport" in position + assert len(position["nearest_landmarks"]) >= 2 + + # Verify element hierarchy + hierarchy = identification["hierarchy"] + assert hierarchy["parent"] == "nav.top-bar" + assert len(hierarchy["siblings"]) >= 2 + assert len(hierarchy["children"]) >= 1 + + # Verify relationships + assert all(isinstance(sibling, str) for sibling in hierarchy["siblings"]) + assert all(isinstance(child, str) for child in hierarchy["children"]) + +@pytest.mark.asyncio +async def test_enhanced_failure_analysis(enhanced_trace_file): + """Test enhanced failure analysis with DOM mutations and network state.""" + analyzer 
= EnhancedTraceAnalyzer(enhanced_trace_file) + failure = await analyzer.analyze_failures() + + # Verify DOM mutations + mutations = failure["dom_mutations"] + assert len(mutations) > 0 + mutation = mutations[0] + assert "timestamp" in mutation + assert "type" in mutation + assert "element" in mutation + assert "old_value" in mutation + assert "new_value" in mutation + + # Verify network state + network = failure["network_state"] + assert "requests_in_flight" in network + assert "last_completed_request" in network + assert len(network["pending_requests"]) > 0 + + # Verify request details + pending = network["pending_requests"][0] + assert "url" in pending + assert "method" in pending + assert "duration_so_far" in pending + +@pytest.mark.asyncio +async def test_comprehensive_analysis_with_enhanced_identification(enhanced_trace_file): + """Test comprehensive analysis including enhanced identification features.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + result = await analyzer.analyze_all() + + # Verify enhanced element identification + identification = result["element_identification"] + assert "relative_position" in identification + assert "hierarchy" in identification + assert identification["hierarchy"]["parent"] == "nav.top-bar" + + # Verify enhanced failure analysis + failure = result["failure_analysis"] + assert "dom_mutations" in failure + assert "network_state" in failure + assert failure["network_state"]["requests_in_flight"] > 0 + +@pytest.mark.asyncio +async def test_enhanced_session_context(enhanced_trace_file): + """Test enhanced session context with viewport and performance metrics.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + session = await analyzer.analyze_session_context() + + # Verify viewport information + viewport = session["viewport"] + assert viewport["width"] == 1920 + assert viewport["height"] == 1080 + assert viewport["device_pixel_ratio"] == 2 + assert viewport["orientation"] == "landscape" + + # Verify performance metrics + metrics = session["performance_metrics"] + assert "memory_usage" in metrics + assert "dom_node_count" in metrics + assert "frame_rate" in metrics + assert all(timing in metrics["resource_timing"] for timing in ["dns_lookup", "connection", "ttfb"]) + + # Verify browser state + browser = session["browser_state"] + assert browser["cookies_enabled"] is True + assert browser["javascript_enabled"] is True + assert "local_storage_used" in browser + assert "active_service_workers" in browser + +@pytest.mark.asyncio +async def test_enhanced_recovery_info(enhanced_trace_file): + """Test enhanced recovery information with state restoration and fallback sequences.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + recovery = await analyzer.analyze_recovery_info() + + # Verify state restoration + restoration = recovery["state_restoration"] + assert len(restoration["checkpoints"]) >= 2 + + # Verify checkpoint details + checkpoint = restoration["checkpoints"][0] + assert "timestamp" in checkpoint + assert "state" in checkpoint + assert "restorable" in checkpoint + assert "snapshot" in checkpoint + assert all(key in checkpoint["snapshot"] for key in ["url", "scroll_position"]) + + # Verify fallback sequences + sequences = recovery["fallback_sequences"] + assert len(sequences) > 0 + sequence = sequences[0] + assert "condition" in sequence + assert len(sequence["actions"]) >= 3 + assert "success_criteria" in sequence + + # Verify action details + action = sequence["actions"][0] + assert "step" in action + assert 
"max_attempts" in action + assert "delay_between_attempts" in action + +@pytest.mark.asyncio +async def test_comprehensive_analysis_with_enriched_context(enhanced_trace_file): + """Test comprehensive analysis including enriched session context and recovery info.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + result = await analyzer.analyze_all() + + # Verify enriched session context + session = result["session_context"] + assert "viewport" in session + assert "performance_metrics" in session + assert "browser_state" in session + assert session["viewport"]["width"] == 1920 + + # Verify enhanced recovery info + recovery = result["recovery_info"] + assert "state_restoration" in recovery + assert "fallback_sequences" in recovery + assert len(recovery["state_restoration"]["checkpoints"]) >= 2 + assert all("success_criteria" in seq for seq in recovery["fallback_sequences"]) + +@pytest.mark.asyncio +async def test_interaction_timing_analysis(enhanced_trace_file): + """Test detailed interaction timing analysis.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + timing = await analyzer.analyze_timing() + + # Verify action breakdown + breakdown = timing["action_breakdown"] + assert "element_search" in breakdown + assert "interaction_delay" in breakdown + assert "animation_duration" in breakdown + assert "network_wait" in breakdown + + # Verify cumulative timing + cumulative = timing["cumulative_timing"] + assert "total_duration" in cumulative + assert "user_perceived_latency" in cumulative + + # Verify performance markers + markers = timing["performance_markers"] + assert all(marker in markers for marker in ["first_paint", "first_contentful_paint", "time_to_interactive"]) + +@pytest.mark.asyncio +async def test_visual_state_tracking(enhanced_trace_file): + """Test visual state tracking and analysis.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + visual = await analyzer.analyze_visual_state() + + # Verify screenshot diffs + diffs = visual["screenshot_diffs"] + assert "before_click" in diffs + assert "after_click" in diffs + assert diffs["changes_highlighted"] is True + + # Verify element visibility + visibility = visual["element_visibility"] + assert "before" in visibility + assert "after" in visibility + assert "visible_area_percentage" in visibility["before"] + assert "viewport_position" in visibility["before"] + + # Verify layout shifts + shifts = visual["layout_shifts"] + assert len(shifts) > 0 + assert "timestamp" in shifts[0] + assert "elements_moved" in shifts[0] + assert "cumulative_layout_shift" in shifts[0] + +@pytest.mark.asyncio +async def test_enhanced_error_recovery(enhanced_trace_file): + """Test enhanced error recovery capabilities.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + recovery = await analyzer.analyze_error_recovery() + + # Verify retry strategy + strategy = recovery["retry_strategy"] + assert strategy["backoff"] == "exponential" + assert strategy["max_attempts"] == 3 + assert all(condition in strategy["conditions"] for condition in ["network_stable", "animations_complete"]) + + # Verify environment factors + env = recovery["environment_factors"] + assert "network_conditions" in env + assert "system_resources" in env + assert all(metric in env["system_resources"] for metric in ["cpu_utilization", "memory_available"]) + + # Verify recovery checkpoints + checkpoints = recovery["recovery_checkpoints"] + assert len(checkpoints) >= 2 + assert all(key in checkpoints[0] for key in ["timestamp", "state", "snapshot"]) + assert 
"dom_state" in checkpoints[0]["snapshot"] + +@pytest.mark.asyncio +async def test_comprehensive_analysis_with_all_features(enhanced_trace_file): + """Test comprehensive analysis including all enhanced features.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + result = await analyzer.analyze_all() + + # Verify new components are present + assert "timing_analysis" in result + assert "visual_state" in result + assert "error_recovery" in result + + # Verify timing analysis + timing = result["timing_analysis"] + assert "action_breakdown" in timing + assert "cumulative_timing" in timing + + # Verify visual state + visual = result["visual_state"] + assert "screenshot_diffs" in visual + assert "element_visibility" in visual + + # Verify error recovery + recovery = result["error_recovery"] + assert "retry_strategy" in recovery + assert "environment_factors" in recovery + assert recovery["retry_strategy"]["backoff"] == "exponential" \ No newline at end of file diff --git a/tests/test_error_handling.py b/tests/test_error_handling.py new file mode 100644 index 00000000..653f0683 --- /dev/null +++ b/tests/test_error_handling.py @@ -0,0 +1,98 @@ +import pytest +from datetime import datetime +from typing import Dict, Any, Optional +import asyncio +from src.utils.error_handling import ErrorHandler, MaxRetriesExceededError + +class TestErrorHandler: + @pytest.fixture + def handler(self): + return ErrorHandler() + + @pytest.mark.asyncio + async def test_max_retries_exceeded(self, handler): + operation = "test_operation" + error = ValueError("Test error") + + # Should handle first three attempts + for _ in range(3): + await handler.handle_error(error, operation) + + # Fourth attempt should raise MaxRetriesExceededError + with pytest.raises(MaxRetriesExceededError) as exc_info: + await handler.handle_error(error, operation) + + assert exc_info.value.operation == operation + assert exc_info.value.original_error == error + + @pytest.mark.asyncio + async def test_error_logging(self, handler): + operation = "test_operation" + error = ValueError("Test error") + + # First attempt + await handler.handle_error(error, operation) + + # Get the last logged error + last_error = handler.get_last_error() + assert last_error["operation"] == operation + assert last_error["attempt"] == 1 + assert "timestamp" in last_error + assert last_error["error"]["name"] == "ValueError" + assert last_error["error"]["message"] == "Test error" + + @pytest.mark.asyncio + async def test_exponential_backoff(self, handler): + operation = "test_operation" + error = ValueError("Test error") + + # Record start time + start = datetime.now() + + # First attempt (should delay 1 second) + await handler.handle_error(error, operation) + + # Second attempt (should delay 2 seconds) + await handler.handle_error(error, operation) + + # Calculate duration + duration = (datetime.now() - start).total_seconds() + + # Should have waited at least 3 seconds (1 + 2) + assert duration >= 3 + + @pytest.mark.asyncio + async def test_error_code_extraction(self, handler): + # Test with connection error + error = ConnectionError("ERR_CONNECTION_REFUSED: Failed to connect") + code = handler.extract_error_code(error) + assert code == "ERR_CONNECTION_REFUSED" + + # Test with DNS error + error = Exception("ERR_NAME_NOT_RESOLVED: Could not resolve hostname") + code = handler.extract_error_code(error) + assert code == "ERR_NAME_NOT_RESOLVED" + + # Test with unknown error + error = ValueError("Some other error") + code = handler.extract_error_code(error) + assert 
code == "UNKNOWN_ERROR" + + @pytest.mark.asyncio + async def test_concurrent_retries(self, handler): + operation = "test_operation" + error = ValueError("Test error") + + # Try to handle the same error concurrently + tasks = [ + handler.handle_error(error, operation), + handler.handle_error(error, operation), + handler.handle_error(error, operation) + ] + + # Should complete without raising an error + await asyncio.gather(*tasks, return_exceptions=True) + + # Fourth attempt should still raise MaxRetriesExceededError + with pytest.raises(MaxRetriesExceededError): + await handler.handle_error(error, operation) \ No newline at end of file diff --git a/tests/test_llm_api.py b/tests/test_llm_api.py index 9e2a1d6d..5b29cb3d 100644 --- a/tests/test_llm_api.py +++ b/tests/test_llm_api.py @@ -6,6 +6,7 @@ # @FileName: test_llm_api.py import os import pdb +import pytest from dotenv import load_dotenv @@ -20,12 +21,16 @@ def test_openai_model(): from langchain_core.messages import HumanMessage from src.utils import utils + api_key = os.getenv("OPENAI_API_KEY") + if not api_key: + pytest.skip("OPENAI_API_KEY not set") + llm = utils.get_llm_model( provider="openai", model_name="gpt-4o", temperature=0.8, base_url=os.getenv("OPENAI_ENDPOINT", ""), - api_key=os.getenv("OPENAI_API_KEY", "") + api_key=api_key ) image_path = "assets/examples/test.png" image_data = utils.encode_image(image_path) @@ -47,11 +52,15 @@ def test_gemini_model(): from langchain_core.messages import HumanMessage from src.utils import utils + api_key = os.getenv("GOOGLE_API_KEY") + if not api_key: + pytest.skip("GOOGLE_API_KEY not set") + llm = utils.get_llm_model( provider="gemini", - model_name="gemini-2.0-flash-exp", + model_name="gemini-1.5-pro", temperature=0.8, - api_key=os.getenv("GOOGLE_API_KEY", "") + api_key=api_key ) image_path = "assets/examples/test.png" @@ -73,12 +82,17 @@ def test_azure_openai_model(): from langchain_core.messages import HumanMessage from src.utils import utils + api_key = os.getenv("AZURE_OPENAI_API_KEY") + endpoint = os.getenv("AZURE_OPENAI_ENDPOINT") + if not api_key or not endpoint: + pytest.skip("AZURE_OPENAI_API_KEY or AZURE_OPENAI_ENDPOINT not set") + llm = utils.get_llm_model( provider="azure_openai", - model_name="gpt-4o", + model_name="gpt-4", temperature=0.8, - base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""), - api_key=os.getenv("AZURE_OPENAI_API_KEY", "") + base_url=endpoint, + api_key=api_key ) image_path = "assets/examples/test.png" image_data = utils.encode_image(image_path) @@ -99,12 +113,39 @@ def test_deepseek_model(): from langchain_core.messages import HumanMessage from src.utils import utils + api_key = os.getenv("DEEPSEEK_API_KEY") + if not api_key: + pytest.skip("DEEPSEEK_API_KEY not set") + llm = utils.get_llm_model( provider="deepseek", model_name="deepseek-chat", temperature=0.8, base_url=os.getenv("DEEPSEEK_ENDPOINT", ""), - api_key=os.getenv("DEEPSEEK_API_KEY", "") + api_key=api_key + ) + message = HumanMessage( + content=[ + {"type": "text", "text": "who are you?"} + ] + ) + ai_msg = llm.invoke([message]) + print(ai_msg.content) + + +def test_anthropic_model(): + from langchain_core.messages import HumanMessage + from src.utils import utils + + api_key = os.getenv("ANTHROPIC_API_KEY") + if not api_key: + pytest.skip("ANTHROPIC_API_KEY not set") + + llm = utils.get_llm_model( + provider="anthropic", + model_name="claude-3-5-sonnet-latest", + temperature=0.8, + api_key=api_key ) message = HumanMessage( content=[ @@ -118,6 +159,16 @@ def test_deepseek_model(): def 
test_ollama_model(): from langchain_ollama import ChatOllama + # Check if Ollama is running by trying to connect to its default port + import socket + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + try: + result = sock.connect_ex(('localhost', 11434)) + if result != 0: + pytest.skip("Ollama server not running on localhost:11434") + finally: + sock.close() + llm = ChatOllama(model="qwen2.5:7b") ai_msg = llm.invoke("Sing a ballad of LangChain.") print(ai_msg.content) @@ -128,4 +179,5 @@ def test_ollama_model(): # test_gemini_model() # test_azure_openai_model() # test_deepseek_model() + # test_anthropic_model() test_ollama_model() diff --git a/tests/test_llm_integration.py b/tests/test_llm_integration.py new file mode 100644 index 00000000..60dc0056 --- /dev/null +++ b/tests/test_llm_integration.py @@ -0,0 +1,182 @@ +import os +import pytest +from dotenv import load_dotenv +from langchain_core.messages import HumanMessage +from src.utils import utils + +# Load environment variables +load_dotenv() + +class TestOpenAIIntegration: + """Test OpenAI model integration and vision capabilities""" + + def setup_method(self): + """Setup test environment""" + # Ensure required environment variables are set + self.api_key = os.getenv("OPENAI_API_KEY") + self.base_url = os.getenv("OPENAI_ENDPOINT", "https://api.openai.com/v1") + if not self.api_key: + pytest.skip("OPENAI_API_KEY not set") + + def test_gpt4_turbo_initialization(self): + """Test GPT-4 Turbo model initialization""" + llm = utils.get_llm_model( + provider="openai", + model_name="gpt-4o", + temperature=0.8, + base_url=self.base_url, + api_key=self.api_key + ) + assert llm is not None + + def test_gpt4_vision_initialization(self): + """Test GPT-4 Vision model initialization""" + llm = utils.get_llm_model( + provider="openai", + model_name="gpt-4o", + temperature=0.8, + base_url=self.base_url, + api_key=self.api_key, + vision=True + ) + assert llm is not None + + @pytest.mark.asyncio + async def test_vision_capability(self): + """Test vision capability with an example image""" + llm = utils.get_llm_model( + provider="openai", + model_name="gpt-4o", + temperature=0.8, + base_url=self.base_url, + api_key=self.api_key, + vision=True + ) + + # Use a test image + image_path = "assets/examples/test.png" + if not os.path.exists(image_path): + pytest.skip(f"Test image not found at {image_path}") + + image_data = utils.encode_image(image_path) + message = HumanMessage( + content=[ + {"type": "text", "text": "describe this image"}, + { + "type": "image_url", + "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}, + }, + ] + ) + response = await llm.ainvoke([message]) + assert response is not None + assert isinstance(response.content, str) + assert len(response.content) > 0 + +class TestAzureOpenAIIntegration: + """Test Azure OpenAI integration""" + + def setup_method(self): + """Setup test environment""" + self.api_key = os.getenv("AZURE_OPENAI_API_KEY") + self.endpoint = os.getenv("AZURE_OPENAI_ENDPOINT") + if not self.api_key or not self.endpoint: + pytest.skip("Azure OpenAI credentials not set") + + def test_azure_model_initialization(self): + """Test Azure OpenAI model initialization""" + llm = utils.get_llm_model( + provider="azure_openai", + model_name="gpt-4", + temperature=0.8, + base_url=self.endpoint, + api_key=self.api_key + ) + assert llm is not None + + @pytest.mark.asyncio + async def test_azure_basic_completion(self): + """Test basic completion with Azure OpenAI""" + llm = utils.get_llm_model( + provider="azure_openai", 
+ model_name="gpt-4", + temperature=0.8, + base_url=self.endpoint, + api_key=self.api_key + ) + + message = HumanMessage(content="Say hello!") + response = await llm.ainvoke([message]) + assert response is not None + assert isinstance(response.content, str) + assert len(response.content) > 0 + +class TestAnthropicIntegration: + """Test Anthropic model integration""" + + def setup_method(self): + """Setup test environment""" + self.api_key = os.getenv("ANTHROPIC_API_KEY") + if not self.api_key: + pytest.skip("ANTHROPIC_API_KEY not set") + + def test_claude_initialization(self): + """Test Claude model initialization""" + llm = utils.get_llm_model( + provider="anthropic", + model_name="claude-3-5-sonnet-latest", + temperature=0.8, + api_key=self.api_key + ) + assert llm is not None + + @pytest.mark.asyncio + async def test_basic_completion(self): + """Test basic completion with Claude""" + llm = utils.get_llm_model( + provider="anthropic", + model_name="claude-3-5-sonnet-latest", + temperature=0.8, + api_key=self.api_key + ) + + message = HumanMessage(content="Say hello!") + response = await llm.ainvoke([message]) + assert response is not None + assert isinstance(response.content, str) + assert len(response.content) > 0 + +def test_model_names_consistency(): + """Test that model names are consistent between toolchain and utils""" + # Test OpenAI models + openai_models = utils.model_names["openai"] + expected_openai = ["gpt-4o"] + assert all(model in openai_models for model in expected_openai), "Missing expected OpenAI models" + + # Test Gemini models + gemini_models = utils.model_names["gemini"] + expected_gemini = ["gemini-1.5-pro", "gemini-2.0-flash"] + assert all(model in gemini_models for model in expected_gemini), "Missing expected Gemini models" + + # Test Anthropic models + anthropic_models = utils.model_names["anthropic"] + expected_anthropic = ["claude-3-5-sonnet-latest", "claude-3-5-sonnet-20241022"] + assert all(model in anthropic_models for model in expected_anthropic), "Missing expected Anthropic models" + + # Test DeepSeek models + deepseek_models = utils.model_names["deepseek"] + expected_deepseek = ["deepseek-chat"] + assert all(model in deepseek_models for model in expected_deepseek), "Missing expected DeepSeek models" + + # Test Azure OpenAI models + azure_models = utils.model_names["azure_openai"] + expected_azure = ["gpt-4", "gpt-3.5-turbo"] + assert all(model in azure_models for model in expected_azure), "Missing expected Azure OpenAI models" + + # Test Ollama models + ollama_models = utils.model_names["ollama"] + expected_ollama = ["qwen2.5:7b", "llama2:7b"] + assert all(model in ollama_models for model in expected_ollama), "Missing expected Ollama models" + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) \ No newline at end of file diff --git a/tests/test_logging.py b/tests/test_logging.py new file mode 100644 index 00000000..5e871e0a --- /dev/null +++ b/tests/test_logging.py @@ -0,0 +1,216 @@ +import json +import logging +import datetime +import pytest +from io import StringIO +from typing import Dict, Any +from src.utils.logging import ( + LogFormatter, + BatchedEventLogger, + setup_logging, + PRODUCTION_EXCLUDE_PATTERNS, + LogLevel +) +import sys + +class TestLogFormatter: + @pytest.fixture + def json_formatter(self): + return LogFormatter(use_json=True) + + @pytest.fixture + def compact_formatter(self): + return LogFormatter(use_json=False) + + def test_json_format_basic_log(self, json_formatter): + record = logging.LogRecord( + name="test_logger", + 
level=logging.INFO, + pathname="test.py", + lineno=1, + msg="Test message", + args=(), + exc_info=None + ) + + formatted = json_formatter.format(record) + parsed = json.loads(formatted) + + assert parsed["level"] == "INFO" + assert parsed["logger"] == "test_logger" + assert parsed["message"] == "Test message" + assert "timestamp" in parsed + + def test_json_format_with_extra_fields(self, json_formatter): + record = logging.LogRecord( + name="test_logger", + level=logging.INFO, + pathname="test.py", + lineno=1, + msg="Test message", + args=(), + exc_info=None + ) + record.event_type = "test_event" + record.event_data = {"key": "value"} + + formatted = json_formatter.format(record) + parsed = json.loads(formatted) + + assert parsed["event_type"] == "test_event" + assert parsed["data"] == {"key": "value"} + + def test_json_format_with_error(self, json_formatter): + try: + raise ValueError("Test error") + except ValueError as e: + record = logging.LogRecord( + name="test_logger", + level=logging.ERROR, + pathname="test.py", + lineno=1, + msg="Error occurred", + args=(), + exc_info=sys.exc_info() + ) + + formatted = json_formatter.format(record) + parsed = json.loads(formatted) + + assert parsed["error"]["type"] == "ValueError" + assert parsed["error"]["message"] == "Test error" + assert "stack_trace" in parsed["error"] + + def test_compact_format_basic_log(self, compact_formatter): + record = logging.LogRecord( + name="test_logger", + level=logging.INFO, + pathname="test.py", + lineno=1, + msg="Test message", + args=(), + exc_info=None + ) + + formatted = compact_formatter.format(record) + assert "] I: Test message" in formatted + + def test_compact_format_with_error(self, compact_formatter): + try: + raise ValueError("Test error") + except ValueError as e: + record = logging.LogRecord( + name="test_logger", + level=logging.ERROR, + pathname="test.py", + lineno=1, + msg="Error occurred", + args=(), + exc_info=sys.exc_info() + ) + + formatted = compact_formatter.format(record) + assert "] E: Error occurred" in formatted + assert "ValueError: Test error" in formatted + +class TestBatchedEventLogger: + @pytest.fixture + def string_io(self): + return StringIO() + + @pytest.fixture + def logger(self, string_io): + handler = logging.StreamHandler(string_io) + handler.setFormatter(LogFormatter(use_json=True)) + logger = logging.getLogger("test_batched") + logger.handlers = [handler] + logger.setLevel(logging.INFO) + return logger + + @pytest.fixture + def batched_logger(self, logger): + return BatchedEventLogger(logger) + + def test_batch_single_event(self, batched_logger, string_io): + event_data = {"action": "click", "element": "button"} + batched_logger.add_event("ui_action", event_data) + batched_logger.flush() + + output = string_io.getvalue() + parsed = json.loads(output) + + assert parsed["event_type"] == "batched_ui_action" + assert parsed["data"]["count"] == 1 + assert parsed["data"]["events"][0] == event_data + + def test_batch_multiple_events(self, batched_logger, string_io): + events = [ + {"action": "click", "element": "button1"}, + {"action": "type", "element": "input1"}, + {"action": "click", "element": "button2"} + ] + + for event in events: + batched_logger.add_event("ui_action", event) + + batched_logger.flush() + + output = string_io.getvalue() + parsed = json.loads(output) + + assert parsed["event_type"] == "batched_ui_action" + assert parsed["data"]["count"] == 3 + assert parsed["data"]["events"] == events + +class TestLoggingSetup: + @pytest.fixture + def temp_logger(self): + # 
Store original handlers + root_logger = logging.getLogger() + original_handlers = root_logger.handlers[:] + + yield root_logger + + # Restore original handlers + root_logger.handlers = original_handlers + + def test_setup_basic_logging(self, temp_logger): + setup_logging(level="INFO", use_json=True) + assert len(temp_logger.handlers) == 1 + assert isinstance(temp_logger.handlers[0].formatter, LogFormatter) + assert temp_logger.level == logging.INFO + + def test_setup_with_exclude_patterns(self, temp_logger): + test_patterns = ["debug", "deprecated"] + setup_logging(level="INFO", exclude_patterns=test_patterns) + + # Create a test record that should be filtered + record = logging.LogRecord( + name="test_logger", + level=logging.INFO, + pathname="test.py", + lineno=1, + msg="This is a debug message", + args=(), + exc_info=None + ) + + # The record should be filtered out + assert not temp_logger.handlers[0].filter(record) + + def test_production_exclude_patterns(self): + # Verify that all production patterns are strings + assert all(isinstance(pattern, str) for pattern in PRODUCTION_EXCLUDE_PATTERNS) + + # Verify that common patterns are included + common_patterns = ["deprecated", "virtual environment"] + assert all(pattern in PRODUCTION_EXCLUDE_PATTERNS for pattern in common_patterns) + +def test_log_levels(): + # Test that all expected log levels are defined + expected_levels = ["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE"] + assert all(level in LogLevel.__members__ for level in expected_levels) + + # Test that the values match the names + for level in LogLevel: + assert level.value == level.name \ No newline at end of file diff --git a/tests/test_logging_integration.py b/tests/test_logging_integration.py new file mode 100644 index 00000000..3fa7a1f5 --- /dev/null +++ b/tests/test_logging_integration.py @@ -0,0 +1,219 @@ +import json +import logging +import pytest +import asyncio +from pathlib import Path +from io import StringIO +from typing import Dict, Any, List, Optional + +from src.utils.logging import LogFormatter, BatchedEventLogger, setup_logging +from src.agent.custom_agent import CustomAgent +from browser_use.agent.views import ActionResult +from browser_use.browser.views import BrowserStateHistory +from browser_use.browser.browser import Browser, BrowserConfig +from browser_use.browser.context import BrowserContext, BrowserContextConfig +from langchain_core.language_models.chat_models import BaseChatModel +from langchain_core.messages import BaseMessage + +class MockElementTree: + def clickable_elements_to_string(self, include_attributes=None): + return "Mock clickable elements" + +class MockBrowserContext(BrowserContext): + def __init__(self): + self.config = BrowserContextConfig() + self.selector_map = {} + self.cached_state = BrowserStateHistory( + url="https://example.com", + title="Example Page", + tabs=[], + interacted_element=[None], + screenshot=None + ) + setattr(self.cached_state, 'selector_map', self.selector_map) + setattr(self.cached_state, 'element_tree', MockElementTree()) + + async def get_state(self, use_vision=True): + return self.cached_state + + async def close(self): + pass + + def __del__(self): + # Override to prevent errors about missing session attribute + pass + +class MockBrowser(Browser): + def __init__(self): + self.config = BrowserConfig() + + async def new_context(self, config): + return MockBrowserContext() + + async def close(self): + pass + +class MockLLM(BaseChatModel): + def with_structured_output(self, output_type, 
include_raw=False): + self._output_type = output_type + return self + + async def ainvoke(self, messages: List[BaseMessage], **kwargs): + return { + 'parsed': self._output_type( + action=[], + current_state={ + 'prev_action_evaluation': 'Success', + 'important_contents': 'Test memory', + 'completed_contents': 'Test progress', + 'thought': 'Test thought', + 'summary': 'Test summary' + } + ) + } + + @property + def _llm_type(self) -> str: + return "mock" + + def _generate(self, messages: List[BaseMessage], stop: Optional[List[str]] = None, run_manager = None, **kwargs): + raise NotImplementedError("Use ainvoke instead") + + @property + def _identifying_params(self) -> Dict[str, Any]: + return {"mock_param": True} + +class ErrorLLM(MockLLM): + async def ainvoke(self, messages: List[BaseMessage], **kwargs): + raise ValueError("Test error") + +class ActionLLM(MockLLM): + async def ainvoke(self, messages: List[BaseMessage], **kwargs): + return { + 'parsed': self._output_type( + action=[ + {'type': 'click', 'selector': '#button1'}, + {'type': 'type', 'selector': '#input1', 'text': 'test'}, + ], + current_state={ + 'prev_action_evaluation': 'Success', + 'important_contents': 'Test memory', + 'completed_contents': 'Test progress', + 'thought': 'Test thought', + 'summary': 'Test summary' + } + ) + } + +@pytest.fixture +def logger(): + # Configure root logger + root_logger = logging.getLogger() + root_logger.setLevel(logging.INFO) + + # Configure test logger + logger = logging.getLogger("test_integration") + logger.setLevel(logging.INFO) + return logger + +@pytest.fixture +def string_io(): + string_io = StringIO() + handler = logging.StreamHandler(string_io) + handler.setFormatter(LogFormatter(use_json=True)) + + # Add handler to root logger + root_logger = logging.getLogger() + root_logger.addHandler(handler) + + # Add handler to test logger + logger = logging.getLogger("test_integration") + logger.addHandler(handler) + + yield string_io + + # Clean up + root_logger.removeHandler(handler) + logger.removeHandler(handler) + +@pytest.mark.asyncio +async def test_agent_logging_integration(logger, string_io): + # Setup + agent = CustomAgent( + task="Test task", + llm=MockLLM(), + browser=MockBrowser(), + browser_context=MockBrowserContext(), + use_vision=True + ) + + # Execute a step + await agent.step() + + # Get all log output + log_output = string_io.getvalue() + log_entries = [json.loads(line) for line in log_output.strip().split('\n') if line.strip()] + + # Print log entries for debugging + print("\nLog entries:", log_entries) + + # Verify log entries + assert len(log_entries) > 0 + assert any('Starting step 1' in str(entry.get('msg', '')) for entry in log_entries) + assert any('Model Response: success' in str(entry.get('msg', '')) for entry in log_entries) + assert any('Step error' in str(entry.get('msg', '')) for entry in log_entries) + +@pytest.mark.asyncio +async def test_agent_error_logging(logger, string_io): + # Setup + agent = CustomAgent( + task="Test task", + llm=ErrorLLM(), + browser=MockBrowser(), + browser_context=MockBrowserContext(), + use_vision=True + ) + + # Execute a step + await agent.step() + + # Get all log output + log_output = string_io.getvalue() + log_entries = [json.loads(line) for line in log_output.strip().split('\n') if line.strip()] + + # Print log entries for debugging + print("\nLog entries:", log_entries) + + # Verify log entries + assert len(log_entries) > 0 + assert any('Starting step 1' in str(entry.get('msg', '')) for entry in log_entries) + assert any('Step 
error' in str(entry.get('msg', '')) for entry in log_entries) + assert any('Use ainvoke instead' in str(entry.get('msg', '')) for entry in log_entries) + +@pytest.mark.asyncio +async def test_agent_batched_logging(logger, string_io): + # Setup + agent = CustomAgent( + task="Test task", + llm=ActionLLM(), + browser=MockBrowser(), + browser_context=MockBrowserContext(), + use_vision=True + ) + + # Execute a step + await agent.step() + + # Get all log output + log_output = string_io.getvalue() + log_entries = [json.loads(line) for line in log_output.strip().split('\n') if line.strip()] + + # Print log entries for debugging + print("\nLog entries:", log_entries) + + # Verify log entries + assert len(log_entries) > 0 + assert any('Starting step 1' in str(entry.get('msg', '')) for entry in log_entries) + assert any('Model Response: success' in str(entry.get('msg', '')) for entry in log_entries) + assert any('Batch: 2 action events' in str(entry.get('msg', '')) for entry in log_entries) + assert any('Step error' in str(entry.get('msg', '')) for entry in log_entries) \ No newline at end of file diff --git a/tests/test_structured_logging.py b/tests/test_structured_logging.py new file mode 100644 index 00000000..9134c4f5 --- /dev/null +++ b/tests/test_structured_logging.py @@ -0,0 +1,270 @@ +import pytest +import json +import logging +import os +from datetime import datetime +from src.utils.structured_logging import ( + StructuredLogger, + ProgressEvent, + BrowserEvent, + JSONFormatter, + ColorizedFormatter, + ColorScheme, + setup_structured_logging +) +from colorama import Fore, Style + +@pytest.fixture +def structured_logger(): + logger = StructuredLogger("test_logger") + return logger + +def test_progress_event_creation(): + event = ProgressEvent( + step="test_step", + status="in_progress", + progress=0.5, + message="Testing progress" + ) + assert event.step == "test_step" + assert event.status == "in_progress" + assert event.progress == 0.5 + assert event.message == "Testing progress" + assert event.timestamp is not None + +def test_browser_event_creation(): + details = {"action": "click", "selector": "#button"} + event = BrowserEvent( + event_type="interaction", + details=details + ) + assert event.event_type == "interaction" + assert event.details == details + assert event.timestamp is not None + +def test_progress_logging(structured_logger): + structured_logger.log_progress( + step="test_step", + status="started", + progress=0.0, + message="Starting test" + ) + + history = structured_logger.get_progress_history() + assert len(history) == 1 + assert history[0]["step"] == "test_step" + assert history[0]["status"] == "started" + assert history[0]["progress"] == 0.0 + assert history[0]["message"] == "Starting test" + +def test_browser_event_logging(structured_logger): + details = {"page": "test.html", "action": "navigate"} + structured_logger.log_browser_event( + event_type="navigation", + details=details + ) + + events = structured_logger.get_browser_events() + assert len(events) == 1 + assert events[0]["event_type"] == "navigation" + assert events[0]["details"] == details + +def test_progress_tracking(structured_logger): + # Test multiple progress updates + steps = [ + ("step1", "started", 0.0, "Starting"), + ("step1", "in_progress", 0.5, "Halfway"), + ("step1", "completed", 1.0, "Done") + ] + + for step, status, progress, message in steps: + structured_logger.log_progress(step, status, progress, message) + + assert structured_logger.get_current_progress() == 1.0 + history = 
structured_logger.get_progress_history() + assert len(history) == 3 + + for i, (step, status, progress, message) in enumerate(steps): + assert history[i]["step"] == step + assert history[i]["status"] == status + assert history[i]["progress"] == progress + assert history[i]["message"] == message + +def test_clear_history(structured_logger): + # Add some events + structured_logger.log_progress("test", "started", 0.5, "Test progress") + structured_logger.log_browser_event("test", {"action": "test"}) + + # Clear history + structured_logger.clear_history() + + assert len(structured_logger.get_progress_history()) == 0 + assert len(structured_logger.get_browser_events()) == 0 + assert structured_logger.get_current_progress() == 0.0 + +def test_json_formatter(): + formatter = JSONFormatter() + record = logging.LogRecord( + name="test_logger", + level=logging.INFO, + pathname="test.py", + lineno=1, + msg="Test message", + args=(), + exc_info=None + ) + + # Add custom fields + setattr(record, 'event_type', 'test_event') + setattr(record, 'data', {'test_key': 'test_value'}) + + formatted = formatter.format(record) + parsed = json.loads(formatted) + + assert parsed["level"] == "INFO" + assert parsed["message"] == "Test message" + assert parsed["logger"] == "test_logger" + assert parsed["event_type"] == "test_event" + assert parsed["data"] == {"test_key": "test_value"} + assert "timestamp" in parsed + +def test_colorized_formatter_with_colors(): + formatter = ColorizedFormatter(use_colors=True) + record = logging.LogRecord( + name="test_logger", + level=logging.ERROR, + pathname="test.py", + lineno=1, + msg="Test error message", + args=(), + exc_info=None + ) + + formatted = formatter.format(record) + assert Fore.RED in formatted # Error level should be red + assert Style.RESET_ALL in formatted # Should have reset codes + assert "[" in formatted and "]" in formatted # Should have timestamp brackets + assert "ERROR" in formatted # Should include level name + +def test_colorized_formatter_without_colors(): + formatter = ColorizedFormatter(use_colors=False) + record = logging.LogRecord( + name="test_logger", + level=logging.INFO, + pathname="test.py", + lineno=1, + msg="Test message", + args=(), + exc_info=None + ) + + formatted = formatter.format(record) + assert Fore.CYAN not in formatted # Should not have color codes + assert Style.RESET_ALL not in formatted + assert "[" in formatted and "]" in formatted + assert "INFO" in formatted + +def test_colorized_formatter_special_keywords(): + formatter = ColorizedFormatter(use_colors=True) + record = logging.LogRecord( + name="test_logger", + level=logging.INFO, + pathname="test.py", + lineno=1, + msg="✓ STEP(1) completed × failed", + args=(), + exc_info=None + ) + + formatted = formatter.format(record) + assert Fore.GREEN in formatted # Success checkmark + assert Fore.BLUE in formatted # STEP keyword + assert Fore.RED in formatted # Error cross + +def test_colorized_formatter_with_structured_data(): + formatter = ColorizedFormatter(use_colors=True) + record = logging.LogRecord( + name="test_logger", + level=logging.INFO, + pathname="test.py", + lineno=1, + msg="Progress Update", + args=(), + exc_info=None + ) + + # Add structured data + setattr(record, 'event_type', 'progress') + setattr(record, 'data', {'step': 'test', 'progress': 0.5}) + + formatted = formatter.format(record) + assert 'progress' in formatted + assert '"step": "test"' in formatted + assert '"progress": 0.5' in formatted + +def test_color_scheme(): + scheme = ColorScheme() + assert 
scheme.ERROR == Fore.RED + assert scheme.WARNING == Fore.YELLOW + assert scheme.INFO == Fore.CYAN + assert scheme.DEBUG == Style.DIM + assert scheme.SUCCESS == Fore.GREEN + assert scheme.RESET == Style.RESET_ALL + +def test_no_color_environment_variable(): + os.environ['NO_COLOR'] = '1' + formatter = ColorizedFormatter(use_colors=True) # Even with colors enabled + record = logging.LogRecord( + name="test_logger", + level=logging.ERROR, + pathname="test.py", + lineno=1, + msg="Test message", + args=(), + exc_info=None + ) + + formatted = formatter.format(record) + assert Fore.RED not in formatted # Should not have color codes + assert Style.RESET_ALL not in formatted + + # Clean up + del os.environ['NO_COLOR'] + +def test_setup_structured_logging_with_colors(): + # Remove existing handlers + root_logger = logging.getLogger() + for handler in root_logger.handlers[:]: + root_logger.removeHandler(handler) + + # Set up logging with colors + setup_structured_logging(level=logging.DEBUG, use_colors=True, json_output=False) + + assert len(root_logger.handlers) == 1 + assert isinstance(root_logger.handlers[0].formatter, ColorizedFormatter) + assert root_logger.handlers[0].formatter.use_colors is True + +def test_setup_structured_logging_json(): + # Remove existing handlers + root_logger = logging.getLogger() + for handler in root_logger.handlers[:]: + root_logger.removeHandler(handler) + + # Set up logging with JSON output + setup_structured_logging(level=logging.DEBUG, json_output=True) + + assert len(root_logger.handlers) == 1 + assert isinstance(root_logger.handlers[0].formatter, JSONFormatter) + +def test_setup_structured_logging(): + # Remove existing handlers + root_logger = logging.getLogger() + for handler in root_logger.handlers[:]: + root_logger.removeHandler(handler) + + # Set up logging with default settings + setup_structured_logging(level=logging.DEBUG) + + assert root_logger.level == logging.DEBUG + assert len(root_logger.handlers) == 1 + assert isinstance(root_logger.handlers[0].formatter, ColorizedFormatter) # Default to ColorizedFormatter \ No newline at end of file diff --git a/tests/test_task_logging.py b/tests/test_task_logging.py new file mode 100644 index 00000000..50bf5aae --- /dev/null +++ b/tests/test_task_logging.py @@ -0,0 +1,641 @@ +import pytest +from datetime import datetime, timedelta +import json +import asyncio +import os +from src.utils.task_logging import ( + TaskLogger, + TaskContext, + StepInfo, + BrowserState, + TaskStatus, + PerformanceMetrics, + ErrorInfo, + ActionType, + RetryConfig, + RetryInfo, + ColorScheme, + LogFormatter, + SeparatorStyle +) + +def test_task_logger_initialization(): + logger = TaskLogger("test_task", "Test task goal") + context = logger.get_context() + + assert context["task"]["id"] == "test_task" + assert context["task"]["goal"] == "Test task goal" + assert context["task"]["status"] == "pending" + assert context["browser"]["url"] == "" + assert context["browser"]["state"] == "loading" + assert context["browser"]["visible_elements"] == 0 + assert context["browser"]["dynamic_content"] == "loading" + +def test_step_update(): + logger = TaskLogger("test_task", "Test task goal") + + # Update to running state + logger.update_step("Starting navigation", TaskStatus.RUNNING) + context = logger.get_context() + + assert context["task"]["status"] == "running" + assert context["task"]["progress"] == "2/unknown steps" # Step number incremented + + # Update to complete state + logger.update_step("Navigation complete", TaskStatus.COMPLETE) + context 
= logger.get_context() + + assert context["task"]["status"] == "complete" + assert context["task"]["progress"] == "3/unknown steps" + +def test_browser_state_update(): + logger = TaskLogger("test_task", "Test task goal") + + # Update browser state + logger.update_browser_state( + url="https://example.com", + page_ready=True, + dynamic_content_loaded=True, + visible_elements=10 + ) + + context = logger.get_context() + assert context["browser"]["url"] == "https://example.com" + assert context["browser"]["state"] == "ready" + assert context["browser"]["dynamic_content"] == "loaded" + assert context["browser"]["visible_elements"] == 10 + +def test_partial_browser_state_update(): + logger = TaskLogger("test_task", "Test task goal") + + # Update only some fields + logger.update_browser_state(url="https://example.com") + context = logger.get_context() + + assert context["browser"]["url"] == "https://example.com" + assert context["browser"]["state"] == "loading" # Unchanged + assert context["browser"]["dynamic_content"] == "loading" # Unchanged + assert context["browser"]["visible_elements"] == 0 # Unchanged + +def test_elapsed_time_calculation(): + logger = TaskLogger("test_task", "Test task goal") + + # Set a specific start time + start_time = datetime.utcnow() - timedelta(seconds=5) + logger.context.started_at = start_time.isoformat() + + context = logger.get_context() + elapsed_time = float(context["task"]["elapsed_time"].rstrip("s")) + + assert 4.5 <= elapsed_time <= 5.5 # Allow for small timing variations + +def test_task_status_validation(): + logger = TaskLogger("test_task", "Test task goal") + + # Test all valid status values + for status in TaskStatus: + logger.update_step(f"Step with status {status}", status) + context = logger.get_context() + assert context["task"]["status"] == status.value + +def test_json_serialization(): + logger = TaskLogger("test_task", "Test task goal") + context = logger.get_context() + + # Verify that the context can be JSON serialized + json_str = json.dumps(context) + parsed = json.loads(json_str) + + assert parsed["task"]["id"] == "test_task" + assert parsed["task"]["goal"] == "Test task goal" + assert "timestamp" in parsed + assert "elapsed_time" in parsed["task"] + +def test_step_info_status_conversion(): + # Test that string status values are converted to TaskStatus enum + step = StepInfo( + number=1, + description="Test step", + started_at=datetime.utcnow().isoformat(), + status="running" # Pass as string + ) + + assert isinstance(step.status, TaskStatus) + assert step.status == TaskStatus.RUNNING + +def test_error_handling(): + logger = TaskLogger("error_task", "Test error handling") + + # Simulate an error + error = ValueError("Test error") + logger.log_error(error, step_number=1, action="test action") + + context = logger.get_context() + assert context["task"]["status"] == "failed" + assert context["error"]["message"] == "Test error" + assert context["error"]["type"] == "ValueError" + assert context["error"]["step"] == 1 + assert context["error"]["action"] == "test action" + +def test_performance_metrics(): + logger = TaskLogger("perf_task", "Test performance tracking") + + # Start tracking performance + logger.start_performance_tracking() + + # Simulate some steps with timing + logger.update_step("Navigation", TaskStatus.RUNNING) + logger.track_step_duration("navigation", 0.5) + + logger.update_step("Interaction", TaskStatus.RUNNING) + logger.track_step_duration("interaction", 0.3) + + # Get performance metrics + metrics = 
logger.get_performance_metrics() + assert metrics["step_breakdown"]["navigation"] == pytest.approx(0.5) + assert metrics["step_breakdown"]["interaction"] == pytest.approx(0.3) + assert metrics["total_duration"] > 0 + +def test_detailed_browser_state(): + logger = TaskLogger("browser_task", "Test browser state") + + # Update with detailed browser state + logger.update_browser_state( + url="https://example.com", + page_ready=True, + dynamic_content_loaded=True, + visible_elements=10, + current_frame="main", + active_element="search_input", + page_title="Example Page" + ) + + context = logger.get_context() + browser_state = context["browser"] + assert browser_state["url"] == "https://example.com" + assert browser_state["state"] == "ready" + assert browser_state["current_frame"] == "main" + assert browser_state["active_element"] == "search_input" + assert browser_state["page_title"] == "Example Page" + +def test_task_progress_tracking(): + logger = TaskLogger("progress_task", "Test progress tracking") + + # Add steps with progress information + logger.update_step("Step 1", TaskStatus.COMPLETE, progress=0.25) + context = logger.get_context() + assert context["task"]["progress"] == "25%" + + logger.update_step("Step 2", TaskStatus.COMPLETE, progress=0.5) + context = logger.get_context() + assert context["task"]["progress"] == "50%" + + logger.update_step("Final Step", TaskStatus.COMPLETE, progress=1.0) + context = logger.get_context() + assert context["task"]["progress"] == "100%" + +def test_log_formatting(): + logger = TaskLogger("format_task", "Test log formatting") + + # Capture log output + logger.update_step("Navigation", TaskStatus.RUNNING) + log_output = logger.format_log_entry() + + # Verify log format matches the specified structure + assert "[" in log_output # Has timestamp + assert "STEP 2/" in log_output # Has step number (2 because update_step increments) + assert "Navigation" in log_output # Has action + assert "→" in log_output # Has status symbol for running + + # Add another step to test duration + logger.update_step("Click button", TaskStatus.COMPLETE) + log_output = logger.format_log_entry() + assert "(" in log_output and "s)" in log_output # Now we should have duration + +def test_semantic_step_descriptions(): + logger = TaskLogger("semantic_task", "Test semantic descriptions") + + # Test navigation step + logger.update_step( + "Navigate to example.com", + TaskStatus.RUNNING, + action_type=ActionType.NAVIGATION + ) + context = logger.get_context() + assert context["task"]["current_action"] == "navigation" + assert "🌐" in logger.format_log_entry() # Navigation emoji + + # Test interaction step + logger.update_step( + "Click search button", + TaskStatus.RUNNING, + action_type=ActionType.INTERACTION + ) + context = logger.get_context() + assert context["task"]["current_action"] == "interaction" + assert "🖱️" in logger.format_log_entry() # Interaction emoji + + # Test extraction step + logger.update_step( + "Extract search results", + TaskStatus.RUNNING, + action_type=ActionType.EXTRACTION + ) + context = logger.get_context() + assert context["task"]["current_action"] == "extraction" + assert "📑" in logger.format_log_entry() # Extraction emoji + +def test_redundant_message_filtering(): + logger = TaskLogger("filter_task", "Test message filtering") + + # Add multiple steps of the same type + logger.update_step( + "Navigate to example.com", + TaskStatus.RUNNING, + action_type=ActionType.NAVIGATION + ) + logger.update_step( + "Page loaded successfully", + TaskStatus.COMPLETE, + 
action_type=ActionType.NAVIGATION, + suppress_similar=True # Should be filtered as it's a completion of the same action + ) + + # Get all log entries + log_entries = logger.get_log_history() + + # Verify that redundant messages are consolidated + navigation_entries = [entry for entry in log_entries if "Navigate" in entry] + assert len(navigation_entries) == 1 # Only the main action should be logged + + # Verify that the current step shows the completion status + current_log = logger.format_log_entry() + assert "✓" in current_log # Success symbol should be in current state + +def test_action_context_tracking(): + logger = TaskLogger("context_task", "Test action context") + + # Start a navigation action + logger.update_step( + "Navigate to example.com", + TaskStatus.RUNNING, + action_type=ActionType.NAVIGATION, + context={ + "url": "https://example.com", + "method": "GET", + "headers": {"User-Agent": "browser-use"} + } + ) + + context = logger.get_context() + assert "action_context" in context["task"] + assert context["task"]["action_context"]["url"] == "https://example.com" + + # Complete the action with results + logger.update_step( + "Navigation complete", + TaskStatus.COMPLETE, + action_type=ActionType.NAVIGATION, + results={ + "status_code": 200, + "page_title": "Example Domain", + "load_time": 0.5 + } + ) + + context = logger.get_context() + assert "action_results" in context["task"] + assert context["task"]["action_results"]["status_code"] == 200 + +def test_retry_configuration(): + config = RetryConfig( + max_retries=3, + base_delay=1.0, + max_delay=10.0, + jitter=0.1 + ) + + # Test that delays follow exponential backoff pattern + delays = [config.get_delay(attempt) for attempt in range(5)] + assert delays[0] == 0 # First attempt has no delay + assert 0.9 <= delays[1] <= 1.1 # First retry ~1.0s with jitter + assert 1.8 <= delays[2] <= 2.2 # Second retry ~2.0s with jitter + assert 3.6 <= delays[3] <= 4.4 # Third retry ~4.0s with jitter + assert delays[4] == -1 # Beyond max retries + + # Test max delay capping + config = RetryConfig( + max_retries=5, + base_delay=1.0, + max_delay=5.0, + jitter=0.0 # Disable jitter for predictable testing + ) + assert config.get_delay(3) == 4.0 # Within max + assert config.get_delay(4) == 5.0 # Capped at max + +@pytest.mark.asyncio +async def test_retry_execution(): + logger = TaskLogger("retry_task", "Test retry logic") + + # Mock function that fails twice then succeeds + attempt_count = 0 + async def mock_operation(): + nonlocal attempt_count + attempt_count += 1 + if attempt_count <= 2: + raise ValueError("Temporary error") + return "success" + + # Configure retry behavior + retry_config = RetryConfig(max_retries=3, base_delay=0.1) + + # Execute with retry + result = await logger.execute_with_retry( + mock_operation, + "test_operation", + retry_config=retry_config + ) + + assert result == "success" + assert attempt_count == 3 # Two failures + one success + + # Verify retry information in logs + context = logger.get_context() + assert "retries" in context["task"] + retry_info = context["task"]["retries"] + assert retry_info["attempts"] == 3 + assert retry_info["success"] is True + assert len(retry_info["history"]) == 2 # Two retry attempts + +@pytest.mark.asyncio +async def test_retry_max_attempts_exceeded(): + logger = TaskLogger("retry_task", "Test retry logic") + + # Mock function that always fails + async def mock_operation(): + raise ValueError("Persistent error") + + # Configure retry behavior + retry_config = RetryConfig(max_retries=2, 
base_delay=0.1) + + # Execute with retry and expect failure + with pytest.raises(ValueError) as exc_info: + await logger.execute_with_retry( + mock_operation, + "test_operation", + retry_config=retry_config + ) + + assert str(exc_info.value) == "Persistent error" + + # Verify retry information in logs + context = logger.get_context() + assert "retries" in context["task"] + retry_info = context["task"]["retries"] + assert retry_info["attempts"] == 3 # Initial + 2 retries + assert retry_info["success"] is False + assert len(retry_info["history"]) == 3 # Initial attempt + two retries + assert all(entry["error"] == "ValueError: Persistent error" for entry in retry_info["history"]) + + # Verify the delays follow the expected pattern + delays = [entry["delay"] for entry in retry_info["history"]] + assert delays[0] > 0 # First retry has positive delay + assert delays[1] > delays[0] # Second retry has longer delay + assert delays[2] == -1 # Final attempt indicates max retries exceeded + +def test_retry_backoff_calculation(): + config = RetryConfig( + max_retries=3, + base_delay=1.0, + max_delay=10.0, + jitter=0.0 # Disable jitter for predictable testing + ) + + # Test exponential backoff sequence + assert config.get_delay(0) == 0 # First attempt + assert config.get_delay(1) == 1.0 # First retry + assert config.get_delay(2) == 2.0 # Second retry + assert config.get_delay(3) == 4.0 # Third retry + assert config.get_delay(4) == -1 # Beyond max retries + + # Test max delay capping + config = RetryConfig( + max_retries=5, + base_delay=1.0, + max_delay=5.0, + jitter=0.0 + ) + assert config.get_delay(3) == 4.0 # Within max + assert config.get_delay(4) == 5.0 # Capped at max + +def test_color_scheme(): + """Test that color scheme is properly defined and accessible.""" + scheme = ColorScheme() + + # Test error colors + assert scheme.error.startswith("\033[31m") # Red + assert scheme.warning.startswith("\033[33m") # Yellow + assert scheme.info.startswith("\033[36m") # Cyan + assert scheme.success.startswith("\033[32m") # Green + assert scheme.reset == "\033[0m" # Reset + +def test_log_formatting_with_colors(): + """Test that log messages are properly formatted with colors.""" + logger = TaskLogger("color_task", "Test color formatting") + + # Test error formatting + logger.update_step("Failed operation", TaskStatus.FAILED) + log_output = logger.format_log_entry() + assert "\033[31m" in log_output # Contains red color code + assert "×" in log_output # Contains error symbol + + # Test success formatting + logger.update_step("Successful operation", TaskStatus.COMPLETE) + log_output = logger.format_log_entry() + assert "\033[32m" in log_output # Contains green color code + assert "✓" in log_output # Contains success symbol + + # Test running state formatting + logger.update_step("Running operation", TaskStatus.RUNNING) + log_output = logger.format_log_entry() + assert "\033[36m" in log_output # Contains cyan color code + assert "→" in log_output # Contains running symbol + +def test_color_disabled(): + """Test that colors can be disabled via environment variable.""" + os.environ["NO_COLOR"] = "1" + logger = TaskLogger("no_color_task", "Test without colors") + + logger.update_step("Test operation", TaskStatus.COMPLETE) + log_output = logger.format_log_entry() + + # Verify no color codes are present + assert "\033[" not in log_output + assert "✓" in log_output # Symbols still present + + # Clean up + del os.environ["NO_COLOR"] + +def test_color_scheme_customization(): + """Test that color scheme can be 
customized.""" + custom_scheme = ColorScheme( + error="\033[35m", # Magenta for errors + warning="\033[34m", # Blue for warnings + info="\033[37m", # White for info + success="\033[32m" # Keep green for success + ) + + logger = TaskLogger("custom_color_task", "Test custom colors", color_scheme=custom_scheme) + + # Test custom error color + logger.update_step("Failed operation", TaskStatus.FAILED) + log_output = logger.format_log_entry() + assert "\033[35m" in log_output # Contains magenta color code + + # Test custom info color + logger.update_step("Info message", TaskStatus.RUNNING) + log_output = logger.format_log_entry() + assert "\033[37m" in log_output # Contains white color code + +def test_log_formatter_with_colors(): + """Test that the log formatter properly applies colors to different components.""" + formatter = LogFormatter() + + # Create a mock log record + class MockRecord: + def __init__(self, levelname, msg): + self.levelname = levelname + self.msg = msg + self.created = datetime.utcnow().timestamp() + + # Test error formatting + error_record = MockRecord("ERROR", "Test error message") + formatted = formatter.format(error_record) + assert "\033[31m" in formatted # Red for error + assert "ERROR" in formatted + + # Test info formatting + info_record = MockRecord("INFO", "Test info message") + formatted = formatter.format(info_record) + assert "\033[36m" in formatted # Cyan for info + assert "INFO" in formatted + + # Test warning formatting + warn_record = MockRecord("WARNING", "Test warning message") + formatted = formatter.format(warn_record) + assert "\033[33m" in formatted # Yellow for warning + assert "WARNING" in formatted + +def test_task_separator_style(): + """Test that separator styles are properly defined and formatted.""" + style = SeparatorStyle() + + # Test default separator styles + assert len(style.task) >= 50 # Task separator should be substantial + assert len(style.phase) >= 30 # Phase separator should be visible but less prominent + assert len(style.error) >= 40 # Error separator should be distinct + + # Test that styles are different + assert style.task != style.phase + assert style.task != style.error + assert style.phase != style.error + +def test_task_start_separator(): + """Test that separators are added at task start.""" + logger = TaskLogger("separator_task", "Test separators") + + # Get initial log output + log_entries = logger.get_log_history() + + # Should have task separator and initial step + assert len(log_entries) == 2 + assert "=" * 50 in log_entries[0] # Task separator + assert "TASK GOAL: Test separators" in log_entries[1] # Initial step message + +def test_phase_separators(): + """Test that separators are added between different phases.""" + logger = TaskLogger("separator_task", "Test separators") + + # Navigation phase + logger.start_phase("Navigation") + logger.update_step("Navigate to example.com", TaskStatus.COMPLETE, action_type=ActionType.NAVIGATION) + + # Interaction phase + logger.start_phase("Interaction") + logger.update_step("Click button", TaskStatus.COMPLETE, action_type=ActionType.INTERACTION) + + # Get log entries + log_entries = logger.get_log_history() + + # Count phase separators + phase_separators = [entry for entry in log_entries if "-" * 30 in entry] + assert len(phase_separators) == 2 # One before each phase + +def test_error_separators(): + """Test that separators are added around error messages.""" + logger = TaskLogger("separator_task", "Test separators") + + # Simulate an error + try: + raise ValueError("Test 
error") + except Exception as e: + logger.log_error(e, step_number=1, action="test_action") + + # Get log entries + log_entries = logger.get_log_history() + + # Should have error separators + error_separators = [entry for entry in log_entries if "*" * 40 in entry] + assert len(error_separators) == 2 # One before and one after error + +def test_custom_separator_style(): + """Test that separator styles can be customized.""" + custom_style = SeparatorStyle( + task="◈" * 30, + phase="•" * 20, + error="!" * 25 + ) + + logger = TaskLogger("custom_separator_task", "Test custom separators", separator_style=custom_style) + + # Start a phase + logger.start_phase("Test Phase") + + # Get log entries + log_entries = logger.get_log_history() + + # Verify custom separators are used + assert "◈" * 30 in log_entries[0] # Task separator + assert "•" * 20 in log_entries[2] # Phase separator + assert "→" in log_entries[2] # Arrow indicator for phase start + +def test_separator_with_colors(): + """Test that separators can be colored.""" + logger = TaskLogger("colored_separator_task", "Test colored separators") + + # Start a phase + logger.start_phase("Test Phase") + + # Get log entries + log_entries = logger.get_log_history() + + # Verify separators have color codes + task_separator = log_entries[0] + phase_separator = log_entries[1] + + assert "\033[" in task_separator # Contains color code + assert "\033[" in phase_separator # Contains color code + +def test_separator_disabled(): + """Test that separators can be disabled.""" + logger = TaskLogger("no_separator_task", "Test without separators", use_separators=False) + + # Start a phase + logger.start_phase("Test Phase") + + # Get log entries + log_entries = logger.get_log_history() + + # Verify no separators are present + separators = [entry for entry in log_entries if any(c * 20 in entry for c in "=-*")] + assert len(separators) == 0 # No separators should be present \ No newline at end of file diff --git a/tests/test_trace_analyzer.py b/tests/test_trace_analyzer.py new file mode 100644 index 00000000..841d180d --- /dev/null +++ b/tests/test_trace_analyzer.py @@ -0,0 +1,162 @@ +import pytest +import asyncio +import json +import zipfile +from pathlib import Path +import tempfile +from src.trace_analyzer import PlaywrightTrace, analyze_trace + +# Sample trace data +SAMPLE_TRACE_DATA = [ + # Action event (before) + { + "type": "before", + "method": "goto", + "params": {"url": "https://example.com"}, + "timestamp": 1000, + "duration": 500 + }, + # Action event (after - success) + { + "type": "after", + "method": "goto", + "params": {"url": "https://example.com"}, + "timestamp": 1500, + "duration": 500 + }, + # Action event (after - error) + { + "type": "after", + "method": "click", + "params": {"selector": "#missing-button"}, + "timestamp": 2000, + "duration": 100, + "error": {"message": "Element not found"} + }, + # Console event + { + "type": "console", + "text": "Test console message" + }, + # Error event + { + "type": "error", + "error": {"message": "Test error message"} + } +] + +# Sample HAR data +SAMPLE_HAR_DATA = { + "log": { + "entries": [ + { + "request": { + "url": "https://example.com", + "method": "GET" + }, + "response": { + "status": 200, + "statusText": "OK" + }, + "time": 150 + }, + { + "request": { + "url": "https://example.com/missing", + "method": "GET" + }, + "response": { + "status": 404, + "statusText": "Not Found" + }, + "time": 100 + } + ] + } +} + +@pytest.fixture +def sample_trace_file(): + """Create a temporary trace file with sample 
data.""" + with tempfile.NamedTemporaryFile(suffix='.zip', delete=False) as temp_zip: + with zipfile.ZipFile(temp_zip.name, 'w') as zf: + # Add trace data + trace_data = '\n'.join(json.dumps(event) for event in SAMPLE_TRACE_DATA) + zf.writestr('trace.trace', trace_data) + + # Add HAR data + zf.writestr('trace.har', json.dumps(SAMPLE_HAR_DATA)) + + yield temp_zip.name + Path(temp_zip.name).unlink() + +@pytest.mark.asyncio +async def test_trace_parsing(sample_trace_file): + """Test basic trace file parsing.""" + trace = await PlaywrightTrace.parse(sample_trace_file) + + # Check actions + assert len(trace.actions) == 3 + assert any(a['type'] == 'goto' and a['success'] for a in trace.actions) + assert any(a['type'] == 'click' and not a['success'] for a in trace.actions) + + # Check console logs + assert len(trace.console_logs) == 1 + assert trace.console_logs[0] == "Test console message" + + # Check errors + assert len(trace.errors) == 1 + assert "Test error message" in trace.errors[0] + + # Check network requests + assert len(trace.network_requests) == 2 + assert any(r['status'] == 200 for r in trace.network_requests) + assert any(r['status'] == 404 for r in trace.network_requests) + +@pytest.mark.asyncio +async def test_analyze_trace(sample_trace_file): + """Test the analyze_trace function.""" + result = await analyze_trace(sample_trace_file) + + assert "actions" in result + assert "network_requests" in result + assert "console_logs" in result + assert "errors" in result + assert "summary" in result + + summary = result["summary"] + assert summary["total_actions"] == 3 + assert summary["failed_actions"] == 1 + assert summary["total_requests"] == 2 + assert summary["failed_requests"] == 1 + assert summary["total_errors"] == 1 + +@pytest.mark.asyncio +async def test_invalid_trace_file(): + """Test handling of invalid trace files.""" + with tempfile.NamedTemporaryFile(suffix='.zip', delete=False) as temp_file: + temp_file.write(b"Invalid zip data") + + with pytest.raises(ValueError, match="Invalid trace file format"): + await PlaywrightTrace.parse(temp_file.name) + + Path(temp_file.name).unlink() + +@pytest.mark.asyncio +async def test_missing_trace_file(): + """Test handling of missing trace files.""" + with pytest.raises(FileNotFoundError): + await PlaywrightTrace.parse("nonexistent_file.zip") + +@pytest.mark.asyncio +async def test_malformed_trace_data(sample_trace_file): + """Test handling of malformed trace data.""" + with tempfile.NamedTemporaryFile(suffix='.zip', delete=False) as temp_zip: + with zipfile.ZipFile(temp_zip.name, 'w') as zf: + zf.writestr('trace.trace', 'Invalid JSON data\n{"type": "console", "text": "Valid event"}') + + trace = await PlaywrightTrace.parse(temp_zip.name) + assert len(trace.errors) == 1 # One error for the invalid JSON + assert len(trace.console_logs) == 1 # One valid console event + + Path(temp_zip.name).unlink() \ No newline at end of file diff --git a/webui.py b/webui.py index b7acffe4..ca96dfc7 100644 --- a/webui.py +++ b/webui.py @@ -7,15 +7,29 @@ import pdb import logging - -from dotenv import load_dotenv - -load_dotenv() import os +import sys import glob import asyncio import argparse import os +import warnings + +from dotenv import load_dotenv +from src.utils.logging import setup_logging, PRODUCTION_EXCLUDE_PATTERNS + +# Filter out the specific deprecation warning from langchain-google-genai +warnings.filterwarnings('ignore', message='Convert_system_message_to_human will be deprecated!') + +load_dotenv() + +# Setup logging before importing other 
modules
+setup_logging(
+    level=os.getenv("LOG_LEVEL", "INFO"),
+    use_json=os.getenv("LOG_JSON", "true").lower() == "true",
+    log_file=os.getenv("LOG_FILE"),
+    exclude_patterns=PRODUCTION_EXCLUDE_PATTERNS if os.getenv("ENVIRONMENT") == "production" else None
+)
 logger = logging.getLogger(__name__)
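
The retry tests in `tests/test_task_logging.py` pin down an exponential-backoff contract for `RetryConfig.get_delay`: no delay on the first attempt, `base_delay * 2^(attempt-1)` afterwards, capping at `max_delay`, proportional jitter, and `-1` once `max_retries` is exceeded. A minimal sketch consistent with those assertions (the actual class in `src/utils/task_logging.py` may be implemented differently):

```python
import random
from dataclasses import dataclass


@dataclass
class RetryConfig:
    max_retries: int = 3
    base_delay: float = 1.0
    max_delay: float = 10.0
    jitter: float = 0.0

    def get_delay(self, attempt: int) -> float:
        """Delay before the given attempt: 0 for the first try, -1 once retries are exhausted."""
        if attempt == 0:
            return 0.0  # first attempt runs immediately
        if attempt > self.max_retries:
            return -1  # signals "stop retrying" to the caller
        delay = min(self.base_delay * 2 ** (attempt - 1), self.max_delay)
        if self.jitter:
            # jitter is applied proportionally, matching the 0.9-1.1 / 1.8-2.2 ranges in the tests
            delay *= 1 + random.uniform(-self.jitter, self.jitter)
        return delay
```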
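The colour tests (`test_no_color_environment_variable` and `test_color_disabled`) imply that setting `NO_COLOR` suppresses ANSI codes even when colours are explicitly requested. A minimal sketch of a formatter with that behaviour, assuming the colorama-based level mapping the assertions check; the real `ColorizedFormatter` in `src/utils/logging.py` may be structured differently:

```python
import logging
import os

from colorama import Fore, Style

LEVEL_COLORS = {
    logging.ERROR: Fore.RED,
    logging.WARNING: Fore.YELLOW,
    logging.INFO: Fore.CYAN,
    logging.DEBUG: Style.DIM,
}


class ColorizedFormatter(logging.Formatter):
    """Colour log lines by level, honouring the NO_COLOR convention."""

    def __init__(self, use_colors: bool = True, fmt: str = "%(levelname)s %(message)s"):
        super().__init__(fmt)
        self.use_colors = use_colors

    def format(self, record: logging.LogRecord) -> str:
        message = super().format(record)
        # NO_COLOR (https://no-color.org/) always wins, even when use_colors=True.
        if not self.use_colors or "NO_COLOR" in os.environ:
            return message
        color = LEVEL_COLORS.get(record.levelno, "")
        return f"{color}{message}{Style.RESET_ALL}"
```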
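The `webui.py` hunk wires `setup_logging` to environment variables. For reference, an illustrative snippet exercising those variables before launching the UI; the names come from the hunk above, while the values and the `.setdefault` approach are examples only:

```python
import os

# Illustrative defaults for the variables read by webui.py at startup.
os.environ.setdefault("LOG_LEVEL", "DEBUG")          # root log level
os.environ.setdefault("LOG_JSON", "false")           # "true" enables JSON log output
os.environ.setdefault("LOG_FILE", "logs/webui.log")  # optional file target; unset logs to console only
os.environ.setdefault("ENVIRONMENT", "production")   # applies PRODUCTION_EXCLUDE_PATTERNS
```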