diff --git a/.gitignore b/.gitignore
index 2d83410f..142b465a 100644
--- a/.gitignore
+++ b/.gitignore
@@ -184,3 +184,17 @@ AgentHistoryList.json
# For Docker
data/
+
+# cursor
+.cursorrules
+.cursorignore
+.backup.env
+.brain/**
+
+# Brain directory
+.brain/
+
+.env.google
+*.zip
+traces/
+debug-session/
\ No newline at end of file
diff --git a/.vscode/settings.json b/.vscode/settings.json
index 8b09300d..58dcb3c6 100644
--- a/.vscode/settings.json
+++ b/.vscode/settings.json
@@ -7,5 +7,11 @@
"source.fixAll.ruff": "explicit",
"source.organizeImports.ruff": "explicit"
}
+ },
+ "dotenv.enableAutocloaking": false,
+ "workbench.colorCustomizations": {
+ "activityBar.background": "#452606",
+ "titleBar.activeBackground": "#603608",
+ "titleBar.activeForeground": "#FEFBF7"
}
}
diff --git a/README.md b/README.md
index 184eeb93..698b00de 100644
--- a/README.md
+++ b/README.md
@@ -1,3 +1,79 @@
+# Fork Purpose
+
+This fork of browser-use/web-ui adds CLI support specifically designed for AI agents like Cursor Agent. It enables direct command-line interaction with browser automation tasks, making it ideal for integration with AI development environments and automated workflows.
+
+## CLI Documentation
+
+See [CLI Guide](cli/README.md) for comprehensive documentation on:
+- Available LLM providers and models
+- Detailed command reference
+- Environment configuration
+- Example usage patterns
+
+### Quick Start
+
+```bash
+# Run a task (browser will auto-start if needed)
+browser-use run "create a report about the page structure" --url "https://example.com"
+
+# Run with specific provider and vision capabilities
+browser-use run "analyze the layout and visual elements" --url "https://example.com" --provider Google --vision
+
+# Run with specific model selection
+browser-use run "analyze the page" --url "https://example.com" --provider Anthropic --model-index 1
+
+# Explicitly start browser with custom options (optional)
+browser-use start --headless --window-size 1920x1080
+
+# Close browser when done
+browser-use close
+```
+
+### Supported LLM Providers
+
+- **OpenAI** (`gpt-4o`) - Vision-capable model for advanced analysis
+- **Anthropic** (`claude-3-5-sonnet-latest`, `claude-3-5-sonnet-20241022`) - Advanced language understanding
+- **Google** (`gemini-1.5-pro`, `gemini-2.0-flash`) - Fast and efficient processing
+- **DeepSeek** (`deepseek-chat`) - Cost-effective default option
+
+See the [CLI Guide](cli/README.md) for detailed provider configuration and usage examples.
+
+### CLI Commands
+
+- `start` - (Optional) Initialize browser session with custom options:
+ - `--headless` - Run in headless mode
+ - `--window-size` - Set window dimensions (e.g., "1920x1080")
+ - `--disable-security` - Disable security features
+ - `--user-data-dir` - Use custom Chrome profile
+ - `--proxy` - Set proxy server
+
+- `run` - Execute tasks (auto-starts browser if needed):
+  - `--url` - Starting URL for the task (required)
+  - `--provider` - Choose the LLM provider (Deepseek, Google, OpenAI, Anthropic)
+  - `--model-index` - Select a specific model from the provider (0-based)
+  - `--vision` - Enable visual analysis
+  - `--record` - Record browser session
+  - `--trace-path` - Save debugging traces
+  - `--max-steps` - Limit task steps
+  - `--add-info` - Provide additional context
+
+- `close` - Clean up browser session
+
+### Example Tasks
+
+The [browser-tasks-example.ts](cli/browser-tasks-example.ts) file provides ready-to-use task sequences for:
+
+- Product research automation
+- Documentation analysis
+- Page structure analysis
+- Debug sessions with tracing
+
+### Configuration
+
+See [.env.example](.env.example) for all available configuration options, including:
+
+- API keys for different LLM providers
+- Browser settings
+- Session persistence options
+
@@ -51,134 +127,4 @@ Then install playwright:
```bash
playwright install
-```
-
-### Option 2: Docker Installation
-
-1. **Prerequisites:**
- - Docker and Docker Compose installed on your system
- - Git to clone the repository
-
-2. **Setup:**
- ```bash
- # Clone the repository
- git clone https://github.com/browser-use/web-ui.git
- cd web-ui
-
- # Copy and configure environment variables
- cp .env.example .env
- # Edit .env with your preferred text editor and add your API keys
- ```
-
-3. **Run with Docker:**
- ```bash
- # Build and start the container with default settings (browser closes after AI tasks)
- docker compose up --build
-
- # Or run with persistent browser (browser stays open between AI tasks)
- CHROME_PERSISTENT_SESSION=true docker compose up --build
- ```
-
-4. **Access the Application:**
- - WebUI: `http://localhost:7788`
- - VNC Viewer (to see browser interactions): `http://localhost:6080/vnc.html`
-
- Default VNC password is "vncpassword". You can change it by setting the `VNC_PASSWORD` environment variable in your `.env` file.
-
-
-## Usage
-
-### Local Setup
-1. Copy `.env.example` to `.env` and set your environment variables, including API keys for the LLM. `cp .env.example .env`
-2. **Run the WebUI:**
- ```bash
- python webui.py --ip 127.0.0.1 --port 7788
- ```
-4. WebUI options:
- - `--ip`: The IP address to bind the WebUI to. Default is `127.0.0.1`.
- - `--port`: The port to bind the WebUI to. Default is `7788`.
- - `--theme`: The theme for the user interface. Default is `Ocean`.
- - **Default**: The standard theme with a balanced design.
- - **Soft**: A gentle, muted color scheme for a relaxed viewing experience.
- - **Monochrome**: A grayscale theme with minimal color for simplicity and focus.
- - **Glass**: A sleek, semi-transparent design for a modern appearance.
- - **Origin**: A classic, retro-inspired theme for a nostalgic feel.
- - **Citrus**: A vibrant, citrus-inspired palette with bright and fresh colors.
- - **Ocean** (default): A blue, ocean-inspired theme providing a calming effect.
- - `--dark-mode`: Enables dark mode for the user interface.
-3. **Access the WebUI:** Open your web browser and navigate to `http://127.0.0.1:7788`.
-4. **Using Your Own Browser(Optional):**
- - Set `CHROME_PATH` to the executable path of your browser and `CHROME_USER_DATA` to the user data directory of your browser.
- - Windows
- ```env
- CHROME_PATH="C:\Program Files\Google\Chrome\Application\chrome.exe"
- CHROME_USER_DATA="C:\Users\YourUsername\AppData\Local\Google\Chrome\User Data"
- ```
- > Note: Replace `YourUsername` with your actual Windows username for Windows systems.
- - Mac
- ```env
- CHROME_PATH="/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
- CHROME_USER_DATA="~/Library/Application Support/Google/Chrome/Profile 1"
- ```
- - Close all Chrome windows
- - Open the WebUI in a non-Chrome browser, such as Firefox or Edge. This is important because the persistent browser context will use the Chrome data when running the agent.
- - Check the "Use Own Browser" option within the Browser Settings.
-5. **Keep Browser Open(Optional):**
- - Set `CHROME_PERSISTENT_SESSION=true` in the `.env` file.
-
-### Docker Setup
-1. **Environment Variables:**
- - All configuration is done through the `.env` file
- - Available environment variables:
- ```
- # LLM API Keys
- OPENAI_API_KEY=your_key_here
- ANTHROPIC_API_KEY=your_key_here
- GOOGLE_API_KEY=your_key_here
-
- # Browser Settings
- CHROME_PERSISTENT_SESSION=true # Set to true to keep browser open between AI tasks
- RESOLUTION=1920x1080x24 # Custom resolution format: WIDTHxHEIGHTxDEPTH
- RESOLUTION_WIDTH=1920 # Custom width in pixels
- RESOLUTION_HEIGHT=1080 # Custom height in pixels
-
- # VNC Settings
- VNC_PASSWORD=your_vnc_password # Optional, defaults to "vncpassword"
- ```
-
-2. **Browser Persistence Modes:**
- - **Default Mode (CHROME_PERSISTENT_SESSION=false):**
- - Browser opens and closes with each AI task
- - Clean state for each interaction
- - Lower resource usage
-
- - **Persistent Mode (CHROME_PERSISTENT_SESSION=true):**
- - Browser stays open between AI tasks
- - Maintains history and state
- - Allows viewing previous AI interactions
- - Set in `.env` file or via environment variable when starting container
-
-3. **Viewing Browser Interactions:**
- - Access the noVNC viewer at `http://localhost:6080/vnc.html`
- - Enter the VNC password (default: "vncpassword" or what you set in VNC_PASSWORD)
- - You can now see all browser interactions in real-time
-
-4. **Container Management:**
- ```bash
- # Start with persistent browser
- CHROME_PERSISTENT_SESSION=true docker compose up -d
-
- # Start with default mode (browser closes after tasks)
- docker compose up -d
-
- # View logs
- docker compose logs -f
-
- # Stop the container
- docker compose down
- ```
-
-## Changelog
-
-- [x] **2025/01/10:** Thanks to @casistack. Now we have Docker Setup option and also Support keep browser open between tasks.[Video tutorial demo](https://github.com/browser-use/web-ui/issues/1#issuecomment-2582511750).
-- [x] **2025/01/06:** Thanks to @richard-devbot. A New and Well-Designed WebUI is released. [Video tutorial demo](https://github.com/warmshao/browser-use-webui/issues/1#issuecomment-2573393113).
\ No newline at end of file
+```
\ No newline at end of file
diff --git a/analyze_trace.py b/analyze_trace.py
new file mode 100644
index 00000000..a66a26b8
--- /dev/null
+++ b/analyze_trace.py
@@ -0,0 +1,11 @@
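+"""Ad-hoc helper: run the enhanced trace analyzer on a saved trace and pretty-print the result."""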
+from src.trace_analyzer import EnhancedTraceAnalyzer
+import asyncio
+import json
+
+async def main():
+ analyzer = EnhancedTraceAnalyzer('traces/enhanced-test.json')
+ result = await analyzer.analyze_all()
+ print(json.dumps(result, indent=2))
+
+if __name__ == "__main__":
+ asyncio.run(main())
\ No newline at end of file
diff --git a/cli/README.md b/cli/README.md
new file mode 100644
index 00000000..03e7466c
--- /dev/null
+++ b/cli/README.md
@@ -0,0 +1,161 @@
+# Browser-Use CLI Guide
+
+This guide details the available models and commands for the browser-use CLI tool.
+
+## Available Models
+
+### OpenAI
+- Model: `gpt-4o` (Vision-capable)
+```bash
+# Basic usage
+browser-use run "analyze this webpage" --provider OpenAI
+
+# With vision capabilities
+browser-use run "describe what you see on the page" --provider OpenAI --vision
+```
+
+### Anthropic
+- Models:
+ - `claude-3-5-sonnet-latest` (Default)
+ - `claude-3-5-sonnet-20241022`
+```bash
+# Using default model
+browser-use run "analyze this webpage" --provider Anthropic
+
+# Using specific model version
+browser-use run "analyze this webpage" --provider Anthropic --model-index 1
+```
+
+### Google (Gemini)
+- Models:
+ - `gemini-1.5-pro` (Default)
+ - `gemini-2.0-flash`
+```bash
+# Using default model
+browser-use run "analyze this webpage" --provider Google
+
+# Using flash model
+browser-use run "analyze this webpage" --provider Google --model-index 1
+```
+
+### DeepSeek
+- Model: `deepseek-chat`
+```bash
+# DeepSeek is the default provider
+browser-use run "analyze this webpage"
+
+# Explicitly specifying DeepSeek
+browser-use run "analyze this webpage" --provider Deepseek
+```
+
+## CLI Commands
+
+### Start Browser Session
+```bash
+# Basic start
+browser-use start
+
+# With custom window size
+browser-use start --window-size 1920x1080
+
+# Headless mode
+browser-use start --headless
+
+# With custom Chrome profile
+browser-use start --user-data-dir "/path/to/profile"
+
+# With proxy
+browser-use start --proxy "localhost:8080"
+```
+
+### Run Tasks
+```bash
+# Basic task
+browser-use run "analyze the page" --url "https://example.com"
+
+# With vision capabilities
+browser-use run "describe the visual layout" --url "https://example.com" --vision
+
+# With specific provider and model
+browser-use run "analyze this webpage" --url "https://example.com" --provider Google --model-index 1
+
+# With recording
+browser-use run "test the checkout flow" --url "https://example.com/checkout" --record --record-path ./recordings
+
+# With debugging traces
+browser-use run "analyze form submission" --url "https://example.com/form" --trace-path ./traces
+
+# With step limits
+browser-use run "complex task" --url "https://example.com" --max-steps 5 --max-actions 2
+
+# With additional context
+browser-use run "analyze pricing" --url "https://example.com/pricing" --add-info "Focus on enterprise plans"
+```
+
+### Close Browser
+```bash
+browser-use close
+```
+
+## Environment Variables
+
+Required API keys should be set in your `.env` file:
+```env
+# OpenAI
+OPENAI_API_KEY=your_key_here
+OPENAI_ENDPOINT=https://api.openai.com/v1 # Optional
+
+# Anthropic
+ANTHROPIC_API_KEY=your_key_here
+
+# Google (Gemini)
+GOOGLE_API_KEY=your_key_here
+
+# DeepSeek
+DEEPSEEK_API_KEY=your_key_here
+DEEPSEEK_ENDPOINT=your_endpoint # Optional
+```
+
+## Browser Settings
+
+Optional browser configuration in `.env`:
+```env
+# Custom Chrome settings
+CHROME_PATH=/path/to/chrome
+CHROME_USER_DATA=/path/to/user/data
+
+# Session persistence
+CHROME_PERSISTENT_SESSION=true # Keep browser open between tasks
+```
+
+## Examples
+
+### Visual Analysis Task
+```bash
+browser-use run \
+ "analyze the page layout" \
+ --url "https://example.com" \
+ --provider Google \
+ --vision \
+ --record \
+ --record-path ./recordings
+```
+
+### Multi-Step Task
+```bash
+browser-use run \
+ "fill the form and verify success" \
+ --url "https://example.com/login" \
+ --provider Anthropic \
+ --max-steps 5 \
+ --trace-path ./traces/login
+```
+
+### Research Task
+```bash
+browser-use run \
+ "research pricing information for top 3 competitors" \
+ --url "https://example.com" \
+ --provider OpenAI \
+ --add-info "Focus on enterprise features and annual pricing"
+```
\ No newline at end of file
diff --git a/cli/__init__.py b/cli/__init__.py
new file mode 100644
index 00000000..d1f449a5
--- /dev/null
+++ b/cli/__init__.py
@@ -0,0 +1,3 @@
+"""
+Command-line interface for browser-use.
+"""
\ No newline at end of file
diff --git a/cli/browser-tasks-example.ts b/cli/browser-tasks-example.ts
new file mode 100644
index 00000000..3f39f97a
--- /dev/null
+++ b/cli/browser-tasks-example.ts
@@ -0,0 +1,287 @@
+/**
+ * Browser Automation Task Sequences
+ *
+ * This file defines task sequences for browser automation using the browser-use command.
+ * Each sequence represents a series of browser interactions that can be executed in order.
+ */
+
+export interface BrowserCommand {
+ prompt: string;
+ url: string;
+ provider?: 'Deepseek' | 'Google' | 'OpenAI' | 'Anthropic';
+ modelIndex?: number;
+ headless?: boolean;
+ vision?: boolean;
+ record?: boolean;
+ recordPath?: string;
+ tracePath?: string;
+ maxSteps?: number;
+ maxActions?: number;
+ addInfo?: string;
+ windowSize?: string;
+ userDataDir?: string;
+ proxy?: string;
+}
+
+export interface BrowserTask {
+ description: string;
+ command: BrowserCommand;
+ subtasks?: BrowserTask[];
+}
+
+export interface BrowserTaskSequence {
+ name: string;
+ description: string;
+ tasks: BrowserTask[];
+}
+
+// Example task sequences
+export const browserTasks: BrowserTaskSequence[] = [
+ {
+ name: "Product Research",
+ description: "Compare product prices across multiple e-commerce sites",
+ tasks: [
+ {
+ description: "Search Amazon for wireless earbuds",
+ command: {
+ prompt: "search for 'wireless earbuds' and tell me the price of the top 3 results",
+ url: "https://www.amazon.com",
+ provider: "Deepseek"
+ }
+ },
+ {
+ description: "Search Best Buy for comparison",
+ command: {
+ prompt: "search for 'wireless earbuds' and tell me the price of the top 3 results",
+ url: "https://www.bestbuy.com",
+ provider: "Deepseek"
+ }
+ },
+ {
+ description: "Create price comparison",
+ command: {
+ prompt: "create a comparison table of the prices from both sites",
+ url: "about:blank",
+ provider: "Deepseek"
+ }
+ }
+ ]
+ },
+ {
+ name: "Site Health Check",
+ description: "Monitor website availability and performance",
+ tasks: [
+ {
+ description: "Check main site",
+ command: {
+ prompt: "check if it loads properly",
+ url: "https://example.com",
+ provider: "Deepseek",
+ headless: true
+ }
+ },
+ {
+ description: "Verify API health",
+ command: {
+ prompt: "check the API health status",
+ url: "https://api.example.com/health",
+ provider: "Deepseek",
+ headless: true
+ }
+ },
+ {
+ description: "Test documentation site",
+ command: {
+ prompt: "verify all navigation links are working",
+ url: "https://docs.example.com",
+ provider: "Deepseek",
+ headless: true
+ }
+ }
+ ]
+ },
+ {
+ name: "Content Analysis",
+ description: "Analyze blog content and engagement",
+ tasks: [
+ {
+ description: "List articles",
+ command: {
+ prompt: "list all article titles from the homepage",
+ url: "https://blog.example.com",
+ provider: "Deepseek"
+ }
+ },
+ {
+ description: "Analyze first article",
+ command: {
+ prompt: "click on the first article and summarize its main points",
+ url: "https://blog.example.com",
+ provider: "Deepseek"
+ },
+ subtasks: [
+ {
+ description: "Get metadata",
+ command: {
+ prompt: "tell me the author, publication date, and reading time",
+ url: "https://blog.example.com",
+ provider: "Deepseek"
+ }
+ },
+ {
+ description: "Analyze comments",
+ command: {
+ prompt: "scroll to the comments section and summarize the main discussion points",
+ url: "https://blog.example.com",
+ provider: "Deepseek"
+ }
+ }
+ ]
+ }
+ ]
+ },
+ {
+ name: "Advanced Content Analysis",
+ description: "Analyze website content using different models for different tasks",
+ tasks: [
+ {
+ description: "Initial navigation and basic text extraction",
+ command: {
+ prompt: "navigate to the Actions documentation and extract basic text content",
+ url: "https://docs.github.com",
+ provider: "Deepseek"
+ }
+ },
+ {
+ description: "Visual analysis of page structure",
+ command: {
+ prompt: "analyze the layout of the page and tell me how the documentation is structured, including sidebars, navigation, and content areas",
+ url: "https://docs.github.com",
+ provider: "Google",
+ vision: true,
+ modelIndex: 1,
+ addInfo: "Only using Google here because we need vision capabilities"
+ }
+ },
+ {
+ description: "Complex content summarization",
+ command: {
+ prompt: "summarize the key concepts of GitHub Actions based on the documentation",
+ url: "https://docs.github.com",
+ provider: "Deepseek"
+ }
+ },
+ {
+ description: "Extract code examples",
+ command: {
+ prompt: "find and list all YAML workflow examples on the page",
+ url: "https://docs.github.com",
+ provider: "Deepseek"
+ }
+ }
+ ]
+ },
+ {
+ name: "Page Structure Analysis",
+ description: "Generate detailed reports about page structure and interactive elements",
+ tasks: [
+ {
+ description: "Analyze homepage structure",
+ command: {
+ prompt: "create a report about the page structure, including the page title, headings, and any interactive elements found",
+ url: "https://example.com",
+ provider: "Deepseek"
+ }
+ },
+ {
+ description: "Analyze navigation structure",
+ command: {
+ prompt: "focus on the navigation menu and create a detailed report of its structure and all available links",
+ url: "https://example.com",
+ provider: "Google",
+ vision: true,
+ addInfo: "Only using Google here because we need vision capabilities for complex layout analysis"
+ }
+ },
+ {
+ description: "Document forms and inputs",
+ command: {
+ prompt: "find all forms on the page and document their inputs, buttons, and validation requirements",
+ url: "https://example.com",
+ provider: "Google",
+ vision: true,
+ addInfo: "Only using Google here because we need vision capabilities for form analysis"
+ }
+ }
+ ]
+ },
+ {
+ name: "Debug Session",
+ description: "Record and analyze browser interactions for debugging",
+ tasks: [
+ {
+ description: "Start debug session",
+ command: {
+ prompt: "attempt to log in with test credentials",
+ url: "https://example.com/login",
+ provider: "Deepseek",
+ headless: false,
+ tracePath: "./tmp/traces/login",
+ record: true,
+ recordPath: "./recordings/login"
+ }
+ },
+ {
+ description: "Navigate complex workflow",
+ command: {
+ prompt: "complete the multi-step registration process",
+ url: "https://example.com/register",
+ provider: "Deepseek",
+ maxSteps: 5,
+ maxActions: 2,
+ tracePath: "./tmp/traces/registration"
+ }
+ },
+ {
+ description: "Generate debug report",
+ command: {
+ prompt: "create a report of all actions taken and any errors encountered",
+ url: "about:blank",
+ provider: "Deepseek",
+ addInfo: "Focus on error patterns and user interaction points"
+ }
+ }
+ ]
+ }
+];
+
+// Updated execute task function to match CLI arguments
+const executeTask = (task: BrowserCommand): string => {
+ const options: string[] = [];
+
+ if (task.provider) options.push(`--provider ${task.provider}`);
+ if (task.modelIndex !== undefined) options.push(`--model-index ${task.modelIndex}`);
+ if (task.headless) options.push('--headless');
+ if (task.vision) options.push('--vision');
+ if (task.record) {
+ options.push('--record');
+ if (task.recordPath) options.push(`--record-path ${task.recordPath}`);
+ }
+ if (task.tracePath) options.push(`--trace-path ${task.tracePath}`);
+ if (task.maxSteps) options.push(`--max-steps ${task.maxSteps}`);
+ if (task.maxActions) options.push(`--max-actions ${task.maxActions}`);
+ if (task.addInfo) options.push(`--add-info "${task.addInfo}"`);
+ if (task.windowSize) options.push(`--window-size ${task.windowSize}`);
+ if (task.userDataDir) options.push(`--user-data-dir "${task.userDataDir}"`);
+ if (task.proxy) options.push(`--proxy "${task.proxy}"`);
+
+ return `browser-use run "${task.prompt}" --url "${task.url}" ${options.join(' ')}`.trim();
+};
+
+// Example usage:
+const sequence = browserTasks[0]; // Get Product Research sequence
+console.log(`Executing sequence: ${sequence.name}`);
+sequence.tasks.forEach(task => {
+ console.log(`\n${task.description}:`);
+ console.log(executeTask(task.command));
+});
\ No newline at end of file
diff --git a/cli/browser-use b/cli/browser-use
new file mode 100755
index 00000000..4c83fb85
--- /dev/null
+++ b/cli/browser-use
@@ -0,0 +1,76 @@
+#!/bin/bash
+
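+# Wrapper around cli/browser_use_cli.py: resolves the project root, activates
+# the virtual environment, forwards all arguments, and uses a temporary state
+# file to decide whether to keep the venv active for a persistent session.
+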
+# Get the absolute path of the script's real location (dereference symbolic link)
+REAL_SCRIPT_PATH=$(readlink -f "${BASH_SOURCE[0]}")
+
+# Get the directory of the script
+SCRIPT_DIR="$(dirname "$REAL_SCRIPT_PATH")"
+
+# Project root is one level up from the script's directory
+PROJECT_ROOT="$SCRIPT_DIR/.."
+
+# Change to the project root directory
+cd "$PROJECT_ROOT"
+
+# Activate the virtual environment
+if [ -f "venv/bin/activate" ]; then
+ echo "Activating virtual environment"
+ source "venv/bin/activate"
+ echo "VIRTUAL_ENV: $VIRTUAL_ENV"
+else
+ echo "Virtual environment activation script not found"
+fi
+
+# Create a temporary file for state transfer
+TEMP_STATE_FILE=$(mktemp)
+echo "Created temporary state file: $TEMP_STATE_FILE"
+
+# Run the Python script and capture its exit code
+echo "Running: venv/bin/python cli/browser_use_cli.py $*"
+"venv/bin/python" "cli/browser_use_cli.py" "$@" --temp-file "$TEMP_STATE_FILE"
+PYTHON_EXIT_CODE=$?
+
+# If the Python script exited with a non-zero code, clean up and exit with the same code
+if [ $PYTHON_EXIT_CODE -ne 0 ]; then
+    echo "Error running command. Python script exited with code: $PYTHON_EXIT_CODE"
+    echo "Cleaning up temp file: $TEMP_STATE_FILE"
+    rm -f "$TEMP_STATE_FILE"
+    exit $PYTHON_EXIT_CODE
+fi
+
+# Read the BROWSER_USE_RUNNING value from the temporary file
+if [ -f "$TEMP_STATE_FILE" ]; then
+ BROWSER_USE_RUNNING=$(cat "$TEMP_STATE_FILE")
+ echo "Read BROWSER_USE_RUNNING from file: $BROWSER_USE_RUNNING"
+ echo "Cleaning up temp file: $TEMP_STATE_FILE"
+ rm -f "$TEMP_STATE_FILE"
+else
+ BROWSER_USE_RUNNING="false"
+ echo "Warning: Temp file not found at: $TEMP_STATE_FILE"
+ echo "Defaulting BROWSER_USE_RUNNING to: false"
+fi
+
+# Set the environment variable in the shell script based on captured value
+export BROWSER_USE_RUNNING
+echo "Environment variable BROWSER_USE_RUNNING set to: $BROWSER_USE_RUNNING"
+
+# Check if the BROWSER_USE_RUNNING environment variable is set to true
+echo "BROWSER_USE_RUNNING: $BROWSER_USE_RUNNING"
+if [ "$BROWSER_USE_RUNNING" = "true" ]; then
+ echo "Keeping virtual environment active for persistent session."
+else
+ # Deactivate the virtual environment only if not running persistently
+ if [ -n "$VIRTUAL_ENV" ]; then
+ echo "Deactivating virtual environment"
+ deactivate
+ else
+ echo "Virtual environment was not active."
+ fi
+fi
\ No newline at end of file
diff --git a/cli/browser-use.toolchain.json b/cli/browser-use.toolchain.json
new file mode 100644
index 00000000..18ca2c0b
--- /dev/null
+++ b/cli/browser-use.toolchain.json
@@ -0,0 +1,114 @@
+{
+ "name": "browser-use",
+ "description": "Execute natural language browser automation commands",
+ "type": "terminal_command",
+ "functions": [
+ {
+ "name": "browser_command",
+ "description": "Control a browser using natural language instructions",
+ "parameters": {
+ "properties": {
+ "prompt": {
+ "type": "string",
+ "description": "The natural language instruction (e.g., 'go to google.com and search for OpenAI'). **Ensure URLs are well-formed and include the protocol (e.g., https://).**"
+ },
+ "url": {
+ "type": "string",
+ "description": "The starting URL for the browser automation task. Must include the protocol (e.g., https://example.com)."
+ },
+ "provider": {
+ "type": "string",
+ "enum": [
+ "Deepseek",
+ "Google",
+ "OpenAI",
+ "Anthropic"
+ ],
+ "default": "Deepseek",
+ "description": "The LLM provider to use. DeepSeek is recommended for most tasks due to its cost-effectiveness and performance. The system will automatically select the appropriate model based on your task requirements (e.g., vision capabilities)."
+ },
+ "model_index": {
+ "type": "integer",
+ "description": "Optional index to select a specific model from the provider's available models (0-based). Available models per provider:\nDeepseek: [0: deepseek-chat]\nGoogle: [0: gemini-1.5-pro, 1: gemini-2.0-flash]\nOpenAI: [0: gpt-4o]\nAnthropic: [0: claude-3-5-sonnet-latest, 1: claude-3-5-sonnet-20241022]"
+ },
+ "vision": {
+ "type": "boolean",
+ "default": false,
+ "description": "Enable vision capabilities (optional). **When enabled, the system will automatically select a vision-capable model from your chosen provider.**"
+ },
+ "headless": {
+ "type": "boolean",
+ "default": false,
+ "description": "Run browser in headless mode (optional). **Headless mode might be necessary for certain environments or tasks but can limit interaction with visually-dependent elements.**"
+ },
+ "record": {
+ "type": "boolean",
+ "default": false,
+ "description": "Enable session recording (optional). **Useful for debugging and understanding the agent's actions.**"
+ },
+ "recordPath": {
+ "type": "string",
+ "default": "./tmp/record_videos",
+ "description": "Path to save recordings (optional). **Ensure the directory exists and is writable.**"
+ },
+ "tracePath": {
+ "type": "string",
+ "description": "Path to save debugging traces (optional). **Traces can provide detailed information about the automation process.**"
+ },
+ "maxSteps": {
+ "type": "integer",
+ "default": 10,
+ "description": "Maximum number of steps per task (optional). **Increase this for complex tasks, but be mindful of potential infinite loops.**"
+ },
+ "maxActions": {
+ "type": "integer",
+ "default": 1,
+ "description": "Maximum actions per step (optional). **Adjust this based on the complexity of each step.**"
+ },
+ "addInfo": {
+ "type": "string",
+ "description": "Additional context or instructions for the agent (optional). **Use this to provide specific details not covered in the main prompt.**"
+ },
+ "tempFile": {
+ "type": "string",
+ "description": "Path to temporary file to store the browser session state (optional). **Used for resuming or closing specific sessions.**"
+ },
+ "userDataDir": {
+ "type": "string",
+ "description": "Path to user data directory for a persistent browser session (optional). **Use this to maintain browser state across sessions (e.g., cookies, extensions).**"
+ }
+ },
+ "required": [
+ "prompt",
+ "url"
+ ]
+ }
+ }
+ ],
+ "examples": [
+ {
+ "description": "Basic usage with default provider (DeepSeek)",
+ "command": "browser-use run \"search for OpenAI\" --url \"https://www.google.com\""
+ },
+ {
+ "description": "Using Google Gemini with vision for visual analysis",
+ "command": "browser-use run \"analyze the visual layout\" --url \"https://www.openai.com\" --provider Google --vision"
+ },
+ {
+ "description": "Using OpenAI for complex analysis",
+ "command": "browser-use run \"analyze the layout and design\" --url \"https://www.example.com\" --provider OpenAI --vision"
+ },
+ {
+ "description": "Using Anthropic with specific model version",
+ "command": "browser-use run \"analyze the documentation\" --url \"https://docs.example.com\" --provider Anthropic --model-index 1"
+ },
+ {
+ "description": "Running a check in headless mode",
+ "command": "browser-use run \"check if site is up\" --url \"https://www.github.com\" --provider Deepseek --headless"
+ },
+ {
+ "description": "Recording a debug session",
+ "command": "browser-use run \"test the login process\" --url \"https://example.com\" --provider Google --record --record-path ./debug_session"
+ }
+ ]
+}
\ No newline at end of file
diff --git a/cli/browser_use_cli.py b/cli/browser_use_cli.py
new file mode 100644
index 00000000..fde15355
--- /dev/null
+++ b/cli/browser_use_cli.py
@@ -0,0 +1,411 @@
+#!/usr/bin/env python3
+import asyncio
+import argparse
+import os
+import sys
+from pathlib import Path
+import json
+import tempfile
+
+# Add the project root to PYTHONPATH
+project_root = str(Path(__file__).parent.parent)
+sys.path.append(project_root)
+
+from browser_use.browser.browser import Browser, BrowserConfig
+from browser_use.browser.context import BrowserContext, BrowserContextConfig, BrowserContextWindowSize
+from src.agent.custom_agent import CustomAgent
+from src.controller.custom_controller import CustomController
+from src.agent.custom_prompts import CustomSystemPrompt
+from src.utils import utils
+from dotenv import load_dotenv
+from src.trace_analyzer import analyze_trace
+
+# Load .env from the project root
+load_dotenv(Path(project_root) / '.env')
+
+# Global variables for browser persistence
+_global_browser = None
+_global_browser_context = None
+
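+# Note: the bash wrapper (cli/browser-use) creates a temporary state file,
+# passes it via --temp-file, and reads the "true"/"false" value written there
+# after each command to decide whether to keep its virtual environment active.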
+def _get_browser_state():
+ """Get browser state from temporary file."""
+ temp_file = os.path.join(tempfile.gettempdir(), "browser_use_state")
+ try:
+ with open(temp_file, "r") as f:
+ return f.read().strip().lower() == "true"
+ except FileNotFoundError:
+ return False
+
+def _set_browser_state(running=True, temp_file_path=None):
+ """Set browser state in a temporary file."""
+ value = str(running).lower()
+ if temp_file_path:
+ with open(temp_file_path, "w") as f:
+ f.write(value)
+
+async def initialize_browser(
+ headless=False,
+ window_size=(1920, 1080),
+ disable_security=False,
+ user_data_dir=None,
+ proxy=None
+):
+ """Initialize a new browser instance with the given configuration."""
+ global _global_browser, _global_browser_context
+
+ # Check both environment and global variables
+ if _get_browser_state() or _global_browser is not None:
+ # Close any existing browser first
+ if _global_browser is not None:
+ await close_browser()
+ else:
+ _set_browser_state(False)
+
+ window_w, window_h = window_size
+
+ # Initialize browser with launch-time options
+ browser = Browser(
+ config=BrowserConfig(
+ headless=headless,
+ disable_security=disable_security,
+ chrome_instance_path=user_data_dir,
+ extra_chromium_args=[f"--window-size={window_w},{window_h}"],
+ proxy=proxy
+ )
+ )
+
+ # Create initial browser context
+ browser_context = await browser.new_context(
+ config=BrowserContextConfig(
+ no_viewport=False,
+ browser_window_size=BrowserContextWindowSize(
+ width=window_w,
+ height=window_h
+ ),
+ disable_security=disable_security
+ )
+ )
+
+ # Store globally
+ _global_browser = browser
+ _global_browser_context = browser_context
+ _set_browser_state(True)
+ return True
+
+async def close_browser():
+ """Close the current browser instance if one exists."""
+ global _global_browser, _global_browser_context
+
+ if _global_browser_context is not None:
+ await _global_browser_context.close()
+ _global_browser_context = None
+
+ if _global_browser is not None:
+ await _global_browser.close()
+ _global_browser = None
+
+ _set_browser_state(False)
+
+async def run_browser_task(
+ prompt,
+ url=None,
+ provider="Deepseek",
+ model_index=None,
+ vision=False,
+ record=False,
+ record_path=None,
+ trace_path=None,
+ hide_trace=False,
+ max_steps=10,
+ max_actions=1,
+ add_info="",
+ on_init=None,
+ headless=False,
+ window_size=(1920, 1080),
+ disable_security=False,
+ user_data_dir=None,
+ proxy=None
+):
+ """Execute a task using the current browser instance, auto-initializing if needed."""
+ global _global_browser, _global_browser_context
+
+ # Validate URL if provided
+ if url:
+ try:
+ from urllib.parse import urlparse
+ result = urlparse(url)
+ if not all([result.scheme, result.netloc]):
+ raise ValueError("Invalid URL format")
+ except Exception as e:
+ return f"Invalid URL provided: {str(e)}"
+
+ # Store the trace file path if tracing is enabled
+ trace_file = None
+
+ # Check if browser is running and initialize if needed
+ if not _get_browser_state():
+ print("Browser not running. Starting browser session...")
+ if not await initialize_browser(
+ headless=headless,
+ window_size=window_size,
+ disable_security=disable_security,
+ user_data_dir=user_data_dir,
+ proxy=proxy
+ ):
+ return "Browser initialization failed"
+
+ # Signal successful initialization if callback provided
+ if _get_browser_state() and on_init:
+ await on_init()
+
+ # Verify browser state is consistent
+ if _global_browser is None or _global_browser_context is None:
+ print("Browser session state is inconsistent. Attempting to reinitialize...")
+ if not await initialize_browser(
+ headless=headless,
+ window_size=window_size,
+ disable_security=disable_security,
+ user_data_dir=user_data_dir,
+ proxy=proxy
+ ):
+ return "Browser reinitialization failed"
+ if _global_browser is None or _global_browser_context is None:
+ return "Browser session state remains inconsistent after reinitialization"
+
+ # Initialize controller
+ controller = CustomController()
+
+ # Normalize provider name to lowercase for consistency
+ provider = provider.lower()
+
+ # Handle Deepseek + vision case
+ if provider == "deepseek" and vision:
+ print("WARNING: Deepseek does not support vision capabilities. Falling back to standard Deepseek model.")
+ vision = False
+
+    # Select appropriate model based on provider, model_index, and vision requirement.
+    # Map the CLI provider name to the key used in utils.model_names.
+    provider_map = {
+        "google": "gemini",
+        "openai": "openai",
+        "anthropic": "anthropic",
+        "deepseek": "deepseek",
+    }
+    if provider not in provider_map:
+        raise ValueError(f"Unsupported provider: {provider}")
+    provider_key = provider_map[provider]
+
+ if provider_key not in utils.model_names:
+ raise ValueError(f"No models found for provider: {provider}")
+
+ available_models = utils.model_names[provider_key]
+
+ if model_index is not None:
+ if not (0 <= model_index < len(available_models)):
+ raise ValueError(f"Invalid model_index {model_index} for provider {provider}. Available indices: 0-{len(available_models)-1}")
+ model_name = available_models[model_index]
+    else:
+        # Default to the provider's first listed model
+        # (deepseek-chat, gemini-1.5-pro, gpt-4o, or claude-3-5-sonnet-latest)
+        model_name = available_models[0]
+
+ # Get LLM model
+ llm = utils.get_llm_model(
+ provider=provider_key,
+ model_name=model_name,
+ temperature=0.8,
+ vision=vision
+ )
+
+ # Create new context with tracing/recording enabled
+ if record or trace_path:
+ # Close existing context first
+ if _global_browser_context is not None:
+ await _global_browser_context.close()
+
+ # Create new context with tracing/recording enabled
+ if trace_path:
+ trace_dir = Path(trace_path)
+ if not trace_path.endswith('.zip'):
+ trace_dir = trace_dir / 'trace.zip'
+ trace_dir.parent.mkdir(parents=True, exist_ok=True)
+ trace_file = str(trace_dir)
+ else:
+ trace_file = None
+
+ _global_browser_context = await _global_browser.new_context(
+ config=BrowserContextConfig(
+ trace_path=trace_file,
+ save_recording_path=str(record_path) if record else None,
+ no_viewport=False,
+ browser_window_size=BrowserContextWindowSize(
+ width=1920,
+ height=1080
+ ),
+ disable_security=False
+ )
+ )
+
+ # Initialize agent with starting URL if provided
+ agent = CustomAgent(
+ task=f"First, navigate to {url}. Then, {prompt}" if url else prompt,
+ add_infos=add_info,
+ llm=llm,
+ browser=_global_browser,
+ browser_context=_global_browser_context,
+ controller=controller,
+ system_prompt_class=CustomSystemPrompt,
+ use_vision=vision,
+ tool_call_in_content=True,
+ max_actions_per_step=max_actions
+ )
+
+ # Run task
+ history = await agent.run(max_steps=max_steps)
+ result = history.final_result()
+
+ # Close the context to ensure trace is saved
+ if _global_browser_context is not None:
+ await _global_browser_context.close()
+ _global_browser_context = None
+
+ # Analyze and display trace if enabled
+ if trace_file and not hide_trace:
+ print("\nTrace Analysis:")
+ print("=" * 50)
+ try:
+ # Find the actual trace file in the nested directory
+ trace_files = list(Path(str(trace_path)).rglob('*.zip'))
+ if trace_files:
+                actual_trace = str(trace_files[0])  # Use the first trace file found
+                try:
+ trace_analysis = await analyze_trace(actual_trace)
+ print(json.dumps(trace_analysis, indent=2))
+ except Exception as e:
+ print(f"Failed to analyze trace: {e}")
+ else:
+ print("No trace file found")
+ except Exception as e:
+ print(f"Error finding trace file: {e}")
+
+ return result
+
+def main():
+ parser = argparse.ArgumentParser(description="Control a browser using natural language")
+ subparsers = parser.add_subparsers(dest="command", help="Commands")
+
+ # Start command
+ start_parser = subparsers.add_parser("start", help="Start a new browser session")
+ start_parser.add_argument("--temp-file", help="Path to temporary file for storing browser state")
+ start_parser.add_argument("--headless", action="store_true", help="Run browser in headless mode")
+ start_parser.add_argument("--window-size", default="1920x1080", help="Browser window size (WxH)")
+ start_parser.add_argument("--disable-security", action="store_true", help="Disable browser security features")
+ start_parser.add_argument("--user-data-dir", help="Use custom Chrome profile directory")
+ start_parser.add_argument("--proxy", help="Proxy server URL")
+
+ # Run command
+ run_parser = subparsers.add_parser("run", help="Run a task in the current browser session")
+ run_parser.add_argument("--temp-file", help="Path to temporary file for storing browser state")
+ run_parser.add_argument("prompt", help="The task to perform")
+ run_parser.add_argument("--url", required=True, help="The starting URL for the browser automation task")
+ run_parser.add_argument("--provider", "-p", choices=["Deepseek", "Google", "OpenAI", "Anthropic"],
+ default="Deepseek", help="The LLM provider to use (system will select appropriate model)")
+ run_parser.add_argument("--model-index", "-m", type=int,
+ help="Optional index to select a specific model from the provider's available models (0-based)")
+ run_parser.add_argument("--vision", action="store_true", help="Enable vision capabilities")
+ run_parser.add_argument("--record", action="store_true", help="Enable session recording")
+ run_parser.add_argument("--record-path", default="./tmp/record_videos", help="Path to save recordings")
+ run_parser.add_argument("--trace-path", default="./tmp/traces", help="Path to save debugging traces")
+ run_parser.add_argument("--hide-trace", action="store_true", help="Don't display trace analysis after task completion")
+ run_parser.add_argument("--max-steps", type=int, default=10, help="Maximum number of steps per task")
+ run_parser.add_argument("--max-actions", type=int, default=1, help="Maximum actions per step")
+ run_parser.add_argument("--add-info", help="Additional context for the agent")
+
+ # Close command
+ close_parser = subparsers.add_parser("close", help="Close the current browser session")
+ close_parser.add_argument("--temp-file", help="Path to temporary file for storing browser state")
+
+ # Analyze trace command
+ analyze_parser = subparsers.add_parser("analyze-trace", help="Analyze a Playwright trace file")
+ analyze_parser.add_argument("trace_path", help="Path to the trace file")
+ analyze_parser.add_argument("--output", "-o", help="Path to save the analysis output (default: print to stdout)")
+
+ args = parser.parse_args()
+
+ if args.command == "start":
+ # Parse window size
+ try:
+ window_w, window_h = map(int, args.window_size.split('x'))
+ except ValueError:
+ print(f"Invalid window size format: {args.window_size}. Using default 1920x1080")
+ window_w, window_h = 1920, 1080
+
+ # Start browser
+ success = asyncio.run(initialize_browser(
+ headless=args.headless,
+ window_size=(window_w, window_h),
+ disable_security=args.disable_security,
+ user_data_dir=args.user_data_dir,
+ proxy=args.proxy
+ ))
+ if success:
+ print("Browser session started successfully")
+ _set_browser_state(True, args.temp_file)
+ else:
+ print("Failed to start browser session")
+ _set_browser_state(False, args.temp_file)
+
+ elif args.command == "run":
+ # Run task
+ result = asyncio.run(run_browser_task(
+ prompt=args.prompt,
+ url=args.url,
+ provider=args.provider,
+ model_index=args.model_index,
+ vision=args.vision,
+ record=args.record,
+ record_path=args.record_path if args.record else None,
+ trace_path=args.trace_path,
+ hide_trace=args.hide_trace,
+ max_steps=args.max_steps,
+ max_actions=args.max_actions,
+ add_info=args.add_info,
+ headless=False,
+ window_size=(1920, 1080),
+ disable_security=False,
+ user_data_dir=None,
+ proxy=None
+ ))
+ if result:
+ print(result)
+
+ elif args.command == "close":
+ # Close browser
+ asyncio.run(close_browser())
+ print("Browser session closed")
+ _set_browser_state(False, args.temp_file)
+
+ elif args.command == "analyze-trace":
+ # Analyze trace
+ result = asyncio.run(analyze_trace(args.trace_path))
+ if args.output:
+ with open(args.output, 'w') as f:
+ json.dump(result, f, indent=2)
+ print(f"Analysis saved to {args.output}")
+ else:
+ print(json.dumps(result, indent=2))
+
+ else:
+ parser.print_help()
+
+if __name__ == "__main__":
+ main()
\ No newline at end of file
diff --git a/cli/usage-guide.md b/cli/usage-guide.md
new file mode 100644
index 00000000..8a26a61e
--- /dev/null
+++ b/cli/usage-guide.md
@@ -0,0 +1,308 @@
+# Browser-Use API Usage Guide
+
+## Overview
+
+This guide explains how to use the browser-use API to automate browser interactions using different LLM models. The API provides a powerful way to control a browser programmatically through Python.
+
+## Basic Setup
+
+```python
+import asyncio
+from browser_use.browser.browser import Browser, BrowserConfig
+from browser_use.browser.context import BrowserContextConfig, BrowserContextWindowSize
+from src.agent.custom_agent import CustomAgent
+from src.controller.custom_controller import CustomController
+from src.agent.custom_prompts import CustomSystemPrompt
+from src.utils import utils
+import os
+
+# Window size configuration
+window_w, window_h = 1920, 1080
+
+# Browser initialization
+browser = Browser(
+ config=BrowserConfig(
+ headless=False, # Set to True for headless mode
+ disable_security=True,
+ extra_chromium_args=[f"--window-size={window_w},{window_h}"],
+ )
+)
+```
+
+## Browser Context Configuration
+
+```python
+# Create a browser context with recording and tracing
+browser_context = await browser.new_context(
+ config=BrowserContextConfig(
+ trace_path="./tmp/traces", # For debugging
+ save_recording_path="./tmp/record_videos", # For session recording
+ no_viewport=False,
+ browser_window_size=BrowserContextWindowSize(
+ width=window_w, height=window_h
+ ),
+ )
+)
+```
+
+## Model Configuration
+
+### DeepSeek (Default)
+
+```python
+llm = utils.get_llm_model(
+ provider="deepseek",
+ model_name="deepseek-chat", # V2.5 model
+ temperature=0.8,
+ base_url="https://api.deepseek.com/v1",
+ api_key=os.getenv("DEEPSEEK_API_KEY", "")
+)
+```
+
+### Gemini Pro
+
+```python
+llm = utils.get_llm_model(
+ provider="gemini",
+ model_name="gemini-2.0-flash-exp",
+ temperature=1.0,
+ api_key=os.getenv("GOOGLE_API_KEY", "")
+)
+```
+
+### GPT-4 Turbo
+
+```python
+llm = utils.get_llm_model(
+ provider="openai",
+ model_name="gpt-4-turbo-preview",
+ temperature=0.8,
+ api_key=os.getenv("OPENAI_API_KEY", "")
+)
+```
+
+### Claude-3 Opus
+
+```python
+llm = utils.get_llm_model(
+ provider="anthropic",
+ model_name="claude-3-opus-20240229",
+ temperature=0.8,
+ api_key=os.getenv("ANTHROPIC_API_KEY", "")
+)
+```
+
+## Agent Configuration
+
+```python
+# Initialize controller
+controller = CustomController()
+
+# Initialize agent
+agent = CustomAgent(
+ task="your task description here",
+ add_infos="", # Optional hints for the LLM
+ llm=llm, # LLM model configured above
+ browser=browser,
+ browser_context=browser_context,
+ controller=controller,
+ system_prompt_class=CustomSystemPrompt,
+ use_vision=False, # Must be False for DeepSeek
+ tool_call_in_content=True, # Required for DeepSeek
+ max_actions_per_step=1 # Control action granularity
+)
+```
+
+## Running Tasks
+
+```python
+# Run the agent with a maximum number of steps
+history = await agent.run(max_steps=10)
+
+# Access results
+print("Final Result:", history.final_result())
+print("Errors:", history.errors())
+print("Model Actions:", history.model_actions())
+print("Thoughts:", history.model_thoughts())
+```
+
+## Common Tasks
+
+### Navigation
+
+```python
+task="go to google.com"
+```
+
+### Search
+
+```python
+task="go to google.com and search for 'OpenAI'"
+```
+
+### Form Filling
+
+```python
+task="go to example.com/login and fill in username 'user' and password 'pass'"
+```
+
+### Clicking Elements
+
+```python
+task="click the 'Submit' button"
+```
+
+## Model-Specific Considerations
+
+1. **DeepSeek**
+ - Set `use_vision=False`
+ - Set `tool_call_in_content=True`
+ - Uses OpenAI-compatible API format
+
+2. **Gemini**
+ - Set `use_vision=True`
+ - Works well with visual tasks
+
+3. **GPT-4 & Claude-3**
+ - Support both vision and non-vision tasks
+ - Higher reasoning capabilities for complex tasks
+
+## Best Practices
+
+1. **Error Handling**
+ - Always check `history.errors()` for any issues
+ - Monitor `history.model_thoughts()` for debugging
+
+2. **Resource Management**
+ - Use async context managers for browser and context
+ - Close resources properly after use
+
+3. **Task Description**
+ - Be specific and clear in task descriptions
+ - Include necessary context in `add_infos`
+
+4. **Performance**
+ - Use `headless=True` for automated tasks
+ - Adjust `max_steps` and `max_actions_per_step` based on task complexity
+
+## Example Implementation
+
+```python
+async def main():
+ # Browser setup
+ browser = Browser(config=BrowserConfig(...))
+
+ async with await browser.new_context(...) as browser_context:
+ # Controller setup
+ controller = CustomController()
+
+ # Agent setup
+ agent = CustomAgent(
+ task="your task",
+ llm=your_configured_llm,
+ browser=browser,
+ browser_context=browser_context,
+ controller=controller,
+ system_prompt_class=CustomSystemPrompt,
+ use_vision=False,
+ tool_call_in_content=True,
+ max_actions_per_step=1
+ )
+
+ # Run task
+ history = await agent.run(max_steps=10)
+
+        # Process results
+        print(history.final_result())
+
+    # Close the browser once the context manager has exited
+    await browser.close()
+
+if __name__ == "__main__":
+ asyncio.run(main())
+```
+
+## Troubleshooting
+
+1. **JSON Schema Errors with DeepSeek**
+ - Ensure using latest DeepSeek V2.5 endpoint
+ - Verify correct base URL and API key
+ - Use `tool_call_in_content=True`
+
+2. **Browser Connection Issues**
+ - Check browser configuration
+ - Verify Chrome/Chromium installation
+ - Ensure proper port access
+
+3. **Model Response Issues**
+ - Adjust temperature for more/less deterministic behavior
+ - Try different models for complex tasks
+ - Check API key validity and quotas
+
+## Tracing and Debugging
+
+### Enabling Tracing
+
+```python
+# Enable tracing in browser context
+browser_context = await browser.new_context(
+ config=BrowserContextConfig(
+ trace_path="./tmp/traces/trace.zip", # Must have .zip extension
+ no_viewport=False,
+ browser_window_size=BrowserContextWindowSize(
+ width=window_w, height=window_h
+ ),
+ )
+)
+```
+
+### Using Traces for Debugging
+
+1. **Recording Traces**
+ - Traces are automatically saved when `trace_path` is provided
+ - Files are saved with `.zip` extension
+ - Contains browser actions, network requests, and screenshots
+
+2. **Analyzing Traces**
+   - Use Playwright Trace Viewer to analyze traces (a programmatic alternative is sketched after this list)
+ - View step-by-step browser actions
+ - Inspect network requests and responses
+ - Review page states at each step
+
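+For programmatic analysis, the repository's trace analyzer can be used as well. A minimal sketch, assuming `analyze_trace` accepts the path to a saved trace archive (as it is used in `cli/browser_use_cli.py`):
+
+```python
+import asyncio
+import json
+
+from src.trace_analyzer import analyze_trace
+
+async def main():
+    # Illustrative path; point this at a trace saved via trace_path above
+    analysis = await analyze_trace("./tmp/traces/trace.zip")
+    print(json.dumps(analysis, indent=2))
+
+if __name__ == "__main__":
+    asyncio.run(main())
+```
+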
+## Report Generation
+
+### Best Practices
+
+1. **Structure**
+ - Always include page title and headings
+ - List interactive elements with their types
+ - Provide clear hierarchy of content
+ - Include relevant metadata (URLs, timestamps)
+
+2. **Content**
+ - Focus on task-relevant information
+ - Include both static and dynamic content
+ - Document interactive elements and their states
+ - Note any errors or warnings
+
+3. **Format**
+ - Use clear section headings
+ - Include numbered or bulleted lists
+ - Add summary sections for complex pages
+ - Use markdown formatting for readability
+
+### Example Report Task
+
+```python
+task = "create a report about the page structure, including any interactive elements found"
+add_infos = "Focus on navigation elements and forms"
+
+agent = CustomAgent(
+ task=task,
+ add_infos=add_infos,
+ llm=llm,
+ browser=browser,
+ browser_context=browser_context,
+ controller=controller,
+ system_prompt_class=CustomSystemPrompt,
+ use_vision=True, # Enable vision for better structure analysis
+ max_actions_per_step=1
+)
+```
diff --git a/demo_logging.py b/demo_logging.py
new file mode 100644
index 00000000..f7c70093
--- /dev/null
+++ b/demo_logging.py
@@ -0,0 +1,99 @@
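+"""Demonstration of the TaskLogger API: phases, step updates, retry handling, and log history output."""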
+import asyncio
+from src.utils.task_logging import (
+ TaskLogger, TaskStatus, ActionType, RetryConfig,
+ ColorScheme, SeparatorStyle
+)
+
+async def demo_logging():
+ # Initialize logger with custom styles
+ logger = TaskLogger(
+ "demo_task",
+ "Demonstrate all logging features",
+ color_scheme=ColorScheme(),
+ separator_style=SeparatorStyle(
+ task="★" * 40,
+ phase="•" * 30,
+ error="!" * 35
+ )
+ )
+
+ # Start navigation phase
+ logger.start_phase("Navigation Phase")
+ logger.update_step(
+ "Navigate to example.com",
+ TaskStatus.RUNNING,
+ action_type=ActionType.NAVIGATION,
+ context={"url": "https://example.com"}
+ )
+
+ # Update browser state
+ logger.update_browser_state(
+ url="https://example.com",
+ page_ready=True,
+ dynamic_content_loaded=True,
+ visible_elements=15,
+ page_title="Example Domain"
+ )
+
+ # Complete navigation
+ logger.update_step(
+ "Page loaded successfully",
+ TaskStatus.COMPLETE,
+ action_type=ActionType.NAVIGATION,
+ progress=0.25,
+ results={"status": 200, "load_time": 0.5}
+ )
+
+ # Start interaction phase
+ logger.start_phase("Interaction Phase")
+ logger.update_step(
+ "Click search button",
+ TaskStatus.RUNNING,
+ action_type=ActionType.INTERACTION,
+ context={"element": "search_button"}
+ )
+
+ # Simulate error and retry
+ async def failing_operation():
+ raise ValueError("Search button not found")
+
+ try:
+ await logger.execute_with_retry(
+ failing_operation,
+ "click_search",
+ RetryConfig(max_retries=2, base_delay=0.1)
+ )
+ except ValueError:
+ pass
+
+ # Start extraction phase
+ logger.start_phase("Data Extraction Phase")
+ logger.update_step(
+ "Extract search results",
+ TaskStatus.RUNNING,
+ action_type=ActionType.EXTRACTION,
+ progress=0.75
+ )
+
+ # Complete extraction
+ logger.update_step(
+ "Data extracted successfully",
+ TaskStatus.COMPLETE,
+ action_type=ActionType.EXTRACTION,
+ progress=1.0,
+ results={"items_found": 10}
+ )
+
+ # Display log history
+ print("\nLog History:")
+ print("=" * 80)
+ for entry in logger.get_log_history():
+ print(entry)
+ print("=" * 80)
+
+ # Log final state
+ print("\nFinal State:")
+ logger.log_state()
+
+if __name__ == "__main__":
+ asyncio.run(demo_logging())
\ No newline at end of file
diff --git a/docs/enhanced_tracing.md b/docs/enhanced_tracing.md
new file mode 100644
index 00000000..b69aea07
--- /dev/null
+++ b/docs/enhanced_tracing.md
@@ -0,0 +1,297 @@
+# Enhanced Tracing Documentation
+
+## Overview
+
+The enhanced tracing system provides detailed insights into browser automation actions, decision-making processes, and error recovery strategies. This documentation covers all major components and their usage.
+
+## Components
+
+### 1. Action Context
+Captures detailed information about element states and interactions.
+
+```json
+{
+ "action_context": {
+ "element_state_before": {
+ "visible": true,
+ "computed_styles": {
+ "pointer-events": "auto",
+ "opacity": "1",
+ "z-index": "100"
+ },
+ "focus_state": "not-focused",
+ "accessibility": {
+ "aria-hidden": "false",
+ "aria-disabled": "false"
+ }
+ },
+ "element_state_after": {
+ "visible": true,
+ "focus_state": "focused",
+ "triggered_events": ["click", "focus"]
+ }
+ }
+}
+```
+
+**Key Features:**
+- Before/after state tracking
+- Computed style analysis
+- Focus and accessibility state monitoring
+- Event triggering information
+
+### 2. Decision Trail
+Records the AI model's decision-making process and confidence levels.
+
+```json
+{
+ "decision_trail": {
+ "confidence_threshold": 0.8,
+ "attention_weights": {
+ "element_text": 0.6,
+ "aria_label": 0.3,
+ "position": 0.1
+ },
+ "alternative_paths": [
+ {
+ "action": "click hamburger menu",
+ "rejected_reason": "settings directly visible",
+ "confidence": 0.4
+ }
+ ]
+ }
+}
+```
+
+**Key Features:**
+- Confidence thresholds
+- Attention weight distribution
+- Alternative action consideration
+- Rejection reasoning
+
+### 3. Element Identification
+Provides comprehensive element location and relationship information.
+
+```json
+{
+ "element_identification": {
+ "relative_position": {
+ "from_top_nav": "20px from right",
+ "from_viewport": "top-right quadrant"
+ },
+ "hierarchy": {
+ "parent": "nav.top-bar",
+ "siblings": ["button.new-template", "button.help"],
+ "children": ["span.icon", "span.text"]
+ }
+ }
+}
+```
+
+**Key Features:**
+- Relative positioning
+- Element hierarchy
+- Sibling relationships
+- Visual landmarks
+
+### 4. Visual State Tracking
+Monitors visual changes and layout shifts during automation.
+
+```json
+{
+ "visual_state": {
+ "screenshot_diffs": {
+ "before_click": "diff_1.png",
+ "after_click": "diff_2.png",
+ "changes_highlighted": true
+ },
+ "layout_shifts": [
+ {
+ "timestamp": "T+100ms",
+ "elements_moved": ["#settings-panel"],
+ "cumulative_layout_shift": 0.1
+ }
+ ]
+ }
+}
+```
+
+**Key Features:**
+- Screenshot diffing
+- Layout shift tracking
+- Element visibility analysis
+- Viewport position monitoring
+
+### 5. Error Recovery
+Provides sophisticated error handling and recovery strategies.
+
+```json
+{
+ "error_recovery": {
+ "retry_strategy": {
+ "backoff": "exponential",
+ "max_attempts": 3,
+ "conditions": {
+ "network_stable": true,
+ "animations_complete": true
+ }
+ },
+ "environment_factors": {
+ "network_conditions": {
+ "latency": "50ms",
+ "bandwidth": "10Mbps"
+ }
+ }
+ }
+}
+```
+
+**Key Features:**
+- Retry strategies
+- Environmental monitoring
+- Recovery checkpoints
+- State restoration
+
+### 6. Performance Monitoring
+Tracks timing and performance metrics.
+
+```json
+{
+ "timing_analysis": {
+ "action_breakdown": {
+ "element_search": "150ms",
+ "interaction_delay": "50ms",
+ "animation_duration": "200ms"
+ },
+ "performance_markers": {
+ "first_paint": "100ms",
+ "first_contentful_paint": "200ms"
+ }
+ }
+}
+```
+
+**Key Features:**
+- Action timing breakdown
+- Performance markers
+- Cumulative timing
+- Resource utilization
+
+## Usage
+
+### Basic Usage
+```python
+from src.trace_analyzer import EnhancedTraceAnalyzer
+
+analyzer = EnhancedTraceAnalyzer(trace_file_path)
+result = await analyzer.analyze_all()
+```
+
+### Component-Specific Analysis
+```python
+# Analyze specific components
+timing = await analyzer.analyze_timing()
+visual = await analyzer.analyze_visual_state()
+recovery = await analyzer.analyze_error_recovery()
+```
+
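+As an illustrative sketch (assuming `analyze_timing()` returns the `timing_analysis` structure shown earlier), the per-action breakdown can be inspected directly:
+
+```python
+# Sketch only: keys follow the timing_analysis example earlier in this document
+timing = await analyzer.analyze_timing()
+breakdown = timing.get("timing_analysis", {}).get("action_breakdown", {})
+for action, duration in breakdown.items():
+    print(f"{action}: {duration}")
+```
+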
+### Error Recovery Integration
+```python
+recovery_info = await analyzer.analyze_recovery_info()
+if recovery_info["retry_strategy"]["backoff"] == "exponential":
+ # Implement exponential backoff retry logic
+```
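+
+This pairs naturally with the `ErrorHandler` added in `src/utils/error_handling.py`. A minimal sketch, with `perform_action` standing in for whatever browser step is being retried:
+
+```python
+from src.utils.error_handling import ErrorHandler
+
+handler = ErrorHandler()
+while True:
+    try:
+        await perform_action()  # placeholder for the flaky browser step
+        break
+    except Exception as error:
+        # Sleeps with exponential backoff; raises MaxRetriesExceededError once the retry budget (3) is spent
+        await handler.handle_error(error, operation="click_settings")
+```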
+
+## Best Practices
+
+1. **Performance Optimization**
+ - Monitor cumulative timing metrics
+ - Track resource utilization
+ - Optimize retry strategies
+
+2. **Error Recovery**
+ - Use exponential backoff for retries
+ - Monitor environmental factors
+ - Maintain state checkpoints
+
+3. **Visual Verification**
+ - Use screenshot diffs for validation
+ - Monitor layout shifts
+ - Track element visibility
+
+4. **Decision Making**
+ - Review confidence thresholds
+ - Analyze attention weights
+ - Consider alternative paths
+
+## Common Issues and Solutions
+
+### 1. Element Not Found
+```json
+{
+ "error_recovery": {
+ "retry_strategy": {
+ "backoff": "exponential",
+ "conditions": {
+ "animations_complete": true
+ }
+ }
+ }
+}
+```
+**Solution:** Wait for animations to complete and retry with exponential backoff.
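+
+A minimal sketch of this pattern using the `RetryConfig` helper added in `src/utils/task_logging.py` (`find_element` and `animations_complete` are hypothetical placeholders for the real checks):
+
+```python
+import asyncio
+from src.utils.task_logging import RetryConfig
+
+retry = RetryConfig(max_retries=3)
+for attempt in range(retry.max_retries + 1):
+    await asyncio.sleep(retry.get_delay(attempt))  # 0s first, then exponential backoff with jitter
+    # find_element / animations_complete are placeholders for the real checks
+    if await animations_complete() and await find_element("#settings"):
+        break
+```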
+
+### 2. Layout Shifts
+```json
+{
+ "visual_state": {
+ "layout_shifts": [
+ {
+ "cumulative_layout_shift": 0.1
+ }
+ ]
+ }
+}
+```
+**Solution:** Monitor CLS and wait for layout stability before interactions.
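+
+A minimal sketch using `analyze_visual_state()` from this analyzer, flagging steps that exceed a chosen CLS budget of 0.1:
+
+```python
+visual = await analyzer.analyze_visual_state()
+if visual["cumulative_layout_shift"] > 0.1:
+    unstable = [step["step_id"] for step in visual["visual_changes"] if step["layout_shifts"]]
+    print(f"Layout unstable during steps: {unstable}")
+```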
+
+### 3. Network Issues
+```json
+{
+ "environment_factors": {
+ "network_conditions": {
+ "stability": "unstable"
+ }
+ }
+}
+```
+**Solution:** Implement network condition checks in retry strategy.
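+
+A minimal sketch using `analyze_error_recovery()` from this analyzer (assuming the trace records `environment_factors` as in the error-recovery example above), surfacing failures seen under unstable network conditions so the retry strategy can extend its backoff:
+
+```python
+recovery = await analyzer.analyze_error_recovery()
+for step in recovery["error_steps"]:
+    network = step["environment_factors"].get("network_conditions", {})
+    if network.get("stability") == "unstable":
+        print(f"Step {step['step_id']} failed on an unstable network; extend the backoff before retrying")
+```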
+
+## API Reference
+
+### EnhancedTraceAnalyzer Methods
+
+#### analyze_action_context()
+Returns detailed information about element states and interactions.
+
+#### analyze_decision_trail()
+Returns the AI model's decision-making process and confidence levels.
+
+#### analyze_element_identification()
+Returns comprehensive element location and relationship information.
+
+#### analyze_visual_state()
+Returns visual changes and layout shift information.
+
+#### analyze_error_recovery()
+Returns error handling and recovery strategies.
+
+#### analyze_timing()
+Returns detailed timing and performance metrics.
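+
+All of the above can be gathered in one pass with `analyze_all()`. A minimal usage sketch (the trace path is illustrative):
+
+```python
+analyzer = EnhancedTraceAnalyzer("traces/trace.zip")
+report = await analyzer.analyze_all()
+print("Failed steps:", report["failure_analysis"]["failed_steps_count"])
+print("Total step time:", report["timing_analysis"]["summary"]["total_duration"])
+```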
+
+## Contributing
+
+When adding new tracing features:
+
+1. Follow the existing data structure pattern
+2. Add comprehensive test coverage
+3. Update documentation with examples
+4. Include error handling cases
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
new file mode 100644
index 00000000..c260ce12
--- /dev/null
+++ b/pyproject.toml
@@ -0,0 +1,23 @@
+[build-system]
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "browser-use"
+version = "0.1.19"
+authors = [
+ { name = "Your Name", email = "your.email@example.com" }
+]
+description = "A Python package for browser automation with AI"
+readme = "README.md"
+requires-python = ">=3.11"
+classifiers = [
+ "Programming Language :: Python :: 3",
+ "License :: OSI Approved :: MIT License",
+ "Operating System :: OS Independent",
+]
+
+[tool.setuptools.packages.find]
+where = ["."]
+include = ["src*"]
+namespaces = false
\ No newline at end of file
diff --git a/pytest.ini b/pytest.ini
new file mode 100644
index 00000000..88001be3
--- /dev/null
+++ b/pytest.ini
@@ -0,0 +1,19 @@
+[pytest]
+asyncio_mode = auto
+asyncio_default_fixture_loop_scope = function
+
+# Test discovery
+testpaths = tests
+python_files = test_*.py
+python_classes = Test*
+python_functions = test_*
+
+# Output configuration
+console_output_style = count
+log_cli = True
+log_cli_level = INFO
+
+# Warnings
+filterwarnings =
+ ignore::DeprecationWarning
+ ignore::pytest.PytestDeprecationWarning
\ No newline at end of file
diff --git a/pytest_output.txt b/pytest_output.txt
new file mode 100644
index 00000000..fe9b67ce
--- /dev/null
+++ b/pytest_output.txt
@@ -0,0 +1,64 @@
+============================= test session starts ==============================
+platform darwin -- Python 3.11.9, pytest-8.3.4, pluggy-1.5.0 -- /Users/dmieloch/Dev/experiments/web-ui/venv/bin/python
+cachedir: .pytest_cache
+rootdir: /Users/dmieloch/Dev/experiments/web-ui
+configfile: pytest.ini
+plugins: cov-6.0.0, asyncio-0.25.2, anyio-4.8.0, timeout-2.3.1
+asyncio: mode=Mode.AUTO, asyncio_default_fixture_loop_scope=function
+collecting ...
+----------------------------- live log collection ------------------------------
+INFO root:service.py:51 Anonymized telemetry enabled. See https://github.com/gregpr07/browser-use for more information.
+INFO httpx:_client.py:1038 HTTP Request: GET https://api.gradio.app/gradio-messaging/en "HTTP/1.1 200 OK"
+collected 28 items
+
+tests/test_browser_cli.py::TestBrowserInitialization::test_basic_initialization
+-------------------------------- live log setup --------------------------------
+INFO tests.test_browser_cli:test_browser_cli.py:28 Cleanup start - Browser state: False
+INFO tests.test_browser_cli:test_browser_cli.py:39 Globals and environment reset before test
+PASSED [ 1/28]
+------------------------------ live log teardown -------------------------------
+INFO tests.test_browser_cli:test_browser_cli.py:45 Cleanup finally - Browser state: False
+INFO tests.test_browser_cli:test_browser_cli.py:65 Globals and environment reset after test
+
+tests/test_browser_cli.py::TestBrowserInitialization::test_window_size
+-------------------------------- live log setup --------------------------------
+INFO tests.test_browser_cli:test_browser_cli.py:28 Cleanup start - Browser state: False
+INFO tests.test_browser_cli:test_browser_cli.py:39 Globals and environment reset before test
+-------------------------------- live log call ---------------------------------
+INFO src.agent.custom_agent:custom_agent.py:356 🚀 Starting task: go to data:text/html,
+INFO src.agent.custom_agent:custom_agent.py:196
+📍 Step 1
+INFO httpx:_client.py:1786 HTTP Request: POST https://api.deepseek.com/v1/chat/completions "HTTP/1.1 422 Unprocessable Entity"
+INFO httpx:_client.py:1038 HTTP Request: POST https://api.deepseek.com/v1/chat/completions "HTTP/1.1 200 OK"
+INFO src.agent.custom_agent:custom_agent.py:128 🤷 Eval: Unknown - No previous actions to evaluate.
+INFO src.agent.custom_agent:custom_agent.py:129 🧠 New Memory:
+INFO src.agent.custom_agent:custom_agent.py:130 ⏳ Task Progress:
+INFO src.agent.custom_agent:custom_agent.py:131 🤔 Thought: The task requires navigating to a specific URL to display the window size. The current page is 'about:blank', and no actions have been taken yet.
+INFO src.agent.custom_agent:custom_agent.py:132 🎯 Summary: Navigate to the specified URL to display the window size.
+INFO src.agent.custom_agent:custom_agent.py:134 🛠️ Action 1/1: {"go_to_url":{"url":"data:text/html,"}}
+INFO src.agent.custom_agent:custom_agent.py:207 🧠 All Memory:
+INFO browser_use.controller.service:service.py:59 🔗 Navigated to data:text/html,
+INFO src.agent.custom_agent:custom_agent.py:196
+📍 Step 2
+INFO httpx:_client.py:1786 HTTP Request: POST https://api.deepseek.com/v1/chat/completions "HTTP/1.1 422 Unprocessable Entity"
+INFO httpx:_client.py:1038 HTTP Request: POST https://api.deepseek.com/v1/chat/completions "HTTP/1.1 200 OK"
+INFO src.agent.custom_agent:custom_agent.py:128 ✅ Eval: Success - Successfully navigated to the specified URL and displayed the window size.
+INFO src.agent.custom_agent:custom_agent.py:129 🧠 New Memory: Window size: 800x600
+INFO src.agent.custom_agent:custom_agent.py:130 ⏳ Task Progress: 1. Navigated to the specified URL to display the window size.
+INFO src.agent.custom_agent:custom_agent.py:131 🤔 Thought: The task has been completed as the window size is now displayed on the page. No further actions are required.
+INFO src.agent.custom_agent:custom_agent.py:132 🎯 Summary: The task is complete. The window size is displayed as 800x600.
+INFO src.agent.custom_agent:custom_agent.py:134 🛠️ Action 1/1: {"done":{"text":"The task is complete. The window size is displayed as 800x600."}}
+INFO src.agent.custom_agent:custom_agent.py:207 🧠 All Memory: Window size: 800x600
+
+INFO src.agent.custom_agent:custom_agent.py:218 📄 Result: The task is complete. The window size is displayed as 800x600.
+INFO src.agent.custom_agent:custom_agent.py:399 ✅ Task completed successfully
+WARNING src.agent.custom_agent:custom_agent.py:260 No history or first screenshot to create GIF from
+PASSED [ 2/28]
+------------------------------ live log teardown -------------------------------
+INFO tests.test_browser_cli:test_browser_cli.py:45 Cleanup finally - Browser state: False
+INFO tests.test_browser_cli:test_browser_cli.py:65 Globals and environment reset after test
+
+tests/test_browser_cli.py::TestBrowserInitialization::test_headless_mode
+-------------------------------- live log setup --------------------------------
+INFO tests.test_browser_cli:test_browser_cli.py:28 Cleanup start - Browser state: False
+INFO tests.test_browser_cli:test_browser_cli.py:39 Globals and environment reset before test
diff --git a/src/__init__.py b/src/__init__.py
index 93fbe7f8..0edfbf30 100644
--- a/src/__init__.py
+++ b/src/__init__.py
@@ -3,4 +3,23 @@
# @Author : wenshao
# @Email : wenshaoguo1026@gmail.com
# @Project : browser-use-webui
-# @FileName: __init__.py.py
+# @FileName: __init__.py
+
+from browser_use.browser.browser import Browser
+from browser_use.browser.browser import BrowserConfig
+from browser_use.browser.context import BrowserContextConfig, BrowserContextWindowSize
+from .agent.custom_agent import CustomAgent
+from .controller.custom_controller import CustomController
+from .agent.custom_prompts import CustomSystemPrompt
+from .utils import utils
+
+__all__ = [
+ 'Browser',
+ 'BrowserConfig',
+ 'BrowserContextConfig',
+ 'BrowserContextWindowSize',
+ 'CustomAgent',
+ 'CustomController',
+ 'CustomSystemPrompt',
+ 'utils'
+]
diff --git a/src/agent/custom_agent.py b/src/agent/custom_agent.py
index ff8908c8..0332067d 100644
--- a/src/agent/custom_agent.py
+++ b/src/agent/custom_agent.py
@@ -8,11 +8,12 @@
import logging
import pdb
import traceback
-from typing import Optional, Type
+from typing import Optional, Type, Any, Dict
from PIL import Image, ImageDraw, ImageFont
import os
import base64
import io
+import datetime
+import json
from browser_use.agent.prompts import SystemPrompt
from browser_use.agent.service import Agent
@@ -37,11 +38,13 @@
BaseMessage,
)
from src.utils.agent_state import AgentState
+from src.utils.logging import BatchedEventLogger
from .custom_massage_manager import CustomMassageManager
from .custom_views import CustomAgentOutput, CustomAgentStepInfo
logger = logging.getLogger(__name__)
+batched_logger = BatchedEventLogger(logger)
class CustomAgent(Agent):
@@ -117,23 +120,41 @@ def _setup_action_models(self) -> None:
self.AgentOutput = CustomAgentOutput.type_with_custom_actions(self.ActionModel)
def _log_response(self, response: CustomAgentOutput) -> None:
- """Log the model's response"""
- if "Success" in response.current_state.prev_action_evaluation:
- emoji = "✅"
- elif "Failed" in response.current_state.prev_action_evaluation:
- emoji = "❌"
- else:
- emoji = "🤷"
-
- logger.info(f"{emoji} Eval: {response.current_state.prev_action_evaluation}")
- logger.info(f"🧠 New Memory: {response.current_state.important_contents}")
- logger.info(f"⏳ Task Progress: {response.current_state.completed_contents}")
- logger.info(f"🤔 Thought: {response.current_state.thought}")
- logger.info(f"🎯 Summary: {response.current_state.summary}")
+ """Log the model's response in a structured format"""
+ evaluation_status = "success" if "Success" in response.current_state.prev_action_evaluation else "failed"
+
+ log_data = {
+ "timestamp": datetime.datetime.now().isoformat(),
+ "action": "model_response",
+ "status": evaluation_status,
+ "state": {
+ "evaluation": response.current_state.prev_action_evaluation,
+ "memory": response.current_state.important_contents,
+ "progress": response.current_state.completed_contents,
+ "thought": response.current_state.thought,
+ "summary": response.current_state.summary
+ }
+ }
+
+ logger.info(
+ f"Model Response: {evaluation_status}",
+ extra={
+ "event_type": "model_response",
+ "event_data": log_data
+ }
+ )
+
+ # Batch action logging
for i, action in enumerate(response.action):
- logger.info(
- f"🛠️ Action {i + 1}/{len(response.action)}: {action.model_dump_json(exclude_unset=True)}"
+ batched_logger.add_event(
+ "action",
+ {
+ "action_number": i + 1,
+ "total_actions": len(response.action),
+ "action_data": json.loads(action.model_dump_json(exclude_unset=True))
+ }
)
+ batched_logger.flush()
def update_step_info(
self, model_output: CustomAgentOutput, step_info: CustomAgentStepInfo = None
@@ -193,7 +214,19 @@ async def get_next_action(self, input_messages: list[BaseMessage]) -> AgentOutpu
@time_execution_async("--step")
async def step(self, step_info: Optional[CustomAgentStepInfo] = None) -> None:
"""Execute one step of the task"""
- logger.info(f"\n📍 Step {self.n_steps}")
+ step_data = {
+ "step_number": self.n_steps,
+ "timestamp": datetime.datetime.now().isoformat()
+ }
+
+ logger.info(
+ f"Starting step {self.n_steps}",
+ extra={
+ "event_type": "step_start",
+ "event_data": step_data
+ }
+ )
+
state = None
model_output = None
result: list[ActionResult] = []
@@ -204,9 +237,18 @@ async def step(self, step_info: Optional[CustomAgentStepInfo] = None) -> None:
input_messages = self.message_manager.get_messages()
model_output = await self.get_next_action(input_messages)
self.update_step_info(model_output, step_info)
- logger.info(f"🧠 All Memory: {step_info.memory}")
+
+ if step_info:
+ logger.debug(
+ "Step memory updated",
+ extra={
+ "event_type": "memory_update",
+ "event_data": {"memory": step_info.memory}
+ }
+ )
+
self._save_conversation(input_messages, model_output)
- self.message_manager._remove_last_state_message() # we dont want the whole state in the chat history
+ self.message_manager._remove_last_state_message()
self.message_manager.add_model_output(model_output)
result: list[ActionResult] = await self.controller.multi_act(
@@ -215,17 +257,37 @@ async def step(self, step_info: Optional[CustomAgentStepInfo] = None) -> None:
self._last_result = result
if len(result) > 0 and result[-1].is_done:
- logger.info(f"📄 Result: {result[-1].extracted_content}")
+ logger.info(
+ "Task completed",
+ extra={
+ "event_type": "task_complete",
+ "event_data": {
+ "result": result[-1].extracted_content
+ }
+ }
+ )
self.consecutive_failures = 0
except Exception as e:
result = self._handle_step_error(e)
self._last_result = result
+ logger.error(
+ f"Step error: {str(e)}",
+ extra={
+ "event_type": "step_error",
+ "event_data": {
+ "error": str(e),
+ "traceback": traceback.format_exc()
+ }
+ },
+ exc_info=True
+ )
finally:
if not result:
return
+
for r in result:
if r.error:
self.telemetry.capture(
@@ -234,8 +296,28 @@ async def step(self, step_info: Optional[CustomAgentStepInfo] = None) -> None:
error=r.error,
)
)
+ logger.error(
+ f"Action error: {r.error}",
+ extra={
+ "event_type": "action_error",
+ "event_data": {
+ "error": r.error
+ }
+ }
+ )
+
if state:
self._make_history_item(model_output, state, result)
+
+ step_data["status"] = "completed"
+ logger.info(
+ f"Step {self.n_steps} completed",
+ extra={
+ "event_type": "step_complete",
+ "event_data": step_data
+ }
+ )
+
def create_history_gif(
self,
output_path: str = 'agent_history.gif',
diff --git a/src/agent/custom_prompts.py b/src/agent/custom_prompts.py
index 56aeb64b..b64b3b9f 100644
--- a/src/agent/custom_prompts.py
+++ b/src/agent/custom_prompts.py
@@ -66,9 +66,10 @@ def important_rules(self) -> str:
- Use scroll to find elements you are looking for
5. TASK COMPLETION:
- - If you think all the requirements of user\'s instruction have been completed and no further operation is required, output the done action to terminate the operation process.
+ - If you think all the requirements of user's instruction have been completed and no further operation is required, output the done action to terminate the operation process.
- Don't hallucinate actions.
- If the task requires specific information - make sure to include everything in the done function. This is what the user will see.
+ - When generating reports about page structure, always include the page title and headings.
- If you are running out of steps (current step), think about speeding it up, and ALWAYS use the done action as the last action.
6. VISUAL CONTEXT:
@@ -163,13 +164,13 @@ def __init__(
def get_user_message(self) -> HumanMessage:
state_description = f"""
- 1. Task: {self.step_info.task}
+ 1. Task: {self.step_info.task if self.step_info else ""}
2. Hints(Optional):
- {self.step_info.add_infos}
+ {self.step_info.add_infos if self.step_info else ""}
3. Memory:
- {self.step_info.memory}
+ {self.step_info.memory if self.step_info else ""}
4. Task Progress:
- {self.step_info.task_progress}
+ {self.step_info.task_progress if self.step_info else ""}
5. Current url: {self.state.url}
6. Available tabs:
{self.state.tabs}
diff --git a/src/browser/custom_context.py b/src/browser/custom_context.py
index 6de991bf..c0aa1961 100644
--- a/src/browser/custom_context.py
+++ b/src/browser/custom_context.py
@@ -8,6 +8,7 @@
import json
import logging
import os
+from pathlib import Path
from browser_use.browser.browser import Browser
from browser_use.browser.context import BrowserContext, BrowserContextConfig
@@ -25,6 +26,7 @@ def __init__(
config: BrowserContextConfig = BrowserContextConfig()
):
super(CustomBrowserContext, self).__init__(browser=browser, config=config)
+ self._context = None
async def _create_context(self, browser: PlaywrightBrowser) -> PlaywrightBrowserContext:
"""Creates a new browser context with anti-detection measures and loads cookies if available."""
@@ -93,4 +95,20 @@ async def _create_context(self, browser: PlaywrightBrowser) -> PlaywrightBrowser
"""
)
+ self._context = context
return context
+
+ @property
+ def context(self) -> PlaywrightBrowserContext | None:
+ """Get the underlying Playwright browser context."""
+ return self._context
+
+ async def close(self):
+ """Close the browser context and stop tracing if enabled."""
+ if self.config.trace_path and self._context:
+ trace_path = Path(self.config.trace_path)
+ trace_path.parent.mkdir(parents=True, exist_ok=True)
+ if not trace_path.suffix:
+ trace_path = trace_path / "trace.zip"
+ await self._context.tracing.stop(path=str(trace_path))
+ await super().close()
diff --git a/src/controller/custom_controller.py b/src/controller/custom_controller.py
index 6e57dd4a..21a56b5a 100644
--- a/src/controller/custom_controller.py
+++ b/src/controller/custom_controller.py
@@ -8,6 +8,7 @@
from browser_use.agent.views import ActionResult
from browser_use.browser.context import BrowserContext
from browser_use.controller.service import Controller
+from browser_use.browser.views import BrowserState
class CustomController(Controller):
@@ -31,3 +32,8 @@ async def paste_from_clipboard(browser: BrowserContext):
await page.keyboard.type(text)
return ActionResult(extracted_content=text)
+
+ async def get_browser_state(self, browser_context: BrowserContext) -> BrowserState:
+ """Get the current state of the browser"""
+ state = await browser_context.get_state(use_vision=True)
+ return state
diff --git a/src/trace_analyzer.py b/src/trace_analyzer.py
new file mode 100644
index 00000000..0590dd5e
--- /dev/null
+++ b/src/trace_analyzer.py
@@ -0,0 +1,644 @@
+import json
+import zipfile
+from pathlib import Path
+from typing import Dict, List, Optional, Any
+import asyncio
+
+class PlaywrightTrace:
+ def __init__(self, trace_path: str):
+ self.trace_path = Path(trace_path)
+ self.actions: List[Dict[str, Any]] = []
+ self.network_requests: List[Dict[str, Any]] = []
+ self.console_logs: List[str] = []
+ self.errors: List[str] = []
+
+ @classmethod
+ async def parse(cls, trace_path: str) -> 'PlaywrightTrace':
+ """Parse a Playwright trace file and return a PlaywrightTrace instance."""
+ trace = cls(trace_path)
+ await trace._parse_trace_file()
+ return trace
+
+ async def _parse_trace_file(self):
+ """Parse the trace.zip file and extract relevant information."""
+ if not self.trace_path.exists():
+ raise FileNotFoundError(f"Trace file not found: {self.trace_path}")
+
+ try:
+ with zipfile.ZipFile(self.trace_path, 'r') as zip_ref:
+ # List all files in the zip
+ files = zip_ref.namelist()
+
+ # Parse trace files
+ for file in files:
+ if file.endswith('.trace'):
+ trace_data = zip_ref.read(file).decode('utf-8')
+ for line in trace_data.split('\n'):
+ if line.strip():
+ try:
+ event = json.loads(line)
+ self._process_event(event)
+ except json.JSONDecodeError:
+ self.errors.append(f"Failed to parse trace event: {line}")
+
+ # Parse network HAR if available
+ har_files = [f for f in files if f.endswith('.har')]
+ if har_files:
+ har_data = json.loads(zip_ref.read(har_files[0]).decode('utf-8'))
+ self._process_har(har_data)
+
+ except zipfile.BadZipFile:
+ raise ValueError(f"Invalid trace file format: {self.trace_path}")
+
+ def _process_event(self, event: Dict[str, Any]):
+ """Process a single trace event and categorize it."""
+ if 'type' not in event:
+ return
+
+ event_type = event['type']
+
+ if event_type == 'before' or event_type == 'after':
+ # Handle action events
+ if 'method' in event and 'params' in event:
+ self.actions.append({
+ 'type': event['method'],
+ 'timestamp': event.get('timestamp', 0),
+ 'duration': event.get('duration', 0),
+ 'params': event['params'],
+ 'success': event_type == 'after' and 'error' not in event,
+ 'error': event.get('error')
+ })
+ elif event_type == 'console':
+ # Handle console messages
+ if 'text' in event:
+ self.console_logs.append(event['text'])
+ elif event_type == 'error':
+ # Handle error events
+ if 'error' in event:
+ self.errors.append(event['error'].get('message', str(event['error'])))
+
+ def _process_har(self, har_data: Dict[str, Any]):
+ """Process HAR data to extract network requests."""
+ if 'log' in har_data and 'entries' in har_data['log']:
+ for entry in har_data['log']['entries']:
+ request = entry.get('request', {})
+ response = entry.get('response', {})
+
+ self.network_requests.append({
+ 'url': request.get('url'),
+ 'method': request.get('method'),
+ 'status': response.get('status'),
+ 'statusText': response.get('statusText'),
+ 'duration': entry.get('time'), # in milliseconds
+ 'failure': response.get('status', 0) >= 400
+ })
+
+async def analyze_trace(trace_path: str) -> dict:
+ """Parse a Playwright trace file and return structured data."""
+ trace = await PlaywrightTrace.parse(trace_path)
+ return {
+ "actions": trace.actions,
+ "network_requests": trace.network_requests,
+ "console_logs": trace.console_logs,
+ "errors": trace.errors,
+ "summary": {
+ "total_actions": len(trace.actions),
+ "failed_actions": sum(1 for a in trace.actions if not a['success']),
+ "total_requests": len(trace.network_requests),
+ "failed_requests": sum(1 for r in trace.network_requests if r.get('failure')),
+ "total_errors": len(trace.errors),
+ "error_summary": "\n".join(trace.errors) if trace.errors else "No errors"
+ }
+ }
+
+if __name__ == "__main__":
+ # Example usage
+ async def main():
+ result = await analyze_trace("path/to/trace.zip")
+ print(json.dumps(result, indent=2))
+
+ asyncio.run(main())
+
+class EnhancedTraceAnalyzer:
+ """Enhanced trace analyzer for detailed browser automation insights.
+
+ This class provides comprehensive analysis of browser automation traces, including:
+ - Action context and element states
+ - Decision-making processes and confidence levels
+ - Element identification and relationships
+ - Visual state changes and layout shifts
+ - Error recovery strategies
+ - Performance metrics and timing analysis
+
+ Example:
+ ```python
+ analyzer = EnhancedTraceAnalyzer("trace.zip")
+ result = await analyzer.analyze_all()
+
+ # Component-specific analysis
+ timing = await analyzer.analyze_timing()
+ visual = await analyzer.analyze_visual_state()
+ ```
+ """
+
+ def __init__(self, trace_file_path: str):
+ """Initialize the enhanced trace analyzer.
+
+ Args:
+ trace_file_path: Path to the trace file (ZIP format) containing enhanced trace data.
+ """
+ self.trace_file_path = trace_file_path
+ self._trace_data: Optional[Dict[str, Any]] = None
+
+ async def _load_trace_data(self) -> Dict[str, Any]:
+ """Load and validate enhanced trace data from the trace file.
+
+ Returns:
+ Dict containing the parsed trace data.
+
+ Raises:
+ ValueError: If the trace file is invalid or cannot be parsed.
+ """
+ if self._trace_data is None:
+ try:
+ trace_path = Path(self.trace_file_path)
+
+ # Handle nested directory structure
+ if trace_path.is_dir():
+ trace_zip = trace_path / 'trace.zip'
+ if trace_zip.is_dir():
+ trace_files = list(trace_zip.glob('*.zip'))
+ if not trace_files:
+ raise ValueError("No trace files found")
+ trace_path = trace_files[0]
+ else:
+ raise ValueError("Invalid trace directory structure")
+
+ # Parse Playwright trace
+ with zipfile.ZipFile(trace_path) as zf:
+ # Load trace data
+ with zf.open('trace.trace') as f:
+ trace_events = []
+ for line in f.read().decode('utf-8').splitlines():
+ if line.strip():
+ trace_events.append(json.loads(line))
+
+ # Load network data
+ with zf.open('trace.network') as f:
+ network_events = []
+ for line in f.read().decode('utf-8').splitlines():
+ if line.strip():
+ network_events.append(json.loads(line))
+
+ # Convert to enhanced trace format
+ self._trace_data = self._convert_playwright_trace(trace_events, network_events)
+
+ except Exception as e:
+ raise ValueError(f"Failed to load trace data: {str(e)}")
+
+ return self._trace_data
+
+ def _convert_playwright_trace(self, trace_events: List[Dict[str, Any]], network_events: List[Dict[str, Any]]) -> Dict[str, Any]:
+ """Convert Playwright trace format to enhanced trace format."""
+ # Extract metadata
+ metadata = {
+ "session_id": trace_events[0].get('sessionId', 'unknown'),
+ "timestamp": trace_events[0].get('timestamp', 0),
+ "browser_info": {
+ "viewport": next(
+ (e.get('params', {}).get('viewport') for e in trace_events
+ if e.get('method') == 'setViewportSize'),
+ {"width": 0, "height": 0}
+ ),
+ "user_agent": next(
+ (e.get('params', {}).get('userAgent') for e in trace_events
+ if e.get('method') == 'setUserAgent'),
+ "unknown"
+ )
+ }
+ }
+
+ # Extract steps
+ steps = []
+ current_step = None
+
+ for event in trace_events:
+ if event.get('type') == 'before':
+ if current_step:
+ steps.append(current_step)
+ current_step = {
+ "step_id": len(steps) + 1,
+ "action": event.get('method', 'unknown'),
+ "target": event.get('params', {}).get('selector', ''),
+ "timing": {
+ "start": event.get('timestamp', 0),
+ "end": None,
+ "duration": None
+ },
+ "status": "pending",
+ "error_context": None,
+ "visual_state": {
+ "screenshot_diffs": {},
+ "element_visibility": {},
+ "layout_shifts": []
+ },
+ "action_context": {
+ "element_state": event.get('params', {}),
+ "viewport_state": metadata['browser_info']['viewport']
+ }
+ }
+ elif event.get('type') == 'after' and current_step:
+ current_step['timing']['end'] = event.get('timestamp', 0)
+ current_step['timing']['duration'] = (
+ current_step['timing']['end'] - current_step['timing']['start']
+ )
+ current_step['status'] = 'error' if 'error' in event else 'success'
+ if 'error' in event:
+ current_step['error_context'] = {
+ "error_type": event['error'].get('name', 'unknown'),
+ "message": event['error'].get('message', ''),
+ "stack": event['error'].get('stack', '')
+ }
+
+ if current_step:
+ steps.append(current_step)
+
+ # Add network information
+ network_info = {
+ "requests": [
+ {
+ "url": event.get('params', {}).get('url'),
+ "method": event.get('params', {}).get('method'),
+ "status": event.get('params', {}).get('status'),
+ "timing": event.get('params', {}).get('timing')
+ }
+ for event in network_events
+ if event.get('method') == 'Network.responseReceived'
+ ]
+ }
+
+ return {
+ "metadata": metadata,
+ "steps": steps,
+ "network": network_info,
+ "performance": {
+ "navigation_timing": {
+ "dom_complete": next(
+ (e.get('timestamp', 0) for e in trace_events
+ if e.get('method') == 'domcontentloaded'),
+ 0
+ ),
+ "load_complete": next(
+ (e.get('timestamp', 0) for e in trace_events
+ if e.get('method') == 'load'),
+ 0
+ )
+ },
+ "interaction_timing": {
+ "time_to_first_interaction": next(
+ (e.get('timestamp', 0) for e in trace_events
+ if e.get('type') == 'before' and e.get('method') in ['click', 'fill']),
+ 0
+ ) - metadata['timestamp'],
+ "action_latency": sum(
+ step['timing']['duration'] for step in steps
+ if step['timing']['duration'] is not None
+ ) / len(steps) if steps else 0
+ }
+ }
+ }
+
+ async def analyze_action_context(self) -> Dict[str, Any]:
+ """Analyze the context of actions including before/after states."""
+ trace_data = await self._load_trace_data()
+ steps = trace_data["steps"]
+
+ return {
+ "steps": [
+ {
+ "step_id": step["step_id"],
+ "action": step["action"],
+ "target": step["target"],
+ "element_state": step["action_context"]["element_state"],
+ "viewport_state": step["action_context"]["viewport_state"]
+ }
+ for step in steps
+ ]
+ }
+
+ async def analyze_decision_trail(self) -> Dict[str, Any]:
+ """Analyze the decision making process and alternatives considered."""
+ trace_data = await self._load_trace_data()
+ steps = trace_data["steps"]
+
+ return {
+ "steps": [
+ {
+ "step_id": step["step_id"],
+ "action": step["action"],
+ "confidence": step["action_context"]["element_state"].get("confidence", 1.0),
+ "alternatives": step["action_context"]["element_state"].get("alternatives", []),
+ "reasoning": step["action_context"]["element_state"].get("reasoning", [])
+ }
+ for step in steps
+ ]
+ }
+
+ async def analyze_element_identification(self) -> Dict[str, Any]:
+ """Analyze methods used to identify elements."""
+ trace_data = await self._load_trace_data()
+ steps = trace_data["steps"]
+
+ return {
+ "steps": [
+ {
+ "step_id": step["step_id"],
+ "target": step["target"],
+ "selector": step["action_context"]["element_state"].get("selector", ""),
+ "position": step["action_context"]["element_state"].get("position", {}),
+ "relationships": step["action_context"]["element_state"].get("relationships", {})
+ }
+ for step in steps
+ ]
+ }
+
+ async def analyze_failures(self) -> Dict[str, Any]:
+ """Analyze failure scenarios and recovery attempts."""
+ trace_data = await self._load_trace_data()
+ steps = trace_data["steps"]
+ failed_steps = [step for step in steps if step["status"] == "error"]
+
+ return {
+ "failed_steps": [
+ {
+ "step_id": step["step_id"],
+ "action": step["action"],
+ "error": step["error_context"],
+ "recovery_attempts": step["action_context"]["element_state"].get("recovery_attempts", [])
+ }
+ for step in failed_steps
+ ],
+ "total_steps": len(steps),
+ "failed_steps_count": len(failed_steps)
+ }
+
+ async def analyze_session_context(self) -> Dict[str, Any]:
+ """Analyze session-wide context including navigation and network activity."""
+ trace_data = await self._load_trace_data()
+
+ return {
+ "metadata": trace_data["metadata"],
+ "network": trace_data["network"],
+ "performance": trace_data["performance"]
+ }
+
+ async def analyze_recovery_info(self) -> Dict[str, Any]:
+ """Analyze recovery information and checkpoints."""
+ trace_data = await self._load_trace_data()
+ steps = trace_data["steps"]
+ recovery_steps = [
+ step for step in steps
+ if step["status"] == "error" and step["action_context"]["element_state"].get("recovery_attempts")
+ ]
+
+ return {
+ "recovery_steps": [
+ {
+ "step_id": step["step_id"],
+ "action": step["action"],
+ "recovery_attempts": step["action_context"]["element_state"]["recovery_attempts"],
+ "final_status": "recovered" if any(
+ attempt.get("success")
+ for attempt in step["action_context"]["element_state"].get("recovery_attempts", [])
+ ) else "failed"
+ }
+ for step in recovery_steps
+ ]
+ }
+
+ async def analyze_model_data(self) -> Dict[str, Any]:
+ """Analyze model-specific data including token usage and vision analysis."""
+ trace_data = await self._load_trace_data()
+ steps = trace_data["steps"]
+
+ return {
+ "steps": [
+ {
+ "step_id": step["step_id"],
+ "action": step["action"],
+ "model_info": step["action_context"]["element_state"].get("model_info", {}),
+ "vision_analysis": step["action_context"]["element_state"].get("vision_analysis", {})
+ }
+ for step in steps
+ ]
+ }
+
+ async def analyze_temporal_context(self) -> Dict[str, Any]:
+ """Analyze temporal information including timing and wait conditions."""
+ trace_data = await self._load_trace_data()
+ steps = trace_data["steps"]
+
+ return {
+ "steps": [
+ {
+ "step_id": step["step_id"],
+ "timing": step["timing"],
+ "wait_conditions": step["action_context"]["element_state"].get("wait_conditions", [])
+ }
+ for step in steps
+ ],
+ "total_duration": sum(
+ step["timing"]["duration"] for step in steps
+ if step["timing"]["duration"] is not None
+ )
+ }
+
+ async def analyze_element_reporting(self) -> Dict[str, Any]:
+ """Analyze enhanced element reporting with detailed selection context."""
+ trace_data = await self._load_trace_data()
+ steps = trace_data["steps"]
+
+ return {
+ "steps": [
+ {
+ "step_id": step["step_id"],
+ "action": step["action"],
+ "target": step["target"],
+ "element_state": step["action_context"]["element_state"],
+ "status": step["status"]
+ }
+ for step in steps
+ ]
+ }
+
+ async def analyze_error_context(self) -> Dict[str, Any]:
+ """Analyze error context and session state information."""
+ trace_data = await self._load_trace_data()
+ steps = trace_data["steps"]
+ error_steps = [step for step in steps if step["status"] == "error"]
+
+ return {
+ "error_steps": [
+ {
+ "step_id": step["step_id"],
+ "action": step["action"],
+ "error_context": step["error_context"],
+ "session_state": {
+ "url": trace_data["metadata"]["browser_info"].get("url"),
+ "viewport": trace_data["metadata"]["browser_info"]["viewport"],
+ "network_status": any(
+                            (req["status"] or 0) >= 400
+ for req in trace_data["network"]["requests"]
+ )
+ }
+ }
+ for step in error_steps
+ ]
+ }
+
+ async def analyze_timing(self) -> Dict[str, Any]:
+ """Analyze detailed interaction timing information."""
+ trace_data = await self._load_trace_data()
+ steps = trace_data["steps"]
+
+ return {
+ "steps": [
+ {
+ "step_id": step["step_id"],
+ "action": step["action"],
+ "timing": {
+ "start": step["timing"]["start"],
+ "end": step["timing"]["end"],
+ "duration": step["timing"]["duration"]
+ }
+ }
+ for step in steps
+ if step["timing"]["duration"] is not None
+ ],
+ "performance": trace_data["performance"],
+ "summary": {
+ "total_duration": sum(
+ step["timing"]["duration"] for step in steps
+ if step["timing"]["duration"] is not None
+ ),
+                # Guard against division by zero when no step has timing data
+                "average_step_duration": sum(
+                    step["timing"]["duration"] for step in steps
+                    if step["timing"]["duration"] is not None
+                ) / max(len([s for s in steps if s["timing"]["duration"] is not None]), 1)
+ }
+ }
+
+ async def analyze_visual_state(self) -> Dict[str, Any]:
+ """Analyze visual state changes with enhanced tracking."""
+ trace_data = await self._load_trace_data()
+ steps = trace_data.get("steps", [])
+
+ visual_analysis = []
+ for step in steps:
+ visual_state = step.get("visual_state", {})
+ visual_analysis.append({
+ "step_id": step["step_id"],
+ "before_action": {
+ "screenshot": visual_state.get("screenshot_diffs", {}).get("before"),
+ "visible_elements": visual_state.get("element_visibility", {}).get("before", [])
+ },
+ "after_action": {
+ "screenshot": visual_state.get("screenshot_diffs", {}).get("after"),
+ "visible_elements": visual_state.get("element_visibility", {}).get("after", []),
+ "added_elements": visual_state.get("element_visibility", {}).get("added", []),
+ "removed_elements": visual_state.get("element_visibility", {}).get("removed", [])
+ },
+ "layout_shifts": visual_state.get("layout_shifts", [])
+ })
+
+ return {
+ "visual_changes": visual_analysis,
+ "cumulative_layout_shift": sum(
+ shift.get("cumulative_layout_shift", 0)
+ for step in visual_analysis
+ for shift in step.get("layout_shifts", [])
+ )
+ }
+
+ async def analyze_error_recovery(self) -> Dict[str, Any]:
+ """Analyze enhanced error recovery capabilities with improved context."""
+ trace_data = await self._load_trace_data()
+ steps = trace_data.get("steps", [])
+ error_steps = [step for step in steps if step.get("status") == "error"]
+
+ recovery_analysis = []
+ for step in error_steps:
+ error_ctx = step.get("error_context", {})
+ recovery_analysis.append({
+ "step_id": step["step_id"],
+ "error_type": error_ctx.get("error_type", "unknown"),
+ "target_element": {
+ "selector": error_ctx.get("target_element", {}).get("selector"),
+ "visible_similar_elements": error_ctx.get("target_element", {}).get("visible_similar_elements", [])
+ },
+ "recovery_attempts": error_ctx.get("recovery_attempts", []),
+ "environment_factors": error_ctx.get("environment_factors", {})
+ })
+
+ return {
+ "error_steps": recovery_analysis,
+ "recovery_success_rate": len([r for r in recovery_analysis if any(
+                attempt.get("outcome") == "success" for attempt in r["recovery_attempts"]
+ )]) / len(recovery_analysis) if recovery_analysis else 1.0
+ }
+
+ async def analyze_performance(self) -> Dict[str, Any]:
+ """Analyze performance metrics including navigation and interaction timing."""
+ trace_data = await self._load_trace_data()
+ performance = trace_data.get("performance", {})
+
+ return {
+ "navigation_timing": performance.get("navigation_timing", {}),
+ "interaction_timing": performance.get("interaction_timing", {}),
+ "metrics_summary": {
+ "avg_action_latency": performance.get("interaction_timing", {}).get("action_latency", 0),
+ "total_interaction_time": sum(
+ step.get("timing", {}).get("duration", 0)
+ for step in trace_data.get("steps", [])
+ )
+ }
+ }
+
+ async def analyze_all(self) -> Dict[str, Any]:
+ """Perform comprehensive analysis of all trace components.
+
+ Returns:
+ Dict containing analysis results from all components:
+ - action_context: Action and element state analysis
+ - decision_trail: Decision-making process analysis
+ - element_identification: Element location and relationships
+ - failure_analysis: Failure scenarios and recovery attempts
+ - session_context: Session-wide context and navigation
+ - recovery_info: Recovery strategies and checkpoints
+ - model_data: Model-specific data and vision analysis
+ - temporal_context: Timing and sequence information
+ - element_reporting: Enhanced element selection reporting
+ - error_context: Error handling and recovery context
+ - timing_analysis: Detailed timing breakdown
+ - visual_state: Visual changes and layout analysis
+ - error_recovery: Enhanced error recovery capabilities
+ - performance: Performance metrics and timing analysis
+ """
+ trace_data = await self._load_trace_data()
+
+ return {
+ "action_context": await self.analyze_action_context(),
+ "decision_trail": await self.analyze_decision_trail(),
+ "element_identification": await self.analyze_element_identification(),
+ "failure_analysis": await self.analyze_failures(),
+ "session_context": await self.analyze_session_context(),
+ "recovery_info": await self.analyze_recovery_info(),
+ "model_data": await self.analyze_model_data(),
+ "temporal_context": await self.analyze_temporal_context(),
+ "element_reporting": await self.analyze_element_reporting(),
+ "error_context": await self.analyze_error_context(),
+ "timing_analysis": await self.analyze_timing(),
+ "visual_state": await self.analyze_visual_state(),
+ "error_recovery": await self.analyze_error_recovery(),
+ "performance": await self.analyze_performance()
+ }
\ No newline at end of file
diff --git a/src/utils/browser_controller.py b/src/utils/browser_controller.py
new file mode 100644
index 00000000..2171574b
--- /dev/null
+++ b/src/utils/browser_controller.py
@@ -0,0 +1,141 @@
+from typing import Optional, Any
+import asyncio
+from playwright.async_api import async_playwright, Browser, Playwright
+from .structured_logging import StructuredLogger, setup_structured_logging
+
+class BrowserController:
+ def __init__(self):
+ self.browser: Optional[Browser] = None
+ self.init_promise: Optional[asyncio.Task] = None
+ self.init_count: int = 0
+ self._playwright: Optional[Playwright] = None
+ self.logger = StructuredLogger("browser_controller")
+ setup_structured_logging()
+
+ async def initialize(self) -> None:
+ """Initialize the browser if not already initialized."""
+ if self.init_promise is not None:
+ try:
+ await self.init_promise
+ except Exception as e:
+ # If the current initialization fails, reset state to allow retry
+ self.init_promise = None
+ self.browser = None
+ self.logger.log_browser_event("initialization_failed", {
+ "error": str(e),
+ "attempt": self.init_count + 1
+ })
+ raise
+
+ if self.browser is not None:
+ return
+
+ # Create new initialization task
+ self.logger.log_progress(
+ step="browser_init",
+ status="starting",
+ progress=0.0,
+ message="Starting browser initialization"
+ )
+ self.init_promise = asyncio.create_task(self._do_browser_init())
+ try:
+ await self.init_promise
+ self.logger.log_progress(
+ step="browser_init",
+ status="completed",
+ progress=1.0,
+ message="Browser initialization completed"
+ )
+ except Exception as e:
+ # Reset state on failure
+ self.init_promise = None
+ self.browser = None
+ self.logger.log_progress(
+ step="browser_init",
+ status="failed",
+ progress=0.0,
+ message=f"Browser initialization failed: {str(e)}"
+ )
+ raise
+
+ async def _do_browser_init(self) -> None:
+ """Internal method to handle browser initialization."""
+ if self.browser is not None:
+ return
+
+ self.logger.log_progress(
+ step="browser_init",
+ status="launching",
+ progress=0.3,
+ message="Launching Playwright"
+ )
+ playwright = await async_playwright().start()
+ self._playwright = playwright
+
+ try:
+ self.logger.log_progress(
+ step="browser_init",
+ status="configuring",
+ progress=0.6,
+ message="Configuring browser"
+ )
+ self.browser = await playwright.chromium.launch(
+ headless=True,
+ args=['--no-sandbox']
+ )
+ self.init_count += 1
+
+ self.logger.log_browser_event("browser_launched", {
+ "initialization_count": self.init_count,
+ "headless": True
+ })
+
+ except Exception as e:
+ await self._cleanup_playwright()
+ self.logger.log_browser_event("launch_failed", {
+ "error": str(e),
+ "initialization_count": self.init_count
+ })
+ raise
+
+ async def _cleanup_playwright(self) -> None:
+ """Clean up the playwright context."""
+ if self._playwright:
+ self.logger.log_browser_event("cleanup_playwright", {
+ "status": "starting"
+ })
+ await self._playwright.stop()
+ self._playwright = None
+ self.logger.log_browser_event("cleanup_playwright", {
+ "status": "completed"
+ })
+
+ async def cleanup(self) -> None:
+ """Clean up browser resources."""
+ self.logger.log_progress(
+ step="cleanup",
+ status="starting",
+ progress=0.0,
+ message="Starting browser cleanup"
+ )
+
+ if self.browser:
+ self.logger.log_progress(
+ step="cleanup",
+ status="closing_browser",
+ progress=0.5,
+ message="Closing browser"
+ )
+ await self.browser.close()
+ self.browser = None
+
+ await self._cleanup_playwright()
+ self.init_promise = None
+ self.init_count = 0
+
+ self.logger.log_progress(
+ step="cleanup",
+ status="completed",
+ progress=1.0,
+ message="Browser cleanup completed"
+ )
\ No newline at end of file
diff --git a/src/utils/error_handling.py b/src/utils/error_handling.py
new file mode 100644
index 00000000..2a4f744c
--- /dev/null
+++ b/src/utils/error_handling.py
@@ -0,0 +1,53 @@
+import asyncio
+from datetime import datetime
+from typing import Dict, Any, Optional
+import re
+
+class MaxRetriesExceededError(Exception):
+ def __init__(self, operation: str, original_error: Exception):
+ self.operation = operation
+ self.original_error = original_error
+ super().__init__(f"Max retries exceeded for operation '{operation}': {str(original_error)}")
+
+class ErrorHandler:
+ MAX_RETRIES = 3
+
+ def __init__(self):
+ self._retry_counts: Dict[str, int] = {}
+ self._last_error: Optional[Dict[str, Any]] = None
+
+ async def handle_error(self, error: Exception, operation: str) -> None:
+ retry_count = self._retry_counts.get(operation, 0)
+
+ if retry_count >= self.MAX_RETRIES:
+ raise MaxRetriesExceededError(operation, error)
+
+ self._retry_counts[operation] = retry_count + 1
+ await self._log_error(error, operation, retry_count)
+
+ # Exponential backoff: 2^retry_count seconds
+ await asyncio.sleep(2 ** retry_count)
+
+ async def _log_error(self, error: Exception, operation: str, retry_count: int) -> None:
+ error_context = {
+ "operation": operation,
+ "attempt": retry_count + 1,
+ "timestamp": datetime.now().isoformat(),
+ "error": {
+ "name": error.__class__.__name__,
+ "message": str(error),
+ "code": self.extract_error_code(error)
+ }
+ }
+
+ self._last_error = error_context
+ # In a real implementation, we would log to a file or logging service
+ print(f"Error: {error_context}")
+
+ def extract_error_code(self, error: Exception) -> str:
+ error_message = str(error)
+ match = re.search(r'ERR_[A-Z_]+', error_message)
+ return match.group(0) if match else "UNKNOWN_ERROR"
+
+ def get_last_error(self) -> Optional[Dict[str, Any]]:
+ return self._last_error
\ No newline at end of file
diff --git a/src/utils/logging.py b/src/utils/logging.py
new file mode 100644
index 00000000..982ffdc2
--- /dev/null
+++ b/src/utils/logging.py
@@ -0,0 +1,158 @@
+import json
+import logging
+import datetime
+from typing import Any, Dict, List, Optional
+from enum import Enum
+import traceback
+import types
+
+class LogLevel(str, Enum):
+ CRITICAL = "CRITICAL"
+ ERROR = "ERROR"
+ WARNING = "WARNING"
+ INFO = "INFO"
+ DEBUG = "DEBUG"
+ TRACE = "TRACE"
+
+class LogJSONEncoder(json.JSONEncoder):
+ def default(self, obj):
+ if isinstance(obj, Exception):
+ return {
+ 'type': obj.__class__.__name__,
+ 'message': str(obj),
+ 'traceback': traceback.format_exception(type(obj), obj, obj.__traceback__)
+ }
+ if isinstance(obj, type):
+ return obj.__name__
+ if isinstance(obj, types.TracebackType):
+ return traceback.format_tb(obj)
+ return super().default(obj)
+
+class LogFormatter(logging.Formatter):
+ def __init__(self, use_json: bool = True):
+ super().__init__()
+ self.use_json = use_json
+ self._event_counter: Dict[str, int] = {}
+
+ def _serialize_error(self, exc_info) -> Dict[str, str]:
+ """Serialize error information into a dictionary."""
+ exc_type, exc_value, exc_tb = exc_info
+ return {
+ "type": exc_type.__name__ if exc_type else "Unknown",
+ "message": str(exc_value) if exc_value else "",
+ "stack_trace": self.formatException(exc_info) if exc_tb else ""
+ }
+
+ def format(self, record: logging.LogRecord) -> str:
+ timestamp = datetime.datetime.fromtimestamp(record.created).strftime("%Y-%m-%dT%H:%M:%S")
+
+ # Extract additional fields if they exist
+ extra_fields = {}
+ for key, value in vars(record).items():
+ if key not in logging.LogRecord.__dict__ and not key.startswith('_'):
+ extra_fields[key] = value
+
+ if self.use_json:
+ log_entry = {
+ "timestamp": timestamp,
+ "level": record.levelname,
+ "logger": record.name or "root",
+ "message": record.getMessage(),
+ **extra_fields
+ }
+
+ if hasattr(record, 'event_type'):
+ log_entry["event_type"] = getattr(record, 'event_type')
+
+ if hasattr(record, 'event_data'):
+ log_entry["data"] = getattr(record, 'event_data')
+
+ if record.exc_info and record.levelno >= logging.ERROR:
+ log_entry["error"] = self._serialize_error(record.exc_info)
+
+ return json.dumps(log_entry, cls=LogJSONEncoder)
+ else:
+ # Compact format for non-JSON logs
+ basic_msg = f"[{timestamp}] {record.levelname[0]}: {record.getMessage()}"
+
+ if record.exc_info and record.levelno >= logging.ERROR:
+ return f"{basic_msg}\n{self.formatException(record.exc_info)}"
+
+ return basic_msg
+
+class BatchedEventLogger:
+ def __init__(self, logger: logging.Logger):
+ self._logger = logger
+ self._batched_events: Dict[str, List[Dict[str, Any]]] = {}
+
+ def add_event(self, event_type: str, event_data: Dict[str, Any]) -> None:
+ if event_type not in self._batched_events:
+ self._batched_events[event_type] = []
+ self._batched_events[event_type].append(event_data)
+
+ def flush(self) -> None:
+ for event_type, events in self._batched_events.items():
+ if events:
+ self._logger.info(
+ f"Batch: {len(events)} {event_type} events",
+ extra={
+ "event_type": f"batched_{event_type}",
+ "event_data": {
+ "count": len(events),
+ "events": events
+ }
+ }
+ )
+ self._batched_events.clear()
+
+def setup_logging(
+ level: str = "INFO",
+ use_json: bool = True,
+ log_file: Optional[str] = None,
+ exclude_patterns: Optional[List[str]] = None
+) -> None:
+ """
+ Setup logging configuration with the improved formatter
+
+ Args:
+ level: The logging level to use
+ use_json: Whether to use JSON formatting
+ log_file: Optional file to write logs to
+ exclude_patterns: Optional list of patterns to exclude from logging
+ """
+ root_logger = logging.getLogger()
+ root_logger.setLevel(level)
+
+ # Clear any existing handlers
+ root_logger.handlers.clear()
+
+ # Create console handler
+ console_handler = logging.StreamHandler()
+ console_handler.setFormatter(LogFormatter(use_json=use_json))
+
+ if exclude_patterns:
+ class ExcludeFilter(logging.Filter):
+ def filter(self, record: logging.LogRecord) -> bool:
+ return not any(pattern in record.getMessage() for pattern in exclude_patterns)
+
+ console_handler.addFilter(ExcludeFilter())
+
+ root_logger.addHandler(console_handler)
+
+ # Add file handler if specified
+ if log_file:
+ file_handler = logging.FileHandler(log_file)
+ file_handler.setFormatter(LogFormatter(use_json=True)) # Always use JSON for file logging
+ if exclude_patterns:
+ file_handler.addFilter(ExcludeFilter())
+ root_logger.addHandler(file_handler)
+
+# Production filter patterns
+PRODUCTION_EXCLUDE_PATTERNS = [
+ "deprecated",
+ "virtual environment",
+ "Activating virtual environment",
+ "✅ Eval: Success",
+ "🤔 Thought:",
+ "VIRTUAL_ENV:"
+]
\ No newline at end of file
diff --git a/src/utils/structured_logging.py b/src/utils/structured_logging.py
new file mode 100644
index 00000000..8568de23
--- /dev/null
+++ b/src/utils/structured_logging.py
@@ -0,0 +1,223 @@
+from typing import Optional, Dict, Any, List
+import logging
+import json
+from dataclasses import dataclass, asdict
+from datetime import datetime
+from colorama import init, Fore, Style
+import os
+
+# Initialize colorama
+init()
+
+@dataclass
+class ColorScheme:
+ """Color scheme for different log elements."""
+ ERROR: str = Fore.RED
+ WARNING: str = Fore.YELLOW
+ INFO: str = Fore.CYAN
+ DEBUG: str = Style.DIM
+ TIMESTAMP: str = Fore.WHITE
+ SUCCESS: str = Fore.GREEN
+ STEP: str = Fore.BLUE
+ RESET: str = Style.RESET_ALL
+
+class ColorizedFormatter(logging.Formatter):
+ """Formatter that adds colors to log output."""
+
+ def __init__(self, use_colors: bool = True):
+ super().__init__()
+ self.use_colors = use_colors and not os.getenv('NO_COLOR')
+ self.colors = ColorScheme()
+
+ def colorize(self, text: str, color: str) -> str:
+ """Add color to text if colors are enabled."""
+ if self.use_colors:
+ return f"{color}{text}{self.colors.RESET}"
+ return text
+
+ def format(self, record: logging.LogRecord) -> str:
+ """Format the log record with colors."""
+ # Get the appropriate color for the log level
+ level_color = getattr(self.colors, record.levelname, self.colors.INFO)
+
+ # Format timestamp
+ timestamp = self.colorize(
+ datetime.utcnow().strftime("%H:%M:%S"),
+ self.colors.TIMESTAMP
+ )
+
+ # Format level
+ level = self.colorize(record.levelname, level_color)
+
+ # Format message and handle special keywords
+ msg = record.getMessage()
+ if "✓" in msg:
+ msg = msg.replace("✓", self.colorize("✓", self.colors.SUCCESS))
+ if "×" in msg:
+ msg = msg.replace("×", self.colorize("×", self.colors.ERROR))
+ if "STEP" in msg:
+ msg = msg.replace("STEP", self.colorize("STEP", self.colors.STEP))
+
+ # Build the basic log message
+ log_message = f"[{timestamp}] {level} {msg}"
+
+ # Add structured data if available
+ if hasattr(record, 'event_type'):
+ event_type = self.colorize(record.event_type, self.colors.INFO)
+ if hasattr(record, 'data'):
+ # Format the data as JSON but don't colorize it
+ data_str = json.dumps(record.data, indent=2)
+ log_message = f"{log_message} | {event_type} | {data_str}"
+
+ return log_message
+
+class JSONFormatter(logging.Formatter):
+ """Custom JSON formatter for structured logs."""
+
+ def format(self, record: logging.LogRecord) -> str:
+ """Format the log record as a JSON string."""
+ output = {
+ "timestamp": datetime.utcnow().isoformat(),
+ "level": record.levelname,
+ "message": record.getMessage(),
+ "logger": record.name
+ }
+
+ # Add extra fields from record.__dict__ to handle custom attributes
+ if hasattr(record, '__dict__'):
+ for key, value in record.__dict__.items():
+ if key not in output and key not in ('args', 'exc_info', 'exc_text', 'msg'):
+ output[key] = value
+
+ return json.dumps(output)
+
+def setup_structured_logging(level: int = logging.INFO, use_colors: bool = True, json_output: bool = False) -> None:
+ """Set up structured logging with optional colorized output."""
+ root_logger = logging.getLogger()
+ root_logger.setLevel(level)
+
+ # Remove existing handlers
+ for handler in root_logger.handlers[:]:
+ root_logger.removeHandler(handler)
+
+ # Create console handler with appropriate formatter
+ handler = logging.StreamHandler()
+ if json_output:
+ handler.setFormatter(JSONFormatter())
+ else:
+ handler.setFormatter(ColorizedFormatter(use_colors=use_colors))
+
+ root_logger.addHandler(handler)
+
+@dataclass
+class ProgressEvent:
+ """Represents a progress update in the browser automation process."""
+ step: str
+ status: str
+ progress: float # 0.0 to 1.0
+ message: str
+ timestamp: Optional[str] = None
+
+ def __post_init__(self):
+ if self.timestamp is None:
+ self.timestamp = datetime.utcnow().isoformat()
+
+@dataclass
+class BrowserEvent:
+ """Represents a browser-related event."""
+ event_type: str
+ details: Dict[str, Any]
+ timestamp: Optional[str] = None
+
+ def __post_init__(self):
+ if self.timestamp is None:
+ self.timestamp = datetime.utcnow().isoformat()
+
+class StructuredLogger:
+ """Handles structured logging with progress reporting and feedback."""
+
+ def __init__(self, logger_name: str = "browser_automation"):
+ self.logger = logging.getLogger(logger_name)
+ self.progress_events: List[ProgressEvent] = []
+ self.browser_events: List[BrowserEvent] = []
+ self._current_progress: float = 0.0
+
+ def log_progress(self, step: str, status: str, progress: float, message: str) -> None:
+ """Log a progress update."""
+ event = ProgressEvent(step=step, status=status, progress=progress, message=message)
+ self.progress_events.append(event)
+ self._current_progress = progress
+
+ self.logger.info("Progress Update", extra={
+ "event_type": "progress",
+ "data": asdict(event)
+ })
+
+ def log_browser_event(self, event_type: str, details: Dict[str, Any]) -> None:
+ """Log a browser-related event."""
+ event = BrowserEvent(event_type=event_type, details=details)
+ self.browser_events.append(event)
+
+ self.logger.info(f"Browser Event: {event_type}", extra={
+ "event_type": "browser",
+ "data": asdict(event)
+ })
+
+ def get_current_progress(self) -> float:
+ """Get the current progress as a float between 0 and 1."""
+ return self._current_progress
+
+ def get_progress_history(self) -> List[Dict[str, Any]]:
+ """Get the history of progress events."""
+ return [asdict(event) for event in self.progress_events]
+
+ def get_browser_events(self) -> List[Dict[str, Any]]:
+ """Get all browser events."""
+ return [asdict(event) for event in self.browser_events]
+
+ def clear_history(self) -> None:
+ """Clear all stored events."""
+ self.progress_events.clear()
+ self.browser_events.clear()
+ self._current_progress = 0.0
+
+class EventBatcher:
+ def __init__(self, batch_size: int = 5):
+ self.events: List[BrowserEvent] = []
+ self.batch_size = max(1, batch_size) # Ensure minimum batch size of 1
+
+ def add_event(self, event: BrowserEvent) -> Optional[Dict[str, Any]]:
+ self.events.append(event)
+ if len(self.events) >= self.batch_size:
+ return self.flush_events()
+ return None
+
+ def flush_events(self) -> Dict[str, Any]:
+ if not self.events:
+ return {
+ "timestamp": datetime.now().isoformat(),
+ "total_events": 0,
+ "success_count": 0,
+ "error_count": 0,
+ "duration_ms": 0
+ }
+
+        summary = {
+            "timestamp": datetime.now().isoformat(),
+            "total_events": len(self.events),
+            # BrowserEvent stores its payload in `details`; status and metrics are read from there
+            "success_count": sum(1 for e in self.events if e.details.get("status") == "success"),
+            "error_count": sum(1 for e in self.events if e.details.get("status") == "failed"),
+            "duration_ms": self._calculate_total_duration()
+ }
+ self.events = []
+ return summary
+
+ def get_event_count(self) -> int:
+ return len(self.events)
+
+ def _calculate_total_duration(self) -> int:
+ total_duration = 0
+        for event in self.events:
+            metrics = event.details.get("metrics") or {}
+            if "duration_ms" in metrics:
+                total_duration += metrics["duration_ms"]
+ return total_duration
\ No newline at end of file
diff --git a/src/utils/task_logging.py b/src/utils/task_logging.py
new file mode 100644
index 00000000..908774a2
--- /dev/null
+++ b/src/utils/task_logging.py
@@ -0,0 +1,562 @@
+from typing import Dict, Any, List, Literal, Optional, Union, Callable, TypeVar, Awaitable
+from dataclasses import dataclass, asdict, field
+from datetime import datetime
+import json
+from enum import Enum
+import traceback
+import asyncio
+import random
+import os
+from colorama import init, Fore, Style
+
+# Initialize colorama for cross-platform color support
+init()
+
+# Define generic type parameter at module level
+T = TypeVar('T')
+
+class TaskStatus(str, Enum):
+ PENDING = "pending"
+ RUNNING = "running"
+ COMPLETE = "complete"
+ FAILED = "failed"
+
+class ActionType(str, Enum):
+ NAVIGATION = "navigation"
+ INTERACTION = "interaction"
+ EXTRACTION = "extraction"
+ VALIDATION = "validation"
+ RECOVERY = "recovery"
+
+ @property
+ def emoji(self) -> str:
+ """Get the emoji representation of the action type."""
+ return {
+ ActionType.NAVIGATION: "🌐",
+ ActionType.INTERACTION: "🖱️",
+ ActionType.EXTRACTION: "📑",
+ ActionType.VALIDATION: "✅",
+ ActionType.RECOVERY: "🔄"
+ }[self]
+
+@dataclass
+class PerformanceMetrics:
+ """Performance metrics for task execution."""
+ total_duration: float = 0.0
+ step_breakdown: Dict[str, float] = field(default_factory=dict)
+
+ def add_step_duration(self, step_type: str, duration: float) -> None:
+ """Add duration for a step type."""
+ if step_type not in self.step_breakdown:
+ self.step_breakdown[step_type] = 0
+ self.step_breakdown[step_type] += duration
+ self.total_duration += duration
+
+ def to_dict(self) -> Dict[str, Any]:
+ """Convert metrics to a dictionary."""
+ return {
+ "total_duration": self.total_duration,
+ "step_breakdown": self.step_breakdown
+ }
+
+@dataclass
+class ErrorInfo:
+ """Information about an error that occurred."""
+ type: str
+ message: str
+ step: int
+ action: str
+ traceback: Optional[str] = None
+
+@dataclass
+class StepInfo:
+ """Information about the current step in a task."""
+ number: int
+ description: str
+ started_at: str
+ status: Union[TaskStatus, str]
+ duration: Optional[float] = None
+ progress: Optional[float] = None
+ action_type: Optional[ActionType] = None
+ context: Optional[Dict[str, Any]] = None
+ results: Optional[Dict[str, Any]] = None
+ suppress_similar: bool = False
+
+ def __post_init__(self):
+ if isinstance(self.status, str):
+ self.status = TaskStatus(self.status)
+ if isinstance(self.action_type, str):
+ self.action_type = ActionType(self.action_type)
+
+ @property
+ def status_value(self) -> str:
+ """Get the string value of the status."""
+ return self.status.value if isinstance(self.status, TaskStatus) else str(self.status)
+
+@dataclass
+class BrowserState:
+ """Current state of the browser."""
+ url: str
+ page_ready: bool
+ dynamic_content_loaded: bool
+ visible_elements: int
+ current_frame: Optional[str] = None
+ active_element: Optional[str] = None
+ page_title: Optional[str] = None
+
+@dataclass
+class RetryConfig:
+ """Configuration for retry behavior."""
+ max_retries: int = 3
+ base_delay: float = 1.0
+ max_delay: float = 10.0
+ jitter: float = 0.1
+
+ def get_delay(self, attempt: int) -> float:
+ """Calculate delay for a given attempt using exponential backoff."""
+ if attempt == 0:
+ return 0
+ if attempt > self.max_retries:
+ return -1
+
+ # Calculate exponential delay
+ delay = self.base_delay * (2 ** (attempt - 1))
+ delay = min(delay, self.max_delay)
+
+ # Add jitter if configured
+ if self.jitter > 0:
+ jitter_range = delay * self.jitter
+ delay += random.uniform(-jitter_range/2, jitter_range/2)
+
+ return max(0, delay)
+
+@dataclass
+class RetryInfo:
+ """Information about retry attempts."""
+ attempts: int = 0
+ success: bool = False
+ history: List[Dict[str, Any]] = field(default_factory=list)
+
+@dataclass
+class TaskContext:
+ """Context information for a task."""
+ id: str
+ goal: str
+ current_step: StepInfo
+ browser_state: BrowserState
+ started_at: Optional[str] = None
+ error: Optional[ErrorInfo] = None
+ performance: Optional[PerformanceMetrics] = None
+ log_history: List[StepInfo] = field(default_factory=list)
+ retries: Optional[RetryInfo] = None
+
+ def __post_init__(self):
+ if self.started_at is None:
+ self.started_at = datetime.utcnow().isoformat()
+ if self.performance is None:
+ self.performance = PerformanceMetrics()
+ if self.retries is None:
+ self.retries = RetryInfo()
+
+ def to_dict(self) -> Dict[str, Any]:
+ """Convert the context to a dictionary for logging."""
+ result = {
+ "timestamp": datetime.utcnow().isoformat(),
+ "task": {
+ "id": self.id,
+ "goal": self.goal,
+ "progress": self._format_progress(),
+ "elapsed_time": self._calculate_elapsed_time(),
+ "status": self.current_step.status_value
+ }
+ }
+
+ # Add retry information if available
+ if self.retries and self.retries.attempts > 0:
+ result["task"]["retries"] = {
+ "attempts": self.retries.attempts,
+ "success": self.retries.success,
+ "history": self.retries.history
+ }
+
+ # Add current action information
+ if self.current_step.action_type:
+ result["task"]["current_action"] = self.current_step.action_type.value
+ if self.current_step.context:
+ result["task"]["action_context"] = self.current_step.context
+ if self.current_step.results:
+ result["task"]["action_results"] = self.current_step.results
+
+ # Add browser state
+ result["browser"] = {
+ "url": self.browser_state.url,
+ "state": "ready" if self.browser_state.page_ready else "loading",
+ "visible_elements": self.browser_state.visible_elements,
+ "dynamic_content": "loaded" if self.browser_state.dynamic_content_loaded else "loading"
+ }
+
+ if self.browser_state.current_frame:
+ result["browser"]["current_frame"] = self.browser_state.current_frame
+ if self.browser_state.active_element:
+ result["browser"]["active_element"] = self.browser_state.active_element
+ if self.browser_state.page_title:
+ result["browser"]["page_title"] = self.browser_state.page_title
+
+ if self.error:
+ result["error"] = {
+ "type": self.error.type,
+ "message": self.error.message,
+ "step": self.error.step,
+ "action": self.error.action
+ }
+ if self.error.traceback:
+ result["error"]["traceback"] = self.error.traceback
+
+ if self.performance and self.performance.step_breakdown:
+ result["performance"] = self.performance.to_dict()
+
+ return result
+
+ def _format_progress(self) -> str:
+ """Format the progress information."""
+ if self.current_step.progress is not None:
+ return f"{int(self.current_step.progress * 100)}%"
+ return f"{self.current_step.number}/unknown steps"
+
+ def _calculate_elapsed_time(self) -> str:
+ """Calculate the elapsed time since task start."""
+ if self.started_at is None:
+ return "0.0s"
+ start = datetime.fromisoformat(self.started_at)
+ elapsed = datetime.utcnow() - start
+ return f"{elapsed.total_seconds():.1f}s"
+
+@dataclass
+class ColorScheme:
+ """Color scheme for log messages."""
+ error: str = Fore.RED
+ warning: str = Fore.YELLOW
+ info: str = Fore.CYAN
+ success: str = Fore.GREEN
+ reset: str = Style.RESET_ALL
+
+ @property
+ def enabled(self) -> bool:
+ """Check if colors should be enabled."""
+ return not bool(os.getenv("NO_COLOR"))
+
+ def apply(self, text: str, color: str) -> str:
+ """Apply color to text if colors are enabled."""
+ if not self.enabled:
+ return text
+ return f"{color}{text}{self.reset}"
+
+class LogFormatter:
+ """Formatter for log messages with color support."""
+
+ def __init__(self, color_scheme: Optional[ColorScheme] = None):
+ self.colors = color_scheme or ColorScheme()
+
+ def format(self, record: Any) -> str:
+ """Format a log record with appropriate colors."""
+ level_colors = {
+ "ERROR": self.colors.error,
+ "WARNING": self.colors.warning,
+ "INFO": self.colors.info
+ }
+
+ # Format timestamp
+ timestamp = datetime.fromtimestamp(record.created).strftime("%Y-%m-%d %H:%M:%S")
+
+ # Color the level name
+ level_color = level_colors.get(record.levelname, self.colors.info)
+ colored_level = self.colors.apply(record.levelname, level_color)
+
+        # Use getMessage() so %-style arguments are interpolated into the message
+        return f"[{timestamp}] {colored_level}: {record.getMessage()}"
+
+@dataclass
+class SeparatorStyle:
+ """Style configuration for visual separators."""
+ task: str = "=" * 50 # Task separator (longer)
+ phase: str = "-" * 30 # Phase separator (medium)
+ error: str = "*" * 40 # Error separator (distinct)
+
+class TaskLogger:
+ """Advanced logger for task context and state tracking."""
+
+ def __init__(
+ self,
+ task_id: str,
+ goal: str,
+ color_scheme: Optional[ColorScheme] = None,
+ separator_style: Optional[SeparatorStyle] = None,
+ use_separators: bool = True
+ ):
+ self.context = TaskContext(
+ id=task_id,
+ goal=goal,
+ current_step=StepInfo(
+ number=1,
+ description="Task initialized",
+ started_at=datetime.utcnow().isoformat(),
+ status=TaskStatus.PENDING
+ ),
+ browser_state=BrowserState(
+ url="",
+ page_ready=False,
+ dynamic_content_loaded=False,
+ visible_elements=0
+ ),
+ retries=RetryInfo()
+ )
+ self._step_start_time: Optional[datetime] = None
+ self.colors = color_scheme or ColorScheme()
+ self.separators = separator_style or SeparatorStyle()
+ self.use_separators = use_separators
+
+ # Add initial task separator and goal
+ if self.use_separators:
+ self._add_separator("task")
+ self._add_log_entry(f"TASK GOAL: {goal}")
+
+ def start_phase(self, phase_name: str) -> None:
+ """Start a new phase in the task."""
+ if self.use_separators:
+ self._add_separator("phase")
+ self._add_log_entry(f"PHASE: {phase_name}")
+
+ def _add_separator(self, separator_type: Literal["task", "phase", "error"]) -> None:
+ """Add a visual separator to the log history."""
+ if not self.use_separators:
+ return
+
+ separator = getattr(self.separators, separator_type)
+ colored_separator = self.colors.apply(
+ separator,
+ self.colors.info if separator_type != "error" else self.colors.error
+ )
+ self._add_log_entry(colored_separator)
+
+ def _add_log_entry(self, entry: str) -> None:
+ """Add a raw log entry to the history."""
+ step = StepInfo(
+ number=self.context.current_step.number,
+ description=entry,
+ started_at=datetime.utcnow().isoformat(),
+ status=TaskStatus.RUNNING
+ )
+ self.context.log_history.append(step)
+
+ def update_step(self,
+ description: str,
+ status: TaskStatus,
+ progress: Optional[float] = None,
+ action_type: Optional[ActionType] = None,
+ context: Optional[Dict[str, Any]] = None,
+ results: Optional[Dict[str, Any]] = None,
+ suppress_similar: bool = False) -> None:
+ """Update the current step information."""
+ step_duration = None
+ if self._step_start_time:
+ step_duration = (datetime.utcnow() - self._step_start_time).total_seconds()
+
+ new_step = StepInfo(
+ number=self.context.current_step.number + 1,
+ description=description,
+ started_at=datetime.utcnow().isoformat(),
+ status=status,
+ duration=step_duration,
+ progress=progress,
+ action_type=action_type,
+ context=context,
+ results=results,
+ suppress_similar=suppress_similar
+ )
+
+ # Check if we should suppress this step
+ if not suppress_similar or not self._is_similar_to_previous(new_step):
+ self.context.log_history.append(new_step)
+ self.context.current_step = new_step
+ self._step_start_time = datetime.utcnow()
+ else:
+ # Update the previous step with new status/results
+ prev_step = self.context.log_history[-1]
+ prev_step.status = status
+ if results:
+ prev_step.results = results
+ # Update current step to reflect changes
+ self.context.current_step = prev_step
+
+    def _is_similar_to_previous(self, step: StepInfo) -> bool:
+        """Check if a step is similar to the previous one."""
+        if not self.context.log_history:
+            return False
+        prev_step = self.context.log_history[-1]
+        if not prev_step.description or not step.description:
+            return False
+        return (
+            prev_step.action_type == step.action_type and
+            prev_step.description.split()[0] == step.description.split()[0]  # Compare first word only
+        )
+
+ def get_log_history(self) -> List[str]:
+ """Get the formatted history of log entries."""
+ return [self._format_step(step) for step in self.context.log_history]
+
+ def _format_step(self, step: StepInfo) -> str:
+ """Format a step as a log entry with colors."""
+ timestamp = datetime.fromisoformat(step.started_at).strftime("%Y-%m-%d %H:%M:%S")
+ duration = f"({step.duration:.1f}s)" if step.duration is not None else ""
+
+ # Color-coded status symbols
+ if isinstance(step.status, TaskStatus):
+ status_symbol = {
+ TaskStatus.COMPLETE: self.colors.apply("✓", self.colors.success),
+ TaskStatus.FAILED: self.colors.apply("×", self.colors.error),
+ TaskStatus.RUNNING: self.colors.apply("→", self.colors.info),
+ TaskStatus.PENDING: self.colors.apply("→", self.colors.info)
+ }.get(step.status, self.colors.apply("→", self.colors.info))
+ else:
+ status_symbol = self.colors.apply("→", self.colors.info)
+
+ # Color-coded action emoji
+ action_emoji = step.action_type.emoji if step.action_type else ""
+ if action_emoji:
+ action_emoji = self.colors.apply(action_emoji, self.colors.info)
+
+ # Format step number with info color
+ step_number = self.colors.apply(f"STEP {step.number}/?", self.colors.info)
+
+ return f"[{timestamp}] {action_emoji} {step_number} {step.description} {status_symbol} {duration}"
+
+ def format_log_entry(self) -> str:
+ """Format the current state as a log entry."""
+ return self._format_step(self.context.current_step)
+
+ def update_browser_state(self,
+ url: Optional[str] = None,
+ page_ready: Optional[bool] = None,
+ dynamic_content_loaded: Optional[bool] = None,
+ visible_elements: Optional[int] = None,
+ current_frame: Optional[str] = None,
+ active_element: Optional[str] = None,
+ page_title: Optional[str] = None) -> None:
+ """Update the browser state information."""
+ if url is not None:
+ self.context.browser_state.url = url
+ if page_ready is not None:
+ self.context.browser_state.page_ready = page_ready
+ if dynamic_content_loaded is not None:
+ self.context.browser_state.dynamic_content_loaded = dynamic_content_loaded
+ if visible_elements is not None:
+ self.context.browser_state.visible_elements = visible_elements
+ if current_frame is not None:
+ self.context.browser_state.current_frame = current_frame
+ if active_element is not None:
+ self.context.browser_state.active_element = active_element
+ if page_title is not None:
+ self.context.browser_state.page_title = page_title
+
+ def log_error(self, error: Exception, step_number: int, action: str) -> None:
+ """Log an error with context."""
+ if self.use_separators:
+ self._add_separator("error")
+
+ self.context.error = ErrorInfo(
+ type=error.__class__.__name__,
+ message=str(error),
+ step=step_number,
+ action=action,
+ traceback=traceback.format_exc()
+ )
+ self.context.current_step.status = TaskStatus.FAILED
+
+ if self.use_separators:
+ self._add_separator("error")
+
+ def start_performance_tracking(self) -> None:
+ """Start tracking performance metrics."""
+ self._step_start_time = datetime.utcnow()
+
+ def track_step_duration(self, step_type: str, duration: float) -> None:
+ """Track the duration of a specific step type."""
+ if self.context.performance is not None:
+ self.context.performance.add_step_duration(step_type, duration)
+
+ def get_performance_metrics(self) -> Dict[str, Any]:
+ """Get the current performance metrics."""
+ if self.context.performance is not None:
+ return self.context.performance.to_dict()
+ return {"total_duration": 0.0, "step_breakdown": {}}
+
+ def get_context(self) -> Dict[str, Any]:
+ """Get the current context as a dictionary."""
+ return self.context.to_dict()
+
+ def log_state(self) -> None:
+ """Log the current state."""
+ state = self.get_context()
+ print(json.dumps(state, indent=2))
+
+ async def execute_with_retry(
+ self,
+ operation: Callable[[], Awaitable[T]],
+ operation_name: str,
+ retry_config: Optional[RetryConfig] = None
+ ) -> T:
+ """Execute an operation with retry logic."""
+ if retry_config is None:
+ retry_config = RetryConfig()
+
+ attempt = 0
+ last_error = None
+
+ while True:
+ try:
+ # Calculate and apply delay if this is a retry
+ delay = retry_config.get_delay(attempt)
+ if delay == -1: # Max retries exceeded
+ if last_error:
+ raise last_error
+ raise Exception("Max retries exceeded")
+
+ if delay > 0:
+ await asyncio.sleep(delay)
+
+ # Attempt the operation
+ result = await operation()
+
+ # Update retry info on success
+ if self.context.retries is not None:
+ self.context.retries.attempts = attempt + 1
+ self.context.retries.success = True
+
+ return result
+
+ except Exception as e:
+ last_error = e
+ attempt += 1
+
+ # Log the retry attempt
+ if self.context.retries is not None:
+ self.context.retries.history.append({
+ "attempt": attempt,
+ "timestamp": datetime.utcnow().isoformat(),
+ "error": f"{e.__class__.__name__}: {str(e)}",
+ "delay": retry_config.get_delay(attempt)
+ })
+
+ # Update the error context
+ self.log_error(e, self.context.current_step.number, operation_name)
+
+ # Continue if we haven't exceeded max retries
+ if attempt <= retry_config.max_retries:
+ self.update_step(
+ f"Retrying {operation_name} (attempt {attempt + 1}/{retry_config.max_retries + 1})",
+ TaskStatus.RUNNING
+ )
+ continue
+
+ # Max retries exceeded
+ if self.context.retries is not None:
+ self.context.retries.attempts = attempt
+ self.context.retries.success = False
+ raise
\ No newline at end of file
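
A short sketch of how `TaskLogger` and `execute_with_retry` from the new `src/utils/task_logging.py` fit together. `flaky_navigation` is a made-up stand-in for a real browser action, not part of the module:

```python
import asyncio
from src.utils.task_logging import TaskLogger, TaskStatus, ActionType, RetryConfig

async def flaky_navigation() -> str:
    # Stand-in coroutine: fails on the first call, succeeds on the retry.
    if not getattr(flaky_navigation, "called", False):
        flaky_navigation.called = True
        raise TimeoutError("page did not load")
    return "loaded"

async def main() -> None:
    logger = TaskLogger(task_id="demo-001", goal="Open example.com")
    logger.start_phase("navigation")
    logger.update_step("Navigating to example.com", TaskStatus.RUNNING,
                       progress=0.2, action_type=ActionType.NAVIGATION)

    # Exponential backoff: 0.5s, 1.0s, 2.0s between attempts (plus a little jitter).
    result = await logger.execute_with_retry(
        flaky_navigation,
        "navigate",
        RetryConfig(max_retries=3, base_delay=0.5),
    )

    logger.update_step("Navigation finished", TaskStatus.COMPLETE, progress=1.0,
                       action_type=ActionType.NAVIGATION, results={"outcome": result})
    logger.log_state()  # dumps the structured task context as JSON

asyncio.run(main())
```

The retry history, the failed first attempt, and the final success all end up in the context produced by `log_state()`, which is what the CLI surfaces when a task needs recovery.
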
diff --git a/src/utils/utils.py b/src/utils/utils.py
index 3ab38977..9e202fa6 100644
--- a/src/utils/utils.py
+++ b/src/utils/utils.py
@@ -9,6 +9,7 @@
import time
from pathlib import Path
from typing import Dict, Optional
+from pydantic import SecretStr
from langchain_anthropic import ChatAnthropic
from langchain_google_genai import ChatGoogleGenerativeAI
@@ -30,15 +31,17 @@ def get_llm_model(provider: str, **kwargs):
base_url = kwargs.get("base_url")
if not kwargs.get("api_key", ""):
- api_key = os.getenv("ANTHROPIC_API_KEY", "")
+ api_key = SecretStr(os.getenv("ANTHROPIC_API_KEY") or "")
else:
- api_key = kwargs.get("api_key")
+ api_key = SecretStr(kwargs.get("api_key") or "")
return ChatAnthropic(
model_name=kwargs.get("model_name", "claude-3-5-sonnet-20240620"),
temperature=kwargs.get("temperature", 0.0),
base_url=base_url,
api_key=api_key,
+ timeout=kwargs.get("timeout", 60),
+ stop=kwargs.get("stop", None)
)
elif provider == "openai":
if not kwargs.get("base_url", ""):
@@ -47,15 +50,16 @@ def get_llm_model(provider: str, **kwargs):
base_url = kwargs.get("base_url")
if not kwargs.get("api_key", ""):
- api_key = os.getenv("OPENAI_API_KEY", "")
+ api_key = SecretStr(os.getenv("OPENAI_API_KEY") or "")
else:
- api_key = kwargs.get("api_key")
+ api_key = SecretStr(kwargs.get("api_key") or "")
return ChatOpenAI(
- model=kwargs.get("model_name", "gpt-4o"),
+ model=kwargs.get("model_name", "gpt-4"),
temperature=kwargs.get("temperature", 0.0),
base_url=base_url,
api_key=api_key,
+ timeout=kwargs.get("timeout", 60),
)
elif provider == "deepseek":
if not kwargs.get("base_url", ""):
@@ -64,25 +68,36 @@ def get_llm_model(provider: str, **kwargs):
base_url = kwargs.get("base_url")
if not kwargs.get("api_key", ""):
- api_key = os.getenv("DEEPSEEK_API_KEY", "")
+ api_key = SecretStr(os.getenv("DEEPSEEK_API_KEY") or "")
else:
- api_key = kwargs.get("api_key")
+ api_key = SecretStr(kwargs.get("api_key") or "")
return ChatOpenAI(
model=kwargs.get("model_name", "deepseek-chat"),
temperature=kwargs.get("temperature", 0.0),
base_url=base_url,
api_key=api_key,
+ timeout=kwargs.get("timeout", 60),
)
elif provider == "gemini":
if not kwargs.get("api_key", ""):
- api_key = os.getenv("GOOGLE_API_KEY", "")
+ api_key = SecretStr(os.getenv("GOOGLE_API_KEY") or "")
else:
- api_key = kwargs.get("api_key")
+ api_key = SecretStr(kwargs.get("api_key") or "")
+
+ # Get model name from environment or kwargs
+ model_name = kwargs.get("model_name")
+ if not model_name:
+ if kwargs.get("vision"):
+ model_name = os.getenv("GOOGLE_API_MODEL", "gemini-1.5-flash")
+ else:
+ model_name = os.getenv("GOOGLE_API_TYPE", "gemini-1.5-flash")
+
return ChatGoogleGenerativeAI(
- model=kwargs.get("model_name", "gemini-2.0-flash-exp"),
+ model=model_name,
temperature=kwargs.get("temperature", 0.0),
- google_api_key=api_key,
+ api_key=api_key,
+ timeout=kwargs.get("timeout", 60)
)
elif provider == "ollama":
return ChatOllama(
@@ -97,27 +112,28 @@ def get_llm_model(provider: str, **kwargs):
else:
base_url = kwargs.get("base_url")
if not kwargs.get("api_key", ""):
- api_key = os.getenv("AZURE_OPENAI_API_KEY", "")
+ api_key = SecretStr(os.getenv("AZURE_OPENAI_API_KEY") or "")
else:
- api_key = kwargs.get("api_key")
+ api_key = SecretStr(kwargs.get("api_key") or "")
return AzureChatOpenAI(
model=kwargs.get("model_name", "gpt-4o"),
temperature=kwargs.get("temperature", 0.0),
api_version="2024-05-01-preview",
azure_endpoint=base_url,
api_key=api_key,
+ timeout=kwargs.get("timeout", 60),
)
else:
raise ValueError(f"Unsupported provider: {provider}")
# Predefined model names for common providers
model_names = {
- "anthropic": ["claude-3-5-sonnet-20240620", "claude-3-opus-20240229"],
- "openai": ["gpt-4o", "gpt-4", "gpt-3.5-turbo"],
+ "anthropic": ["claude-3-5-sonnet-latest", "claude-3-5-sonnet-20241022"],
+ "openai": ["gpt-4o"],
"deepseek": ["deepseek-chat"],
- "gemini": ["gemini-2.0-flash-exp", "gemini-2.0-flash-thinking-exp", "gemini-1.5-flash-latest", "gemini-1.5-flash-8b-latest", "gemini-2.0-flash-thinking-exp-1219" ],
+ "gemini": ["gemini-1.5-pro", "gemini-2.0-flash"],
"ollama": ["qwen2.5:7b", "llama2:7b"],
- "azure_openai": ["gpt-4o", "gpt-4", "gpt-3.5-turbo"]
+ "azure_openai": ["gpt-4", "gpt-3.5-turbo"]
}
# Callback to update the model name dropdown based on the selected provider
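
With this change, callers pass plain strings (or nothing at all) and `get_llm_model()` handles the `SecretStr` wrapping and the 60-second default timeout itself. A sketch of the DeepSeek path used by the CLI and tests:

```python
import os
from src.utils import utils

# If the api_key kwarg is empty, get_llm_model() falls back to DEEPSEEK_API_KEY
# from the environment; either way the value is wrapped in SecretStr before it
# reaches ChatOpenAI, and timeout defaults to 60 seconds when omitted.
llm = utils.get_llm_model(
    provider="deepseek",
    model_name="deepseek-chat",
    temperature=0.0,
    base_url="https://api.deepseek.com/v1",
    api_key=os.getenv("DEEPSEEK_API_KEY", ""),
    timeout=60,
)
print(type(llm).__name__)  # ChatOpenAI
```

The Gemini branch additionally falls back to `GOOGLE_API_MODEL` (when vision is requested) or `GOOGLE_API_TYPE` (text-only) if no `model_name` is supplied.
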
diff --git a/test_gemini_connection.py b/test_gemini_connection.py
new file mode 100644
index 00000000..0feeecad
--- /dev/null
+++ b/test_gemini_connection.py
@@ -0,0 +1,47 @@
+import google.generativeai as genai
+import os
+from dotenv import load_dotenv, find_dotenv
+
+# Force reload of environment variables
+load_dotenv(find_dotenv(), override=True)
+
+api_key = os.environ.get("GOOGLE_API_KEY")
+model_name = os.environ.get("GOOGLE_API_MODEL")
+
+if not api_key or not model_name:
+ raise ValueError("Missing required environment variables: GOOGLE_API_KEY or GOOGLE_API_MODEL")
+
+print(f"Using model: {model_name}")
+genai.configure(api_key=api_key, transport="rest")
+
+# List all available models
+print("\nAvailable models:")
+for m in genai.list_models():
+ print(f"- {m.name}")
+
+# Check that the model exists in the client
+found_model = False
+for m in genai.list_models():
+ model_id = m.name.replace("models/", "")
+ if model_id == model_name:
+ found_model = True
+ print(f"\nFound model: {m.name}")
+ break
+
+if not found_model:
+ print("\nAvailable model IDs:")
+ for m in genai.list_models():
+ print(f"- {m.name.replace('models/', '')}")
+
+assert found_model, f"Model not found: {model_name}"
+
+# Load the model
+model = genai.GenerativeModel(model_name)
+
+# Perform a simple generation task
+try:
+ response = model.generate_content("Hello, I'm testing the Gemini API connection. Please respond with a short greeting.")
+ print(f"\nResponse: {response.text}")
+except Exception as e:
+ print(f"\nError generating content: {e}")
+ raise
\ No newline at end of file
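
The script above talks to Gemini through `google.generativeai` directly; the sketch below runs the same credentials through the project's `get_llm_model()` path instead. It assumes `GOOGLE_API_KEY` and `GOOGLE_API_MODEL` are already set, as the script requires:

```python
from dotenv import load_dotenv
from src.utils import utils

load_dotenv(override=True)

# vision=True makes the gemini branch read GOOGLE_API_MODEL for the model name.
llm = utils.get_llm_model(provider="gemini", vision=True, temperature=0.0)
reply = llm.invoke("Hello, please respond with a short greeting.")
print(reply.content)
```
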
diff --git a/test_results.txt b/test_results.txt
new file mode 100644
index 00000000..86a61db1
--- /dev/null
+++ b/test_results.txt
@@ -0,0 +1,125 @@
+============================= test session starts ==============================
+platform darwin -- Python 3.11.9, pytest-8.3.4, pluggy-1.5.0 -- /Users/dmieloch/Dev/experiments/web-ui/venv/bin/python
+cachedir: .pytest_cache
+rootdir: /Users/dmieloch/Dev/experiments/web-ui
+configfile: pytest.ini
+plugins: cov-6.0.0, asyncio-0.25.2, anyio-4.8.0, timeout-2.3.1
+asyncio: mode=Mode.AUTO, asyncio_default_fixture_loop_scope=function
+collecting ...
+----------------------------- live log collection ------------------------------
+INFO root:service.py:51 Anonymized telemetry enabled. See https://github.com/gregpr07/browser-use for more information.
+INFO httpx:_client.py:1038 HTTP Request: GET https://api.gradio.app/gradio-messaging/en "HTTP/1.1 200 OK"
+collected 133 items
+
+tests/test_browser_cli.py::TestBrowserInitialization::test_basic_initialization
+-------------------------------- live log setup --------------------------------
+INFO tests.test_browser_cli:test_browser_cli.py:28 Cleanup start - Browser state: False
+INFO tests.test_browser_cli:test_browser_cli.py:39 Globals and environment reset before test
+PASSED [ 1/133]
+------------------------------ live log teardown -------------------------------
+INFO tests.test_browser_cli:test_browser_cli.py:45 Cleanup finally - Browser state: False
+INFO tests.test_browser_cli:test_browser_cli.py:65 Globals and environment reset after test
+
+tests/test_browser_cli.py::TestBrowserInitialization::test_window_size
+-------------------------------- live log setup --------------------------------
+INFO tests.test_browser_cli:test_browser_cli.py:28 Cleanup start - Browser state: False
+INFO tests.test_browser_cli:test_browser_cli.py:39 Globals and environment reset before test
+FAILED [ 2/133]
+------------------------------ live log teardown -------------------------------
+INFO tests.test_browser_cli:test_browser_cli.py:45 Cleanup finally - Browser state: False
+INFO tests.test_browser_cli:test_browser_cli.py:65 Globals and environment reset after test
+
+tests/test_browser_cli.py::TestBrowserInitialization::test_headless_mode
+-------------------------------- live log setup --------------------------------
+INFO tests.test_browser_cli:test_browser_cli.py:28 Cleanup start - Browser state: False
+INFO tests.test_browser_cli:test_browser_cli.py:39 Globals and environment reset before test
+FAILED [ 3/133]
+------------------------------ live log teardown -------------------------------
+INFO tests.test_browser_cli:test_browser_cli.py:45 Cleanup finally - Browser state: False
+INFO tests.test_browser_cli:test_browser_cli.py:65 Globals and environment reset after test
+
+tests/test_browser_cli.py::TestBrowserInitialization::test_user_data_dir
+-------------------------------- live log setup --------------------------------
+INFO tests.test_browser_cli:test_browser_cli.py:28 Cleanup start - Browser state: False
+INFO tests.test_browser_cli:test_browser_cli.py:39 Globals and environment reset before test
+PASSED [ 4/133]
+------------------------------ live log teardown -------------------------------
+INFO tests.test_browser_cli:test_browser_cli.py:45 Cleanup finally - Browser state: False
+INFO tests.test_browser_cli:test_browser_cli.py:65 Globals and environment reset after test
+
+tests/test_browser_cli.py::TestBrowserInitialization::test_proxy_configuration
+-------------------------------- live log setup --------------------------------
+INFO tests.test_browser_cli:test_browser_cli.py:28 Cleanup start - Browser state: False
+INFO tests.test_browser_cli:test_browser_cli.py:39 Globals and environment reset before test
+PASSED [ 5/133]
+------------------------------ live log teardown -------------------------------
+INFO tests.test_browser_cli:test_browser_cli.py:45 Cleanup finally - Browser state: False
+INFO tests.test_browser_cli:test_browser_cli.py:65 Globals and environment reset after test
+
+tests/test_browser_cli.py::TestBrowserInitialization::test_disable_security
+-------------------------------- live log setup --------------------------------
+INFO tests.test_browser_cli:test_browser_cli.py:28 Cleanup start - Browser state: False
+INFO tests.test_browser_cli:test_browser_cli.py:39 Globals and environment reset before test
+FAILED [ 6/133]
+------------------------------ live log teardown -------------------------------
+INFO tests.test_browser_cli:test_browser_cli.py:45 Cleanup finally - Browser state: False
+INFO tests.test_browser_cli:test_browser_cli.py:65 Globals and environment reset after test
+
+tests/test_browser_cli.py::TestBrowserInitialization::test_multiple_initialization
+-------------------------------- live log setup --------------------------------
+INFO tests.test_browser_cli:test_browser_cli.py:28 Cleanup start - Browser state: False
+INFO tests.test_browser_cli:test_browser_cli.py:39 Globals and environment reset before test
+FAILED [ 7/133]
+------------------------------ live log teardown -------------------------------
+INFO tests.test_browser_cli:test_browser_cli.py:45 Cleanup finally - Browser state: False
+INFO tests.test_browser_cli:test_browser_cli.py:65 Globals and environment reset after test
+
+tests/test_browser_cli.py::TestBrowserTasks::test_model_switching
+-------------------------------- live log setup --------------------------------
+INFO tests.test_browser_cli:test_browser_cli.py:28 Cleanup start - Browser state: False
+INFO tests.test_browser_cli:test_browser_cli.py:39 Globals and environment reset before test
+FAILED [ 8/133]
+------------------------------ live log teardown -------------------------------
+INFO tests.test_browser_cli:test_browser_cli.py:45 Cleanup finally - Browser state: False
+INFO tests.test_browser_cli:test_browser_cli.py:65 Globals and environment reset after test
+
+tests/test_browser_cli.py::TestBrowserTasks::test_vision_capability
+-------------------------------- live log setup --------------------------------
+INFO tests.test_browser_cli:test_browser_cli.py:28 Cleanup start - Browser state: False
+INFO tests.test_browser_cli:test_browser_cli.py:39 Globals and environment reset before test
+FAILED [ 9/133]
+------------------------------ live log teardown -------------------------------
+INFO tests.test_browser_cli:test_browser_cli.py:45 Cleanup finally - Browser state: False
+INFO tests.test_browser_cli:test_browser_cli.py:65 Globals and environment reset after test
+
+tests/test_browser_cli.py::TestBrowserTasks::test_recording
+-------------------------------- live log setup --------------------------------
+INFO tests.test_browser_cli:test_browser_cli.py:28 Cleanup start - Browser state: False
+INFO tests.test_browser_cli:test_browser_cli.py:39 Globals and environment reset before test
+-------------------------------- live log call ---------------------------------
+INFO src.agent.custom_agent:custom_agent.py:438 🚀 Starting task: go to example.com
+INFO src.agent.custom_agent:custom_agent.py:222 Starting step 1
+INFO httpx:_client.py:1786 HTTP Request: POST https://api.deepseek.com/v1/chat/completions "HTTP/1.1 422 Unprocessable Entity"
+INFO httpx:_client.py:1038 HTTP Request: POST https://api.deepseek.com/v1/chat/completions "HTTP/1.1 200 OK"
+INFO src.agent.custom_agent:custom_agent.py:139 Model Response: failed
+INFO src.agent.custom_agent:logging.py:96 Batch: 1 action events
+INFO browser_use.controller.service:service.py:59 🔗 Navigated to https://example.com
+INFO src.agent.custom_agent:custom_agent.py:313 Step 2 completed
+INFO src.agent.custom_agent:custom_agent.py:222 Starting step 2
+INFO httpx:_client.py:1786 HTTP Request: POST https://api.deepseek.com/v1/chat/completions "HTTP/1.1 422 Unprocessable Entity"
+INFO httpx:_client.py:1038 HTTP Request: POST https://api.deepseek.com/v1/chat/completions "HTTP/1.1 200 OK"
+INFO src.agent.custom_agent:custom_agent.py:139 Model Response: success
+INFO src.agent.custom_agent:logging.py:96 Batch: 1 action events
+INFO src.agent.custom_agent:custom_agent.py:260 Task completed
+INFO src.agent.custom_agent:custom_agent.py:313 Step 3 completed
+INFO src.agent.custom_agent:custom_agent.py:481 ✅ Task completed successfully
+WARNING src.agent.custom_agent:custom_agent.py:342 No history or first screenshot to create GIF from
+PASSED [ 10/133]
+------------------------------ live log teardown -------------------------------
+INFO tests.test_browser_cli:test_browser_cli.py:45 Cleanup finally - Browser state: False
+INFO tests.test_browser_cli:test_browser_cli.py:65 Globals and environment reset after test
+
+tests/test_browser_cli.py::TestBrowserTasks::test_tracing
+-------------------------------- live log setup --------------------------------
+INFO tests.test_browser_cli:test_browser_cli.py:28 Cleanup start - Browser state: False
+INFO tests.test_browser_cli:test_browser_cli.py:39 Globals and environment reset before test
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 00000000..c74cef78
--- /dev/null
+++ b/tests/__init__.py
@@ -0,0 +1,7 @@
+"""
+Test suite for the browser-use project.
+
+This package contains tests for:
+- Browser automation (CLI, core functionality, Playwright)
+- API integration (endpoints, LLM integration)
+"""
\ No newline at end of file
diff --git a/tests/requirements-test.txt b/tests/requirements-test.txt
new file mode 100644
index 00000000..bef705f9
--- /dev/null
+++ b/tests/requirements-test.txt
@@ -0,0 +1,3 @@
+pytest>=7.0.0
+pytest-asyncio>=0.21.0
+pytest-cov>=4.0.0
\ No newline at end of file
diff --git a/tests/test_api.py b/tests/test_api.py
new file mode 100644
index 00000000..5dc4fae7
--- /dev/null
+++ b/tests/test_api.py
@@ -0,0 +1,73 @@
+import asyncio
+from browser_use.browser.browser import Browser, BrowserConfig
+from browser_use.browser.context import BrowserContextConfig, BrowserContextWindowSize
+from browser_use.agent.service import Agent
+from src.utils import utils
+from src.controller.custom_controller import CustomController
+from src.agent.custom_agent import CustomAgent
+from src.agent.custom_prompts import CustomSystemPrompt
+import os
+
+async def main():
+ window_w, window_h = 1920, 1080
+
+ # Initialize the browser
+ browser = Browser(
+ config=BrowserConfig(
+ headless=False,
+ disable_security=True,
+ extra_chromium_args=[f"--window-size={window_w},{window_h}"],
+ )
+ )
+
+ # Create a browser context
+ async with await browser.new_context(
+ config=BrowserContextConfig(
+ trace_path="./tmp/traces",
+ save_recording_path="./tmp/record_videos",
+ no_viewport=False,
+ browser_window_size=BrowserContextWindowSize(
+ width=window_w, height=window_h
+ ),
+ )
+ ) as browser_context:
+ # Initialize the controller
+ controller = CustomController()
+
+ # Initialize the agent with a simple task using CustomAgent
+ agent = CustomAgent(
+ task="go to google.com and search for 'OpenAI'",
+ add_infos="", # hints for the LLM if needed
+ llm=utils.get_llm_model(
+ provider="deepseek",
+ model_name="deepseek-chat", # Using V2.5 via deepseek-chat endpoint
+ temperature=0.8,
+ base_url="https://api.deepseek.com/v1",
+ api_key=os.getenv("DEEPSEEK_API_KEY", "")
+ ),
+ browser=browser,
+ browser_context=browser_context,
+ controller=controller,
+ system_prompt_class=CustomSystemPrompt,
+ use_vision=False, # Must be False for DeepSeek
+ tool_call_in_content=True, # Required for DeepSeek as per test files
+ max_actions_per_step=1 # Control granularity of actions
+ )
+
+ # Run the agent
+ history = await agent.run(max_steps=10)
+
+ print("Final Result:")
+ print(history.final_result())
+
+ print("\nErrors:")
+ print(history.errors())
+
+ print("\nModel Actions:")
+ print(history.model_actions())
+
+ print("\nThoughts:")
+ print(history.model_thoughts())
+
+if __name__ == "__main__":
+ asyncio.run(main())
\ No newline at end of file
diff --git a/tests/test_browser_cli.py b/tests/test_browser_cli.py
new file mode 100644
index 00000000..7974ea62
--- /dev/null
+++ b/tests/test_browser_cli.py
@@ -0,0 +1,591 @@
+import sys
+from pathlib import Path
+import tempfile
+import logging
+from io import StringIO
+import contextlib
+
+# Add project root to Python path
+PROJECT_ROOT = Path(__file__).parent.parent
+sys.path.insert(0, str(PROJECT_ROOT))
+
+import pytest
+import asyncio
+import os
+from cli.browser_use_cli import initialize_browser, run_browser_task, close_browser, main, _global_browser, _global_browser_context
+from src.utils.utils import model_names # Import model names from utils
+
+# Configure logging for tests
+logging.basicConfig(level=logging.INFO)
+logger = logging.getLogger(__name__)
+
+# Reset global state before each test
+@pytest.fixture(autouse=True)
+async def cleanup():
+ """Ensure proper cleanup of browser and event loop between tests"""
+ global _global_browser, _global_browser_context
+
+ logger.info(f"Cleanup start - Browser state: {_global_browser is not None}")
+
+ # Reset globals and environment before test
+ if _global_browser is not None:
+ await close_browser()
+ logger.info("Browser closed")
+
+ _global_browser = None
+ _global_browser_context = None
+ os.environ["BROWSER_USE_RUNNING"] = "false"
+
+ logger.info("Globals and environment reset before test")
+
+ try:
+ yield
+ finally:
+ try:
+ logger.info(f"Cleanup finally - Browser state: {_global_browser is not None}")
+ if _global_browser is not None:
+ await close_browser()
+ logger.info("Browser closed")
+ # Clean up any remaining event loop resources
+            loop = asyncio.get_running_loop()
+ tasks = [t for t in asyncio.all_tasks(loop=loop) if not t.done()]
+ if tasks:
+ logger.info(f"Found {len(tasks)} pending tasks")
+ for task in tasks:
+ task.cancel()
+ await asyncio.gather(*tasks, return_exceptions=True)
+ logger.info("Pending tasks cancelled")
+ except Exception as e:
+ logger.error(f"Error during cleanup: {e}")
+ raise
+ finally:
+ _global_browser = None
+ _global_browser_context = None
+ os.environ["BROWSER_USE_RUNNING"] = "false"
+ logger.info("Globals and environment reset after test")
+
+class TestBrowserInitialization:
+ """Test browser launch-time options"""
+
+ async def test_basic_initialization(self):
+ """Test basic browser initialization with defaults"""
+ success = await initialize_browser()
+ assert success is True
+
+ async def test_window_size(self):
+ """Test custom window size"""
+ success = await initialize_browser(window_size=(800, 600))
+ assert success is True
+
+ # Create a simple HTML page that displays window size
+ result = await run_browser_task(
+ "go to data:text/html,",
+ model="deepseek-chat"
+ )
+ assert result is not None and "800" in result.lower() and "600" in result.lower()
+
+ async def test_headless_mode(self):
+ """Test headless mode"""
+ success = await initialize_browser(headless=True)
+ assert success is True
+ # Verify we can still run tasks
+ result = await run_browser_task(
+ "go to example.com and tell me the title",
+ model="deepseek-chat"
+ )
+ assert result is not None and "example" in result.lower()
+
+ async def test_user_data_dir(self, tmp_path):
+ """Test custom user data directory"""
+ user_data = tmp_path / "chrome_data"
+ user_data.mkdir()
+ success = await initialize_browser(user_data_dir=str(user_data))
+ assert success is True
+ assert user_data.exists()
+
+ async def test_proxy_configuration(self):
+ """Test proxy configuration"""
+ # Using a test proxy - in practice you'd use a real proxy server
+ test_proxy = "localhost:8080"
+ success = await initialize_browser(proxy=test_proxy)
+ assert success is True
+
+ @pytest.mark.timeout(30) # Add 30 second timeout
+ async def test_disable_security(self):
+ """Test security disable option"""
+ success = await initialize_browser(disable_security=True)
+ assert success is True
+ # Try accessing a cross-origin resource that would normally be blocked
+ result = await run_browser_task(
+ "go to a test page and try to access cross-origin content",
+ model="deepseek-chat",
+ max_steps=5 # Limit steps to prevent timeout
+ )
+ assert result is not None and "error" not in result.lower()
+
+ async def test_multiple_initialization(self):
+ """Test that second initialization fails while browser is running"""
+ success1 = await initialize_browser()
+ assert success1 is True
+ success2 = await initialize_browser()
+ assert success2 is False
+
+class TestBrowserTasks:
+ """Test runtime task options"""
+
+ @pytest.fixture(autouse=True)
+ async def setup_browser(self):
+ """Start browser before each test"""
+ await initialize_browser()
+ yield
+
+ @pytest.fixture
+ def local_test_page(self):
+ """Create a local HTML file for testing"""
+ with tempfile.NamedTemporaryFile(mode='w', suffix='.html', delete=False) as f:
+ f.write("""
+
+
+
+                <p>This is a test paragraph with specific content.</p>
+ + + + + """) + return f.name + + async def test_model_switching(self): + """Test switching between different LLM models""" + # Test DeepSeek - Note: 422 errors are expected but don't affect functionality + try: + result1 = await run_browser_task( + "go to example.com and summarize the page", + model="deepseek-chat" + ) + assert result1 is not None + except Exception as e: + if "422" not in str(e): # Only ignore 422 errors + raise + + # Test Gemini + os.environ["GOOGLE_API_MODEL"] = model_names["gemini"][0] # Set model via environment + result2 = await run_browser_task( + "what do you see on the page?", + model="gemini", + vision=True + ) + assert result2 is not None and len(result2) > 0 + assert result1 is not None and len(result1) > 0 + assert result1 != result2 # Different models should give different responses + + async def test_vision_capability(self): + """Test vision capabilities""" + # Set Gemini model via environment + os.environ["GOOGLE_API_MODEL"] = model_names["gemini"][0] + + # Without vision + result1 = await run_browser_task( + "what do you see on example.com?", + model="gemini", + vision=False + ) + + # With vision + result2 = await run_browser_task( + "what do you see on example.com?", + model="gemini", + vision=True + ) + + assert result1 is not None and result2 is not None and len(result2) > len(result1) # Vision should provide more details + + async def test_recording(self, tmp_path): + """Test session recording""" + record_path = tmp_path / "recordings" + record_path.mkdir() + + await run_browser_task( + "go to example.com", + record=True, + record_path=str(record_path) + ) + + # Check that recording file was created + recordings = list(record_path.glob("*.webm")) + assert len(recordings) > 0 + + async def test_tracing(self, tmp_path): + """Test debug tracing""" + trace_path = tmp_path / "traces" + trace_path.mkdir() + + await run_browser_task( + "go to example.com", + trace_path=str(trace_path) + ) + + # Wait a bit for the trace file to be written + await asyncio.sleep(1) + + # Check that trace file was created + traces = list(trace_path.glob("*.zip")) + assert len(traces) > 0 + + async def test_max_steps_limit(self): + """Test max steps limitation""" + with pytest.raises(Exception): + # This task would normally take more than 2 steps + await run_browser_task( + "go to google.com, search for 'OpenAI', click first result", + max_steps=2 + ) + + async def test_max_actions_limit(self): + """Test max actions per step limitation""" + with pytest.raises(Exception): + # This would require multiple actions in one step + await run_browser_task( + "go to google.com and click all search results", + max_actions=1 + ) + + async def test_additional_context(self): + """Test providing additional context""" + result = await run_browser_task( + "summarize the content", + add_info="Focus on technical details and pricing information" + ) + assert result is not None and ("technical" in result.lower() or "pricing" in result.lower()) + + async def test_report_generation(self, local_test_page): + """Test that the agent can analyze a page and return a report""" + logger.info("Starting report generation test") + + # Check initial state + logger.info(f"Initial browser state: {_global_browser is not None}") + + # Initialize browser + success = await initialize_browser() + logger.info(f"Browser initialization result: {success}") + + assert success is True, "Browser initialization failed" + + # Create the task prompt + prompt = f"Go to file://{local_test_page} and create a report about the page 
structure, including any interactive elements found" + + try: + result = await run_browser_task( + prompt=prompt, + model="deepseek-chat", + max_steps=3 + ) + + logger.info(f"Received report: {result}") + + # Verify the report contains expected information + assert result is not None + assert "Test Content" in result + assert "button" in result.lower() + assert "paragraph" in result.lower() + + logger.info("Report verification successful") + + except Exception as e: + logger.error(f"Error during report generation: {e}") + raise + finally: + # Cleanup + os.unlink(local_test_page) + logger.info("Test cleanup completed") + +class TestBrowserLifecycle: + """Test browser lifecycle management""" + + async def test_close_and_reopen(self): + """Test closing and reopening browser""" + # First session + success1 = await initialize_browser() + assert success1 is True + result1 = await run_browser_task("go to example.com") + assert result1 is not None + await close_browser() + + # Second session + success2 = await initialize_browser() + assert success2 is True + result2 = await run_browser_task("go to example.com") + assert result2 is not None + + async def test_error_handling(self): + """Test error handling in various scenarios""" + # Test running task without browser + with pytest.raises(Exception): + await run_browser_task("this should fail") + + # Test closing already closed browser + await close_browser() + await close_browser() # Should not raise error + + # Test recovery after error + success = await initialize_browser() + assert success is True + result = await run_browser_task("go to example.com") + assert result is not None + +class TestCLICommands: + """Comprehensive tests for CLI command functionality""" + + @pytest.fixture(autouse=True) + def setup_cli(self): + """Setup and cleanup for CLI tests""" + # Store original argv and stdout + self.original_argv = sys.argv.copy() + self.original_stdout = sys.stdout + + # Create StringIO buffer and redirect stdout + self.output = StringIO() + sys.stdout = self.output + + yield + + # Restore original argv and stdout + sys.argv = self.original_argv + sys.stdout = self.original_stdout + + # Close the StringIO buffer + self.output.close() + + def test_start_command_basic(self): + """Test basic browser start command""" + # Ensure output buffer is empty + self.output.truncate(0) + self.output.seek(0) + + sys.argv = ["browser-use", "start"] + with contextlib.redirect_stdout(self.output): + main() + output = self.output.getvalue() + assert "Browser session started successfully" in output + + def test_start_command_with_options(self): + """Test browser start with various options""" + # Ensure output buffer is empty + self.output.truncate(0) + self.output.seek(0) + + sys.argv = [ + "browser-use", "start", + "--window-size", "800x600", + "--headless", + "--disable-security" + ] + with contextlib.redirect_stdout(self.output): + main() + output = self.output.getvalue() + assert "Browser session started successfully" in output + + def test_run_command_basic(self): + """Test basic run command""" + # First start the browser + self.output.truncate(0) + self.output.seek(0) + sys.argv = ["browser-use", "start"] + with contextlib.redirect_stdout(self.output): + main() + + # Then run a task + self.output.truncate(0) + self.output.seek(0) + sys.argv = [ + "browser-use", "run", + "go to example.com", + "--model", "deepseek-chat" + ] + with contextlib.redirect_stdout(self.output): + main() + output = self.output.getvalue() + assert len(output) > 0 + + def 
test_run_command_with_options(self): + """Test run command with various options""" + # First start the browser + self.output.truncate(0) + self.output.seek(0) + sys.argv = ["browser-use", "start"] + with contextlib.redirect_stdout(self.output): + main() + + # Then run a task with multiple options + self.output.truncate(0) + self.output.seek(0) + sys.argv = [ + "browser-use", "run", + "go to example.com", + "--model", "gemini", + "--vision", + "--max-steps", "5", + "--max-actions", "2", + "--add-info", "Focus on the main content" + ] + with contextlib.redirect_stdout(self.output): + main() + output = self.output.getvalue() + assert len(output) > 0 + + def test_close_command(self): + """Test browser close command""" + # First start the browser + self.output.truncate(0) + self.output.seek(0) + sys.argv = ["browser-use", "start"] + with contextlib.redirect_stdout(self.output): + main() + + # Then close it + self.output.truncate(0) + self.output.seek(0) + sys.argv = ["browser-use", "close"] + with contextlib.redirect_stdout(self.output): + main() + output = self.output.getvalue() + assert "Browser session closed" in output + + def test_invalid_command(self): + """Test handling of invalid commands""" + self.output.truncate(0) + self.output.seek(0) + sys.argv = ["browser-use", "invalid-command"] + with pytest.raises(SystemExit): + with contextlib.redirect_stdout(self.output): + main() + + def test_missing_required_args(self): + """Test handling of missing required arguments""" + self.output.truncate(0) + self.output.seek(0) + sys.argv = ["browser-use", "run"] # Missing prompt + with pytest.raises(SystemExit): + with contextlib.redirect_stdout(self.output): + main() + + def test_invalid_window_size(self): + """Test handling of invalid window size format""" + self.output.truncate(0) + self.output.seek(0) + sys.argv = ["browser-use", "start", "--window-size", "invalid"] + with contextlib.redirect_stdout(self.output): + main() # Should use default size + output = self.output.getvalue() + assert "Browser session started successfully" in output + + def test_recording_options(self): + """Test recording functionality via CLI""" + with tempfile.TemporaryDirectory() as tmp_dir: + # First start the browser + self.output.truncate(0) + self.output.seek(0) + sys.argv = ["browser-use", "start"] + with contextlib.redirect_stdout(self.output): + main() + + # Then run with recording + self.output.truncate(0) + self.output.seek(0) + sys.argv = [ + "browser-use", "run", + "go to example.com", + "--record", + "--record-path", tmp_dir + ] + with contextlib.redirect_stdout(self.output): + main() + recordings = list(Path(tmp_dir).glob("*.webm")) + assert len(recordings) > 0 + + def test_tracing_options(self): + """Test tracing functionality via CLI""" + with tempfile.TemporaryDirectory() as tmp_dir: + # First start the browser + self.output.truncate(0) + self.output.seek(0) + sys.argv = ["browser-use", "start"] + with contextlib.redirect_stdout(self.output): + main() + + # Then run with tracing + self.output.truncate(0) + self.output.seek(0) + sys.argv = [ + "browser-use", "run", + "go to example.com", + "--trace-path", tmp_dir + ] + with contextlib.redirect_stdout(self.output): + main() + traces = list(Path(tmp_dir).glob("*.zip")) + assert len(traces) > 0 + + def test_model_switching_cli(self): + """Test switching between different models via CLI""" + # First start the browser + self.output.truncate(0) + self.output.seek(0) + sys.argv = ["browser-use", "start"] + with contextlib.redirect_stdout(self.output): + main() + 
+ # Test with DeepSeek + self.output.truncate(0) + self.output.seek(0) + sys.argv = [ + "browser-use", "run", + "go to example.com", + "--model", "deepseek-chat" + ] + with contextlib.redirect_stdout(self.output): + main() + deepseek_output = self.output.getvalue() + + # Close browser to clean up event loop + self.output.truncate(0) + self.output.seek(0) + sys.argv = ["browser-use", "close"] + with contextlib.redirect_stdout(self.output): + main() + + # Start new browser for Gemini test + self.output.truncate(0) + self.output.seek(0) + sys.argv = ["browser-use", "start"] + with contextlib.redirect_stdout(self.output): + main() + + # Test with Gemini + self.output.truncate(0) + self.output.seek(0) + os.environ["GOOGLE_API_MODEL"] = model_names["gemini"][0] + sys.argv = [ + "browser-use", "run", + "go to example.com", + "--model", "gemini", + "--vision" + ] + with contextlib.redirect_stdout(self.output): + main() + gemini_output = self.output.getvalue() + + # Close browser + self.output.truncate(0) + self.output.seek(0) + sys.argv = ["browser-use", "close"] + with contextlib.redirect_stdout(self.output): + main() + + assert len(deepseek_output) > 0 + assert len(gemini_output) > 0 + assert deepseek_output != gemini_output \ No newline at end of file diff --git a/tests/test_browser_controller.py b/tests/test_browser_controller.py new file mode 100644 index 00000000..409d8d33 --- /dev/null +++ b/tests/test_browser_controller.py @@ -0,0 +1,125 @@ +import pytest +from unittest.mock import AsyncMock, patch, MagicMock +import asyncio +from src.utils.browser_controller import BrowserController + +@pytest.fixture +async def browser_controller(): + controller = BrowserController() + yield controller + await controller.cleanup() + +@pytest.mark.asyncio +async def test_single_initialization(browser_controller): + mock_browser = AsyncMock() + mock_playwright = AsyncMock() + mock_playwright.chromium.launch = AsyncMock(return_value=mock_browser) + + with patch('src.utils.browser_controller.async_playwright', + return_value=AsyncMock(start=AsyncMock(return_value=mock_playwright))) as mock_async_playwright: + await browser_controller.initialize() + assert browser_controller.init_count == 1 + assert browser_controller.browser == mock_browser + + # Verify progress events + progress_history = browser_controller.logger.get_progress_history() + assert len(progress_history) >= 2 # At least start and complete events + assert progress_history[0]["status"] == "starting" + assert progress_history[-1]["status"] == "completed" + assert progress_history[-1]["progress"] == 1.0 + + # Second initialization should not create new browser + await browser_controller.initialize() + assert browser_controller.init_count == 1 + mock_async_playwright.assert_called_once() + +@pytest.mark.asyncio +async def test_concurrent_initialization(browser_controller): + mock_browser = AsyncMock() + mock_playwright = AsyncMock() + mock_playwright.chromium.launch = AsyncMock(return_value=mock_browser) + + with patch('src.utils.browser_controller.async_playwright', + return_value=AsyncMock(start=AsyncMock(return_value=mock_playwright))): + # Start multiple concurrent initializations + tasks = [browser_controller.initialize() for _ in range(3)] + await asyncio.gather(*tasks) + + # Should only initialize once + assert browser_controller.init_count == 1 + assert browser_controller.browser == mock_browser + + # Verify browser events + browser_events = browser_controller.logger.get_browser_events() + launch_events = [e for e in browser_events if 
e["event_type"] == "browser_launched"] + assert len(launch_events) == 1 + +@pytest.mark.asyncio +async def test_browser_launch_options(browser_controller): + mock_browser = AsyncMock() + mock_playwright = AsyncMock() + mock_playwright.chromium.launch = AsyncMock(return_value=mock_browser) + + with patch('src.utils.browser_controller.async_playwright', + return_value=AsyncMock(start=AsyncMock(return_value=mock_playwright))) as mock_async_playwright: + await browser_controller.initialize() + + # Verify launch options + mock_playwright.chromium.launch.assert_called_once_with( + headless=True, + args=['--no-sandbox'] + ) + + # Verify browser events + browser_events = browser_controller.logger.get_browser_events() + launch_event = next(e for e in browser_events if e["event_type"] == "browser_launched") + assert launch_event["details"]["headless"] is True + +@pytest.mark.asyncio +async def test_initialization_failure(browser_controller): + mock_playwright = AsyncMock() + mock_playwright.chromium.launch = AsyncMock(side_effect=Exception("Browser launch failed")) + + with patch('src.utils.browser_controller.async_playwright', + return_value=AsyncMock(start=AsyncMock(return_value=mock_playwright))), \ + pytest.raises(Exception, match="Browser launch failed"): + await browser_controller.initialize() + + assert browser_controller.browser is None + assert browser_controller.init_count == 0 + + # Verify error events + browser_events = browser_controller.logger.get_browser_events() + error_event = next(e for e in browser_events if e["event_type"] == "launch_failed") + assert "Browser launch failed" in error_event["details"]["error"] + + # Verify progress events show failure + progress_events = browser_controller.logger.get_progress_history() + final_event = progress_events[-1] + assert final_event["status"] == "failed" + assert final_event["progress"] == 0.0 + +@pytest.mark.asyncio +async def test_browser_cleanup(browser_controller): + mock_browser = AsyncMock() + mock_playwright = AsyncMock() + mock_playwright.chromium.launch = AsyncMock(return_value=mock_browser) + + with patch('src.utils.browser_controller.async_playwright', + return_value=AsyncMock(start=AsyncMock(return_value=mock_playwright))): + await browser_controller.initialize() + assert browser_controller.browser is not None + + await browser_controller.cleanup() + mock_browser.close.assert_called_once() + mock_playwright.stop.assert_called_once() + assert browser_controller.browser is None + assert browser_controller._playwright is None + + # Verify cleanup events + progress_events = browser_controller.logger.get_progress_history() + cleanup_events = [e for e in progress_events if e["step"] == "cleanup"] + assert len(cleanup_events) >= 2 # At least start and complete events + assert cleanup_events[0]["status"] == "starting" + assert cleanup_events[-1]["status"] == "completed" + assert cleanup_events[-1]["progress"] == 1.0 \ No newline at end of file diff --git a/tests/test_browser_use_cli.py b/tests/test_browser_use_cli.py new file mode 100644 index 00000000..1506d019 --- /dev/null +++ b/tests/test_browser_use_cli.py @@ -0,0 +1,117 @@ +import pytest +import asyncio +from pathlib import Path +from urllib.parse import urlparse +from cli.browser_use_cli import run_browser_task, initialize_browser, close_browser + +@pytest.fixture +async def browser_session(): + """Fixture to manage browser session for tests""" + await initialize_browser(headless=True) + yield + await close_browser() + +@pytest.mark.asyncio +async def test_url_validation(): + 
"""Test URL validation in run_browser_task""" + # Test invalid URLs + invalid_urls = [ + "not-a-url", + "http://", + "https://", + "ftp://example.com", # non-http(s) protocol + "", + None + ] + + for url in invalid_urls: + result = await run_browser_task( + prompt="test task", + url=url, + provider="Deepseek", + headless=True + ) + assert "Invalid URL provided" in result + + # Test valid URLs + valid_urls = [ + "https://example.com", + "http://localhost:8080", + "https://prompt-forge.replit.app/" + ] + + for url in valid_urls: + result = await run_browser_task( + prompt="test task", + url=url, + provider="Deepseek", + headless=True + ) + assert "Invalid URL provided" not in result + +@pytest.mark.asyncio +async def test_url_navigation(browser_session): + """Test that the browser actually navigates to the provided URL""" + url = "https://example.com" + result = await run_browser_task( + prompt="verify the page title contains 'Example'", + url=url, + provider="Deepseek", + headless=True, + max_steps=3 + ) + assert "success" in result.lower() or "verified" in result.lower() + +@pytest.mark.asyncio +async def test_url_in_prompt(): + """Test that the URL is correctly prepended to the task prompt""" + url = "https://example.com" + test_prompt = "click the button" + result = await run_browser_task( + prompt=test_prompt, + url=url, + provider="Deepseek", + headless=True + ) + + # The result should indicate navigation happened first + assert "navigated" in result.lower() or "loaded" in result.lower() + +@pytest.mark.asyncio +async def test_multiple_tasks_same_url(browser_session): + """Test running multiple tasks with the same starting URL""" + url = "https://example.com" + tasks = [ + "verify the page has loaded", + "check if there are any links on the page", + "look for a search box" + ] + + for task in tasks: + result = await run_browser_task( + prompt=task, + url=url, + provider="Deepseek", + headless=True, + max_steps=3 + ) + assert result is not None + assert isinstance(result, str) + +@pytest.mark.asyncio +async def test_url_with_different_providers(): + """Test URL handling with different providers""" + url = "https://example.com" + providers = ["Deepseek", "Google", "Anthropic"] + + for provider in providers: + result = await run_browser_task( + prompt="verify the page has loaded", + url=url, + provider=provider, + headless=True, + max_steps=3 + ) + assert result is not None + assert isinstance(result, str) + assert "Invalid URL provided" not in result \ No newline at end of file diff --git a/tests/test_browser_vision.py b/tests/test_browser_vision.py new file mode 100644 index 00000000..75b44c32 --- /dev/null +++ b/tests/test_browser_vision.py @@ -0,0 +1,94 @@ +import os +import pytest +from dotenv import load_dotenv +from src.utils import utils +from cli.browser_use_cli import run_browser_task + +# Load environment variables +load_dotenv() + +@pytest.mark.asyncio +class TestBrowserVision: + """Test browser automation with vision capabilities""" + + async def setup_method(self): + """Setup test environment""" + self.api_key = os.getenv("OPENAI_API_KEY") + if not self.api_key: + pytest.skip("OPENAI_API_KEY not set") + + async def test_vision_analysis_task(self): + """Test visual analysis of a webpage""" + result = await run_browser_task( + prompt="go to https://example.com and describe the visual layout of the page", + provider="OpenAI", + vision=True, + headless=True, # Run headless for CI/CD + record=True, # Record for debugging + record_path="./tmp/test_recordings" + ) + assert result 
is not None + assert "layout" in result.lower() or "design" in result.lower() + + async def test_vision_interaction_task(self): + """Test visual-guided interaction""" + result = await run_browser_task( + prompt="go to https://example.com and click on the most prominent link on the page", + provider="OpenAI", + vision=True, + headless=True, + record=True, + record_path="./tmp/test_recordings" + ) + assert result is not None + assert "clicked" in result.lower() or "selected" in result.lower() + + async def test_vision_verification_task(self): + """Test visual verification of page state""" + result = await run_browser_task( + prompt="go to https://example.com and verify that the main heading is visible and centered", + provider="OpenAI", + vision=True, + headless=True, + record=True, + record_path="./tmp/test_recordings" + ) + assert result is not None + assert "heading" in result.lower() and ("visible" in result.lower() or "centered" in result.lower()) + + async def test_vision_error_handling(self): + """Test error handling with vision tasks""" + # Test with a non-existent page to verify error handling + result = await run_browser_task( + prompt="go to https://nonexistent.example.com and describe what you see", + provider="OpenAI", + vision=True, + headless=True, + record=True, + record_path="./tmp/test_recordings" + ) + assert result is not None + assert "error" in result.lower() or "unable" in result.lower() or "failed" in result.lower() + + async def test_vision_with_different_models(self): + """Test vision capabilities with different providers""" + test_configs = [ + "OpenAI", # Will use gpt-4o + "Google", # Will use gemini-pro + "Anthropic" # Will use claude-3-5-sonnet-20241022 + ] + + for provider in test_configs: + result = await run_browser_task( + prompt="go to https://example.com and describe the page layout", + provider=provider, + vision=True, + headless=True, + record=True, + record_path=f"./tmp/test_recordings/{provider.lower()}" + ) + assert result is not None + assert len(result) > 0, f"Failed with provider {provider}" + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) \ No newline at end of file diff --git a/tests/test_enhanced_tracing.py b/tests/test_enhanced_tracing.py new file mode 100644 index 00000000..74b4d344 --- /dev/null +++ b/tests/test_enhanced_tracing.py @@ -0,0 +1,894 @@ +import pytest +import asyncio +import json +import zipfile +from pathlib import Path +import tempfile +from src.trace_analyzer import PlaywrightTrace, analyze_trace, EnhancedTraceAnalyzer + +# Sample enhanced trace data with new features +SAMPLE_ENHANCED_TRACE = { + "action_context": { + "before_state": { + "element": "#login-button", + "visible": True, + "enabled": True, + "text": "Log In" + }, + "after_state": { + "element": "#login-button", + "visible": True, + "enabled": True, + "clicked": True + }, + "interactive_elements": [ + { + "selector": "#login-button", + "confidence": 0.95, + "chosen": True, + "reason": "Primary login button with highest visibility" + }, + { + "selector": "#signup-button", + "confidence": 0.45, + "chosen": False, + "reason": "Not relevant for login action" + } + ], + "element_state_before": { + "visible": True, + "computed_styles": { + "pointer-events": "auto", + "opacity": "1", + "z-index": "100" + }, + "focus_state": "not-focused", + "accessibility": { + "aria-hidden": "false", + "aria-disabled": "false" + } + }, + "element_state_after": { + "visible": True, + "focus_state": "focused", + "triggered_events": ["click", "focus"], + "accessibility": { + 
"aria-hidden": "false", + "aria-disabled": "false" + } + } + }, + "decision_trail": { + "reasoning": [ + "Identified login form as primary authentication method", + "Located login button with high confidence", + "Verified button is enabled and visible" + ], + "alternatives": [ + { + "action": "click signup button", + "rejected_reason": "Not aligned with login task" + } + ], + "influential_features": ["button text", "aria-label", "position"], + "confidence_threshold": 0.8, + "attention_weights": { + "element_text": 0.6, + "aria_label": 0.3, + "position": 0.1 + }, + "alternative_paths": [ + { + "action": "click hamburger menu", + "rejected_reason": "settings directly visible", + "confidence": 0.4 + } + ] + }, + "element_identification": { + "selectors": { + "xpath": "//button[@id='login-button']", + "css": "#login-button", + "aria": "button[aria-label='Login']", + "text": "button:has-text('Log In')" + }, + "visual_position": { + "x": 100, + "y": 200, + "width": 80, + "height": 40 + }, + "relationships": { + "parent": "form#login-form", + "siblings": ["#username-input", "#password-input"] + }, + "relative_position": { + "from_top_nav": "20px from right", + "from_viewport": "top-right quadrant", + "nearest_landmarks": [ + {"element": "button.new-template", "distance": "40px left"}, + {"element": "div.user-menu", "distance": "60px right"} + ] + }, + "hierarchy": { + "parent": "nav.top-bar", + "siblings": ["button.new-template", "button.help"], + "children": ["span.icon", "span.text"] + } + }, + "failure_analysis": { + "state": "Element found but not clickable", + "attempts": [ + { + "strategy": "wait for visibility", + "outcome": "success", + "duration": 500 + } + ], + "dom_changes": [ + { + "timestamp": 1000, + "change": "overlay-removed" + } + ], + "dom_mutations": [ + { + "timestamp": "T+200ms", + "type": "attribute_change", + "element": "#settings-modal", + "attribute": "aria-hidden", + "old_value": "true", + "new_value": "false" + } + ], + "network_state": { + "requests_in_flight": 2, + "last_completed_request": "/api/settings", + "pending_requests": [ + { + "url": "/api/user/preferences", + "method": "GET", + "duration_so_far": "150ms" + } + ] + } + }, + "session_context": { + "url": "https://example.com/login", + "route_changes": [ + { + "from": "/", + "to": "/login", + "timestamp": 900 + } + ], + "network_requests": [ + { + "url": "/api/auth", + "method": "POST", + "status": 200 + } + ], + "viewport": { + "width": 1920, + "height": 1080, + "device_pixel_ratio": 2, + "orientation": "landscape" + }, + "performance_metrics": { + "memory_usage": "120MB", + "dom_node_count": 1250, + "frame_rate": "60fps", + "resource_timing": { + "dns_lookup": "10ms", + "connection": "50ms", + "ttfb": "200ms" + } + }, + "browser_state": { + "cookies_enabled": True, + "javascript_enabled": True, + "local_storage_used": "2.5MB", + "active_service_workers": 2 + } + }, + "recovery_info": { + "checkpoints": [ + { + "state": "pre-login", + "timestamp": 800, + "restorable": True + } + ], + "alternative_selectors": [ + "#login-button", + "button[aria-label='Login']" + ], + "state_restoration": { + "checkpoints": [ + { + "timestamp": "T+0", + "state": "initial_load", + "restorable": True, + "snapshot": { + "url": "https://example.com/login", + "scroll_position": {"x": 0, "y": 0}, + "form_data": {"username": "test", "password": "****"} + } + }, + { + "timestamp": "T+1500ms", + "state": "settings_clicked", + "restorable": True, + "snapshot": { + "url": "https://example.com/settings", + "modal_open": True, + 
"selected_tab": "general" + } + } + ] + }, + "fallback_sequences": [ + { + "condition": "settings_button_not_visible", + "actions": [ + { + "step": "check_viewport_scroll", + "max_attempts": 3, + "delay_between_attempts": "500ms" + }, + { + "step": "check_hamburger_menu", + "required_elements": ["button.menu", "div.dropdown"] + }, + { + "step": "refresh_page", + "clear_cache": True + } + ], + "success_criteria": { + "element_visible": True, + "element_clickable": True, + "no_overlays": True + } + } + ] + }, + "model_data": { + "input_tokens": 512, + "output_tokens": 128, + "vision_analysis": { + "button_detected": True, + "confidence": 0.98 + } + }, + "temporal_context": { + "action_start": 1000, + "action_complete": 1500, + "wait_conditions": [ + { + "type": "animation", + "duration": 200 + } + ] + }, + "element_reporting": { + "current_step": { + "number": 3, + "description": "Locating settings button", + "context": "Looking for interactive element with icon or label", + "viewport_state": "Fully loaded, no overlays" + }, + "element_selection": { + "chosen_element": { + "selector": "button.settings-icon", + "confidence": 0.95, + "action": "click", + "description": "Settings button in top-right corner" + }, + "alternative_candidates": [ + { + "selector": "div.menu-icon", + "confidence": 0.45, + "rejected_reason": "Not interactive element" + }, + { + "selector": "span.gear-icon", + "confidence": 0.30, + "rejected_reason": "Hidden by overlay" + } + ], + "selection_criteria": [ + "Visibility in viewport", + "Interactive element", + "Icon matching settings/gear pattern" + ] + } + }, + "error_context": { + "session_state": { + "status": "reset_required", + "reason": "No active session found", + "action": "Creating new session with fresh context", + "resolution": "Reinitialize successful" + }, + "recovery_steps": [ + { + "attempt": 1, + "strategy": "clear_session", + "outcome": "success" + }, + { + "attempt": 2, + "strategy": "reinitialize", + "outcome": "success" + } + ] + }, + "timing_analysis": { + "action_breakdown": { + "element_search": "150ms", + "interaction_delay": "50ms", + "animation_duration": "200ms", + "network_wait": "300ms" + }, + "cumulative_timing": { + "total_duration": "700ms", + "user_perceived_latency": "250ms" + }, + "performance_markers": { + "first_paint": "100ms", + "first_contentful_paint": "200ms", + "time_to_interactive": "450ms" + } + }, + "visual_state": { + "screenshot_diffs": { + "before_click": "diff_1.png", + "after_click": "diff_2.png", + "changes_highlighted": True + }, + "element_visibility": { + "before": { + "visible_area_percentage": 100, + "obscured_by": [], + "viewport_position": "center" + }, + "after": { + "visible_area_percentage": 100, + "obscured_by": [], + "viewport_position": "center" + } + }, + "layout_shifts": [ + { + "timestamp": "T+100ms", + "elements_moved": ["#settings-panel", "#main-content"], + "cumulative_layout_shift": 0.1 + } + ] + }, + "error_recovery": { + "retry_strategy": { + "backoff": "exponential", + "max_attempts": 3, + "conditions": { + "network_stable": True, + "animations_complete": True, + "viewport_stable": True + } + }, + "environment_factors": { + "network_conditions": { + "latency": "50ms", + "bandwidth": "10Mbps", + "stability": "stable" + }, + "system_resources": { + "cpu_utilization": "45%", + "memory_available": "2GB", + "gpu_utilization": "30%" + } + }, + "recovery_checkpoints": [ + { + "timestamp": "T+0", + "state": "pre_action", + "snapshot": { + "dom_state": "hash1234", + "scroll_position": {"x": 0, "y": 0} + } 
+ }, + { + "timestamp": "T+500ms", + "state": "post_action", + "snapshot": { + "dom_state": "hash5678", + "scroll_position": {"x": 0, "y": 100} + } + } + ] + } +} + +@pytest.fixture +def enhanced_trace_file(): + """Create a temporary trace file with enhanced sample data.""" + with tempfile.NamedTemporaryFile(suffix='.zip', delete=False) as temp_zip: + with zipfile.ZipFile(temp_zip.name, 'w') as zf: + zf.writestr('trace.enhanced', json.dumps(SAMPLE_ENHANCED_TRACE)) + yield temp_zip.name + Path(temp_zip.name).unlink() + +@pytest.mark.asyncio +async def test_action_context_analysis(enhanced_trace_file): + """Test analysis of action context including before/after states.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + context = await analyzer.analyze_action_context() + + assert context["interactive_elements_count"] == 2 + assert context["chosen_element"]["confidence"] > 0.9 + assert len(context["state_changes"]) > 0 + assert "clicked" in context["state_changes"][0]["after"] + +@pytest.mark.asyncio +async def test_decision_trail_analysis(enhanced_trace_file): + """Test analysis of decision making process.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + trail = await analyzer.analyze_decision_trail() + + assert len(trail["reasoning_steps"]) == 3 + assert len(trail["alternative_actions"]) > 0 + assert len(trail["key_features"]) > 0 + +@pytest.mark.asyncio +async def test_element_identification_analysis(enhanced_trace_file): + """Test analysis of element identification methods.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + identification = await analyzer.analyze_element_identification() + + assert len(identification["selectors"]) >= 4 + assert "visual_position" in identification + assert "element_relationships" in identification + +@pytest.mark.asyncio +async def test_failure_analysis(enhanced_trace_file): + """Test analysis of failure scenarios and recovery attempts.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + failure = await analyzer.analyze_failures() + + assert "failure_state" in failure + assert len(failure["recovery_attempts"]) > 0 + assert "dom_mutations" in failure + +@pytest.mark.asyncio +async def test_session_context_analysis(enhanced_trace_file): + """Test analysis of session-wide context.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + session = await analyzer.analyze_session_context() + + assert "current_url" in session + assert len(session["route_history"]) > 0 + assert len(session["network_activity"]) > 0 + +@pytest.mark.asyncio +async def test_recovery_info_analysis(enhanced_trace_file): + """Test analysis of recovery information.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + recovery = await analyzer.analyze_recovery_info() + + assert len(recovery["restore_points"]) > 0 + assert len(recovery["fallback_selectors"]) > 0 + +@pytest.mark.asyncio +async def test_model_data_analysis(enhanced_trace_file): + """Test analysis of model-specific data.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + model_data = await analyzer.analyze_model_data() + + assert "token_usage" in model_data + assert "vision_results" in model_data + assert model_data["token_usage"]["total"] == model_data["token_usage"]["input"] + model_data["token_usage"]["output"] + +@pytest.mark.asyncio +async def test_temporal_context_analysis(enhanced_trace_file): + """Test analysis of temporal information.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + temporal = await analyzer.analyze_temporal_context() + + assert 
"duration" in temporal + assert len(temporal["wait_events"]) > 0 + assert temporal["duration"] == temporal["end_time"] - temporal["start_time"] + +@pytest.mark.asyncio +async def test_comprehensive_trace_analysis(enhanced_trace_file): + """Test end-to-end analysis of enhanced trace data.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + result = await analyzer.analyze_all() + + # Verify all major components are present + assert "action_context" in result + assert "decision_trail" in result + assert "element_identification" in result + assert "failure_analysis" in result + assert "session_context" in result + assert "recovery_info" in result + assert "model_data" in result + assert "temporal_context" in result + + # Verify relationships between components + assert result["action_context"]["timestamp"] <= result["temporal_context"]["end_time"] + + # Debug prints + print("\nFallback selectors:", result["recovery_info"]["fallback_selectors"]) + print("Element selectors:", result["element_identification"]["selectors"].values()) + + # Verify that at least one selector is in the fallback selectors + assert any(selector in result["recovery_info"]["fallback_selectors"] + for selector in result["element_identification"]["selectors"].values()) + +@pytest.mark.asyncio +async def test_enhanced_element_reporting(enhanced_trace_file): + """Test enhanced element reporting with detailed selection context.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + reporting = await analyzer.analyze_element_reporting() + + # Verify step context + assert reporting["current_step"]["number"] == 3 + assert "description" in reporting["current_step"] + assert "context" in reporting["current_step"] + assert "viewport_state" in reporting["current_step"] + + # Verify element selection details + selection = reporting["element_selection"] + assert selection["chosen_element"]["confidence"] > 0.9 + assert len(selection["alternative_candidates"]) >= 2 + assert len(selection["selection_criteria"]) >= 3 + + # Verify detailed element information + chosen = selection["chosen_element"] + assert "selector" in chosen + assert "description" in chosen + assert "action" in chosen + +@pytest.mark.asyncio +async def test_enhanced_error_context(enhanced_trace_file): + """Test enhanced error context and session state reporting.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + error_context = await analyzer.analyze_error_context() + + # Verify session state information + assert "status" in error_context["session_state"] + assert "reason" in error_context["session_state"] + assert "action" in error_context["session_state"] + assert "resolution" in error_context["session_state"] + + # Verify recovery steps + assert len(error_context["recovery_steps"]) >= 2 + for step in error_context["recovery_steps"]: + assert "attempt" in step + assert "strategy" in step + assert "outcome" in step + +@pytest.mark.asyncio +async def test_comprehensive_analysis_with_enhancements(enhanced_trace_file): + """Test comprehensive analysis including new enhanced features.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + result = await analyzer.analyze_all() + + # Verify new components are present + assert "element_reporting" in result + assert "error_context" in result + + # Verify element reporting structure + reporting = result["element_reporting"] + assert reporting["current_step"]["description"] == "Locating settings button" + assert reporting["element_selection"]["chosen_element"]["selector"] == "button.settings-icon" + 
+ # Verify error context structure + error = result["error_context"] + assert error["session_state"]["status"] == "reset_required" + assert len(error["recovery_steps"]) == 2 + +@pytest.mark.asyncio +async def test_enhanced_action_context_state(enhanced_trace_file): + """Test enhanced action context with detailed element state tracking.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + context = await analyzer.analyze_action_context() + + # Verify element state before action + before_state = context["element_state_before"] + assert before_state["visible"] is True + assert "pointer-events" in before_state["computed_styles"] + assert before_state["focus_state"] == "not-focused" + assert "aria-hidden" in before_state["accessibility"] + + # Verify element state after action + after_state = context["element_state_after"] + assert "focus_state" in after_state + assert len(after_state["triggered_events"]) >= 2 + assert after_state["accessibility"]["aria-hidden"] == "false" + +@pytest.mark.asyncio +async def test_enhanced_decision_trail(enhanced_trace_file): + """Test enhanced decision trail with confidence and attention weights.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + trail = await analyzer.analyze_decision_trail() + + # Verify confidence threshold + assert trail["confidence_threshold"] > 0.7 + + # Verify attention weights + weights = trail["attention_weights"] + assert abs(sum(weights.values()) - 1.0) < 0.01 # Should sum to approximately 1 + assert weights["element_text"] > weights["position"] # Text should have higher weight + + # Verify alternative paths + alternatives = trail["alternative_paths"] + assert len(alternatives) > 0 + assert all("confidence" in path for path in alternatives) + assert all("rejected_reason" in path for path in alternatives) + +@pytest.mark.asyncio +async def test_comprehensive_analysis_with_state_tracking(enhanced_trace_file): + """Test comprehensive analysis including state tracking enhancements.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + result = await analyzer.analyze_all() + + # Verify enhanced action context + context = result["action_context"] + assert "element_state_before" in context + assert "element_state_after" in context + assert "computed_styles" in context["element_state_before"] + + # Verify enhanced decision trail + trail = result["decision_trail"] + assert "confidence_threshold" in trail + assert "attention_weights" in trail + assert "alternative_paths" in trail + +@pytest.mark.asyncio +async def test_enhanced_element_identification(enhanced_trace_file): + """Test enhanced element identification with relative positioning and hierarchy.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + identification = await analyzer.analyze_element_identification() + + # Verify relative positioning + position = identification["relative_position"] + assert "from_top_nav" in position + assert "from_viewport" in position + assert len(position["nearest_landmarks"]) >= 2 + + # Verify element hierarchy + hierarchy = identification["hierarchy"] + assert hierarchy["parent"] == "nav.top-bar" + assert len(hierarchy["siblings"]) >= 2 + assert len(hierarchy["children"]) >= 1 + + # Verify relationships + assert all(isinstance(sibling, str) for sibling in hierarchy["siblings"]) + assert all(isinstance(child, str) for child in hierarchy["children"]) + +@pytest.mark.asyncio +async def test_enhanced_failure_analysis(enhanced_trace_file): + """Test enhanced failure analysis with DOM mutations and network state.""" + analyzer 
= EnhancedTraceAnalyzer(enhanced_trace_file) + failure = await analyzer.analyze_failures() + + # Verify DOM mutations + mutations = failure["dom_mutations"] + assert len(mutations) > 0 + mutation = mutations[0] + assert "timestamp" in mutation + assert "type" in mutation + assert "element" in mutation + assert "old_value" in mutation + assert "new_value" in mutation + + # Verify network state + network = failure["network_state"] + assert "requests_in_flight" in network + assert "last_completed_request" in network + assert len(network["pending_requests"]) > 0 + + # Verify request details + pending = network["pending_requests"][0] + assert "url" in pending + assert "method" in pending + assert "duration_so_far" in pending + +@pytest.mark.asyncio +async def test_comprehensive_analysis_with_enhanced_identification(enhanced_trace_file): + """Test comprehensive analysis including enhanced identification features.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + result = await analyzer.analyze_all() + + # Verify enhanced element identification + identification = result["element_identification"] + assert "relative_position" in identification + assert "hierarchy" in identification + assert identification["hierarchy"]["parent"] == "nav.top-bar" + + # Verify enhanced failure analysis + failure = result["failure_analysis"] + assert "dom_mutations" in failure + assert "network_state" in failure + assert failure["network_state"]["requests_in_flight"] > 0 + +@pytest.mark.asyncio +async def test_enhanced_session_context(enhanced_trace_file): + """Test enhanced session context with viewport and performance metrics.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + session = await analyzer.analyze_session_context() + + # Verify viewport information + viewport = session["viewport"] + assert viewport["width"] == 1920 + assert viewport["height"] == 1080 + assert viewport["device_pixel_ratio"] == 2 + assert viewport["orientation"] == "landscape" + + # Verify performance metrics + metrics = session["performance_metrics"] + assert "memory_usage" in metrics + assert "dom_node_count" in metrics + assert "frame_rate" in metrics + assert all(timing in metrics["resource_timing"] for timing in ["dns_lookup", "connection", "ttfb"]) + + # Verify browser state + browser = session["browser_state"] + assert browser["cookies_enabled"] is True + assert browser["javascript_enabled"] is True + assert "local_storage_used" in browser + assert "active_service_workers" in browser + +@pytest.mark.asyncio +async def test_enhanced_recovery_info(enhanced_trace_file): + """Test enhanced recovery information with state restoration and fallback sequences.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + recovery = await analyzer.analyze_recovery_info() + + # Verify state restoration + restoration = recovery["state_restoration"] + assert len(restoration["checkpoints"]) >= 2 + + # Verify checkpoint details + checkpoint = restoration["checkpoints"][0] + assert "timestamp" in checkpoint + assert "state" in checkpoint + assert "restorable" in checkpoint + assert "snapshot" in checkpoint + assert all(key in checkpoint["snapshot"] for key in ["url", "scroll_position"]) + + # Verify fallback sequences + sequences = recovery["fallback_sequences"] + assert len(sequences) > 0 + sequence = sequences[0] + assert "condition" in sequence + assert len(sequence["actions"]) >= 3 + assert "success_criteria" in sequence + + # Verify action details + action = sequence["actions"][0] + assert "step" in action + assert 
"max_attempts" in action + assert "delay_between_attempts" in action + +@pytest.mark.asyncio +async def test_comprehensive_analysis_with_enriched_context(enhanced_trace_file): + """Test comprehensive analysis including enriched session context and recovery info.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + result = await analyzer.analyze_all() + + # Verify enriched session context + session = result["session_context"] + assert "viewport" in session + assert "performance_metrics" in session + assert "browser_state" in session + assert session["viewport"]["width"] == 1920 + + # Verify enhanced recovery info + recovery = result["recovery_info"] + assert "state_restoration" in recovery + assert "fallback_sequences" in recovery + assert len(recovery["state_restoration"]["checkpoints"]) >= 2 + assert all("success_criteria" in seq for seq in recovery["fallback_sequences"]) + +@pytest.mark.asyncio +async def test_interaction_timing_analysis(enhanced_trace_file): + """Test detailed interaction timing analysis.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + timing = await analyzer.analyze_timing() + + # Verify action breakdown + breakdown = timing["action_breakdown"] + assert "element_search" in breakdown + assert "interaction_delay" in breakdown + assert "animation_duration" in breakdown + assert "network_wait" in breakdown + + # Verify cumulative timing + cumulative = timing["cumulative_timing"] + assert "total_duration" in cumulative + assert "user_perceived_latency" in cumulative + + # Verify performance markers + markers = timing["performance_markers"] + assert all(marker in markers for marker in ["first_paint", "first_contentful_paint", "time_to_interactive"]) + +@pytest.mark.asyncio +async def test_visual_state_tracking(enhanced_trace_file): + """Test visual state tracking and analysis.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + visual = await analyzer.analyze_visual_state() + + # Verify screenshot diffs + diffs = visual["screenshot_diffs"] + assert "before_click" in diffs + assert "after_click" in diffs + assert diffs["changes_highlighted"] is True + + # Verify element visibility + visibility = visual["element_visibility"] + assert "before" in visibility + assert "after" in visibility + assert "visible_area_percentage" in visibility["before"] + assert "viewport_position" in visibility["before"] + + # Verify layout shifts + shifts = visual["layout_shifts"] + assert len(shifts) > 0 + assert "timestamp" in shifts[0] + assert "elements_moved" in shifts[0] + assert "cumulative_layout_shift" in shifts[0] + +@pytest.mark.asyncio +async def test_enhanced_error_recovery(enhanced_trace_file): + """Test enhanced error recovery capabilities.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + recovery = await analyzer.analyze_error_recovery() + + # Verify retry strategy + strategy = recovery["retry_strategy"] + assert strategy["backoff"] == "exponential" + assert strategy["max_attempts"] == 3 + assert all(condition in strategy["conditions"] for condition in ["network_stable", "animations_complete"]) + + # Verify environment factors + env = recovery["environment_factors"] + assert "network_conditions" in env + assert "system_resources" in env + assert all(metric in env["system_resources"] for metric in ["cpu_utilization", "memory_available"]) + + # Verify recovery checkpoints + checkpoints = recovery["recovery_checkpoints"] + assert len(checkpoints) >= 2 + assert all(key in checkpoints[0] for key in ["timestamp", "state", "snapshot"]) + assert 
"dom_state" in checkpoints[0]["snapshot"] + +@pytest.mark.asyncio +async def test_comprehensive_analysis_with_all_features(enhanced_trace_file): + """Test comprehensive analysis including all enhanced features.""" + analyzer = EnhancedTraceAnalyzer(enhanced_trace_file) + result = await analyzer.analyze_all() + + # Verify new components are present + assert "timing_analysis" in result + assert "visual_state" in result + assert "error_recovery" in result + + # Verify timing analysis + timing = result["timing_analysis"] + assert "action_breakdown" in timing + assert "cumulative_timing" in timing + + # Verify visual state + visual = result["visual_state"] + assert "screenshot_diffs" in visual + assert "element_visibility" in visual + + # Verify error recovery + recovery = result["error_recovery"] + assert "retry_strategy" in recovery + assert "environment_factors" in recovery + assert recovery["retry_strategy"]["backoff"] == "exponential" \ No newline at end of file diff --git a/tests/test_error_handling.py b/tests/test_error_handling.py new file mode 100644 index 00000000..653f0683 --- /dev/null +++ b/tests/test_error_handling.py @@ -0,0 +1,98 @@ +import pytest +from datetime import datetime +from typing import Dict, Any, Optional +import asyncio +from src.utils.error_handling import ErrorHandler, MaxRetriesExceededError + +class TestErrorHandler: + @pytest.fixture + def handler(self): + return ErrorHandler() + + @pytest.mark.asyncio + async def test_max_retries_exceeded(self, handler): + operation = "test_operation" + error = ValueError("Test error") + + # Should handle first three attempts + for _ in range(3): + await handler.handle_error(error, operation) + + # Fourth attempt should raise MaxRetriesExceededError + with pytest.raises(MaxRetriesExceededError) as exc_info: + await handler.handle_error(error, operation) + + assert exc_info.value.operation == operation + assert exc_info.value.original_error == error + + @pytest.mark.asyncio + async def test_error_logging(self, handler): + operation = "test_operation" + error = ValueError("Test error") + + # First attempt + await handler.handle_error(error, operation) + + # Get the last logged error + last_error = handler.get_last_error() + assert last_error["operation"] == operation + assert last_error["attempt"] == 1 + assert "timestamp" in last_error + assert last_error["error"]["name"] == "ValueError" + assert last_error["error"]["message"] == "Test error" + + @pytest.mark.asyncio + async def test_exponential_backoff(self, handler): + operation = "test_operation" + error = ValueError("Test error") + + # Record start time + start = datetime.now() + + # First attempt (should delay 1 second) + await handler.handle_error(error, operation) + + # Second attempt (should delay 2 seconds) + await handler.handle_error(error, operation) + + # Calculate duration + duration = (datetime.now() - start).total_seconds() + + # Should have waited at least 3 seconds (1 + 2) + assert duration >= 3 + + @pytest.mark.asyncio + async def test_error_code_extraction(self, handler): + # Test with connection error + error = ConnectionError("ERR_CONNECTION_REFUSED: Failed to connect") + code = handler.extract_error_code(error) + assert code == "ERR_CONNECTION_REFUSED" + + # Test with DNS error + error = Exception("ERR_NAME_NOT_RESOLVED: Could not resolve hostname") + code = handler.extract_error_code(error) + assert code == "ERR_NAME_NOT_RESOLVED" + + # Test with unknown error + error = ValueError("Some other error") + code = handler.extract_error_code(error) + assert 
code == "UNKNOWN_ERROR" + + @pytest.mark.asyncio + async def test_concurrent_retries(self, handler): + operation = "test_operation" + error = ValueError("Test error") + + # Try to handle the same error concurrently + tasks = [ + handler.handle_error(error, operation), + handler.handle_error(error, operation), + handler.handle_error(error, operation) + ] + + # Should complete without raising an error + await asyncio.gather(*tasks, return_exceptions=True) + + # Fourth attempt should still raise MaxRetriesExceededError + with pytest.raises(MaxRetriesExceededError): + await handler.handle_error(error, operation) \ No newline at end of file diff --git a/tests/test_llm_api.py b/tests/test_llm_api.py index 9e2a1d6d..5b29cb3d 100644 --- a/tests/test_llm_api.py +++ b/tests/test_llm_api.py @@ -6,6 +6,7 @@ # @FileName: test_llm_api.py import os import pdb +import pytest from dotenv import load_dotenv @@ -20,12 +21,16 @@ def test_openai_model(): from langchain_core.messages import HumanMessage from src.utils import utils + api_key = os.getenv("OPENAI_API_KEY") + if not api_key: + pytest.skip("OPENAI_API_KEY not set") + llm = utils.get_llm_model( provider="openai", model_name="gpt-4o", temperature=0.8, base_url=os.getenv("OPENAI_ENDPOINT", ""), - api_key=os.getenv("OPENAI_API_KEY", "") + api_key=api_key ) image_path = "assets/examples/test.png" image_data = utils.encode_image(image_path) @@ -47,11 +52,15 @@ def test_gemini_model(): from langchain_core.messages import HumanMessage from src.utils import utils + api_key = os.getenv("GOOGLE_API_KEY") + if not api_key: + pytest.skip("GOOGLE_API_KEY not set") + llm = utils.get_llm_model( provider="gemini", - model_name="gemini-2.0-flash-exp", + model_name="gemini-1.5-pro", temperature=0.8, - api_key=os.getenv("GOOGLE_API_KEY", "") + api_key=api_key ) image_path = "assets/examples/test.png" @@ -73,12 +82,17 @@ def test_azure_openai_model(): from langchain_core.messages import HumanMessage from src.utils import utils + api_key = os.getenv("AZURE_OPENAI_API_KEY") + endpoint = os.getenv("AZURE_OPENAI_ENDPOINT") + if not api_key or not endpoint: + pytest.skip("AZURE_OPENAI_API_KEY or AZURE_OPENAI_ENDPOINT not set") + llm = utils.get_llm_model( provider="azure_openai", - model_name="gpt-4o", + model_name="gpt-4", temperature=0.8, - base_url=os.getenv("AZURE_OPENAI_ENDPOINT", ""), - api_key=os.getenv("AZURE_OPENAI_API_KEY", "") + base_url=endpoint, + api_key=api_key ) image_path = "assets/examples/test.png" image_data = utils.encode_image(image_path) @@ -99,12 +113,39 @@ def test_deepseek_model(): from langchain_core.messages import HumanMessage from src.utils import utils + api_key = os.getenv("DEEPSEEK_API_KEY") + if not api_key: + pytest.skip("DEEPSEEK_API_KEY not set") + llm = utils.get_llm_model( provider="deepseek", model_name="deepseek-chat", temperature=0.8, base_url=os.getenv("DEEPSEEK_ENDPOINT", ""), - api_key=os.getenv("DEEPSEEK_API_KEY", "") + api_key=api_key + ) + message = HumanMessage( + content=[ + {"type": "text", "text": "who are you?"} + ] + ) + ai_msg = llm.invoke([message]) + print(ai_msg.content) + + +def test_anthropic_model(): + from langchain_core.messages import HumanMessage + from src.utils import utils + + api_key = os.getenv("ANTHROPIC_API_KEY") + if not api_key: + pytest.skip("ANTHROPIC_API_KEY not set") + + llm = utils.get_llm_model( + provider="anthropic", + model_name="claude-3-5-sonnet-latest", + temperature=0.8, + api_key=api_key ) message = HumanMessage( content=[ @@ -118,6 +159,16 @@ def test_deepseek_model(): def 
test_ollama_model(): from langchain_ollama import ChatOllama + # Check if Ollama is running by trying to connect to its default port + import socket + sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM) + try: + result = sock.connect_ex(('localhost', 11434)) + if result != 0: + pytest.skip("Ollama server not running on localhost:11434") + finally: + sock.close() + llm = ChatOllama(model="qwen2.5:7b") ai_msg = llm.invoke("Sing a ballad of LangChain.") print(ai_msg.content) @@ -128,4 +179,5 @@ def test_ollama_model(): # test_gemini_model() # test_azure_openai_model() # test_deepseek_model() + # test_anthropic_model() test_ollama_model() diff --git a/tests/test_llm_integration.py b/tests/test_llm_integration.py new file mode 100644 index 00000000..60dc0056 --- /dev/null +++ b/tests/test_llm_integration.py @@ -0,0 +1,182 @@ +import os +import pytest +from dotenv import load_dotenv +from langchain_core.messages import HumanMessage +from src.utils import utils + +# Load environment variables +load_dotenv() + +class TestOpenAIIntegration: + """Test OpenAI model integration and vision capabilities""" + + def setup_method(self): + """Setup test environment""" + # Ensure required environment variables are set + self.api_key = os.getenv("OPENAI_API_KEY") + self.base_url = os.getenv("OPENAI_ENDPOINT", "https://api.openai.com/v1") + if not self.api_key: + pytest.skip("OPENAI_API_KEY not set") + + def test_gpt4_turbo_initialization(self): + """Test GPT-4 Turbo model initialization""" + llm = utils.get_llm_model( + provider="openai", + model_name="gpt-4o", + temperature=0.8, + base_url=self.base_url, + api_key=self.api_key + ) + assert llm is not None + + def test_gpt4_vision_initialization(self): + """Test GPT-4 Vision model initialization""" + llm = utils.get_llm_model( + provider="openai", + model_name="gpt-4o", + temperature=0.8, + base_url=self.base_url, + api_key=self.api_key, + vision=True + ) + assert llm is not None + + @pytest.mark.asyncio + async def test_vision_capability(self): + """Test vision capability with an example image""" + llm = utils.get_llm_model( + provider="openai", + model_name="gpt-4o", + temperature=0.8, + base_url=self.base_url, + api_key=self.api_key, + vision=True + ) + + # Use a test image + image_path = "assets/examples/test.png" + if not os.path.exists(image_path): + pytest.skip(f"Test image not found at {image_path}") + + image_data = utils.encode_image(image_path) + message = HumanMessage( + content=[ + {"type": "text", "text": "describe this image"}, + { + "type": "image_url", + "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}, + }, + ] + ) + response = await llm.ainvoke([message]) + assert response is not None + assert isinstance(response.content, str) + assert len(response.content) > 0 + +class TestAzureOpenAIIntegration: + """Test Azure OpenAI integration""" + + def setup_method(self): + """Setup test environment""" + self.api_key = os.getenv("AZURE_OPENAI_API_KEY") + self.endpoint = os.getenv("AZURE_OPENAI_ENDPOINT") + if not self.api_key or not self.endpoint: + pytest.skip("Azure OpenAI credentials not set") + + def test_azure_model_initialization(self): + """Test Azure OpenAI model initialization""" + llm = utils.get_llm_model( + provider="azure_openai", + model_name="gpt-4", + temperature=0.8, + base_url=self.endpoint, + api_key=self.api_key + ) + assert llm is not None + + @pytest.mark.asyncio + async def test_azure_basic_completion(self): + """Test basic completion with Azure OpenAI""" + llm = utils.get_llm_model( + provider="azure_openai", 
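+            # "gpt-4" is assumed to be the Azure deployment name that
+            # utils.get_llm_model resolves for the azure_openai provider
+            # (Azure OpenAI routes requests by deployment rather than raw model id).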
+ model_name="gpt-4", + temperature=0.8, + base_url=self.endpoint, + api_key=self.api_key + ) + + message = HumanMessage(content="Say hello!") + response = await llm.ainvoke([message]) + assert response is not None + assert isinstance(response.content, str) + assert len(response.content) > 0 + +class TestAnthropicIntegration: + """Test Anthropic model integration""" + + def setup_method(self): + """Setup test environment""" + self.api_key = os.getenv("ANTHROPIC_API_KEY") + if not self.api_key: + pytest.skip("ANTHROPIC_API_KEY not set") + + def test_claude_initialization(self): + """Test Claude model initialization""" + llm = utils.get_llm_model( + provider="anthropic", + model_name="claude-3-5-sonnet-latest", + temperature=0.8, + api_key=self.api_key + ) + assert llm is not None + + @pytest.mark.asyncio + async def test_basic_completion(self): + """Test basic completion with Claude""" + llm = utils.get_llm_model( + provider="anthropic", + model_name="claude-3-5-sonnet-latest", + temperature=0.8, + api_key=self.api_key + ) + + message = HumanMessage(content="Say hello!") + response = await llm.ainvoke([message]) + assert response is not None + assert isinstance(response.content, str) + assert len(response.content) > 0 + +def test_model_names_consistency(): + """Test that model names are consistent between toolchain and utils""" + # Test OpenAI models + openai_models = utils.model_names["openai"] + expected_openai = ["gpt-4o"] + assert all(model in openai_models for model in expected_openai), "Missing expected OpenAI models" + + # Test Gemini models + gemini_models = utils.model_names["gemini"] + expected_gemini = ["gemini-1.5-pro", "gemini-2.0-flash"] + assert all(model in gemini_models for model in expected_gemini), "Missing expected Gemini models" + + # Test Anthropic models + anthropic_models = utils.model_names["anthropic"] + expected_anthropic = ["claude-3-5-sonnet-latest", "claude-3-5-sonnet-20241022"] + assert all(model in anthropic_models for model in expected_anthropic), "Missing expected Anthropic models" + + # Test DeepSeek models + deepseek_models = utils.model_names["deepseek"] + expected_deepseek = ["deepseek-chat"] + assert all(model in deepseek_models for model in expected_deepseek), "Missing expected DeepSeek models" + + # Test Azure OpenAI models + azure_models = utils.model_names["azure_openai"] + expected_azure = ["gpt-4", "gpt-3.5-turbo"] + assert all(model in azure_models for model in expected_azure), "Missing expected Azure OpenAI models" + + # Test Ollama models + ollama_models = utils.model_names["ollama"] + expected_ollama = ["qwen2.5:7b", "llama2:7b"] + assert all(model in ollama_models for model in expected_ollama), "Missing expected Ollama models" + +if __name__ == "__main__": + pytest.main([__file__, "-v"]) \ No newline at end of file diff --git a/tests/test_logging.py b/tests/test_logging.py new file mode 100644 index 00000000..5e871e0a --- /dev/null +++ b/tests/test_logging.py @@ -0,0 +1,216 @@ +import json +import logging +import datetime +import pytest +from io import StringIO +from typing import Dict, Any +from src.utils.logging import ( + LogFormatter, + BatchedEventLogger, + setup_logging, + PRODUCTION_EXCLUDE_PATTERNS, + LogLevel +) +import sys + +class TestLogFormatter: + @pytest.fixture + def json_formatter(self): + return LogFormatter(use_json=True) + + @pytest.fixture + def compact_formatter(self): + return LogFormatter(use_json=False) + + def test_json_format_basic_log(self, json_formatter): + record = logging.LogRecord( + name="test_logger", + 
level=logging.INFO, + pathname="test.py", + lineno=1, + msg="Test message", + args=(), + exc_info=None + ) + + formatted = json_formatter.format(record) + parsed = json.loads(formatted) + + assert parsed["level"] == "INFO" + assert parsed["logger"] == "test_logger" + assert parsed["message"] == "Test message" + assert "timestamp" in parsed + + def test_json_format_with_extra_fields(self, json_formatter): + record = logging.LogRecord( + name="test_logger", + level=logging.INFO, + pathname="test.py", + lineno=1, + msg="Test message", + args=(), + exc_info=None + ) + record.event_type = "test_event" + record.event_data = {"key": "value"} + + formatted = json_formatter.format(record) + parsed = json.loads(formatted) + + assert parsed["event_type"] == "test_event" + assert parsed["data"] == {"key": "value"} + + def test_json_format_with_error(self, json_formatter): + try: + raise ValueError("Test error") + except ValueError as e: + record = logging.LogRecord( + name="test_logger", + level=logging.ERROR, + pathname="test.py", + lineno=1, + msg="Error occurred", + args=(), + exc_info=sys.exc_info() + ) + + formatted = json_formatter.format(record) + parsed = json.loads(formatted) + + assert parsed["error"]["type"] == "ValueError" + assert parsed["error"]["message"] == "Test error" + assert "stack_trace" in parsed["error"] + + def test_compact_format_basic_log(self, compact_formatter): + record = logging.LogRecord( + name="test_logger", + level=logging.INFO, + pathname="test.py", + lineno=1, + msg="Test message", + args=(), + exc_info=None + ) + + formatted = compact_formatter.format(record) + assert "] I: Test message" in formatted + + def test_compact_format_with_error(self, compact_formatter): + try: + raise ValueError("Test error") + except ValueError as e: + record = logging.LogRecord( + name="test_logger", + level=logging.ERROR, + pathname="test.py", + lineno=1, + msg="Error occurred", + args=(), + exc_info=sys.exc_info() + ) + + formatted = compact_formatter.format(record) + assert "] E: Error occurred" in formatted + assert "ValueError: Test error" in formatted + +class TestBatchedEventLogger: + @pytest.fixture + def string_io(self): + return StringIO() + + @pytest.fixture + def logger(self, string_io): + handler = logging.StreamHandler(string_io) + handler.setFormatter(LogFormatter(use_json=True)) + logger = logging.getLogger("test_batched") + logger.handlers = [handler] + logger.setLevel(logging.INFO) + return logger + + @pytest.fixture + def batched_logger(self, logger): + return BatchedEventLogger(logger) + + def test_batch_single_event(self, batched_logger, string_io): + event_data = {"action": "click", "element": "button"} + batched_logger.add_event("ui_action", event_data) + batched_logger.flush() + + output = string_io.getvalue() + parsed = json.loads(output) + + assert parsed["event_type"] == "batched_ui_action" + assert parsed["data"]["count"] == 1 + assert parsed["data"]["events"][0] == event_data + + def test_batch_multiple_events(self, batched_logger, string_io): + events = [ + {"action": "click", "element": "button1"}, + {"action": "type", "element": "input1"}, + {"action": "click", "element": "button2"} + ] + + for event in events: + batched_logger.add_event("ui_action", event) + + batched_logger.flush() + + output = string_io.getvalue() + parsed = json.loads(output) + + assert parsed["event_type"] == "batched_ui_action" + assert parsed["data"]["count"] == 3 + assert parsed["data"]["events"] == events + +class TestLoggingSetup: + @pytest.fixture + def temp_logger(self): + # 
Store original handlers + root_logger = logging.getLogger() + original_handlers = root_logger.handlers[:] + + yield root_logger + + # Restore original handlers + root_logger.handlers = original_handlers + + def test_setup_basic_logging(self, temp_logger): + setup_logging(level="INFO", use_json=True) + assert len(temp_logger.handlers) == 1 + assert isinstance(temp_logger.handlers[0].formatter, LogFormatter) + assert temp_logger.level == logging.INFO + + def test_setup_with_exclude_patterns(self, temp_logger): + test_patterns = ["debug", "deprecated"] + setup_logging(level="INFO", exclude_patterns=test_patterns) + + # Create a test record that should be filtered + record = logging.LogRecord( + name="test_logger", + level=logging.INFO, + pathname="test.py", + lineno=1, + msg="This is a debug message", + args=(), + exc_info=None + ) + + # The record should be filtered out + assert not temp_logger.handlers[0].filter(record) + + def test_production_exclude_patterns(self): + # Verify that all production patterns are strings + assert all(isinstance(pattern, str) for pattern in PRODUCTION_EXCLUDE_PATTERNS) + + # Verify that common patterns are included + common_patterns = ["deprecated", "virtual environment"] + assert all(pattern in PRODUCTION_EXCLUDE_PATTERNS for pattern in common_patterns) + +def test_log_levels(): + # Test that all expected log levels are defined + expected_levels = ["CRITICAL", "ERROR", "WARNING", "INFO", "DEBUG", "TRACE"] + assert all(level in LogLevel.__members__ for level in expected_levels) + + # Test that the values match the names + for level in LogLevel: + assert level.value == level.name \ No newline at end of file diff --git a/tests/test_logging_integration.py b/tests/test_logging_integration.py new file mode 100644 index 00000000..3fa7a1f5 --- /dev/null +++ b/tests/test_logging_integration.py @@ -0,0 +1,219 @@ +import json +import logging +import pytest +import asyncio +from pathlib import Path +from io import StringIO +from typing import Dict, Any, List, Optional + +from src.utils.logging import LogFormatter, BatchedEventLogger, setup_logging +from src.agent.custom_agent import CustomAgent +from browser_use.agent.views import ActionResult +from browser_use.browser.views import BrowserStateHistory +from browser_use.browser.browser import Browser, BrowserConfig +from browser_use.browser.context import BrowserContext, BrowserContextConfig +from langchain_core.language_models.chat_models import BaseChatModel +from langchain_core.messages import BaseMessage + +class MockElementTree: + def clickable_elements_to_string(self, include_attributes=None): + return "Mock clickable elements" + +class MockBrowserContext(BrowserContext): + def __init__(self): + self.config = BrowserContextConfig() + self.selector_map = {} + self.cached_state = BrowserStateHistory( + url="https://example.com", + title="Example Page", + tabs=[], + interacted_element=[None], + screenshot=None + ) + setattr(self.cached_state, 'selector_map', self.selector_map) + setattr(self.cached_state, 'element_tree', MockElementTree()) + + async def get_state(self, use_vision=True): + return self.cached_state + + async def close(self): + pass + + def __del__(self): + # Override to prevent errors about missing session attribute + pass + +class MockBrowser(Browser): + def __init__(self): + self.config = BrowserConfig() + + async def new_context(self, config): + return MockBrowserContext() + + async def close(self): + pass + +class MockLLM(BaseChatModel): + def with_structured_output(self, output_type, 
include_raw=False): + self._output_type = output_type + return self + + async def ainvoke(self, messages: List[BaseMessage], **kwargs): + return { + 'parsed': self._output_type( + action=[], + current_state={ + 'prev_action_evaluation': 'Success', + 'important_contents': 'Test memory', + 'completed_contents': 'Test progress', + 'thought': 'Test thought', + 'summary': 'Test summary' + } + ) + } + + @property + def _llm_type(self) -> str: + return "mock" + + def _generate(self, messages: List[BaseMessage], stop: Optional[List[str]] = None, run_manager = None, **kwargs): + raise NotImplementedError("Use ainvoke instead") + + @property + def _identifying_params(self) -> Dict[str, Any]: + return {"mock_param": True} + +class ErrorLLM(MockLLM): + async def ainvoke(self, messages: List[BaseMessage], **kwargs): + raise ValueError("Test error") + +class ActionLLM(MockLLM): + async def ainvoke(self, messages: List[BaseMessage], **kwargs): + return { + 'parsed': self._output_type( + action=[ + {'type': 'click', 'selector': '#button1'}, + {'type': 'type', 'selector': '#input1', 'text': 'test'}, + ], + current_state={ + 'prev_action_evaluation': 'Success', + 'important_contents': 'Test memory', + 'completed_contents': 'Test progress', + 'thought': 'Test thought', + 'summary': 'Test summary' + } + ) + } + +@pytest.fixture +def logger(): + # Configure root logger + root_logger = logging.getLogger() + root_logger.setLevel(logging.INFO) + + # Configure test logger + logger = logging.getLogger("test_integration") + logger.setLevel(logging.INFO) + return logger + +@pytest.fixture +def string_io(): + string_io = StringIO() + handler = logging.StreamHandler(string_io) + handler.setFormatter(LogFormatter(use_json=True)) + + # Add handler to root logger + root_logger = logging.getLogger() + root_logger.addHandler(handler) + + # Add handler to test logger + logger = logging.getLogger("test_integration") + logger.addHandler(handler) + + yield string_io + + # Clean up + root_logger.removeHandler(handler) + logger.removeHandler(handler) + +@pytest.mark.asyncio +async def test_agent_logging_integration(logger, string_io): + # Setup + agent = CustomAgent( + task="Test task", + llm=MockLLM(), + browser=MockBrowser(), + browser_context=MockBrowserContext(), + use_vision=True + ) + + # Execute a step + await agent.step() + + # Get all log output + log_output = string_io.getvalue() + log_entries = [json.loads(line) for line in log_output.strip().split('\n') if line.strip()] + + # Print log entries for debugging + print("\nLog entries:", log_entries) + + # Verify log entries + assert len(log_entries) > 0 + assert any('Starting step 1' in str(entry.get('msg', '')) for entry in log_entries) + assert any('Model Response: success' in str(entry.get('msg', '')) for entry in log_entries) + assert any('Step error' in str(entry.get('msg', '')) for entry in log_entries) + +@pytest.mark.asyncio +async def test_agent_error_logging(logger, string_io): + # Setup + agent = CustomAgent( + task="Test task", + llm=ErrorLLM(), + browser=MockBrowser(), + browser_context=MockBrowserContext(), + use_vision=True + ) + + # Execute a step + await agent.step() + + # Get all log output + log_output = string_io.getvalue() + log_entries = [json.loads(line) for line in log_output.strip().split('\n') if line.strip()] + + # Print log entries for debugging + print("\nLog entries:", log_entries) + + # Verify log entries + assert len(log_entries) > 0 + assert any('Starting step 1' in str(entry.get('msg', '')) for entry in log_entries) + assert any('Step 
error' in str(entry.get('msg', '')) for entry in log_entries) + assert any('Use ainvoke instead' in str(entry.get('msg', '')) for entry in log_entries) + +@pytest.mark.asyncio +async def test_agent_batched_logging(logger, string_io): + # Setup + agent = CustomAgent( + task="Test task", + llm=ActionLLM(), + browser=MockBrowser(), + browser_context=MockBrowserContext(), + use_vision=True + ) + + # Execute a step + await agent.step() + + # Get all log output + log_output = string_io.getvalue() + log_entries = [json.loads(line) for line in log_output.strip().split('\n') if line.strip()] + + # Print log entries for debugging + print("\nLog entries:", log_entries) + + # Verify log entries + assert len(log_entries) > 0 + assert any('Starting step 1' in str(entry.get('msg', '')) for entry in log_entries) + assert any('Model Response: success' in str(entry.get('msg', '')) for entry in log_entries) + assert any('Batch: 2 action events' in str(entry.get('msg', '')) for entry in log_entries) + assert any('Step error' in str(entry.get('msg', '')) for entry in log_entries) \ No newline at end of file diff --git a/tests/test_structured_logging.py b/tests/test_structured_logging.py new file mode 100644 index 00000000..9134c4f5 --- /dev/null +++ b/tests/test_structured_logging.py @@ -0,0 +1,270 @@ +import pytest +import json +import logging +import os +from datetime import datetime +from src.utils.structured_logging import ( + StructuredLogger, + ProgressEvent, + BrowserEvent, + JSONFormatter, + ColorizedFormatter, + ColorScheme, + setup_structured_logging +) +from colorama import Fore, Style + +@pytest.fixture +def structured_logger(): + logger = StructuredLogger("test_logger") + return logger + +def test_progress_event_creation(): + event = ProgressEvent( + step="test_step", + status="in_progress", + progress=0.5, + message="Testing progress" + ) + assert event.step == "test_step" + assert event.status == "in_progress" + assert event.progress == 0.5 + assert event.message == "Testing progress" + assert event.timestamp is not None + +def test_browser_event_creation(): + details = {"action": "click", "selector": "#button"} + event = BrowserEvent( + event_type="interaction", + details=details + ) + assert event.event_type == "interaction" + assert event.details == details + assert event.timestamp is not None + +def test_progress_logging(structured_logger): + structured_logger.log_progress( + step="test_step", + status="started", + progress=0.0, + message="Starting test" + ) + + history = structured_logger.get_progress_history() + assert len(history) == 1 + assert history[0]["step"] == "test_step" + assert history[0]["status"] == "started" + assert history[0]["progress"] == 0.0 + assert history[0]["message"] == "Starting test" + +def test_browser_event_logging(structured_logger): + details = {"page": "test.html", "action": "navigate"} + structured_logger.log_browser_event( + event_type="navigation", + details=details + ) + + events = structured_logger.get_browser_events() + assert len(events) == 1 + assert events[0]["event_type"] == "navigation" + assert events[0]["details"] == details + +def test_progress_tracking(structured_logger): + # Test multiple progress updates + steps = [ + ("step1", "started", 0.0, "Starting"), + ("step1", "in_progress", 0.5, "Halfway"), + ("step1", "completed", 1.0, "Done") + ] + + for step, status, progress, message in steps: + structured_logger.log_progress(step, status, progress, message) + + assert structured_logger.get_current_progress() == 1.0 + history = 
structured_logger.get_progress_history() + assert len(history) == 3 + + for i, (step, status, progress, message) in enumerate(steps): + assert history[i]["step"] == step + assert history[i]["status"] == status + assert history[i]["progress"] == progress + assert history[i]["message"] == message + +def test_clear_history(structured_logger): + # Add some events + structured_logger.log_progress("test", "started", 0.5, "Test progress") + structured_logger.log_browser_event("test", {"action": "test"}) + + # Clear history + structured_logger.clear_history() + + assert len(structured_logger.get_progress_history()) == 0 + assert len(structured_logger.get_browser_events()) == 0 + assert structured_logger.get_current_progress() == 0.0 + +def test_json_formatter(): + formatter = JSONFormatter() + record = logging.LogRecord( + name="test_logger", + level=logging.INFO, + pathname="test.py", + lineno=1, + msg="Test message", + args=(), + exc_info=None + ) + + # Add custom fields + setattr(record, 'event_type', 'test_event') + setattr(record, 'data', {'test_key': 'test_value'}) + + formatted = formatter.format(record) + parsed = json.loads(formatted) + + assert parsed["level"] == "INFO" + assert parsed["message"] == "Test message" + assert parsed["logger"] == "test_logger" + assert parsed["event_type"] == "test_event" + assert parsed["data"] == {"test_key": "test_value"} + assert "timestamp" in parsed + +def test_colorized_formatter_with_colors(): + formatter = ColorizedFormatter(use_colors=True) + record = logging.LogRecord( + name="test_logger", + level=logging.ERROR, + pathname="test.py", + lineno=1, + msg="Test error message", + args=(), + exc_info=None + ) + + formatted = formatter.format(record) + assert Fore.RED in formatted # Error level should be red + assert Style.RESET_ALL in formatted # Should have reset codes + assert "[" in formatted and "]" in formatted # Should have timestamp brackets + assert "ERROR" in formatted # Should include level name + +def test_colorized_formatter_without_colors(): + formatter = ColorizedFormatter(use_colors=False) + record = logging.LogRecord( + name="test_logger", + level=logging.INFO, + pathname="test.py", + lineno=1, + msg="Test message", + args=(), + exc_info=None + ) + + formatted = formatter.format(record) + assert Fore.CYAN not in formatted # Should not have color codes + assert Style.RESET_ALL not in formatted + assert "[" in formatted and "]" in formatted + assert "INFO" in formatted + +def test_colorized_formatter_special_keywords(): + formatter = ColorizedFormatter(use_colors=True) + record = logging.LogRecord( + name="test_logger", + level=logging.INFO, + pathname="test.py", + lineno=1, + msg="✓ STEP(1) completed × failed", + args=(), + exc_info=None + ) + + formatted = formatter.format(record) + assert Fore.GREEN in formatted # Success checkmark + assert Fore.BLUE in formatted # STEP keyword + assert Fore.RED in formatted # Error cross + +def test_colorized_formatter_with_structured_data(): + formatter = ColorizedFormatter(use_colors=True) + record = logging.LogRecord( + name="test_logger", + level=logging.INFO, + pathname="test.py", + lineno=1, + msg="Progress Update", + args=(), + exc_info=None + ) + + # Add structured data + setattr(record, 'event_type', 'progress') + setattr(record, 'data', {'step': 'test', 'progress': 0.5}) + + formatted = formatter.format(record) + assert 'progress' in formatted + assert '"step": "test"' in formatted + assert '"progress": 0.5' in formatted + +def test_color_scheme(): + scheme = ColorScheme() + assert 
scheme.ERROR == Fore.RED + assert scheme.WARNING == Fore.YELLOW + assert scheme.INFO == Fore.CYAN + assert scheme.DEBUG == Style.DIM + assert scheme.SUCCESS == Fore.GREEN + assert scheme.RESET == Style.RESET_ALL + +def test_no_color_environment_variable(): + os.environ['NO_COLOR'] = '1' + formatter = ColorizedFormatter(use_colors=True) # Even with colors enabled + record = logging.LogRecord( + name="test_logger", + level=logging.ERROR, + pathname="test.py", + lineno=1, + msg="Test message", + args=(), + exc_info=None + ) + + formatted = formatter.format(record) + assert Fore.RED not in formatted # Should not have color codes + assert Style.RESET_ALL not in formatted + + # Clean up + del os.environ['NO_COLOR'] + +def test_setup_structured_logging_with_colors(): + # Remove existing handlers + root_logger = logging.getLogger() + for handler in root_logger.handlers[:]: + root_logger.removeHandler(handler) + + # Set up logging with colors + setup_structured_logging(level=logging.DEBUG, use_colors=True, json_output=False) + + assert len(root_logger.handlers) == 1 + assert isinstance(root_logger.handlers[0].formatter, ColorizedFormatter) + assert root_logger.handlers[0].formatter.use_colors is True + +def test_setup_structured_logging_json(): + # Remove existing handlers + root_logger = logging.getLogger() + for handler in root_logger.handlers[:]: + root_logger.removeHandler(handler) + + # Set up logging with JSON output + setup_structured_logging(level=logging.DEBUG, json_output=True) + + assert len(root_logger.handlers) == 1 + assert isinstance(root_logger.handlers[0].formatter, JSONFormatter) + +def test_setup_structured_logging(): + # Remove existing handlers + root_logger = logging.getLogger() + for handler in root_logger.handlers[:]: + root_logger.removeHandler(handler) + + # Set up logging with default settings + setup_structured_logging(level=logging.DEBUG) + + assert root_logger.level == logging.DEBUG + assert len(root_logger.handlers) == 1 + assert isinstance(root_logger.handlers[0].formatter, ColorizedFormatter) # Default to ColorizedFormatter \ No newline at end of file diff --git a/tests/test_task_logging.py b/tests/test_task_logging.py new file mode 100644 index 00000000..50bf5aae --- /dev/null +++ b/tests/test_task_logging.py @@ -0,0 +1,641 @@ +import pytest +from datetime import datetime, timedelta +import json +import asyncio +import os +from src.utils.task_logging import ( + TaskLogger, + TaskContext, + StepInfo, + BrowserState, + TaskStatus, + PerformanceMetrics, + ErrorInfo, + ActionType, + RetryConfig, + RetryInfo, + ColorScheme, + LogFormatter, + SeparatorStyle +) + +def test_task_logger_initialization(): + logger = TaskLogger("test_task", "Test task goal") + context = logger.get_context() + + assert context["task"]["id"] == "test_task" + assert context["task"]["goal"] == "Test task goal" + assert context["task"]["status"] == "pending" + assert context["browser"]["url"] == "" + assert context["browser"]["state"] == "loading" + assert context["browser"]["visible_elements"] == 0 + assert context["browser"]["dynamic_content"] == "loading" + +def test_step_update(): + logger = TaskLogger("test_task", "Test task goal") + + # Update to running state + logger.update_step("Starting navigation", TaskStatus.RUNNING) + context = logger.get_context() + + assert context["task"]["status"] == "running" + assert context["task"]["progress"] == "2/unknown steps" # Step number incremented + + # Update to complete state + logger.update_step("Navigation complete", TaskStatus.COMPLETE) + context 
= logger.get_context() + + assert context["task"]["status"] == "complete" + assert context["task"]["progress"] == "3/unknown steps" + +def test_browser_state_update(): + logger = TaskLogger("test_task", "Test task goal") + + # Update browser state + logger.update_browser_state( + url="https://example.com", + page_ready=True, + dynamic_content_loaded=True, + visible_elements=10 + ) + + context = logger.get_context() + assert context["browser"]["url"] == "https://example.com" + assert context["browser"]["state"] == "ready" + assert context["browser"]["dynamic_content"] == "loaded" + assert context["browser"]["visible_elements"] == 10 + +def test_partial_browser_state_update(): + logger = TaskLogger("test_task", "Test task goal") + + # Update only some fields + logger.update_browser_state(url="https://example.com") + context = logger.get_context() + + assert context["browser"]["url"] == "https://example.com" + assert context["browser"]["state"] == "loading" # Unchanged + assert context["browser"]["dynamic_content"] == "loading" # Unchanged + assert context["browser"]["visible_elements"] == 0 # Unchanged + +def test_elapsed_time_calculation(): + logger = TaskLogger("test_task", "Test task goal") + + # Set a specific start time + start_time = datetime.utcnow() - timedelta(seconds=5) + logger.context.started_at = start_time.isoformat() + + context = logger.get_context() + elapsed_time = float(context["task"]["elapsed_time"].rstrip("s")) + + assert 4.5 <= elapsed_time <= 5.5 # Allow for small timing variations + +def test_task_status_validation(): + logger = TaskLogger("test_task", "Test task goal") + + # Test all valid status values + for status in TaskStatus: + logger.update_step(f"Step with status {status}", status) + context = logger.get_context() + assert context["task"]["status"] == status.value + +def test_json_serialization(): + logger = TaskLogger("test_task", "Test task goal") + context = logger.get_context() + + # Verify that the context can be JSON serialized + json_str = json.dumps(context) + parsed = json.loads(json_str) + + assert parsed["task"]["id"] == "test_task" + assert parsed["task"]["goal"] == "Test task goal" + assert "timestamp" in parsed + assert "elapsed_time" in parsed["task"] + +def test_step_info_status_conversion(): + # Test that string status values are converted to TaskStatus enum + step = StepInfo( + number=1, + description="Test step", + started_at=datetime.utcnow().isoformat(), + status="running" # Pass as string + ) + + assert isinstance(step.status, TaskStatus) + assert step.status == TaskStatus.RUNNING + +def test_error_handling(): + logger = TaskLogger("error_task", "Test error handling") + + # Simulate an error + error = ValueError("Test error") + logger.log_error(error, step_number=1, action="test action") + + context = logger.get_context() + assert context["task"]["status"] == "failed" + assert context["error"]["message"] == "Test error" + assert context["error"]["type"] == "ValueError" + assert context["error"]["step"] == 1 + assert context["error"]["action"] == "test action" + +def test_performance_metrics(): + logger = TaskLogger("perf_task", "Test performance tracking") + + # Start tracking performance + logger.start_performance_tracking() + + # Simulate some steps with timing + logger.update_step("Navigation", TaskStatus.RUNNING) + logger.track_step_duration("navigation", 0.5) + + logger.update_step("Interaction", TaskStatus.RUNNING) + logger.track_step_duration("interaction", 0.3) + + # Get performance metrics + metrics = 
logger.get_performance_metrics() + assert metrics["step_breakdown"]["navigation"] == pytest.approx(0.5) + assert metrics["step_breakdown"]["interaction"] == pytest.approx(0.3) + assert metrics["total_duration"] > 0 + +def test_detailed_browser_state(): + logger = TaskLogger("browser_task", "Test browser state") + + # Update with detailed browser state + logger.update_browser_state( + url="https://example.com", + page_ready=True, + dynamic_content_loaded=True, + visible_elements=10, + current_frame="main", + active_element="search_input", + page_title="Example Page" + ) + + context = logger.get_context() + browser_state = context["browser"] + assert browser_state["url"] == "https://example.com" + assert browser_state["state"] == "ready" + assert browser_state["current_frame"] == "main" + assert browser_state["active_element"] == "search_input" + assert browser_state["page_title"] == "Example Page" + +def test_task_progress_tracking(): + logger = TaskLogger("progress_task", "Test progress tracking") + + # Add steps with progress information + logger.update_step("Step 1", TaskStatus.COMPLETE, progress=0.25) + context = logger.get_context() + assert context["task"]["progress"] == "25%" + + logger.update_step("Step 2", TaskStatus.COMPLETE, progress=0.5) + context = logger.get_context() + assert context["task"]["progress"] == "50%" + + logger.update_step("Final Step", TaskStatus.COMPLETE, progress=1.0) + context = logger.get_context() + assert context["task"]["progress"] == "100%" + +def test_log_formatting(): + logger = TaskLogger("format_task", "Test log formatting") + + # Capture log output + logger.update_step("Navigation", TaskStatus.RUNNING) + log_output = logger.format_log_entry() + + # Verify log format matches the specified structure + assert "[" in log_output # Has timestamp + assert "STEP 2/" in log_output # Has step number (2 because update_step increments) + assert "Navigation" in log_output # Has action + assert "→" in log_output # Has status symbol for running + + # Add another step to test duration + logger.update_step("Click button", TaskStatus.COMPLETE) + log_output = logger.format_log_entry() + assert "(" in log_output and "s)" in log_output # Now we should have duration + +def test_semantic_step_descriptions(): + logger = TaskLogger("semantic_task", "Test semantic descriptions") + + # Test navigation step + logger.update_step( + "Navigate to example.com", + TaskStatus.RUNNING, + action_type=ActionType.NAVIGATION + ) + context = logger.get_context() + assert context["task"]["current_action"] == "navigation" + assert "🌐" in logger.format_log_entry() # Navigation emoji + + # Test interaction step + logger.update_step( + "Click search button", + TaskStatus.RUNNING, + action_type=ActionType.INTERACTION + ) + context = logger.get_context() + assert context["task"]["current_action"] == "interaction" + assert "🖱️" in logger.format_log_entry() # Interaction emoji + + # Test extraction step + logger.update_step( + "Extract search results", + TaskStatus.RUNNING, + action_type=ActionType.EXTRACTION + ) + context = logger.get_context() + assert context["task"]["current_action"] == "extraction" + assert "📑" in logger.format_log_entry() # Extraction emoji + +def test_redundant_message_filtering(): + logger = TaskLogger("filter_task", "Test message filtering") + + # Add multiple steps of the same type + logger.update_step( + "Navigate to example.com", + TaskStatus.RUNNING, + action_type=ActionType.NAVIGATION + ) + logger.update_step( + "Page loaded successfully", + TaskStatus.COMPLETE, + 
action_type=ActionType.NAVIGATION, + suppress_similar=True # Should be filtered as it's a completion of the same action + ) + + # Get all log entries + log_entries = logger.get_log_history() + + # Verify that redundant messages are consolidated + navigation_entries = [entry for entry in log_entries if "Navigate" in entry] + assert len(navigation_entries) == 1 # Only the main action should be logged + + # Verify that the current step shows the completion status + current_log = logger.format_log_entry() + assert "✓" in current_log # Success symbol should be in current state + +def test_action_context_tracking(): + logger = TaskLogger("context_task", "Test action context") + + # Start a navigation action + logger.update_step( + "Navigate to example.com", + TaskStatus.RUNNING, + action_type=ActionType.NAVIGATION, + context={ + "url": "https://example.com", + "method": "GET", + "headers": {"User-Agent": "browser-use"} + } + ) + + context = logger.get_context() + assert "action_context" in context["task"] + assert context["task"]["action_context"]["url"] == "https://example.com" + + # Complete the action with results + logger.update_step( + "Navigation complete", + TaskStatus.COMPLETE, + action_type=ActionType.NAVIGATION, + results={ + "status_code": 200, + "page_title": "Example Domain", + "load_time": 0.5 + } + ) + + context = logger.get_context() + assert "action_results" in context["task"] + assert context["task"]["action_results"]["status_code"] == 200 + +def test_retry_configuration(): + config = RetryConfig( + max_retries=3, + base_delay=1.0, + max_delay=10.0, + jitter=0.1 + ) + + # Test that delays follow exponential backoff pattern + delays = [config.get_delay(attempt) for attempt in range(5)] + assert delays[0] == 0 # First attempt has no delay + assert 0.9 <= delays[1] <= 1.1 # First retry ~1.0s with jitter + assert 1.8 <= delays[2] <= 2.2 # Second retry ~2.0s with jitter + assert 3.6 <= delays[3] <= 4.4 # Third retry ~4.0s with jitter + assert delays[4] == -1 # Beyond max retries + + # Test max delay capping + config = RetryConfig( + max_retries=5, + base_delay=1.0, + max_delay=5.0, + jitter=0.0 # Disable jitter for predictable testing + ) + assert config.get_delay(3) == 4.0 # Within max + assert config.get_delay(4) == 5.0 # Capped at max + +@pytest.mark.asyncio +async def test_retry_execution(): + logger = TaskLogger("retry_task", "Test retry logic") + + # Mock function that fails twice then succeeds + attempt_count = 0 + async def mock_operation(): + nonlocal attempt_count + attempt_count += 1 + if attempt_count <= 2: + raise ValueError("Temporary error") + return "success" + + # Configure retry behavior + retry_config = RetryConfig(max_retries=3, base_delay=0.1) + + # Execute with retry + result = await logger.execute_with_retry( + mock_operation, + "test_operation", + retry_config=retry_config + ) + + assert result == "success" + assert attempt_count == 3 # Two failures + one success + + # Verify retry information in logs + context = logger.get_context() + assert "retries" in context["task"] + retry_info = context["task"]["retries"] + assert retry_info["attempts"] == 3 + assert retry_info["success"] is True + assert len(retry_info["history"]) == 2 # Two retry attempts + +@pytest.mark.asyncio +async def test_retry_max_attempts_exceeded(): + logger = TaskLogger("retry_task", "Test retry logic") + + # Mock function that always fails + async def mock_operation(): + raise ValueError("Persistent error") + + # Configure retry behavior + retry_config = RetryConfig(max_retries=2, 
base_delay=0.1) + + # Execute with retry and expect failure + with pytest.raises(ValueError) as exc_info: + await logger.execute_with_retry( + mock_operation, + "test_operation", + retry_config=retry_config + ) + + assert str(exc_info.value) == "Persistent error" + + # Verify retry information in logs + context = logger.get_context() + assert "retries" in context["task"] + retry_info = context["task"]["retries"] + assert retry_info["attempts"] == 3 # Initial + 2 retries + assert retry_info["success"] is False + assert len(retry_info["history"]) == 3 # Initial attempt + two retries + assert all(entry["error"] == "ValueError: Persistent error" for entry in retry_info["history"]) + + # Verify the delays follow the expected pattern + delays = [entry["delay"] for entry in retry_info["history"]] + assert delays[0] > 0 # First retry has positive delay + assert delays[1] > delays[0] # Second retry has longer delay + assert delays[2] == -1 # Final attempt indicates max retries exceeded + +def test_retry_backoff_calculation(): + config = RetryConfig( + max_retries=3, + base_delay=1.0, + max_delay=10.0, + jitter=0.0 # Disable jitter for predictable testing + ) + + # Test exponential backoff sequence + assert config.get_delay(0) == 0 # First attempt + assert config.get_delay(1) == 1.0 # First retry + assert config.get_delay(2) == 2.0 # Second retry + assert config.get_delay(3) == 4.0 # Third retry + assert config.get_delay(4) == -1 # Beyond max retries + + # Test max delay capping + config = RetryConfig( + max_retries=5, + base_delay=1.0, + max_delay=5.0, + jitter=0.0 + ) + assert config.get_delay(3) == 4.0 # Within max + assert config.get_delay(4) == 5.0 # Capped at max + +def test_color_scheme(): + """Test that color scheme is properly defined and accessible.""" + scheme = ColorScheme() + + # Test error colors + assert scheme.error.startswith("\033[31m") # Red + assert scheme.warning.startswith("\033[33m") # Yellow + assert scheme.info.startswith("\033[36m") # Cyan + assert scheme.success.startswith("\033[32m") # Green + assert scheme.reset == "\033[0m" # Reset + +def test_log_formatting_with_colors(): + """Test that log messages are properly formatted with colors.""" + logger = TaskLogger("color_task", "Test color formatting") + + # Test error formatting + logger.update_step("Failed operation", TaskStatus.FAILED) + log_output = logger.format_log_entry() + assert "\033[31m" in log_output # Contains red color code + assert "×" in log_output # Contains error symbol + + # Test success formatting + logger.update_step("Successful operation", TaskStatus.COMPLETE) + log_output = logger.format_log_entry() + assert "\033[32m" in log_output # Contains green color code + assert "✓" in log_output # Contains success symbol + + # Test running state formatting + logger.update_step("Running operation", TaskStatus.RUNNING) + log_output = logger.format_log_entry() + assert "\033[36m" in log_output # Contains cyan color code + assert "→" in log_output # Contains running symbol + +def test_color_disabled(): + """Test that colors can be disabled via environment variable.""" + os.environ["NO_COLOR"] = "1" + logger = TaskLogger("no_color_task", "Test without colors") + + logger.update_step("Test operation", TaskStatus.COMPLETE) + log_output = logger.format_log_entry() + + # Verify no color codes are present + assert "\033[" not in log_output + assert "✓" in log_output # Symbols still present + + # Clean up + del os.environ["NO_COLOR"] + +def test_color_scheme_customization(): + """Test that color scheme can be 
customized.""" + custom_scheme = ColorScheme( + error="\033[35m", # Magenta for errors + warning="\033[34m", # Blue for warnings + info="\033[37m", # White for info + success="\033[32m" # Keep green for success + ) + + logger = TaskLogger("custom_color_task", "Test custom colors", color_scheme=custom_scheme) + + # Test custom error color + logger.update_step("Failed operation", TaskStatus.FAILED) + log_output = logger.format_log_entry() + assert "\033[35m" in log_output # Contains magenta color code + + # Test custom info color + logger.update_step("Info message", TaskStatus.RUNNING) + log_output = logger.format_log_entry() + assert "\033[37m" in log_output # Contains white color code + +def test_log_formatter_with_colors(): + """Test that the log formatter properly applies colors to different components.""" + formatter = LogFormatter() + + # Create a mock log record + class MockRecord: + def __init__(self, levelname, msg): + self.levelname = levelname + self.msg = msg + self.created = datetime.utcnow().timestamp() + + # Test error formatting + error_record = MockRecord("ERROR", "Test error message") + formatted = formatter.format(error_record) + assert "\033[31m" in formatted # Red for error + assert "ERROR" in formatted + + # Test info formatting + info_record = MockRecord("INFO", "Test info message") + formatted = formatter.format(info_record) + assert "\033[36m" in formatted # Cyan for info + assert "INFO" in formatted + + # Test warning formatting + warn_record = MockRecord("WARNING", "Test warning message") + formatted = formatter.format(warn_record) + assert "\033[33m" in formatted # Yellow for warning + assert "WARNING" in formatted + +def test_task_separator_style(): + """Test that separator styles are properly defined and formatted.""" + style = SeparatorStyle() + + # Test default separator styles + assert len(style.task) >= 50 # Task separator should be substantial + assert len(style.phase) >= 30 # Phase separator should be visible but less prominent + assert len(style.error) >= 40 # Error separator should be distinct + + # Test that styles are different + assert style.task != style.phase + assert style.task != style.error + assert style.phase != style.error + +def test_task_start_separator(): + """Test that separators are added at task start.""" + logger = TaskLogger("separator_task", "Test separators") + + # Get initial log output + log_entries = logger.get_log_history() + + # Should have task separator and initial step + assert len(log_entries) == 2 + assert "=" * 50 in log_entries[0] # Task separator + assert "TASK GOAL: Test separators" in log_entries[1] # Initial step message + +def test_phase_separators(): + """Test that separators are added between different phases.""" + logger = TaskLogger("separator_task", "Test separators") + + # Navigation phase + logger.start_phase("Navigation") + logger.update_step("Navigate to example.com", TaskStatus.COMPLETE, action_type=ActionType.NAVIGATION) + + # Interaction phase + logger.start_phase("Interaction") + logger.update_step("Click button", TaskStatus.COMPLETE, action_type=ActionType.INTERACTION) + + # Get log entries + log_entries = logger.get_log_history() + + # Count phase separators + phase_separators = [entry for entry in log_entries if "-" * 30 in entry] + assert len(phase_separators) == 2 # One before each phase + +def test_error_separators(): + """Test that separators are added around error messages.""" + logger = TaskLogger("separator_task", "Test separators") + + # Simulate an error + try: + raise ValueError("Test 
error") + except Exception as e: + logger.log_error(e, step_number=1, action="test_action") + + # Get log entries + log_entries = logger.get_log_history() + + # Should have error separators + error_separators = [entry for entry in log_entries if "*" * 40 in entry] + assert len(error_separators) == 2 # One before and one after error + +def test_custom_separator_style(): + """Test that separator styles can be customized.""" + custom_style = SeparatorStyle( + task="◈" * 30, + phase="•" * 20, + error="!" * 25 + ) + + logger = TaskLogger("custom_separator_task", "Test custom separators", separator_style=custom_style) + + # Start a phase + logger.start_phase("Test Phase") + + # Get log entries + log_entries = logger.get_log_history() + + # Verify custom separators are used + assert "◈" * 30 in log_entries[0] # Task separator + assert "•" * 20 in log_entries[2] # Phase separator + assert "→" in log_entries[2] # Arrow indicator for phase start + +def test_separator_with_colors(): + """Test that separators can be colored.""" + logger = TaskLogger("colored_separator_task", "Test colored separators") + + # Start a phase + logger.start_phase("Test Phase") + + # Get log entries + log_entries = logger.get_log_history() + + # Verify separators have color codes + task_separator = log_entries[0] + phase_separator = log_entries[1] + + assert "\033[" in task_separator # Contains color code + assert "\033[" in phase_separator # Contains color code + +def test_separator_disabled(): + """Test that separators can be disabled.""" + logger = TaskLogger("no_separator_task", "Test without separators", use_separators=False) + + # Start a phase + logger.start_phase("Test Phase") + + # Get log entries + log_entries = logger.get_log_history() + + # Verify no separators are present + separators = [entry for entry in log_entries if any(c * 20 in entry for c in "=-*")] + assert len(separators) == 0 # No separators should be present \ No newline at end of file diff --git a/tests/test_trace_analyzer.py b/tests/test_trace_analyzer.py new file mode 100644 index 00000000..841d180d --- /dev/null +++ b/tests/test_trace_analyzer.py @@ -0,0 +1,162 @@ +import pytest +import asyncio +import json +import zipfile +from pathlib import Path +import tempfile +from src.trace_analyzer import PlaywrightTrace, analyze_trace + +# Sample trace data +SAMPLE_TRACE_DATA = [ + # Action event (before) + { + "type": "before", + "method": "goto", + "params": {"url": "https://example.com"}, + "timestamp": 1000, + "duration": 500 + }, + # Action event (after - success) + { + "type": "after", + "method": "goto", + "params": {"url": "https://example.com"}, + "timestamp": 1500, + "duration": 500 + }, + # Action event (after - error) + { + "type": "after", + "method": "click", + "params": {"selector": "#missing-button"}, + "timestamp": 2000, + "duration": 100, + "error": {"message": "Element not found"} + }, + # Console event + { + "type": "console", + "text": "Test console message" + }, + # Error event + { + "type": "error", + "error": {"message": "Test error message"} + } +] + +# Sample HAR data +SAMPLE_HAR_DATA = { + "log": { + "entries": [ + { + "request": { + "url": "https://example.com", + "method": "GET" + }, + "response": { + "status": 200, + "statusText": "OK" + }, + "time": 150 + }, + { + "request": { + "url": "https://example.com/missing", + "method": "GET" + }, + "response": { + "status": 404, + "statusText": "Not Found" + }, + "time": 100 + } + ] + } +} + +@pytest.fixture +def sample_trace_file(): + """Create a temporary trace file with sample 
data.""" + with tempfile.NamedTemporaryFile(suffix='.zip', delete=False) as temp_zip: + with zipfile.ZipFile(temp_zip.name, 'w') as zf: + # Add trace data + trace_data = '\n'.join(json.dumps(event) for event in SAMPLE_TRACE_DATA) + zf.writestr('trace.trace', trace_data) + + # Add HAR data + zf.writestr('trace.har', json.dumps(SAMPLE_HAR_DATA)) + + yield temp_zip.name + Path(temp_zip.name).unlink() + +@pytest.mark.asyncio +async def test_trace_parsing(sample_trace_file): + """Test basic trace file parsing.""" + trace = await PlaywrightTrace.parse(sample_trace_file) + + # Check actions + assert len(trace.actions) == 3 + assert any(a['type'] == 'goto' and a['success'] for a in trace.actions) + assert any(a['type'] == 'click' and not a['success'] for a in trace.actions) + + # Check console logs + assert len(trace.console_logs) == 1 + assert trace.console_logs[0] == "Test console message" + + # Check errors + assert len(trace.errors) == 1 + assert "Test error message" in trace.errors[0] + + # Check network requests + assert len(trace.network_requests) == 2 + assert any(r['status'] == 200 for r in trace.network_requests) + assert any(r['status'] == 404 for r in trace.network_requests) + +@pytest.mark.asyncio +async def test_analyze_trace(sample_trace_file): + """Test the analyze_trace function.""" + result = await analyze_trace(sample_trace_file) + + assert "actions" in result + assert "network_requests" in result + assert "console_logs" in result + assert "errors" in result + assert "summary" in result + + summary = result["summary"] + assert summary["total_actions"] == 3 + assert summary["failed_actions"] == 1 + assert summary["total_requests"] == 2 + assert summary["failed_requests"] == 1 + assert summary["total_errors"] == 1 + +@pytest.mark.asyncio +async def test_invalid_trace_file(): + """Test handling of invalid trace files.""" + with tempfile.NamedTemporaryFile(suffix='.zip', delete=False) as temp_file: + temp_file.write(b"Invalid zip data") + + with pytest.raises(ValueError, match="Invalid trace file format"): + await PlaywrightTrace.parse(temp_file.name) + + Path(temp_file.name).unlink() + +@pytest.mark.asyncio +async def test_missing_trace_file(): + """Test handling of missing trace files.""" + with pytest.raises(FileNotFoundError): + await PlaywrightTrace.parse("nonexistent_file.zip") + +@pytest.mark.asyncio +async def test_malformed_trace_data(sample_trace_file): + """Test handling of malformed trace data.""" + with tempfile.NamedTemporaryFile(suffix='.zip', delete=False) as temp_zip: + with zipfile.ZipFile(temp_zip.name, 'w') as zf: + zf.writestr('trace.trace', 'Invalid JSON data\n{"type": "console", "text": "Valid event"}') + + trace = await PlaywrightTrace.parse(temp_zip.name) + assert len(trace.errors) == 1 # One error for the invalid JSON + assert len(trace.console_logs) == 1 # One valid console event + + Path(temp_zip.name).unlink() \ No newline at end of file diff --git a/webui.py b/webui.py index b7acffe4..ca96dfc7 100644 --- a/webui.py +++ b/webui.py @@ -7,15 +7,29 @@ import pdb import logging - -from dotenv import load_dotenv - -load_dotenv() import os +import sys import glob import asyncio import argparse import os +import warnings + +from dotenv import load_dotenv +from src.utils.logging import setup_logging, PRODUCTION_EXCLUDE_PATTERNS + +# Filter out the specific deprecation warning from langchain-google-genai +warnings.filterwarnings('ignore', message='Convert_system_message_to_human will be deprecated!') + +load_dotenv() + +# Setup logging before importing other 
modules
+setup_logging(
+    level=os.getenv("LOG_LEVEL", "INFO"),
+    use_json=os.getenv("LOG_JSON", "true").lower() == "true",
+    log_file=os.getenv("LOG_FILE"),
+    exclude_patterns=PRODUCTION_EXCLUDE_PATTERNS if os.getenv("ENVIRONMENT") == "production" else None
+)
 
 logger = logging.getLogger(__name__)
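
The retry tests above pin down a concrete backoff contract for `RetryConfig.get_delay`: no delay on the initial attempt, exponential growth capped at `max_delay`, jitter applied as a fraction of the delay, and `-1` once the retry budget is exhausted. The snippet below is a minimal sketch consistent with those assertions, not the implementation shipped in `src/utils/task_logging.py`; the field defaults shown are assumptions.

```python
import random
from dataclasses import dataclass


@dataclass
class RetryConfig:
    """Backoff policy sketch matching the expectations in tests/test_task_logging.py."""

    max_retries: int = 3      # assumed default
    base_delay: float = 1.0   # assumed default
    max_delay: float = 10.0   # assumed default
    jitter: float = 0.1       # fraction of the delay, assumed default

    def get_delay(self, attempt: int) -> float:
        # Attempt 0 is the initial try and is never delayed.
        if attempt == 0:
            return 0.0
        # Past the retry budget, signal "stop retrying" with -1.
        if attempt > self.max_retries:
            return -1
        # Exponential backoff: base, 2*base, 4*base, ... capped at max_delay.
        delay = min(self.base_delay * (2 ** (attempt - 1)), self.max_delay)
        # Apply +/- jitter as a fraction of the delay; jitter=0.0 keeps it deterministic.
        if self.jitter:
            delay *= 1 + random.uniform(-self.jitter, self.jitter)
        return delay
```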
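The formatter tests in `tests/test_structured_logging.py` likewise describe observable behaviour rather than implementation. A minimal sketch consistent with them is shown below, assuming `colorama` for the color codes and the informal `NO_COLOR` environment convention for the opt-out; judging by the tests, the real classes in `src/utils/structured_logging.py` also highlight keywords (`✓`, `×`, `STEP`) and append structured event data, which this sketch omits.

```python
import json
import logging
import os
from datetime import datetime, timezone

from colorama import Fore, Style


class JSONFormatter(logging.Formatter):
    """Emit one JSON object per log record, passing structured extras through."""

    def format(self, record: logging.LogRecord) -> str:
        entry = {
            "timestamp": datetime.now(timezone.utc).isoformat(),
            "level": record.levelname,
            "logger": record.name,
            "message": record.getMessage(),
        }
        # Extras attached to the record (e.g. event_type, data) pass through as-is.
        for attr in ("event_type", "data"):
            if hasattr(record, attr):
                entry[attr] = getattr(record, attr)
        return json.dumps(entry)


class ColorizedFormatter(logging.Formatter):
    """Human-readable formatter; colors per level, disabled by NO_COLOR."""

    LEVEL_COLORS = {
        "DEBUG": Style.DIM,
        "INFO": Fore.CYAN,
        "WARNING": Fore.YELLOW,
        "ERROR": Fore.RED,
    }

    def __init__(self, use_colors: bool = True):
        super().__init__()
        # Honour the NO_COLOR convention even when colors were requested.
        self.use_colors = use_colors and "NO_COLOR" not in os.environ

    def format(self, record: logging.LogRecord) -> str:
        timestamp = datetime.now().strftime("%H:%M:%S")
        line = f"[{timestamp}] {record.levelname} {record.getMessage()}"
        if not self.use_colors:
            return line
        color = self.LEVEL_COLORS.get(record.levelname, "")
        return f"{color}{line}{Style.RESET_ALL}"
```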
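Finally, the three `setup_structured_logging` tests only require that the helper installs a single handler on the root logger and selects the formatter from its arguments. Below is a sketch under those assumptions, importing the formatters from the module under test; the shipped helper may additionally clear pre-existing handlers, and the separate `setup_logging` used in `webui.py` reads `LOG_LEVEL`, `LOG_JSON` and `LOG_FILE` from the environment instead.

```python
import logging

from src.utils.structured_logging import ColorizedFormatter, JSONFormatter


def setup_structured_logging(
    level: int = logging.INFO,
    use_colors: bool = True,
    json_output: bool = False,
) -> None:
    """Attach one handler to the root logger with the requested formatter."""
    root = logging.getLogger()
    root.setLevel(level)

    handler = logging.StreamHandler()
    # In this sketch, json_output takes precedence over colorized output.
    handler.setFormatter(JSONFormatter() if json_output else ColorizedFormatter(use_colors=use_colors))
    root.addHandler(handler)
```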