
Commit c5a5d79

Enhance Google Gemini integration and environment configuration
- Added support for the Google Gemini API in `test_gemini_connection.py`, including model validation and content generation.
- Updated `.gitignore` to include `.env.google` for environment variable management.
- Modified `browser_use_cli.py` to set the model name dynamically from environment variables.
- Improved the `get_llm_model` function in `utils.py` to handle model names and API keys more securely using `SecretStr`.
- Updated tests in `test_browser_cli.py` to set the Google model via an environment variable for better test isolation.
1 parent 0829027 commit c5a5d79
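
For context on the `SecretStr` change below: wrapping keys in pydantic's `SecretStr` keeps them masked in reprs and logs, and the raw value only comes out when explicitly requested. A minimal illustration, using a placeholder value rather than anything from this repository:

from pydantic import SecretStr

api_key = SecretStr("sk-placeholder-not-a-real-key")
print(api_key)                      # prints ********** -- the value is masked
print(api_key.get_secret_value())   # returns the raw string only when asked explicitly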

File tree

5 files changed (+86, -15 lines)


.gitignore

Lines changed: 2 additions & 0 deletions
@@ -193,3 +193,5 @@ data/
 
 # Brain directory
 .brain/
+
+.env.google

cli/browser_use_cli.py

Lines changed: 3 additions & 2 deletions
@@ -121,8 +121,9 @@ async def run_browser_task(
     # Get LLM model
     llm = utils.get_llm_model(
         provider="deepseek" if model == "deepseek-chat" else model,
-        model_name=model,
-        temperature=0.8
+        model_name=os.getenv("GOOGLE_API_MODEL", "gemini-pro-vision") if model == "gemini" else model,
+        temperature=0.8,
+        vision=vision
     )
 
     # Update context with runtime options if needed
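
Taken on its own, the new `model_name` argument resolves as in the small sketch below; the helper name is illustrative only, and the `gemini-pro-vision` fallback comes straight from the diff above:

import os

def resolve_cli_model_name(model: str) -> str:
    # The "gemini" alias defers to GOOGLE_API_MODEL; every other model name passes through unchanged.
    if model == "gemini":
        return os.getenv("GOOGLE_API_MODEL", "gemini-pro-vision")
    return model

# With GOOGLE_API_MODEL=gemini-1.5-flash exported, "gemini" resolves to that model:
print(resolve_cli_model_name("gemini"))
print(resolve_cli_model_name("deepseek-chat"))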

src/utils/utils.py

Lines changed: 29 additions & 13 deletions
@@ -9,6 +9,7 @@
 import time
 from pathlib import Path
 from typing import Dict, Optional
+from pydantic import SecretStr
 
 from langchain_anthropic import ChatAnthropic
 from langchain_google_genai import ChatGoogleGenerativeAI
@@ -30,15 +31,17 @@ def get_llm_model(provider: str, **kwargs):
         base_url = kwargs.get("base_url")
 
     if not kwargs.get("api_key", ""):
-        api_key = os.getenv("ANTHROPIC_API_KEY", "")
+        api_key = SecretStr(os.getenv("ANTHROPIC_API_KEY") or "")
     else:
-        api_key = kwargs.get("api_key")
+        api_key = SecretStr(kwargs.get("api_key") or "")
 
     return ChatAnthropic(
         model_name=kwargs.get("model_name", "claude-3-5-sonnet-20240620"),
         temperature=kwargs.get("temperature", 0.0),
         base_url=base_url,
         api_key=api_key,
+        timeout=kwargs.get("timeout", 60),
+        stop=kwargs.get("stop", None)
     )
 elif provider == "openai":
     if not kwargs.get("base_url", ""):
@@ -47,15 +50,16 @@ def get_llm_model(provider: str, **kwargs):
         base_url = kwargs.get("base_url")
 
     if not kwargs.get("api_key", ""):
-        api_key = os.getenv("OPENAI_API_KEY", "")
+        api_key = SecretStr(os.getenv("OPENAI_API_KEY") or "")
     else:
-        api_key = kwargs.get("api_key")
+        api_key = SecretStr(kwargs.get("api_key") or "")
 
     return ChatOpenAI(
-        model=kwargs.get("model_name", "gpt-4o"),
+        model=kwargs.get("model_name", "gpt-4"),
         temperature=kwargs.get("temperature", 0.0),
         base_url=base_url,
         api_key=api_key,
+        timeout=kwargs.get("timeout", 60),
    )
 elif provider == "deepseek":
     if not kwargs.get("base_url", ""):
@@ -64,25 +68,37 @@ def get_llm_model(provider: str, **kwargs):
         base_url = kwargs.get("base_url")
 
     if not kwargs.get("api_key", ""):
-        api_key = os.getenv("DEEPSEEK_API_KEY", "")
+        api_key = SecretStr(os.getenv("DEEPSEEK_API_KEY") or "")
     else:
-        api_key = kwargs.get("api_key")
+        api_key = SecretStr(kwargs.get("api_key") or "")
 
     return ChatOpenAI(
         model=kwargs.get("model_name", "deepseek-chat"),
         temperature=kwargs.get("temperature", 0.0),
         base_url=base_url,
         api_key=api_key,
+        timeout=kwargs.get("timeout", 60),
     )
 elif provider == "gemini":
     if not kwargs.get("api_key", ""):
-        api_key = os.getenv("GOOGLE_API_KEY", "")
+        api_key = SecretStr(os.getenv("GOOGLE_API_KEY") or "")
     else:
-        api_key = kwargs.get("api_key")
+        api_key = SecretStr(kwargs.get("api_key") or "")
+
+    # Get model name from environment or kwargs
+    model_name = kwargs.get("model_name")
+    if not model_name:
+        if kwargs.get("vision"):
+            model_name = os.getenv("GOOGLE_API_MODEL", "gemini-1.5-flash")
+        else:
+            model_name = os.getenv("GOOGLE_API_TYPE", "gemini-1.5-flash")
+
     return ChatGoogleGenerativeAI(
-        model=kwargs.get("model_name", "gemini-2.0-flash-exp"),
+        model=model_name,
         temperature=kwargs.get("temperature", 0.0),
-        google_api_key=api_key,
+        api_key=api_key,
+        timeout=kwargs.get("timeout", 60),
+        convert_system_message_to_human=True
     )
 elif provider == "ollama":
     return ChatOllama(
@@ -97,9 +113,9 @@ def get_llm_model(provider: str, **kwargs):
     else:
         base_url = kwargs.get("base_url")
     if not kwargs.get("api_key", ""):
-        api_key = os.getenv("AZURE_OPENAI_API_KEY", "")
+        api_key = SecretStr(os.getenv("AZURE_OPENAI_API_KEY") or "")
     else:
-        api_key = kwargs.get("api_key")
+        api_key = SecretStr(kwargs.get("api_key") or "")
     return AzureChatOpenAI(
         model=kwargs.get("model_name", "gpt-4o"),
         temperature=kwargs.get("temperature", 0.0),
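
The Gemini branch now resolves its model name with a clear precedence: an explicit `model_name` kwarg wins, then `GOOGLE_API_MODEL` when `vision` is requested, then `GOOGLE_API_TYPE`, falling back to `gemini-1.5-flash`. A minimal sketch of just that precedence (the standalone function name is an assumption for illustration, not part of the diff):

import os
from typing import Optional

def resolve_gemini_model(model_name: Optional[str] = None, vision: bool = False) -> str:
    # An explicit model name always wins over environment configuration.
    if model_name:
        return model_name
    # Vision runs consult GOOGLE_API_MODEL; text-only runs consult GOOGLE_API_TYPE.
    env_var = "GOOGLE_API_MODEL" if vision else "GOOGLE_API_TYPE"
    return os.getenv(env_var, "gemini-1.5-flash")

print(resolve_gemini_model(model_name="gemini-pro"))  # -> gemini-pro
print(resolve_gemini_model(vision=True))              # -> GOOGLE_API_MODEL, or gemini-1.5-flash if unset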

test_gemini_connection.py

Lines changed: 47 additions & 0 deletions
@@ -0,0 +1,47 @@
+import google.generativeai as genai
+import os
+from dotenv import load_dotenv, find_dotenv
+
+# Force reload of environment variables
+load_dotenv(find_dotenv(), override=True)
+
+api_key = os.environ.get("GOOGLE_API_KEY")
+model_name = os.environ.get("GOOGLE_API_MODEL")
+
+if not api_key or not model_name:
+    raise ValueError("Missing required environment variables: GOOGLE_API_KEY or GOOGLE_API_MODEL")
+
+print(f"Using model: {model_name}")
+genai.configure(api_key=api_key, transport="rest")
+
+# List all available models
+print("\nAvailable models:")
+for m in genai.list_models():
+    print(f"- {m.name}")
+
+# Check that the model exists in the client
+found_model = False
+for m in genai.list_models():
+    model_id = m.name.replace("models/", "")
+    if model_id == model_name:
+        found_model = True
+        print(f"\nFound model: {m.name}")
+        break
+
+if not found_model:
+    print("\nAvailable model IDs:")
+    for m in genai.list_models():
+        print(f"- {m.name.replace('models/', '')}")
+
+assert found_model, f"Model not found: {model_name}"
+
+# Load the model
+model = genai.GenerativeModel(model_name)
+
+# Perform a simple generation task
+try:
+    response = model.generate_content("Hello, I'm testing the Gemini API connection. Please respond with a short greeting.")
+    print(f"\nResponse: {response.text}")
+except Exception as e:
+    print(f"\nError generating content: {e}")
+    raise
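
The script relies on `find_dotenv()` locating whichever dotenv file is on the search path. If the credentials live in the newly git-ignored `.env.google`, one way to load that file explicitly would be the following (the path is an assumption; the script itself does not name it):

from dotenv import load_dotenv

# Load the git-ignored Google credentials file directly, overriding any stale values.
load_dotenv(".env.google", override=True)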

tests/test_browser_cli.py

Lines changed: 5 additions & 0 deletions
@@ -11,6 +11,7 @@
 import asyncio
 import os
 from cli.browser_use_cli import initialize_browser, run_browser_task, close_browser, main, _global_browser, _global_browser_context
+from src.utils.utils import model_names  # Import model names from utils
 
 # Configure logging for tests
 logging.basicConfig(level=logging.INFO)
@@ -164,6 +165,7 @@ async def test_model_switching(self):
             raise
 
         # Test Gemini
+        os.environ["GOOGLE_API_MODEL"] = model_names["gemini"][0]  # Set model via environment
         result2 = await run_browser_task(
             "what do you see on the page?",
             model="gemini",
@@ -175,6 +177,9 @@ async def test_model_switching(self):
 
     async def test_vision_capability(self):
         """Test vision capabilities"""
+        # Set Gemini model via environment
+        os.environ["GOOGLE_API_MODEL"] = model_names["gemini"][0]
+
         # Without vision
         result1 = await run_browser_task(
             "what do you see on example.com?",
