Skip to content

Commit c9c773b

Browse files
committed
feat: added logger
1 parent a9af593 commit c9c773b

14 files changed

+145
-7
lines changed

benchmark.py

Lines changed: 16 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -11,16 +11,21 @@
1111
from selenium.common.exceptions import WebDriverException
1212
import logging
1313

14-
# Setup logging
15-
logging.basicConfig(filename='webwand_test_log.txt', level=logging.INFO, format='%(asctime)s:%(levelname)s:%(message)s')
16-
1714
load_dotenv()
1815
api_key = os.getenv('OPENAI_API_KEY')
1916

2017
# Place to store task execution results
2118
results_dir = 'results'
2219
os.makedirs(results_dir, exist_ok=True)
2320

21+
dataset = 'tasks_test.jsonl'
22+
example_dataset = 'example_test.jsonl'
23+
24+
# Setup logging
25+
logs_path = 'webwand_test_log.txt'
26+
sample_logs_path = 'webwand_test_log_example.txt'
27+
logging.basicConfig(filename=sample_logs_path, level=logging.INFO, format='%(asctime)s:%(levelname)s:%(message)s')
28+
2429
def setup_driver():
2530
chrome_options = Options()
2631
# Load the unpacked webwand chrome extension
@@ -33,6 +38,7 @@ def setup_driver():
3338
return driver
3439

3540
def dispatch_event(driver, event_name, event):
41+
logging.info(f'Dispatched event {event_name}')
3642
script = f"""
3743
var event = new CustomEvent('{event_name}', {{ detail: {json.dumps(event)} }});
3844
document.dispatchEvent(event);
@@ -77,10 +83,11 @@ def add_task_listener(driver, task_id, max_retries=3):
7783
def handle_event(event_data):
7884
nonlocal attempts
7985
if not event_data:
80-
print("no event data")
86+
logging.info("No event data received")
8187
return
8288
if event_data['type'] == 'history':
8389
# Record history when task stops
90+
logging.info(f'Task {task_id} status: {event_data["status"]}')
8491
write_history(task_id, event_data['data'])
8592
attempts = 0
8693
return
@@ -90,6 +97,7 @@ def handle_event(event_data):
9097
# Task is still running. Continue to listen for events
9198
handle_event(driver.execute_async_script(script))
9299
else:
100+
logging.error(f"Unhandled event data type: {event_data['type']}")
93101
raise ValueError(f"Unhandled event data type: {event_data['type']}")
94102

95103
while attempts < max_retries:
@@ -100,7 +108,6 @@ def handle_event(event_data):
100108
if "javascript error: document unloaded while waiting for result" in str(e):
101109
attempts += 1
102110
logging.warning(f'Document unloaded error during task {task_id} attempt {attempts}: {str(e)}')
103-
print(f"Attempt {attempts}: Document unloaded error. Retrying...")
104111
logging.info("Retrying...")
105112
if attempts == max_retries:
106113
logging.error(f'Maximum retry attempts reached for task {task_id}.')
@@ -118,6 +125,7 @@ def write_history(task_id, task_history):
118125

119126
with open(file_path, 'w') as file:
120127
json.dump(task_history, file, indent=4)
128+
logging.info(f'History saved for task {task_id}')
121129

122130
def write_screenshots(task_id, image_data):
123131
image_bytes = base64.b64decode(image_data)
@@ -137,7 +145,7 @@ def run_webwand_task(driver, task_id, task_description):
137145
dispatch_event(driver, 'RunTask', {})
138146
add_task_listener(driver, task_id)
139147
end = time.time()
140-
logging.info(f'The task {task_id} took {end - start} seconds to complete.')
148+
logging.info(f'Task {task_id} took {end - start} seconds to complete.')
141149

142150
def click_extensions_icon(driver):
143151
# Simulate click to open side panel
@@ -154,8 +162,9 @@ def main():
154162
driver = setup_driver()
155163
initial_load = True
156164

157-
with open('tasks_test.jsonl', 'r') as file:
165+
with open(example_dataset, 'r') as file:
158166
for line in file:
167+
logging.info(f'-------------------------------------')
159168
task = json.loads(line)
160169
task_id = task["id"]
161170
driver.get(task['web'])

example_test.jsonl

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
{"web_name": "Google", "id": "Google--0", "ques": "Search for bananas", "web": "https://www.google.com/"}
2+
{"web_name": "Google", "id": "Google--1", "ques": "Search for apples", "web": "https://www.google.com/"}
Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,36 @@
1+
[
2+
{
3+
"action": {
4+
"operation": {
5+
"args": {
6+
"label": "8",
7+
"value": "bananas"
8+
},
9+
"name": "setValueAndEnter"
10+
},
11+
"thought": "I will enter 'bananas' into the search bar and press Enter to search."
12+
},
13+
"prompt": "The user requests the following task:\n\n Search for bananas\n\n\nCurrent time: 5/16/2024, 2:57:41 PM\nCurrent URL: https://www.google.com/\nCurrent page scrolling position: 100.0%\n\n\nUse the following data as a reference of the annotated elements (using `===` as a delimiter between each annotation):\n\nlabel = 1\nname = About\ntagName = A\n===\nlabel = 2\nname = Store\ntagName = A\n===\nlabel = 3\nname = Gmail\ntagName = A\n===\nlabel = 4\nname = Search for Images\ntagName = A\n===\nlabel = 5\nname = Google apps\ntagName = A\nrole = button\n===\nlabel = 6\nname = Sign in\ntagName = A\n===\nlabel = 7\nname = Celebrating Hank Adams\ntagName = A\n===\nlabel = 8\nname = Search\ntagName = TEXTAREA\ncurrentValue = \nplaceholder = \nrole = combobox\n===\nlabel = 9\nname = Search by voice\ntagName = DIV\nrole = button\n===\nlabel = 10\nname = Search by image\ntagName = DIV\nrole = button\n===\nlabel = 11\nname = Google Search\ntagName = INPUT\ncurrentValue = Google Search\nplaceholder = \nrole = button\n===\nlabel = 12\nname = I'm Feeling Lucky\ntagName = INPUT\ncurrentValue = I'm Feeling Lucky\nplaceholder = \nrole = button\n===\nlabel = 13\nname = Advertising\ntagName = A\n===\nlabel = 14\nname = Business\ntagName = A\n===\nlabel = 15\nname = How Search works\ntagName = A\n===\nlabel = 16\nname = Our third decade of climate action: join us\ntagName = A\n===\nlabel = 17\nname = Privacy\ntagName = A\n===\nlabel = 18\nname = Terms\ntagName = A\n===\nlabel = 19\nname = Settings\ntagName = DIV\nrole = button\n\nThis textarea currently has focus:\nlabel = 8\nname = Search\ntagName = TEXTAREA\ncurrentValue = \nplaceholder = \nrole = combobox\n",
14+
"response": "{\n \"thought\": \"I will enter 'bananas' into the search bar and press Enter to search.\",\n \"action\": {\n \"name\": \"setValueAndEnter\",\n \"args\": {\n \"label\": \"8\",\n \"value\": \"bananas\"\n }\n }\n}",
15+
"usage": {
16+
"completion_tokens": 63,
17+
"prompt_tokens": 2599,
18+
"total_tokens": 2662
19+
}
20+
},
21+
{
22+
"action": {
23+
"operation": {
24+
"name": "finish"
25+
},
26+
"thought": "The search results for 'bananas' are displayed. The first result is a Wikipedia link about bananas, followed by related questions and nutritional information."
27+
},
28+
"prompt": "The user requests the following task:\n\n Search for bananas\nYou have already taken the following actions: \nThought: I will enter 'bananas' into the search bar and press Enter to search.\nAction:{\"name\":\"setValueAndEnter\",\"args\":{\"label\":\"8\",\"value\":\"bananas\"}}\n\n\n\nCurrent time: 5/16/2024, 2:57:49 PM\nCurrent URL: https://www.google.com/search\nCurrent page scrolling position: 22.1%\n\n\nUse the following data as a reference of the annotated elements (using `===` as a delimiter between each annotation):\n\nlabel = 1\nname = Celebrating Hank Adams\ntagName = A\n===\nlabel = 2\nname = Search\ntagName = TEXTAREA\ncurrentValue = bananas\nplaceholder = \nrole = combobox\n===\nlabel = 3\nname = Clear\ntagName = DIV\nrole = button\n===\nlabel = 4\nname = Search by voice\ntagName = DIV\nrole = button\n===\nlabel = 5\nname = Search by image\ntagName = DIV\nrole = button\n===\nlabel = 6\nname = Search\ntagName = BUTTON\n===\nlabel = 7\nname = Settings\ntagName = DIV\nrole = button\n===\nlabel = 8\nname = Google apps\ntagName = A\nrole = button\n===\nlabel = 9\nname = Sign in\ntagName = A\n===\nlabel = 10\nname = Images\ntagName = A\nrole = link\n===\nlabel = 11\nname = Shopping\ntagName = A\nrole = link\n===\nlabel = 12\nname = Videos\ntagName = A\nrole = link\n===\nlabel = 13\nname = Forums\ntagName = A\nrole = link\n===\nlabel = 14\nname = More\ntagName = DIV\nrole = button\n===\nlabel = 15\nname = Tools\ntagName = DIV\nrole = button\n===\nlabel = 16\nname = Add Nutrition facts\ntagName = A\nrole = link\n===\nlabel = 17\nname = Add Benefits\ntagName = A\nrole = link\n===\nlabel = 18\nname = Add Recipes\ntagName = A\nrole = link\n===\nlabel = 19\nname = Add In stock\ntagName = A\nrole = link\n===\nlabel = 20\nname = Add Types\ntagName = A\nrole = link\n===\nlabel = 21\nname = Add Open now\ntagName = A\nrole = link\n===\nlabel = 22\nname = Add Plant\ntagName = A\nrole = link\n===\nlabel = 23\nname = Add Tree\ntagName = A\nrole = link\n===\nlabel = 24\nname = Add Who should avoid\ntagName = A\nrole = link\n===\nlabel = 25\nname = SafeSearch\ntagName = DIV\nrole = button\n===\nlabel = 26\nname = Banana Wikipedia https://en.wikipedia.org \u203a wiki \u203a Banana\ntagName = A\n===\nlabel = 27\nname = About this result\ntagName = DIV\nrole = button\n===\nlabel = 28\nname = List of banana cultivars\ntagName = A\n===\nlabel = 29\nname = Cavendish banana\ntagName = A\n===\nlabel = 30\nname = Gros Michel\ntagName = A\n===\nlabel = 31\nname = Cooking banana\ntagName = A\n===\nlabel = 32\nname = About this result\ntagName = DIV\nrole = button\n===\nlabel = 33\nname = What are the benefits of eating bananas?\ntagName = DIV\nrole = button\n===\nlabel = 34\nname = Is it good to eat a banana every day?\ntagName = DIV\nrole = button\n===\nlabel = 35\nname = What are bananas high in?\ntagName = DIV\nrole = button\n===\nlabel = 36\nname = What is the best time to eat bananas?\ntagName = DIV\nrole = button\n===\nlabel = 37\nname = Feedback\ntagName = DIV\nrole = button\n===\nlabel = 38\nname = Food Nutrition\u2026Banana nutrition\ntagName = DIV\nrole = button\n===\nlabel = 39\nname = Protein Amount\u2026protein in banana\ntagName = DIV\nrole = button\n===\nlabel = 40\nname = Equivalent\u2026what fruit is equivalent to a banana\ntagName = DIV\nrole = button\n===\nlabel = 41\nname = Sugar Amount\u2026how much sugar in a banana\ntagName = DIV\nrole = button\n===\nlabel = 42\nname = 6 more\ntagName = DIV\nrole = button\n===\nlabel = 43\nname = Feedback\ntagName = DIV\nrole = button\n",
29+
"response": "{\n \"thought\": \"The search results for 'bananas' are displayed. The first result is a Wikipedia link about bananas, followed by related questions and nutritional information.\",\n \"action\": {\n \"name\": \"finish\"\n }\n}",
30+
"usage": {
31+
"completion_tokens": 50,
32+
"prompt_tokens": 3147,
33+
"total_tokens": 3197
34+
}
35+
}
36+
]
344 KB
Loading
316 KB
Loading
384 KB
Loading
315 KB
Loading
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
[
2+
{
3+
"action": {
4+
"operation": {
5+
"args": {
6+
"label": "9",
7+
"value": "apples"
8+
},
9+
"name": "setValueAndEnter"
10+
},
11+
"thought": "I will enter 'apples' in the search bar and press Enter to search for apples."
12+
},
13+
"prompt": "The user requests the following task:\n\n Search for apples\n\n\nCurrent time: 5/16/2024, 2:57:58 PM\nCurrent URL: https://www.google.com/\nCurrent page scrolling position: 100.0%\n\n\nUse the following data as a reference of the annotated elements (using `===` as a delimiter between each annotation):\n\nlabel = 1\nname = About\ntagName = A\n===\nlabel = 2\nname = Store\ntagName = A\n===\nlabel = 3\nname = Gmail\ntagName = A\n===\nlabel = 4\nname = Search for Images\ntagName = A\n===\nlabel = 5\nname = Google apps\ntagName = A\nrole = button\n===\nlabel = 6\nname = Sign in\ntagName = A\n===\nlabel = 7\nname = Celebrating Hank Adams\ntagName = A\n===\nlabel = 8\nname = Share\ntagName = BUTTON\n===\nlabel = 9\nname = Search\ntagName = TEXTAREA\ncurrentValue = \nplaceholder = \nrole = combobox\n===\nlabel = 10\nname = Search by voice\ntagName = DIV\nrole = button\n===\nlabel = 11\nname = Search by image\ntagName = DIV\nrole = button\n===\nlabel = 12\nname = Google Search\ntagName = INPUT\ncurrentValue = Google Search\nplaceholder = \nrole = button\n===\nlabel = 13\nname = I'm Feeling Lucky\ntagName = INPUT\ncurrentValue = I'm Feeling Lucky\nplaceholder = \nrole = button\n===\nlabel = 14\nname = Advertising\ntagName = A\n===\nlabel = 15\nname = Business\ntagName = A\n===\nlabel = 16\nname = How Search works\ntagName = A\n===\nlabel = 17\nname = Our third decade of climate action: join us\ntagName = A\n===\nlabel = 18\nname = Privacy\ntagName = A\n===\nlabel = 19\nname = Terms\ntagName = A\n===\nlabel = 20\nname = Settings\ntagName = DIV\nrole = button\n\nThis textarea currently has focus:\nlabel = 9\nname = Search\ntagName = TEXTAREA\ncurrentValue = \nplaceholder = \nrole = combobox\n",
14+
"response": "{\n \"thought\": \"I will enter 'apples' in the search bar and press Enter to search for apples.\",\n \"action\": {\n \"name\": \"setValueAndEnter\",\n \"args\": {\n \"label\": \"9\",\n \"value\": \"apples\"\n }\n }\n}",
15+
"usage": {
16+
"completion_tokens": 65,
17+
"prompt_tokens": 2614,
18+
"total_tokens": 2679
19+
}
20+
},
21+
{
22+
"action": {
23+
"operation": {
24+
"args": {},
25+
"name": "finish"
26+
},
27+
"thought": "The search results for 'apples' are displayed. The results include various types of apples and related products from different vendors."
28+
},
29+
"prompt": "The user requests the following task:\n\n Search for apples\nYou have already taken the following actions: \nThought: I will enter 'apples' in the search bar and press Enter to search for apples.\nAction:{\"name\":\"setValueAndEnter\",\"args\":{\"label\":\"9\",\"value\":\"apples\"}}\n\n\n\nCurrent time: 5/16/2024, 2:58:07 PM\nCurrent URL: https://www.google.com/search\nCurrent page scrolling position: 18.1%\n\n\nUse the following data as a reference of the annotated elements (using `===` as a delimiter between each annotation):\n\nlabel = 1\nname = Celebrating Hank Adams\ntagName = A\n===\nlabel = 2\nname = Search\ntagName = TEXTAREA\ncurrentValue = apples\nplaceholder = \nrole = combobox\n===\nlabel = 3\nname = Clear\ntagName = DIV\nrole = button\n===\nlabel = 4\nname = Search by voice\ntagName = DIV\nrole = button\n===\nlabel = 5\nname = Search by image\ntagName = DIV\nrole = button\n===\nlabel = 6\nname = Search\ntagName = BUTTON\n===\nlabel = 7\nname = Settings\ntagName = DIV\nrole = button\n===\nlabel = 8\nname = Google apps\ntagName = A\nrole = button\n===\nlabel = 9\nname = Sign in\ntagName = A\n===\nlabel = 10\nname = Images\ntagName = A\nrole = link\n===\nlabel = 11\nname = Shopping\ntagName = A\nrole = link\n===\nlabel = 12\nname = Videos\ntagName = A\nrole = link\n===\nlabel = 13\nname = News\ntagName = A\nrole = link\n===\nlabel = 14\nname = More\ntagName = DIV\nrole = button\n===\nlabel = 15\nname = Tools\ntagName = DIV\nrole = button\n===\nlabel = 16\nname = Add Benefits\ntagName = A\nrole = link\n===\nlabel = 17\nname = Add Nutrition\ntagName = A\nrole = link\n===\nlabel = 18\nname = Add Types\ntagName = A\nrole = link\n===\nlabel = 19\nname = Add Fruit\ntagName = A\nrole = link\n===\nlabel = 20\nname = Add Open now\ntagName = A\nrole = link\n===\nlabel = 21\nname = Add Drawing\ntagName = A\nrole = link\n===\nlabel = 22\nname = Add Clipart\ntagName = A\nrole = link\n===\nlabel = 23\nname = Add Top rated\ntagName = A\nrole = link\n===\nlabel = 24\nname = Add Green\ntagName = A\nrole = link\n===\nlabel = 25\nname = SafeSearch\ntagName = DIV\nrole = button\n===\nlabel = 26\nname = Stay signed out\ntagName = G-RAISED-BUTTON\nrole = button\n===\nlabel = 27\nname = Sign in\ntagName = G-RAISED-BUTTON\nrole = button\n===\nlabel = 28\nname = Why this ad?\ntagName = DIV\nrole = button\n===\nlabel = 29\nname = Fresh - Giant Ultra-Premium Honeycrisp Apple for $1.99 from FreshDirect\ntagName = A\n===\nlabel = 30\nname = Image of Fresh - Giant Ultra-Premium Honeycrisp Apple\ntagName = DIV\nrole = link\n===\nlabel = 31\nname = Fresh - Giant Ultra-Premium Honeycrisp Apple\ntagName = DIV\nrole = link\n===\nlabel = 32\nname = Gala Apples Fresh Produce Fruit, 3 LB Bag for $14.95 from Amazon.com\ntagName = A\n===\nlabel = 33\nname = Image of Gala Apples Fresh Produce Fruit, 3 LB Bag\ntagName = DIV\nrole = link\n===\nlabel = 34\nname = Gala Apples Fresh Produce Fruit, 3 LB Bag\ntagName = DIV\nrole = link\n===\nlabel = 35\nname = Hudson Valley McIntosh Apples for $4.99 from FreshDirect\ntagName = A\n===\nlabel = 36\nname = Image of Hudson Valley McIntosh Apples\ntagName = DIV\nrole = link\n===\nlabel = 37\nname = Hudson Valley McIntosh Apples\ntagName = DIV\nrole = link\n===\nlabel = 38\nname = Amazing Andrea Apple Tree Seeds - 20 Seeds -Grow Fresh Apples for $8.08 from Walmart\ntagName = A\n===\nlabel = 39\nname = Image of Amazing Andrea Apple Tree Seeds - 20 Seeds -Grow Fresh Apples\ntagName = DIV\nrole = link\n===\nlabel = 40\nname = Amazing Andrea Apple Tree Seeds - 20 Seeds -Grow Fresh Apples\ntagName = DIV\nrole = link\n===\nlabel = 41\nname = ZEAYEA 20 Pcs Artificial Apples, Decorative Fake Apples for Decoration Lifelike, Realistic Fake Fruit Apples for Home Kitchen Festival Party for $21.99 from Amazon.com\ntagName = A\n===\nlabel = 42\nname = Image of ZEAYEA 20 Pcs Artificial Apples, Decorative Fake Apples for Decoration Lifelike, Realistic Fake Fruit Apples for Home Kitchen Festival Party\ntagName = DIV\nrole = link\n===\nlabel = 43\nname = ZEAYEA 20 Pcs Artificial Apples, Decorative Fake Apples for Decoration Lifelike, Realistic Fake Fruit Apples for Home Kitchen Festival Party\ntagName = DIV\nrole = link\n===\nlabel = 44\nname = About this result\ntagName = DIV\nrole = button\n===\nlabel = 45\nname = Health benefits\u2026Why are apples the healthiest fruit?\ntagName = DIV\nrole = button\n===\nlabel = 46\nname = Type of fruit\u2026what type of fruit is a apple\ntagName = DIV\nrole = button\n===\nlabel = 47\nname = Inventors\u2026Who invented the apple fruit?\ntagName = DIV\nrole = button\n===\nlabel = 48\nname = Fruit or Vegetable\u2026Is an apple a fruit or vegetable?\ntagName = DIV\nrole = button\n===\nlabel = 49\nname = 6 more\ntagName = DIV\nrole = button\n===\nlabel = 50\nname = Feedback\ntagName = DIV\nrole = button\n===\nlabel = 51\nname = Apple Fruit\ntagName = A\n",
30+
"response": "{\n \"thought\": \"The search results for 'apples' are displayed. The results include various types of apples and related products from different vendors.\",\n \"action\": {\n \"name\": \"finish\",\n \"args\": {}\n }\n}",
31+
"usage": {
32+
"completion_tokens": 51,
33+
"prompt_tokens": 3514,
34+
"total_tokens": 3565
35+
}
36+
}
37+
]
344 KB
Loading
399 KB
Loading

0 commit comments

Comments
 (0)