Skip to content
This repository was archived by the owner on Jul 3, 2025. It is now read-only.

Commit c35c2a2

Browse files
authored
Merge pull request #47 from lmnr-ai/dev
Dev
2 parents e192884 + c3b3b54 commit c35c2a2

File tree

9 files changed

+611
-64
lines changed

9 files changed

+611
-64
lines changed

LICENSE

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -186,7 +186,7 @@
186186
same "printed page" as the copyright notice for easier
187187
identification within third-party archives.
188188

189-
Copyright [yyyy] [name of copyright owner]
189+
Copyright 2025 LMNR AI, Inc.
190190

191191
Licensed under the Apache License, Version 2.0 (the "License");
192192
you may not use this file except in compliance with the License.

evals/eval_generate_json.py

Lines changed: 136 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,136 @@
1+
import json
2+
from typing import Any, Dict
3+
4+
from lmnr import evaluate
5+
6+
from index import AnthropicProvider
7+
from index.agent.utils import generate_proper_json
8+
9+
llm = AnthropicProvider(model="claude-3-7-sonnet-20250219", enable_thinking=True, thinking_token_budget=1024)
10+
11+
async def run_json_correction(data: Dict[str, Any]):
    """Repair one malformed JSON sample and return the result for scoring.

    Args:
        data: Datapoint payload; its ``"malformed_json"`` entry holds the
            broken JSON text to repair.

    Returns:
        The parsed Python value when the repaired text is valid JSON,
        otherwise the repaired text itself so the evaluator can handle it.
    """
    # The repair is delegated to the module-level `llm` provider.
    repaired = await generate_proper_json(llm=llm, json_str=data["malformed_json"])

    # generate_proper_json returns a string; parse it so the evaluator can
    # compare structured values rather than raw text.
    try:
        parsed = json.loads(repaired)
    except json.JSONDecodeError:
        # Still not valid JSON: hand the raw string to the evaluator.
        return repaired
    return parsed
25+
26+
27+
async def eval_json_correction(output: Any, target: Dict[str, Any]):
    """Score a repaired JSON value by exact equality with the expected object.

    Args:
        output: Executor result; either an already-parsed value or, when the
            executor could not parse the repaired text, a string.
        target: Expected JSON content as a Python dict.

    Returns:
        Whether the (parsed) output equals ``target``. A string that is not
        valid JSON always counts as a mismatch.
    """
    # Already-parsed output can be compared directly.
    if not isinstance(output, str):
        return output == target

    # A string output means the executor could not parse it; give it one more
    # chance here and treat a parse failure as a miss.
    try:
        return json.loads(output) == target
    except json.JSONDecodeError:
        return False
47+
48+
def _case(malformed_json: str, target: Dict[str, Any]) -> Dict[str, Any]:
    """Build one evaluation datapoint from malformed input and its expected value."""
    return {"data": {"malformed_json": malformed_json}, "target": target}


test_data = [
    # Trailing comma, single quotes
    _case(
        "{'name': 'John Doe', 'age': 30, 'city': 'New York',}",
        {"name": "John Doe", "age": 30, "city": "New York"},
    ),
    # Unescaped double quotes inside a string value
    _case(
        '''{
"item": "Book",
"details": {
"title": "The "Great Gatsby"",
"author": "F. Scott Fitzgerald"
},
"price": 10.99
}''',
        {
            "item": "Book",
            "details": {
                "title": "The \"Great Gatsby\"",
                "author": "F. Scott Fitzgerald",
            },
            "price": 10.99,
        },
    ),
    # No closing brace
    _case(
        '''{
"key1": "value1",
"key2": "value2"
''',
        {"key1": "value1", "key2": "value2"},
    ),
    # JSON with comments (not standard, should be removed by the fixer)
    _case(
        '''{
// This is a comment
"product_id": 123,
"status": "active"
}''',
        {"product_id": 123, "status": "active"},
    ),
    # More complex case: missing closing quote and closing bracket/brace.
    # The target assumes the LLM can make a reasonable structural guess.
    _case(
        "{\"name\": \"incomplete, \"value\": [1, 2, \"unfinished_array\"",
        {"name": "incomplete", "value": [1, 2, "unfinished_array"]},
    ),
    # Single quotes and a JSON boolean (expected as a Python bool)
    _case(
        "{'key with space': 'value', 'another key': true, 'numeric_string': '123.45' }",
        {"key with space": "value", "another key": True, "numeric_string": "123.45"},
    ),
]

# Run the evaluation
evaluate(
    data=test_data,
    executor=run_json_correction,
    evaluators={"json_correction_accuracy": eval_json_correction},
    concurrency_limit=10,
    group_name="json_correction_eval",
)

index/agent/agent.py

Lines changed: 8 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -1,15 +1,13 @@
11
from __future__ import annotations
22

3-
import json
43
import logging
5-
import re
64
import time
75
import uuid
86
from typing import AsyncGenerator, Optional
97

108
from dotenv import load_dotenv
119
from lmnr import Laminar, LaminarSpanContext, observe, use_span
12-
from pydantic import BaseModel, ValidationError
10+
from pydantic import BaseModel
1311

1412
from index.agent.message_manager import MessageManager
1513
from index.agent.models import (
@@ -25,6 +23,7 @@
2523
TimeoutChunk,
2624
TimeoutChunkContent,
2725
)
26+
from index.agent.utils import validate_json
2827
from index.browser.browser import Browser, BrowserConfig
2928
from index.controller.controller import Controller
3029
from index.llm.llm import BaseLLMProvider, Message
@@ -106,39 +105,9 @@ async def _generate_action(self, input_messages: list[Message]) -> AgentLLMOutpu
106105

107106
response = await self.llm.call(input_messages)
108107

109-
# Extract content between <output> tags using regex, including variations like <output_32>
110-
pattern = r"<output(?:[^>]*)>(.*?)</output(?:[^>]*)>"
111-
match = re.search(pattern, response.content, re.DOTALL)
112-
113-
json_str = ""
114-
115-
if not match:
116-
# if we couldn't find the <output> tags, it most likely means the <output*> tag is not present in the response
117-
# remove closing and opening tags just in case
118-
closing_tag_pattern = r"</output(?:[^>]*)>"
119-
json_str = re.sub(closing_tag_pattern, "", response.content).strip()
120-
121-
open_tag_pattern = r"<output(?:[^>]*)>"
122-
json_str = re.sub(open_tag_pattern, "", json_str).strip()
123-
124-
json_str = json_str.replace("```json", "").replace("```", "").strip()
125-
126-
else:
127-
# Extract just the content between the tags without any additional replacement
128-
json_str = match.group(1).strip()
129-
130108
try:
131-
# First try to parse it directly to catch any obvious JSON issues
132-
try:
133-
json.loads(json_str)
134-
except json.JSONDecodeError:
135-
# If direct parsing fails, attempt to fix common issues
136-
# Remove escape characters and control characters (0x00-0x1F) that might cause problems
137-
json_str = json_str.replace('\\n', '\n').replace('\\r', '\r').replace('\\t', '\t')
138-
# Clean all control characters (0x00-0x1F) except valid JSON whitespace (\n, \r, \t)
139-
json_str = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F]', '', json_str)
140-
141-
output = AgentLLMOutput.model_validate_json(json_str.strip())
109+
# Pass the raw LLM response content to validate_json
110+
output = await validate_json(response.content, self.llm)
142111

143112
logger.info(f'💡 Thought: {output.thought}')
144113
logger.info(f'💡 Summary: {output.summary}')
@@ -148,8 +117,10 @@ async def _generate_action(self, input_messages: list[Message]) -> AgentLLMOutpu
148117
output.thinking_block = response.thinking
149118

150119
return output
151-
except ValidationError as e:
152-
raise ValueError(f"Could not parse response: {str(e)}\nResponse was: {json_str}")
120+
except ValueError as e:
121+
# Re-raise the ValueError from validate_json, which now includes detailed context
122+
logger.error(f"Failed to generate and validate action after multiple retries: {e}")
123+
raise e
153124

154125
async def _setup_messages(self,
155126
prompt: str,

index/agent/utils.py

Lines changed: 129 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,16 @@
11
import base64
22
import enum
33
import importlib.resources
4+
import json
45
import logging
6+
import re
57
from typing import Any, Dict, Type
68

7-
from pydantic import BaseModel
9+
from pydantic import BaseModel, ValidationError
810

11+
from index.agent.models import AgentLLMOutput
912
from index.browser.utils import scale_b64_image
13+
from index.llm.llm import BaseLLMProvider, Message
1014

1115
logger = logging.getLogger(__name__)
1216

@@ -91,4 +95,127 @@ def process_model(model):
9195

9296
return model_schema
9397

94-
return process_model(model_class)
98+
return process_model(model_class)
99+
100+
101+
async def generate_proper_json(llm: BaseLLMProvider, json_str: str) -> str:
    """Ask the LLM to repair a malformed JSON string.

    Args:
        llm: Provider used to request the correction.
        json_str: The malformed JSON text.

    Returns:
        The model's reply with surrounding whitespace and a wrapping markdown
        code fence removed. The result is NOT guaranteed to be valid JSON;
        callers must parse/validate it themselves.
    """
    prompt = f"""The following JSON string is malformed or has issues. Please correct it while preserving the original structure and content as much as possible.
Return ONLY the corrected JSON string, without any surrounding text, comments, or markdown. Do not add any explanations.

Problematic JSON string:
{json_str}
"""

    input_messages = [
        Message(role="user", content=prompt)
    ]

    response = await llm.call(input_messages)
    corrected_json_str = response.content.strip()

    # Models frequently wrap the answer in a code fence despite the prompt.
    # Accept "```json", "```JSON" and a bare "```" as the opening fence;
    # previously only the exact "```json" prefix was removed, so a bare
    # fence was left in place and broke parsing downstream.
    if corrected_json_str.startswith("```"):
        corrected_json_str = corrected_json_str[3:]
        if corrected_json_str[:4].lower() == "json":
            corrected_json_str = corrected_json_str[4:]
    if corrected_json_str.endswith("```"):
        corrected_json_str = corrected_json_str[:-3]
    return corrected_json_str.strip()
121+
122+
123+
# Matches an <output>...</output> block, tolerating suffixed tag names such as <output_32>.
_OUTPUT_BLOCK_RE = re.compile(r"<output(?:[^>]*)>(.*?)</output(?:[^>]*)>", re.DOTALL)
_OUTPUT_OPEN_TAG_RE = re.compile(r"<output(?:[^>]*)>")
_OUTPUT_CLOSE_TAG_RE = re.compile(r"</output(?:[^>]*)>")
# Control characters 0x00-0x1F except the JSON whitespace characters \t, \n and \r.
_CONTROL_CHARS_RE = re.compile(r'[\x00-\x08\x0B\x0C\x0E-\x1F]')


def _extract_json_candidate(raw: str) -> str:
    """Return the text inside the <output> tags, or a tag/fence-stripped fallback."""
    match = _OUTPUT_BLOCK_RE.search(raw)
    if match:
        return match.group(1).strip()

    # No complete <output> pair was found: drop any stray closing/opening tags
    # and markdown fences and treat what is left as the JSON candidate.
    without_closing = _OUTPUT_CLOSE_TAG_RE.sub("", raw).strip()
    without_tags = _OUTPUT_OPEN_TAG_RE.sub("", without_closing).strip()
    return without_tags.replace("```json", "").replace("```", "").strip()


def _strip_json_fence(text: str) -> str:
    """Remove a leading ```json fence and a trailing ``` fence, then trim whitespace."""
    if text.startswith("```json"):
        text = text[7:]
    if text.endswith("```"):
        text = text[:-3]
    return text.strip()


async def validate_json(raw_llm_response_content: str, llm: BaseLLMProvider, max_retries: int = 3) -> AgentLLMOutput:
    """
    Extracts, validates, and parses a JSON string from raw LLM output,
    attempting to fix it if necessary using retries with cleaning and LLM-based correction.

    Each attempt first validates the current candidate as-is, then (only if it
    differs) a copy with control characters removed. If both fail and attempts
    remain, the candidate is sent to the LLM for correction and the loop repeats.

    Args:
        raw_llm_response_content: The raw string content from the LLM response.
        llm: The LLM provider instance for fixing JSON if needed.
        max_retries: Maximum number of attempts to parse the JSON.

    Returns:
        An AgentLLMOutput object.

    Raises:
        ValueError: If the JSON string cannot be parsed or validated after all retries.
    """
    current_json_str = _extract_json_candidate(raw_llm_response_content)
    last_exception = None

    for attempt in range(max_retries):
        logger.debug(f"JSON parsing attempt {attempt + 1}/{max_retries}")

        # Stage 1: try the current candidate as is (minus any markdown fence
        # an LLM fix may have added).
        candidate = _strip_json_fence(current_json_str)
        try:
            logger.debug(f"Attempting to parse JSON on attempt {attempt + 1}. Raw JSON: '{candidate}'")
            output = AgentLLMOutput.model_validate_json(candidate)
        except (json.JSONDecodeError, ValidationError) as e1:
            logger.warning(f"Direct JSON parsing failed on attempt {attempt + 1}: {e1}")
            last_exception = e1
        else:
            logger.debug(f"Successfully parsed JSON on attempt {attempt + 1}.")
            return output

        # Stage 2: retry with control characters removed. When cleaning changes
        # nothing, validation would fail in exactly the same way, so skip it.
        cleaned = _strip_json_fence(_CONTROL_CHARS_RE.sub('', current_json_str))
        if cleaned != candidate:
            try:
                logger.debug(f"Attempting to parse cleaned JSON on attempt {attempt + 1}. Cleaned JSON: '{cleaned[:250]}...'")
                output = AgentLLMOutput.model_validate_json(cleaned)
            except (json.JSONDecodeError, ValidationError) as e2:
                logger.warning(f"Cleaned JSON parsing failed on attempt {attempt + 1}: {e2}")
                last_exception = e2
            else:
                logger.debug(f"Successfully parsed JSON on attempt {attempt + 1} (after cleaning).")
                return output

        if attempt >= max_retries - 1:
            logger.error(f"All {max_retries} attempts to parse JSON failed. Final attempt was with: '{current_json_str[:250]}...'")
            break

        logger.debug(f"Attempt {attempt + 1} failed. Attempting to fix JSON with LLM.")
        try:
            # Send the uncleaned candidate so the LLM sees exactly what failed.
            current_json_str = await generate_proper_json(llm, current_json_str)
            logger.debug(f"LLM proposed a new JSON string: '{current_json_str}'")
        except Exception as llm_fix_exception:
            # Deliberate best-effort: if the fix call fails, the next attempt
            # retries with the previous candidate and the loop eventually
            # ends in the ValueError below.
            logger.error(f"LLM call to fix JSON failed during attempt {attempt + 1}: {llm_fix_exception}")

    # "\n" (not "\\n"): the message is meant to span lines, not to contain a
    # literal backslash-n.
    raise ValueError(
        f"Could not parse or validate response after {max_retries} attempts. "
        f"Last error: {str(last_exception)}\n"
        f"Final problematic JSON string after all attempts: '{current_json_str[:500]}...'"
    ) from last_exception
221+

0 commit comments

Comments
 (0)