lmnr-ai
diff --git a/‎LICENSE‎
Lines changed: 1 addition & 1 deletion b/‎LICENSE‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎evals/eval_generate_json.py‎
Lines changed: 136 additions & 0 deletions b/‎evals/eval_generate_json.py‎
Lines changed: 136 additions & 0 deletions
diff --git a/‎index/agent/agent.py‎
Lines changed: 8 additions & 37 deletions b/‎index/agent/agent.py‎
Lines changed: 8 additions & 37 deletions
diff --git a/‎index/agent/utils.py‎
Lines changed: 129 additions & 2 deletions b/‎index/agent/utils.py‎
Lines changed: 129 additions & 2 deletions
@@ -186,7 +186,7 @@
       same "printed page" as the copyright notice for easier
       identification within third-party archives.
 
-   Copyright [yyyy] [name of copyright owner]
+   Copyright [2025] [LMNR AI, Inc.]
 
    Licensed under the Apache License, Version 2.0 (the "License");
    you may not use this file except in compliance with the License.
 
@@ -0,0 +1,136 @@
+import json
+from typing import Any, Dict
+
+from lmnr import evaluate
+
+from index import AnthropicProvider
+from index.agent.utils import generate_proper_json
+
+# Module-level LLM provider that run_json_correction passes to generate_proper_json.
+llm = AnthropicProvider(model="claude-3-7-sonnet-20250219", enable_thinking=True, thinking_token_budget=1024)
+    
+async def run_json_correction(data: Dict[str, Any]):
+    """Repair one malformed JSON sample and return the parsed result when possible.
+
+    Reads ``data["malformed_json"]``, passes it to ``generate_proper_json`` together
+    with the module-level ``llm``, and returns the parsed Python object. If the
+    reply is still not valid JSON, the raw reply string is returned instead so the
+    evaluator can score it.
+    """
+    repaired_text = await generate_proper_json(llm=llm, json_str=data["malformed_json"])
+
+    try:
+        parsed = json.loads(repaired_text)
+    except json.JSONDecodeError:
+        # Hand the unparseable text back unchanged; scoring it is the evaluator's job.
+        return repaired_text
+    return parsed
+
+
+async def eval_json_correction(output: Any, target: Dict[str, Any]):
+    """Return whether the executor's output equals the expected object.
+
+    ``output`` is either an already-parsed object or a string. A string is parsed
+    as JSON first and the parsed value is compared with ``target``; a string that
+    is not valid JSON counts as a mismatch.
+    """
+    if not isinstance(output, str):
+        # Already parsed by the executor: compare directly.
+        return output == target
+
+    try:
+        return json.loads(output) == target
+    except json.JSONDecodeError:
+        # Output was not valid JSON
+        return False
+
+# Evaluation cases. Each item pairs a malformed JSON string (under "data") with the
+# Python object that the corrected JSON is expected to parse to (under "target").
+test_data = [
+    {
+        "data": {
+            # Trailing comma, single quotes
+            "malformed_json": "{'name': 'John Doe', 'age': 30, 'city': 'New York',}",
+        },
+        "target": {
+            "name": "John Doe",
+            "age": 30,
+            "city": "New York"
+        }
+    },
+    {
+        "data": {
+            # Unescaped double quotes inside a string value
+            "malformed_json": '''{
+                "item": "Book",
+                "details": {
+                    "title": "The "Great Gatsby"",
+                    "author": "F. Scott Fitzgerald"
+                },
+                "price": 10.99
+            }'''
+        },
+        "target": {
+            "item": "Book",
+            "details": {
+                "title": "The \"Great Gatsby\"",
+                "author": "F. Scott Fitzgerald"
+            },
+            "price": 10.99
+        }
+    },
+    {
+        "data": {
+            # No closing brace
+            "malformed_json": '''{
+                "key1": "value1",
+                "key2": "value2"
+            ''' # The string ends here, deliberately without the closing brace
+        },
+        "target": {
+            "key1": "value1",
+            "key2": "value2"
+        }
+    },
+    {
+        "data": {
+            # JSON with comments (not standard, should be removed by the fixer)
+            "malformed_json": '''{
+                // This is a comment
+                "product_id": 123,
+                "status": "active"
+            }'''
+        },
+        "target": {
+            "product_id": 123,
+            "status": "active"
+        }
+    },
+    # Example of a more complex malformed JSON
+    {
+        "data": {
+            "malformed_json": "{\"name\": \"incomplete, \"value\": [1, 2, \"unfinished_array\"" # Unterminated "name" string, missing closing bracket and closing brace
+        },
+        "target": { # Assumes the LLM repairs the structure with this reasonable guess
+            "name": "incomplete",
+            "value": [1, 2, "unfinished_array"]
+        }
+    },
+    {
+        "data": {
+            "malformed_json": "{'key with space': 'value', 'another key': true, 'numeric_string': '123.45' }" # Single quotes, boolean
+        },
+        "target": {
+            "key with space": "value",
+            "another key": True, # JSON true parses to Python True
+            "numeric_string": "123.45"
+        }
+    }
+]
+
+# Run the evaluation. This call sits at module top level, so it executes as soon as
+# the file is loaded. Presumably lmnr feeds each item's "data" to the executor and
+# passes the result plus "target" to the evaluator -- confirm against the lmnr docs.
+evaluate(
+    data=test_data,
+    executor=run_json_correction,
+    evaluators={"json_correction_accuracy": eval_json_correction},
+    concurrency_limit=10,
+    group_name="json_correction_eval",
+)
@@ -1,15 +1,13 @@
 from __future__ import annotations
 
-import json
 import logging
-import re
 import time
 import uuid
 from typing import AsyncGenerator, Optional
 
 from dotenv import load_dotenv
 from lmnr import Laminar, LaminarSpanContext, observe, use_span
-from pydantic import BaseModel, ValidationError
+from pydantic import BaseModel
 
 from index.agent.message_manager import MessageManager
 from index.agent.models import (
@@ -25,6 +23,7 @@
 	TimeoutChunk,
 	TimeoutChunkContent,
 )
+from index.agent.utils import validate_json
 from index.browser.browser import Browser, BrowserConfig
 from index.controller.controller import Controller
 from index.llm.llm import BaseLLMProvider, Message
@@ -106,39 +105,9 @@ async def _generate_action(self, input_messages: list[Message]) -> AgentLLMOutpu
 
 		response = await self.llm.call(input_messages)
 
-		# Extract content between <output> tags using regex, including variations like <output_32>
-		pattern = r"<output(?:[^>]*)>(.*?)</output(?:[^>]*)>"
-		match = re.search(pattern, response.content, re.DOTALL)
-		
-		json_str = ""
-
-		if not match:
-			# if we couldn't find the <output> tags, it most likely means the <output*> tag is not present in the response
-			# remove closing and opening tags just in case
-			closing_tag_pattern = r"</output(?:[^>]*)>"
-			json_str = re.sub(closing_tag_pattern, "", response.content).strip()
-
-			open_tag_pattern = r"<output(?:[^>]*)>"
-			json_str = re.sub(open_tag_pattern, "", json_str).strip()
-
-			json_str = json_str.replace("```json", "").replace("```", "").strip()
-
-		else:
-			# Extract just the content between the tags without any additional replacement
-			json_str = match.group(1).strip()
-			
 		try:
-			# First try to parse it directly to catch any obvious JSON issues
-			try:
-				json.loads(json_str)
-			except json.JSONDecodeError:
-				# If direct parsing fails, attempt to fix common issues
-				# Remove escape characters and control characters (0x00-0x1F) that might cause problems
-				json_str = json_str.replace('\\n', '\n').replace('\\r', '\r').replace('\\t', '\t')
-				# Clean all control characters (0x00-0x1F) except valid JSON whitespace (\n, \r, \t)
-				json_str = re.sub(r'[\x00-\x08\x0B\x0C\x0E-\x1F]', '', json_str)
-				
-			output = AgentLLMOutput.model_validate_json(json_str.strip())
+			# Pass the raw LLM response content to validate_json
+			output = await validate_json(response.content, self.llm)
 
 			logger.info(f'💡 Thought: {output.thought}')
 			logger.info(f'💡 Summary: {output.summary}')
@@ -148,8 +117,10 @@ async def _generate_action(self, input_messages: list[Message]) -> AgentLLMOutpu
 				output.thinking_block = response.thinking
 
 			return output
-		except ValidationError as e:
-			raise ValueError(f"Could not parse response: {str(e)}\nResponse was: {json_str}")
+		except ValueError as e:
+			# Re-raise the ValueError from validate_json, which now includes detailed context
+			logger.error(f"Failed to generate and validate action after multiple retries: {e}")
+			raise e
 
 	async def _setup_messages(self, 
 							prompt: str, 
 
@@ -1,12 +1,16 @@
 import base64
 import enum
 import importlib.resources
+import json
 import logging
+import re
 from typing import Any, Dict, Type
 
-from pydantic import BaseModel
+from pydantic import BaseModel, ValidationError
 
+from index.agent.models import AgentLLMOutput
 from index.browser.utils import scale_b64_image
+from index.llm.llm import BaseLLMProvider, Message
 
 logger = logging.getLogger(__name__)
 
@@ -91,4 +95,127 @@ def process_model(model):
 
         return model_schema
 
-    return process_model(model_class)
+    return process_model(model_class)
+
+
+async def generate_proper_json(llm: BaseLLMProvider, json_str: str) -> str:
+    """Ask the LLM to repair a malformed JSON string and return its reply as text.
+
+    The reply is returned with surrounding whitespace and an optional Markdown
+    code fence removed. It is NOT validated here, so callers must parse the
+    result themselves.
+
+    Args:
+        llm: Provider whose ``call`` coroutine receives a single user message.
+        json_str: The malformed JSON text embedded in the correction prompt.
+
+    Returns:
+        The LLM's proposed JSON string, which may still be invalid JSON.
+    """
+
+    prompt = f"""The following JSON string is malformed or has issues. Please correct it while preserving the original structure and content as much as possible.
+Return ONLY the corrected JSON string, without any surrounding text, comments, or markdown. Do not add any explanations.
+
+Problematic JSON string:
+{json_str}
+"""
+
+    input_messages = [
+        Message(role="user", content=prompt)
+    ]
+
+    response = await llm.call(input_messages)
+    corrected_json_str = response.content.strip()
+
+    # Strip an opening fence whether or not it carries a "json" tag (in any case).
+    # Previously only the exact "```json" prefix was handled, so a bare "```"
+    # opener survived and made the result unparseable.
+    corrected_json_str = re.sub(r"^```(?:json)?", "", corrected_json_str, count=1, flags=re.IGNORECASE)
+    if corrected_json_str.endswith("```"):
+        corrected_json_str = corrected_json_str[:-3]
+    return corrected_json_str.strip()
+
+
+# Opening Markdown fence, with or without a "json" language tag.
+_CODE_FENCE_OPEN = re.compile(r"^```(?:json)?", re.IGNORECASE)
+# Control characters other than the JSON whitespace characters \t, \n and \r.
+_CONTROL_CHARS = re.compile(r'[\x00-\x08\x0B\x0C\x0E-\x1F]')
+
+
+def _strip_code_fence(text: str) -> str:
+    """Remove a leading ```/```json fence and a trailing ``` fence, then trim whitespace."""
+    text = _CODE_FENCE_OPEN.sub("", text, count=1)
+    if text.endswith("```"):
+        text = text[:-3]
+    return text.strip()
+
+
+async def validate_json(raw_llm_response_content: str, llm: BaseLLMProvider, max_retries: int = 3) -> AgentLLMOutput:
+    """
+    Extracts, validates, and parses a JSON string from raw LLM output,
+    attempting to fix it if necessary using retries with cleaning and LLM-based correction.
+
+    Each attempt validates the current string as is, then (only if it differs)
+    a copy with control characters removed. If both fail and attempts remain,
+    the current string is sent to ``generate_proper_json`` and the reply becomes
+    the input of the next attempt.
+
+    Args:
+        raw_llm_response_content: The raw string content from the LLM response.
+        llm: The LLM provider instance for fixing JSON if needed.
+        max_retries: Maximum number of attempts to parse the JSON.
+
+    Returns:
+        An AgentLLMOutput object.
+
+    Raises:
+        ValueError: If the JSON string cannot be parsed or validated after all retries.
+    """
+    # Prefer the payload between <output...> tags (including variants like <output_32>).
+    pattern = r"<output(?:[^>]*)>(.*?)</output(?:[^>]*)>"
+    match = re.search(pattern, raw_llm_response_content, re.DOTALL)
+
+    if match:
+        current_json_str = match.group(1).strip()
+    else:
+        # No complete tag pair: drop any stray closing/opening tags and Markdown
+        # fences, and treat whatever remains as the JSON payload.
+        closing_tag_pattern = r"</output(?:[^>]*)>"
+        json_str_no_closing = re.sub(closing_tag_pattern, "", raw_llm_response_content).strip()
+        open_tag_pattern = r"<output(?:[^>]*)>"
+        json_str_no_tags = re.sub(open_tag_pattern, "", json_str_no_closing).strip()
+        current_json_str = json_str_no_tags.replace("```json", "").replace("```", "").strip()
+
+    last_exception = None
+
+    for attempt in range(max_retries):
+        logger.debug(f"JSON parsing attempt {attempt + 1}/{max_retries}")
+
+        # Stage 1: validate the current string, minus any fence added by an LLM fix.
+        candidate = _strip_code_fence(current_json_str)
+        try:
+            logger.debug(f"Attempting to parse JSON on attempt {attempt + 1}. Raw JSON: '{candidate}'")
+            output = AgentLLMOutput.model_validate_json(candidate)
+            logger.debug(f"Successfully parsed JSON on attempt {attempt + 1}.")
+            return output
+        except (json.JSONDecodeError, ValidationError) as e1:
+            logger.warning(f"Direct JSON parsing failed on attempt {attempt + 1}: {e1}")
+            last_exception = e1
+
+        # Stage 2: retry with control characters removed. Skipped when cleaning
+        # changes nothing, because the identical string would fail identically.
+        cleaned = _strip_code_fence(_CONTROL_CHARS.sub('', current_json_str))
+        if cleaned != candidate:
+            try:
+                logger.debug(f"Attempting to parse cleaned JSON on attempt {attempt + 1}. Cleaned JSON: '{cleaned[:250]}...'")
+                output = AgentLLMOutput.model_validate_json(cleaned)
+                logger.debug(f"Successfully parsed JSON on attempt {attempt + 1} (after cleaning).")
+                return output
+            except (json.JSONDecodeError, ValidationError) as e2:
+                logger.warning(f"Cleaned JSON parsing failed on attempt {attempt + 1}: {e2}")
+                last_exception = e2
+
+        if attempt >= max_retries - 1:
+            logger.error(f"All {max_retries} attempts to parse JSON failed. Final attempt was with: '{current_json_str[:250]}...'")
+            break
+
+        # Stage 3: ask the LLM to repair the string for the next attempt.
+        logger.debug(f"Attempt {attempt + 1} failed. Attempting to fix JSON with LLM.")
+        try:
+            current_json_str = await generate_proper_json(llm, current_json_str)
+            logger.debug(f"LLM proposed a new JSON string: '{current_json_str}'")
+        except Exception as llm_fix_exception:
+            # Best effort: keep the previous string so the next attempt can
+            # retry the LLM fix; the loop still ends after max_retries.
+            logger.error(f"LLM call to fix JSON failed during attempt {attempt + 1}: {llm_fix_exception}")
+
+    # "\n" (not "\\n") so the message contains a real line break.
+    raise ValueError(
+        f"Could not parse or validate response after {max_retries} attempts. "
+        f"Last error: {str(last_exception)}\n"
+        f"Final problematic JSON string after all attempts: '{current_json_str[:500]}...'"
+    ) from last_exception
+