fix: resolve import errors and complete module cleanup

johnhuang316 · johnhuang316 · commit 98e22c8ff6e8 · 2025-08-11T12:43:10.000+08:00
- Remove orphaned imports from indexing/__init__.py after deleting the duplicate_detection, qualified_names, and simple_models modules
- Add local normalize_file_path utility function to search/base.py and utils/validation.py to replace deleted qualified_names module
- Add local utility functions to utils/response_formatter.py (generate_qualified_name, detect_duplicate_functions, detect_duplicate_classes)
- Define a minimal CodeIndex class inline in utils/response_formatter.py (inside _get_duplicate_names_from_index) for backward compatibility
- Clean up all remaining references to deleted modules
- Verify server imports and starts successfully

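This completes the post-refactor cleanup: all references to the deleted modules are resolved, and the new SCIP architecture imports and runs without import errors. The diff also adds an optional max_line_length parameter (default 200 in server.py and services/search_service.py) to the search strategies; result lines longer than the limit are truncated and suffixed with '... (truncated)'.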
diff --git a/src/code_index_mcp/indexing/__init__.py b/src/code_index_mcp/indexing/__init__.py
@@ -1,36 +1,12 @@
 """
 Code indexing utilities for the MCP server.
 
-This module provides utility functions for duplicate detection and 
-qualified name generation used by the SCIP indexing system.
+This module provides the SCIP indexing system used by the new architecture.
 """
 
-# Import utility functions that are still used
-from .duplicate_detection import (
-    detect_duplicate_functions,
-    detect_duplicate_classes,
-    get_duplicate_statistics,
-    format_duplicate_report
-)
-
-from .qualified_names import (
-    generate_qualified_name,
-    normalize_file_path
-)
-
-# Simple models for backward compatibility
-from .simple_models import CodeIndex
-
-# SCIP builder is still used by the new architecture
+# SCIP builder is the main component used by the new architecture
 from .scip_builder import SCIPIndexBuilder
 
 __all__ = [
-    'detect_duplicate_functions',
-    'detect_duplicate_classes', 
-    'get_duplicate_statistics',
-    'format_duplicate_report',
-    'generate_qualified_name',
-    'normalize_file_path',
-    'SCIPIndexBuilder',
-    'CodeIndex'
+    'SCIPIndexBuilder'
 ]
diff --git a/src/code_index_mcp/search/ag.py b/src/code_index_mcp/search/ag.py
@@ -27,7 +27,8 @@ def search(
         context_lines: int = 0,
         file_pattern: Optional[str] = None,
         fuzzy: bool = False,
-        regex: bool = False
+        regex: bool = False,
+        max_line_length: Optional[int] = None
     ) -> Dict[str, List[Tuple[int, str]]]:
         """
         Execute a search using The Silver Searcher (ag).
@@ -40,6 +41,7 @@ def search(
             file_pattern: File pattern to filter
             fuzzy: Enable word boundary matching (not true fuzzy search)
             regex: Enable regex pattern matching
+            max_line_length: Optional. Limit the length of lines when context_lines is used
         """
         # ag prints line numbers and groups by file by default, which is good.
         # --noheading is used to be consistent with other tools' output format.
@@ -116,7 +118,7 @@ def search(
             if process.returncode > 1:
                  raise RuntimeError(f"ag failed with exit code {process.returncode}: {process.stderr}")
 
-            return parse_search_output(process.stdout, base_path)
+            return parse_search_output(process.stdout, base_path, max_line_length)
         
         except FileNotFoundError:
             raise RuntimeError("'ag' (The Silver Searcher) not found. Please install it and ensure it's in your PATH.")
diff --git a/src/code_index_mcp/search/base.py b/src/code_index_mcp/search/base.py
@@ -12,15 +12,40 @@
 from abc import ABC, abstractmethod
 from typing import Dict, List, Optional, Tuple, Any
 
-from ..indexing.qualified_names import normalize_file_path
-
-def parse_search_output(output: str, base_path: str) -> Dict[str, List[Tuple[int, str]]]:
+# Local utility function (moved from deleted qualified_names module)
+def normalize_file_path(file_path: str) -> str:
+    """
+    Normalize file path for consistent use throughout the codebase.
+    
+    This function provides a unified way to normalize file paths by:
+    1. Converting all path separators to forward slashes
+    2. Normalizing the path structure (removing redundant separators, etc.)
+    
+    Args:
+        file_path: File path to normalize
+    
+    Returns:
+        Normalized file path with forward slashes
+    """
+    if not file_path:
+        return file_path
+    
+    # First normalize the path structure, then convert separators
+    normalized = os.path.normpath(file_path)
+    return normalized.replace(os.sep, '/')
+
+def parse_search_output(
+    output: str, 
+    base_path: str, 
+    max_line_length: Optional[int] = None
+) -> Dict[str, List[Tuple[int, str]]]:
     """
     Parse the output of command-line search tools (grep, ag, rg).
 
     Args:
         output: The raw output from the command-line tool.
         base_path: The base path of the project to make file paths relative.
+        max_line_length: Optional maximum line length to truncate long lines.
 
     Returns:
         A dictionary where keys are file paths and values are lists of (line_number, line_content) tuples.
@@ -53,6 +78,10 @@ def parse_search_output(output: str, base_path: str) -> Dict[str, List[Tuple[int
             # Normalize path separators for consistency
             relative_path = normalize_file_path(relative_path)
 
+            # Truncate content if it exceeds max_line_length
+            if max_line_length and len(content) > max_line_length:
+                content = content[:max_line_length] + '... (truncated)'
+
             if relative_path not in results:
                 results[relative_path] = []
             results[relative_path].append((line_number, content))
@@ -175,7 +204,8 @@ def search(
         context_lines: int = 0,
         file_pattern: Optional[str] = None,
         fuzzy: bool = False,
-        regex: bool = False
+        regex: bool = False,
+        max_line_length: Optional[int] = None
     ) -> Dict[str, List[Tuple[int, str]]]:
         """
         Execute a search using the specific strategy.
@@ -188,6 +218,7 @@ def search(
             file_pattern: Glob pattern to filter files (e.g., "*.py").
             fuzzy: Whether to enable fuzzy/partial matching.
             regex: Whether to enable regex pattern matching.
+            max_line_length: Optional. Limit the length of lines when context_lines is used.
 
         Returns:
             A dictionary mapping filenames to lists of (line_number, line_content) tuples.
diff --git a/src/code_index_mcp/search/basic.py b/src/code_index_mcp/search/basic.py
@@ -46,7 +46,8 @@ def search(
         context_lines: int = 0,
         file_pattern: Optional[str] = None,
         fuzzy: bool = False,
-        regex: bool = False
+        regex: bool = False,
+        max_line_length: Optional[int] = None
     ) -> Dict[str, List[Tuple[int, str]]]:
         """
         Execute a basic, line-by-line search.
@@ -60,6 +61,7 @@ def search(
             file_pattern: File pattern to filter
             fuzzy: Enable word boundary matching
             regex: Enable regex pattern matching
+            max_line_length: Optional. Limit the length of lines when context_lines is used
         """
         results: Dict[str, List[Tuple[int, str]]] = {}
         
@@ -94,10 +96,14 @@ def search(
                     with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
                         for line_num, line in enumerate(f, 1):
                             if search_regex.search(line):
+                                content = line.rstrip('\n')
+                                # Truncate content if it exceeds max_line_length
+                                if max_line_length and len(content) > max_line_length:
+                                    content = content[:max_line_length] + '... (truncated)'
+                                
                                 if rel_path not in results:
                                     results[rel_path] = []
-                                # Strip newline for consistent output
-                                results[rel_path].append((line_num, line.rstrip('\n')))
+                                results[rel_path].append((line_num, content))
                 except (UnicodeDecodeError, PermissionError, OSError):
                     # Ignore files that can't be opened or read due to encoding/permission issues
                     continue
diff --git a/src/code_index_mcp/search/grep.py b/src/code_index_mcp/search/grep.py
@@ -32,7 +32,8 @@ def search(
         context_lines: int = 0,
         file_pattern: Optional[str] = None,
         fuzzy: bool = False,
-        regex: bool = False
+        regex: bool = False,
+        max_line_length: Optional[int] = None
     ) -> Dict[str, List[Tuple[int, str]]]:
         """
         Execute a search using standard grep.
@@ -45,6 +46,7 @@ def search(
             file_pattern: File pattern to filter
             fuzzy: Enable word boundary matching
             regex: Enable regex pattern matching
+            max_line_length: Optional. Limit the length of lines when context_lines is used
         """
         # -r: recursive, -n: line number
         cmd = ['grep', '-r', '-n']
@@ -102,7 +104,7 @@ def search(
             if process.returncode > 1:
                  raise RuntimeError(f"grep failed with exit code {process.returncode}: {process.stderr}")
 
-            return parse_search_output(process.stdout, base_path)
+            return parse_search_output(process.stdout, base_path, max_line_length)
         
         except FileNotFoundError:
             raise RuntimeError("'grep' not found. Please install it and ensure it's in your PATH.")
diff --git a/src/code_index_mcp/search/ripgrep.py b/src/code_index_mcp/search/ripgrep.py
@@ -27,7 +27,8 @@ def search(
         context_lines: int = 0,
         file_pattern: Optional[str] = None,
         fuzzy: bool = False,
-        regex: bool = False
+        regex: bool = False,
+        max_line_length: Optional[int] = None
     ) -> Dict[str, List[Tuple[int, str]]]:
         """
         Execute a search using ripgrep.
@@ -40,6 +41,7 @@ def search(
             file_pattern: File pattern to filter
             fuzzy: Enable word boundary matching (not true fuzzy search)
             regex: Enable regex pattern matching
+            max_line_length: Optional. Limit the length of lines when context_lines is used
         """
         cmd = ['rg', '--line-number', '--no-heading', '--color=never', '--no-ignore']
 
@@ -87,7 +89,7 @@ def search(
             if process.returncode > 1:
                 raise RuntimeError(f"ripgrep failed with exit code {process.returncode}: {process.stderr}")
 
-            return parse_search_output(process.stdout, base_path)
+            return parse_search_output(process.stdout, base_path, max_line_length)
         
         except FileNotFoundError:
             raise RuntimeError("ripgrep (rg) not found. Please install it and ensure it's in your PATH.")
diff --git a/src/code_index_mcp/search/ugrep.py b/src/code_index_mcp/search/ugrep.py
@@ -27,7 +27,8 @@ def search(
         context_lines: int = 0,
         file_pattern: Optional[str] = None,
         fuzzy: bool = False,
-        regex: bool = False
+        regex: bool = False,
+        max_line_length: Optional[int] = None
     ) -> Dict[str, List[Tuple[int, str]]]:
         """
         Execute a search using the 'ug' command-line tool.
@@ -40,6 +41,7 @@ def search(
             file_pattern: File pattern to filter
             fuzzy: Enable true fuzzy search (ugrep native support)
             regex: Enable regex pattern matching
+            max_line_length: Optional. Limit the length of lines when context_lines is used
         """
         if not self.is_available():
             return {"error": "ugrep (ug) command not found."}
@@ -89,7 +91,7 @@ def search(
                 error_output = process.stderr.strip()
                 return {"error": f"ugrep execution failed with code {process.returncode}", "details": error_output}
 
-            return parse_search_output(process.stdout, base_path)
+            return parse_search_output(process.stdout, base_path, max_line_length)
 
         except FileNotFoundError:
             return {"error": "ugrep (ug) command not found. Please ensure it's installed and in your PATH."}
diff --git a/src/code_index_mcp/server.py b/src/code_index_mcp/server.py
@@ -128,7 +128,8 @@ def search_code_advanced(
     context_lines: int = 0,
     file_pattern: str = None,
     fuzzy: bool = False,
-    regex: bool = None
+    regex: bool = None,
+    max_line_length: int = 200
 ) -> Dict[str, Any]:
     """
     Search for a code pattern in the project using an advanced, fast tool.
@@ -159,6 +160,9 @@ def search_code_advanced(
                - If False, forces literal string search
                - If None (default), automatically detects regex patterns and enables regex for patterns like "ERROR|WARN"
                The pattern will always be validated for safety to prevent ReDoS attacks.
+        max_line_length: Optional. Default 200. Limits the length of lines when context_lines is used.
+                        Lines longer than this will be truncated with '... (truncated)' appended.
+                        This prevents token flooding from very long lines (e.g., minified JavaScript files).
 
     Returns:
         A dictionary containing the search results or an error message.
@@ -170,7 +174,8 @@ def search_code_advanced(
         context_lines=context_lines,
         file_pattern=file_pattern,
         fuzzy=fuzzy,
-        regex=regex
+        regex=regex,
+        max_line_length=max_line_length
     )
 
 @mcp.tool()
diff --git a/src/code_index_mcp/services/search_service.py b/src/code_index_mcp/services/search_service.py
@@ -31,7 +31,8 @@ def search_code(  # pylint: disable=too-many-arguments
         context_lines: int = 0,
         file_pattern: Optional[str] = None,
         fuzzy: bool = False,
-        regex: Optional[bool] = None
+        regex: Optional[bool] = None,
+        max_line_length: Optional[int] = 200
     ) -> Dict[str, Any]:
         """
         Search for code patterns in the project.
@@ -45,6 +46,7 @@ def search_code(  # pylint: disable=too-many-arguments
             file_pattern: Glob pattern to filter files
             fuzzy: Whether to enable fuzzy matching
             regex: Regex mode - True/False to force, None for auto-detection
+            max_line_length: Optional. Default 200. Limits the length of lines when context_lines is used.
 
         Returns:
             Dictionary with search results or error information
@@ -89,7 +91,8 @@ def search_code(  # pylint: disable=too-many-arguments
                 context_lines=context_lines,
                 file_pattern=file_pattern,
                 fuzzy=fuzzy,
-                regex=regex
+                regex=regex,
+                max_line_length=max_line_length
             )
             return ResponseFormatter.search_results_response(results)
         except Exception as e:
diff --git a/src/code_index_mcp/services/symbol_graph_service.py b/src/code_index_mcp/services/symbol_graph_service.py
diff --git a/src/code_index_mcp/utils/response_formatter.py b/src/code_index_mcp/utils/response_formatter.py
@@ -8,8 +8,64 @@
 import json
 from typing import Any, Dict, List, Optional, Union
 
-from ..indexing.qualified_names import generate_qualified_name
-from ..indexing.duplicate_detection import detect_duplicate_functions, detect_duplicate_classes
+# Local utility functions (moved from deleted modules)
+def generate_qualified_name(file_path: str, element_name: str) -> str:
+    """
+    Generate qualified name in format: file_path:element_name
+    
+    Args:
+        file_path: Relative file path (e.g., 'src/utils/helpers.py')
+        element_name: Function or class name (e.g., 'format_data')
+    
+    Returns:
+        Qualified name string (e.g., 'src/utils/helpers.py:format_data')
+    """
+    if not file_path or not element_name:
+        raise ValueError("Both file_path and element_name must be non-empty")
+    
+    # Normalize path separators to forward slashes for consistency
+    import os
+    normalized_path = file_path.replace(os.sep, '/')
+    
+    return f"{normalized_path}:{element_name}"
+
+def detect_duplicate_functions(index) -> Dict[str, List[int]]:
+    """
+    Detect functions with duplicate names across files.
+    
+    Args:
+        index: Complete code index
+        
+    Returns:
+        Dictionary mapping function names to lists of file IDs where duplicates exist
+    """
+    duplicates = {}
+    
+    if hasattr(index, 'lookups') and 'function_to_file_id' in index.lookups:
+        for func_name, file_ids in index.lookups['function_to_file_id'].items():
+            if isinstance(file_ids, list) and len(file_ids) > 1:
+                duplicates[func_name] = file_ids
+    
+    return duplicates
+
+def detect_duplicate_classes(index) -> Dict[str, List[int]]:
+    """
+    Detect classes with duplicate names across files.
+    
+    Args:
+        index: Complete code index
+        
+    Returns:
+        Dictionary mapping class names to lists of file IDs where duplicates exist
+    """
+    duplicates = {}
+    
+    if hasattr(index, 'lookups') and 'class_to_file_id' in index.lookups:
+        for class_name, file_ids in index.lookups['class_to_file_id'].items():
+            if isinstance(file_ids, list) and len(file_ids) > 1:
+                duplicates[class_name] = file_ids
+    
+    return duplicates
 
 
 class ResponseFormatter:
@@ -78,7 +134,11 @@ def _get_duplicate_names_from_index(index_cache: Optional[Dict[str, Any]] = None
         
         try:
             # Create a temporary CodeIndex-like object for duplicate detection
-            from ..indexing.simple_models import CodeIndex
+            # Simple CodeIndex class for backward compatibility
+            class CodeIndex:
+                def __init__(self, **kwargs):
+                    for key, value in kwargs.items():
+                        setattr(self, key, value)
             
             # Convert index_cache to CodeIndex format if needed
             if isinstance(index_cache, dict) and 'lookups' in index_cache and index_cache['lookups'] is not None:
diff --git a/src/code_index_mcp/utils/validation.py b/src/code_index_mcp/utils/validation.py
diff --git a/uv.lock b/uv.lock