Skip to content

Commit efd921a

Browse files
author
johnhuang316
committed
Add search strategy implementations for ugrep, ripgrep, ag, and grep
This commit introduces multiple search strategies to enhance the code indexer's search capabilities. Each strategy is encapsulated in its own class, implementing a common interface for searching code. The new strategies include: - UgrepStrategy: Utilizes the ugrep command-line tool, supporting fuzzy search. - RipgrepStrategy: Implements the ripgrep tool for fast searching. - AgStrategy: Integrates The Silver Searcher (ag) for efficient searching. - GrepStrategy: Provides a fallback using the standard grep tool. - BasicSearchStrategy: A pure-Python implementation for basic searching when no external tools are available. Additionally, the project settings have been updated to manage available search strategies dynamically, improving the overall search functionality and user experience.
1 parent 12b3136 commit efd921a

File tree

9 files changed

+669
-398
lines changed

9 files changed

+669
-398
lines changed

src/code_index_mcp/project_settings.py

Lines changed: 53 additions & 72 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,39 @@
1616
from .constants import (
1717
SETTINGS_DIR, CONFIG_FILE, INDEX_FILE, CACHE_FILE
1818
)
19+
from .search.base import SearchStrategy
20+
from .search.ugrep import UgrepStrategy
21+
from .search.ripgrep import RipgrepStrategy
22+
from .search.ag import AgStrategy
23+
from .search.grep import GrepStrategy
24+
from .search.basic import BasicSearchStrategy
25+
26+
27+
# Search strategy classes in priority order: when several tools are usable,
# entries nearer the top of this list are preferred over later ones.
SEARCH_STRATEGY_CLASSES = [
    UgrepStrategy,
    RipgrepStrategy,
    AgStrategy,
    GrepStrategy,
    BasicSearchStrategy,
]
35+
36+
37+
def _get_available_strategies() -> list[SearchStrategy]:
    """
    Instantiate every class in SEARCH_STRATEGY_CLASSES and keep the usable ones.

    A strategy is kept when its ``is_available()`` call returns a truthy value.
    If constructing or probing a strategy raises, the error is printed and that
    strategy is left out; the remaining strategies are still probed.

    Returns:
        Strategy instances in the same order as SEARCH_STRATEGY_CLASSES
        (i.e. most preferred first).
    """
    detected = []
    for strategy_class in SEARCH_STRATEGY_CLASSES:
        try:
            candidate = strategy_class()
            usable = candidate.is_available()
        except Exception as e:
            # One broken strategy must not prevent detection of the others.
            print(f"Error initializing strategy {strategy_class.__name__}: {e}")
            continue
        if usable:
            detected.append(candidate)
    return detected
51+
1952

2053
class ProjectSettings:
2154
"""Class for managing project settings and index data"""
@@ -29,7 +62,8 @@ def __init__(self, base_path, skip_load=False):
2962
"""
3063
self.base_path = base_path
3164
self.skip_load = skip_load
32-
self._search_tools_cache = None # Lazy-loaded search tools configuration
65+
self.available_strategies: list[SearchStrategy] = []
66+
self.refresh_available_strategies()
3367

3468
# Ensure the base path of the temporary directory exists
3569
try:
@@ -478,84 +512,31 @@ def get_stats(self):
478512
}
479513

480514
def get_search_tools_config(self):
481-
"""Get search tools configuration with lazy loading.
482-
483-
Returns:
484-
dict: Search tools configuration with preferred tool and available tools
485-
"""
486-
if self._search_tools_cache is None:
487-
print("Detecting available search tools...")
488-
self._search_tools_cache = self._detect_search_tools()
489-
print(f"Search tools detected. Preferred: {self._search_tools_cache.get('preferred_tool', 'basic')}")
490-
491-
return self._search_tools_cache
515+
"""Get the configuration of available search tools.
492516
493-
def get_preferred_search_tool(self):
494-
"""Get the preferred search tool name.
495-
496517
Returns:
497-
str: Name of preferred search tool ('ripgrep', 'ag', 'grep', or 'basic')
518+
dict: A dictionary containing the list of available tool names.
498519
"""
499-
config = self.get_search_tools_config()
500-
return config.get('preferred_tool', 'basic')
520+
return {
521+
"available_tools": [s.name for s in self.available_strategies],
522+
"preferred_tool": self.get_preferred_search_tool().name if self.available_strategies else None
523+
}
524+
525+
def get_preferred_search_tool(self) -> SearchStrategy | None:
526+
"""Get the preferred search tool based on availability and priority.
501527
502-
def _detect_search_tools(self):
503-
"""Detect available search tools on the system.
504-
505528
Returns:
506-
dict: Configuration with available tools and preferred tool
529+
SearchStrategy: An instance of the preferred search strategy, or None.
507530
"""
508-
tools_info = {
509-
'detected_at': self._get_timestamp(),
510-
'available_tools': {},
511-
'preferred_tool': 'basic'
512-
}
513-
514-
# Check tools in priority order: ugrep > ripgrep > ag > grep
515-
search_tools = [
516-
('ugrep', 'ug'),
517-
('ripgrep', 'rg'),
518-
('ag', 'ag'),
519-
('grep', 'grep')
520-
]
531+
if not self.available_strategies:
532+
self.refresh_available_strategies()
521533

522-
for tool_name, command in search_tools:
523-
is_available = self._is_tool_available(command)
524-
tools_info['available_tools'][tool_name] = is_available
525-
526-
# Set the first available tool as preferred
527-
if is_available and tools_info['preferred_tool'] == 'basic':
528-
tools_info['preferred_tool'] = tool_name
529-
530-
return tools_info
534+
return self.available_strategies[0] if self.available_strategies else None
531535

532-
def _is_tool_available(self, command):
533-
"""Check if a search tool is available on the system.
534-
535-
Args:
536-
command (str): Command to check (e.g., 'rg', 'ag', 'grep')
537-
538-
Returns:
539-
bool: True if tool is available, False otherwise
536+
def refresh_available_strategies(self):
540537
"""
541-
try:
542-
result = subprocess.run(
543-
[command, '--version'],
544-
capture_output=True,
545-
timeout=3,
546-
check=False
547-
)
548-
return result.returncode == 0
549-
except (FileNotFoundError, subprocess.TimeoutExpired, OSError):
550-
return False
551-
552-
def refresh_search_tools(self):
553-
"""Manually refresh search tools detection.
554-
555-
Returns:
556-
dict: Updated search tools configuration
538+
Force a refresh of the available search tools list.
557539
"""
558-
print("Refreshing search tools detection...")
559-
self._search_tools_cache = self._detect_search_tools()
560-
print(f"Search tools refreshed. Preferred: {self._search_tools_cache.get('preferred_tool', 'basic')}")
561-
return self._search_tools_cache
540+
print("Refreshing available search strategies...")
541+
self.available_strategies = _get_available_strategies()
542+
print(f"Available strategies found: {[s.name for s in self.available_strategies]}")
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
"""Search strategies package."""

src/code_index_mcp/search/ag.py

Lines changed: 89 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,89 @@
1+
"""
2+
Search Strategy for The Silver Searcher (ag)
3+
"""
4+
import shutil
5+
import subprocess
6+
from typing import Dict, List, Optional, Tuple
7+
8+
from .base import SearchStrategy, parse_search_output, create_safe_fuzzy_pattern
9+
10+
class AgStrategy(SearchStrategy):
    """Search strategy using 'The Silver Searcher' (ag) command-line tool."""

    @property
    def name(self) -> str:
        """The name of the search tool."""
        return 'ag'

    def is_available(self) -> bool:
        """Check if the 'ag' command can be found on the PATH."""
        return shutil.which('ag') is not None

    @staticmethod
    def _glob_to_file_regex(file_pattern: str) -> str:
        """
        Translate a simple glob (e.g. "*.py") into a regex for ag's -G option.

        ag's -G takes a regular expression, not a glob, so passing "*.py"
        through unchanged yields an invalid pattern. Only ``*`` (any run of
        characters) and ``?`` (any single character) are treated as wildcards;
        every other character is matched literally.

        Args:
            file_pattern: Glob pattern describing the files to search.

        Returns:
            A regex anchored at the start of a path component and at the end
            of the path.
        """
        import re  # local import: this module's top-level imports do not include re

        translated = []
        for char in file_pattern:
            if char == '*':
                translated.append('.*')
            elif char == '?':
                translated.append('.')
            else:
                translated.append(re.escape(char))
        # The leading group lets the glob match either a bare file name or the
        # last component(s) of a longer path, with either path separator.
        return r'(^|[/\\])' + ''.join(translated) + '$'

    def search(
        self,
        pattern: str,
        base_path: str,
        case_sensitive: bool = True,
        context_lines: int = 0,
        file_pattern: Optional[str] = None,
        fuzzy: bool = False
    ) -> Dict[str, List[Tuple[int, str]]]:
        """
        Execute a search using The Silver Searcher (ag).

        Note: ag does not support native fuzzy searching. When fuzzy=True, a
        safe fuzzy pattern with word boundaries is used for regex search.
        When fuzzy=False, a literal string search is performed.

        Args:
            pattern: The text to search for.
            base_path: The root directory to search in.
            case_sensitive: Whether the search is case-sensitive.
            context_lines: Number of context lines requested around each match.
            file_pattern: Glob pattern to filter files (e.g., "*.py").
            fuzzy: Whether to use the word-boundary fuzzy pattern.

        Returns:
            A dictionary mapping file paths (relative to base_path) to lists of
            (line_number, line_content) tuples.

        Raises:
            RuntimeError: If ag is not installed, exits with a code greater
                than 1, or any other error occurs while running it.
        """
        # --noheading keeps the output in the per-line format that
        # parse_search_output consumes, consistent with the other tools.
        cmd = ['ag', '--noheading']

        if case_sensitive:
            # ag defaults to smart-case, which would match an all-lowercase
            # pattern case-insensitively; request exact case explicitly.
            cmd.append('--case-sensitive')
        else:
            cmd.append('--ignore-case')

        if fuzzy:
            # Regex search using the escaped, word-boundary pattern.
            search_pattern = create_safe_fuzzy_pattern(pattern)
        else:
            search_pattern = pattern
            cmd.append('--literal')  # or -Q

        if context_lines > 0:
            cmd.extend(['--before', str(context_lines)])
            cmd.extend(['--after', str(context_lines)])

        if file_pattern:
            # -G filters files by regex, so the glob must be translated first.
            cmd.extend(['-G', self._glob_to_file_regex(file_pattern)])

        # '--' ends option parsing so a pattern starting with '-' cannot be
        # interpreted as a command-line flag.
        cmd.append('--')
        cmd.append(search_pattern)
        cmd.append(base_path)

        try:
            process = subprocess.run(
                cmd,
                capture_output=True,
                text=True,
                encoding='utf-8',
                errors='replace',
                check=False  # Do not raise CalledProcessError on non-zero exit
            )
            # Exit code 0 means matches were found and 1 means no matches;
            # anything higher is treated as a failure.
            if process.returncode > 1:
                raise RuntimeError(f"ag failed with exit code {process.returncode}: {process.stderr}")

            return parse_search_output(process.stdout, base_path)

        except FileNotFoundError as e:
            raise RuntimeError("'ag' (The Silver Searcher) not found. Please install it and ensure it's in your PATH.") from e
        except RuntimeError:
            # Already carries a precise message (e.g. the exit-code failure
            # above); do not wrap it a second time.
            raise
        except Exception as e:
            # Surface other failures (e.g. permission errors) as RuntimeError
            raise RuntimeError(f"An error occurred while running ag: {e}") from e

src/code_index_mcp/search/base.py

Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
"""
2+
Search Strategies for Code Indexer
3+
4+
This module defines the abstract base class for search strategies, together with
5+
the output-parsing and fuzzy-pattern helpers shared by the concrete implementations.
6+
"""
7+
import os
8+
import re
9+
import shutil
10+
import subprocess
11+
import sys
12+
from abc import ABC, abstractmethod
13+
from typing import Dict, List, Optional, Tuple, Any
14+
15+
def parse_search_output(output: str, base_path: str) -> Dict[str, List[Tuple[int, str]]]:
    """
    Parse the output of command-line search tools (grep, ag, rg).

    Each useful line is expected to look like ``path:line_number:content``.
    Lines that do not fit that shape (blank lines, summary lines, lines whose
    line-number field is not an integer) are skipped.

    Args:
        output: The raw output from the command-line tool.
        base_path: The base path of the project to make file paths relative.

    Returns:
        A dictionary where keys are file paths relative to base_path (always
        using '/' as separator) and values are lists of
        (line_number, line_content) tuples in output order.
    """
    results: Dict[str, List[Tuple[int, str]]] = {}
    # Normalize base_path to ensure consistent path separation
    normalized_base_path = os.path.normpath(base_path)

    for line in output.strip().split('\n'):
        if not line.strip():
            continue

        parts = line.split(':', 2)
        if len(parts) < 3:
            # Not enough fields to be a "path:line:content" record.
            continue

        drive, after_drive, remainder = parts
        has_drive_prefix = (
            len(drive) == 1
            and drive.isascii()
            and drive.isalpha()
            and after_drive[:1] in ('\\', '/')
        )
        if has_drive_prefix:
            # The first ':' belonged to a drive prefix such as "C:\" or "C:/",
            # so re-join it with the path and split the rest once more.
            file_path_abs = f"{drive}:{after_drive}"
            line_number_str, separator, content = remainder.partition(':')
            if not separator:
                continue
        else:
            file_path_abs, line_number_str, content = parts

        try:
            line_number = int(line_number_str)
            # Make the file path relative to the base_path
            relative_path = os.path.relpath(file_path_abs, normalized_base_path)
        except ValueError:
            # Non-numeric line field, or a path that cannot be made relative
            # to base_path: this is not a usable match record.
            continue

        # Normalize path separators for consistency
        relative_path = relative_path.replace('\\', '/')
        results.setdefault(relative_path, []).append((line_number, content))

    return results
63+
64+
65+
def create_safe_fuzzy_pattern(pattern: str) -> str:
    """
    Build a word-boundary regex from *pattern* with all metacharacters escaped.

    The input is passed through ``re.escape`` first, so it is always matched
    as literal text and cannot inject regex syntax of its own.

    Args:
        pattern: Original search pattern

    Returns:
        For patterns of three or more characters, an alternation that matches
        the text where it begins at a word boundary (e.g. "test" in "testing")
        or where it ends at one (e.g. "test" in "mytest"); a standalone word
        satisfies both. Shorter patterns must have a boundary on both sides,
        which keeps very short inputs from matching too broadly.
    """
    literal = re.escape(pattern)

    if len(pattern) < 3:
        # Short input: whole-word match only.
        return rf"\b{literal}\b"

    # Longer input: allow a partial-word match anchored at either edge.
    return rf"\b{literal}|{literal}\b"
91+
92+
93+
class SearchStrategy(ABC):
    """
    Common interface that every search strategy implements.

    A concrete strategy wraps one search tool or method and must provide the
    ``name`` property plus the ``is_available`` and ``search`` methods.
    """

    @property
    @abstractmethod
    def name(self) -> str:
        """The name of the search tool (e.g., 'ugrep', 'ripgrep')."""

    @abstractmethod
    def is_available(self) -> bool:
        """
        Report whether this strategy's search tool can be used on this system.

        Returns:
            True if the tool is available, False otherwise.
        """

    @abstractmethod
    def search(
        self,
        pattern: str,
        base_path: str,
        case_sensitive: bool = True,
        context_lines: int = 0,
        file_pattern: Optional[str] = None,
        fuzzy: bool = False
    ) -> Dict[str, List[Tuple[int, str]]]:
        """
        Run a search with this strategy.

        Args:
            pattern: The search pattern (string or regex).
            base_path: The root directory to search in.
            case_sensitive: Whether the search is case-sensitive.
            context_lines: Number of context lines to show around each match.
            file_pattern: Glob pattern to filter files (e.g., "*.py").
            fuzzy: Whether to enable fuzzy search.

        Returns:
            A dictionary mapping filenames to lists of
            (line_number, line_content) tuples.
        """
141+

0 commit comments

Comments
 (0)