Skip to content

Commit 98075ba

Browse files
committed
feat(search): align code search filters with index
1 parent 05fdc8c commit 98075ba

File tree

8 files changed

+257
-71
lines changed

8 files changed

+257
-71
lines changed

src/code_index_mcp/search/ag.py

Lines changed: 20 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -95,6 +95,26 @@ def search(
9595

9696
cmd.extend(['-G', regex_pattern])
9797

98+
processed_patterns = set()
99+
exclude_dirs = getattr(self, 'exclude_dirs', [])
100+
exclude_file_patterns = getattr(self, 'exclude_file_patterns', [])
101+
102+
for directory in exclude_dirs:
103+
normalized = directory.strip()
104+
if not normalized or normalized in processed_patterns:
105+
continue
106+
cmd.extend(['--ignore', normalized])
107+
processed_patterns.add(normalized)
108+
109+
for pattern in exclude_file_patterns:
110+
normalized = pattern.strip()
111+
if not normalized or normalized in processed_patterns:
112+
continue
113+
if normalized.startswith('!'):
114+
normalized = normalized[1:]
115+
cmd.extend(['--ignore', normalized])
116+
processed_patterns.add(normalized)
117+
98118
# Add -- to treat pattern as a literal argument, preventing injection
99119
cmd.append('--')
100120
cmd.append(search_pattern)

src/code_index_mcp/search/base.py

Lines changed: 14 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -10,10 +10,13 @@
1010
import subprocess
1111
import sys
1212
from abc import ABC, abstractmethod
13-
from typing import Dict, List, Optional, Tuple, Any
13+
from typing import Any, Dict, List, Optional, Tuple, TYPE_CHECKING
1414

1515
from ..indexing.qualified_names import normalize_file_path
1616

17+
if TYPE_CHECKING: # pragma: no cover
18+
from ..utils.file_filter import FileFilter
19+
1720
def parse_search_output(
1821
output: str,
1922
base_path: str,
@@ -182,6 +185,16 @@ class SearchStrategy(ABC):
182185
Each strategy is responsible for searching code using a specific tool or method.
183186
"""
184187

188+
def configure_excludes(self, file_filter: Optional['FileFilter']) -> None:
    """Record the exclusion settings this strategy should apply.

    The filter object itself is kept on ``self.file_filter``. When a
    truthy filter is supplied, its ``exclude_dirs`` and ``exclude_files``
    collections are de-duplicated and stored as sorted lists on
    ``self.exclude_dirs`` and ``self.exclude_file_patterns``. Otherwise
    both attributes are reset to empty lists.

    Args:
        file_filter: Filter providing ``exclude_dirs`` and
            ``exclude_files``, or ``None`` to clear the exclusions.
    """
    self.file_filter = file_filter

    # Guard clause: without a filter there is nothing to exclude.
    if not file_filter:
        self.exclude_dirs = []
        self.exclude_file_patterns = []
        return

    # Sorting the de-duplicated values gives a stable order.
    unique_dirs = set(file_filter.exclude_dirs)
    self.exclude_dirs = sorted(unique_dirs)

    unique_file_patterns = set(file_filter.exclude_files)
    self.exclude_file_patterns = sorted(unique_file_patterns)
185198
@property
186199
@abstractmethod
187200
def name(self) -> str:

src/code_index_mcp/search/basic.py

Lines changed: 16 additions & 10 deletions
Original file line number | Diff line number | Diff line change
@@ -1,9 +1,10 @@
11
"""
22
Basic, pure-Python search strategy.
33
"""
4+
import fnmatch
45
import os
56
import re
6-
import fnmatch
7+
from pathlib import Path
78
from typing import Dict, List, Optional, Tuple
89

910
from .base import SearchStrategy, create_word_boundary_pattern, is_safe_regex_pattern
@@ -83,33 +84,38 @@ def search(
8384
except re.error as e:
8485
raise ValueError(f"Invalid regex pattern: {pattern}, error: {e}")
8586

86-
for root, _, files in os.walk(base_path):
87+
file_filter = getattr(self, 'file_filter', None)
88+
base = Path(base_path)
89+
90+
for root, dirs, files in os.walk(base_path):
91+
if file_filter:
92+
dirs[:] = [d for d in dirs if not file_filter.should_exclude_directory(d)]
93+
8794
for file in files:
88-
# Improved file pattern matching with glob support
8995
if file_pattern and not self._matches_pattern(file, file_pattern):
9096
continue
9197

92-
file_path = os.path.join(root, file)
98+
file_path = Path(root) / file
99+
100+
if file_filter and not file_filter.should_process_path(file_path, base):
101+
continue
102+
93103
rel_path = os.path.relpath(file_path, base_path)
94-
104+
95105
try:
96106
with open(file_path, 'r', encoding='utf-8', errors='ignore') as f:
97107
for line_num, line in enumerate(f, 1):
98108
if search_regex.search(line):
99109
content = line.rstrip('\n')
100-
# Truncate content if it exceeds max_line_length
101110
if max_line_length and len(content) > max_line_length:
102111
content = content[:max_line_length] + '... (truncated)'
103-
112+
104113
if rel_path not in results:
105114
results[rel_path] = []
106-
# Strip newline for consistent output
107115
results[rel_path].append((line_num, content))
108116
except (UnicodeDecodeError, PermissionError, OSError):
109-
# Ignore files that can't be opened or read due to encoding/permission issues
110117
continue
111118
except Exception:
112-
# Ignore any other unexpected exceptions to maintain robustness
113119
continue
114120

115121
return results

src/code_index_mcp/search/grep.py

Lines changed: 21 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -83,6 +83,27 @@ def search(
8383
# Note: grep's --include uses glob patterns, not regex
8484
cmd.append(f'--include={file_pattern}')
8585

86+
exclude_dirs = getattr(self, 'exclude_dirs', [])
87+
exclude_file_patterns = getattr(self, 'exclude_file_patterns', [])
88+
89+
processed_dirs = set()
90+
for directory in exclude_dirs:
91+
normalized = directory.strip()
92+
if not normalized or normalized in processed_dirs:
93+
continue
94+
cmd.append(f'--exclude-dir={normalized}')
95+
processed_dirs.add(normalized)
96+
97+
processed_files = set()
98+
for pattern in exclude_file_patterns:
99+
normalized = pattern.strip()
100+
if not normalized or normalized in processed_files:
101+
continue
102+
if normalized.startswith('!'):
103+
normalized = normalized[1:]
104+
cmd.append(f'--exclude={normalized}')
105+
processed_files.add(normalized)
106+
86107
# Add -- to treat pattern as a literal argument, preventing injection
87108
cmd.append('--')
88109
cmd.append(search_pattern)

src/code_index_mcp/search/ripgrep.py

Lines changed: 25 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -69,6 +69,31 @@ def search(
6969
if file_pattern:
7070
cmd.extend(['--glob', file_pattern])
7171

72+
exclude_dirs = getattr(self, 'exclude_dirs', [])
73+
exclude_file_patterns = getattr(self, 'exclude_file_patterns', [])
74+
75+
processed_patterns = set()
76+
77+
for directory in exclude_dirs:
78+
normalized = directory.strip()
79+
if not normalized or normalized in processed_patterns:
80+
continue
81+
cmd.extend(['--glob', f'!**/{normalized}/**'])
82+
processed_patterns.add(normalized)
83+
84+
for pattern in exclude_file_patterns:
85+
normalized = pattern.strip()
86+
if not normalized or normalized in processed_patterns:
87+
continue
88+
if normalized.startswith('!'):
89+
glob_pattern = normalized
90+
elif any(ch in normalized for ch in '*?[') or '/' in normalized:
91+
glob_pattern = f'!{normalized}'
92+
else:
93+
glob_pattern = f'!**/{normalized}'
94+
cmd.extend(['--glob', glob_pattern])
95+
processed_patterns.add(normalized)
96+
7297
# Add -- to treat pattern as a literal argument, preventing injection
7398
cmd.append('--')
7499
cmd.append(search_pattern)

src/code_index_mcp/search/ugrep.py

Lines changed: 24 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -69,6 +69,30 @@ def search(
6969
if file_pattern:
7070
cmd.extend(['--include', file_pattern])
7171

72+
processed_patterns = set()
73+
exclude_dirs = getattr(self, 'exclude_dirs', [])
74+
exclude_file_patterns = getattr(self, 'exclude_file_patterns', [])
75+
76+
for directory in exclude_dirs:
77+
normalized = directory.strip()
78+
if not normalized or normalized in processed_patterns:
79+
continue
80+
cmd.extend(['--ignore', f'**/{normalized}/**'])
81+
processed_patterns.add(normalized)
82+
83+
for pattern in exclude_file_patterns:
84+
normalized = pattern.strip()
85+
if not normalized or normalized in processed_patterns:
86+
continue
87+
if normalized.startswith('!'):
88+
ignore_pattern = normalized[1:]
89+
elif any(ch in normalized for ch in '*?[') or '/' in normalized:
90+
ignore_pattern = normalized
91+
else:
92+
ignore_pattern = f'**/{normalized}'
93+
cmd.extend(['--ignore', ignore_pattern])
94+
processed_patterns.add(normalized)
95+
7296
# Add '--' to treat pattern as a literal argument, preventing injection
7397
cmd.append('--')
7498
cmd.append(pattern)

0 commit comments

Comments
 (0)