Skip to content

Commit 3cbff23

Browse files
committed
feat: enhance regex handling in search functions with auto-detection and improved safety checks
1 parent b2001dd commit 3cbff23

File tree

3 files changed

+50
-18
lines changed

src/code_index_mcp/search/base.py

Lines changed: 35 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -100,22 +100,45 @@ def is_safe_regex_pattern(pattern: str) -> bool:
100100
Returns:
101101
True if the pattern looks like a safe regex, False otherwise
102102
"""
103-
# Allow basic regex operators that are commonly used and safe
104-
safe_regex_chars = ['|', '(', ')', '[', ']', '^', '$']
103+
# Strong indicators of regex intent
104+
strong_regex_indicators = ['|', '(', ')', '[', ']', '^', '$']
105105

106-
# Check if pattern contains any regex metacharacters
107-
has_regex_chars = any(char in pattern for char in safe_regex_chars)
106+
# Weaker indicators that need context
107+
weak_regex_indicators = ['.', '*', '+', '?']
108108

109-
# Basic safety check - avoid obviously dangerous patterns
110-
dangerous_patterns = [
111-
r'(.+)+', # Nested quantifiers
112-
r'(.*)*', # Nested stars
113-
r'(.{0,})+', # Potential ReDoS patterns
114-
]
109+
# Check for strong regex indicators
110+
has_strong_regex = any(char in pattern for char in strong_regex_indicators)
115111

116-
has_dangerous_patterns = any(dangerous in pattern for dangerous in dangerous_patterns)
112+
# Check whether any weak indicators are present (their context is examined further below)
113+
has_weak_regex = any(char in pattern for char in weak_regex_indicators)
117114

118-
return has_regex_chars and not has_dangerous_patterns
115+
# If the pattern has strong indicators, it is likely a regex
116+
if has_strong_regex:
117+
# Still check for dangerous patterns
118+
dangerous_patterns = [
119+
r'(.+)+', # Nested quantifiers
120+
r'(.*)*', # Nested stars
121+
r'(.{0,})+', # Potential ReDoS patterns
122+
]
123+
124+
has_dangerous_patterns = any(dangerous in pattern for dangerous in dangerous_patterns)
125+
return not has_dangerous_patterns
126+
127+
# If there are only weak indicators, more context is needed
128+
if has_weak_regex:
129+
# Patterns like ".*", ".+", "file.*py" look like regex
130+
# But "file.txt", "test.py" look like literal filenames
131+
regex_like_patterns = [
132+
r'\.\*', # .*
133+
r'\.\+', # .+
134+
r'\.\w*\*', # .something*
135+
r'\*\.', # *.
136+
r'\w+\.\*\w*', # word.*word
137+
]
138+
139+
return any(re.search(regex_pattern, pattern) for regex_pattern in regex_like_patterns)
140+
141+
return False
119142

120143

121144
class SearchStrategy(ABC):

src/code_index_mcp/server.py

Lines changed: 6 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -115,7 +115,7 @@ def search_code_advanced(
115115
context_lines: int = 0,
116116
file_pattern: str = None,
117117
fuzzy: bool = False,
118-
regex: bool = False
118+
regex: bool = None
119119
) -> Dict[str, Any]:
120120
"""
121121
Search for a code pattern in the project using an advanced, fast tool.
@@ -141,9 +141,11 @@ def search_code_advanced(
141141
IMPORTANT: Only ugrep provides true fuzzy search. Other tools use word boundary
142142
matching which allows partial matches at word boundaries.
143143
For exact literal matches, set fuzzy=False (default and recommended).
144-
regex: If True, enables regex pattern matching. Use this for patterns like "ERROR|WARN".
145-
The pattern will be validated for safety to prevent ReDoS attacks.
146-
If False (default), uses literal string search.
144+
regex: Controls regex pattern matching behavior:
145+
- If True, enables regex pattern matching
146+
- If False, forces literal string search
147+
- If None (default), automatically detects regex patterns and enables regex for patterns like "ERROR|WARN"
148+
The pattern will always be validated for safety to prevent ReDoS attacks.
147149
148150
Returns:
149151
A dictionary containing the search results or an error message.

src/code_index_mcp/services/search_service.py

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,7 @@
99

1010
from .base_service import BaseService
1111
from ..utils import ValidationHelper, ResponseFormatter
12+
from ..search.base import is_safe_regex_pattern
1213

1314

1415
class SearchService(BaseService):
@@ -30,7 +31,7 @@ def search_code( # pylint: disable=too-many-arguments
3031
context_lines: int = 0,
3132
file_pattern: Optional[str] = None,
3233
fuzzy: bool = False,
33-
regex: bool = False
34+
regex: Optional[bool] = None
3435
) -> Dict[str, Any]:
3536
"""
3637
Search for code patterns in the project.
@@ -43,7 +44,7 @@ def search_code( # pylint: disable=too-many-arguments
4344
context_lines: Number of context lines to show
4445
file_pattern: Glob pattern to filter files
4546
fuzzy: Whether to enable fuzzy matching
46-
regex: Whether pattern is a regex
47+
regex: Regex mode - True/False to force, None for auto-detection
4748
4849
Returns:
4950
Dictionary with search results or error information
@@ -53,6 +54,12 @@ def search_code( # pylint: disable=too-many-arguments
5354
"""
5455
self._require_project_setup()
5556

57+
# Smart regex detection if regex parameter is None
58+
if regex is None:
59+
regex = is_safe_regex_pattern(pattern)
60+
if regex:
61+
print(f"Auto-detected regex pattern: {pattern}")
62+
5663
# Validate search pattern
5764
error = ValidationHelper.validate_search_pattern(pattern, regex)
5865
if error:

0 commit comments

Comments
 (0)