From 5aeab43f189eb3fe1083c3d0ae7e30564003bdba Mon Sep 17 00:00:00 2001 From: johnhuang316 Date: Fri, 11 Jul 2025 17:26:31 +0800 Subject: [PATCH 1/4] fix: resolve OR search (regex pipe operator) not working in search_code_advanced - Add explicit `regex` parameter to all search strategies for clear regex control - Remove misleading documentation that suggested fuzzy=True enables regex - Rename `create_safe_fuzzy_pattern` to `create_word_boundary_pattern` for clarity - Add `is_safe_regex_pattern` function with ReDoS protection - Update search behavior: - Default: literal string search (regex=False) - Explicit: regex search when regex=True - Safety: validate regex patterns to prevent ReDoS attacks Usage for OR searches: - Before: search_code_advanced("ERROR|WARN") # didn't work - After: search_code_advanced("ERROR|WARN", regex=True) # works Fixes #11 --- .gitignore | 4 +++ src/code_index_mcp/search/ag.py | 31 ++++++++++++----- src/code_index_mcp/search/base.py | 52 ++++++++++++++++++++++------ src/code_index_mcp/search/basic.py | 35 ++++++++++++++----- src/code_index_mcp/search/grep.py | 37 +++++++++++++++----- src/code_index_mcp/search/ripgrep.py | 29 +++++++++++----- src/code_index_mcp/search/ugrep.py | 25 ++++++++++--- src/code_index_mcp/server.py | 21 ++++++----- 8 files changed, 175 insertions(+), 59 deletions(-) diff --git a/.gitignore b/.gitignore index 7f36aef..f3de0c8 100644 --- a/.gitignore +++ b/.gitignore @@ -43,3 +43,7 @@ test_*.py # Claude Code generated files CLAUDE.md +.claude/ +.claude_chat/ +claude_* +COMMIT_MESSAGE.txt diff --git a/src/code_index_mcp/search/ag.py b/src/code_index_mcp/search/ag.py index 82cedb6..a4faf39 100644 --- a/src/code_index_mcp/search/ag.py +++ b/src/code_index_mcp/search/ag.py @@ -5,7 +5,7 @@ import subprocess from typing import Dict, List, Optional, Tuple -from .base import SearchStrategy, parse_search_output, create_safe_fuzzy_pattern +from .base import SearchStrategy, parse_search_output, create_word_boundary_pattern, 
is_safe_regex_pattern class AgStrategy(SearchStrategy): """Search strategy using 'The Silver Searcher' (ag) command-line tool.""" @@ -26,14 +26,20 @@ def search( case_sensitive: bool = True, context_lines: int = 0, file_pattern: Optional[str] = None, - fuzzy: bool = False + fuzzy: bool = False, + regex: bool = False ) -> Dict[str, List[Tuple[int, str]]]: """ Execute a search using The Silver Searcher (ag). - Note: ag does not support native fuzzy searching. When fuzzy=True, a - safe fuzzy pattern with word boundaries is used for regex search. - When fuzzy=False, a literal string search is performed. + Args: + pattern: The search pattern + base_path: Directory to search in + case_sensitive: Whether search is case sensitive + context_lines: Number of context lines to show + file_pattern: File pattern to filter + fuzzy: Enable word boundary matching (not true fuzzy search) + regex: Enable regex pattern matching """ # ag prints line numbers and groups by file by default, which is good. # --noheading is used to be consistent with other tools' output format. 
@@ -44,11 +50,18 @@ def search( # Prepare search pattern search_pattern = pattern - if fuzzy: - # Use safe fuzzy pattern for regex search - search_pattern = create_safe_fuzzy_pattern(pattern) + + if regex: + # Use regex mode - check for safety first + if not is_safe_regex_pattern(pattern): + raise ValueError(f"Potentially unsafe regex pattern: {pattern}") + # Don't add --literal, use regex mode + elif fuzzy: + # Use word boundary pattern for partial matching + search_pattern = create_word_boundary_pattern(pattern) else: - cmd.append('--literal') # or -Q + # Use literal string search + cmd.append('--literal') if context_lines > 0: cmd.extend(['--before', str(context_lines)]) diff --git a/src/code_index_mcp/search/base.py b/src/code_index_mcp/search/base.py index 50d97f8..33cb9bd 100644 --- a/src/code_index_mcp/search/base.py +++ b/src/code_index_mcp/search/base.py @@ -62,32 +62,60 @@ def parse_search_output(output: str, base_path: str) -> Dict[str, List[Tuple[int return results -def create_safe_fuzzy_pattern(pattern: str) -> str: +def create_word_boundary_pattern(pattern: str) -> str: """ - Create safe fuzzy search patterns that are more permissive than exact match - but still safe from regex injection attacks. + Create word boundary patterns for partial matching. + This is NOT true fuzzy search, but allows matching words at boundaries. Args: pattern: Original search pattern Returns: - Safe fuzzy pattern for extended regex + Word boundary pattern for regex matching """ # Escape any regex special characters to make them literal escaped = re.escape(pattern) - # Create fuzzy pattern that matches: + # Create word boundary pattern that matches: # 1. Word at start of word boundary (e.g., "test" in "testing") # 2. Word at end of word boundary (e.g., "test" in "mytest") # 3. 
Whole word (e.g., "test" as standalone word) if len(pattern) >= 3: # Only for patterns of reasonable length # This pattern allows partial matches at word boundaries - fuzzy_pattern = f"\\b{escaped}|{escaped}\\b" + boundary_pattern = f"\\b{escaped}|{escaped}\\b" else: # For short patterns, require full word boundaries to avoid too many matches - fuzzy_pattern = f"\\b{escaped}\\b" + boundary_pattern = f"\\b{escaped}\\b" - return fuzzy_pattern + return boundary_pattern + + +def is_safe_regex_pattern(pattern: str) -> bool: + """ + Check if a pattern appears to be a safe regex pattern. + + Args: + pattern: The search pattern to check + + Returns: + True if the pattern looks like a safe regex, False otherwise + """ + # Allow basic regex operators that are commonly used and safe + safe_regex_chars = ['|', '(', ')', '[', ']', '^', '$'] + + # Check if pattern contains any regex metacharacters + has_regex_chars = any(char in pattern for char in safe_regex_chars) + + # Basic safety check - avoid obviously dangerous patterns + dangerous_patterns = [ + r'(.+)+', # Nested quantifiers + r'(.*)*', # Nested stars + r'(.{0,})+', # Potential ReDoS patterns + ] + + has_dangerous_patterns = any(dangerous in pattern for dangerous in dangerous_patterns) + + return has_regex_chars and not has_dangerous_patterns class SearchStrategy(ABC): @@ -121,18 +149,20 @@ def search( case_sensitive: bool = True, context_lines: int = 0, file_pattern: Optional[str] = None, - fuzzy: bool = False + fuzzy: bool = False, + regex: bool = False ) -> Dict[str, List[Tuple[int, str]]]: """ Execute a search using the specific strategy. Args: - pattern: The search pattern (string or regex). + pattern: The search pattern. base_path: The root directory to search in. case_sensitive: Whether the search is case-sensitive. context_lines: Number of context lines to show around each match. file_pattern: Glob pattern to filter files (e.g., "*.py"). - fuzzy: Whether to enable fuzzy search. 
+ fuzzy: Whether to enable fuzzy/partial matching. + regex: Whether to enable regex pattern matching. Returns: A dictionary mapping filenames to lists of (line_number, line_content) tuples. diff --git a/src/code_index_mcp/search/basic.py b/src/code_index_mcp/search/basic.py index 76d3607..57aab77 100644 --- a/src/code_index_mcp/search/basic.py +++ b/src/code_index_mcp/search/basic.py @@ -6,7 +6,7 @@ import fnmatch from typing import Dict, List, Optional, Tuple -from .base import SearchStrategy, create_safe_fuzzy_pattern +from .base import SearchStrategy, create_word_boundary_pattern, is_safe_regex_pattern class BasicSearchStrategy(SearchStrategy): """ @@ -45,24 +45,41 @@ def search( case_sensitive: bool = True, context_lines: int = 0, file_pattern: Optional[str] = None, - fuzzy: bool = False + fuzzy: bool = False, + regex: bool = False ) -> Dict[str, List[Tuple[int, str]]]: """ Execute a basic, line-by-line search. Note: This implementation does not support context_lines. - Fuzzy searching uses the shared create_safe_fuzzy_pattern function. 
+ Args: + pattern: The search pattern + base_path: Directory to search in + case_sensitive: Whether search is case sensitive + context_lines: Number of context lines (not supported) + file_pattern: File pattern to filter + fuzzy: Enable word boundary matching + regex: Enable regex pattern matching """ results: Dict[str, List[Tuple[int, str]]] = {} flags = 0 if case_sensitive else re.IGNORECASE - if fuzzy: - # Use the shared safe fuzzy pattern function - search_pattern = create_safe_fuzzy_pattern(pattern) - search_regex = re.compile(search_pattern, flags) - else: - search_regex = re.compile(pattern, flags) + try: + if regex: + # Use regex mode - check for safety first + if not is_safe_regex_pattern(pattern): + raise ValueError(f"Potentially unsafe regex pattern: {pattern}") + search_regex = re.compile(pattern, flags) + elif fuzzy: + # Use word boundary pattern for partial matching + search_pattern = create_word_boundary_pattern(pattern) + search_regex = re.compile(search_pattern, flags) + else: + # Use literal string search + search_regex = re.compile(re.escape(pattern), flags) + except re.error as e: + raise ValueError(f"Invalid regex pattern: {pattern}, error: {e}") for root, _, files in os.walk(base_path): for file in files: diff --git a/src/code_index_mcp/search/grep.py b/src/code_index_mcp/search/grep.py index 5e9897d..cd2d18e 100644 --- a/src/code_index_mcp/search/grep.py +++ b/src/code_index_mcp/search/grep.py @@ -5,7 +5,7 @@ import subprocess from typing import Dict, List, Optional, Tuple -from .base import SearchStrategy, parse_search_output, create_safe_fuzzy_pattern +from .base import SearchStrategy, parse_search_output, create_word_boundary_pattern, is_safe_regex_pattern class GrepStrategy(SearchStrategy): """ @@ -31,25 +31,44 @@ def search( case_sensitive: bool = True, context_lines: int = 0, file_pattern: Optional[str] = None, - fuzzy: bool = False + fuzzy: bool = False, + regex: bool = False ) -> Dict[str, List[Tuple[int, str]]]: """ Execute a search 
using standard grep. - Note: grep does not support native fuzzy searching. When fuzzy=True, an - Extended Regular Expression (ERE) search is performed with safe fuzzy pattern. - When fuzzy=False, a literal string search is performed (-F). + Args: + pattern: The search pattern + base_path: Directory to search in + case_sensitive: Whether search is case sensitive + context_lines: Number of context lines to show + file_pattern: File pattern to filter + fuzzy: Enable word boundary matching + regex: Enable regex pattern matching """ # -r: recursive, -n: line number cmd = ['grep', '-r', '-n'] # Prepare search pattern search_pattern = pattern - if not fuzzy: - cmd.append('-F') # Fixed strings, literal search - else: + + if regex: + # Use regex mode - check for safety first + if not is_safe_regex_pattern(pattern): + raise ValueError(f"Potentially unsafe regex pattern: {pattern}") cmd.append('-E') # Extended Regular Expressions - search_pattern = create_safe_fuzzy_pattern(pattern) + elif fuzzy: + # Use word boundary pattern for partial matching + search_pattern = create_word_boundary_pattern(pattern) + cmd.append('-E') # Extended Regular Expressions + else: + # Auto-detect if pattern looks like a safe regex + if is_safe_regex_pattern(pattern): + # Pattern contains regex chars, use extended regex mode + cmd.append('-E') + else: + # Use literal string search + cmd.append('-F') if not case_sensitive: cmd.append('-i') diff --git a/src/code_index_mcp/search/ripgrep.py b/src/code_index_mcp/search/ripgrep.py index 3f7aad7..39c4e58 100644 --- a/src/code_index_mcp/search/ripgrep.py +++ b/src/code_index_mcp/search/ripgrep.py @@ -5,7 +5,7 @@ import subprocess from typing import Dict, List, Optional, Tuple -from .base import SearchStrategy, parse_search_output, create_safe_fuzzy_pattern +from .base import SearchStrategy, parse_search_output, create_word_boundary_pattern, is_safe_regex_pattern class RipgrepStrategy(SearchStrategy): """Search strategy using the 'ripgrep' (rg) 
command-line tool.""" @@ -26,14 +26,20 @@ def search( case_sensitive: bool = True, context_lines: int = 0, file_pattern: Optional[str] = None, - fuzzy: bool = False + fuzzy: bool = False, + regex: bool = False ) -> Dict[str, List[Tuple[int, str]]]: """ Execute a search using ripgrep. - Note: ripgrep does not support native fuzzy searching. When fuzzy=True, a - safe fuzzy pattern with word boundaries is used for regex search. - When fuzzy=False, a literal string search is performed with --fixed-strings. + Args: + pattern: The search pattern + base_path: Directory to search in + case_sensitive: Whether search is case sensitive + context_lines: Number of context lines to show + file_pattern: File pattern to filter + fuzzy: Enable word boundary matching (not true fuzzy search) + regex: Enable regex pattern matching """ cmd = ['rg', '--line-number', '--no-heading', '--color=never'] @@ -42,10 +48,17 @@ def search( # Prepare search pattern search_pattern = pattern - if fuzzy: - # Use safe fuzzy pattern for regex search - search_pattern = create_safe_fuzzy_pattern(pattern) + + if regex: + # Use regex mode - check for safety first + if not is_safe_regex_pattern(pattern): + raise ValueError(f"Potentially unsafe regex pattern: {pattern}") + # Don't add --fixed-strings, use regex mode + elif fuzzy: + # Use word boundary pattern for partial matching + search_pattern = create_word_boundary_pattern(pattern) else: + # Use literal string search cmd.append('--fixed-strings') if context_lines > 0: diff --git a/src/code_index_mcp/search/ugrep.py b/src/code_index_mcp/search/ugrep.py index 5e64302..69f2cc4 100644 --- a/src/code_index_mcp/search/ugrep.py +++ b/src/code_index_mcp/search/ugrep.py @@ -5,7 +5,7 @@ import subprocess from typing import Dict, List, Optional, Tuple -from .base import SearchStrategy, parse_search_output +from .base import SearchStrategy, parse_search_output, create_word_boundary_pattern, is_safe_regex_pattern class UgrepStrategy(SearchStrategy): """Search 
strategy using the 'ugrep' (ug) command-line tool.""" @@ -26,10 +26,20 @@ def search( case_sensitive: bool = True, context_lines: int = 0, file_pattern: Optional[str] = None, - fuzzy: bool = False + fuzzy: bool = False, + regex: bool = False ) -> Dict[str, List[Tuple[int, str]]]: """ Execute a search using the 'ug' command-line tool. + + Args: + pattern: The search pattern + base_path: Directory to search in + case_sensitive: Whether search is case sensitive + context_lines: Number of context lines to show + file_pattern: File pattern to filter + fuzzy: Enable true fuzzy search (ugrep native support) + regex: Enable regex pattern matching """ if not self.is_available(): return {"error": "ugrep (ug) command not found."} @@ -37,9 +47,16 @@ def search( cmd = ['ug', '--line-number', '--no-heading'] if fuzzy: - cmd.append('--fuzzy') # Enable fuzzy search (long form for clarity) + # ugrep has native fuzzy search support + cmd.append('--fuzzy') + elif regex: + # Use regex mode - check for safety first + if not is_safe_regex_pattern(pattern): + raise ValueError(f"Potentially unsafe regex pattern: {pattern}") + # Don't add --fixed-strings, use regex mode else: - cmd.append('--fixed-strings') # Use fixed-strings for non-fuzzy search + # Use literal string search + cmd.append('--fixed-strings') if not case_sensitive: cmd.append('--ignore-case') diff --git a/src/code_index_mcp/server.py b/src/code_index_mcp/server.py index fd5e109..acabb7a 100644 --- a/src/code_index_mcp/server.py +++ b/src/code_index_mcp/server.py @@ -314,7 +314,8 @@ def search_code_advanced( case_sensitive: bool = True, context_lines: int = 0, file_pattern: Optional[str] = None, - fuzzy: bool = False + fuzzy: bool = False, + regex: bool = False ) -> Dict[str, Any]: """ Search for a code pattern in the project using an advanced, fast tool. @@ -323,7 +324,7 @@ def search_code_advanced( (like ugrep, ripgrep, ag, or grep) for maximum performance. 
Args: - pattern: The search pattern (can be a regex if fuzzy=True). + pattern: The search pattern. Can be literal text or regex (see regex parameter). case_sensitive: Whether the search should be case-sensitive. context_lines: Number of lines to show before and after the match. file_pattern: A glob pattern to filter files to search in (e.g., "*.py", "*.js", "test_*.py"). @@ -333,13 +334,14 @@ def search_code_advanced( - ag (Silver Searcher): Converts globs to regex internally (may have limitations) - grep: Basic pattern matching only For best compatibility, use simple patterns like "*.py" or "*.js". - fuzzy: If True, enables fuzzy/approximate matching. - IMPORTANT: Fuzzy matching support varies by tool: - - ugrep: Native fuzzy search with --fuzzy flag - - ripgrep: Safe fuzzy patterns using word boundaries - - ag: Safe fuzzy patterns using word boundaries - - grep: Safe fuzzy patterns using word boundaries + fuzzy: If True, enables partial/boundary matching (not true fuzzy search). + IMPORTANT: This is NOT edit-distance fuzzy matching, but word boundary matching. + - ugrep: Native fuzzy search with --fuzzy flag (true fuzzy search) + - Other tools: Word boundary pattern matching For literal string searches, set fuzzy=False (recommended for exact matches). + regex: If True, enables regex pattern matching. Use this for patterns like "ERROR|WARN". + The pattern will be validated for safety to prevent ReDoS attacks. + If False (default), uses literal string search. Returns: A dictionary containing the search results or an error message. 
@@ -364,7 +366,8 @@ def search_code_advanced( case_sensitive=case_sensitive, context_lines=context_lines, file_pattern=file_pattern, - fuzzy=fuzzy + fuzzy=fuzzy, + regex=regex ) return {"results": results} except Exception as e: From d75b5165293cdb123113b26d4020939815becaa0 Mon Sep 17 00:00:00 2001 From: johnhuang316 Date: Mon, 14 Jul 2025 10:08:34 +0800 Subject: [PATCH 2/4] chore: prepare v0.3.2 release - Bump version to 0.3.2 in pyproject.toml - Add .gitattributes to standardize line endings across platforms - Remove .claude/settings.local.json from git tracking --- .claude/settings.local.json | 9 ------ .gitattributes | 26 ++++++++++++++++ pyproject.toml | 54 +++++++++++++++++----------------- src/code_index_mcp/__init__.py | 12 ++++---- 4 files changed, 59 insertions(+), 42 deletions(-) delete mode 100644 .claude/settings.local.json create mode 100644 .gitattributes diff --git a/.claude/settings.local.json b/.claude/settings.local.json deleted file mode 100644 index 938e651..0000000 --- a/.claude/settings.local.json +++ /dev/null @@ -1,9 +0,0 @@ -{ - "permissions": { - "allow": [ - "Bash(python:*)", - "Bash(npx @modelcontextprotocol/inspector --cli:*)" - ], - "deny": [] - } -} \ No newline at end of file diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 0000000..74d0c7b --- /dev/null +++ b/.gitattributes @@ -0,0 +1,26 @@ +# Set default behavior to automatically normalize line endings +* text=auto + +# Force specific file types to use LF line endings +*.py text eol=lf +*.js text eol=lf +*.ts text eol=lf +*.json text eol=lf +*.md text eol=lf +*.yml text eol=lf +*.yaml text eol=lf +*.toml text eol=lf +*.txt text eol=lf + +# Force specific file types to use CRLF line endings +*.bat text eol=crlf +*.cmd text eol=crlf + +# Binary files should be left untouched +*.png binary +*.jpg binary +*.jpeg binary +*.gif binary +*.ico binary +*.zip binary +*.tar.gz binary \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 
eac8bb1..4c7c800 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,27 +1,27 @@ -[build-system] -requires = ["setuptools>=61.0"] -build-backend = "setuptools.build_meta" - -[project] -name = "code-index-mcp" -version = "0.3.1" -description = "Code indexing and analysis tools for LLMs using MCP" -readme = "README.md" -requires-python = ">=3.10" -license = {text = "MIT"} -authors = [ - {name = "johnhuang316"} -] -dependencies = [ - "mcp>=0.3.0", -] - -[project.urls] -Homepage = "https://github.com/johnhuang316/code-index-mcp" -"Bug Tracker" = "https://github.com/johnhuang316/code-index-mcp/issues" - -[project.scripts] -code-index-mcp = "code_index_mcp.server:main" - -[tool.setuptools] -package-dir = {"" = "src"} +[build-system] +requires = ["setuptools>=61.0"] +build-backend = "setuptools.build_meta" + +[project] +name = "code-index-mcp" +version = "0.3.2" +description = "Code indexing and analysis tools for LLMs using MCP" +readme = "README.md" +requires-python = ">=3.10" +license = {text = "MIT"} +authors = [ + {name = "johnhuang316"} +] +dependencies = [ + "mcp>=0.3.0", +] + +[project.urls] +Homepage = "https://github.com/johnhuang316/code-index-mcp" +"Bug Tracker" = "https://github.com/johnhuang316/code-index-mcp/issues" + +[project.scripts] +code-index-mcp = "code_index_mcp.server:main" + +[tool.setuptools] +package-dir = {"" = "src"} diff --git a/src/code_index_mcp/__init__.py b/src/code_index_mcp/__init__.py index 7651704..a7b14ad 100644 --- a/src/code_index_mcp/__init__.py +++ b/src/code_index_mcp/__init__.py @@ -1,6 +1,6 @@ -"""Code Index MCP package. - -A Model Context Protocol server for code indexing, searching, and analysis. -""" - -__version__ = "0.3.0" +"""Code Index MCP package. + +A Model Context Protocol server for code indexing, searching, and analysis. 
+""" + +__version__ = "0.3.0" From af72ab5b62b79a5589c92e97bd2c4c7da77c4def Mon Sep 17 00:00:00 2001 From: johnhuang316 Date: Mon, 14 Jul 2025 10:55:52 +0800 Subject: [PATCH 3/4] fix: resolve OR search (regex pipe operator) not working in search_code_advanced - Fix ag search strategy to properly convert glob patterns to regex - Pattern "*.py" now correctly converts to "\.py$" for ag's -G parameter - Pattern "test_*.js" now correctly converts to "^test_.*\.js$" - Ensures consistent file filtering behavior across all search tools - Resolves issue where ag would incorrectly match files due to glob/regex mismatch --- src/code_index_mcp/search/ag.py | 26 ++++++++++++++++++++++++-- 1 file changed, 24 insertions(+), 2 deletions(-) diff --git a/src/code_index_mcp/search/ag.py b/src/code_index_mcp/search/ag.py index a4faf39..e2506a2 100644 --- a/src/code_index_mcp/search/ag.py +++ b/src/code_index_mcp/search/ag.py @@ -68,8 +68,30 @@ def search( cmd.extend(['--after', str(context_lines)]) if file_pattern: - # Use -G to filter files by regex pattern - cmd.extend(['-G', file_pattern]) + # Convert glob pattern to regex pattern for ag's -G parameter + # ag's -G expects regex, not glob patterns + regex_pattern = file_pattern + if '*' in file_pattern and not file_pattern.startswith('^') and not file_pattern.endswith('$'): + # Convert common glob patterns to regex + if file_pattern.startswith('*.'): + # Pattern like "*.py" -> "\.py$" + extension = file_pattern[2:] # Remove "*." 
+ regex_pattern = f'\\.{extension}$' + elif file_pattern.endswith('*'): + # Pattern like "test_*" -> "^test_.*" + prefix = file_pattern[:-1] # Remove "*" + regex_pattern = f'^{prefix}.*' + elif '*' in file_pattern: + # Pattern like "test_*.py" -> "^test_.*\.py$" + # First escape dots, then replace * with .* + regex_pattern = file_pattern.replace('.', '\\.') + regex_pattern = regex_pattern.replace('*', '.*') + if not regex_pattern.startswith('^'): + regex_pattern = '^' + regex_pattern + if not regex_pattern.endswith('$'): + regex_pattern = regex_pattern + '$' + + cmd.extend(['-G', regex_pattern]) # Add -- to treat pattern as a literal argument, preventing injection cmd.append('--') From 3970a0766c0c5231389f189618d776531db4ba79 Mon Sep 17 00:00:00 2001 From: johnhuang316 Date: Mon, 14 Jul 2025 11:10:40 +0800 Subject: [PATCH 4/4] chore: prepare v0.4.1 release - Fix ag search strategy to properly convert glob patterns to regex - Pattern "*.py" now correctly converts to "\.py$" for ag's -G parameter - Pattern "test_*.js" now correctly converts to "^test_.*\.js$" - Ensures consistent file filtering behavior across all search tools - Update search_code_advanced docstring to reflect current behavior - Clarify file_pattern handling: all tools now support glob patterns consistently - Clarify fuzzy parameter behavior: ugrep provides true fuzzy search, others use word boundaries - Bump version to 0.4.1 in pyproject.toml and __init__.py - Add comprehensive release notes for v0.4.1 --- pyproject.toml | 2 +- src/code_index_mcp/__init__.py | 2 +- src/code_index_mcp/server.py | 19 ++++++++++--------- 3 files changed, 12 insertions(+), 11 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 4c7c800..72dc7e5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "code-index-mcp" -version = "0.3.2" +version = "0.4.1" description = "Code indexing and analysis tools for LLMs using MCP" readme = "README.md" 
requires-python = ">=3.10" diff --git a/src/code_index_mcp/__init__.py b/src/code_index_mcp/__init__.py index a7b14ad..06cbc6e 100644 --- a/src/code_index_mcp/__init__.py +++ b/src/code_index_mcp/__init__.py @@ -3,4 +3,4 @@ A Model Context Protocol server for code indexing, searching, and analysis. """ -__version__ = "0.3.0" +__version__ = "0.4.1" diff --git a/src/code_index_mcp/server.py b/src/code_index_mcp/server.py index acabb7a..3c17d88 100644 --- a/src/code_index_mcp/server.py +++ b/src/code_index_mcp/server.py @@ -328,17 +328,18 @@ def search_code_advanced( case_sensitive: Whether the search should be case-sensitive. context_lines: Number of lines to show before and after the match. file_pattern: A glob pattern to filter files to search in (e.g., "*.py", "*.js", "test_*.py"). - IMPORTANT: Different tools handle file patterns differently: + All search tools now handle glob patterns consistently: - ugrep: Uses glob patterns (*.py, *.{js,ts}) - ripgrep: Uses glob patterns (*.py, *.{js,ts}) - - ag (Silver Searcher): Converts globs to regex internally (may have limitations) - - grep: Basic pattern matching only - For best compatibility, use simple patterns like "*.py" or "*.js". - fuzzy: If True, enables partial/boundary matching (not true fuzzy search). - IMPORTANT: This is NOT edit-distance fuzzy matching, but word boundary matching. - - ugrep: Native fuzzy search with --fuzzy flag (true fuzzy search) - - Other tools: Word boundary pattern matching - For literal string searches, set fuzzy=False (recommended for exact matches). + - ag (Silver Searcher): Automatically converts globs to regex patterns + - grep: Basic glob pattern matching + All common glob patterns like "*.py", "test_*.js", "src/*.ts" are supported. 
+ fuzzy: If True, enables fuzzy/partial matching; behavior varies by search tool: + - ugrep: Native fuzzy search with --fuzzy flag (true edit-distance fuzzy search) + - ripgrep, ag, grep, basic: Word boundary pattern matching (not true fuzzy search) + IMPORTANT: Only ugrep provides true fuzzy search. Other tools use word boundary + matching which allows partial matches at word boundaries. + For exact literal matches, set fuzzy=False (default and recommended). regex: If True, enables regex pattern matching. Use this for patterns like "ERROR|WARN". The pattern will be validated for safety to prevent ReDoS attacks. If False (default), uses literal string search.