From 5aeab43f189eb3fe1083c3d0ae7e30564003bdba Mon Sep 17 00:00:00 2001
From: johnhuang316 <john700@pic.net.tw>
Date: Fri, 11 Jul 2025 17:26:31 +0800
Subject: [PATCH 1/4] fix: resolve OR search (regex pipe operator) not working
 in search_code_advanced

- Add explicit `regex` parameter to all search strategies for clear regex control
- Remove misleading documentation that suggested fuzzy=True enables regex
- Rename `create_safe_fuzzy_pattern` to `create_word_boundary_pattern` for clarity
- Add `is_safe_regex_pattern` function with ReDoS protection
- Update search behavior:
  - Default: literal string search (regex=False)
  - Explicit: regex search when regex=True
  - Safety: validate regex patterns to prevent ReDoS attacks

Usage for OR searches:
- Before: search_code_advanced("ERROR|WARN") # didn't work
- After:  search_code_advanced("ERROR|WARN", regex=True) # works

Fixes #11
---
 .gitignore                           |  4 +++
 src/code_index_mcp/search/ag.py      | 31 ++++++++++++-----
 src/code_index_mcp/search/base.py    | 52 ++++++++++++++++++++++------
 src/code_index_mcp/search/basic.py   | 35 ++++++++++++++-----
 src/code_index_mcp/search/grep.py    | 37 +++++++++++++++-----
 src/code_index_mcp/search/ripgrep.py | 29 +++++++++++-----
 src/code_index_mcp/search/ugrep.py   | 25 ++++++++++---
 src/code_index_mcp/server.py         | 21 ++++++-----
 8 files changed, 175 insertions(+), 59 deletions(-)

diff --git a/.gitignore b/.gitignore
index 7f36aef..f3de0c8 100644
--- a/.gitignore
+++ b/.gitignore
@@ -43,3 +43,7 @@ test_*.py
 
 # Claude Code generated files
 CLAUDE.md
+.claude/
+.claude_chat/
+claude_*
+COMMIT_MESSAGE.txt
diff --git a/src/code_index_mcp/search/ag.py b/src/code_index_mcp/search/ag.py
index 82cedb6..a4faf39 100644
--- a/src/code_index_mcp/search/ag.py
+++ b/src/code_index_mcp/search/ag.py
@@ -5,7 +5,7 @@
 import subprocess
 from typing import Dict, List, Optional, Tuple
 
-from .base import SearchStrategy, parse_search_output, create_safe_fuzzy_pattern
+from .base import SearchStrategy, parse_search_output, create_word_boundary_pattern, is_safe_regex_pattern
 
 class AgStrategy(SearchStrategy):
     """Search strategy using 'The Silver Searcher' (ag) command-line tool."""
@@ -26,14 +26,20 @@ def search(
         case_sensitive: bool = True,
         context_lines: int = 0,
         file_pattern: Optional[str] = None,
-        fuzzy: bool = False
+        fuzzy: bool = False,
+        regex: bool = False
     ) -> Dict[str, List[Tuple[int, str]]]:
         """
         Execute a search using The Silver Searcher (ag).
 
-        Note: ag does not support native fuzzy searching. When fuzzy=True, a
-        safe fuzzy pattern with word boundaries is used for regex search.
-        When fuzzy=False, a literal string search is performed.
+        Args:
+            pattern: The search pattern
+            base_path: Directory to search in
+            case_sensitive: Whether search is case sensitive
+            context_lines: Number of context lines to show
+            file_pattern: File pattern to filter
+            fuzzy: Enable word boundary matching (not true fuzzy search)
+            regex: Enable regex pattern matching
         """
         # ag prints line numbers and groups by file by default, which is good.
         # --noheading is used to be consistent with other tools' output format.
@@ -44,11 +50,18 @@ def search(
 
         # Prepare search pattern
         search_pattern = pattern
-        if fuzzy:
-            # Use safe fuzzy pattern for regex search
-            search_pattern = create_safe_fuzzy_pattern(pattern)
+        
+        if regex:
+            # Use regex mode - check for safety first
+            if not is_safe_regex_pattern(pattern):
+                raise ValueError(f"Potentially unsafe regex pattern: {pattern}")
+            # Don't add --literal, use regex mode
+        elif fuzzy:
+            # Use word boundary pattern for partial matching
+            search_pattern = create_word_boundary_pattern(pattern)
         else:
-            cmd.append('--literal') # or -Q
+            # Use literal string search
+            cmd.append('--literal')
 
         if context_lines > 0:
             cmd.extend(['--before', str(context_lines)])
diff --git a/src/code_index_mcp/search/base.py b/src/code_index_mcp/search/base.py
index 50d97f8..33cb9bd 100644
--- a/src/code_index_mcp/search/base.py
+++ b/src/code_index_mcp/search/base.py
@@ -62,32 +62,60 @@ def parse_search_output(output: str, base_path: str) -> Dict[str, List[Tuple[int
     return results
 
 
-def create_safe_fuzzy_pattern(pattern: str) -> str:
+def create_word_boundary_pattern(pattern: str) -> str:
     """
-    Create safe fuzzy search patterns that are more permissive than exact match
-    but still safe from regex injection attacks.
+    Create word boundary patterns for partial matching.
+    This is NOT true fuzzy search, but allows matching words at boundaries.
     
     Args:
         pattern: Original search pattern
         
     Returns:
-        Safe fuzzy pattern for extended regex
+        Word boundary pattern for regex matching
     """
     # Escape any regex special characters to make them literal
     escaped = re.escape(pattern)
     
-    # Create fuzzy pattern that matches:
+    # Create word boundary pattern that matches:
     # 1. Word at start of word boundary (e.g., "test" in "testing")
     # 2. Word at end of word boundary (e.g., "test" in "mytest") 
     # 3. Whole word (e.g., "test" as standalone word)
     if len(pattern) >= 3:  # Only for patterns of reasonable length
         # This pattern allows partial matches at word boundaries
-        fuzzy_pattern = f"\\b{escaped}|{escaped}\\b"
+        boundary_pattern = f"\\b{escaped}|{escaped}\\b"
     else:
         # For short patterns, require full word boundaries to avoid too many matches
-        fuzzy_pattern = f"\\b{escaped}\\b"
+        boundary_pattern = f"\\b{escaped}\\b"
     
-    return fuzzy_pattern
+    return boundary_pattern
+
+
+def is_safe_regex_pattern(pattern: str) -> bool:
+    """
+    Check if a pattern appears to be a safe regex pattern.
+    
+    Args:
+        pattern: The search pattern to check
+        
+    Returns:
+        True only if the pattern contains an allowed regex operator and no known-dangerous construct, False otherwise
+    """
+    # Allow basic regex operators that are commonly used and safe
+    safe_regex_chars = ['|', '(', ')', '[', ']', '^', '$']
+    
+    # Check if pattern contains any regex metacharacters
+    has_regex_chars = any(char in pattern for char in safe_regex_chars)
+    
+    # Basic safety check - avoid obviously dangerous patterns
+    dangerous_patterns = [
+        r'(.+)+',  # Nested quantifiers
+        r'(.*)*',  # Nested stars
+        r'(.{0,})+',  # Potential ReDoS patterns
+    ]
+    
+    has_dangerous_patterns = any(dangerous in pattern for dangerous in dangerous_patterns)
+    
+    return has_regex_chars and not has_dangerous_patterns
 
 
 class SearchStrategy(ABC):
@@ -121,18 +149,20 @@ def search(
         case_sensitive: bool = True,
         context_lines: int = 0,
         file_pattern: Optional[str] = None,
-        fuzzy: bool = False
+        fuzzy: bool = False,
+        regex: bool = False
     ) -> Dict[str, List[Tuple[int, str]]]:
         """
         Execute a search using the specific strategy.
 
         Args:
-            pattern: The search pattern (string or regex).
+            pattern: The search pattern.
             base_path: The root directory to search in.
             case_sensitive: Whether the search is case-sensitive.
             context_lines: Number of context lines to show around each match.
             file_pattern: Glob pattern to filter files (e.g., "*.py").
-            fuzzy: Whether to enable fuzzy search.
+            fuzzy: Whether to enable fuzzy/partial matching.
+            regex: Whether to enable regex pattern matching.
 
         Returns:
             A dictionary mapping filenames to lists of (line_number, line_content) tuples.
diff --git a/src/code_index_mcp/search/basic.py b/src/code_index_mcp/search/basic.py
index 76d3607..57aab77 100644
--- a/src/code_index_mcp/search/basic.py
+++ b/src/code_index_mcp/search/basic.py
@@ -6,7 +6,7 @@
 import fnmatch
 from typing import Dict, List, Optional, Tuple
 
-from .base import SearchStrategy, create_safe_fuzzy_pattern
+from .base import SearchStrategy, create_word_boundary_pattern, is_safe_regex_pattern
 
 class BasicSearchStrategy(SearchStrategy):
     """
@@ -45,24 +45,41 @@ def search(
         case_sensitive: bool = True,
         context_lines: int = 0,
         file_pattern: Optional[str] = None,
-        fuzzy: bool = False
+        fuzzy: bool = False,
+        regex: bool = False
     ) -> Dict[str, List[Tuple[int, str]]]:
         """
         Execute a basic, line-by-line search.
 
         Note: This implementation does not support context_lines.
-        Fuzzy searching uses the shared create_safe_fuzzy_pattern function.
+        Args:
+            pattern: The search pattern
+            base_path: Directory to search in
+            case_sensitive: Whether search is case sensitive
+            context_lines: Number of context lines (not supported)
+            file_pattern: File pattern to filter
+            fuzzy: Enable word boundary matching
+            regex: Enable regex pattern matching
         """
         results: Dict[str, List[Tuple[int, str]]] = {}
         
         flags = 0 if case_sensitive else re.IGNORECASE
         
-        if fuzzy:
-            # Use the shared safe fuzzy pattern function
-            search_pattern = create_safe_fuzzy_pattern(pattern)
-            search_regex = re.compile(search_pattern, flags)
-        else:
-            search_regex = re.compile(pattern, flags)
+        try:
+            if regex:
+                # Use regex mode - check for safety first
+                if not is_safe_regex_pattern(pattern):
+                    raise ValueError(f"Potentially unsafe regex pattern: {pattern}")
+                search_regex = re.compile(pattern, flags)
+            elif fuzzy:
+                # Use word boundary pattern for partial matching
+                search_pattern = create_word_boundary_pattern(pattern)
+                search_regex = re.compile(search_pattern, flags)
+            else:
+                # Use literal string search
+                search_regex = re.compile(re.escape(pattern), flags)
+        except re.error as e:
+            raise ValueError(f"Invalid regex pattern: {pattern}, error: {e}")
 
         for root, _, files in os.walk(base_path):
             for file in files:
diff --git a/src/code_index_mcp/search/grep.py b/src/code_index_mcp/search/grep.py
index 5e9897d..cd2d18e 100644
--- a/src/code_index_mcp/search/grep.py
+++ b/src/code_index_mcp/search/grep.py
@@ -5,7 +5,7 @@
 import subprocess
 from typing import Dict, List, Optional, Tuple
 
-from .base import SearchStrategy, parse_search_output, create_safe_fuzzy_pattern
+from .base import SearchStrategy, parse_search_output, create_word_boundary_pattern, is_safe_regex_pattern
 
 class GrepStrategy(SearchStrategy):
     """
@@ -31,25 +31,44 @@ def search(
         case_sensitive: bool = True,
         context_lines: int = 0,
         file_pattern: Optional[str] = None,
-        fuzzy: bool = False
+        fuzzy: bool = False,
+        regex: bool = False
     ) -> Dict[str, List[Tuple[int, str]]]:
         """
         Execute a search using standard grep.
 
-        Note: grep does not support native fuzzy searching. When fuzzy=True, an
-        Extended Regular Expression (ERE) search is performed with safe fuzzy pattern.
-        When fuzzy=False, a literal string search is performed (-F).
+        Args:
+            pattern: The search pattern
+            base_path: Directory to search in
+            case_sensitive: Whether search is case sensitive
+            context_lines: Number of context lines to show
+            file_pattern: File pattern to filter
+            fuzzy: Enable word boundary matching
+            regex: Enable regex pattern matching
         """
         # -r: recursive, -n: line number
         cmd = ['grep', '-r', '-n']
 
         # Prepare search pattern
         search_pattern = pattern
-        if not fuzzy:
-            cmd.append('-F')  # Fixed strings, literal search
-        else:
+        
+        if regex:
+            # Use regex mode - check for safety first
+            if not is_safe_regex_pattern(pattern):
+                raise ValueError(f"Potentially unsafe regex pattern: {pattern}")
             cmd.append('-E')  # Extended Regular Expressions
-            search_pattern = create_safe_fuzzy_pattern(pattern)
+        elif fuzzy:
+            # Use word boundary pattern for partial matching
+            search_pattern = create_word_boundary_pattern(pattern)
+            cmd.append('-E')  # Extended Regular Expressions
+        else:
+            # Auto-detect if pattern looks like a safe regex
+            if is_safe_regex_pattern(pattern):
+                # Pattern contains regex chars, use extended regex mode
+                cmd.append('-E')
+            else:
+                # Use literal string search
+                cmd.append('-F')
 
         if not case_sensitive:
             cmd.append('-i')
diff --git a/src/code_index_mcp/search/ripgrep.py b/src/code_index_mcp/search/ripgrep.py
index 3f7aad7..39c4e58 100644
--- a/src/code_index_mcp/search/ripgrep.py
+++ b/src/code_index_mcp/search/ripgrep.py
@@ -5,7 +5,7 @@
 import subprocess
 from typing import Dict, List, Optional, Tuple
 
-from .base import SearchStrategy, parse_search_output, create_safe_fuzzy_pattern
+from .base import SearchStrategy, parse_search_output, create_word_boundary_pattern, is_safe_regex_pattern
 
 class RipgrepStrategy(SearchStrategy):
     """Search strategy using the 'ripgrep' (rg) command-line tool."""
@@ -26,14 +26,20 @@ def search(
         case_sensitive: bool = True,
         context_lines: int = 0,
         file_pattern: Optional[str] = None,
-        fuzzy: bool = False
+        fuzzy: bool = False,
+        regex: bool = False
     ) -> Dict[str, List[Tuple[int, str]]]:
         """
         Execute a search using ripgrep.
         
-        Note: ripgrep does not support native fuzzy searching. When fuzzy=True, a
-        safe fuzzy pattern with word boundaries is used for regex search.
-        When fuzzy=False, a literal string search is performed with --fixed-strings.
+        Args:
+            pattern: The search pattern
+            base_path: Directory to search in
+            case_sensitive: Whether search is case sensitive
+            context_lines: Number of context lines to show
+            file_pattern: File pattern to filter
+            fuzzy: Enable word boundary matching (not true fuzzy search)
+            regex: Enable regex pattern matching
         """
         cmd = ['rg', '--line-number', '--no-heading', '--color=never']
 
@@ -42,10 +48,17 @@ def search(
 
         # Prepare search pattern
         search_pattern = pattern
-        if fuzzy:
-            # Use safe fuzzy pattern for regex search
-            search_pattern = create_safe_fuzzy_pattern(pattern)
+        
+        if regex:
+            # Use regex mode - check for safety first
+            if not is_safe_regex_pattern(pattern):
+                raise ValueError(f"Potentially unsafe regex pattern: {pattern}")
+            # Don't add --fixed-strings, use regex mode
+        elif fuzzy:
+            # Use word boundary pattern for partial matching
+            search_pattern = create_word_boundary_pattern(pattern)
         else:
+            # Use literal string search
             cmd.append('--fixed-strings')
 
         if context_lines > 0:
diff --git a/src/code_index_mcp/search/ugrep.py b/src/code_index_mcp/search/ugrep.py
index 5e64302..69f2cc4 100644
--- a/src/code_index_mcp/search/ugrep.py
+++ b/src/code_index_mcp/search/ugrep.py
@@ -5,7 +5,7 @@
 import subprocess
 from typing import Dict, List, Optional, Tuple
 
-from .base import SearchStrategy, parse_search_output
+from .base import SearchStrategy, parse_search_output, create_word_boundary_pattern, is_safe_regex_pattern
 
 class UgrepStrategy(SearchStrategy):
     """Search strategy using the 'ugrep' (ug) command-line tool."""
@@ -26,10 +26,20 @@ def search(
         case_sensitive: bool = True,
         context_lines: int = 0,
         file_pattern: Optional[str] = None,
-        fuzzy: bool = False
+        fuzzy: bool = False,
+        regex: bool = False
     ) -> Dict[str, List[Tuple[int, str]]]:
         """
         Execute a search using the 'ug' command-line tool.
+        
+        Args:
+            pattern: The search pattern
+            base_path: Directory to search in
+            case_sensitive: Whether search is case sensitive
+            context_lines: Number of context lines to show
+            file_pattern: File pattern to filter
+            fuzzy: Enable true fuzzy search (ugrep native support)
+            regex: Enable regex pattern matching
         """
         if not self.is_available():
             return {"error": "ugrep (ug) command not found."}
@@ -37,9 +47,16 @@ def search(
         cmd = ['ug', '--line-number', '--no-heading']
 
         if fuzzy:
-            cmd.append('--fuzzy') # Enable fuzzy search (long form for clarity)
+            # ugrep has native fuzzy search support
+            cmd.append('--fuzzy')
+        elif regex:
+            # Use regex mode - check for safety first
+            if not is_safe_regex_pattern(pattern):
+                raise ValueError(f"Potentially unsafe regex pattern: {pattern}")
+            # Don't add --fixed-strings, use regex mode
         else:
-            cmd.append('--fixed-strings') # Use fixed-strings for non-fuzzy search
+            # Use literal string search
+            cmd.append('--fixed-strings')
 
         if not case_sensitive:
             cmd.append('--ignore-case')
diff --git a/src/code_index_mcp/server.py b/src/code_index_mcp/server.py
index fd5e109..acabb7a 100644
--- a/src/code_index_mcp/server.py
+++ b/src/code_index_mcp/server.py
@@ -314,7 +314,8 @@ def search_code_advanced(
     case_sensitive: bool = True,
     context_lines: int = 0,
     file_pattern: Optional[str] = None,
-    fuzzy: bool = False
+    fuzzy: bool = False,
+    regex: bool = False
 ) -> Dict[str, Any]:
     """
     Search for a code pattern in the project using an advanced, fast tool.
@@ -323,7 +324,7 @@ def search_code_advanced(
     (like ugrep, ripgrep, ag, or grep) for maximum performance.
     
     Args:
-        pattern: The search pattern (can be a regex if fuzzy=True).
+        pattern: The search pattern. Can be literal text or regex (see regex parameter).
         case_sensitive: Whether the search should be case-sensitive.
         context_lines: Number of lines to show before and after the match.
         file_pattern: A glob pattern to filter files to search in (e.g., "*.py", "*.js", "test_*.py").
@@ -333,13 +334,14 @@ def search_code_advanced(
                      - ag (Silver Searcher): Converts globs to regex internally (may have limitations)
                      - grep: Basic pattern matching only
                      For best compatibility, use simple patterns like "*.py" or "*.js".
-        fuzzy: If True, enables fuzzy/approximate matching.
-               IMPORTANT: Fuzzy matching support varies by tool:
-               - ugrep: Native fuzzy search with --fuzzy flag
-               - ripgrep: Safe fuzzy patterns using word boundaries
-               - ag: Safe fuzzy patterns using word boundaries  
-               - grep: Safe fuzzy patterns using word boundaries
+        fuzzy: If True, enables partial/boundary matching (not true fuzzy search).
+               IMPORTANT: This is NOT edit-distance fuzzy matching, but word boundary matching.
+               - ugrep: Native fuzzy search with --fuzzy flag (true fuzzy search)
+               - Other tools: Word boundary pattern matching
                For literal string searches, set fuzzy=False (recommended for exact matches).
+        regex: If True, enables regex pattern matching. Use this for patterns like "ERROR|WARN".
+               The pattern will be validated for safety to prevent ReDoS attacks.
+               If False (default), uses literal string search.
                
     Returns:
         A dictionary containing the search results or an error message.
@@ -364,7 +366,8 @@ def search_code_advanced(
             case_sensitive=case_sensitive,
             context_lines=context_lines,
             file_pattern=file_pattern,
-            fuzzy=fuzzy
+            fuzzy=fuzzy,
+            regex=regex
         )
         return {"results": results}
     except Exception as e:

From d75b5165293cdb123113b26d4020939815becaa0 Mon Sep 17 00:00:00 2001
From: johnhuang316 <john700@pic.net.tw>
Date: Mon, 14 Jul 2025 10:08:34 +0800
Subject: [PATCH 2/4] chore: prepare v0.3.2 release

- Bump version to 0.3.2 in pyproject.toml
- Add .gitattributes to standardize line endings across platforms
- Remove .claude/settings.local.json from git tracking
---
 .claude/settings.local.json    |  9 ------
 .gitattributes                 | 26 ++++++++++++++++
 pyproject.toml                 | 54 +++++++++++++++++-----------------
 src/code_index_mcp/__init__.py | 12 ++++----
 4 files changed, 59 insertions(+), 42 deletions(-)
 delete mode 100644 .claude/settings.local.json
 create mode 100644 .gitattributes

diff --git a/.claude/settings.local.json b/.claude/settings.local.json
deleted file mode 100644
index 938e651..0000000
--- a/.claude/settings.local.json
+++ /dev/null
@@ -1,9 +0,0 @@
-{
-  "permissions": {
-    "allow": [
-      "Bash(python:*)",
-      "Bash(npx @modelcontextprotocol/inspector --cli:*)"
-    ],
-    "deny": []
-  }
-}
\ No newline at end of file
diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000..74d0c7b
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,26 @@
+# Set default behavior to automatically normalize line endings
+* text=auto
+
+# Force specific file types to use LF line endings
+*.py text eol=lf
+*.js text eol=lf
+*.ts text eol=lf
+*.json text eol=lf
+*.md text eol=lf
+*.yml text eol=lf
+*.yaml text eol=lf
+*.toml text eol=lf
+*.txt text eol=lf
+
+# Force specific file types to use CRLF line endings
+*.bat text eol=crlf
+*.cmd text eol=crlf
+
+# Binary files should be left untouched
+*.png binary
+*.jpg binary
+*.jpeg binary
+*.gif binary
+*.ico binary
+*.zip binary
+*.tar.gz binary
\ No newline at end of file
diff --git a/pyproject.toml b/pyproject.toml
index eac8bb1..4c7c800 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,27 +1,27 @@
-[build-system]
-requires = ["setuptools>=61.0"]
-build-backend = "setuptools.build_meta"
-
-[project]
-name = "code-index-mcp"
-version = "0.3.1"
-description = "Code indexing and analysis tools for LLMs using MCP"
-readme = "README.md"
-requires-python = ">=3.10"
-license = {text = "MIT"}
-authors = [
-    {name = "johnhuang316"}
-]
-dependencies = [
-    "mcp>=0.3.0",
-]
-
-[project.urls]
-Homepage = "https://github.com/johnhuang316/code-index-mcp"
-"Bug Tracker" = "https://github.com/johnhuang316/code-index-mcp/issues"
-
-[project.scripts]
-code-index-mcp = "code_index_mcp.server:main"
-
-[tool.setuptools]
-package-dir = {"" = "src"}
+[build-system]
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "code-index-mcp"
+version = "0.3.2"
+description = "Code indexing and analysis tools for LLMs using MCP"
+readme = "README.md"
+requires-python = ">=3.10"
+license = {text = "MIT"}
+authors = [
+    {name = "johnhuang316"}
+]
+dependencies = [
+    "mcp>=0.3.0",
+]
+
+[project.urls]
+Homepage = "https://github.com/johnhuang316/code-index-mcp"
+"Bug Tracker" = "https://github.com/johnhuang316/code-index-mcp/issues"
+
+[project.scripts]
+code-index-mcp = "code_index_mcp.server:main"
+
+[tool.setuptools]
+package-dir = {"" = "src"}
diff --git a/src/code_index_mcp/__init__.py b/src/code_index_mcp/__init__.py
index 7651704..a7b14ad 100644
--- a/src/code_index_mcp/__init__.py
+++ b/src/code_index_mcp/__init__.py
@@ -1,6 +1,6 @@
-"""Code Index MCP package.
-
-A Model Context Protocol server for code indexing, searching, and analysis.
-"""
-
-__version__ = "0.3.0"
+"""Code Index MCP package.
+
+A Model Context Protocol server for code indexing, searching, and analysis.
+"""
+
+__version__ = "0.3.0"

From af72ab5b62b79a5589c92e97bd2c4c7da77c4def Mon Sep 17 00:00:00 2001
From: johnhuang316 <john700@pic.net.tw>
Date: Mon, 14 Jul 2025 10:55:52 +0800
Subject: [PATCH 3/4] fix: convert glob file patterns to regex for ag's -G
 option

- Fix ag search strategy to properly convert glob patterns to regex
- Pattern "*.py" now correctly converts to "\.py$" for ag's -G parameter
- Pattern "test_*.js" now correctly converts to "^test_.*\.js$"
- Ensures consistent file filtering behavior across all search tools
- Resolves issue where ag would incorrectly match files due to glob/regex mismatch
---
 src/code_index_mcp/search/ag.py | 26 ++++++++++++++++++++++++--
 1 file changed, 24 insertions(+), 2 deletions(-)

diff --git a/src/code_index_mcp/search/ag.py b/src/code_index_mcp/search/ag.py
index a4faf39..e2506a2 100644
--- a/src/code_index_mcp/search/ag.py
+++ b/src/code_index_mcp/search/ag.py
@@ -68,8 +68,30 @@ def search(
             cmd.extend(['--after', str(context_lines)])
             
         if file_pattern:
-            # Use -G to filter files by regex pattern
-            cmd.extend(['-G', file_pattern])
+            # Convert glob pattern to regex pattern for ag's -G parameter
+            # ag's -G expects regex, not glob patterns
+            regex_pattern = file_pattern
+            if '*' in file_pattern and not file_pattern.startswith('^') and not file_pattern.endswith('$'):
+                # Convert common glob patterns to regex
+                if file_pattern.startswith('*.'):
+                    # Pattern like "*.py" -> "\.py$"
+                    extension = file_pattern[2:]  # Remove "*."
+                    regex_pattern = f'\\.{extension}$'
+                elif file_pattern.endswith('*'):
+                    # Pattern like "test_*" -> "^test_.*"
+                    prefix = file_pattern[:-1]  # Remove "*"
+                    regex_pattern = f'^{prefix}.*'
+                elif '*' in file_pattern:
+                    # Pattern like "test_*.py" -> "^test_.*\.py$"
+                    # First escape dots, then replace * with .*
+                    regex_pattern = file_pattern.replace('.', '\\.')
+                    regex_pattern = regex_pattern.replace('*', '.*')
+                    if not regex_pattern.startswith('^'):
+                        regex_pattern = '^' + regex_pattern
+                    if not regex_pattern.endswith('$'):
+                        regex_pattern = regex_pattern + '$'
+            
+            cmd.extend(['-G', regex_pattern])
 
         # Add -- to treat pattern as a literal argument, preventing injection
         cmd.append('--')

From 3970a0766c0c5231389f189618d776531db4ba79 Mon Sep 17 00:00:00 2001
From: johnhuang316 <john700@pic.net.tw>
Date: Mon, 14 Jul 2025 11:10:40 +0800
Subject: [PATCH 4/4] chore: prepare v0.4.1 release

- Fix ag search strategy to properly convert glob patterns to regex
- Pattern "*.py" now correctly converts to "\.py$" for ag's -G parameter
- Pattern "test_*.js" now correctly converts to "^test_.*\.js$"
- Ensures consistent file filtering behavior across all search tools
- Update search_code_advanced docstring to reflect current behavior
- Clarify file_pattern handling: all tools now support glob patterns consistently
- Clarify fuzzy parameter behavior: ugrep provides true fuzzy search, others use word boundaries
- Bump version to 0.4.1 in pyproject.toml and __init__.py
- Add comprehensive release notes for v0.4.1
---
 pyproject.toml                 |  2 +-
 src/code_index_mcp/__init__.py |  2 +-
 src/code_index_mcp/server.py   | 19 ++++++++++---------
 3 files changed, 12 insertions(+), 11 deletions(-)

diff --git a/pyproject.toml b/pyproject.toml
index 4c7c800..72dc7e5 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "code-index-mcp"
-version = "0.3.2"
+version = "0.4.1"
 description = "Code indexing and analysis tools for LLMs using MCP"
 readme = "README.md"
 requires-python = ">=3.10"
diff --git a/src/code_index_mcp/__init__.py b/src/code_index_mcp/__init__.py
index a7b14ad..06cbc6e 100644
--- a/src/code_index_mcp/__init__.py
+++ b/src/code_index_mcp/__init__.py
@@ -3,4 +3,4 @@
 A Model Context Protocol server for code indexing, searching, and analysis.
 """
 
-__version__ = "0.3.0"
+__version__ = "0.4.1"
diff --git a/src/code_index_mcp/server.py b/src/code_index_mcp/server.py
index acabb7a..3c17d88 100644
--- a/src/code_index_mcp/server.py
+++ b/src/code_index_mcp/server.py
@@ -328,17 +328,18 @@ def search_code_advanced(
         case_sensitive: Whether the search should be case-sensitive.
         context_lines: Number of lines to show before and after the match.
         file_pattern: A glob pattern to filter files to search in (e.g., "*.py", "*.js", "test_*.py").
-                     IMPORTANT: Different tools handle file patterns differently:
+                     All search tools now handle glob patterns consistently:
                      - ugrep: Uses glob patterns (*.py, *.{js,ts}) 
                      - ripgrep: Uses glob patterns (*.py, *.{js,ts})
-                     - ag (Silver Searcher): Converts globs to regex internally (may have limitations)
-                     - grep: Basic pattern matching only
-                     For best compatibility, use simple patterns like "*.py" or "*.js".
-        fuzzy: If True, enables partial/boundary matching (not true fuzzy search).
-               IMPORTANT: This is NOT edit-distance fuzzy matching, but word boundary matching.
-               - ugrep: Native fuzzy search with --fuzzy flag (true fuzzy search)
-               - Other tools: Word boundary pattern matching
-               For literal string searches, set fuzzy=False (recommended for exact matches).
+                     - ag (Silver Searcher): Automatically converts globs to regex patterns
+                     - grep: Basic glob pattern matching
+                     All common glob patterns like "*.py", "test_*.js", "src/*.ts" are supported.
+        fuzzy: If True, enables fuzzy/partial matching; behavior varies by search tool:
+               - ugrep: Native fuzzy search with --fuzzy flag (true edit-distance fuzzy search)
+               - ripgrep, ag, grep, basic: Word boundary pattern matching (not true fuzzy search)
+               IMPORTANT: Only ugrep provides true fuzzy search. Other tools use word boundary 
+               matching which allows partial matches at word boundaries.
+               For exact literal matches, set fuzzy=False (default and recommended).
         regex: If True, enables regex pattern matching. Use this for patterns like "ERROR|WARN".
                The pattern will be validated for safety to prevent ReDoS attacks.
                If False (default), uses literal string search.