From e92dcec589ea25039611e73587a10dc7cdbbb421 Mon Sep 17 00:00:00 2001 From: johnhuang316 <134570882+johnhuang316@users.noreply.github.com> Date: Fri, 19 Sep 2025 17:24:43 +0800 Subject: [PATCH 01/14] refactor: remove SCIP official standards and symbol analyzer refactoring plan documents --- SCIP_OFFICIAL_STANDARDS.md | 337 -------------------- SCIP_SYMBOL_ANALYZER_REFACTORING_PLAN.md | 372 ----------------------- 2 files changed, 709 deletions(-) delete mode 100644 SCIP_OFFICIAL_STANDARDS.md delete mode 100644 SCIP_SYMBOL_ANALYZER_REFACTORING_PLAN.md diff --git a/SCIP_OFFICIAL_STANDARDS.md b/SCIP_OFFICIAL_STANDARDS.md deleted file mode 100644 index 763b56c..0000000 --- a/SCIP_OFFICIAL_STANDARDS.md +++ /dev/null @@ -1,337 +0,0 @@ -# SCIP (Source Code Intelligence Protocol) Official Standards - -*This document contains only the official SCIP standards as defined by Sourcegraph, without any project-specific implementations.* - -## Overview - -SCIP (pronounced "skip") is a language-agnostic protocol for indexing source code to power code navigation functionality such as Go to definition, Find references, and Find implementations. It is a recursive acronym that stands for "SCIP Code Intelligence Protocol." - -**Official Repository**: https://github.com/sourcegraph/scip - -## Core Design Principles (Official) - -### Primary Goals -1. **Support code navigation at IDE-level fidelity** - Provide excellent code navigation experience -2. **Make indexer creation easy** by: - - Enabling cross-repository navigation - - Supporting file-level incremental indexing - - Facilitating parallel indexing - - Supporting multi-language indexer development - -### Design Philosophy -> "SCIP is meant to be a transmission format for sending data from some producers to some consumers -- it is not meant as a storage format for querying." - -### Technical Design Decisions -1. **Protobuf Schema** - - Relatively compact binary format - - Supports easy code generation - - Enables streaming reads/writes - - Maintains forward/backward compatibility - -2. **String-based Identifiers** - - Prefer human-readable string IDs for symbols - - Avoid integer ID mapping tables - - Improve debuggability - - Limit potential bug impact - -3. 
**Data Encoding Approach**
   - Avoid direct graph encoding
   - Use document and array-based approaches
   - Enable streaming capabilities
   - Minimize memory consumption during indexing

### Non-Goals
- Not focused on code modification tools
- Not optimizing for consumer-side tooling
- Not prioritizing uncompressed data compactness
- Not serving as a standalone query engine

## Protocol Buffer Schema (Official)

### Main Message Types

```protobuf
syntax = "proto3";
package scip;

message Index {
  Metadata metadata = 1;
  repeated Document documents = 2;
  repeated SymbolInformation external_symbols = 3;
}

message Metadata {
  ProtocolVersion version = 1;
  ToolInfo tool_info = 2;
  string project_root = 3;
  TextEncoding text_encoding = 4;
}

message Document {
  string language = 4;
  string relative_path = 1;
  repeated Occurrence occurrences = 2;
  repeated SymbolInformation symbols = 3;
  string text = 5;
}

message Symbol {
  string scheme = 1;
  Package package = 2;
  repeated Descriptor descriptors = 3;
}

message SymbolInformation {
  string symbol = 1;
  repeated string documentation = 3;
  repeated Relationship relationships = 4;
  SymbolKind kind = 5;
  string display_name = 6;
  Document signature_documentation = 7;
  string enclosing_symbol = 8;
}

message Occurrence {
  Range range = 1;
  string symbol = 2;
  int32 symbol_roles = 3;
  repeated string override_documentation = 4;
  SyntaxKind syntax_kind = 5;
}

message Range {
  repeated int32 start = 1;  // [line, column]
  repeated int32 end = 2;    // [line, column]
}
```

## Official Symbol Format Specification

### Symbol Grammar (Official)
```
<symbol>     ::= <scheme> ' ' <package> ' ' (<descriptor>)+ | 'local ' <local-id>
<package>    ::= <manager> ' ' <package-name> ' ' <version>
<scheme>     ::= UTF-8 string (escape spaces with double space)
<descriptor> ::= <namespace> | <type> | <term> | <method> | <type-parameter> | <parameter> | <meta> | <macro>
```

### Symbol Components

**Scheme**: Identifies the symbol's origin/context
- UTF-8 string
- Escape spaces with double space

**Package**: Includes manager, name, and version
- Manager: Package manager identifier
- Package name: Unique package identifier
- Version: Package version

**Descriptors**: Represent nested/hierarchical symbol structure
- Form a fully qualified name
- Support various symbol types

**Local Symbols**: Only for entities within a single Document
- Format: `local <local-id>`
- Used for file-scoped symbols

### Encoding Rules (Official)
- Descriptors form a fully qualified name
- Local symbols are only for entities within a single Document
- Symbols must uniquely identify an entity across a package
- Supports escaping special characters in identifiers

## Enumerations (Official)

### ProtocolVersion
```protobuf
enum ProtocolVersion {
  UnspecifiedProtocolVersion = 0;
}
```

### TextEncoding
```protobuf
enum TextEncoding {
  UnspecifiedTextEncoding = 0;
  UTF8 = 1;
  UTF16 = 2;
}
```

### SymbolRole
```protobuf
enum SymbolRole {
  UnspecifiedSymbolRole = 0;
  Definition = 1;
  Import = 2;
  WriteAccess = 4;
  ReadAccess = 8;
  Generated = 16;
  Test = 32;
}
```

### SymbolKind
```protobuf
enum SymbolKind {
  UnspecifiedSymbolKind = 0;
  Array = 1;
  Boolean = 2;
  Class = 3;
  Constant = 4;
  Constructor = 5;
  Enum = 6;
  EnumMember = 7;
  Event = 8;
  Field = 9;
  File = 10;
  Function = 11;
  Interface = 12;
  Key = 13;
  Method = 14;
  Module = 15;
  Namespace = 16;
  Null = 17;
  Number = 18;
  Object = 19;
  Operator = 20;
  Package = 21;
  Property = 22;
  String = 23;
  Struct = 24;
  TypeParameter = 25;
  Variable = 26;
  Macro = 27;
}
```
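
Because `symbol_roles` on an `Occurrence` is a bit mask, a single occurrence can carry several `SymbolRole` values at once. The sketch below shows how a consumer might decode the field; it is a minimal illustration built from the flag values listed above, not code from any official SCIP binding.

```python
# Minimal sketch: decoding the SymbolRole bit mask carried by an Occurrence.
# Flag values are taken from the SymbolRole enum above.
SYMBOL_ROLES = {
    "Definition": 1,
    "Import": 2,
    "WriteAccess": 4,
    "ReadAccess": 8,
    "Generated": 16,
    "Test": 32,
}

def decode_symbol_roles(symbol_roles: int) -> list[str]:
    """Return the names of all roles set in the bit mask."""
    return [name for name, flag in SYMBOL_ROLES.items() if symbol_roles & flag]

# An occurrence that is both a definition and a write access:
assert decode_symbol_roles(1 | 4) == ["Definition", "WriteAccess"]
```
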
-### SyntaxKind -```protobuf -enum SyntaxKind { - UnspecifiedSyntaxKind = 0; - Comment = 1; - PunctuationDelimiter = 2; - PunctuationBracket = 3; - Keyword = 4; - // ... (additional syntax kinds) - IdentifierKeyword = 13; - IdentifierOperator = 14; - IdentifierBuiltin = 15; - IdentifierNull = 16; - IdentifierConstant = 17; - IdentifierMutableGlobal = 18; - IdentifierParameter = 19; - IdentifierLocal = 20; - IdentifierShadowed = 21; - IdentifierNamespace = 22; - IdentifierFunction = 23; - IdentifierFunctionDefinition = 24; - IdentifierMacro = 25; - IdentifierMacroDefinition = 26; - IdentifierType = 27; - IdentifierBuiltinType = 28; - IdentifierAttribute = 29; -} -``` - -## Official Position and Range Specification - -### Coordinate System -- **Line numbers**: 0-indexed -- **Column numbers**: 0-indexed character positions -- **UTF-8/UTF-16 aware**: Proper Unicode handling - -### Range Format -```protobuf -message Range { - repeated int32 start = 1; // [line, column] - repeated int32 end = 2; // [line, column] -} -``` - -### Requirements -- Start position must be <= end position -- Ranges must be within document boundaries -- Character-level precision required - -## Official Language Support - -### Currently Supported (Official Implementations) -- **TypeScript/JavaScript**: scip-typescript -- **Java**: scip-java (also supports Scala, Kotlin) -- **Python**: In development - -### Language Bindings Available -- **Rich bindings**: Go, Rust -- **Auto-generated bindings**: TypeScript, Haskell -- **CLI tools**: scip CLI for index manipulation - -## Performance Characteristics (Official Claims) - -### Compared to LSIF -- **10x speedup** in CI environments -- **4x smaller** compressed payload size -- **Better streaming**: Enables processing without loading entire index -- **Lower memory usage**: Document-based processing - -### Design Benefits -- Static typing from Protobuf schema -- More ergonomic debugging -- Reduced runtime errors -- Smaller index files - -## Official Tools and Ecosystem - -### SCIP CLI -- Index manipulation and conversion -- LSIF compatibility support -- Debugging and inspection tools - -### Official Indexers -- **scip-typescript**: `npm install -g @sourcegraph/scip-typescript` -- **scip-java**: Available as Docker image, Java launcher, fat jar - -### Integration Support -- GitLab Code Intelligence (via LSIF conversion) -- Sourcegraph native support -- VS Code extensions (community) - -## Standards Compliance Requirements - -### For SCIP Index Producers -1. Must generate valid Protocol Buffer format -2. Must follow symbol ID format specification -3. Must provide accurate position information -4. Should support streaming output -5. Must handle UTF-8/UTF-16 encoding correctly - -### For SCIP Index Consumers -1. Must handle streaming input -2. Should support all standard symbol kinds -3. Must respect symbol role classifications -4. Should provide graceful error handling -5. 
Must support position range validation - -## Official Documentation Sources - -### Primary Sources -- **Main Repository**: https://github.com/sourcegraph/scip -- **Protocol Schema**: https://github.com/sourcegraph/scip/blob/main/scip.proto -- **Design Document**: https://github.com/sourcegraph/scip/blob/main/DESIGN.md -- **Announcement Blog**: https://sourcegraph.com/blog/announcing-scip - -### Language-Specific Documentation -- **Java**: https://github.com/sourcegraph/scip-java -- **TypeScript**: https://github.com/sourcegraph/scip-typescript - -### Community Resources -- **Bindings**: Available for Go, Rust, TypeScript, Haskell -- **Examples**: Implementation examples in official repositories -- **Issues**: Bug reports and feature requests on GitHub - ---- - -*This document contains only official SCIP standards as defined by Sourcegraph.* -*Last updated: 2025-01-14* -*SCIP Version: Compatible with official v0.3.x specification* -*Source: Official Sourcegraph SCIP repositories and documentation* \ No newline at end of file diff --git a/SCIP_SYMBOL_ANALYZER_REFACTORING_PLAN.md b/SCIP_SYMBOL_ANALYZER_REFACTORING_PLAN.md deleted file mode 100644 index 25d4e8c..0000000 --- a/SCIP_SYMBOL_ANALYZER_REFACTORING_PLAN.md +++ /dev/null @@ -1,372 +0,0 @@ -# SCIPSymbolAnalyzer Refactoring Plan - -## ๐ŸŽฏ Overview - -This document outlines a comprehensive refactoring plan for the `SCIPSymbolAnalyzer` class to transform it from a monolithic architecture into a modular, extensible, and maintainable system that supports multiple programming languages with proper separation of concerns. - -## ๐Ÿ” Current Architecture Problems - -### 1. **Monolithic Design Issues** -- All language-specific logic is mixed within a single class -- The `_extract_imports` method contains Python, Objective-C, and Zig-specific logic -- Lack of extensibility - adding new languages requires modifying the core class -- Violation of Single Responsibility Principle - -### 2. **Dependency Processing Chaos** -- Methods like `_classify_zig_import`, `_categorize_import` are scattered throughout the codebase -- No unified dependency classification standard -- Language-specific standard library lists are hardcoded -- Inconsistent dependency type mapping - -### 3. **Symbol Resolution Complexity** -- Position detection logic is complex and error-prone -- Three-layer position detection strategy is difficult to maintain -- Symbol ID parsing logic lacks flexibility -- Mixed concerns between symbol extraction and position calculation - -### 4. 
**Poor Language Support Scalability** -- Each new language requires core class modifications -- No clear plugin architecture -- Language-specific logic embedded in generic methods -- Difficult to test language-specific features in isolation - -## ๐Ÿ—๏ธ Proposed Refactoring Architecture - -### Phase 1: Language Plugin System - -```python -# New architecture design -class LanguageAnalyzer(ABC): - """Language-specific analyzer interface""" - - @abstractmethod - def extract_imports(self, document, imports: ImportGroup) -> None: - """Extract import information from SCIP document""" - - @abstractmethod - def classify_dependency(self, module_name: str) -> str: - """Classify dependency as standard_library, third_party, or local""" - - @abstractmethod - def extract_symbol_metadata(self, symbol_info) -> Dict[str, Any]: - """Extract language-specific symbol metadata""" - - @abstractmethod - def get_standard_library_modules(self) -> Set[str]: - """Return set of standard library module names""" - -class ZigAnalyzer(LanguageAnalyzer): - """Zig language-specific analyzer""" - -class PythonAnalyzer(LanguageAnalyzer): - """Python language-specific analyzer""" - -class ObjectiveCAnalyzer(LanguageAnalyzer): - """Objective-C language-specific analyzer""" - -class LanguageAnalyzerFactory: - """Factory for creating language-specific analyzers""" - - def get_analyzer(self, language: str) -> LanguageAnalyzer: - """Get appropriate analyzer for language""" -``` - -### Phase 2: Dependency Management System - -```python -class DependencyClassifier: - """Unified dependency classification system""" - - def __init__(self): - self.language_configs = { - 'python': PythonDependencyConfig(), - 'zig': ZigDependencyConfig(), - 'javascript': JavaScriptDependencyConfig() - } - - def classify_import(self, import_path: str, language: str) -> str: - """Classify import based on language-specific rules""" - -class DependencyConfig(ABC): - """Language-specific dependency configuration""" - - @abstractmethod - def get_stdlib_modules(self) -> Set[str]: - """Return standard library modules for this language""" - - @abstractmethod - def classify_import(self, import_path: str) -> str: - """Classify import path for this language""" - - @abstractmethod - def normalize_import_path(self, raw_path: str) -> str: - """Normalize import path for consistent processing""" -``` - -### Phase 3: Position Resolution System - -```python -class PositionResolver: - """Unified symbol position resolution system""" - - def __init__(self): - self.strategies = [ - SCIPOccurrenceStrategy(), # High confidence - TreeSitterStrategy(), # Medium confidence - HeuristicStrategy() # Fallback - ] - - def resolve_position(self, symbol, document) -> LocationInfo: - """Resolve symbol position using strategy pattern""" - -class PositionStrategy(ABC): - """Base class for position resolution strategies""" - - @abstractmethod - def try_resolve(self, symbol, document) -> Optional[LocationInfo]: - """Attempt to resolve symbol position""" - - @abstractmethod - def get_confidence_level(self) -> str: - """Return confidence level: 'high', 'medium', 'low'""" -``` - -## ๐Ÿ“‹ Detailed Implementation Plan - -### **Phase 1: Architecture Separation (Week 1)** - -#### 1.1 Create Language Analyzer Interface -``` -src/code_index_mcp/tools/scip/analyzers/ -โ”œโ”€โ”€ base.py # Base interfaces and common utilities -โ”œโ”€โ”€ python_analyzer.py # Python-specific analysis logic -โ”œโ”€โ”€ zig_analyzer.py # Zig-specific analysis logic -โ”œโ”€โ”€ objc_analyzer.py # Objective-C-specific analysis 
logic -โ”œโ”€โ”€ javascript_analyzer.py # JavaScript/TypeScript analysis logic -โ””โ”€โ”€ factory.py # Analyzer factory and registry -``` - -**Tasks:** -- [ ] Define `LanguageAnalyzer` abstract base class -- [ ] Extract Python-specific logic to `PythonAnalyzer` -- [ ] Move Zig logic from current implementation to `ZigAnalyzer` -- [ ] Migrate Objective-C logic to `ObjectiveCAnalyzer` -- [ ] Create factory pattern for analyzer instantiation - -#### 1.2 Extract Language-Specific Logic -- [ ] Move `_classify_zig_import` to `ZigAnalyzer` -- [ ] Move Python stdlib detection to `PythonAnalyzer` -- [ ] Move Objective-C framework detection to `ObjectiveCAnalyzer` -- [ ] Create language-specific symbol metadata extraction - -### **Phase 2: Dependency Processing Refactoring (Week 2)** - -#### 2.1 Create Dependency Management Module -``` -src/code_index_mcp/tools/scip/dependencies/ -โ”œโ”€โ”€ classifier.py # Main dependency classifier -โ”œโ”€โ”€ configs/ # Language-specific configurations -โ”‚ โ”œโ”€โ”€ __init__.py -โ”‚ โ”œโ”€โ”€ python.py # Python dependency rules -โ”‚ โ”œโ”€โ”€ zig.py # Zig dependency rules -โ”‚ โ”œโ”€โ”€ javascript.py # JavaScript dependency rules -โ”‚ โ””โ”€โ”€ base.py # Base configuration class -โ”œโ”€โ”€ registry.py # Dependency registry and caching -โ””โ”€โ”€ normalizer.py # Import path normalization -``` - -**Tasks:** -- [ ] Create unified `DependencyClassifier` class -- [ ] Implement language-specific configuration classes -- [ ] Standardize dependency type constants -- [ ] Add configurable standard library lists -- [ ] Implement caching for dependency classification results - -#### 2.2 Standardize Dependency Classification -- [ ] Define consistent classification types: `standard_library`, `third_party`, `local` -- [ ] Create configurable standard library lists per language -- [ ] Support custom classification rules -- [ ] Implement dependency version detection where applicable - -### **Phase 3: Symbol Resolution Refactoring (Week 3)** - -#### 3.1 Modularize Position Detection -``` -src/code_index_mcp/tools/scip/position/ -โ”œโ”€โ”€ resolver.py # Main position resolver -โ”œโ”€โ”€ strategies/ # Position detection strategies -โ”‚ โ”œโ”€โ”€ __init__.py -โ”‚ โ”œโ”€โ”€ scip_occurrence.py # SCIP occurrence-based detection -โ”‚ โ”œโ”€โ”€ tree_sitter.py # Tree-sitter AST-based detection -โ”‚ โ”œโ”€โ”€ heuristic.py # Heuristic fallback detection -โ”‚ โ””โ”€โ”€ base.py # Base strategy interface -โ”œโ”€โ”€ calculator.py # Position calculation utilities -โ””โ”€โ”€ confidence.py # Confidence level management -``` - -**Tasks:** -- [ ] Implement strategy pattern for position resolution -- [ ] Separate SCIP occurrence processing logic -- [ ] Extract tree-sitter position calculation -- [ ] Create heuristic fallback mechanisms -- [ ] Add confidence level tracking - -#### 3.2 Improve Symbol Parsing -- [ ] Refactor `_extract_name_from_scip_symbol` method -- [ ] Unify Symbol ID format processing -- [ ] Support additional SCIP symbol formats -- [ ] Add robust error handling for malformed symbols - -### **Phase 4: Relationship Analysis Refactoring (Week 4)** - -#### 4.1 Separate Relationship Analysis Logic -``` -src/code_index_mcp/tools/scip/relationships/ -โ”œโ”€โ”€ analyzer.py # Main relationship analyzer -โ”œโ”€โ”€ types.py # Relationship type definitions -โ”œโ”€โ”€ builder.py # Relationship construction logic -โ”œโ”€โ”€ extractors/ # Relationship extraction strategies -โ”‚ โ”œโ”€โ”€ __init__.py -โ”‚ โ”œโ”€โ”€ call_extractor.py # Function call relationships -โ”‚ โ”œโ”€โ”€ 
inheritance_extractor.py # Class inheritance -โ”‚ โ””โ”€โ”€ reference_extractor.py # Symbol references -โ””โ”€โ”€ formatter.py # Relationship output formatting -``` - -**Tasks:** -- [ ] Extract relationship analysis from main analyzer -- [ ] Implement relationship type system -- [ ] Create relationship builders for different types -- [ ] Add relationship validation logic - -#### 4.2 Optimize Relationship Detection -- [ ] Improve function call detection accuracy -- [ ] Support additional relationship types (inheritance, interfaces, etc.) -- [ ] Add cross-file relationship resolution -- [ ] Implement relationship confidence scoring - -### **Phase 5: Integration and Testing (Week 5)** - -#### 5.1 Integrate New Architecture -- [ ] Update `SCIPSymbolAnalyzer` to use new plugin system -- [ ] Create adapter layer for backward compatibility -- [ ] Update configuration and initialization logic -- [ ] Add performance monitoring - -#### 5.2 Comprehensive Testing -- [ ] Unit tests for each language analyzer -- [ ] Integration tests for dependency classification -- [ ] Position resolution accuracy tests -- [ ] Performance benchmark tests -- [ ] Memory usage optimization tests - -## ๐ŸŽฏ Refactoring Goals - -### **Maintainability Improvements** -- โœ… **Single Responsibility**: Each class focuses on specific functionality -- โœ… **Open/Closed Principle**: Easy to add new language support without modifying existing code -- โœ… **Dependency Injection**: Components are replaceable and testable -- โœ… **Clear Separation of Concerns**: Position detection, dependency classification, and symbol analysis are separate - -### **Performance Optimizations** -- โœ… **Lazy Loading**: Only load required language analyzers -- โœ… **Caching Mechanisms**: Cache symbol resolution and dependency classification results -- โœ… **Parallel Processing**: Support multi-file parallel analysis -- โœ… **Memory Efficiency**: Reduce memory footprint through better data structures - -### **Extensibility Features** -- โœ… **Plugin System**: Third-party language support through plugins -- โœ… **Configuration-Driven**: Configurable analysis rules and standards -- โœ… **Stable API**: Backward-compatible interfaces -- โœ… **Language Agnostic Core**: Core logic independent of specific languages - -## ๐Ÿงช Testing Strategy - -### **Unit Testing Coverage** -- [ ] Each language analyzer tested independently -- [ ] Dependency classifier comprehensive test suite -- [ ] Position resolver strategy tests -- [ ] Symbol parsing edge case tests -- [ ] Relationship extraction validation tests - -### **Integration Testing** -- [ ] Cross-language analysis scenarios -- [ ] End-to-end file analysis workflows -- [ ] SCIP compliance validation -- [ ] Performance regression testing - -### **Regression Testing** -- [ ] Existing functionality preservation -- [ ] Zig dependency processing validation -- [ ] Python analysis accuracy maintenance -- [ ] Objective-C framework detection consistency - -## ๐Ÿ“ˆ Success Metrics - -### **Code Quality Improvements** -- **Cyclomatic Complexity**: Reduce from current >50 to <10 per method -- **Test Coverage**: Achieve >90% code coverage -- **Maintainability Index**: Improve from current score to >80 - -### **Performance Targets** -- **Analysis Speed**: <500ms per file (currently ~2s) -- **Memory Usage**: <50MB for 1000-file project (currently ~200MB) -- **Accuracy**: >95% symbol position accuracy - -### **Extensibility Goals** -- **New Language Addition**: <2 hours to add basic support -- **Plugin Development**: Third-party 
plugin support -- **Configuration Flexibility**: Runtime configuration changes - -## ๐Ÿš€ Migration Plan - -### **Phase 1: Preparation (Week 1)** -- Create new module structure -- Implement base interfaces -- Set up testing framework - -### **Phase 2: Gradual Migration (Weeks 2-4)** -- Migrate one language at a time -- Maintain backward compatibility -- Add comprehensive tests for each component - -### **Phase 3: Integration (Week 5)** -- Integrate all components -- Performance optimization -- Final testing and validation - -### **Phase 4: Documentation and Cleanup (Week 6)** -- Update documentation -- Remove deprecated code -- Finalize API documentation - -## ๐Ÿ”ง Implementation Notes - -### **Backward Compatibility** -- Maintain existing public API during transition -- Create adapter layer for legacy code -- Gradual deprecation of old methods - -### **Configuration Management** -- Use dependency injection for configurability -- Support runtime configuration updates -- Provide sensible defaults for all languages - -### **Error Handling** -- Implement comprehensive error handling at each layer -- Provide detailed error messages for debugging -- Graceful degradation when analyzers fail - -### **Logging and Monitoring** -- Add structured logging throughout the system -- Implement performance metrics collection -- Create debugging tools for complex analysis scenarios - ---- - -**Status**: ๐Ÿ“‹ Planning Phase -**Priority**: ๐Ÿ”ฅ High -**Estimated Effort**: 6 weeks -**Dependencies**: None - -This refactoring will establish a solid foundation for supporting additional programming languages and maintaining high code quality as the system grows. \ No newline at end of file From 810ee65c6113b98423486b133e16ea79fa0537a8 Mon Sep 17 00:00:00 2001 From: johnhuang316 <134570882+johnhuang316@users.noreply.github.com> Date: Fri, 19 Sep 2025 17:27:14 +0800 Subject: [PATCH 02/14] Remove benchmark suite and related tests; update code-index-mcp version to 2.3.1 --- ARCHITECTURE.md | 233 -------- CHANGELOG.md | 162 ------ benchmark_scip_framework.py | 1017 ----------------------------------- test_max_line_length.py | 170 ------ uv.lock | 2 +- 5 files changed, 1 insertion(+), 1583 deletions(-) delete mode 100644 ARCHITECTURE.md delete mode 100644 CHANGELOG.md delete mode 100644 benchmark_scip_framework.py delete mode 100644 test_max_line_length.py diff --git a/ARCHITECTURE.md b/ARCHITECTURE.md deleted file mode 100644 index f3b2d5b..0000000 --- a/ARCHITECTURE.md +++ /dev/null @@ -1,233 +0,0 @@ -# Code Index MCP System Architecture - -## Overview - -Code Index MCP is a Model Context Protocol (MCP) server that provides intelligent code indexing and analysis capabilities. The system follows SCIP (Source Code Intelligence Protocol) standards and uses a service-oriented architecture with clear separation of concerns. 
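
The layers described below cooperate through straightforward delegation. As a concrete orientation before the layer diagram, the sketch below traces a `get_file_summary` request from the service layer down to a language strategy; the class names mirror components documented in this file, but the method signatures are illustrative assumptions rather than the actual API.

```python
# Illustrative sketch of layer delegation (assumed signatures, not the real API).

class CodeIntelligenceService:
    """Service layer: orchestrates file analysis for one request."""

    def __init__(self, strategy_factory):
        self._factory = strategy_factory  # strategy selection (scip/factory.py)

    def get_file_summary(self, relative_path: str) -> dict:
        strategy = self._factory.for_file(relative_path)  # e.g. a Python strategy
        document = strategy.analyze(relative_path)        # two-phase SCIP analysis
        return {
            "path": relative_path,
            "symbol_count": len(document.symbols),
            "occurrence_count": len(document.occurrences),
        }
```
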
- -## High-Level Architecture - -``` -โ”Œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ” -โ”‚ MCP Interface Layer โ”‚ -โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค -โ”‚ Service Layer โ”‚ -โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค -โ”‚ SCIP Core Layer โ”‚ -โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค -โ”‚ Language Strategies โ”‚ -โ”œโ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”ค -โ”‚ Technical Tools Layer โ”‚ -โ””โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”€โ”˜ -``` - -## Layer Responsibilities - -### 1. MCP Interface Layer (`server.py`) -**Purpose**: Exposes MCP tools and handles protocol communication - -**Key Components**: -- MCP tool definitions (`@mcp.tool()`) -- Error handling and response formatting -- User interaction and guidance - -**MCP Tools**: -- `set_project_path` - Initialize project indexing -- `find_files` - File discovery with patterns -- `get_file_summary` - File analysis and metadata -- `search_code_advanced` - Content search across files -- `refresh_index` - Manual index rebuilding -- `get_file_watcher_status` - File monitoring status -- `configure_file_watcher` - File watcher settings - -### 2. Service Layer (`services/`) -**Purpose**: Business logic orchestration and workflow management - -**Key Services**: -- `ProjectManagementService` - Project lifecycle and initialization -- `FileWatcherService` - Real-time file monitoring and auto-refresh -- `IndexManagementService` - Index rebuild operations -- `CodeIntelligenceService` - File analysis and symbol intelligence -- `FileDiscoveryService` - File pattern matching and discovery -- `SearchService` - Advanced code search capabilities - -**Architecture Pattern**: Service delegation with clear business boundaries - -### 3. SCIP Core Layer (`scip/core/`) -**Purpose**: Language-agnostic SCIP protocol implementation - -**Core Components**: -- `SCIPSymbolManager` - Standard SCIP symbol ID generation -- `LocalReferenceResolver` - Cross-file reference resolution -- `PositionCalculator` - AST/Tree-sitter position conversion -- `MonikerManager` - External package dependency handling - -**Standards Compliance**: Full SCIP protocol buffer implementation - -### 4. 
Language Strategies (`scip/strategies/`) -**Purpose**: Language-specific code analysis using two-phase processing - -**Strategy Pattern Implementation**: -- `BaseStrategy` - Abstract interface and common functionality -- `PythonStrategy` - Python AST analysis -- `JavaScriptStrategy` - JavaScript/TypeScript Tree-sitter analysis -- `JavaStrategy` - Java Tree-sitter analysis -- `ObjectiveCStrategy` - Objective-C Tree-sitter analysis -- `FallbackStrategy` - Generic text-based analysis - -**Two-Phase Analysis**: -1. **Phase 1**: Symbol definition collection -2. **Phase 2**: Reference resolution and SCIP document generation - -### 5. Technical Tools Layer (`tools/`) -**Purpose**: Low-level technical capabilities - -**Tool Categories**: -- `filesystem/` - File system operations and pattern matching -- `scip/` - SCIP index operations and symbol analysis -- `config/` - Configuration and settings management -- `monitoring/` - File watching and system monitoring - -## Data Flow Architecture - -### File Analysis Workflow -``` -User Request โ†’ Service Layer โ†’ SCIP Strategy โ†’ Core Components โ†’ SCIP Documents -``` - -### Index Management Workflow -``` -File Changes โ†’ File Watcher โ†’ Index Management Service โ†’ Strategy Factory โ†’ Updated Index -``` - -### Search Workflow -``` -Search Query โ†’ Search Service โ†’ Advanced Search Tools โ†’ Filtered Results -``` - -## SCIP Implementation Details - -### Symbol ID Format -``` -scip-{language} {manager} {package} [version] {descriptors} -``` - -**Examples**: -- Local: `scip-python local myproject src/main.py/MyClass#method().` -- External: `scip-python pip requests 2.31.0 sessions/Session#get().` - -### Language Support Strategy - -**Parsing Approaches**: -- **Python**: Native AST module -- **JavaScript/TypeScript**: Tree-sitter -- **Java**: Tree-sitter -- **Objective-C**: Tree-sitter -- **Others**: Fallback text analysis - -**Supported Code Intelligence**: -- Symbol definitions (functions, classes, variables) -- Import/export tracking -- Cross-file reference resolution -- External dependency management -- Position-accurate symbol ranges - -## Configuration and Extensibility - -### Package Manager Integration -- **Python**: pip, conda, poetry detection -- **JavaScript**: npm, yarn package.json parsing -- **Java**: Maven pom.xml, Gradle build files -- **Configuration-driven**: Easy addition of new package managers - -### File Watcher System -- **Real-time monitoring**: Watchdog-based file system events -- **Debounced rebuilds**: 4-6 second batching of rapid changes -- **Configurable patterns**: Customizable include/exclude rules -- **Thread-safe**: ThreadPoolExecutor for concurrent rebuilds - -## Performance Characteristics - -### Indexing Performance -- **Incremental updates**: File-level granular rebuilds -- **Parallel processing**: Concurrent file analysis -- **Memory efficient**: Streaming SCIP document generation -- **Cache optimization**: Symbol table reuse across phases - -### Search Performance -- **Advanced tools**: ripgrep, ugrep, ag integration -- **Pattern optimization**: Glob-based file filtering -- **Result streaming**: Large result set handling - -## Error Handling and Reliability - -### Fault Tolerance -- **Graceful degradation**: Continue indexing on individual file failures -- **Error isolation**: Per-file error boundaries -- **Recovery mechanisms**: Automatic retry on transient failures -- **Comprehensive logging**: Debug and audit trail support - -### Validation -- **Input sanitization**: Path traversal protection -- **Range 
validation**: SCIP position boundary checking -- **Schema validation**: Protocol buffer structure verification - -## Future Architecture Considerations - -### Planned Enhancements -1. **Function Call Relationships**: Complete call graph analysis -2. **Type Information**: Enhanced semantic analysis -3. **Cross-repository Navigation**: Multi-project symbol resolution -4. **Language Server Protocol**: LSP compatibility layer -5. **Distributed Indexing**: Horizontal scaling support - -### Extension Points -- **Custom strategies**: Plugin architecture for new languages -- **Analysis plugins**: Custom symbol analyzers -- **Export formats**: Multiple output format support -- **Integration APIs**: External tool connectivity - -## Directory Structure - -``` -src/code_index_mcp/ -โ”œโ”€โ”€ server.py # MCP interface layer -โ”œโ”€โ”€ services/ # Business logic services -โ”‚ โ”œโ”€โ”€ project_management_service.py -โ”‚ โ”œโ”€โ”€ file_watcher_service.py -โ”‚ โ”œโ”€โ”€ index_management_service.py -โ”‚ โ”œโ”€โ”€ code_intelligence_service.py -โ”‚ โ””โ”€โ”€ ... -โ”œโ”€โ”€ scip/ # SCIP implementation -โ”‚ โ”œโ”€โ”€ core/ # Language-agnostic core -โ”‚ โ”‚ โ”œโ”€โ”€ symbol_manager.py -โ”‚ โ”‚ โ”œโ”€โ”€ local_reference_resolver.py -โ”‚ โ”‚ โ”œโ”€โ”€ position_calculator.py -โ”‚ โ”‚ โ””โ”€โ”€ moniker_manager.py -โ”‚ โ”œโ”€โ”€ strategies/ # Language-specific strategies -โ”‚ โ”‚ โ”œโ”€โ”€ base_strategy.py -โ”‚ โ”‚ โ”œโ”€โ”€ python_strategy.py -โ”‚ โ”‚ โ”œโ”€โ”€ javascript_strategy.py -โ”‚ โ”‚ โ””โ”€โ”€ ... -โ”‚ โ””โ”€โ”€ factory.py # Strategy selection -โ”œโ”€โ”€ tools/ # Technical capabilities -โ”‚ โ”œโ”€โ”€ filesystem/ -โ”‚ โ”œโ”€โ”€ scip/ -โ”‚ โ”œโ”€โ”€ config/ -โ”‚ โ””โ”€โ”€ monitoring/ -โ”œโ”€โ”€ indexing/ # Index management -โ””โ”€โ”€ utils/ # Shared utilities -``` - -## Key Design Principles - -1. **Standards Compliance**: Full SCIP protocol adherence -2. **Language Agnostic**: Core components independent of specific languages -3. **Extensible**: Easy addition of new languages and features -4. **Performance**: Efficient indexing and search operations -5. **Reliability**: Fault-tolerant with comprehensive error handling -6. **Maintainability**: Clear separation of concerns and modular design - ---- - -*Last updated: 2025-01-14* -*Architecture version: 2.1.0* \ No newline at end of file diff --git a/CHANGELOG.md b/CHANGELOG.md deleted file mode 100644 index c3f9006..0000000 --- a/CHANGELOG.md +++ /dev/null @@ -1,162 +0,0 @@ -# Changelog - -All notable changes to this project will be documented in this file. - -## [2.1.1] - 2025-01-15 - -### Fixed -- **SCIP Java Strategy**: Simplified Java symbol analysis implementation - - Refactored JavaStrategy to use streamlined symbol registration methods - - Removed complex JavaAnalyzer and JavaRelationshipExtractor classes - - Fixed symbol creation with basic identifier extraction - - Removed relationships summary calculation that was causing issues - - Added back to_scip_relationships method for compatibility - - Streamlined Java AST processing to focus on core symbol definitions - -### Improved -- **Code Maintainability**: Significantly reduced complexity in Java SCIP processing -- **Performance**: Faster Java file analysis with simplified approach -- **Reliability**: More stable symbol extraction without complex relationship tracking - -## [2.1.0] - 2025-01-13 - -### Major SCIP Architecture Enhancement - -This release completes the migration to SCIP-based code indexing with significant improvements to the core infrastructure and API simplification. 
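
For consumers of the Python API, the most visible change is the simplified `find_files` response described under API Improvements and Breaking Changes below. A hypothetical before/after is sketched here; the pre-2.1.0 metadata layout shown is an assumption for illustration only.

```python
# Hypothetical shapes illustrating the find_files change in 2.1.0.

# Before 2.1.0: a metadata dictionary (layout assumed for illustration)
old_result = {
    "files": [{"path": "src/main.py", "language": "python"}],
    "total": 1,
}

# 2.1.0 and later: a plain list of matching file paths
new_result: list[str] = ["src/main.py"]
```
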
- -#### Core SCIP Infrastructure -- **Complete SCIP core components**: Added symbol_manager, position_calculator, reference_resolver, moniker_manager -- **Two-phase SCIP analysis**: Implemented symbol collection โ†’ reference resolution workflow -- **Unified index management**: New index_provider and unified_index_manager for seamless index operations -- **SCIP-compliant symbol IDs**: Standard symbol ID generation with cross-file reference support - -#### Enhanced Strategy System -- **All language strategies SCIP-compliant**: Refactored Python, Java, JavaScript, Objective-C strategies -- **External symbol extraction**: Added dependency tracking and external symbol resolution -- **Proper SCIP classifications**: Implemented symbol roles and syntax kind detection -- **Robust file handling**: Enhanced encoding detection and error recovery - -#### API Improvements -- **Simplified find_files response**: Returns clean file path lists instead of complex metadata objects -- **Enhanced SCIPSymbolAnalyzer**: Replaced legacy query tools with accurate symbol analysis -- **Improved logging**: Comprehensive logging throughout SCIP indexing pipeline - -#### Dependency Updates -- **pathspec integration**: Better .gitignore parsing and file filtering -- **Updated requirements**: Added comprehensive dependency list for cross-platform support - -#### Technical Improvements -- **Symbol analysis tools**: New inspection scripts for debugging and development -- **Enhanced error handling**: Better fallback strategies and error recovery -- **Testing improvements**: Updated sample projects for multilingual testing - -#### Breaking Changes -- **find_files API**: Now returns `List[str]` instead of complex metadata dictionary -- **Internal architecture**: Significant refactoring of internal components (no user-facing impact) - -## [2.0.0] - 2025-08-11 - -### ๐Ÿš€ MAJOR RELEASE - SCIP Architecture Migration - -This release represents a **complete architectural overhaul** of the code indexing system, migrating from language-specific analyzers to a unified SCIP-based approach. 
- -#### โœจ New Architecture -- **Three-layer service architecture**: Service โ†’ Tool โ†’ Technical Components -- **Unified SCIP indexing**: Replace 8 language-specific analyzers with single SCIP protobuf system -- **Service-oriented design**: Clear separation of business logic, technical tools, and low-level operations -- **Composable components**: Modular design enabling easier testing and maintenance - -#### ๐Ÿ”ง Technical Improvements -- **Tree-sitter AST parsing**: Replace regex-based analysis with proper AST parsing -- **SCIP protobuf format**: Industry-standard code intelligence format -- **Reduced complexity**: Simplified from 40K+ lines to ~1K lines of core logic -- **Better error handling**: Improved exception handling and validation -- **Enhanced logging**: Better debugging and monitoring capabilities - -#### ๐Ÿ“ฆ Backward Compatibility -- **MCP API unchanged**: All existing MCP tools work without modification -- **Automatic migration**: Legacy indexes automatically migrated to SCIP format -- **Same functionality**: All user-facing features preserved and enhanced -- **No breaking changes**: Seamless upgrade experience - -#### ๐Ÿ—‘๏ธ Removed Components -- Language-specific analyzers (C, C++, C#, Go, Java, JavaScript, Objective-C, Python) -- Legacy indexing models and relationship management -- Complex duplicate detection and qualified name systems -- Obsolete builder and scanner components -- Demo files and temporary utilities - -#### ๐Ÿ†• New Services -- **ProjectManagementService**: Project lifecycle and configuration management -- **IndexManagementService**: Index building, rebuilding, and status monitoring -- **FileDiscoveryService**: Intelligent file discovery with pattern matching -- **CodeIntelligenceService**: Code analysis and summary generation -- **SystemManagementService**: File watcher and system configuration - -#### ๐Ÿ› ๏ธ New Tool Layer -- **SCIPIndexTool & SCIPQueryTool**: SCIP operations and querying -- **FileMatchingTool & FileSystemTool**: File system operations -- **ProjectConfigTool & SettingsTool**: Configuration management -- **FileWatcherTool**: Enhanced file monitoring capabilities - -#### ๐Ÿ“Š Performance Benefits -- **Faster indexing**: Tree-sitter parsing significantly faster than regex -- **Lower memory usage**: Streamlined data structures and processing -- **Better accuracy**: SCIP provides more precise code intelligence -- **Improved scalability**: Cleaner architecture supports larger codebases - -#### ๐Ÿ”„ Migration Guide -Existing users can upgrade seamlessly: -1. System automatically detects legacy index format -2. Migrates to new SCIP format on first run -3. All existing functionality preserved -4. No manual intervention required - -This release establishes a solid foundation for future enhancements while dramatically simplifying the codebase and improving performance. - -## [1.2.1] - 2024-08-06 - -### Fixed -- **File Watcher**: Enhanced move event handling for modern editors (VS Code, etc.) 
- - Fixed issue where files created via temp-then-move pattern weren't being detected - - Improved event processing logic to exclusively check destination path for move events - - Eliminated ambiguous fallback behavior that could cause inconsistent results - -### Improved -- **Code Quality**: Comprehensive Pylint compliance improvements - - Fixed all f-string logging warnings using lazy % formatting - - Added proper docstrings to fallback classes - - Fixed multiple-statements warnings - - Moved imports to top-level following PEP 8 conventions - - Added appropriate pylint disables for stub methods - -### Technical Details -- Unified path checking logic across all event types -- Reduced code complexity in `should_process_event()` method -- Better error handling with consistent exception management -- Enhanced debugging capabilities with improved logging - -## [1.2.0] - Previous Release - -### Added -- Enhanced find_files functionality with filename search -- Performance improvements to file discovery -- Auto-refresh troubleshooting documentation - -## [1.1.1] - Previous Release - -### Fixed -- Various bug fixes and stability improvements - -## [1.1.0] - Previous Release - -### Added -- Initial file watcher functionality -- Cross-platform file system monitoring - -## [1.0.0] - Initial Release - -### Added -- Core MCP server implementation -- Code indexing and analysis capabilities -- Multi-language support \ No newline at end of file diff --git a/benchmark_scip_framework.py b/benchmark_scip_framework.py deleted file mode 100644 index 88d05f5..0000000 --- a/benchmark_scip_framework.py +++ /dev/null @@ -1,1017 +0,0 @@ -"""SCIP Framework Performance Benchmark Suite - Comprehensive performance testing and analysis.""" - -import os -import time -import tempfile -import statistics -import gc -import psutil -import threading -from pathlib import Path -from typing import Dict, List, Any, Tuple, Optional -from dataclasses import dataclass, asdict -from concurrent.futures import ThreadPoolExecutor, as_completed - -from src.code_index_mcp.scip.framework import ( - SCIPFrameworkAPI, SCIPConfig, create_scip_framework, - PythonSCIPIndexFactory, JavaScriptSCIPIndexFactory, JavaSCIPIndexFactory, - SCIPCacheManager, StreamingIndexer -) - - -@dataclass -class BenchmarkResult: - """Benchmark result data structure.""" - test_name: str - file_count: int - total_time: float - memory_usage_mb: float - symbols_generated: int - occurrences_generated: int - cache_hit_rate: float - throughput_files_per_sec: float - throughput_symbols_per_sec: float - error_count: int - additional_metrics: Dict[str, Any] - - -@dataclass -class SystemMetrics: - """System resource metrics.""" - cpu_percent: float - memory_percent: float - memory_available_mb: float - disk_io_read_mb: float - disk_io_write_mb: float - - -class PerformanceMonitor: - """Real-time performance monitoring during benchmarks.""" - - def __init__(self): - self.monitoring = False - self.metrics_history: List[SystemMetrics] = [] - self.monitor_thread: Optional[threading.Thread] = None - self.process = psutil.Process() - - def start_monitoring(self, interval: float = 0.5): - """Start performance monitoring.""" - self.monitoring = True - self.metrics_history.clear() - self.monitor_thread = threading.Thread(target=self._monitor_loop, args=(interval,)) - self.monitor_thread.daemon = True - self.monitor_thread.start() - - def stop_monitoring(self) -> List[SystemMetrics]: - """Stop monitoring and return collected metrics.""" - self.monitoring = False - if 
self.monitor_thread: - self.monitor_thread.join(timeout=2.0) - return self.metrics_history.copy() - - def _monitor_loop(self, interval: float): - """Monitor system metrics in a loop.""" - while self.monitoring: - try: - # Get current metrics - memory_info = self.process.memory_info() - - metrics = SystemMetrics( - cpu_percent=self.process.cpu_percent(), - memory_percent=self.process.memory_percent(), - memory_available_mb=memory_info.rss / 1024 / 1024, - disk_io_read_mb=0.0, # Simplified for demo - disk_io_write_mb=0.0 - ) - - self.metrics_history.append(metrics) - time.sleep(interval) - - except Exception as e: - print(f"Monitoring error: {e}") - break - - -class SCIPFrameworkBenchmark: - """Comprehensive benchmark suite for SCIP framework.""" - - def __init__(self): - self.results: List[BenchmarkResult] = [] - self.monitor = PerformanceMonitor() - - def run_all_benchmarks(self) -> Dict[str, Any]: - """Run complete benchmark suite.""" - print("=== SCIP Framework Performance Benchmark Suite ===") - print(f"System: {psutil.cpu_count()} CPUs, {psutil.virtual_memory().total // 1024**3} GB RAM") - - with tempfile.TemporaryDirectory() as temp_dir: - # Create test projects of various sizes - small_project = self.create_test_project(temp_dir, "small", 50) - medium_project = self.create_test_project(temp_dir, "medium", 200) - large_project = self.create_test_project(temp_dir, "large", 1000) - - # Run benchmarks - benchmark_suite = [ - ("Small Project (50 files)", small_project, {'max_workers': 2, 'batch_size': 10}), - ("Medium Project (200 files)", medium_project, {'max_workers': 4, 'batch_size': 50}), - ("Large Project (1000 files)", large_project, {'max_workers': 8, 'batch_size': 100}), - ] - - for test_name, project_path, config_overrides in benchmark_suite: - print(f"\n๐Ÿƒ Running: {test_name}") - - # Basic index generation benchmark - result = self.benchmark_index_generation(test_name, project_path, config_overrides) - self.results.append(result) - - # Caching performance benchmark - cache_result = self.benchmark_caching_performance(f"{test_name} - Caching", project_path, config_overrides) - self.results.append(cache_result) - - # Streaming performance benchmark - streaming_result = self.benchmark_streaming_performance(f"{test_name} - Streaming", project_path, config_overrides) - self.results.append(streaming_result) - - # Multi-language benchmark - multi_lang_project = self.create_multi_language_project(temp_dir) - multi_result = self.benchmark_multi_language(multi_lang_project) - self.results.append(multi_result) - - # Memory stress test - memory_result = self.benchmark_memory_usage(large_project) - self.results.append(memory_result) - - # Concurrent processing benchmark - concurrent_result = self.benchmark_concurrent_processing(medium_project) - self.results.append(concurrent_result) - - # Generate comprehensive report - return self.generate_benchmark_report() - - def create_test_project(self, base_dir: str, project_name: str, file_count: int) -> str: - """Create test project with specified number of files.""" - project_dir = os.path.join(base_dir, project_name) - os.makedirs(project_dir, exist_ok=True) - - # Generate Python files with varying complexity - for i in range(file_count): - file_path = os.path.join(project_dir, f"module_{i:04d}.py") - content = self.generate_python_file_content(i, file_count) - - with open(file_path, 'w', encoding='utf-8') as f: - f.write(content) - - return project_dir - - def create_multi_language_project(self, base_dir: str) -> str: - """Create project with 
multiple programming languages.""" - project_dir = os.path.join(base_dir, "multi_language") - os.makedirs(project_dir, exist_ok=True) - - # Python files - for i in range(30): - file_path = os.path.join(project_dir, f"python_module_{i}.py") - with open(file_path, 'w') as f: - f.write(self.generate_python_file_content(i, 30)) - - # JavaScript files - for i in range(20): - file_path = os.path.join(project_dir, f"js_module_{i}.js") - with open(file_path, 'w') as f: - f.write(self.generate_javascript_file_content(i)) - - # Java files - for i in range(15): - file_path = os.path.join(project_dir, f"JavaClass_{i}.java") - with open(file_path, 'w') as f: - f.write(self.generate_java_file_content(i)) - - return project_dir - - def generate_python_file_content(self, file_index: int, total_files: int) -> str: - """Generate Python file content with realistic complexity.""" - imports_count = min(5, file_index % 8 + 1) - classes_count = file_index % 3 + 1 - functions_count = file_index % 5 + 2 - - content = f'"""Module {file_index} - Generated for performance testing."""\n\n' - - # Add imports - for i in range(imports_count): - import_target = f"module_{(file_index + i) % total_files:04d}" - content += f"from {import_target} import Class{i}, function_{i}\n" - - content += "\nimport os\nimport sys\nfrom typing import List, Dict, Optional\n\n" - - # Add classes - for class_i in range(classes_count): - content += f''' -class Class{file_index}_{class_i}: - """Test class {class_i} in module {file_index}.""" - - def __init__(self, value: int = 0): - self.value = value - self.data: Dict[str, int] = {{}} - self.items: List[str] = [] - - def process_data(self, input_data: List[int]) -> Dict[str, int]: - """Process input data and return results.""" - result = {{}} - for i, item in enumerate(input_data): - key = f"item_{{i}}" - result[key] = item * self.value - return result - - def calculate_total(self, multiplier: float = 1.0) -> float: - """Calculate total value.""" - return sum(self.data.values()) * multiplier - - def add_item(self, item: str) -> None: - """Add item to collection.""" - if item not in self.items: - self.items.append(item) - - @property - def item_count(self) -> int: - """Get number of items.""" - return len(self.items) -''' - - # Add functions - for func_i in range(functions_count): - content += f''' -def function_{file_index}_{func_i}(param1: int, param2: str = "default") -> Tuple[int, str]: - """Function {func_i} in module {file_index}.""" - processed_value = param1 * {func_i + 1} - processed_string = f"{{param2}}_{{processed_value}}" - - # Some processing logic - if processed_value > 100: - processed_value = processed_value // 2 - - return processed_value, processed_string - -def helper_function_{file_index}_{func_i}(data: List[Any]) -> Optional[Any]: - """Helper function for function_{func_i}.""" - if not data: - return None - - return data[0] if len(data) == 1 else data -''' - - # Add module-level variables - content += f''' -# Module-level variables -MODULE_ID = {file_index} -MODULE_NAME = "module_{file_index:04d}" -DEFAULT_CONFIG = {{ - "enabled": True, - "max_items": {file_index * 10 + 100}, - "timeout": {file_index * 2 + 30} -}} -''' - - return content - - def generate_javascript_file_content(self, file_index: int) -> str: - """Generate JavaScript file content.""" - return f''' -// JavaScript module {file_index} for performance testing -const express = require('express'); -const {{ EventEmitter }} = require('events'); - -class Service{file_index} extends EventEmitter {{ - 
constructor(config = {{}}) {{ - super(); - this.config = config; - this.data = new Map(); - this.active = false; - }} - - async initialize() {{ - this.active = true; - this.emit('initialized', {{ serviceId: {file_index} }}); - }} - - processData(input) {{ - const result = []; - for (const item of input) {{ - result.push({{ - id: item.id, - value: item.value * {file_index}, - timestamp: Date.now() - }}); - }} - return result; - }} - - async asyncOperation(delay = 100) {{ - return new Promise(resolve => {{ - setTimeout(() => {{ - resolve({{ result: 'completed', serviceId: {file_index} }}); - }}, delay); - }}); - }} -}} - -function helper{file_index}(data) {{ - return data.map(item => ({{ - ...item, - processed: true, - serviceId: {file_index} - }})); -}} - -const config{file_index} = {{ - serviceId: {file_index}, - enabled: true, - maxConnections: {file_index * 10 + 50} -}}; - -module.exports = {{ - Service{file_index}, - helper{file_index}, - config{file_index} -}}; -''' - - def generate_java_file_content(self, file_index: int) -> str: - """Generate Java file content.""" - return f''' -package com.benchmark.test; - -import java.util.*; -import java.util.concurrent.ConcurrentHashMap; -import java.time.LocalDateTime; - -/** - * Test class {file_index} for performance benchmarking. - * Demonstrates various Java language features. - */ -public class JavaClass_{file_index} {{ - private final int classId; - private final Map data; - private final List items; - private boolean active; - - /** - * Constructor for JavaClass_{file_index}. - * - * @param classId Unique identifier for this class - */ - public JavaClass_{file_index}(int classId) {{ - this.classId = classId; - this.data = new ConcurrentHashMap<>(); - this.items = new ArrayList<>(); - this.active = false; - }} - - /** - * Initialize the class with default values. - */ - public void initialize() {{ - this.active = true; - this.data.put("initialized", LocalDateTime.now()); - this.data.put("classId", this.classId); - }} - - /** - * Process a list of integers and return results. - * - * @param input List of integers to process - * @return Map of processed results - */ - public Map processNumbers(List input) {{ - Map results = new HashMap<>(); - - for (int i = 0; i < input.size(); i++) {{ - String key = "result_" + i; - Integer value = input.get(i) * {file_index} + i; - results.put(key, value); - }} - - return results; - }} - - /** - * Add item to the collection. - * - * @param item Item to add - * @return true if item was added, false if it already exists - */ - public boolean addItem(String item) {{ - if (item == null || item.trim().isEmpty()) {{ - return false; - }} - - if (!items.contains(item)) {{ - items.add(item); - return true; - }} - - return false; - }} - - /** - * Get total count of items. - * - * @return Number of items in collection - */ - public int getItemCount() {{ - return items.size(); - }} - - /** - * Check if class is active. - * - * @return true if active, false otherwise - */ - public boolean isActive() {{ - return active; - }} - - /** - * Set active status. 
- * - * @param active New active status - */ - public void setActive(boolean active) {{ - this.active = active; - if (active) {{ - data.put("lastActivated", LocalDateTime.now()); - }} - }} - - @Override - public String toString() {{ - return String.format("JavaClass_%d{{classId=%d, active=%s, items=%d}}", - {file_index}, classId, active, items.size()); - }} - - @Override - public boolean equals(Object obj) {{ - if (this == obj) return true; - if (obj == null || getClass() != obj.getClass()) return false; - JavaClass_{file_index} other = (JavaClass_{file_index}) obj; - return classId == other.classId; - }} - - @Override - public int hashCode() {{ - return Objects.hash(classId); - }} -}} -''' - - def benchmark_index_generation(self, test_name: str, project_path: str, config_overrides: Dict) -> BenchmarkResult: - """Benchmark basic index generation performance.""" - print(f" ๐Ÿ“Š Index generation benchmark...") - - # Configure framework - config = SCIPConfig( - project_root=project_path, - cache_enabled=False, # Disable cache for pure generation benchmark - validate_compliance=True, - **config_overrides - ) - - framework = SCIPFrameworkAPI(config) - - # Count files - file_count = len(list(Path(project_path).rglob("*.py"))) - - # Start monitoring - self.monitor.start_monitoring() - - # Run benchmark - start_time = time.time() - start_memory = psutil.Process().memory_info().rss / 1024 / 1024 - - try: - index = framework.create_complete_index() - - end_time = time.time() - end_memory = psutil.Process().memory_info().rss / 1024 / 1024 - - # Stop monitoring - metrics_history = self.monitor.stop_monitoring() - - # Calculate metrics - total_time = end_time - start_time - memory_usage = end_memory - start_memory - - symbols_count = sum(len(doc.symbols) for doc in index.documents) - occurrences_count = sum(len(doc.occurrences) for doc in index.occurrences) - - throughput_files = file_count / total_time if total_time > 0 else 0 - throughput_symbols = symbols_count / total_time if total_time > 0 else 0 - - # Additional metrics - avg_cpu = statistics.mean([m.cpu_percent for m in metrics_history]) if metrics_history else 0 - peak_memory = max([m.memory_available_mb for m in metrics_history]) if metrics_history else end_memory - - result = BenchmarkResult( - test_name=test_name, - file_count=file_count, - total_time=total_time, - memory_usage_mb=memory_usage, - symbols_generated=symbols_count, - occurrences_generated=occurrences_count, - cache_hit_rate=0.0, # No cache in this test - throughput_files_per_sec=throughput_files, - throughput_symbols_per_sec=throughput_symbols, - error_count=0, - additional_metrics={ - 'avg_cpu_percent': avg_cpu, - 'peak_memory_mb': peak_memory, - 'documents_generated': len(index.documents), - 'external_symbols': len(index.external_symbols) - } - ) - - print(f" โœ“ {file_count} files, {symbols_count} symbols in {total_time:.2f}s") - print(f" โœ“ {throughput_files:.1f} files/sec, {throughput_symbols:.1f} symbols/sec") - - return result - - except Exception as e: - self.monitor.stop_monitoring() - print(f" โŒ Benchmark failed: {e}") - - return BenchmarkResult( - test_name=f"{test_name} (FAILED)", - file_count=file_count, - total_time=0, - memory_usage_mb=0, - symbols_generated=0, - occurrences_generated=0, - cache_hit_rate=0.0, - throughput_files_per_sec=0, - throughput_symbols_per_sec=0, - error_count=1, - additional_metrics={'error': str(e)} - ) - - def benchmark_caching_performance(self, test_name: str, project_path: str, config_overrides: Dict) -> BenchmarkResult: - 
"""Benchmark caching system performance.""" - print(f" ๐Ÿ—‚๏ธ Caching performance benchmark...") - - config = SCIPConfig( - project_root=project_path, - cache_enabled=True, - **config_overrides - ) - - framework = SCIPFrameworkAPI(config) - file_count = len(list(Path(project_path).rglob("*.py"))) - - # First run to populate cache - start_time = time.time() - index1 = framework.create_complete_index() - first_run_time = time.time() - start_time - - # Second run with cache - start_time = time.time() - index2 = framework.create_complete_index() - second_run_time = time.time() - start_time - - # Get cache statistics - cache_stats = framework.get_cache_statistics() - hit_rate = float(cache_stats.get('hit_rate', '0%').rstrip('%')) / 100.0 - - symbols_count = sum(len(doc.symbols) for doc in index2.documents) - - result = BenchmarkResult( - test_name=test_name, - file_count=file_count, - total_time=second_run_time, - memory_usage_mb=0, # Not measured in this test - symbols_generated=symbols_count, - occurrences_generated=0, - cache_hit_rate=hit_rate, - throughput_files_per_sec=file_count / second_run_time if second_run_time > 0 else 0, - throughput_symbols_per_sec=symbols_count / second_run_time if second_run_time > 0 else 0, - error_count=0, - additional_metrics={ - 'first_run_time': first_run_time, - 'second_run_time': second_run_time, - 'cache_speedup': first_run_time / second_run_time if second_run_time > 0 else 0, - 'cache_entries': cache_stats.get('memory_entries', 0) - } - ) - - speedup = first_run_time / second_run_time if second_run_time > 0 else 0 - print(f" โœ“ Cache hit rate: {hit_rate:.1%}, speedup: {speedup:.1f}x") - - return result - - def benchmark_streaming_performance(self, test_name: str, project_path: str, config_overrides: Dict) -> BenchmarkResult: - """Benchmark streaming indexer performance.""" - print(f" ๐ŸŒŠ Streaming performance benchmark...") - - config = SCIPConfig( - project_root=project_path, - cache_enabled=True, - **config_overrides - ) - - framework = SCIPFrameworkAPI(config) - python_files = list(Path(project_path).rglob("*.py")) - file_paths = [str(f) for f in python_files] - - # Create streaming indexer - python_factory = PythonSCIPIndexFactory(project_path) - cache_manager = SCIPCacheManager() - streaming_indexer = StreamingIndexer( - factory=python_factory, - cache_manager=cache_manager, - max_workers=config_overrides.get('max_workers', 4), - chunk_size=config_overrides.get('batch_size', 50) // 2 - ) - - # Track progress - progress_updates = [] - def track_progress(progress): - progress_updates.append({ - 'percentage': progress.progress_percentage, - 'elapsed': progress.elapsed_time - }) - - streaming_indexer.add_progress_callback(track_progress) - - # Run streaming benchmark - start_time = time.time() - - documents = [] - for doc in streaming_indexer.index_files_streaming(file_paths): - documents.append(doc) - - total_time = time.time() - start_time - - symbols_count = sum(len(doc.symbols) for doc in documents) - occurrences_count = sum(len(doc.occurrences) for doc in documents) - - result = BenchmarkResult( - test_name=test_name, - file_count=len(file_paths), - total_time=total_time, - memory_usage_mb=0, - symbols_generated=symbols_count, - occurrences_generated=occurrences_count, - cache_hit_rate=0.0, - throughput_files_per_sec=len(file_paths) / total_time if total_time > 0 else 0, - throughput_symbols_per_sec=symbols_count / total_time if total_time > 0 else 0, - error_count=0, - additional_metrics={ - 'progress_updates': len(progress_updates), - 
'avg_chunk_time': total_time / max(1, len(progress_updates)), - 'documents_streamed': len(documents) - } - ) - - print(f" โœ“ Streamed {len(documents)} documents in {total_time:.2f}s") - - return result - - def benchmark_multi_language(self, project_path: str) -> BenchmarkResult: - """Benchmark multi-language processing.""" - print(f" ๐ŸŒ Multi-language performance benchmark...") - - config = SCIPConfig( - project_root=project_path, - max_workers=6, - supported_languages={'python', 'javascript', 'java'} - ) - - framework = SCIPFrameworkAPI(config) - - # Count files by language - python_files = len(list(Path(project_path).rglob("*.py"))) - js_files = len(list(Path(project_path).rglob("*.js"))) - java_files = len(list(Path(project_path).rglob("*.java"))) - total_files = python_files + js_files + java_files - - # Run benchmark - start_time = time.time() - index = framework.create_complete_index() - total_time = time.time() - start_time - - symbols_count = sum(len(doc.symbols) for doc in index.documents) - - result = BenchmarkResult( - test_name="Multi-Language Processing", - file_count=total_files, - total_time=total_time, - memory_usage_mb=0, - symbols_generated=symbols_count, - occurrences_generated=0, - cache_hit_rate=0.0, - throughput_files_per_sec=total_files / total_time if total_time > 0 else 0, - throughput_symbols_per_sec=symbols_count / total_time if total_time > 0 else 0, - error_count=0, - additional_metrics={ - 'python_files': python_files, - 'javascript_files': js_files, - 'java_files': java_files, - 'languages_processed': 3, - 'documents_generated': len(index.documents) - } - ) - - print(f" โœ“ {total_files} files ({python_files} Python, {js_files} JS, {java_files} Java)") - print(f" โœ“ {symbols_count} symbols in {total_time:.2f}s") - - return result - - def benchmark_memory_usage(self, project_path: str) -> BenchmarkResult: - """Benchmark memory usage under load.""" - print(f" ๐Ÿง  Memory usage benchmark...") - - # Configure for memory stress testing - config = SCIPConfig( - project_root=project_path, - max_workers=1, # Single worker to control memory usage - batch_size=10, # Small batches - cache_enabled=True - ) - - framework = SCIPFrameworkAPI(config) - file_count = len(list(Path(project_path).rglob("*.py"))) - - # Monitor memory throughout the process - self.monitor.start_monitoring(interval=0.1) # High frequency monitoring - - process = psutil.Process() - initial_memory = process.memory_info().rss / 1024 / 1024 - - start_time = time.time() - - # Process with memory monitoring - index = framework.create_complete_index() - - total_time = time.time() - start_time - final_memory = process.memory_info().rss / 1024 / 1024 - - # Stop monitoring and analyze - metrics_history = self.monitor.stop_monitoring() - - if metrics_history: - peak_memory = max(m.memory_available_mb for m in metrics_history) - avg_memory = statistics.mean(m.memory_available_mb for m in metrics_history) - else: - peak_memory = final_memory - avg_memory = final_memory - - memory_growth = final_memory - initial_memory - symbols_count = sum(len(doc.symbols) for doc in index.documents) - - result = BenchmarkResult( - test_name="Memory Usage Analysis", - file_count=file_count, - total_time=total_time, - memory_usage_mb=memory_growth, - symbols_generated=symbols_count, - occurrences_generated=0, - cache_hit_rate=0.0, - throughput_files_per_sec=file_count / total_time if total_time > 0 else 0, - throughput_symbols_per_sec=symbols_count / total_time if total_time > 0 else 0, - error_count=0, - additional_metrics={ 
- 'initial_memory_mb': initial_memory, - 'final_memory_mb': final_memory, - 'peak_memory_mb': peak_memory, - 'avg_memory_mb': avg_memory, - 'memory_efficiency_mb_per_symbol': memory_growth / symbols_count if symbols_count > 0 else 0, - 'monitoring_samples': len(metrics_history) - } - ) - - print(f" โœ“ Memory growth: {memory_growth:.1f} MB (peak: {peak_memory:.1f} MB)") - print(f" โœ“ {memory_growth/symbols_count:.3f} MB per symbol") - - return result - - def benchmark_concurrent_processing(self, project_path: str) -> BenchmarkResult: - """Benchmark concurrent processing capabilities.""" - print(f" โšก Concurrent processing benchmark...") - - python_files = list(Path(project_path).rglob("*.py")) - file_paths = [str(f) for f in python_files] - - # Test different worker counts - worker_counts = [1, 2, 4, 8] - results = {} - - for workers in worker_counts: - config = SCIPConfig( - project_root=project_path, - max_workers=workers, - batch_size=50 - ) - - framework = SCIPFrameworkAPI(config) - - start_time = time.time() - index = framework.create_complete_index() - elapsed_time = time.time() - start_time - - results[workers] = { - 'time': elapsed_time, - 'symbols': sum(len(doc.symbols) for doc in index.documents) - } - - # Find optimal worker count - best_workers = min(results.keys(), key=lambda w: results[w]['time']) - best_time = results[best_workers]['time'] - sequential_time = results[1]['time'] - - speedup = sequential_time / best_time if best_time > 0 else 0 - efficiency = speedup / best_workers if best_workers > 0 else 0 - - result = BenchmarkResult( - test_name="Concurrent Processing Analysis", - file_count=len(file_paths), - total_time=best_time, - memory_usage_mb=0, - symbols_generated=results[best_workers]['symbols'], - occurrences_generated=0, - cache_hit_rate=0.0, - throughput_files_per_sec=len(file_paths) / best_time if best_time > 0 else 0, - throughput_symbols_per_sec=results[best_workers]['symbols'] / best_time if best_time > 0 else 0, - error_count=0, - additional_metrics={ - 'optimal_workers': best_workers, - 'speedup': speedup, - 'efficiency': efficiency, - 'worker_results': results, - 'parallel_efficiency_percent': efficiency * 100 - } - ) - - print(f" โœ“ Optimal workers: {best_workers}, speedup: {speedup:.1f}x") - print(f" โœ“ Parallel efficiency: {efficiency:.1%}") - - return result - - def generate_benchmark_report(self) -> Dict[str, Any]: - """Generate comprehensive benchmark report.""" - if not self.results: - return {"error": "No benchmark results available"} - - # Calculate aggregate statistics - total_files = sum(r.file_count for r in self.results) - total_symbols = sum(r.symbols_generated for r in self.results) - total_time = sum(r.total_time for r in self.results) - - # Performance metrics - avg_throughput_files = statistics.mean([r.throughput_files_per_sec for r in self.results if r.throughput_files_per_sec > 0]) - avg_throughput_symbols = statistics.mean([r.throughput_symbols_per_sec for r in self.results if r.throughput_symbols_per_sec > 0]) - - # Memory analysis - memory_results = [r for r in self.results if r.memory_usage_mb > 0] - avg_memory_usage = statistics.mean([r.memory_usage_mb for r in memory_results]) if memory_results else 0 - - # Cache performance - cache_results = [r for r in self.results if r.cache_hit_rate > 0] - avg_cache_hit_rate = statistics.mean([r.cache_hit_rate for r in cache_results]) if cache_results else 0 - - # System information - system_info = { - 'cpu_count': psutil.cpu_count(), - 'cpu_freq_mhz': psutil.cpu_freq().current if 
psutil.cpu_freq() else 0, - 'memory_total_gb': psutil.virtual_memory().total / 1024**3, - 'memory_available_gb': psutil.virtual_memory().available / 1024**3, - 'disk_usage_percent': psutil.disk_usage('/').percent if os.name != 'nt' else psutil.disk_usage('C:\\').percent - } - - # Performance summary - performance_summary = { - 'total_benchmarks': len(self.results), - 'total_files_processed': total_files, - 'total_symbols_generated': total_symbols, - 'total_processing_time': total_time, - 'average_throughput_files_per_sec': avg_throughput_files, - 'average_throughput_symbols_per_sec': avg_throughput_symbols, - 'average_memory_usage_mb': avg_memory_usage, - 'average_cache_hit_rate': avg_cache_hit_rate, - 'failed_benchmarks': len([r for r in self.results if r.error_count > 0]) - } - - # Detailed results - detailed_results = [] - for result in self.results: - detailed_results.append(asdict(result)) - - # Performance recommendations - recommendations = self.generate_performance_recommendations() - - report = { - 'timestamp': time.strftime('%Y-%m-%d %H:%M:%S'), - 'system_info': system_info, - 'performance_summary': performance_summary, - 'detailed_results': detailed_results, - 'recommendations': recommendations - } - - # Print summary - print("\n" + "="*60) - print("๐Ÿ“Š BENCHMARK RESULTS SUMMARY") - print("="*60) - print(f"Total benchmarks: {len(self.results)}") - print(f"Files processed: {total_files:,}") - print(f"Symbols generated: {total_symbols:,}") - print(f"Total time: {total_time:.2f} seconds") - print(f"Average throughput: {avg_throughput_files:.1f} files/sec, {avg_throughput_symbols:.1f} symbols/sec") - print(f"Average memory usage: {avg_memory_usage:.1f} MB") - if avg_cache_hit_rate > 0: - print(f"Average cache hit rate: {avg_cache_hit_rate:.1%}") - print() - - # Print individual results - for result in self.results: - status = "โœ“" if result.error_count == 0 else "โŒ" - print(f"{status} {result.test_name}") - print(f" {result.file_count} files โ†’ {result.symbols_generated} symbols in {result.total_time:.2f}s") - print(f" {result.throughput_files_per_sec:.1f} files/sec, {result.throughput_symbols_per_sec:.1f} symbols/sec") - if result.cache_hit_rate > 0: - print(f" Cache hit rate: {result.cache_hit_rate:.1%}") - print() - - return report - - def generate_performance_recommendations(self) -> List[str]: - """Generate performance recommendations based on benchmark results.""" - recommendations = [] - - # Analyze results for recommendations - memory_results = [r for r in self.results if r.memory_usage_mb > 0] - if memory_results: - avg_memory = statistics.mean([r.memory_usage_mb for r in memory_results]) - if avg_memory > 500: # More than 500 MB - recommendations.append("Consider reducing batch_size or max_workers to control memory usage") - - # Cache performance - cache_results = [r for r in self.results if r.cache_hit_rate > 0] - if cache_results: - avg_cache_rate = statistics.mean([r.cache_hit_rate for r in cache_results]) - if avg_cache_rate < 0.7: # Less than 70% hit rate - recommendations.append("Cache performance is suboptimal. 
Consider increasing cache size or optimizing file change detection") - - # Throughput analysis - throughput_results = [r.throughput_files_per_sec for r in self.results if r.throughput_files_per_sec > 0] - if throughput_results: - avg_throughput = statistics.mean(throughput_results) - if avg_throughput < 10: # Less than 10 files per second - recommendations.append("Consider increasing max_workers or batch_size to improve throughput") - - # Concurrent processing - concurrent_results = [r for r in self.results if 'speedup' in r.additional_metrics] - if concurrent_results: - for result in concurrent_results: - efficiency = result.additional_metrics.get('efficiency', 0) - if efficiency < 0.5: # Less than 50% efficiency - recommendations.append("Parallel processing efficiency is low. Consider reducing worker count or optimizing workload distribution") - - # General recommendations - recommendations.extend([ - "Enable caching for repeated operations to improve performance", - "Use SSD storage for cache directory to reduce I/O latency", - "Monitor memory usage during large project processing", - "Consider streaming processing for very large codebases", - "Validate SCIP compliance only when necessary for better performance" - ]) - - return recommendations - - -def run_benchmark_suite(): - """Main function to run the complete benchmark suite.""" - benchmark = SCIPFrameworkBenchmark() - - try: - report = benchmark.run_all_benchmarks() - - # Save report to file - import json - report_path = "scip_framework_benchmark_report.json" - with open(report_path, 'w', encoding='utf-8') as f: - json.dump(report, f, indent=2, ensure_ascii=False) - - print(f"๐Ÿ“„ Detailed benchmark report saved to: {report_path}") - - # Print recommendations - print("\n๐ŸŽฏ PERFORMANCE RECOMMENDATIONS:") - for i, rec in enumerate(report['recommendations'], 1): - print(f"{i}. {rec}") - - return report - - except Exception as e: - print(f"โŒ Benchmark suite failed: {e}") - import traceback - traceback.print_exc() - return None - - -if __name__ == "__main__": - run_benchmark_suite() \ No newline at end of file diff --git a/test_max_line_length.py b/test_max_line_length.py deleted file mode 100644 index 71cb12a..0000000 --- a/test_max_line_length.py +++ /dev/null @@ -1,170 +0,0 @@ -#!/usr/bin/env python3 -""" -Unit tests for max_line_length parameter in search functionality. -Tests both the default behavior (no limit) and the truncation behavior. 
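The rule these deleted tests exercised is easy to state. A minimal sketch of the truncation behavior being asserted (hypothetical helper; the real logic lived in parse_search_output):

```python
from typing import Optional

def truncate_match(content: str, max_line_length: Optional[int]) -> str:
    """Mirror the asserted behavior: no limit by default, suffix when truncating."""
    if max_line_length is None or len(content) <= max_line_length:
        return content
    return content[:max_line_length] + "... (truncated)"

# With a limit of 30, output length is 45: 30 kept chars + len("... (truncated)") == 15.
assert len(truncate_match("x" * 1000, 30)) == 45
```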
-""" - -import pytest -import tempfile -import os -from unittest.mock import Mock, patch -from src.code_index_mcp.search.base import parse_search_output -from src.code_index_mcp.search.basic import BasicSearchStrategy - - -class TestMaxLineLengthParameter: - """Test class for max_line_length parameter functionality.""" - - def test_parse_search_output_no_limit_default(self): - """Test that parse_search_output has no limit by default (None).""" - # Create test output with a very long line - long_line = "x" * 1000 # 1000 character line - test_output = f"test_file.py:10:{long_line}" - base_path = "/test/path" - - result = parse_search_output(test_output, base_path) - - # Should return full line without truncation - # Check that we have exactly one result - assert len(result) == 1 - # Get the first (and only) key-value pair - file_path, matches = next(iter(result.items())) - assert len(matches) == 1 - line_num, content = matches[0] - assert line_num == 10 - assert content == long_line - assert len(content) == 1000 - - def test_parse_search_output_no_limit_explicit(self): - """Test that parse_search_output with explicit None has no limit.""" - # Create test output with a very long line - long_line = "x" * 500 # 500 character line - test_output = f"src/module.py:5:{long_line}" - base_path = "/project" - - result = parse_search_output(test_output, base_path, max_line_length=None) - - # Should return full line without truncation - assert len(result) == 1 - file_path, matches = next(iter(result.items())) - line_num, content = matches[0] - assert line_num == 5 - assert content == long_line - assert len(content) == 500 - - def test_parse_search_output_with_limit(self): - """Test that parse_search_output truncates when max_line_length is set.""" - # Create test output with a long line - long_line = "This is a very long line that should be truncated when max_line_length is applied" - test_output = f"example.py:1:{long_line}" - base_path = "/base" - - result = parse_search_output(test_output, base_path, max_line_length=30) - - # Should return truncated line with suffix - assert len(result) == 1 - file_path, matches = next(iter(result.items())) - line_num, content = matches[0] - assert line_num == 1 - assert content == "This is a very long line that ... (truncated)" - assert len(content) == 45 # 30 + len("... 
(truncated)") - - def test_parse_search_output_exactly_at_limit(self): - """Test that lines exactly at the limit are not truncated.""" - exact_line = "x" * 50 # Exactly 50 characters - test_output = f"file.py:1:{exact_line}" - base_path = "/base" - - result = parse_search_output(test_output, base_path, max_line_length=50) - - # Should return full line without truncation - assert len(result) == 1 - file_path, matches = next(iter(result.items())) - line_num, content = matches[0] - assert line_num == 1 - assert content == exact_line - assert len(content) == 50 - assert "truncated" not in content - - def test_parse_search_output_under_limit(self): - """Test that short lines are never truncated.""" - short_line = "Short line" - test_output = f"file.py:1:{short_line}" - base_path = "/base" - - result = parse_search_output(test_output, base_path, max_line_length=100) - - # Should return full line without truncation - assert len(result) == 1 - file_path, matches = next(iter(result.items())) - line_num, content = matches[0] - assert line_num == 1 - assert content == short_line - assert "truncated" not in content - - def test_basic_search_strategy_max_line_length(self): - """Test that BasicSearchStrategy respects max_line_length parameter.""" - strategy = BasicSearchStrategy() - - with tempfile.TemporaryDirectory() as temp_dir: - # Create test file with long line - test_file = os.path.join(temp_dir, "test.py") - long_line = "def very_long_function_name_that_should_be_cut_when_max_line_length_is_applied():" - - with open(test_file, "w") as f: - f.write(f"{long_line}\n") - f.write(" pass\n") - - # Search with max_line_length - results = strategy.search( - pattern="very_long_function", - base_path=temp_dir, - max_line_length=30 - ) - - # Should find the file and truncate the line - assert "test.py" in results - line_num, content = results["test.py"][0] - assert line_num == 1 - assert content.endswith("... (truncated)") - # 30 chars + "... (truncated)" (15 chars) = 45 total - assert len(content) == 45 - - def test_basic_search_strategy_no_max_line_length(self): - """Test that BasicSearchStrategy returns full lines when max_line_length is None.""" - strategy = BasicSearchStrategy() - - with tempfile.TemporaryDirectory() as temp_dir: - # Create test file with long line - test_file = os.path.join(temp_dir, "test.py") - long_line = "def very_long_function_name_that_should_not_be_cut_by_default():" - - with open(test_file, "w") as f: - f.write(f"{long_line}\n") - f.write(" pass\n") - - # Search without max_line_length (default None) - results = strategy.search( - pattern="very_long_function", - base_path=temp_dir, - max_line_length=None - ) - - # Should find the file and return full line - assert "test.py" in results - line_num, content = results["test.py"][0] - assert line_num == 1 - assert content == long_line - assert "truncated" not in content - - -def test_integration_search_service_max_line_length(): - """Integration test for SearchService with max_line_length parameter.""" - # This would require mocking the full search service setup - # For now, we'll test the core functionality through parse_search_output - pass - - -if __name__ == "__main__": - # Run tests directly - pytest.main([__file__, "-v"]) \ No newline at end of file diff --git a/uv.lock b/uv.lock index 9185ca3..f2cf2be 100644 --- a/uv.lock +++ b/uv.lock @@ -49,7 +49,7 @@ wheels = [ [[package]] name = "code-index-mcp" -version = "2.3.0" +version = "2.3.1" source = { editable = "." 
} dependencies = [ { name = "mcp" }, From 9eec8933b3dd50ddc4f230b7abd58b437bb76c54 Mon Sep 17 00:00:00 2001 From: johnhuang316 <134570882+johnhuang316@users.noreply.github.com> Date: Wed, 24 Sep 2025 15:14:12 +0800 Subject: [PATCH 03/14] refactor: clean up whitespace and improve code readability in indexing modules - Removed unnecessary blank lines and adjusted spacing for consistency across JSONIndexBuilder, JSONIndexManager, TypeScriptParsingStrategy, and ProjectManagementService classes. - Enhanced overall code clarity without altering functionality. --- .../indexing/json_index_builder.py | 60 +++---- .../indexing/json_index_manager.py | 166 +++++++----------- .../strategies/typescript_strategy.py | 20 +-- .../services/project_management_service.py | 12 +- 4 files changed, 111 insertions(+), 147 deletions(-) diff --git a/src/code_index_mcp/indexing/json_index_builder.py b/src/code_index_mcp/indexing/json_index_builder.py index 750f3cf..5f69669 100644 --- a/src/code_index_mcp/indexing/json_index_builder.py +++ b/src/code_index_mcp/indexing/json_index_builder.py @@ -45,18 +45,18 @@ class JSONIndexBuilder: def __init__(self, project_path: str, additional_excludes: Optional[List[str]] = None): from ..utils import FileFilter - + # Input validation if not isinstance(project_path, str): raise ValueError(f"Project path must be a string, got {type(project_path)}") - + project_path = project_path.strip() if not project_path: raise ValueError("Project path cannot be empty") - + if not os.path.isdir(project_path): raise ValueError(f"Project path does not exist: {project_path}") - + self.project_path = project_path self.in_memory_index: Optional[Dict[str, Any]] = None self.strategy_factory = StrategyFactory() @@ -74,11 +74,11 @@ def __init__(self, project_path: str, additional_excludes: Optional[List[str]] = def _process_file(self, file_path: str, specialized_extensions: set) -> Optional[Tuple[Dict, Dict, str, bool]]: """ Process a single file - designed for parallel execution. 
- + Args: file_path: Path to the file to process specialized_extensions: Set of extensions with specialized parsers - + Returns: Tuple of (symbols, file_info, language, is_specialized) or None on error """ @@ -88,20 +88,20 @@ def _process_file(self, file_path: str, specialized_extensions: set) -> Optional ext = Path(file_path).suffix.lower() rel_path = os.path.relpath(file_path, self.project_path).replace('\\', '/') - + # Get appropriate strategy strategy = self.strategy_factory.get_strategy(ext) - + # Track strategy usage is_specialized = ext in specialized_extensions - + # Parse file using strategy symbols, file_info = strategy.parse_file(rel_path, content) - + logger.debug(f"Parsed {rel_path}: {len(symbols)} symbols ({file_info.language})") - + return (symbols, {rel_path: file_info}, file_info.language, is_specialized) - + except Exception as e: logger.warning(f"Error processing {file_path}: {e}") return None @@ -128,49 +128,49 @@ def build_index(self, parallel: bool = True, max_workers: Optional[int] = None) # Get specialized extensions for tracking specialized_extensions = set(self.strategy_factory.get_specialized_extensions()) - + # Get list of files to process files_to_process = self._get_supported_files() total_files = len(files_to_process) - + if total_files == 0: logger.warning("No files to process") return self._create_empty_index() - + logger.info(f"Processing {total_files} files...") - + if parallel and total_files > 1: # Use ThreadPoolExecutor for I/O-bound file reading # ProcessPoolExecutor has issues with strategy sharing if max_workers is None: max_workers = min(os.cpu_count() or 4, total_files) - + logger.info(f"Using parallel processing with {max_workers} workers") - + with ThreadPoolExecutor(max_workers=max_workers) as executor: # Submit all tasks future_to_file = { executor.submit(self._process_file, file_path, specialized_extensions): file_path for file_path in files_to_process } - + # Process completed tasks processed = 0 for future in as_completed(future_to_file): file_path = future_to_file[future] result = future.result() - + if result: symbols, file_info_dict, language, is_specialized = result all_symbols.update(symbols) all_files.update(file_info_dict) languages.add(language) - + if is_specialized: specialized_count += 1 else: fallback_count += 1 - + processed += 1 if processed % 100 == 0: logger.debug(f"Processed {processed}/{total_files} files") @@ -184,7 +184,7 @@ def build_index(self, parallel: bool = True, max_workers: Optional[int] = None) all_symbols.update(symbols) all_files.update(file_info_dict) languages.add(language) - + if is_specialized: specialized_count += 1 else: @@ -218,7 +218,7 @@ def build_index(self, parallel: bool = True, max_workers: Optional[int] = None) logger.info(f"Strategy usage: {specialized_count} specialized, {fallback_count} fallback") return index - + def _create_empty_index(self) -> Dict[str, Any]: """Create an empty index structure.""" metadata = IndexMetadata( @@ -231,7 +231,7 @@ def _create_empty_index(self) -> Dict[str, Any]: specialized_parsers=0, fallback_files=0 ) - + return { "metadata": asdict(metadata), "symbols": {}, @@ -371,16 +371,16 @@ def get_file_symbols(self, file_path: str) -> List[Dict[str, Any]]: # Work directly with global symbols for this file global_symbols = self.in_memory_index.get("symbols", {}) result = [] - + # Find all symbols for this file directly from global symbols for symbol_id, symbol_data in global_symbols.items(): symbol_file = symbol_data.get("file", "").replace("\\", "/") - + # Check if this 
symbol belongs to our file if symbol_file == file_path: symbol_type = symbol_data.get("type", "unknown") symbol_name = symbol_id.split("::")[-1] # Extract symbol name from ID - + # Create symbol info symbol_info = { "name": symbol_name, @@ -388,7 +388,7 @@ def get_file_symbols(self, file_path: str) -> List[Dict[str, Any]]: "line": symbol_data.get("line"), "signature": symbol_data.get("signature") } - + # Categorize by type if symbol_type in ["function", "method"]: result.append(symbol_info) @@ -397,7 +397,7 @@ def get_file_symbols(self, file_path: str) -> List[Dict[str, Any]]: # Sort by line number for consistent ordering result.sort(key=lambda x: x.get("line", 0)) - + return result except Exception as e: diff --git a/src/code_index_mcp/indexing/json_index_manager.py b/src/code_index_mcp/indexing/json_index_manager.py index d4564f3..d01ddf1 100644 --- a/src/code_index_mcp/indexing/json_index_manager.py +++ b/src/code_index_mcp/indexing/json_index_manager.py @@ -11,18 +11,20 @@ import os import tempfile import threading +import fnmatch from pathlib import Path from typing import Dict, List, Optional, Any from .json_index_builder import JSONIndexBuilder from ..constants import SETTINGS_DIR, INDEX_FILE +from ..utils import FileFilter logger = logging.getLogger(__name__) class JSONIndexManager: """Manages JSON-based code index lifecycle and storage.""" - + def __init__(self): self.project_path: Optional[str] = None self.index_builder: Optional[JSONIndexBuilder] = None @@ -30,7 +32,7 @@ def __init__(self): self.index_path: Optional[str] = None self._lock = threading.RLock() logger.info("Initialized JSON Index Manager") - + def set_project_path(self, project_path: str) -> bool: """Set the project path and initialize index storage.""" with self._lock: @@ -39,67 +41,67 @@ def set_project_path(self, project_path: str) -> bool: if not project_path or not isinstance(project_path, str): logger.error(f"Invalid project path: {project_path}") return False - + project_path = project_path.strip() if not project_path: logger.error("Project path cannot be empty") return False - + if not os.path.isdir(project_path): logger.error(f"Project path does not exist: {project_path}") return False - + self.project_path = project_path self.index_builder = JSONIndexBuilder(project_path) - + # Create temp directory for index storage project_hash = hashlib.md5(project_path.encode()).hexdigest()[:12] self.temp_dir = os.path.join(tempfile.gettempdir(), SETTINGS_DIR, project_hash) os.makedirs(self.temp_dir, exist_ok=True) - + self.index_path = os.path.join(self.temp_dir, INDEX_FILE) - + logger.info(f"Set project path: {project_path}") logger.info(f"Index storage: {self.index_path}") return True - + except Exception as e: logger.error(f"Failed to set project path: {e}") return False - + def build_index(self, force_rebuild: bool = False) -> bool: """Build or rebuild the index.""" with self._lock: if not self.index_builder or not self.project_path: logger.error("Index builder not initialized") return False - + try: # Check if we need to rebuild if not force_rebuild and self._is_index_fresh(): logger.info("Index is fresh, skipping rebuild") return True - + logger.info("Building JSON index...") index = self.index_builder.build_index() - + # Save to disk self.index_builder.save_index(index, self.index_path) - + logger.info(f"Successfully built index with {len(index['symbols'])} symbols") return True - + except Exception as e: logger.error(f"Failed to build index: {e}") return False - + def load_index(self) -> bool: """Load 
existing index from disk.""" with self._lock: if not self.index_builder or not self.index_path: logger.error("Index manager not initialized") return False - + try: index = self.index_builder.load_index(self.index_path) if index: @@ -108,11 +110,11 @@ def load_index(self) -> bool: else: logger.warning("No existing index found") return False - + except Exception as e: logger.error(f"Failed to load index: {e}") return False - + def refresh_index(self) -> bool: """Refresh the index (rebuild and reload).""" with self._lock: @@ -120,7 +122,7 @@ def refresh_index(self) -> bool: if self.build_index(force_rebuild=True): return self.load_index() return False - + def find_files(self, pattern: str = "*") -> List[str]: """Find files matching a pattern.""" with self._lock: @@ -128,41 +130,40 @@ def find_files(self, pattern: str = "*") -> List[str]: if not isinstance(pattern, str): logger.error(f"Pattern must be a string, got {type(pattern)}") return [] - + pattern = pattern.strip() if not pattern: pattern = "*" - + if not self.index_builder or not self.index_builder.in_memory_index: logger.warning("Index not loaded") return [] - + try: files = list(self.index_builder.in_memory_index["files"].keys()) - + if pattern == "*": return files - + # Simple pattern matching - import fnmatch return [f for f in files if fnmatch.fnmatch(f, pattern)] - + except Exception as e: logger.error(f"Error finding files: {e}") return [] - + def get_file_summary(self, file_path: str) -> Optional[Dict[str, Any]]: """ Get summary information for a file. - + This method attempts to retrieve comprehensive file information including symbol counts, functions, classes, methods, and imports. If the index is not loaded, it will attempt auto-initialization to restore from the most recent index state. 
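The auto-initialization mentioned here leans on a deterministic cache location. A standalone sketch of the scheme (constants from this repo; the helper name is hypothetical):

```python
import hashlib
import os
import tempfile

def cached_index_path(project_path: str) -> str:
    # Stable per-project directory: <tempdir>/code_indexer/<md5(project_path)[:12]>/index.json
    project_hash = hashlib.md5(project_path.encode()).hexdigest()[:12]
    return os.path.join(tempfile.gettempdir(), "code_indexer", project_hash, "index.json")
```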
- + Args: file_path: Relative path to the file - + Returns: Dictionary containing file summary information, or None if not found """ @@ -171,38 +172,38 @@ def get_file_summary(self, file_path: str) -> Optional[Dict[str, Any]]: if not isinstance(file_path, str): logger.error(f"File path must be a string, got {type(file_path)}") return None - + file_path = file_path.strip() if not file_path: logger.error("File path cannot be empty") return None - + # Try to load cached index if not ready if not self.index_builder or not self.index_builder.in_memory_index: if not self._try_load_cached_index(): logger.warning("Index not loaded and no cached index available") return None - + try: # Normalize file path file_path = file_path.replace('\\', '/') if file_path.startswith('./'): file_path = file_path[2:] - + # Get file info file_info = self.index_builder.in_memory_index["files"].get(file_path) if not file_info: logger.warning(f"File not found in index: {file_path}") return None - + # Get symbols in file symbols = self.index_builder.get_file_symbols(file_path) - + # Categorize symbols by signature functions = [] classes = [] methods = [] - + for s in symbols: signature = s.get("signature", "") if signature: @@ -210,7 +211,7 @@ def get_file_summary(self, file_path: str) -> Optional[Dict[str, Any]]: # Method: contains class context methods.append(s) elif signature.startswith("def "): - # Function: starts with def but no class context + # Function: starts with def but no class context functions.append(s) elif signature.startswith("class ") or signature is None: # Class: starts with class or has no signature @@ -227,7 +228,7 @@ def get_file_summary(self, file_path: str) -> Optional[Dict[str, Any]]: else: # Default to function functions.append(s) - + return { "file_path": file_path, "language": file_info["language"], @@ -239,63 +240,26 @@ def get_file_summary(self, file_path: str) -> Optional[Dict[str, Any]]: "imports": file_info.get("imports", []), "exports": file_info.get("exports", []) } - + except Exception as e: logger.error(f"Error getting file summary: {e}") return None - - def search_symbols(self, query: str, symbol_type: Optional[str] = None) -> List[Dict[str, Any]]: - """Search for symbols by name.""" - with self._lock: - if not self.index_builder or not self.index_builder.in_memory_index: - logger.warning("Index not loaded") - return [] - - try: - results = [] - query_lower = query.lower() - - for symbol_id, symbol_data in self.index_builder.in_memory_index["symbols"].items(): - # Filter by type if specified - if symbol_type and symbol_data.get("type") != symbol_type: - continue - - # Check if query matches symbol name - if query_lower in symbol_id.lower(): - results.append({ - "id": symbol_id, - **symbol_data - }) - - return results[:50] # Limit results - - except Exception as e: - logger.error(f"Error searching symbols: {e}") - return [] - - def get_symbol_callers(self, symbol_name: str) -> List[str]: - """Get all symbols that call the given symbol.""" - with self._lock: - if not self.index_builder: - return [] - - return self.index_builder.get_callers(symbol_name) - + def get_index_stats(self) -> Dict[str, Any]: """Get statistics about the current index.""" with self._lock: if not self.index_builder or not self.index_builder.in_memory_index: return {"status": "not_loaded"} - + try: index = self.index_builder.in_memory_index metadata = index["metadata"] - + symbol_counts = {} for symbol_data in index["symbols"].values(): symbol_type = symbol_data.get("type", "unknown") 
symbol_counts[symbol_type] = symbol_counts.get(symbol_type, 0) + 1 - + return { "status": "loaded", "project_path": metadata["project_path"], @@ -306,51 +270,50 @@ def get_index_stats(self) -> Dict[str, Any]: "index_version": metadata["index_version"], "timestamp": metadata["timestamp"] } - + except Exception as e: logger.error(f"Error getting index stats: {e}") return {"status": "error", "error": str(e)} - + def _is_index_fresh(self) -> bool: """Check if the current index is fresh.""" if not self.index_path or not os.path.exists(self.index_path): return False - + try: - from ..utils import FileFilter file_filter = FileFilter() - + # Simple freshness check - index exists and is recent index_mtime = os.path.getmtime(self.index_path) base_path = Path(self.project_path) - + # Check if any source files are newer than index for root, dirs, files in os.walk(self.project_path): # Filter directories using centralized logic dirs[:] = [d for d in dirs if not file_filter.should_exclude_directory(d)] - + for file in files: file_path = Path(root) / file if file_filter.should_process_path(file_path, base_path): if os.path.getmtime(str(file_path)) > index_mtime: return False - + return True - + except Exception as e: logger.warning(f"Error checking index freshness: {e}") return False - + def _try_load_cached_index(self, expected_project_path: Optional[str] = None) -> bool: """ Try to load a cached index file if available. - + This is a simplified version of auto-initialization that only loads a cached index if we can verify it matches the expected project. - + Args: expected_project_path: Optional path to verify against cached index - + Returns: True if cached index was loaded successfully, False otherwise. """ @@ -358,28 +321,28 @@ def _try_load_cached_index(self, expected_project_path: Optional[str] = None) -> # First try to load from current index_path if set if self.index_path and os.path.exists(self.index_path): return self.load_index() - + # If expected project path provided, try to find its cache if expected_project_path: project_hash = hashlib.md5(expected_project_path.encode()).hexdigest()[:12] temp_dir = os.path.join(tempfile.gettempdir(), SETTINGS_DIR, project_hash) index_path = os.path.join(temp_dir, INDEX_FILE) - + if os.path.exists(index_path): # Verify the cached index matches the expected project with open(index_path, 'r', encoding='utf-8') as f: index_data = json.load(f) cached_project = index_data.get('metadata', {}).get('project_path') - + if cached_project == expected_project_path: self.temp_dir = temp_dir self.index_path = index_path return self.load_index() else: logger.warning(f"Cached index project mismatch: {cached_project} != {expected_project_path}") - + return False - + except Exception as e: logger.debug(f"Failed to load cached index: {e}") return False @@ -400,4 +363,5 @@ def cleanup(self): def get_index_manager() -> JSONIndexManager: """Get the global index manager instance.""" - return _index_manager \ No newline at end of file + return _index_manager + \ No newline at end of file diff --git a/src/code_index_mcp/indexing/strategies/typescript_strategy.py b/src/code_index_mcp/indexing/strategies/typescript_strategy.py index 4f45747..05ed04d 100644 --- a/src/code_index_mcp/indexing/strategies/typescript_strategy.py +++ b/src/code_index_mcp/indexing/strategies/typescript_strategy.py @@ -32,13 +32,13 @@ def parse_file(self, file_path: str, content: str) -> Tuple[Dict[str, SymbolInfo classes = [] imports = [] exports = [] - + # Symbol lookup index for O(1) access symbol_lookup 
= {} # name -> symbol_id mapping parser = tree_sitter.Parser(self.ts_language) tree = parser.parse(content.encode('utf8')) - + # Single-pass traversal that handles everything context = TraversalContext( content=content, @@ -50,7 +50,7 @@ def parse_file(self, file_path: str, content: str) -> Tuple[Dict[str, SymbolInfo exports=exports, symbol_lookup=symbol_lookup ) - + self._traverse_node_single_pass(tree.root_node, context) file_info = FileInfo( @@ -67,7 +67,7 @@ def _traverse_node_single_pass(self, node, context: 'TraversalContext', current_function: Optional[str] = None, current_class: Optional[str] = None): """Single-pass traversal that extracts symbols and analyzes calls.""" - + # Handle function declarations if node.type == 'function_declaration': name = self._get_function_name(node, context.content) @@ -83,11 +83,11 @@ def _traverse_node_single_pass(self, node, context: 'TraversalContext', context.symbols[symbol_id] = symbol_info context.symbol_lookup[name] = symbol_id context.functions.append(name) - + # Traverse function body with updated context func_context = f"{context.file_path}::{name}" for child in node.children: - self._traverse_node_single_pass(child, context, current_function=func_context, + self._traverse_node_single_pass(child, context, current_function=func_context, current_class=current_class) return @@ -104,7 +104,7 @@ def _traverse_node_single_pass(self, node, context: 'TraversalContext', context.symbols[symbol_id] = symbol_info context.symbol_lookup[name] = symbol_id context.classes.append(name) - + # Traverse class body with updated context for child in node.children: self._traverse_node_single_pass(child, context, current_function=current_function, @@ -124,7 +124,7 @@ def _traverse_node_single_pass(self, node, context: 'TraversalContext', context.symbols[symbol_id] = symbol_info context.symbol_lookup[name] = symbol_id context.classes.append(name) # Group interfaces with classes - + # Traverse interface body with updated context for child in node.children: self._traverse_node_single_pass(child, context, current_function=current_function, @@ -148,7 +148,7 @@ def _traverse_node_single_pass(self, node, context: 'TraversalContext', context.symbol_lookup[full_name] = symbol_id context.symbol_lookup[method_name] = symbol_id # Also index by method name alone context.functions.append(full_name) - + # Traverse method body with updated context method_context = f"{context.file_path}::{full_name}" for child in node.children: @@ -238,7 +238,7 @@ def _get_ts_function_signature(self, node, content: str) -> str: class TraversalContext: """Context object to pass state during single-pass traversal.""" - + def __init__(self, content: str, file_path: str, symbols: Dict, functions: List, classes: List, imports: List, exports: List, symbol_lookup: Dict): self.content = content diff --git a/src/code_index_mcp/services/project_management_service.py b/src/code_index_mcp/services/project_management_service.py index 1aa0706..ec6bd20 100644 --- a/src/code_index_mcp/services/project_management_service.py +++ b/src/code_index_mcp/services/project_management_service.py @@ -47,7 +47,7 @@ def __init__(self, ctx): # Import FileWatcherTool locally to avoid circular import from ..tools.monitoring import FileWatcherTool self._watcher_tool = FileWatcherTool(ctx) - + @contextmanager def _noop_operation(self, *_args, **_kwargs): @@ -106,7 +106,7 @@ def _execute_initialization_workflow(self, path: str) -> ProjectInitializationRe """ # Business step 1: Initialize config tool 
self._config_tool.initialize_settings(path) - + # Normalize path for consistent processing normalized_path = self._config_tool.normalize_project_path(path) @@ -217,7 +217,7 @@ def _load_existing_index(self, index_data: Dict[str, Any]) -> Dict[str, Any]: Returns: Dictionary with loading results """ - + # Note: Legacy index loading is now handled by UnifiedIndexManager # This method is kept for backward compatibility but functionality moved @@ -225,7 +225,7 @@ def _load_existing_index(self, index_data: Dict[str, Any]) -> Dict[str, Any]: # Extract file count from metadata file_count = index_data.get('project_metadata', {}).get('total_files', 0) - + return { 'file_count': file_count, @@ -243,7 +243,7 @@ def _setup_file_monitoring(self, project_path: str) -> str: Returns: String describing monitoring setup result """ - + try: # Create rebuild callback that uses the JSON index manager @@ -285,7 +285,7 @@ def rebuild_callback(): def _update_project_state(self, project_path: str, file_count: int) -> None: """Business logic to update system state after project initialization.""" - + # Update context with file count self.helper.update_file_count(file_count) From 8c870809c35ca8b4b241061a7474ed307e5d2689 Mon Sep 17 00:00:00 2001 From: johnhuang316 <134570882+johnhuang316@users.noreply.github.com> Date: Wed, 24 Sep 2025 16:12:37 +0800 Subject: [PATCH 04/14] refactor: update import statement for FileFilter in JSONIndexManager - Changed the import of FileFilter to a more specific path to avoid potential circular import issues. - This refactor maintains existing functionality while improving code organization. --- src/code_index_mcp/indexing/json_index_manager.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/code_index_mcp/indexing/json_index_manager.py b/src/code_index_mcp/indexing/json_index_manager.py index d01ddf1..e0d8202 100644 --- a/src/code_index_mcp/indexing/json_index_manager.py +++ b/src/code_index_mcp/indexing/json_index_manager.py @@ -17,7 +17,6 @@ from .json_index_builder import JSONIndexBuilder from ..constants import SETTINGS_DIR, INDEX_FILE -from ..utils import FileFilter logger = logging.getLogger(__name__) @@ -281,7 +280,8 @@ def _is_index_fresh(self) -> bool: return False try: - file_filter = FileFilter() + from code_index_mcp.utils.file_filter import FileFilter as _FileFilter # pylint: disable=C0415 + file_filter = _FileFilter() # Simple freshness check - index exists and is recent index_mtime = os.path.getmtime(self.index_path) From 0c203497b0fe1ae42f807901e5855d3dffddb349 Mon Sep 17 00:00:00 2001 From: johnhuang316 <134570882+johnhuang316@users.noreply.github.com> Date: Tue, 30 Sep 2025 16:16:51 +0800 Subject: [PATCH 05/14] feat/shallow index (#40) * feat(index): add shallow index support (INDEX_FILE_SHALLOW); builder method for file list; manager load/build shallow with find_files fallback * feat(index): default to shallow index; add ShallowIndexManager; deep rebuild tool; remove build_shallow_index tool; watcher+refresh use shallow; fix server context type * feat(find_files): enforce true glob semantics (* no dir, ** recursive) and unify to shallow index; watcher verified for add/delete * feat(file_summary): return needs_deep_index hint when deep index not available --- src/code_index_mcp/constants.py | 3 +- src/code_index_mcp/indexing/__init__.py | 5 + .../indexing/deep_index_manager.py | 46 ++++++ .../indexing/json_index_builder.py | 25 +++ .../indexing/json_index_manager.py | 118 +++++++++++-- .../indexing/shallow_index_manager.py | 155 
++++++++++++++++++ src/code_index_mcp/server.py | 13 +- .../services/code_intelligence_service.py | 13 +- .../services/file_discovery_service.py | 4 +- .../services/index_management_service.py | 71 +++++++- .../services/project_management_service.py | 75 +++++++-- uv.lock | 2 +- 12 files changed, 491 insertions(+), 39 deletions(-) create mode 100644 src/code_index_mcp/indexing/deep_index_manager.py create mode 100644 src/code_index_mcp/indexing/shallow_index_manager.py diff --git a/src/code_index_mcp/constants.py b/src/code_index_mcp/constants.py index d1d4235..159e31a 100644 --- a/src/code_index_mcp/constants.py +++ b/src/code_index_mcp/constants.py @@ -5,7 +5,8 @@ # Directory and file names SETTINGS_DIR = "code_indexer" CONFIG_FILE = "config.json" -INDEX_FILE = "index.json" # JSON index file +INDEX_FILE = "index.json" # JSON index file (deep index) +INDEX_FILE_SHALLOW = "index.shallow.json" # Minimal shallow index (file list) # Supported file extensions for code analysis # This is the authoritative list used by both old and new indexing systems diff --git a/src/code_index_mcp/indexing/__init__.py b/src/code_index_mcp/indexing/__init__.py index 512ad3f..e779911 100644 --- a/src/code_index_mcp/indexing/__init__.py +++ b/src/code_index_mcp/indexing/__init__.py @@ -13,6 +13,8 @@ # New JSON-based indexing system from .json_index_builder import JSONIndexBuilder, IndexMetadata from .json_index_manager import JSONIndexManager, get_index_manager +from .shallow_index_manager import ShallowIndexManager, get_shallow_index_manager +from .deep_index_manager import DeepIndexManager from .models import SymbolInfo, FileInfo __all__ = [ @@ -21,6 +23,9 @@ 'JSONIndexBuilder', 'JSONIndexManager', 'get_index_manager', + 'ShallowIndexManager', + 'get_shallow_index_manager', + 'DeepIndexManager', 'SymbolInfo', 'FileInfo', 'IndexMetadata' diff --git a/src/code_index_mcp/indexing/deep_index_manager.py b/src/code_index_mcp/indexing/deep_index_manager.py new file mode 100644 index 0000000..6558703 --- /dev/null +++ b/src/code_index_mcp/indexing/deep_index_manager.py @@ -0,0 +1,46 @@ +""" +Deep Index Manager - Wrapper around JSONIndexManager for deep indexing. + +This class provides a clear semantic separation from the shallow manager. +It delegates to the existing JSONIndexManager (symbols + files JSON index). 
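The wrapper keeps the deep path call-compatible with the shallow one. A rough usage sketch under this patch's API (project and file paths are illustrative):

```python
from code_index_mcp.indexing import DeepIndexManager, get_shallow_index_manager

shallow = get_shallow_index_manager()  # file list only: fast startup, find_files
deep = DeepIndexManager()              # symbols + files: built on demand

shallow.set_project_path("/path/to/project")
shallow.build_index()
py_files = shallow.find_files("**/*.py")

deep.set_project_path("/path/to/project")
deep.build_index()                     # full parse; the expensive step
summary = deep.get_file_summary("src/app.py")
```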
+""" + +from __future__ import annotations + +from typing import Optional, Dict, Any, List + +from .json_index_manager import JSONIndexManager + + +class DeepIndexManager: + """Thin wrapper over JSONIndexManager to expose deep-index API.""" + + def __init__(self) -> None: + self._mgr = JSONIndexManager() + + # Expose a subset of API to keep callers simple + def set_project_path(self, project_path: str) -> bool: + return self._mgr.set_project_path(project_path) + + def build_index(self, force_rebuild: bool = False) -> bool: + return self._mgr.build_index(force_rebuild=force_rebuild) + + def load_index(self) -> bool: + return self._mgr.load_index() + + def refresh_index(self) -> bool: + return self._mgr.refresh_index() + + def find_files(self, pattern: str = "*") -> List[str]: + return self._mgr.find_files(pattern) + + def get_file_summary(self, file_path: str) -> Optional[Dict[str, Any]]: + return self._mgr.get_file_summary(file_path) + + def get_index_stats(self) -> Dict[str, Any]: + return self._mgr.get_index_stats() + + def cleanup(self) -> None: + self._mgr.cleanup() + + diff --git a/src/code_index_mcp/indexing/json_index_builder.py b/src/code_index_mcp/indexing/json_index_builder.py index 5f69669..c12d694 100644 --- a/src/code_index_mcp/indexing/json_index_builder.py +++ b/src/code_index_mcp/indexing/json_index_builder.py @@ -274,6 +274,31 @@ def _get_supported_files(self) -> List[str]: logger.debug(f"Found {len(supported_files)} supported files") return supported_files + def build_shallow_file_list(self) -> List[str]: + """ + Build a minimal shallow index consisting of relative file paths only. + + This method does not read file contents. It enumerates supported files + using centralized filtering and returns normalized relative paths with + forward slashes for cross-platform consistency. + + Returns: + List of relative file paths (using '/'). + """ + try: + absolute_files = self._get_supported_files() + result: List[str] = [] + for abs_path in absolute_files: + rel_path = os.path.relpath(abs_path, self.project_path).replace('\\', '/') + # Normalize leading './' + if rel_path.startswith('./'): + rel_path = rel_path[2:] + result.append(rel_path) + return result + except Exception as e: + logger.error(f"Failed to build shallow file list: {e}") + return [] + def save_index(self, index: Dict[str, Any], index_path: str) -> bool: """ Save index to disk. 
diff --git a/src/code_index_mcp/indexing/json_index_manager.py b/src/code_index_mcp/indexing/json_index_manager.py index e0d8202..ec320e4 100644 --- a/src/code_index_mcp/indexing/json_index_manager.py +++ b/src/code_index_mcp/indexing/json_index_manager.py @@ -9,6 +9,7 @@ import json import logging import os +import re import tempfile import threading import fnmatch @@ -16,7 +17,7 @@ from typing import Dict, List, Optional, Any from .json_index_builder import JSONIndexBuilder -from ..constants import SETTINGS_DIR, INDEX_FILE +from ..constants import SETTINGS_DIR, INDEX_FILE, INDEX_FILE_SHALLOW logger = logging.getLogger(__name__) @@ -29,6 +30,8 @@ def __init__(self): self.index_builder: Optional[JSONIndexBuilder] = None self.temp_dir: Optional[str] = None self.index_path: Optional[str] = None + self.shallow_index_path: Optional[str] = None + self._shallow_file_list: Optional[List[str]] = None self._lock = threading.RLock() logger.info("Initialized JSON Index Manager") @@ -59,6 +62,7 @@ def set_project_path(self, project_path: str) -> bool: os.makedirs(self.temp_dir, exist_ok=True) self.index_path = os.path.join(self.temp_dir, INDEX_FILE) + self.shallow_index_path = os.path.join(self.temp_dir, INDEX_FILE_SHALLOW) logger.info(f"Set project path: {project_path}") logger.info(f"Index storage: {self.index_path}") @@ -114,6 +118,52 @@ def load_index(self) -> bool: logger.error(f"Failed to load index: {e}") return False + def build_shallow_index(self) -> bool: + """Build and save the minimal shallow index (file list).""" + with self._lock: + if not self.index_builder or not self.project_path or not self.shallow_index_path: + logger.error("Index builder not initialized for shallow index") + return False + + try: + file_list = self.index_builder.build_shallow_file_list() + # Persist as a JSON array for minimal overhead + with open(self.shallow_index_path, 'w', encoding='utf-8') as f: + json.dump(file_list, f, ensure_ascii=False) + self._shallow_file_list = file_list + logger.info(f"Saved shallow index with {len(file_list)} files to {self.shallow_index_path}") + return True + except Exception as e: + logger.error(f"Failed to build shallow index: {e}") + return False + + def load_shallow_index(self) -> bool: + """Load shallow index (file list) from disk into memory.""" + with self._lock: + try: + if not self.shallow_index_path or not os.path.exists(self.shallow_index_path): + logger.warning("No existing shallow index found") + return False + with open(self.shallow_index_path, 'r', encoding='utf-8') as f: + data = json.load(f) + if not isinstance(data, list): + logger.error("Shallow index format invalid (expected list)") + return False + # Normalize paths + normalized = [] + for p in data: + if isinstance(p, str): + q = p.replace('\\\\', '/').replace('\\', '/') + if q.startswith('./'): + q = q[2:] + normalized.append(q) + self._shallow_file_list = normalized + logger.info(f"Loaded shallow index with {len(normalized)} files") + return True + except Exception as e: + logger.error(f"Failed to load shallow index: {e}") + return False + def refresh_index(self) -> bool: """Refresh the index (rebuild and reload).""" with self._lock: @@ -123,7 +173,14 @@ def refresh_index(self) -> bool: return False def find_files(self, pattern: str = "*") -> List[str]: - """Find files matching a pattern.""" + """ + Find files matching a glob pattern using the SHALLOW file list only. 
+ + Notes: + - '*' does not cross '/' + - '**' matches across directories + - Always sources from the shallow index for consistency and speed + """ with self._lock: # Input validation if not isinstance(pattern, str): @@ -134,18 +191,27 @@ def find_files(self, pattern: str = "*") -> List[str]: if not pattern: pattern = "*" - if not self.index_builder or not self.index_builder.in_memory_index: - logger.warning("Index not loaded") - return [] + # Normalize to forward slashes + norm_pattern = pattern.replace('\\\\', '/').replace('\\', '/') + + # Build glob regex: '*' does not cross '/', '**' crosses directories + regex = self._compile_glob_regex(norm_pattern) + # Always use shallow index for file discovery try: - files = list(self.index_builder.in_memory_index["files"].keys()) + if self._shallow_file_list is None: + # Try load existing shallow index; if missing, build then load + if not self.load_shallow_index(): + # If still not available, attempt to build + if self.build_shallow_index(): + self.load_shallow_index() - if pattern == "*": + files = list(self._shallow_file_list or []) + + if norm_pattern == "*": return files - # Simple pattern matching - return [f for f in files if fnmatch.fnmatch(f, pattern)] + return [f for f in files if regex.match(f) is not None] except Exception as e: logger.error(f"Error finding files: {e}") @@ -356,6 +422,39 @@ def cleanup(self): self.index_path = None logger.info("Cleaned up JSON Index Manager") + @staticmethod + def _compile_glob_regex(pattern: str) -> re.Pattern: + """ + Compile a glob pattern where '*' does not match '/', and '**' matches across directories. + + Examples: + src/*.py -> direct children .py under src + **/*.py -> .py at any depth + """ + # Translate glob to regex + i = 0 + out = [] + special = ".^$+{}[]|()" + while i < len(pattern): + c = pattern[i] + if c == '*': + if i + 1 < len(pattern) and pattern[i + 1] == '*': + # '**' -> match across directories + out.append('.*') + i += 2 + continue + else: + out.append('[^/]*') + elif c == '?': + out.append('[^/]') + elif c in special: + out.append('\\' + c) + else: + out.append(c) + i += 1 + regex_str = '^' + ''.join(out) + '$' + return re.compile(regex_str) + # Global instance _index_manager = JSONIndexManager() @@ -364,4 +463,3 @@ def cleanup(self): def get_index_manager() -> JSONIndexManager: """Get the global index manager instance.""" return _index_manager - \ No newline at end of file diff --git a/src/code_index_mcp/indexing/shallow_index_manager.py b/src/code_index_mcp/indexing/shallow_index_manager.py new file mode 100644 index 0000000..530c593 --- /dev/null +++ b/src/code_index_mcp/indexing/shallow_index_manager.py @@ -0,0 +1,155 @@ +""" +Shallow Index Manager - Manages a minimal file-list-only index. + +This manager builds and loads a shallow index consisting of relative file +paths only. It is optimized for fast initialization and filename-based +search/browsing. Content parsing and symbol extraction are not performed. 
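Since '*' here deliberately diverges from fnmatch (where '*' happily crosses '/'), a few concrete cases the compiled regex should satisfy, using the static helper added above:

```python
from code_index_mcp.indexing.json_index_manager import JSONIndexManager

rx = JSONIndexManager._compile_glob_regex
assert rx("src/*.py").match("src/main.py")          # '*' stays within one segment
assert not rx("src/*.py").match("src/pkg/util.py")  # '*' does not cross '/'
assert rx("**/*.py").match("src/pkg/util.py")       # '**' spans directories
assert not rx("*.py").match("src/main.py")          # no directory component in pattern
```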
+""" + +from __future__ import annotations + +import hashlib +import json +import logging +import os +import tempfile +import threading +from typing import List, Optional +import re + +from .json_index_builder import JSONIndexBuilder +from ..constants import SETTINGS_DIR, INDEX_FILE_SHALLOW + +logger = logging.getLogger(__name__) + + +class ShallowIndexManager: + """Manage shallow (file-list) index lifecycle and storage.""" + + def __init__(self) -> None: + self.project_path: Optional[str] = None + self.index_builder: Optional[JSONIndexBuilder] = None + self.temp_dir: Optional[str] = None + self.index_path: Optional[str] = None + self._file_list: Optional[List[str]] = None + self._lock = threading.RLock() + + def set_project_path(self, project_path: str) -> bool: + with self._lock: + try: + if not isinstance(project_path, str) or not project_path.strip(): + logger.error("Invalid project path for shallow index") + return False + project_path = project_path.strip() + if not os.path.isdir(project_path): + logger.error(f"Project path does not exist: {project_path}") + return False + + self.project_path = project_path + self.index_builder = JSONIndexBuilder(project_path) + + project_hash = hashlib.md5(project_path.encode()).hexdigest()[:12] + self.temp_dir = os.path.join(tempfile.gettempdir(), SETTINGS_DIR, project_hash) + os.makedirs(self.temp_dir, exist_ok=True) + self.index_path = os.path.join(self.temp_dir, INDEX_FILE_SHALLOW) + return True + except Exception as e: # noqa: BLE001 - centralized logging + logger.error(f"Failed to set project path (shallow): {e}") + return False + + def build_index(self) -> bool: + """Build and persist the shallow file list index.""" + with self._lock: + if not self.index_builder or not self.index_path: + logger.error("ShallowIndexManager not initialized") + return False + try: + file_list = self.index_builder.build_shallow_file_list() + with open(self.index_path, 'w', encoding='utf-8') as f: + json.dump(file_list, f, ensure_ascii=False) + self._file_list = file_list + logger.info(f"Built shallow index with {len(file_list)} files") + return True + except Exception as e: # noqa: BLE001 + logger.error(f"Failed to build shallow index: {e}") + return False + + def load_index(self) -> bool: + """Load shallow index from disk to memory.""" + with self._lock: + try: + if not self.index_path or not os.path.exists(self.index_path): + return False + with open(self.index_path, 'r', encoding='utf-8') as f: + data = json.load(f) + if isinstance(data, list): + # Normalize slashes/prefix + normalized: List[str] = [] + for p in data: + if isinstance(p, str): + q = p.replace('\\\\', '/').replace('\\', '/') + if q.startswith('./'): + q = q[2:] + normalized.append(q) + self._file_list = normalized + return True + return False + except Exception as e: # noqa: BLE001 + logger.error(f"Failed to load shallow index: {e}") + return False + + def get_file_list(self) -> List[str]: + with self._lock: + return list(self._file_list or []) + + def find_files(self, pattern: str = "*") -> List[str]: + with self._lock: + if not isinstance(pattern, str): + return [] + norm = (pattern.strip() or "*").replace('\\\\','/').replace('\\','/') + regex = self._compile_glob_regex(norm) + files = self._file_list or [] + if norm == "*": + return list(files) + return [f for f in files if regex.match(f) is not None] + + @staticmethod + def _compile_glob_regex(pattern: str) -> re.Pattern: + i = 0 + out = [] + special = ".^$+{}[]|()" + while i < len(pattern): + c = pattern[i] + if c == '*': + if i + 1 < 
len(pattern) and pattern[i + 1] == '*': + out.append('.*') + i += 2 + continue + else: + out.append('[^/]*') + elif c == '?': + out.append('[^/]') + elif c in special: + out.append('\\' + c) + else: + out.append(c) + i += 1 + return re.compile('^' + ''.join(out) + '$') + + def cleanup(self) -> None: + with self._lock: + self.project_path = None + self.index_builder = None + self.temp_dir = None + self.index_path = None + self._file_list = None + + +# Global singleton +_shallow_manager = ShallowIndexManager() + + +def get_shallow_index_manager() -> ShallowIndexManager: + return _shallow_manager + + diff --git a/src/code_index_mcp/server.py b/src/code_index_mcp/server.py index 75d83b2..982cea8 100644 --- a/src/code_index_mcp/server.py +++ b/src/code_index_mcp/server.py @@ -13,7 +13,7 @@ import logging from contextlib import asynccontextmanager from dataclasses import dataclass -from typing import AsyncIterator, Dict, Any, Optional, List +from typing import AsyncIterator, Dict, Any, List # Third-party imports from mcp import types @@ -60,7 +60,6 @@ class CodeIndexerContext: base_path: str settings: ProjectSettings file_count: int = 0 - index_manager: Optional['UnifiedIndexManager'] = None file_watcher_service: FileWatcherService = None @asynccontextmanager @@ -245,6 +244,16 @@ def refresh_index(ctx: Context) -> str: """ return IndexManagementService(ctx).rebuild_index() +@mcp.tool() +@handle_mcp_tool_errors(return_type='str') +def build_deep_index(ctx: Context) -> str: + """ + Build the deep index (full symbol extraction) for the current project. + + This performs a complete re-index and loads it into memory. + """ + return IndexManagementService(ctx).rebuild_deep_index() + @mcp.tool() @handle_mcp_tool_errors(return_type='dict') def get_settings_info(ctx: Context) -> Dict[str, Any]: diff --git a/src/code_index_mcp/services/code_intelligence_service.py b/src/code_index_mcp/services/code_intelligence_service.py index 77ff894..af0f1a2 100644 --- a/src/code_index_mcp/services/code_intelligence_service.py +++ b/src/code_index_mcp/services/code_intelligence_service.py @@ -9,12 +9,12 @@ import os from typing import Dict, Any -logger = logging.getLogger(__name__) - from .base_service import BaseService from ..tools.filesystem import FileSystemTool from ..indexing import get_index_manager +logger = logging.getLogger(__name__) + class CodeIntelligenceService(BaseService): """ @@ -61,9 +61,14 @@ def analyze_file(self, file_path: str) -> Dict[str, Any]: # Get file summary from JSON index summary = index_manager.get_file_summary(file_path) logger.info(f"Summary result: {summary is not None}") - + + # If deep index isn't available yet, return a helpful hint instead of error if not summary: - raise ValueError(f"File not found in index: {file_path}") + return { + "status": "needs_deep_index", + "message": "Deep index not available. 
Please run build_deep_index before calling get_file_summary.", + "file_path": file_path + } return summary diff --git a/src/code_index_mcp/services/file_discovery_service.py b/src/code_index_mcp/services/file_discovery_service.py index 478beea..d777511 100644 --- a/src/code_index_mcp/services/file_discovery_service.py +++ b/src/code_index_mcp/services/file_discovery_service.py @@ -9,7 +9,7 @@ from dataclasses import dataclass from .base_service import BaseService -from ..indexing import get_index_manager +from ..indexing import get_shallow_index_manager @dataclass @@ -32,7 +32,7 @@ class FileDiscoveryService(BaseService): def __init__(self, ctx): super().__init__(ctx) - self._index_manager = get_index_manager() + self._index_manager = get_shallow_index_manager() def find_files(self, pattern: str, max_results: Optional[int] = None) -> List[str]: """ diff --git a/src/code_index_mcp/services/index_management_service.py b/src/code_index_mcp/services/index_management_service.py index e4714a3..f56c760 100644 --- a/src/code_index_mcp/services/index_management_service.py +++ b/src/code_index_mcp/services/index_management_service.py @@ -6,6 +6,8 @@ """ import time import logging +import os +import json from typing import Dict, Any from dataclasses import dataclass @@ -13,7 +15,7 @@ logger = logging.getLogger(__name__) from .base_service import BaseService -from ..indexing import get_index_manager +from ..indexing import get_index_manager, get_shallow_index_manager, DeepIndexManager @dataclass @@ -35,11 +37,18 @@ class IndexManagementService(BaseService): def __init__(self, ctx): super().__init__(ctx) + # Deep manager (symbols/files, legacy JSON index manager) self._index_manager = get_index_manager() + # Shallow manager (file-list only) for default workflows + self._shallow_manager = get_shallow_index_manager() + # Optional wrapper for explicit deep builds + self._deep_wrapper = DeepIndexManager() def rebuild_index(self) -> str: """ - Rebuild the project index using the new JSON indexing system. + Rebuild the project index (DEFAULT: shallow file list). + + For deep/symbol rebuilds, use build_deep_index() tool instead. Returns: Success message with rebuild information @@ -50,11 +59,17 @@ def rebuild_index(self) -> str: # Business validation self._validate_rebuild_request() - # Business workflow: Execute rebuild - result = self._execute_rebuild_workflow() + # Shallow rebuild only (fast path) + if not self._shallow_manager.set_project_path(self.base_path): + raise RuntimeError("Failed to set project path (shallow) in index manager") + if not self._shallow_manager.build_index(): + raise RuntimeError("Failed to rebuild shallow index") - # Business result formatting - return self._format_rebuild_result(result) + try: + count = len(self._shallow_manager.get_file_list()) + except Exception: + count = 0 + return f"Shallow index re-built with {count} files." def get_rebuild_status(self) -> Dict[str, Any]: """ @@ -137,3 +152,47 @@ def _format_rebuild_result(self, result: IndexRebuildResult) -> str: Formatted result string for MCP response """ return f"Project re-indexed. Found {result.file_count} files." + + def build_shallow_index(self) -> str: + """ + Build and persist the shallow index (file list only). + + Returns: + Success message including file count if available. 
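+
+        Example (illustrative):
+            >>> IndexManagementService(ctx).build_shallow_index()
+            'Shallow index built with 42 files.'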
+ + Raises: + ValueError/RuntimeError on validation or build failure + """ + # Ensure project is set up + self._require_project_setup() + + # Initialize manager with current base path + if not self._shallow_manager.set_project_path(self.base_path): + raise RuntimeError("Failed to set project path in index manager") + + # Build shallow index + if not self._shallow_manager.build_index(): + raise RuntimeError("Failed to build shallow index") + + # Try to report count + count = 0 + try: + shallow_path = getattr(self._shallow_manager, 'index_path', None) + if shallow_path and os.path.exists(shallow_path): + with open(shallow_path, 'r', encoding='utf-8') as f: + data = json.load(f) + if isinstance(data, list): + count = len(data) + except Exception as e: # noqa: BLE001 - safe fallback to zero + logger.debug(f"Unable to read shallow index count: {e}") + + return f"Shallow index built{f' with {count} files' if count else ''}." + + def rebuild_deep_index(self) -> str: + """Rebuild the deep index using the original workflow.""" + # Business validation + self._validate_rebuild_request() + + # Deep rebuild via existing workflow + result = self._execute_rebuild_workflow() + return self._format_rebuild_result(result) diff --git a/src/code_index_mcp/services/project_management_service.py b/src/code_index_mcp/services/project_management_service.py index ec6bd20..267a0c1 100644 --- a/src/code_index_mcp/services/project_management_service.py +++ b/src/code_index_mcp/services/project_management_service.py @@ -13,7 +13,7 @@ from .base_service import BaseService from ..utils.response_formatter import ResponseFormatter from ..constants import SUPPORTED_EXTENSIONS -from ..indexing import get_index_manager +from ..indexing import get_index_manager, get_shallow_index_manager logger = logging.getLogger(__name__) @@ -40,8 +40,10 @@ class ProjectManagementService(BaseService): def __init__(self, ctx): super().__init__(ctx) - # Use the global singleton index manager + # Deep index manager (legacy full index) self._index_manager = get_index_manager() + # Shallow index manager (default for initialization) + self._shallow_manager = get_shallow_index_manager() from ..tools.config import ProjectConfigTool self._config_tool = ProjectConfigTool() # Import FileWatcherTool locally to avoid circular import @@ -113,8 +115,8 @@ def _execute_initialization_workflow(self, path: str) -> ProjectInitializationRe # Business step 2: Cleanup existing project state self._cleanup_existing_project() - # Business step 3: Initialize JSON index manager - index_result = self._initialize_json_index_manager(normalized_path) + # Business step 3: Initialize shallow index by default (fast path) + index_result = self._initialize_shallow_index_manager(normalized_path) # Business step 3.1: Store index manager in context for other services self.helper.update_index_manager(self._index_manager) @@ -185,6 +187,45 @@ def _initialize_json_index_manager(self, project_path: str) -> Dict[str, Any]: 'languages': stats.get('languages', []) } + def _initialize_shallow_index_manager(self, project_path: str) -> Dict[str, Any]: + """ + Business logic to initialize the shallow index manager by default. 
+ + Args: + project_path: Project path + + Returns: + Dictionary with initialization results + """ + # Set project path in shallow manager + if not self._shallow_manager.set_project_path(project_path): + raise RuntimeError(f"Failed to set project path (shallow): {project_path}") + + # Update context + self.helper.update_base_path(project_path) + + # Try to load existing shallow index or build new one + if self._shallow_manager.load_index(): + source = "loaded_existing" + else: + if not self._shallow_manager.build_index(): + raise RuntimeError("Failed to build shallow index") + source = "built_new" + + # Determine file count from shallow list + try: + files = self._shallow_manager.get_file_list() + file_count = len(files) + except Exception: # noqa: BLE001 - safe fallback + file_count = 0 + + return { + 'file_count': file_count, + 'source': source, + 'total_symbols': 0, + 'languages': [] + } + def _is_valid_existing_index(self, index_data: Dict[str, Any]) -> bool: """ @@ -250,15 +291,23 @@ def _setup_file_monitoring(self, project_path: str) -> str: def rebuild_callback(): logger.info("File watcher triggered rebuild callback") try: - logger.debug(f"Starting index rebuild for: {project_path}") - # Business logic: File changed, rebuild using JSON index manager - if self._index_manager.refresh_index(): - stats = self._index_manager.get_index_stats() - file_count = stats.get('indexed_files', 0) - logger.info(f"File watcher rebuild completed successfully - indexed {file_count} files") - return True - else: - logger.warning("File watcher rebuild failed") + logger.debug(f"Starting shallow index rebuild for: {project_path}") + # Business logic: File changed, rebuild using SHALLOW index manager + try: + if not self._shallow_manager.set_project_path(project_path): + logger.warning("Shallow manager set_project_path failed") + return False + if self._shallow_manager.build_index(): + files = self._shallow_manager.get_file_list() + logger.info(f"File watcher shallow rebuild completed successfully - files {len(files)}") + return True + else: + logger.warning("File watcher shallow rebuild failed") + return False + except Exception as e: + import traceback + logger.error(f"File watcher shallow rebuild failed: {e}") + logger.error(f"Traceback: {traceback.format_exc()}") return False except Exception as e: import traceback diff --git a/uv.lock b/uv.lock index f2cf2be..5bf691e 100644 --- a/uv.lock +++ b/uv.lock @@ -49,7 +49,7 @@ wheels = [ [[package]] name = "code-index-mcp" -version = "2.3.1" +version = "2.3.2" source = { editable = "." 
} dependencies = [ { name = "mcp" }, From 24285ed73867de7c98d62af0945a1409e3b55753 Mon Sep 17 00:00:00 2001 From: johnhuang316 <134570882+johnhuang316@users.noreply.github.com> Date: Tue, 30 Sep 2025 16:43:19 +0800 Subject: [PATCH 06/14] refactor(server): remove unused structure://project resource and MCP prompts - delete resource registration in src/code_index_mcp/server.py - remove ProjectManagementService.get_project_structure; drop unused json import - remove analyze_code, code_search, set_project prompts; drop unused mcp.types import --- src/code_index_mcp/server.py | 66 +------------------ .../services/project_management_service.py | 38 +---------- 2 files changed, 3 insertions(+), 101 deletions(-) diff --git a/src/code_index_mcp/server.py b/src/code_index_mcp/server.py index 982cea8..2d1eb80 100644 --- a/src/code_index_mcp/server.py +++ b/src/code_index_mcp/server.py @@ -16,7 +16,6 @@ from typing import AsyncIterator, Dict, Any, List # Third-party imports -from mcp import types from mcp.server.fastmcp import FastMCP, Context # Local imports @@ -106,13 +105,7 @@ def get_file_content(file_path: str) -> str: # Use FileService for simple file reading - this is appropriate for a resource return FileService(ctx).get_file_content(file_path) -@mcp.resource("structure://project") -@handle_mcp_resource_errors -def get_project_structure() -> str: - """Get the structure of the project as a JSON tree.""" - ctx = mcp.get_context() - return ProjectManagementService(ctx).get_project_structure() - +# Removed: structure://project resource - not necessary for most workflows # Removed: settings://stats resource - this information is available via get_settings_info() tool # and is more of a debugging/technical detail rather than context AI needs @@ -305,62 +298,7 @@ def configure_file_watcher( return SystemManagementService(ctx).configure_file_watcher(enabled, debounce_seconds, additional_exclude_patterns) # ----- PROMPTS ----- - -@mcp.prompt() -def analyze_code(file_path: str = "", query: str = "") -> list[types.PromptMessage]: - """Prompt for analyzing code in the project.""" - messages = [ - types.PromptMessage(role="user", content=types.TextContent(type="text", text=f"""I need you to analyze some code from my project. - -{f'Please analyze the file: {file_path}' if file_path else ''} -{f'I want to understand: {query}' if query else ''} - -First, let me give you some context about the project structure. Then, I'll provide the code to analyze. -""")), - types.PromptMessage( - role="assistant", - content=types.TextContent( - type="text", - text="I'll help you analyze the code. Let me first examine the project structure to get a better understanding of the codebase." - ) - ) - ] - return messages - -@mcp.prompt() -def code_search(query: str = "") -> types.TextContent: - """Prompt for searching code in the project.""" - search_text = "\"query\"" if not query else f"\"{query}\"" - return types.TextContent( - type="text", - text=f"""I need to search through my codebase for {search_text}. - -Please help me find all occurrences of this query and explain what each match means in its context. -Focus on the most relevant files and provide a brief explanation of how each match is used in the code. 
- -If there are too many results, prioritize the most important ones and summarize the patterns you see.""" - ) - -@mcp.prompt() -def set_project() -> list[types.PromptMessage]: - """Prompt for setting the project path.""" - messages = [ - types.PromptMessage(role="user", content=types.TextContent(type="text", text=""" - I need to analyze code from a project, but I haven't set the project path yet. Please help me set up the project path and index the code. - - First, I need to specify which project directory to analyze. - """)), - types.PromptMessage(role="assistant", content=types.TextContent(type="text", text=""" - Before I can help you analyze any code, we need to set up the project path. This is a required first step. - - Please provide the full path to your project folder. For example: - - Windows: "C:/Users/username/projects/my-project" - - macOS/Linux: "/home/username/projects/my-project" - - Once you provide the path, I'll use the `set_project_path` tool to configure the code analyzer to work with your project. - """)) - ] - return messages +# Removed: analyze_code, code_search, set_project prompts def main(): """Main function to run the MCP server.""" diff --git a/src/code_index_mcp/services/project_management_service.py b/src/code_index_mcp/services/project_management_service.py index 267a0c1..c0f3a63 100644 --- a/src/code_index_mcp/services/project_management_service.py +++ b/src/code_index_mcp/services/project_management_service.py @@ -4,7 +4,6 @@ This service handles the business logic for project initialization, configuration, and lifecycle management using the new JSON-based indexing system. """ -import json import logging from typing import Dict, Any from dataclasses import dataclass @@ -409,39 +408,4 @@ def get_project_config(self) -> str: return ResponseFormatter.config_response(config_data) - def get_project_structure(self) -> str: - """ - Get the project directory structure for MCP resource. - - Returns: - JSON formatted project structure - """ - - # Check if project is configured - if not self.helper.base_path: - structure_data = { - "status": "not_configured", - "message": ("Project path not set. 
Please use set_project_path " - "to set a project directory first.") - } - return json.dumps(structure_data, indent=2) - - # Check if we have index cache with directory tree - if (hasattr(self.ctx.request_context.lifespan_context, 'index_cache') and - self.ctx.request_context.lifespan_context.index_cache and - 'directory_tree' in self.ctx.request_context.lifespan_context.index_cache): - - directory_tree = self.ctx.request_context.lifespan_context.index_cache['directory_tree'] - return json.dumps(directory_tree, indent=2) - - # If no directory tree available, try to build basic structure - try: - # Use config tool to get basic project structure - basic_structure = self._config_tool.get_basic_project_structure(self.helper.base_path) - return json.dumps(basic_structure, indent=2) - except Exception as e: - error_data = { - "error": f"Unable to get project structure: {e}", - "status": "error" - } - return json.dumps(error_data, indent=2) + # Removed: get_project_structure; the project structure resource is deprecated From 5da5c4e7121bd5cb8323966753702fbb670a88ba Mon Sep 17 00:00:00 2001 From: johnhuang316 <134570882+johnhuang316@users.noreply.github.com> Date: Thu, 2 Oct 2025 09:39:02 +0800 Subject: [PATCH 07/14] fix: update .gitignore to include AGENTS.md and remove .kiro/ --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 367a552..9539f72 100644 --- a/.gitignore +++ b/.gitignore @@ -48,4 +48,4 @@ COMMIT_MESSAGE.txt RELEASE_NOTE.txt .llm-context/ -.kiro/ \ No newline at end of file +AGENTS.md From 3fc0d2ed84e30e714405003c9fbec380cd4f29d4 Mon Sep 17 00:00:00 2001 From: johnhuang316 <134570882+johnhuang316@users.noreply.github.com> Date: Thu, 2 Oct 2025 10:11:49 +0800 Subject: [PATCH 08/14] docs: update README files to clarify indexing commands and their usage --- README.md | 12 ++++++++---- README_ja.md | 12 ++++++++---- README_zh.md | 12 ++++++++---- 3 files changed, 24 insertions(+), 12 deletions(-) diff --git a/README.md b/README.md index f51ea87..5cabcbe 100644 --- a/README.md +++ b/README.md @@ -66,7 +66,7 @@ The easiest way to get started with any MCP-compatible application: - **Direct Tree-sitter Integration**: No regex fallbacks for specialized languages - fail fast with clear errors - **Advanced Search**: Auto-detects and uses the best available tool (ugrep, ripgrep, ag, or grep) - **Universal File Support**: Comprehensive coverage from advanced AST parsing to basic file indexing -- **File Analysis**: Deep insights into structure, imports, classes, methods, and complexity metrics +- **File Analysis**: Deep insights into structure, imports, classes, methods, and complexity metrics after running `build_deep_index` ### ๐Ÿ—‚๏ธ **Multi-Language Support** - **7 Languages with Tree-sitter AST Parsing**: Python, JavaScript, TypeScript, Java, Go, Objective-C, Zig @@ -81,7 +81,7 @@ The easiest way to get started with any MCP-compatible application: - **File Watcher**: Automatic index updates when files change - **Cross-platform**: Native OS file system monitoring - **Smart Processing**: Batches rapid changes to prevent excessive rebuilds -- **Rich Metadata**: Captures symbols, references, definitions, and relationships +- **Shallow Index Refresh**: Watches file changes and keeps the file list current; run a deep rebuild when you need symbol metadata ### โšก **Performance & Efficiency** - **Tree-sitter AST Parsing**: Native syntax parsing for accurate symbol extraction @@ -218,15 +218,18 @@ Then configure: | Tool | Description | 
|------|-------------| | **`set_project_path`** | Initialize indexing for a project directory | -| **`refresh_index`** | Rebuild the project index after file changes | +| **`refresh_index`** | Rebuild the shallow file index after file changes | +| **`build_deep_index`** | Generate the full symbol index used by deep analysis | | **`get_settings_info`** | View current project configuration and status | +*Run `build_deep_index` when you need symbol-level data; the default shallow index powers quick file discovery.* + ### ๐Ÿ” **Search & Discovery** | Tool | Description | |------|-------------| | **`search_code_advanced`** | Smart search with regex, fuzzy matching, and file filtering | | **`find_files`** | Locate files using glob patterns (e.g., `**/*.py`) | -| **`get_file_summary`** | Analyze file structure, functions, imports, and complexity | +| **`get_file_summary`** | Analyze file structure, functions, imports, and complexity (requires deep index) | ### ๐Ÿ”„ **Monitoring & Auto-refresh** | Tool | Description | @@ -263,6 +266,7 @@ Find all TypeScript component files in src/components Give me a summary of src/api/userService.ts ``` *Uses: `get_file_summary` to show functions, imports, and complexity* +*Tip: run `build_deep_index` first if you get a `needs_deep_index` response.* ### ๐Ÿ” **Advanced Search Examples** diff --git a/README_ja.md b/README_ja.md index 76c419a..79059b1 100644 --- a/README_ja.md +++ b/README_ja.md @@ -66,7 +66,7 @@ Code Index MCPใฏใ€AIใƒขใƒ‡ใƒซใจ่ค‡้›‘ใชใ‚ณใƒผใƒ‰ใƒ™ใƒผใ‚นใฎๆฉ‹ๆธกใ—ใ‚’ใ™ - **็›ดๆŽฅTree-sitter็ตฑๅˆ**๏ผš็‰นๅŒ–่จ€่ชžใงๆญฃ่ฆ่กจ็พใƒ•ใ‚ฉใƒผใƒซใƒใƒƒใ‚ฏใชใ— - ๆ˜Ž็ขบใชใ‚จใƒฉใƒผใƒกใƒƒใ‚ปใƒผใ‚ธใง้ซ˜้€Ÿใƒ•ใ‚งใ‚คใƒซ - **้ซ˜ๅบฆใชๆคœ็ดข**๏ผšๆœ€้ฉใชใƒ„ใƒผใƒซ๏ผˆugrepใ€ripgrepใ€agใ€grep๏ผ‰ใ‚’่‡ชๅ‹•ๆคœๅ‡บใƒปไฝฟ็”จ - **ๆฑŽ็”จใƒ•ใ‚กใ‚คใƒซใ‚ตใƒใƒผใƒˆ**๏ผš้ซ˜ๅบฆใชAST่งฃๆžใ‹ใ‚‰ๅŸบๆœฌใƒ•ใ‚กใ‚คใƒซใ‚คใƒณใƒ‡ใƒƒใ‚ฏใ‚นใพใงใฎๅŒ…ๆ‹ฌ็š„ใ‚ซใƒใƒฌใƒƒใ‚ธ -- **ใƒ•ใ‚กใ‚คใƒซ่งฃๆž**๏ผšๆง‹้€ ใ€ใ‚คใƒณใƒใƒผใƒˆใ€ใ‚ฏใƒฉใ‚นใ€ใƒกใ‚ฝใƒƒใƒ‰ใ€่ค‡้›‘ๅบฆใƒกใƒˆใƒชใ‚ฏใ‚นใธใฎๆทฑใ„ๆดžๅฏŸ +- **ใƒ•ใ‚กใ‚คใƒซ่งฃๆž**๏ผš`build_deep_index` ๅฎŸ่กŒๅพŒใซๆง‹้€ ใ€ใ‚คใƒณใƒใƒผใƒˆใ€ใ‚ฏใƒฉใ‚นใ€ใƒกใ‚ฝใƒƒใƒ‰ใ€่ค‡้›‘ๅบฆใƒกใƒˆใƒชใ‚ฏใ‚นใ‚’ๆทฑใๆŠŠๆก ### ๐Ÿ—‚๏ธ **ๅคš่จ€่ชžใ‚ตใƒใƒผใƒˆ** - **7่จ€่ชžใงTree-sitter AST่งฃๆž**๏ผšPythonใ€JavaScriptใ€TypeScriptใ€Javaใ€Goใ€Objective-Cใ€Zig @@ -81,7 +81,7 @@ Code Index MCPใฏใ€AIใƒขใƒ‡ใƒซใจ่ค‡้›‘ใชใ‚ณใƒผใƒ‰ใƒ™ใƒผใ‚นใฎๆฉ‹ๆธกใ—ใ‚’ใ™ - **ใƒ•ใ‚กใ‚คใƒซใ‚ฆใ‚ฉใƒƒใƒใƒฃใƒผ**๏ผšใƒ•ใ‚กใ‚คใƒซๅค‰ๆ›ดๆ™‚ใฎ่‡ชๅ‹•ใ‚คใƒณใƒ‡ใƒƒใ‚ฏใ‚นๆ›ดๆ–ฐ - **ใ‚ฏใƒญใ‚นใƒ—ใƒฉใƒƒใƒˆใƒ•ใ‚ฉใƒผใƒ **๏ผšใƒใ‚คใƒ†ใ‚ฃใƒ–OSใƒ•ใ‚กใ‚คใƒซใ‚ทใ‚นใƒ†ใƒ ็›ฃ่ฆ– - **ใ‚นใƒžใƒผใƒˆๅ‡ฆ็†**๏ผšๆ€ฅ้€Ÿใชๅค‰ๆ›ดใ‚’ใƒใƒƒใƒๅ‡ฆ็†ใ—ใฆ้Žๅบฆใชๅ†ๆง‹็ฏ‰ใ‚’้˜ฒๆญข -- **่ฑŠๅฏŒใชใƒกใ‚ฟใƒ‡ใƒผใ‚ฟ**๏ผšใ‚ทใƒณใƒœใƒซใ€ๅ‚็…งใ€ๅฎš็พฉใ€้–ข้€ฃๆ€งใ‚’ใ‚ญใƒฃใƒ—ใƒใƒฃ +- **ๆต…ใ„ใ‚คใƒณใƒ‡ใƒƒใ‚ฏใ‚นๆ›ดๆ–ฐ**๏ผšใƒ•ใ‚กใ‚คใƒซๅค‰ๆ›ดใ‚’็›ฃ่ฆ–ใ—ใฆๆœ€ๆ–ฐใฎใƒ•ใ‚กใ‚คใƒซไธ€่ฆงใ‚’็ถญๆŒใ—ใ€ใ‚ทใƒณใƒœใƒซใŒๅฟ…่ฆใชๅ ดๅˆใฏ `build_deep_index` ใ‚’ๅฎŸ่กŒ ### โšก **ใƒ‘ใƒ•ใ‚ฉใƒผใƒžใƒณใ‚นใƒปๅŠน็އๆ€ง** - **Tree-sitter AST่งฃๆž**๏ผšๆญฃ็ขบใชใ‚ทใƒณใƒœใƒซๆŠฝๅ‡บใฎใŸใ‚ใฎใƒใ‚คใƒ†ใ‚ฃใƒ–ๆง‹ๆ–‡่งฃๆž @@ -240,15 +240,18 @@ pip install code-index-mcp | ใƒ„ใƒผใƒซ | ่ชฌๆ˜Ž | |--------|------| | **`set_project_path`** | ใƒ—ใƒญใ‚ธใ‚งใ‚ฏใƒˆใƒ‡ใ‚ฃใƒฌใ‚ฏใƒˆใƒชใฎใ‚คใƒณใƒ‡ใƒƒใ‚ฏใ‚นไฝœๆˆใ‚’ๅˆๆœŸๅŒ– | -| **`refresh_index`** | ใƒ•ใ‚กใ‚คใƒซๅค‰ๆ›ดๅพŒใซใƒ—ใƒญใ‚ธใ‚งใ‚ฏใƒˆใ‚คใƒณใƒ‡ใƒƒใ‚ฏใ‚นใ‚’ๅ†ๆง‹็ฏ‰ | +| **`refresh_index`** | ใƒ•ใ‚กใ‚คใƒซๅค‰ๆ›ดๅพŒใซๆต…ใ„ใƒ•ใ‚กใ‚คใƒซใ‚คใƒณใƒ‡ใƒƒใ‚ฏใ‚นใ‚’ๅ†ๆง‹็ฏ‰ | +| **`build_deep_index`** | 
ๆทฑใ„่งฃๆžใงไฝฟใ†ๅฎŒๅ…จใชใ‚ทใƒณใƒœใƒซใ‚คใƒณใƒ‡ใƒƒใ‚ฏใ‚นใ‚’็”Ÿๆˆ | | **`get_settings_info`** | ็พๅœจใฎใƒ—ใƒญใ‚ธใ‚งใ‚ฏใƒˆ่จญๅฎšใจ็Šถๆ…‹ใ‚’่กจ็คบ | +*ใ‚ทใƒณใƒœใƒซใƒฌใƒ™ใƒซใฎใƒ‡ใƒผใ‚ฟใŒๅฟ…่ฆใชๅ ดๅˆใฏ `build_deep_index` ใ‚’ๅฎŸ่กŒใ—ใฆใใ ใ•ใ„ใ€‚ใƒ‡ใƒ•ใ‚ฉใƒซใƒˆใฎๆต…ใ„ใ‚คใƒณใƒ‡ใƒƒใ‚ฏใ‚นใฏ้ซ˜้€Ÿใชใƒ•ใ‚กใ‚คใƒซๆŽข็ดขใ‚’ๆ‹…ใ„ใพใ™ใ€‚* + ### ๐Ÿ” **ๆคœ็ดขใƒป็™บ่ฆ‹** | ใƒ„ใƒผใƒซ | ่ชฌๆ˜Ž | |--------|------| | **`search_code_advanced`** | ๆญฃ่ฆ่กจ็พใ€ใƒ•ใ‚กใ‚ธใƒผใƒžใƒƒใƒใƒณใ‚ฐใ€ใƒ•ใ‚กใ‚คใƒซใƒ•ใ‚ฃใƒซใ‚ฟใƒชใƒณใ‚ฐๅฏพๅฟœใฎใ‚นใƒžใƒผใƒˆๆคœ็ดข | | **`find_files`** | globใƒ‘ใ‚ฟใƒผใƒณใ‚’ไฝฟ็”จใ—ใŸใƒ•ใ‚กใ‚คใƒซๆคœ็ดข๏ผˆไพ‹๏ผš`**/*.py`๏ผ‰ | -| **`get_file_summary`** | ใƒ•ใ‚กใ‚คใƒซๆง‹้€ ใ€้–ขๆ•ฐใ€ใ‚คใƒณใƒใƒผใƒˆใ€่ค‡้›‘ๅบฆใฎ่งฃๆž | +| **`get_file_summary`** | ใƒ•ใ‚กใ‚คใƒซๆง‹้€ ใ€้–ขๆ•ฐใ€ใ‚คใƒณใƒใƒผใƒˆใ€่ค‡้›‘ๅบฆใฎ่งฃๆž๏ผˆๆทฑใ„ใ‚คใƒณใƒ‡ใƒƒใ‚ฏใ‚นใŒๅฟ…่ฆ๏ผ‰ | ### ๐Ÿ”„ **็›ฃ่ฆ–ใƒป่‡ชๅ‹•ๆ›ดๆ–ฐ** | ใƒ„ใƒผใƒซ | ่ชฌๆ˜Ž | @@ -285,6 +288,7 @@ src/components ใงๅ…จใฆใฎTypeScriptใ‚ณใƒณใƒใƒผใƒใƒณใƒˆใƒ•ใ‚กใ‚คใƒซใ‚’่ฆ‹ใค src/api/userService.ts ใฎ่ฆ็ด„ใ‚’ๆ•™ใˆใฆใใ ใ•ใ„ ``` *ไฝฟ็”จใƒ„ใƒผใƒซ๏ผš`get_file_summary` ใง้–ขๆ•ฐใ€ใ‚คใƒณใƒใƒผใƒˆใ€่ค‡้›‘ๅบฆใ‚’่กจ็คบ* +*ใƒ’ใƒณใƒˆ๏ผš`needs_deep_index` ใŒ่ฟ”ใฃใŸๅ ดๅˆใฏ `build_deep_index` ใ‚’ๅ…ˆใซๅฎŸ่กŒใ—ใฆใใ ใ•ใ„ใ€‚* ### ๐Ÿ” **้ซ˜ๅบฆใชๆคœ็ดขไพ‹** diff --git a/README_zh.md b/README_zh.md index 5a61fbb..1e9c5ae 100644 --- a/README_zh.md +++ b/README_zh.md @@ -66,7 +66,7 @@ - **็›ดๆŽฅ Tree-sitter ๆ•ดๅˆ**๏ผšๅฐˆๆฅญๅŒ–่ชž่จ€็„กๆญฃๅ‰‡่กจ้”ๅผๅ‚™็”จ - ๅฟซ้€Ÿๅคฑๆ•—ไธฆๆไพ›ๆธ…ๆ™ฐ้Œฏ่ชค่จŠๆฏ - **้€ฒ้šŽๆœๅฐ‹**๏ผš่‡ชๅ‹•ๅตๆธฌไธฆไฝฟ็”จๆœ€ไฝณๅทฅๅ…ท๏ผˆugrepใ€ripgrepใ€ag ๆˆ– grep๏ผ‰ - **้€š็”จๆช”ๆกˆๆ”ฏๆด**๏ผšๅพž้€ฒ้šŽ AST ่งฃๆžๅˆฐๅŸบๆœฌๆช”ๆกˆ็ดขๅผ•็š„ๅ…จ้ข่ฆ†่“‹ -- **ๆช”ๆกˆๅˆ†ๆž**๏ผšๆทฑๅ…ฅไบ†่งฃ็ตๆง‹ใ€ๅŒฏๅ…ฅใ€้กžๅˆฅใ€ๆ–นๆณ•ๅ’Œ่ค‡้›œๅบฆๆŒ‡ๆจ™ +- **ๆช”ๆกˆๅˆ†ๆž**๏ผšๅŸท่กŒ `build_deep_index` ๅพŒๆทฑๅ…ฅไบ†่งฃ็ตๆง‹ใ€ๅŒฏๅ…ฅใ€้กžๅˆฅใ€ๆ–นๆณ•ๅ’Œ่ค‡้›œๅบฆๆŒ‡ๆจ™ ### ๐Ÿ—‚๏ธ **ๅคš่ชž่จ€ๆ”ฏๆด** - **7 ็จฎ่ชž่จ€ไฝฟ็”จ Tree-sitter AST ่งฃๆž**๏ผšPythonใ€JavaScriptใ€TypeScriptใ€Javaใ€Goใ€Objective-Cใ€Zig @@ -81,7 +81,7 @@ - **ๆช”ๆกˆ็›ฃๆŽงๅ™จ**๏ผšๆช”ๆกˆ่ฎŠๆ›ดๆ™‚่‡ชๅ‹•ๆ›ดๆ–ฐ็ดขๅผ• - **่ทจๅนณๅฐ**๏ผšๅŽŸ็”Ÿไฝœๆฅญ็ณป็ตฑๆช”ๆกˆ็ณป็ตฑ็›ฃๆŽง - **ๆ™บๆ…ง่™•็†**๏ผšๆ‰นๆฌก่™•็†ๅฟซ้€Ÿ่ฎŠๆ›ดไปฅ้˜ฒๆญข้Žๅบฆ้‡ๅปบ -- **่ฑๅฏŒๅ…ƒ่ณ‡ๆ–™**๏ผšๆ•็ฒ็ฌฆ่™Ÿใ€ๅผ•็”จใ€ๅฎš็พฉๅ’Œ้—œ่ฏๆ€ง +- **ๆทบๅฑค็ดขๅผ•ๆ›ดๆ–ฐ**๏ผš็›ฃๆŽงๆช”ๆกˆ่ฎŠๆ›ดไธฆ็ถญๆŒๆช”ๆกˆๆธ…ๅ–ฎๆœ€ๆ–ฐ๏ผ›้œ€่ฆ็ฌฆ่™Ÿ่ณ‡ๆ–™ๆ™‚่ซ‹ๅŸท่กŒ `build_deep_index` ### โšก **ๆ•ˆ่ƒฝ่ˆ‡ๆ•ˆ็އ** - **Tree-sitter AST ่งฃๆž**๏ผšๅŽŸ็”Ÿ่ชžๆณ•่งฃๆžไปฅๅฏฆ็พๆบ–็ขบ็š„็ฌฆ่™Ÿๆๅ– @@ -240,15 +240,18 @@ pip install code-index-mcp | ๅทฅๅ…ท | ๆ่ฟฐ | |------|------| | **`set_project_path`** | ็‚บๅฐˆๆกˆ็›ฎ้Œ„ๅˆๅง‹ๅŒ–็ดขๅผ• | -| **`refresh_index`** | ๅœจๆช”ๆกˆ่ฎŠๆ›ดๅพŒ้‡ๅปบๅฐˆๆกˆ็ดขๅผ• | +| **`refresh_index`** | ๅœจๆช”ๆกˆ่ฎŠๆ›ดๅพŒ้‡ๅปบๆทบๅฑคๆช”ๆกˆ็ดขๅผ• | +| **`build_deep_index`** | ็”ข็”Ÿไพ›ๆทฑๅบฆๅˆ†ๆžไฝฟ็”จ็š„ๅฎŒๆ•ด็ฌฆ่™Ÿ็ดขๅผ• | | **`get_settings_info`** | ๆชข่ฆ–็›ฎๅ‰ๅฐˆๆกˆ้…็ฝฎๅ’Œ็‹€ๆ…‹ | +*้œ€่ฆ็ฌฆ่™Ÿๅฑค็ดš่ณ‡ๆ–™ๆ™‚๏ผŒ่ซ‹ๅŸท่กŒ `build_deep_index`๏ผ›้ ่จญ็š„ๆทบๅฑค็ดขๅผ•ๆไพ›ๅฟซ้€Ÿๆช”ๆกˆๆŽข็ดขใ€‚* + ### ๐Ÿ” **ๆœๅฐ‹่ˆ‡ๆŽข็ดข** | ๅทฅๅ…ท | ๆ่ฟฐ | |------|------| | **`search_code_advanced`** | ๆ™บๆ…งๆœๅฐ‹๏ผŒๆ”ฏๆดๆญฃ่ฆ่กจ้”ๅผใ€ๆจก็ณŠๅŒน้…ๅ’Œๆช”ๆกˆ็ฏฉ้ธ | | **`find_files`** | ไฝฟ็”จ่ฌ็”จๅญ—ๅ…ƒๆจกๅผๅฐ‹ๆ‰พๆช”ๆกˆ๏ผˆไพ‹ๅฆ‚ `**/*.py`๏ผ‰ | -| **`get_file_summary`** | ๅˆ†ๆžๆช”ๆกˆ็ตๆง‹ใ€ๅ‡ฝๅผใ€ๅŒฏๅ…ฅๅ’Œ่ค‡้›œๅบฆ | +| **`get_file_summary`** | 
ๅˆ†ๆžๆช”ๆกˆ็ตๆง‹ใ€ๅ‡ฝๅผใ€ๅŒฏๅ…ฅๅ’Œ่ค‡้›œๅบฆ๏ผˆ้œ€่ฆๆทฑๅบฆ็ดขๅผ•๏ผ‰ | ### ๐Ÿ”„ **็›ฃๆŽง่ˆ‡่‡ชๅ‹•ๅˆทๆ–ฐ** | ๅทฅๅ…ท | ๆ่ฟฐ | @@ -285,6 +288,7 @@ pip install code-index-mcp ็ตฆๆˆ‘ src/api/userService.ts ็š„ๆ‘˜่ฆ ``` *ไฝฟ็”จ๏ผš`get_file_summary` ้กฏ็คบๅ‡ฝๅผใ€ๅŒฏๅ…ฅๅ’Œ่ค‡้›œๅบฆ* +*ๆ็คบ๏ผš่‹ฅๆ”ถๅˆฐ `needs_deep_index` ๅ›žๆ‡‰๏ผŒ่ซ‹ๅ…ˆๅŸท่กŒ `build_deep_index`ใ€‚* ### ๐Ÿ” **้€ฒ้šŽๆœๅฐ‹็ฏ„ไพ‹** From 4d96073970facb76150a6f43412da1fca5ea2dba Mon Sep 17 00:00:00 2001 From: johnhuang316 <134570882+johnhuang316@users.noreply.github.com> Date: Thu, 2 Oct 2025 10:17:51 +0800 Subject: [PATCH 09/14] feat: add Korean README for Code Index MCP with detailed usage and features --- README_ko.md | 284 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 284 insertions(+) create mode 100644 README_ko.md diff --git a/README_ko.md b/README_ko.md new file mode 100644 index 0000000..6995b6a --- /dev/null +++ b/README_ko.md @@ -0,0 +1,284 @@ +# ์ฝ”๋“œ ์ธ๋ฑ์Šค MCP + +
+<div align="center">
+
+[![MCP Server](https://img.shields.io/badge/MCP-Server-blue)](https://modelcontextprotocol.io)
+[![Python](https://img.shields.io/badge/Python-3.10%2B-green)](https://www.python.org/)
+[![License](https://img.shields.io/badge/License-MIT-yellow)](LICENSE)
+
+**๋Œ€๊ทœ๋ชจ ์–ธ์–ด ๋ชจ๋ธ์„ ์œ„ํ•œ ์ง€๋Šฅํ˜• ์ฝ”๋“œ ์ธ๋ฑ์‹ฑ๊ณผ ๋ถ„์„**
+
+๊ณ ๊ธ‰ ๊ฒ€์ƒ‰, ์ •๋ฐ€ ๋ถ„์„, ์œ ์—ฐํ•œ ํƒ์ƒ‰ ๊ธฐ๋Šฅ์œผ๋กœ AI๊ฐ€ ์ฝ”๋“œ๋ฒ ์ด์Šค๋ฅผ ์ดํ•ดํ•˜๊ณ  ํ™œ์šฉํ•˜๋Š” ๋ฐฉ์‹์„ ํ˜์‹ ํ•˜์„ธ์š”.
+
+</div>
+ + + code-index-mcp MCP server + + +## ๊ฐœ์š” + +Code Index MCP๋Š” [Model Context Protocol](https://modelcontextprotocol.io) ๊ธฐ๋ฐ˜ MCP ์„œ๋ฒ„๋กœ, AI ์–ด์‹œ์Šคํ„ดํŠธ์™€ ๋ณต์žกํ•œ ์ฝ”๋“œ๋ฒ ์ด์Šค ์‚ฌ์ด๋ฅผ ์—ฐ๊ฒฐํ•ฉ๋‹ˆ๋‹ค. ๋น ๋ฅธ ์ธ๋ฑ์‹ฑ, ๊ฐ•๋ ฅํ•œ ๊ฒ€์ƒ‰, ์ •๋ฐ€ํ•œ ์ฝ”๋“œ ๋ถ„์„์„ ์ œ๊ณตํ•˜์—ฌ AI๊ฐ€ ํ”„๋กœ์ ํŠธ ๊ตฌ์กฐ๋ฅผ ์ •ํ™•ํžˆ ํŒŒ์•…ํ•˜๊ณ  ํšจ๊ณผ์ ์œผ๋กœ ์ง€์›ํ•˜๋„๋ก ๋•์Šต๋‹ˆ๋‹ค. + +**์ด๋Ÿด ๋•Œ ์•ˆ์„ฑ๋งž์ถค:** ์ฝ”๋“œ ๋ฆฌ๋ทฐ, ๋ฆฌํŒฉํ„ฐ๋ง, ๋ฌธ์„œํ™”, ๋””๋ฒ„๊น… ์ง€์›, ์•„ํ‚คํ…์ฒ˜ ๋ถ„์„ + +## ๋น ๋ฅธ ์‹œ์ž‘ + +### ๐Ÿš€ **๊ถŒ์žฅ ์„ค์ • (๋Œ€๋ถ€๋ถ„์˜ ์‚ฌ์šฉ์ž)** + +์–ด๋–ค MCP ํ˜ธํ™˜ ์• ํ”Œ๋ฆฌ์ผ€์ด์…˜์—์„œ๋„ ๋ช‡ ๋‹จ๊ณ„๋งŒ์œผ๋กœ ์‹œ์ž‘ํ•  ์ˆ˜ ์žˆ์Šต๋‹ˆ๋‹ค. + +**์‚ฌ์ „ ์ค€๋น„:** Python 3.10+ ๋ฐ [uv](https://github.com/astral-sh/uv) + +1. **MCP ์„ค์ •์— ์„œ๋ฒ„ ์ถ”๊ฐ€** (์˜ˆ: `claude_desktop_config.json` ๋˜๋Š” `~/.claude.json`) + ```json + { + "mcpServers": { + "code-index": { + "command": "uvx", + "args": ["code-index-mcp"] + } + } + } + ``` + +2. **์• ํ”Œ๋ฆฌ์ผ€์ด์…˜ ์žฌ์‹œ์ž‘** โ€“ `uvx`๊ฐ€ ์„ค์น˜์™€ ์‹คํ–‰์„ ์ž๋™์œผ๋กœ ์ฒ˜๋ฆฌํ•ฉ๋‹ˆ๋‹ค. + +3. **์‚ฌ์šฉ ์‹œ์ž‘** (AI ์–ด์‹œ์Šคํ„ดํŠธ์—๊ฒŒ ์•„๋ž˜ ํ”„๋กฌํ”„ํŠธ๋ฅผ ์ „๋‹ฌ) + ``` + ํ”„๋กœ์ ํŠธ ๊ฒฝ๋กœ๋ฅผ /Users/dev/my-react-app ์œผ๋กœ ์„ค์ •ํ•ด์ค˜ + ์ด ํ”„๋กœ์ ํŠธ์—์„œ ๋ชจ๋“  TypeScript ํŒŒ์ผ์„ ์ฐพ์•„์ค˜ + "authentication" ๊ด€๋ จ ํ•จ์ˆ˜๋ฅผ ๊ฒ€์ƒ‰ํ•ด์ค˜ + src/App.tsx ํŒŒ์ผ์„ ๋ถ„์„ํ•ด์ค˜ + ``` + +## ๋Œ€ํ‘œ ์‚ฌ์šฉ ์‚ฌ๋ก€ + +**์ฝ”๋“œ ๋ฆฌ๋ทฐ:** "์˜ˆ์ „ API๋ฅผ ์‚ฌ์šฉํ•˜๋Š” ๋ถ€๋ถ„์„ ๋ชจ๋‘ ์ฐพ์•„์ค˜" +**๋ฆฌํŒฉํ„ฐ๋ง ์ง€์›:** "์ด ํ•จ์ˆ˜๋Š” ์–ด๋””์—์„œ ํ˜ธ์ถœ๋˜๋‚˜์š”?" +**ํ”„๋กœ์ ํŠธ ํ•™์Šต:** "์ด React ํ”„๋กœ์ ํŠธ์˜ ํ•ต์‹ฌ ์ปดํฌ๋„ŒํŠธ๋ฅผ ๋ณด์—ฌ์ค˜" +**๋””๋ฒ„๊น…:** "์—๋Ÿฌ ์ฒ˜๋ฆฌ ๋กœ์ง์ด ์žˆ๋Š” ํŒŒ์ผ์„ ์ฐพ์•„์ค˜" + +## ์ฃผ์š” ๊ธฐ๋Šฅ + +### ๐Ÿง  **์ง€๋Šฅํ˜• ๊ฒ€์ƒ‰๊ณผ ๋ถ„์„** +- **๋“€์–ผ ์ „๋žต ์•„ํ‚คํ…์ฒ˜:** 7๊ฐœ ํ•ต์‹ฌ ์–ธ์–ด๋Š” ์ „์šฉ tree-sitter ํŒŒ์„œ๋ฅผ ์‚ฌ์šฉํ•˜๊ณ , ๊ทธ ์™ธ 50+ ํŒŒ์ผ ํ˜•์‹์€ ํด๋ฐฑ ์ „๋žต์œผ๋กœ ์ฒ˜๋ฆฌ +- **์ง์ ‘ Tree-sitter ํ†ตํ•ฉ:** ํŠนํ™” ์–ธ์–ด์— ์ •๊ทœ์‹ ํด๋ฐฑ ์—†์Œ โ€“ ๋ฌธ์ œ ์‹œ ์ฆ‰์‹œ ์‹คํŒจํ•˜๊ณ  ๋ช…ํ™•ํ•œ ์˜ค๋ฅ˜ ๋ฉ”์‹œ์ง€ ์ œ๊ณต +- **๊ณ ๊ธ‰ ๊ฒ€์ƒ‰:** ugrep, ripgrep, ag, grep ์ค‘ ์ตœ์ ์˜ ๋„๊ตฌ๋ฅผ ์ž๋™ ์„ ํƒํ•ด ํ™œ์šฉ +- **๋ฒ”์šฉ ํŒŒ์ผ ์ง€์›:** ์ •๊ตํ•œ AST ๋ถ„์„๋ถ€ํ„ฐ ๊ธฐ๋ณธ ํŒŒ์ผ ์ธ๋ฑ์‹ฑ๊นŒ์ง€ ํญ๋„“๊ฒŒ ์ปค๋ฒ„ +- **ํŒŒ์ผ ๋ถ„์„:** `build_deep_index` ์‹คํ–‰ ํ›„ ๊ตฌ์กฐ, ์ž„ํฌํŠธ, ํด๋ž˜์Šค, ๋ฉ”์„œ๋“œ, ๋ณต์žก๋„ ์ง€ํ‘œ๋ฅผ ์‹ฌ์ธต์ ์œผ๋กœ ํŒŒ์•… + +### ๐Ÿ—‚๏ธ **๋‹ค์ค‘ ์–ธ์–ด ์ง€์›** +- **Tree-sitter AST ๋ถ„์„(7์ข…):** Python, JavaScript, TypeScript, Java, Go, Objective-C, Zig +- **ํด๋ฐฑ ์ „๋žต(50+ ํ˜•์‹):** C/C++, Rust, Ruby, PHP ๋“ฑ ๋Œ€๋ถ€๋ถ„์˜ ํ”„๋กœ๊ทธ๋ž˜๋ฐ ์–ธ์–ด ์ง€์› +- **๋ฌธ์„œ ๋ฐ ์„ค์ • ํŒŒ์ผ:** Markdown, JSON, YAML, XML ๋“ฑ ์ƒํ™ฉ์— ๋งž๋Š” ์ฒ˜๋ฆฌ +- **์›น ํ”„๋ก ํŠธ์—”๋“œ:** Vue, React, Svelte, HTML, CSS, SCSS +- **๋ฐ์ดํ„ฐ ๊ณ„์ธต:** SQL, NoSQL, ์Šคํ† ์–ด๋“œ ํ”„๋กœ์‹œ์ €, ๋งˆ์ด๊ทธ๋ ˆ์ด์…˜ ์Šคํฌ๋ฆฝํŠธ +- **๊ตฌ์„ฑ ํŒŒ์ผ:** JSON, YAML, XML, Markdown +- **[์ง€์› ํŒŒ์ผ ์ „์ฒด ๋ชฉ๋ก ๋ณด๊ธฐ](#์ง€์›-ํŒŒ์ผ-ํ˜•์‹)** + +### ๐Ÿ”„ **์‹ค์‹œ๊ฐ„ ๋ชจ๋‹ˆํ„ฐ๋ง & ์ž๋™ ์ƒˆ๋กœ๊ณ ์นจ** +- **ํŒŒ์ผ ์›Œ์ฒ˜:** ํŒŒ์ผ ๋ณ€๊ฒฝ ์‹œ ์ž๋™์œผ๋กœ ์–•์€ ์ธ๋ฑ์Šค(ํŒŒ์ผ ๋ชฉ๋ก) ๊ฐฑ์‹  +- **ํฌ๋กœ์Šค ํ”Œ๋žซํผ:** ์šด์˜์ฒด์ œ ๊ธฐ๋ณธ ํŒŒ์ผ์‹œ์Šคํ…œ ์ด๋ฒคํŠธ ํ™œ์šฉ +- **์Šค๋งˆํŠธ ์ฒ˜๋ฆฌ:** ๋น ๋ฅธ ๋ณ€๊ฒฝ์„ ๋ฌถ์–ด ๊ณผ๋„ํ•œ ์žฌ๋นŒ๋“œ๋ฅผ ๋ฐฉ์ง€ +- **์–•์€ ์ธ๋ฑ์Šค ๊ฐฑ์‹ :** ํŒŒ์ผ ๋ชฉ๋ก์„ ์ตœ์‹  ์ƒํƒœ๋กœ ์œ ์ง€ํ•˜๋ฉฐ, ์‹ฌ๋ณผ ๋ฐ์ดํ„ฐ๊ฐ€ ํ•„์š”ํ•˜๋ฉด `build_deep_index`๋ฅผ ์‹คํ–‰ + +### โšก **์„ฑ๋Šฅ & ํšจ์œจ์„ฑ** +- **Tree-sitter AST ํŒŒ์‹ฑ:** ์ •ํ™•ํ•œ ์‹ฌ๋ณผ 
์ถ”์ถœ์„ ์œ„ํ•œ ๋„ค์ดํ‹ฐ๋ธŒ ๊ตฌ๋ฌธ ๋ถ„์„ +- **์ง€์† ์บ์‹ฑ:** ์ธ๋ฑ์Šค๋ฅผ ์ €์žฅํ•ด ์ดํ›„ ์‘๋‹ต ์†๋„๋ฅผ ๊ทน๋Œ€ํ™” +- **์Šค๋งˆํŠธ ํ•„ํ„ฐ๋ง:** ๋นŒ๋“œ ๋””๋ ‰ํ„ฐ๋ฆฌยท์ž„์‹œ ํŒŒ์ผ์„ ์ž๋™ ์ œ์™ธ +- **๋ฉ”๋ชจ๋ฆฌ ํšจ์œจ:** ๋Œ€๊ทœ๋ชจ ์ฝ”๋“œ๋ฒ ์ด์Šค๋ฅผ ์—ผ๋‘์— ๋‘” ์„ค๊ณ„ +- **์ง์ ‘ ์˜์กด์„ฑ:** ๋ถˆํ•„์š”ํ•œ ํด๋ฐฑ ์—†์ด ๋ช…ํ™•ํ•œ ์˜ค๋ฅ˜ ๋ฉ”์‹œ์ง€ ์ œ๊ณต + +## ์ง€์› ํŒŒ์ผ ํ˜•์‹ + +
+<details>
+<summary>๐Ÿ’ป ํ”„๋กœ๊ทธ๋ž˜๋ฐ ์–ธ์–ด (ํด๋ฆญํ•˜์—ฌ ํ™•์žฅ)</summary>
+
+**์ „์šฉ Tree-sitter ์ „๋žต ์–ธ์–ด:**
+- **Python** (`.py`, `.pyw`) – ํด๋ž˜์Šค/๋ฉ”์„œ๋“œ ์ถ”์ถœ ๋ฐ ํ˜ธ์ถœ ์ถ”์ ์ด ํฌํ•จ๋œ ์™„์ „ AST ๋ถ„์„
+- **JavaScript** (`.js`, `.jsx`, `.mjs`, `.cjs`) – ES6+ ํด๋ž˜์Šค์™€ ํ•จ์ˆ˜๋ฅผ tree-sitter๋กœ ํŒŒ์‹ฑ
+- **TypeScript** (`.ts`, `.tsx`) – ์ธํ„ฐํŽ˜์ด์Šค๋ฅผ ํฌํ•จํ•œ ํƒ€์ž… ์ธ์ง€ ์‹ฌ๋ณผ ์ถ”์ถœ
+- **Java** (`.java`) – ํด๋ž˜์Šค ๊ณ„์ธต, ๋ฉ”์„œ๋“œ ์‹œ๊ทธ๋‹ˆ์ฒ˜, ํ˜ธ์ถœ ๊ด€๊ณ„ ๋ถ„์„
+- **Go** (`.go`) – ๊ตฌ์กฐ์ฒด ๋ฉ”์„œ๋“œ, ๋ฆฌ์‹œ๋ฒ„ ํƒ€์ž…, ํ•จ์ˆ˜ ๋ถ„์„
+- **Objective-C** (`.m`, `.mm`) – ํด๋ž˜์Šค/์ธ์Šคํ„ด์Šค ๋ฉ”์„œ๋“œ๋ฅผ +/- ํ‘œ๊ธฐ๋กœ ๊ตฌ๋ถ„
+- **Zig** (`.zig`, `.zon`) – ํ•จ์ˆ˜์™€ ๊ตฌ์กฐ์ฒด๋ฅผ tree-sitter AST๋กœ ๋ถ„์„
+
+**๊ธฐํƒ€ ๋ชจ๋“  ํ”„๋กœ๊ทธ๋ž˜๋ฐ ์–ธ์–ด:**
+๋‚˜๋จธ์ง€ ์–ธ์–ด๋Š” **ํด๋ฐฑ ํŒŒ์‹ฑ ์ „๋žต**์œผ๋กœ ๊ธฐ๋ณธ ๋ฉ”ํƒ€๋ฐ์ดํ„ฐ์™€ ํŒŒ์ผ ์ธ๋ฑ์‹ฑ์„ ์ œ๊ณตํ•ฉ๋‹ˆ๋‹ค. ์˜ˆ:
+- **์‹œ์Šคํ…œ/์ €์ˆ˜์ค€:** C/C++ (`.c`, `.cpp`, `.h`, `.hpp`), Rust (`.rs`)
+- **๊ฐ์ฒด์ง€ํ–ฅ:** C# (`.cs`), Kotlin (`.kt`), Scala (`.scala`), Swift (`.swift`)
+- **์Šคํฌ๋ฆฝํŠธ:** Ruby (`.rb`), PHP (`.php`), Shell (`.sh`, `.bash`)
+- **๊ทธ ์™ธ 40+ ํ˜•์‹** – ํด๋ฐฑ ์ „๋žต์œผ๋กœ ๋น ๋ฅธ ํƒ์ƒ‰ ๊ฐ€๋Šฅ
+
+</details>
+
+<details>
+<summary>๐ŸŒ ์›น ํ”„๋ก ํŠธ์—”๋“œ & UI</summary>
+
+- ํ”„๋ ˆ์ž„์›Œํฌ: Vue (`.vue`), Svelte (`.svelte`), Astro (`.astro`)
+- ์Šคํƒ€์ผ๋ง: CSS (`.css`, `.scss`, `.less`, `.sass`, `.stylus`, `.styl`), HTML (`.html`)
+- ํ…œํ”Œ๋ฆฟ: Handlebars (`.hbs`, `.handlebars`), EJS (`.ejs`), Pug (`.pug`)
+
+</details>
+
+<details>
+<summary>๐Ÿ—„๏ธ ๋ฐ์ดํ„ฐ ๊ณ„์ธต & SQL</summary>
+
+- **SQL ๋ณ€ํ˜•:** ํ‘œ์ค€ SQL (`.sql`, `.ddl`, `.dml`), ๋ฐ์ดํ„ฐ๋ฒ ์ด์Šค๋ณ„ ๋ฐฉ์–ธ (`.mysql`, `.postgresql`, `.psql`, `.sqlite`, `.mssql`, `.oracle`, `.ora`, `.db2`)
+- **DB ๊ฐ์ฒด:** ํ”„๋กœ์‹œ์ €/ํ•จ์ˆ˜ (`.proc`, `.procedure`, `.func`, `.function`), ๋ทฐ/ํŠธ๋ฆฌ๊ฑฐ/์ธ๋ฑ์Šค (`.view`, `.trigger`, `.index`)
+- **๋งˆ์ด๊ทธ๋ ˆ์ด์…˜ ๋„๊ตฌ:** ๋งˆ์ด๊ทธ๋ ˆ์ด์…˜ ํŒŒ์ผ (`.migration`, `.seed`, `.fixture`, `.schema`), ๋„๊ตฌ ๊ตฌ์„ฑ (`.liquibase`, `.flyway`)
+- **NoSQL & ๊ทธ๋ž˜ํ”„:** ์งˆ์˜ ์–ธ์–ด (`.cql`, `.cypher`, `.sparql`, `.gql`)
+
+</details>
+
+<details>
+<summary>๐Ÿ“„ ๋ฌธ์„œ & ์„ค์ • ํŒŒ์ผ</summary>
+
+- Markdown (`.md`, `.mdx`)
+- ๊ตฌ์„ฑ ํŒŒ์ผ (`.json`, `.xml`, `.yml`, `.yaml`)
+
+</details>
+ +## ์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ ๋„๊ตฌ + +### ๐Ÿ—๏ธ **ํ”„๋กœ์ ํŠธ ๊ด€๋ฆฌ** +| ๋„๊ตฌ | ์„ค๋ช… | +|------|------| +| **`set_project_path`** | ํ”„๋กœ์ ํŠธ ๋””๋ ‰ํ„ฐ๋ฆฌ์˜ ์ธ๋ฑ์Šค๋ฅผ ์ดˆ๊ธฐํ™” | +| **`refresh_index`** | ํŒŒ์ผ ๋ณ€๊ฒฝ ํ›„ ์–•์€ ํŒŒ์ผ ์ธ๋ฑ์Šค๋ฅผ ์žฌ์ƒ์„ฑ | +| **`build_deep_index`** | ์‹ฌ์ธต ๋ถ„์„์— ์‚ฌ์šฉํ•˜๋Š” ์ „์ฒด ์‹ฌ๋ณผ ์ธ๋ฑ์Šค๋ฅผ ์ƒ์„ฑ | +| **`get_settings_info`** | ํ˜„์žฌ ํ”„๋กœ์ ํŠธ ์„ค์ •๊ณผ ์ƒํƒœ๋ฅผ ํ™•์ธ | + +*์‹ฌ๋ณผ ๋ ˆ๋ฒจ ๋ฐ์ดํ„ฐ๊ฐ€ ํ•„์š”ํ•˜๋ฉด `build_deep_index`๋ฅผ ์‹คํ–‰ํ•˜์„ธ์š”. ๊ธฐ๋ณธ ์–•์€ ์ธ๋ฑ์Šค๋Š” ๋น ๋ฅธ ํŒŒ์ผ ํƒ์ƒ‰์„ ๋‹ด๋‹นํ•ฉ๋‹ˆ๋‹ค.* + +### ๐Ÿ” **๊ฒ€์ƒ‰ & ํƒ์ƒ‰** +| ๋„๊ตฌ | ์„ค๋ช… | +|------|------| +| **`search_code_advanced`** | ์ •๊ทœ์‹, ํผ์ง€ ๋งค์นญ, ํŒŒ์ผ ํ•„ํ„ฐ๋ง์„ ์ง€์›ํ•˜๋Š” ์Šค๋งˆํŠธ ๊ฒ€์ƒ‰ | +| **`find_files`** | ๊ธ€๋กญ ํŒจํ„ด์œผ๋กœ ํŒŒ์ผ ์ฐพ๊ธฐ (์˜ˆ: `**/*.py`) | +| **`get_file_summary`** | ํŒŒ์ผ ๊ตฌ์กฐ, ํ•จ์ˆ˜, ์ž„ํฌํŠธ, ๋ณต์žก๋„๋ฅผ ๋ถ„์„ (์‹ฌ์ธต ์ธ๋ฑ์Šค ํ•„์š”) | + +### ๐Ÿ”„ **๋ชจ๋‹ˆํ„ฐ๋ง & ์ž๋™ ์ƒˆ๋กœ๊ณ ์นจ** +| ๋„๊ตฌ | ์„ค๋ช… | +|------|------| +| **`get_file_watcher_status`** | ํŒŒ์ผ ์›Œ์ฒ˜ ์ƒํƒœ์™€ ๊ตฌ์„ฑ์„ ํ™•์ธ | +| **`configure_file_watcher`** | ์ž๋™ ์ƒˆ๋กœ๊ณ ์นจ ์„ค์ • (ํ™œ์„ฑ/๋น„ํ™œ์„ฑ, ์ง€์—ฐ ์‹œ๊ฐ„, ์ถ”๊ฐ€ ์ œ์™ธ ํŒจํ„ด) | + +### ๐Ÿ› ๏ธ **์‹œ์Šคํ…œ & ์œ ์ง€ ๊ด€๋ฆฌ** +| ๋„๊ตฌ | ์„ค๋ช… | +|------|------| +| **`create_temp_directory`** | ์ธ๋ฑ์Šค ์ €์žฅ์šฉ ์ž„์‹œ ๋””๋ ‰ํ„ฐ๋ฆฌ๋ฅผ ์ƒ์„ฑ | +| **`check_temp_directory`** | ์ธ๋ฑ์Šค ์ €์žฅ ์œ„์น˜์™€ ๊ถŒํ•œ์„ ํ™•์ธ | +| **`clear_settings`** | ๋ชจ๋“  ์„ค์ •๊ณผ ์บ์‹œ ๋ฐ์ดํ„ฐ๋ฅผ ์ดˆ๊ธฐํ™” | +| **`refresh_search_tools`** | ์‚ฌ์šฉ ๊ฐ€๋Šฅํ•œ ๊ฒ€์ƒ‰ ๋„๊ตฌ๋ฅผ ์žฌ๊ฒ€์ƒ‰ (ugrep, ripgrep ๋“ฑ) | + +## ์‚ฌ์šฉ ์˜ˆ์‹œ + +### ๐Ÿงญ **๋น ๋ฅธ ์‹œ์ž‘ ์›Œํฌํ”Œ๋กœ** + +**1. ํ”„๋กœ์ ํŠธ ์ดˆ๊ธฐํ™”** +``` +ํ”„๋กœ์ ํŠธ ๊ฒฝ๋กœ๋ฅผ /Users/dev/my-react-app ์œผ๋กœ ์„ค์ •ํ•ด์ค˜ +``` +*ํ”„๋กœ์ ํŠธ๋ฅผ ์„ค์ •ํ•˜๊ณ  ์–•์€ ์ธ๋ฑ์Šค๋ฅผ ์ƒ์„ฑํ•ฉ๋‹ˆ๋‹ค.* + +**2. ํ”„๋กœ์ ํŠธ ๊ตฌ์กฐ ํƒ์ƒ‰** +``` +src/components ์•ˆ์˜ TypeScript ์ปดํฌ๋„ŒํŠธ ํŒŒ์ผ์„ ๋ชจ๋‘ ์ฐพ์•„์ค˜ +``` +*์‚ฌ์šฉ ๋„๊ตฌ: `find_files` (`src/components/**/*.tsx`)* + +**3. ํ•ต์‹ฌ ํŒŒ์ผ ๋ถ„์„** +``` +src/api/userService.ts ์š”์•ฝ์„ ์•Œ๋ ค์ค˜ +``` +*์‚ฌ์šฉ ๋„๊ตฌ: `get_file_summary` (ํ•จ์ˆ˜, ์ž„ํฌํŠธ, ๋ณต์žก๋„ ํ‘œ์‹œ)* +*ํŒ: `needs_deep_index` ์‘๋‹ต์ด ๋‚˜์˜ค๋ฉด ๋จผ์ € `build_deep_index`๋ฅผ ์‹คํ–‰ํ•˜์„ธ์š”.* + +### ๐Ÿ” **๊ณ ๊ธ‰ ๊ฒ€์ƒ‰ ์˜ˆ์‹œ** + +
+<details>
+<summary>์ฝ”๋“œ ํŒจํ„ด ๊ฒ€์ƒ‰</summary>
+
+```
+"get.*Data"์— ํ•ด๋‹นํ•˜๋Š” ํ•จ์ˆ˜ ํ˜ธ์ถœ์„ ์ •๊ทœ์‹์œผ๋กœ ์ฐพ์•„์ค˜
+```
+*์˜ˆ: `getData()`, `getUserData()`, `getFormData()`*
+
+</details>
+
+<details>
+<summary>ํผ์ง€ ํ•จ์ˆ˜ ๊ฒ€์ƒ‰</summary>
+
+```
+'authUser'์™€ ์œ ์‚ฌํ•œ ์ธ์ฆ ๊ด€๋ จ ํ•จ์ˆ˜๋ฅผ ์ฐพ์•„์ค˜
+```
+*์˜ˆ: `authenticateUser`, `authUserToken`, `userAuthCheck`*
+
+</details>
+
+<details>
+<summary>์–ธ์–ด๋ณ„ ๊ฒ€์ƒ‰</summary>
+
+```
+Python ํŒŒ์ผ์—์„œ๋งŒ "API_ENDPOINT" ๋ฅผ ์ฐพ์•„์ค˜
+```
+*`search_code_advanced` + `file_pattern="*.py"`*
+
+</details>
+
+<details>
+<summary>์ž๋™ ์ƒˆ๋กœ๊ณ ์นจ ์„ค์ •</summary>
+
+```
+ํŒŒ์ผ ๋ณ€๊ฒฝ ์‹œ ์ž๋™์œผ๋กœ ์ธ๋ฑ์Šค๋ฅผ ์ƒˆ๋กœ๊ณ ์นจํ•˜๋„๋ก ์„ค์ •ํ•ด์ค˜
+```
+*`configure_file_watcher`๋กœ ํ™œ์„ฑํ™” ๋ฐ ์ง€์—ฐ ์‹œ๊ฐ„ ์„ค์ •*
+
+</details>
+
+<details>
+<summary>ํ”„๋กœ์ ํŠธ ์œ ์ง€ ๊ด€๋ฆฌ</summary>
+
+```
+์ƒˆ ์ปดํฌ๋„ŒํŠธ๋ฅผ ์ถ”๊ฐ€ํ–ˆ์–ด. ํ”„๋กœ์ ํŠธ ์ธ๋ฑ์Šค๋ฅผ ๋‹ค์‹œ ๋นŒ๋“œํ•ด์ค˜
+```
+*`refresh_index`๋กœ ๋น ๋ฅด๊ฒŒ ์–•์€ ์ธ๋ฑ์Šค๋ฅผ ์—…๋ฐ์ดํŠธ*
+
+</details>
+ +## ๋ฌธ์ œ ํ•ด๊ฒฐ + +### ๐Ÿ”„ **์ž๋™ ์ƒˆ๋กœ๊ณ ์นจ์ด ๋™์ž‘ํ•˜์ง€ ์•Š์„ ๋•Œ** +- ํ™˜๊ฒฝ ๋ฌธ์ œ๋กœ `watchdog`๊ฐ€ ๋น ์กŒ๋‹ค๋ฉด ์„ค์น˜: `pip install watchdog` +- ์ˆ˜๋™ ์ƒˆ๋กœ๊ณ ์นจ: ๋ณ€๊ฒฝ ํ›„ `refresh_index` ๋„๊ตฌ ์‹คํ–‰ +- ์›Œ์ฒ˜ ์ƒํƒœ ํ™•์ธ: `get_file_watcher_status` ๋„๊ตฌ๋กœ ํ™œ์„ฑ ์—ฌ๋ถ€ ์ ๊ฒ€ + +## ๊ฐœ๋ฐœ & ๊ธฐ์—ฌ + +### ๐Ÿ› ๏ธ **์†Œ์Šค์—์„œ ์‹คํ–‰ํ•˜๊ธฐ** +```bash +git clone https://github.com/johnhuang316/code-index-mcp.git +cd code-index-mcp +uv sync +uv run code-index-mcp +``` + +### ๐Ÿงช **๋””๋ฒ„๊น… ๋„๊ตฌ** +```bash +npx @modelcontextprotocol/inspector uvx code-index-mcp +``` + +### ๐Ÿค **๊ธฐ์—ฌ ์•ˆ๋‚ด** +Pull Request๋ฅผ ์–ธ์ œ๋“  ํ™˜์˜ํ•ฉ๋‹ˆ๋‹ค. ๋ณ€๊ฒฝ ์‚ฌํ•ญ๊ณผ ํ…Œ์ŠคํŠธ ๋ฐฉ๋ฒ•์„ ํ•จ๊ป˜ ๊ณต์œ ํ•ด์ฃผ์„ธ์š”. + +--- + +### ๐Ÿ“„ **๋ผ์ด์„ ์Šค** +[MIT License](LICENSE) + +### ๐ŸŒ **๋ฒˆ์—ญ๋ณธ** +- [English](README.md) +- [็น้ซ”ไธญๆ–‡](README_zh.md) +- [ๆ—ฅๆœฌ่ชž](README_ja.md) From 05fdc8c625f9b6390bed50e06bd786bc89141f78 Mon Sep 17 00:00:00 2001 From: johnhuang316 <134570882+johnhuang316@users.noreply.github.com> Date: Thu, 2 Oct 2025 10:36:16 +0800 Subject: [PATCH 10/14] chore: bump version to 2.4.0 in pyproject.toml, __init__.py, and uv.lock --- pyproject.toml | 2 +- src/code_index_mcp/__init__.py | 2 +- uv.lock | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/pyproject.toml b/pyproject.toml index 6ff36df..bf781b5 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "code-index-mcp" -version = "2.3.2" +version = "2.4.0" description = "Code indexing and analysis tools for LLMs using MCP" readme = "README.md" requires-python = ">=3.10" diff --git a/src/code_index_mcp/__init__.py b/src/code_index_mcp/__init__.py index a71ea55..8ae1a1d 100644 --- a/src/code_index_mcp/__init__.py +++ b/src/code_index_mcp/__init__.py @@ -3,4 +3,4 @@ A Model Context Protocol server for code indexing, searching, and analysis. """ -__version__ = "2.3.2" +__version__ = "2.4.0" diff --git a/uv.lock b/uv.lock index 5bf691e..78cc596 100644 --- a/uv.lock +++ b/uv.lock @@ -49,7 +49,7 @@ wheels = [ [[package]] name = "code-index-mcp" -version = "2.3.2" +version = "2.4.0" source = { editable = "." 
} dependencies = [ { name = "mcp" }, From 98075bafe2fc3ed790e16efb8eac99dc558033e2 Mon Sep 17 00:00:00 2001 From: johnhuang316 <134570882+johnhuang316@users.noreply.github.com> Date: Fri, 3 Oct 2025 10:11:15 +0800 Subject: [PATCH 11/14] feat(search): align code search filters with index --- src/code_index_mcp/search/ag.py | 20 +++ src/code_index_mcp/search/base.py | 15 +- src/code_index_mcp/search/basic.py | 26 ++-- src/code_index_mcp/search/grep.py | 21 +++ src/code_index_mcp/search/ripgrep.py | 25 +++ src/code_index_mcp/search/ugrep.py | 24 +++ src/code_index_mcp/services/search_service.py | 145 ++++++++++-------- tests/search/test_search_filters.py | 52 +++++++ 8 files changed, 257 insertions(+), 71 deletions(-) create mode 100644 tests/search/test_search_filters.py diff --git a/src/code_index_mcp/search/ag.py b/src/code_index_mcp/search/ag.py index e5c7af5..aa3eb33 100644 --- a/src/code_index_mcp/search/ag.py +++ b/src/code_index_mcp/search/ag.py @@ -95,6 +95,26 @@ def search( cmd.extend(['-G', regex_pattern]) + processed_patterns = set() + exclude_dirs = getattr(self, 'exclude_dirs', []) + exclude_file_patterns = getattr(self, 'exclude_file_patterns', []) + + for directory in exclude_dirs: + normalized = directory.strip() + if not normalized or normalized in processed_patterns: + continue + cmd.extend(['--ignore', normalized]) + processed_patterns.add(normalized) + + for pattern in exclude_file_patterns: + normalized = pattern.strip() + if not normalized or normalized in processed_patterns: + continue + if normalized.startswith('!'): + normalized = normalized[1:] + cmd.extend(['--ignore', normalized]) + processed_patterns.add(normalized) + # Add -- to treat pattern as a literal argument, preventing injection cmd.append('--') cmd.append(search_pattern) diff --git a/src/code_index_mcp/search/base.py b/src/code_index_mcp/search/base.py index 0d50886..5e4c63b 100644 --- a/src/code_index_mcp/search/base.py +++ b/src/code_index_mcp/search/base.py @@ -10,10 +10,13 @@ import subprocess import sys from abc import ABC, abstractmethod -from typing import Dict, List, Optional, Tuple, Any +from typing import Any, Dict, List, Optional, Tuple, TYPE_CHECKING from ..indexing.qualified_names import normalize_file_path +if TYPE_CHECKING: # pragma: no cover + from ..utils.file_filter import FileFilter + def parse_search_output( output: str, base_path: str, @@ -182,6 +185,16 @@ class SearchStrategy(ABC): Each strategy is responsible for searching code using a specific tool or method. """ + def configure_excludes(self, file_filter: Optional['FileFilter']) -> None: + """Configure shared exclusion settings for the strategy.""" + self.file_filter = file_filter + if file_filter: + self.exclude_dirs = sorted(set(file_filter.exclude_dirs)) + self.exclude_file_patterns = sorted(set(file_filter.exclude_files)) + else: + self.exclude_dirs = [] + self.exclude_file_patterns = [] + @property @abstractmethod def name(self) -> str: diff --git a/src/code_index_mcp/search/basic.py b/src/code_index_mcp/search/basic.py index c480990..9ef1846 100644 --- a/src/code_index_mcp/search/basic.py +++ b/src/code_index_mcp/search/basic.py @@ -1,9 +1,10 @@ """ Basic, pure-Python search strategy. 
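+        # Mirror the shared FileFilter blacklist as ag --ignore flags; these
+        # attributes are populated by SearchStrategy.configure_excludes() and
+        # default to empty lists when no filter has been configured.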
""" +import fnmatch import os import re -import fnmatch +from pathlib import Path from typing import Dict, List, Optional, Tuple from .base import SearchStrategy, create_word_boundary_pattern, is_safe_regex_pattern @@ -83,33 +84,38 @@ def search( except re.error as e: raise ValueError(f"Invalid regex pattern: {pattern}, error: {e}") - for root, _, files in os.walk(base_path): + file_filter = getattr(self, 'file_filter', None) + base = Path(base_path) + + for root, dirs, files in os.walk(base_path): + if file_filter: + dirs[:] = [d for d in dirs if not file_filter.should_exclude_directory(d)] + for file in files: - # Improved file pattern matching with glob support if file_pattern and not self._matches_pattern(file, file_pattern): continue - file_path = os.path.join(root, file) + file_path = Path(root) / file + + if file_filter and not file_filter.should_process_path(file_path, base): + continue + rel_path = os.path.relpath(file_path, base_path) - + try: with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: for line_num, line in enumerate(f, 1): if search_regex.search(line): content = line.rstrip('\n') - # Truncate content if it exceeds max_line_length if max_line_length and len(content) > max_line_length: content = content[:max_line_length] + '... (truncated)' - + if rel_path not in results: results[rel_path] = [] - # Strip newline for consistent output results[rel_path].append((line_num, content)) except (UnicodeDecodeError, PermissionError, OSError): - # Ignore files that can't be opened or read due to encoding/permission issues continue except Exception: - # Ignore any other unexpected exceptions to maintain robustness continue return results diff --git a/src/code_index_mcp/search/grep.py b/src/code_index_mcp/search/grep.py index 91ba575..f24c469 100644 --- a/src/code_index_mcp/search/grep.py +++ b/src/code_index_mcp/search/grep.py @@ -83,6 +83,27 @@ def search( # Note: grep's --include uses glob patterns, not regex cmd.append(f'--include={file_pattern}') + exclude_dirs = getattr(self, 'exclude_dirs', []) + exclude_file_patterns = getattr(self, 'exclude_file_patterns', []) + + processed_dirs = set() + for directory in exclude_dirs: + normalized = directory.strip() + if not normalized or normalized in processed_dirs: + continue + cmd.append(f'--exclude-dir={normalized}') + processed_dirs.add(normalized) + + processed_files = set() + for pattern in exclude_file_patterns: + normalized = pattern.strip() + if not normalized or normalized in processed_files: + continue + if normalized.startswith('!'): + normalized = normalized[1:] + cmd.append(f'--exclude={normalized}') + processed_files.add(normalized) + # Add -- to treat pattern as a literal argument, preventing injection cmd.append('--') cmd.append(search_pattern) diff --git a/src/code_index_mcp/search/ripgrep.py b/src/code_index_mcp/search/ripgrep.py index 75efd3f..8a5c325 100644 --- a/src/code_index_mcp/search/ripgrep.py +++ b/src/code_index_mcp/search/ripgrep.py @@ -69,6 +69,31 @@ def search( if file_pattern: cmd.extend(['--glob', file_pattern]) + exclude_dirs = getattr(self, 'exclude_dirs', []) + exclude_file_patterns = getattr(self, 'exclude_file_patterns', []) + + processed_patterns = set() + + for directory in exclude_dirs: + normalized = directory.strip() + if not normalized or normalized in processed_patterns: + continue + cmd.extend(['--glob', f'!**/{normalized}/**']) + processed_patterns.add(normalized) + + for pattern in exclude_file_patterns: + normalized = pattern.strip() + if not normalized or normalized in 
processed_patterns: + continue + if normalized.startswith('!'): + glob_pattern = normalized + elif any(ch in normalized for ch in '*?[') or '/' in normalized: + glob_pattern = f'!{normalized}' + else: + glob_pattern = f'!**/{normalized}' + cmd.extend(['--glob', glob_pattern]) + processed_patterns.add(normalized) + # Add -- to treat pattern as a literal argument, preventing injection cmd.append('--') cmd.append(search_pattern) diff --git a/src/code_index_mcp/search/ugrep.py b/src/code_index_mcp/search/ugrep.py index 87f1c48..d4302c1 100644 --- a/src/code_index_mcp/search/ugrep.py +++ b/src/code_index_mcp/search/ugrep.py @@ -69,6 +69,30 @@ def search( if file_pattern: cmd.extend(['--include', file_pattern]) + processed_patterns = set() + exclude_dirs = getattr(self, 'exclude_dirs', []) + exclude_file_patterns = getattr(self, 'exclude_file_patterns', []) + + for directory in exclude_dirs: + normalized = directory.strip() + if not normalized or normalized in processed_patterns: + continue + cmd.extend(['--ignore', f'**/{normalized}/**']) + processed_patterns.add(normalized) + + for pattern in exclude_file_patterns: + normalized = pattern.strip() + if not normalized or normalized in processed_patterns: + continue + if normalized.startswith('!'): + ignore_pattern = normalized[1:] + elif any(ch in normalized for ch in '*?[') or '/' in normalized: + ignore_pattern = normalized + else: + ignore_pattern = f'**/{normalized}' + cmd.extend(['--ignore', ignore_pattern]) + processed_patterns.add(normalized) + # Add '--' to treat pattern as a literal argument, preventing injection cmd.append('--') cmd.append(pattern) diff --git a/src/code_index_mcp/services/search_service.py b/src/code_index_mcp/services/search_service.py index 8d66f2d..a2c2799 100644 --- a/src/code_index_mcp/services/search_service.py +++ b/src/code_index_mcp/services/search_service.py @@ -5,24 +5,20 @@ and search strategy selection. """ -from typing import Dict, Any, Optional +from pathlib import Path +from typing import Any, Dict, List, Optional from .base_service import BaseService -from ..utils import ValidationHelper, ResponseFormatter +from ..utils import FileFilter, ResponseFormatter, ValidationHelper from ..search.base import is_safe_regex_pattern class SearchService(BaseService): - """ - Service for managing code search operations. - - This service handles: - - Code search with various parameters and options - - Search tool management and detection - - Search strategy selection and optimization - - Search capabilities reporting - """ + """Service for managing code search operations.""" + def __init__(self, ctx): + super().__init__(ctx) + self.file_filter = self._create_file_filter() def search_code( # pylint: disable=too-many-arguments self, @@ -34,46 +30,21 @@ def search_code( # pylint: disable=too-many-arguments regex: Optional[bool] = None, max_line_length: Optional[int] = None ) -> Dict[str, Any]: - """ - Search for code patterns in the project. - - Handles the logic for search_code_advanced MCP tool. - - Args: - pattern: The search pattern - case_sensitive: Whether search should be case-sensitive - context_lines: Number of context lines to show - file_pattern: Glob pattern to filter files - fuzzy: Whether to enable fuzzy matching - regex: Regex mode - True/False to force, None for auto-detection - max_line_length: Optional. Default None (no limit). Limits the length of lines when context_lines is used. 
- - Returns: - Dictionary with search results or error information - - Raises: - ValueError: If project is not set up or search parameters are invalid - """ + """Search for code patterns in the project.""" self._require_project_setup() - # Smart regex detection if regex parameter is None if regex is None: regex = is_safe_regex_pattern(pattern) - if regex: - pass - # Validate search pattern error = ValidationHelper.validate_search_pattern(pattern, regex) if error: raise ValueError(error) - # Validate file pattern if provided if file_pattern: error = ValidationHelper.validate_glob_pattern(file_pattern) if error: raise ValueError(f"Invalid file pattern: {error}") - # Get search strategy from settings if not self.settings: raise ValueError("Settings not available") @@ -81,7 +52,7 @@ def search_code( # pylint: disable=too-many-arguments if not strategy: raise ValueError("No search strategies available") - + self._configure_strategy(strategy) try: results = strategy.search( @@ -94,23 +65,13 @@ def search_code( # pylint: disable=too-many-arguments regex=regex, max_line_length=max_line_length ) - return ResponseFormatter.search_results_response(results) - except Exception as e: - raise ValueError(f"Search failed using '{strategy.name}': {e}") from e - + filtered = self._filter_results(results) + return ResponseFormatter.search_results_response(filtered) + except Exception as exc: + raise ValueError(f"Search failed using '{strategy.name}': {exc}") from exc def refresh_search_tools(self) -> str: - """ - Refresh the available search tools. - - Handles the logic for refresh_search_tools MCP tool. - - Returns: - Success message with available tools information - - Raises: - ValueError: If refresh operation fails - """ + """Refresh the available search tools.""" if not self.settings: raise ValueError("Settings not available") @@ -121,14 +82,8 @@ def refresh_search_tools(self) -> str: preferred = config['preferred_tool'] return f"Search tools refreshed. Available: {available}. Preferred: {preferred}." - def get_search_capabilities(self) -> Dict[str, Any]: - """ - Get information about search capabilities and available tools. 
- - Returns: - Dictionary with search tool information and capabilities - """ + """Get information about search capabilities and available tools.""" if not self.settings: return {"error": "Settings not available"} @@ -145,3 +100,73 @@ def get_search_capabilities(self) -> Dict[str, Any]: } return capabilities + + def _configure_strategy(self, strategy) -> None: + """Apply shared exclusion configuration to the strategy if supported.""" + configure = getattr(strategy, 'configure_excludes', None) + if not configure: + return + + try: + configure(self.file_filter) + except Exception: # pragma: no cover - defensive fallback + pass + + def _create_file_filter(self) -> FileFilter: + """Build a shared file filter drawing from project settings.""" + additional_dirs: List[str] = [] + additional_file_patterns: List[str] = [] + + settings = self.settings + if settings: + try: + config = settings.get_file_watcher_config() + except Exception: # pragma: no cover - fallback if config fails + config = {} + + for key in ('exclude_patterns', 'additional_exclude_patterns'): + patterns = config.get(key) or [] + for pattern in patterns: + if not isinstance(pattern, str): + continue + normalized = pattern.strip() + if not normalized: + continue + additional_dirs.append(normalized) + additional_file_patterns.append(normalized) + + file_filter = FileFilter(additional_dirs or None) + + if additional_file_patterns: + file_filter.exclude_files.update(additional_file_patterns) + + return file_filter + + def _filter_results(self, results: Dict[str, Any]) -> Dict[str, Any]: + """Filter out matches that reside under excluded paths.""" + if not isinstance(results, dict) or not results: + return results + + if 'error' in results or not self.file_filter or not self.base_path: + return results + + base_path = Path(self.base_path) + filtered: Dict[str, Any] = {} + + for rel_path, matches in results.items(): + if not isinstance(rel_path, str): + continue + + normalized = Path(rel_path.replace('\\', '/')) + try: + absolute = (base_path / normalized).resolve() + except Exception: # pragma: no cover - invalid path safety + continue + + try: + if self.file_filter.should_process_path(absolute, base_path): + filtered[rel_path] = matches + except Exception: # pragma: no cover - defensive fallback + continue + + return filtered diff --git a/tests/search/test_search_filters.py b/tests/search/test_search_filters.py new file mode 100644 index 0000000..787461d --- /dev/null +++ b/tests/search/test_search_filters.py @@ -0,0 +1,52 @@ +"""Tests covering shared search filtering behaviour.""" +import os +from types import SimpleNamespace +from unittest.mock import patch +from pathlib import Path as _TestPath +import sys + +ROOT = _TestPath(__file__).resolve().parents[2] +SRC_PATH = ROOT / 'src' +if str(SRC_PATH) not in sys.path: + sys.path.insert(0, str(SRC_PATH)) + +from code_index_mcp.search.basic import BasicSearchStrategy +from code_index_mcp.search.ripgrep import RipgrepStrategy +from code_index_mcp.utils.file_filter import FileFilter + + +def test_basic_strategy_skips_excluded_directories(tmp_path): + base = tmp_path + src_dir = base / "src" + src_dir.mkdir() + (src_dir / 'app.js').write_text("const db = 'mongo';\n") + + node_modules_dir = base / "node_modules" / "pkg" + node_modules_dir.mkdir(parents=True) + (node_modules_dir / 'index.js').write_text("// mongo dependency\n") + + strategy = BasicSearchStrategy() + strategy.configure_excludes(FileFilter()) + + results = strategy.search("mongo", str(base), case_sensitive=False) + + 
+    included_path = os.path.join("src", "app.js")
+    excluded_path = os.path.join("node_modules", "pkg", "index.js")
+
+    assert included_path in results
+    assert excluded_path not in results
+
+
+@patch("code_index_mcp.search.ripgrep.subprocess.run")
+def test_ripgrep_strategy_adds_exclude_globs(mock_run, tmp_path):
+    mock_run.return_value = SimpleNamespace(returncode=0, stdout="", stderr="")
+
+    strategy = RipgrepStrategy()
+    strategy.configure_excludes(FileFilter())
+
+    strategy.search("mongo", str(tmp_path))
+
+    cmd = mock_run.call_args[0][0]
+    glob_args = [cmd[i + 1] for i, arg in enumerate(cmd) if arg == '--glob' and i + 1 < len(cmd)]
+
+    assert any(value.startswith('!**/node_modules/') for value in glob_args)
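The pruning the first test exercises boils down to the classic `os.walk` idiom of mutating `dirs` in place. A rough sketch of that idiom, assuming the shared filter can judge directories through `should_process_path`; the walker below is illustrative, not the actual `BasicSearchStrategy` internals:

```python
import os
from pathlib import Path

def iter_candidate_files(base_path: str, file_filter) -> list:
    """Collect files to scan, skipping excluded directories during traversal."""
    base = Path(base_path)
    candidates = []
    for root, dirs, files in os.walk(base):
        # Pruning dirs in place keeps os.walk from ever descending into
        # excluded trees such as node_modules, which avoids wasted IO.
        dirs[:] = [d for d in dirs
                   if file_filter.should_process_path(Path(root) / d, base)]
        candidates.extend(Path(root) / name for name in files)
    return candidates
```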
From efba963475e6bfbfe8126d3ce8c15d6f33bc50a7 Mon Sep 17 00:00:00 2001
From: johnhuang316 <134570882+johnhuang316@users.noreply.github.com>
Date: Fri, 3 Oct 2025 10:24:00 +0800
Subject: [PATCH 12/14] chore(release): bump version to 2.4.1

---
 AGENTS.md                      | 25 ++++++++++++++++++
 RELEASE_NOTE.txt               | 48 ++++++++++++++++++++++++++++++++++
 pyproject.toml                 |  2 +-
 src/code_index_mcp/__init__.py |  3 ++-
 uv.lock                        |  3 ++-
 5 files changed, 78 insertions(+), 3 deletions(-)
 create mode 100644 AGENTS.md
 create mode 100644 RELEASE_NOTE.txt

diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 0000000..886f335
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,25 @@
+# Repository Guidelines
+
+## Project Structure & Module Organization
+Code Index MCP lives in `src/code_index_mcp/`, with `indexing/` managing builders, `services/` exposing MCP tool implementations, `search/` coordinating query utilities, and `utils/` housing cross-cutting helpers. The lightweight CLI bootstrapper is `run.py`, which adds `src/` to `PYTHONPATH` before invoking `code_index_mcp.server`. Sample corpora for language regression reside under `test/sample-projects/` (for example `python/user_management/`). Reserve `tests/` for runnable suites and avoid checking in generated `__pycache__` artifacts.
+
+## Build, Test, and Development Commands
+Install dependencies with `uv sync` after cloning. Use `uv run code-index-mcp` to launch the MCP server directly, or `uv run python run.py` when you need the local sys.path shim. During development, `uv run code-index-mcp --help` will list available CLI flags, and `uv run python -m code_index_mcp.server` mirrors the published entry point for debugging.
+
+## Coding Style & Naming Conventions
+Target Python 3.10+ and follow the `.pylintrc` configuration: 4-space indentation, 100-character line limit, and restrained function signatures (<= 7 parameters). Modules and functions stay `snake_case`, classes use `PascalCase`, and constants remain uppercase with underscores. Prefer explicit imports from sibling packages (`from .services import ...`) and keep logging to stderr as implemented in `server.py`.
+
+## Testing Guidelines
+Automated tests should live under `tests/`, mirroring the package hierarchy (`tests/indexing/test_shallow_index.py`, etc.). Use `uv run pytest` (with optional `-k` selectors) for unit and integration coverage, and stage representative fixtures inside `test/sample-projects/` when exercising new language strategies. Document expected behaviors in fixtures' README files or inline comments, and fail fast if tree-sitter support is not available for a language you add.
+
+## Commit & Pull Request Guidelines
+Follow the Conventional Commits style seen in history (`feat`, `fix`, `refactor(scope): summary`). Reference issue numbers when relevant and keep subjects under 72 characters. Pull requests should include: 1) a concise problem statement, 2) before/after behavior or performance notes, 3) instructions for reproducing test runs (`uv run pytest`, `uv run code-index-mcp`). Attach updated screenshots or logs when touching developer experience flows, and confirm the file watcher still transitions to "active" in manual smoke tests.
+
+## Agent Workflow Tips
+Always call `set_project_path` before invoking other tools, and prefer `search_code_advanced` with targeted `file_pattern` filters to minimize noise. When editing indexing strategies, run `refresh_index` in between changes to confirm cache rebuilds. Clean up temporary directories via `clear_settings` if you notice stale metadata, and document any new tooling you introduce in this guide.
+
+## Release Preparation Checklist
+- Update the project version everywhere it lives: `pyproject.toml`, `src/code_index_mcp/__init__.py`, and `uv.lock`.
+- Add a release note entry to `RELEASE_NOTE.txt` for the new version.
+- Commit the version bump (plus any release artifacts) and push the branch to `origin`.
+- Create a git tag for the new version and push the tag to `origin`.
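The first checklist item is the easiest to fumble during a release. A hypothetical helper along these lines, not shipped with the repository, can confirm the copies agree before tagging (`tomllib` needs Python 3.11+, so the `tomli` backport would cover 3.10):

```python
"""check_versions.py - hypothetical release helper, not part of the repo."""
import re
import tomllib  # Python 3.11+; on 3.10 substitute the tomli backport
from pathlib import Path

def read_versions() -> dict:
    pyproject = tomllib.loads(Path("pyproject.toml").read_text())
    init_text = Path("src/code_index_mcp/__init__.py").read_text()
    match = re.search(r'__version__\s*=\s*"([^"]+)"', init_text)
    return {
        "pyproject.toml": pyproject["project"]["version"],
        "src/code_index_mcp/__init__.py": match.group(1) if match else "<missing>",
    }

if __name__ == "__main__":
    versions = read_versions()
    assert len(set(versions.values())) == 1, f"version mismatch: {versions}"
    print(f"versions agree: {versions}")
```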
diff --git a/RELEASE_NOTE.txt b/RELEASE_NOTE.txt
new file mode 100644
index 0000000..2295def
--- /dev/null
+++ b/RELEASE_NOTE.txt
@@ -0,0 +1,48 @@
+## 2.4.1 - Search Filtering Alignment
+
+### Highlights
+- Code search now shares the central FileFilter blacklist, keeping results consistent with indexing (no more `node_modules` noise).
+- CLI search strategies emit the appropriate exclusion flags automatically (ripgrep, ugrep, ag, grep).
+- Basic fallback search prunes excluded directories during traversal, avoiding unnecessary IO.
+- Added regression coverage for the new filtering behaviour (`tests/search/test_search_filters.py`).
+
+### Upgrade Notes
+- No new dependencies; update via standard `uv sync` after pulling.
+- Run `uv run pytest` to confirm the new search filter tests on your environment.
+
+## Shallow Index Default & Streamlined Server
+
+This release focuses on faster first-run experiences and a slimmer MCP surface area.
+
+### Highlights
+
+- **Shallow index by default**: Projects initialize with the new JSON-based shallow index for rapid file discovery.
+- **Deep index on demand**: Added the `build_deep_index` tool so symbol extraction happens only when you request it.
+- **Watcher-friendly rebuilds**: File watcher callbacks now refresh the shallow index, keeping file lists current without long rebuilds.
+- **Server cleanup**: Removed unused `structure://project` resource, legacy prompts, and auxiliary documents for a leaner runtime.
+
+### Developer Experience Improvements
+
+- `find_files` now enforces true glob semantics (single `*` for one segment, `**` for recursive matches).
+- `get_file_summary` responds with a `needs_deep_index` hint when deep symbols are unavailable.
+- Index management services split shallow vs deep rebuild paths to clarify tool behavior.
+- Repository docs (README, localized copies) highlight when to run `build_deep_index`.
+
+### Cleanups
+
+- Removed deprecated architecture and benchmarking documents.
+- Trimmed benchmark scripts and outdated tests tied to the old SCIP experiment.
+
+### Upgrade Notes
+
+1. After updating, call `set_project_path` as usual - the server will build the shallow index automatically.
+2. Run `build_deep_index` whenever you need symbol-level summaries (`get_file_summary`) or deep search capabilities.
+3. Optionally run `refresh_index` to refresh the shallow index if the watcher is disabled.
+
+### Compatibility
+
+- Tool names and signatures are unchanged.
+- Deep-index workflows remain available; they now require an explicit `build_deep_index` call.
+- Python 3.10+ requirement unchanged; no new third-party dependencies.
+
+Enjoy faster cold starts and a simpler interface tailored for LLM-driven workflows.
diff --git a/pyproject.toml b/pyproject.toml
index bf781b5..428e2d3 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta"
 
 [project]
 name = "code-index-mcp"
-version = "2.4.0"
+version = "2.4.1"
 description = "Code indexing and analysis tools for LLMs using MCP"
 readme = "README.md"
 requires-python = ">=3.10"
diff --git a/src/code_index_mcp/__init__.py b/src/code_index_mcp/__init__.py
index 8ae1a1d..f47ee02 100644
--- a/src/code_index_mcp/__init__.py
+++ b/src/code_index_mcp/__init__.py
@@ -3,4 +3,5 @@
 A Model Context Protocol server for code indexing, searching, and analysis.
 """
 
-__version__ = "2.4.0"
+__version__ = "2.4.1"
+
diff --git a/uv.lock b/uv.lock
index 78cc596..08294cf 100644
--- a/uv.lock
+++ b/uv.lock
@@ -49,7 +49,7 @@ wheels = [
 
 [[package]]
 name = "code-index-mcp"
-version = "2.4.0"
+version = "2.4.1"
 source = { editable = "." }
 dependencies = [
     { name = "mcp" },
@@ -527,3 +527,4 @@ wheels = [
     { url = "https://files.pythonhosted.org/packages/db/d9/c495884c6e548fce18a8f40568ff120bc3a4b7b99813081c8ac0c936fa64/watchdog-6.0.0-py3-none-win_amd64.whl", hash = "sha256:cbafb470cf848d93b5d013e2ecb245d4aa1c8fd0504e863ccefa32445359d680", size = 79070 },
     { url = "https://files.pythonhosted.org/packages/33/e8/e40370e6d74ddba47f002a32919d91310d6074130fe4e17dabcafc15cbf1/watchdog-6.0.0-py3-none-win_ia64.whl", hash = "sha256:a1914259fa9e1454315171103c6a30961236f508b9b623eae470268bbcc6a22f", size = 79067 },
 ]
+
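For the CLI-backed strategies named in the 2.4.1 highlights, the change amounts to translating the shared exclusion list into tool-specific flags. A sketch for the ripgrep case, assuming the filter exposes an iterable of excluded directory names; the `exclude_dirs` parameter is illustrative, and only the `!**/node_modules/**` glob shape is taken from the regression test:

```python
def ripgrep_exclude_args(exclude_dirs) -> list:
    """Translate excluded directory names into ripgrep --glob negations."""
    args = []
    for name in exclude_dirs:
        # A leading '!' negates the glob, and '**/' on both sides excludes
        # the directory at any depth, e.g. '!**/node_modules/**'.
        args.extend(["--glob", f"!**/{name}/**"])
    return args
```

Strategy code would splice these arguments into the `rg` command line ahead of the pattern and path, which is exactly the shape `test_ripgrep_strategy_adds_exclude_globs` inspects.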
From 72bc96eeb36cf6637df1eda615720c8c65dd3dea Mon Sep 17 00:00:00 2001
From: johnhuang316 <134570882+johnhuang316@users.noreply.github.com>
Date: Fri, 3 Oct 2025 10:28:34 +0800
Subject: [PATCH 13/14] docs: limit release note to current version

---
 RELEASE_NOTE.txt | 37 -------------------------------------
 1 file changed, 37 deletions(-)

diff --git a/RELEASE_NOTE.txt b/RELEASE_NOTE.txt
index 2295def..7333033 100644
--- a/RELEASE_NOTE.txt
+++ b/RELEASE_NOTE.txt
@@ -9,40 +9,3 @@
 ### Upgrade Notes
 - No new dependencies; update via standard `uv sync` after pulling.
 - Run `uv run pytest` to confirm the new search filter tests on your environment.
-
-## Shallow Index Default & Streamlined Server
-
-This release focuses on faster first-run experiences and a slimmer MCP surface area.
-
-### Highlights
-
-- **Shallow index by default**: Projects initialize with the new JSON-based shallow index for rapid file discovery.
-- **Deep index on demand**: Added the `build_deep_index` tool so symbol extraction happens only when you request it.
-- **Watcher-friendly rebuilds**: File watcher callbacks now refresh the shallow index, keeping file lists current without long rebuilds.
-- **Server cleanup**: Removed unused `structure://project` resource, legacy prompts, and auxiliary documents for a leaner runtime.
-
-### Developer Experience Improvements
-
-- `find_files` now enforces true glob semantics (single `*` for one segment, `**` for recursive matches).
-- `get_file_summary` responds with a `needs_deep_index` hint when deep symbols are unavailable.
-- Index management services split shallow vs deep rebuild paths to clarify tool behavior.
-- Repository docs (README, localized copies) highlight when to run `build_deep_index`.
-
-### Cleanups
-
-- Removed deprecated architecture and benchmarking documents.
-- Trimmed benchmark scripts and outdated tests tied to the old SCIP experiment.
-
-### Upgrade Notes
-
-1. After updating, call `set_project_path` as usual - the server will build the shallow index automatically.
-2. Run `build_deep_index` whenever you need symbol-level summaries (`get_file_summary`) or deep search capabilities.
-3. Optionally run `refresh_index` to refresh the shallow index if the watcher is disabled.
-
-### Compatibility
-
-- Tool names and signatures are unchanged.
-- Deep-index workflows remain available; they now require an explicit `build_deep_index` call.
-- Python 3.10+ requirement unchanged; no new third-party dependencies.
-
-Enjoy faster cold starts and a simpler interface tailored for LLM-driven workflows.

From 3699110c64cd5b6da2dd4b80865b65e2e9c55fcb Mon Sep 17 00:00:00 2001
From: johnhuang316 <134570882+johnhuang316@users.noreply.github.com>
Date: Fri, 3 Oct 2025 10:29:45 +0800
Subject: [PATCH 14/14] docs: streamline 2.4.1 release notes

---
 RELEASE_NOTE.txt | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/RELEASE_NOTE.txt b/RELEASE_NOTE.txt
index 7333033..8a744bb 100644
--- a/RELEASE_NOTE.txt
+++ b/RELEASE_NOTE.txt
@@ -5,7 +5,3 @@
 - CLI search strategies emit the appropriate exclusion flags automatically (ripgrep, ugrep, ag, grep).
 - Basic fallback search prunes excluded directories during traversal, avoiding unnecessary IO.
 - Added regression coverage for the new filtering behaviour (`tests/search/test_search_filters.py`).
-
-### Upgrade Notes
-- No new dependencies; update via standard `uv sync` after pulling.
-- Run `uv run pytest` to confirm the new search filter tests on your environment.