diff --git a/.gitignore b/.gitignore index a21056d..9539f72 100644 --- a/.gitignore +++ b/.gitignore @@ -38,15 +38,14 @@ Thumbs.db # Code Index MCP specific files .code_indexer/ -# Test files -test_*.py # Claude Code generated files -CLAUDE.md +CLAUDE.local.md .claude/ .claude_chat/ claude_* COMMIT_MESSAGE.txt +RELEASE_NOTE.txt .llm-context/ -.kiro/ \ No newline at end of file +AGENTS.md diff --git a/.pylintrc b/.pylintrc new file mode 100644 index 0000000..5bc48e5 --- /dev/null +++ b/.pylintrc @@ -0,0 +1,24 @@ +[MAIN] +# Ignore auto-generated protobuf files +ignore-paths=src/code_index_mcp/scip/proto/scip_pb2.py + +[MESSAGES CONTROL] +# Disable specific warnings for protobuf generated code +disable= + # Generated code warnings + protected-access, + bad-indentation, + line-too-long, + # Other common warnings we might want to disable globally + unused-import, + logging-fstring-interpolation + +[FORMAT] +# Maximum number of characters on a single line +max-line-length=100 + +[DESIGN] +# Maximum number of arguments for function / method +max-args=7 +# Maximum number of locals for function / method body +max-locals=20 \ No newline at end of file diff --git a/AGENTS.md b/AGENTS.md new file mode 100644 index 0000000..886f335 --- /dev/null +++ b/AGENTS.md @@ -0,0 +1,25 @@ +# Repository Guidelines + +## Project Structure & Module Organization +Code Index MCP lives in `src/code_index_mcp/`, with `indexing/` managing builders, `services/` exposing MCP tool implementations, `search/` coordinating query utilities, and `utils/` housing cross-cutting helpers. The lightweight CLI bootstrapper is `run.py`, which adds `src/` to `PYTHONPATH` before invoking `code_index_mcp.server`. Sample corpora for language regression reside under `test/sample-projects/` (for example `python/user_management/`). Reserve `tests/` for runnable suites and avoid checking in generated `__pycache__` artifacts. + +## Build, Test, and Development Commands +Install dependencies with `uv sync` after cloning. Use `uv run code-index-mcp` to launch the MCP server directly, or `uv run python run.py` when you need the local sys.path shim. During development, `uv run code-index-mcp --help` will list available CLI flags, and `uv run python -m code_index_mcp.server` mirrors the published entry point for debugging. + +## Coding Style & Naming Conventions +Target Python 3.10+ and follow the `.pylintrc` configuration: 4-space indentation, 100-character line limit, and restrained function signatures (<= 7 parameters). Modules and functions stay `snake_case`, classes use `PascalCase`, and constants remain uppercase with underscores. Prefer explicit imports from sibling packages (`from .services import ...`) and keep logging to stderr as implemented in `server.py`. + +## Testing Guidelines +Automated tests should live under `tests/`, mirroring the package hierarchy (`tests/indexing/test_shallow_index.py`, etc.). Use `uv run pytest` (with optional `-k` selectors) for unit and integration coverage, and stage representative fixtures inside `test/sample-projects/` when exercising new language strategies. Document expected behaviors in fixtures' README files or inline comments, and fail fast if tree-sitter support is not available for a language you add. + +## Commit & Pull Request Guidelines +Follow the Conventional Commits style seen in history (`feat`, `fix`, `refactor(scope): summary`). Reference issue numbers when relevant and keep subjects under 72 characters. 
Pull requests should include: 1) a concise problem statement, 2) before/after behavior or performance notes, 3) instructions for reproducing test runs (`uv run pytest`, `uv run code-index-mcp`). Attach updated screenshots or logs when touching developer experience flows, and confirm the file watcher still transitions to "active" in manual smoke tests. + +## Agent Workflow Tips +Always call `set_project_path` before invoking other tools, and prefer `search_code_advanced` with targeted `file_pattern` filters to minimize noise. When editing indexing strategies, run `refresh_index` in between changes to confirm cache rebuilds. Clean up temporary directories via `clear_settings` if you notice stale metadata, and document any new tooling you introduce in this guide. + +## Release Preparation Checklist +- Update the project version everywhere it lives: `pyproject.toml`, `src/code_index_mcp/__init__.py`, and `uv.lock`. +- Add a release note entry to `RELEASE_NOTE.txt` for the new version. +- Commit the version bump (plus any release artifacts) and push the branch to `origin`. +- Create a git tag for the new version and push the tag to `origin`. diff --git a/README.md b/README.md index 167b1d1..5cabcbe 100644 --- a/README.md +++ b/README.md @@ -22,56 +22,94 @@ Code Index MCP is a [Model Context Protocol](https://modelcontextprotocol.io) se **Perfect for:** Code review, refactoring, documentation generation, debugging assistance, and architectural analysis. +## Quick Start + +### 🚀 **Recommended Setup (Most Users)** + +The easiest way to get started with any MCP-compatible application: + +**Prerequisites:** Python 3.10+ and [uv](https://github.com/astral-sh/uv) + +1. **Add to your MCP configuration** (e.g., `claude_desktop_config.json` or `~/.claude.json`): + ```json + { + "mcpServers": { + "code-index": { + "command": "uvx", + "args": ["code-index-mcp"] + } + } + } + ``` + +2. **Restart your application** – `uvx` automatically handles installation and execution + +3. **Start using** (give these prompts to your AI assistant): + ``` + Set the project path to /Users/dev/my-react-app + Find all TypeScript files in this project + Search for "authentication" functions + Analyze the main App.tsx file + ``` + +## Typical Use Cases + +**Code Review**: "Find all places using the old API" +**Refactoring Help**: "Where is this function called?" 
+**Learning Projects**: "Show me the main components of this React project" +**Debugging**: "Search for all error handling related code" + ## Key Features ### 🔍 **Intelligent Search & Analysis** +- **Dual-Strategy Architecture**: Specialized tree-sitter parsing for 7 core languages, fallback strategy for 50+ file types +- **Direct Tree-sitter Integration**: No regex fallbacks for specialized languages - fail fast with clear errors - **Advanced Search**: Auto-detects and uses the best available tool (ugrep, ripgrep, ag, or grep) -- **Regex Support**: Full regex pattern matching with ReDoS attack prevention -- **Fuzzy Search**: True fuzzy matching with edit distance (ugrep) or word boundary patterns -- **File Analysis**: Deep insights into structure, imports, classes, methods, and complexity metrics - -### 🗂️ **Multi-Language Support** -- **Mainstream Languages**: Java, Python, JavaScript/TypeScript, C/C++, Go, Rust, C# -- **Mobile Development**: Swift, Kotlin, Objective-C/C++, React Native -- **Web Frontend**: Vue, React, Svelte, Astro, HTML, CSS, SCSS -- **Database**: SQL (MySQL, PostgreSQL, SQLite), NoSQL, stored procedures, migrations -- **Scripting**: Ruby, PHP, Shell, PowerShell, Bash -- **Systems**: C/C++, Rust, Go, Zig -- **JVM Ecosystem**: Java, Kotlin, Scala, Groovy -- **Others**: Lua, Perl, R, MATLAB, configuration files -- **50+ File Types Total** - [View complete list](#supported-file-types) +- **Universal File Support**: Comprehensive coverage from advanced AST parsing to basic file indexing +- **File Analysis**: Deep insights into structure, imports, classes, methods, and complexity metrics after running `build_deep_index` + +### 🗂️ **Multi-Language Support** +- **7 Languages with Tree-sitter AST Parsing**: Python, JavaScript, TypeScript, Java, Go, Objective-C, Zig +- **50+ File Types with Fallback Strategy**: C/C++, Rust, Ruby, PHP, and all other programming languages +- **Document & Config Files**: Markdown, JSON, YAML, XML with appropriate handling +- **Web Frontend**: Vue, React, Svelte, HTML, CSS, SCSS +- **Database**: SQL variants, NoSQL, stored procedures, migrations +- **Configuration**: JSON, YAML, XML, Markdown +- **[View complete list](#supported-file-types)** + +### ⚡ **Real-time Monitoring & Auto-refresh** +- **File Watcher**: Automatic index updates when files change +- **Cross-platform**: Native OS file system monitoring +- **Smart Processing**: Batches rapid changes to prevent excessive rebuilds +- **Shallow Index Refresh**: Watches file changes and keeps the file list current; run a deep rebuild when you need symbol metadata ### ⚡ **Performance & Efficiency** -- **Smart Indexing**: Recursively scans with intelligent filtering of build directories +- **Tree-sitter AST Parsing**: Native syntax parsing for accurate symbol extraction - **Persistent Caching**: Stores indexes for lightning-fast subsequent access -- **Lazy Loading**: Tools detected only when needed for optimal startup -- **Memory Efficient**: Intelligent caching strategies for large codebases +- **Smart Filtering**: Intelligent exclusion of build directories and temporary files +- **Memory Efficient**: Optimized for large codebases +- **Direct Dependencies**: No fallback mechanisms - fail fast with clear error messages ## Supported File Types
📁 Programming Languages (Click to expand) -**System & Low-Level:** -- C/C++ (`.c`, `.cpp`, `.h`, `.hpp`) -- Rust (`.rs`) -- Zig (`.zig`) -- Go (`.go`) - -**Object-Oriented:** -- Java (`.java`) -- C# (`.cs`) -- Kotlin (`.kt`) -- Scala (`.scala`) -- Objective-C/C++ (`.m`, `.mm`) -- Swift (`.swift`) - -**Scripting & Dynamic:** -- Python (`.py`) -- JavaScript/TypeScript (`.js`, `.ts`, `.jsx`, `.tsx`, `.mjs`, `.cjs`) -- Ruby (`.rb`) -- PHP (`.php`) -- Shell (`.sh`, `.bash`) +**Languages with Specialized Tree-sitter Strategies:** +- **Python** (`.py`, `.pyw`) - Full AST analysis with class/method extraction and call tracking +- **JavaScript** (`.js`, `.jsx`, `.mjs`, `.cjs`) - ES6+ class and function parsing with tree-sitter +- **TypeScript** (`.ts`, `.tsx`) - Complete type-aware symbol extraction with interfaces +- **Java** (`.java`) - Full class hierarchy, method signatures, and call relationships +- **Go** (`.go`) - Struct methods, receiver types, and function analysis +- **Objective-C** (`.m`, `.mm`) - Class/instance method distinction with +/- notation +- **Zig** (`.zig`, `.zon`) - Function and struct parsing with tree-sitter AST + +**All Other Programming Languages:** +All other programming languages use the **FallbackParsingStrategy** which provides basic file indexing and metadata extraction. This includes: +- **System & Low-Level:** C/C++ (`.c`, `.cpp`, `.h`, `.hpp`), Rust (`.rs`) +- **Object-Oriented:** C# (`.cs`), Kotlin (`.kt`), Scala (`.scala`), Swift (`.swift`) +- **Scripting & Dynamic:** Ruby (`.rb`), PHP (`.php`), Shell (`.sh`, `.bash`) +- **And 40+ more file types** - All handled through the fallback strategy for basic indexing
@@ -122,28 +160,6 @@ Code Index MCP is a [Model Context Protocol](https://modelcontextprotocol.io) se -## Quick Start - -### 🚀 **Recommended Setup (Most Users)** - -The easiest way to get started with any MCP-compatible application: - -**Prerequisites:** Python 3.10+ and [uv](https://github.com/astral-sh/uv) installed - -1. **Add to your MCP configuration** (e.g., `claude_desktop_config.json` or `~/.claude.json`): - ```json - { - "mcpServers": { - "code-index": { - "command": "uvx", - "args": ["code-index-mcp"] - } - } - } - ``` - -2. **Restart your application** – `uvx` automatically handles installation and execution - ### 🛠️ **Development Setup** For contributing or local development: @@ -161,7 +177,7 @@ For contributing or local development: "mcpServers": { "code-index": { "command": "uv", - "args": ["run", "code_index_mcp"] + "args": ["run", "code-index-mcp"] } } } @@ -169,7 +185,7 @@ For contributing or local development: 3. **Debug with MCP Inspector:** ```bash - npx @modelcontextprotocol/inspector uv run code_index_mcp + npx @modelcontextprotocol/inspector uv run code-index-mcp ```
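AGENTS.md (added in this change) asks contributors to keep runnable suites under `tests/` and drive them with `uv run pytest`. As a worked example grounded in this diff, a hypothetical `tests/test_constants.py` could pin down the centralized filtering configuration introduced in `src/code_index_mcp/constants.py`:

```python
# tests/test_constants.py — illustrative regression tests for the
# centralized filtering configuration (hypothetical test module name).
from code_index_mcp.constants import FILTER_CONFIG, SUPPORTED_EXTENSIONS

def test_dependency_directories_are_excluded():
    assert "node_modules" in FILTER_CONFIG["exclude_directories"]
    assert "__pycache__" in FILTER_CONFIG["exclude_directories"]

def test_lock_files_are_excluded():
    assert "package-lock.json" in FILTER_CONFIG["exclude_files"]

def test_extension_list_is_shared():
    # FILTER_CONFIG reuses the authoritative SUPPORTED_EXTENSIONS list.
    assert FILTER_CONFIG["supported_extensions"] is SUPPORTED_EXTENSIONS
    assert ".py" in SUPPORTED_EXTENSIONS
    assert ".zig" in SUPPORTED_EXTENSIONS
```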
@@ -195,21 +211,31 @@ Then configure:
+ ## Available Tools ### 🏗️ **Project Management** | Tool | Description | |------|-------------| | **`set_project_path`** | Initialize indexing for a project directory | -| **`refresh_index`** | Rebuild the project index after file changes | +| **`refresh_index`** | Rebuild the shallow file index after file changes | +| **`build_deep_index`** | Generate the full symbol index used by deep analysis | | **`get_settings_info`** | View current project configuration and status | +*Run `build_deep_index` when you need symbol-level data; the default shallow index powers quick file discovery.* + ### 🔍 **Search & Discovery** | Tool | Description | |------|-------------| | **`search_code_advanced`** | Smart search with regex, fuzzy matching, and file filtering | | **`find_files`** | Locate files using glob patterns (e.g., `**/*.py`) | -| **`get_file_summary`** | Analyze file structure, functions, imports, and complexity | +| **`get_file_summary`** | Analyze file structure, functions, imports, and complexity (requires deep index) | + +### 🔄 **Monitoring & Auto-refresh** +| Tool | Description | +|------|-------------| +| **`get_file_watcher_status`** | Check file watcher status and configuration | +| **`configure_file_watcher`** | Enable/disable auto-refresh and configure settings | ### 🛠️ **System & Maintenance** | Tool | Description | @@ -240,6 +266,7 @@ Find all TypeScript component files in src/components Give me a summary of src/api/userService.ts ``` *Uses: `get_file_summary` to show functions, imports, and complexity* +*Tip: run `build_deep_index` first if you get a `needs_deep_index` response.* ### 🔍 **Advanced Search Examples** @@ -273,6 +300,16 @@ Search for "API_ENDPOINT" only in Python files +
+Auto-refresh Configuration + +``` +Configure automatic index updates when files change +``` +*Uses: `configure_file_watcher` to enable/disable monitoring and set debounce timing* + +
+
Project Maintenance @@ -283,6 +320,15 @@ I added new components, please refresh the project index
+## Troubleshooting + +### 🔄 **Auto-refresh Not Working** + +If automatic index updates aren't working when files change, try: +- `pip install watchdog` (may resolve environment isolation issues) +- Use manual refresh: Call the `refresh_index` tool after making file changes +- Check file watcher status: Use `get_file_watcher_status` to verify monitoring is active + ## Development & Contributing ### 🔧 **Building from Source** @@ -290,7 +336,7 @@ I added new components, please refresh the project index git clone https://github.com/johnhuang316/code-index-mcp.git cd code-index-mcp uv sync -uv run code_index_mcp +uv run code-index-mcp ``` ### 🐛 **Debugging** diff --git a/README_ja.md b/README_ja.md index 3464758..79059b1 100644 --- a/README_ja.md +++ b/README_ja.md @@ -22,56 +22,94 @@ Code Index MCPは、AIモデルと複雑なコードベースの橋渡しをす **最適な用途:**コードレビュー、リファクタリング、ドキュメント生成、デバッグ支援、アーキテクチャ解析。 +## クイックスタート + +### 🚀 **推奨セットアップ(ほとんどのユーザー)** + +任意MCP対応アプリケーションで開始する最も簡単な方法: + +**前提条件:** Python 3.10+ および [uv](https://github.com/astral-sh/uv) + +1. **MCP設定に追加** (例:`claude_desktop_config.json` または `~/.claude.json`): + ```json + { + "mcpServers": { + "code-index": { + "command": "uvx", + "args": ["code-index-mcp"] + } + } + } + ``` + +2. **アプリケーションを再起動** – `uvx`がインストールと実行を自動処理 + +3. **使用開始**(AIアシスタントにこれらのプロンプトを与える): + ``` + プロジェクトパスを/Users/dev/my-react-appに設定 + このプロジェクトのすべてのTypeScriptファイルを検索 + 「authentication」関連関数を検索 + メインのApp.tsxファイルを解析 + ``` + +## 一般的な使用ケース + +**コードレビュー**:「旧いAPIを使用しているすべての箇所を検索」 +**リファクタリング支援**:「この関数はどこで呼ばれている?」 +**プロジェクト学習**:「このReactプロジェクトの主要コンポーネントを表示」 +**デバッグ支援**:「エラーハンドリング関連のコードをすべて検索」 + ## 主な機能 ### 🔍 **インテリジェント検索・解析** +- **二重戦略アーキテクチャ**:7つのコア言語に特化したTree-sitter解析、50+ファイルタイプにフォールバック戦略 +- **直接Tree-sitter統合**:特化言語で正規表現フォールバックなし - 明確なエラーメッセージで高速フェイル - **高度な検索**:最適なツール(ugrep、ripgrep、ag、grep)を自動検出・使用 -- **正規表現サポート**:ReDoS攻撃防御付きの完全な正規表現パターンマッチング -- **ファジー検索**:真の編集距離ファジーマッチング(ugrep)または単語境界パターンマッチング -- **ファイル解析**:構造、インポート、クラス、メソッド、複雑度メトリクスへの深い洞察 +- **汎用ファイルサポート**:高度なAST解析から基本ファイルインデックスまでの包括的カバレッジ +- **ファイル解析**:`build_deep_index` 実行後に構造、インポート、クラス、メソッド、複雑度メトリクスを深く把握 ### 🗂️ **多言語サポート** -- **主流言語**:Java、Python、JavaScript/TypeScript、C/C++、Go、Rust、C# -- **モバイル開発**:Swift、Kotlin、Objective-C/C++、React Native -- **Webフロントエンド**:Vue、React、Svelte、Astro、HTML、CSS、SCSS -- **データベース**:SQL (MySQL、PostgreSQL、SQLite)、NoSQL、ストアドプロシージャ、マイグレーション -- **スクリプト言語**:Ruby、PHP、Shell、PowerShell、Bash -- **システム言語**:C/C++、Rust、Go、Zig -- **JVMエコシステム**:Java、Kotlin、Scala、Groovy -- **その他の言語**:Lua、Perl、R、MATLAB、設定ファイル -- **合計50+ファイルタイプ** - [完全なリストを表示](#サポートされているファイルタイプ) +- **7言語でTree-sitter AST解析**:Python、JavaScript、TypeScript、Java、Go、Objective-C、Zig +- **50+ファイルタイプでフォールバック戦略**:C/C++、Rust、Ruby、PHPおよびすべての他のプログラミング言語 +- **文書・設定ファイル**:Markdown、JSON、YAML、XML適切な処理 +- **Webフロントエンド**:Vue、React、Svelte、HTML、CSS、SCSS +- **データベース**:SQLバリアント、NoSQL、ストアドプロシージャ、マイグレーション +- **設定ファイル**:JSON、YAML、XML、Markdown +- **[完全なリストを表示](#サポートされているファイルタイプ)** + +### ⚡ **リアルタイム監視・自動更新** +- **ファイルウォッチャー**:ファイル変更時の自動インデックス更新 +- **クロスプラットフォーム**:ネイティブOSファイルシステム監視 +- **スマート処理**:急速な変更をバッチ処理して過度な再構築を防止 +- **浅いインデックス更新**:ファイル変更を監視して最新のファイル一覧を維持し、シンボルが必要な場合は `build_deep_index` を実行 ### ⚡ **パフォーマンス・効率性** -- **スマートインデックス作成**:ビルドディレクトリをインテリジェントにフィルタリングしながら再帰的スキャン +- **Tree-sitter AST解析**:正確なシンボル抽出のためのネイティブ構文解析 - **永続キャッシュ**:超高速な後続アクセスのためのインデックス保存 -- **遅延ロード**:最適化された起動のため必要時のみツール検出 -- **メモリ効率**:大規模コードベース向けのインテリジェントキャッシュ戦略 +- **スマートフィルタリング**:ビルドディレクトリと一時ファイルのインテリジェント除外 +- **メモリ効率**:大規模コードベース向けに最適化 +- **直接依存関係**:フォールバック機構なし - 明確なエラーメッセージで高速フェイル ## サポートされているファイルタイプ
📁 プログラミング言語(クリックで展開) -**システム・低レベル言語:** -- C/C++ (`.c`, `.cpp`, `.h`, `.hpp`) -- Rust (`.rs`) -- Zig (`.zig`) -- Go (`.go`) - -**オブジェクト指向言語:** -- Java (`.java`) -- C# (`.cs`) -- Kotlin (`.kt`) -- Scala (`.scala`) -- Objective-C/C++ (`.m`, `.mm`) -- Swift (`.swift`) - -**スクリプト・動的言語:** -- Python (`.py`) -- JavaScript/TypeScript (`.js`, `.ts`, `.jsx`, `.tsx`, `.mjs`, `.cjs`) -- Ruby (`.rb`) -- PHP (`.php`) -- Shell (`.sh`, `.bash`) +**特化Tree-sitter戦略言語:** +- **Python** (`.py`, `.pyw`) - クラス/メソッド抽出と呼び出し追跡を含む完全AST解析 +- **JavaScript** (`.js`, `.jsx`, `.mjs`, `.cjs`) - Tree-sitterを使用したES6+クラスと関数解析 +- **TypeScript** (`.ts`, `.tsx`) - インターフェースを含む完全な型認識シンボル抽出 +- **Java** (`.java`) - 完全なクラス階層、メソッドシグネチャ、呼び出し関係 +- **Go** (`.go`) - 構造体メソッド、レシーバータイプ、関数解析 +- **Objective-C** (`.m`, `.mm`) - +/-記法を使用したクラス/インスタンスメソッド区別 +- **Zig** (`.zig`, `.zon`) - Tree-sitter ASTを使用した関数と構造体解析 + +**すべての他のプログラミング言語:** +すべての他のプログラミング言語は**フォールバック解析戦略**を使用し、基本ファイルインデックスとメタデータ抽出を提供します。これには以下が含まれます: +- **システム・低レベル言語:** C/C++ (`.c`, `.cpp`, `.h`, `.hpp`)、Rust (`.rs`) +- **オブジェクト指向言語:** C# (`.cs`)、Kotlin (`.kt`)、Scala (`.scala`)、Swift (`.swift`) +- **スクリプト・動的言語:** Ruby (`.rb`)、PHP (`.php`)、Shell (`.sh`, `.bash`) +- **および40+ファイルタイプ** - すべてフォールバック戦略による基本インデックス処理
@@ -128,7 +166,7 @@ Code Index MCPは、AIモデルと複雑なコードベースの橋渡しをす 任意のMCP対応アプリケーションで開始する最も簡単な方法: -**前提条件:** Python 3.10+ と [uv](https://github.com/astral-sh/uv) がインストール済み +**前提条件:** Python 3.10+ と [uv](https://github.com/astral-sh/uv) 1. **MCP設定に追加**(例:`claude_desktop_config.json` または `~/.claude.json`): ```json @@ -161,7 +199,7 @@ Code Index MCPは、AIモデルと複雑なコードベースの橋渡しをす "mcpServers": { "code-index": { "command": "uv", - "args": ["run", "code_index_mcp"] + "args": ["run", "code-index-mcp"] } } } @@ -169,7 +207,7 @@ Code Index MCPは、AIモデルと複雑なコードベースの橋渡しをす 3. **MCP Inspectorでデバッグ:** ```bash - npx @modelcontextprotocol/inspector uv run code_index_mcp + npx @modelcontextprotocol/inspector uv run code-index-mcp ```
@@ -195,21 +233,31 @@ pip install code-index-mcp
+ ## 利用可能なツール ### 🏗️ **プロジェクト管理** | ツール | 説明 | |--------|------| | **`set_project_path`** | プロジェクトディレクトリのインデックス作成を初期化 | -| **`refresh_index`** | ファイル変更後にプロジェクトインデックスを再構築 | +| **`refresh_index`** | ファイル変更後に浅いファイルインデックスを再構築 | +| **`build_deep_index`** | 深い解析で使う完全なシンボルインデックスを生成 | | **`get_settings_info`** | 現在のプロジェクト設定と状態を表示 | +*シンボルレベルのデータが必要な場合は `build_deep_index` を実行してください。デフォルトの浅いインデックスは高速なファイル探索を担います。* + ### 🔍 **検索・発見** | ツール | 説明 | |--------|------| | **`search_code_advanced`** | 正規表現、ファジーマッチング、ファイルフィルタリング対応のスマート検索 | | **`find_files`** | globパターンを使用したファイル検索(例:`**/*.py`) | -| **`get_file_summary`** | ファイル構造、関数、インポート、複雑度の解析 | +| **`get_file_summary`** | ファイル構造、関数、インポート、複雑度の解析(深いインデックスが必要) | + +### 🔄 **監視・自動更新** +| ツール | 説明 | +|--------|------| +| **`get_file_watcher_status`** | ファイルウォッチャーの状態と設定を確認 | +| **`configure_file_watcher`** | 自動更新の有効化/無効化と設定の構成 | ### 🛠️ **システム・メンテナンス** | ツール | 説明 | @@ -240,6 +288,7 @@ src/components で全てのTypeScriptコンポーネントファイルを見つ src/api/userService.ts の要約を教えてください ``` *使用ツール:`get_file_summary` で関数、インポート、複雑度を表示* +*ヒント:`needs_deep_index` が返った場合は `build_deep_index` を先に実行してください。* ### 🔍 **高度な検索例** @@ -273,6 +322,16 @@ Pythonファイルのみで "API_ENDPOINT" を検索してください +
+自動更新設定 + +``` +ファイル変更時の自動インデックス更新を設定してください +``` +*使用ツール:`configure_file_watcher` で監視の有効化/無効化とデバウンス時間を設定* + +
+
プロジェクトメンテナンス @@ -283,6 +342,15 @@ Pythonファイルのみで "API_ENDPOINT" を検索してください
+## トラブルシューティング + +### 🔄 **自動リフレッシュが動作しない** + +ファイル変更時に自動インデックス更新が動作しない場合、以下を試してください: +- `pip install watchdog`(環境分離の問題を解決する可能性があります) +- 手動リフレッシュを使用:ファイル変更後に `refresh_index` ツールを呼び出す +- ファイルウォッチャーステータスを確認:`get_file_watcher_status` を使用して監視がアクティブかどうかを確認 + ## 開発・貢献 ### 🔧 **ソースからのビルド** @@ -290,7 +358,7 @@ Pythonファイルのみで "API_ENDPOINT" を検索してください git clone https://github.com/johnhuang316/code-index-mcp.git cd code-index-mcp uv sync -uv run code_index_mcp +uv run code-index-mcp ``` ### 🐛 **デバッグ** @@ -308,4 +376,4 @@ npx @modelcontextprotocol/inspector uvx code-index-mcp ### 🌐 **翻訳** - [English](README.md) -- [繁體中文](README_zh.md) \ No newline at end of file +- [繁體中文](README_zh.md) diff --git a/README_ko.md b/README_ko.md new file mode 100644 index 0000000..6995b6a --- /dev/null +++ b/README_ko.md @@ -0,0 +1,284 @@ +# 코드 인덱스 MCP + +
+ +[![MCP Server](https://img.shields.io/badge/MCP-Server-blue)](https://modelcontextprotocol.io) +[![Python](https://img.shields.io/badge/Python-3.10%2B-green)](https://www.python.org/) +[![License](https://img.shields.io/badge/License-MIT-yellow)](LICENSE) + +**대규모 언어 모델을 위한 지능형 코드 인덱싱과 분석** + +고급 검색, 정밀 분석, 유연한 탐색 기능으로 AI가 코드베이스를 이해하고 활용하는 방식을 혁신하세요. + +
+ + + code-index-mcp MCP server + + +## 개요 + +Code Index MCP는 [Model Context Protocol](https://modelcontextprotocol.io) 기반 MCP 서버로, AI 어시스턴트와 복잡한 코드베이스 사이를 연결합니다. 빠른 인덱싱, 강력한 검색, 정밀한 코드 분석을 제공하여 AI가 프로젝트 구조를 정확히 파악하고 효과적으로 지원하도록 돕습니다. + +**이럴 때 안성맞춤:** 코드 리뷰, 리팩터링, 문서화, 디버깅 지원, 아키텍처 분석 + +## 빠른 시작 + +### 🚀 **권장 설정 (대부분의 사용자)** + +어떤 MCP 호환 애플리케이션에서도 몇 단계만으로 시작할 수 있습니다. + +**사전 준비:** Python 3.10+ 및 [uv](https://github.com/astral-sh/uv) + +1. **MCP 설정에 서버 추가** (예: `claude_desktop_config.json` 또는 `~/.claude.json`) + ```json + { + "mcpServers": { + "code-index": { + "command": "uvx", + "args": ["code-index-mcp"] + } + } + } + ``` + +2. **애플리케이션 재시작** – `uvx`가 설치와 실행을 자동으로 처리합니다. + +3. **사용 시작** (AI 어시스턴트에게 아래 프롬프트를 전달) + ``` + 프로젝트 경로를 /Users/dev/my-react-app 으로 설정해줘 + 이 프로젝트에서 모든 TypeScript 파일을 찾아줘 + "authentication" 관련 함수를 검색해줘 + src/App.tsx 파일을 분석해줘 + ``` + +## 대표 사용 사례 + +**코드 리뷰:** "예전 API를 사용하는 부분을 모두 찾아줘" +**리팩터링 지원:** "이 함수는 어디에서 호출되나요?" +**프로젝트 학습:** "이 React 프로젝트의 핵심 컴포넌트를 보여줘" +**디버깅:** "에러 처리 로직이 있는 파일을 찾아줘" + +## 주요 기능 + +### 🧠 **지능형 검색과 분석** +- **듀얼 전략 아키텍처:** 7개 핵심 언어는 전용 tree-sitter 파서를 사용하고, 그 외 50+ 파일 형식은 폴백 전략으로 처리 +- **직접 Tree-sitter 통합:** 특화 언어에 정규식 폴백 없음 – 문제 시 즉시 실패하고 명확한 오류 메시지 제공 +- **고급 검색:** ugrep, ripgrep, ag, grep 중 최적의 도구를 자동 선택해 활용 +- **범용 파일 지원:** 정교한 AST 분석부터 기본 파일 인덱싱까지 폭넓게 커버 +- **파일 분석:** `build_deep_index` 실행 후 구조, 임포트, 클래스, 메서드, 복잡도 지표를 심층적으로 파악 + +### 🗂️ **다중 언어 지원** +- **Tree-sitter AST 분석(7종):** Python, JavaScript, TypeScript, Java, Go, Objective-C, Zig +- **폴백 전략(50+ 형식):** C/C++, Rust, Ruby, PHP 등 대부분의 프로그래밍 언어 지원 +- **문서 및 설정 파일:** Markdown, JSON, YAML, XML 등 상황에 맞는 처리 +- **웹 프론트엔드:** Vue, React, Svelte, HTML, CSS, SCSS +- **데이터 계층:** SQL, NoSQL, 스토어드 프로시저, 마이그레이션 스크립트 +- **구성 파일:** JSON, YAML, XML, Markdown +- **[지원 파일 전체 목록 보기](#지원-파일-형식)** + +### 🔄 **실시간 모니터링 & 자동 새로고침** +- **파일 워처:** 파일 변경 시 자동으로 얕은 인덱스(파일 목록) 갱신 +- **크로스 플랫폼:** 운영체제 기본 파일시스템 이벤트 활용 +- **스마트 처리:** 빠른 변경을 묶어 과도한 재빌드를 방지 +- **얕은 인덱스 갱신:** 파일 목록을 최신 상태로 유지하며, 심볼 데이터가 필요하면 `build_deep_index`를 실행 + +### ⚡ **성능 & 효율성** +- **Tree-sitter AST 파싱:** 정확한 심볼 추출을 위한 네이티브 구문 분석 +- **지속 캐싱:** 인덱스를 저장해 이후 응답 속도를 극대화 +- **스마트 필터링:** 빌드 디렉터리·임시 파일을 자동 제외 +- **메모리 효율:** 대규모 코드베이스를 염두에 둔 설계 +- **직접 의존성:** 불필요한 폴백 없이 명확한 오류 메시지 제공 + +## 지원 파일 형식 + +
+💻 프로그래밍 언어 (클릭하여 확장) + +**전용 Tree-sitter 전략 언어:** +- **Python** (`.py`, `.pyw`) – 클래스/메서드 추출 및 호출 추적이 포함된 완전 AST 분석 +- **JavaScript** (`.js`, `.jsx`, `.mjs`, `.cjs`) – ES6+ 클래스와 함수를 tree-sitter로 파싱 +- **TypeScript** (`.ts`, `.tsx`) – 인터페이스를 포함한 타입 인지 심볼 추출 +- **Java** (`.java`) – 클래스 계층, 메서드 시그니처, 호출 관계 분석 +- **Go** (`.go`) – 구조체 메서드, 리시버 타입, 함수 분석 +- **Objective-C** (`.m`, `.mm`) – 클래스/인스턴스 메서드를 +/- 표기로 구분 +- **Zig** (`.zig`, `.zon`) – 함수와 구조체를 tree-sitter AST로 분석 + +**기타 모든 프로그래밍 언어:** +나머지 언어는 **폴백 파싱 전략**으로 기본 메타데이터와 파일 인덱싱을 제공합니다. 예: +- **시스템/저수준:** C/C++ (`.c`, `.cpp`, `.h`, `.hpp`), Rust (`.rs`) +- **객체지향:** C# (`.cs`), Kotlin (`.kt`), Scala (`.scala`), Swift (`.swift`) +- **스크립트:** Ruby (`.rb`), PHP (`.php`), Shell (`.sh`, `.bash`) +- **그 외 40+ 형식** – 폴백 전략으로 빠른 탐색 가능 + +
+ +
+🌐 웹 프론트엔드 & UI + +- 프레임워크: Vue (`.vue`), Svelte (`.svelte`), Astro (`.astro`) +- 스타일링: CSS (`.css`, `.scss`, `.less`, `.sass`, `.stylus`, `.styl`), HTML (`.html`) +- 템플릿: Handlebars (`.hbs`, `.handlebars`), EJS (`.ejs`), Pug (`.pug`) + +
+ +
+🗄️ 데이터 계층 & SQL + +- **SQL 변형:** 표준 SQL (`.sql`, `.ddl`, `.dml`), 데이터베이스별 방언 (`.mysql`, `.postgresql`, `.psql`, `.sqlite`, `.mssql`, `.oracle`, `.ora`, `.db2`) +- **DB 객체:** 프로시저/함수 (`.proc`, `.procedure`, `.func`, `.function`), 뷰/트리거/인덱스 (`.view`, `.trigger`, `.index`) +- **마이그레이션 도구:** 마이그레이션 파일 (`.migration`, `.seed`, `.fixture`, `.schema`), 도구 구성 (`.liquibase`, `.flyway`) +- **NoSQL & 그래프:** 질의 언어 (`.cql`, `.cypher`, `.sparql`, `.gql`) + +
+ +
+📄 문서 & 설정 파일 + +- Markdown (`.md`, `.mdx`) +- 구성 파일 (`.json`, `.xml`, `.yml`, `.yaml`) + +
+ +## 사용 가능한 도구 + +### 🏗️ **프로젝트 관리** +| 도구 | 설명 | +|------|------| +| **`set_project_path`** | 프로젝트 디렉터리의 인덱스를 초기화 | +| **`refresh_index`** | 파일 변경 후 얕은 파일 인덱스를 재생성 | +| **`build_deep_index`** | 심층 분석에 사용하는 전체 심볼 인덱스를 생성 | +| **`get_settings_info`** | 현재 프로젝트 설정과 상태를 확인 | + +*심볼 레벨 데이터가 필요하면 `build_deep_index`를 실행하세요. 기본 얕은 인덱스는 빠른 파일 탐색을 담당합니다.* + +### 🔍 **검색 & 탐색** +| 도구 | 설명 | +|------|------| +| **`search_code_advanced`** | 정규식, 퍼지 매칭, 파일 필터링을 지원하는 스마트 검색 | +| **`find_files`** | 글롭 패턴으로 파일 찾기 (예: `**/*.py`) | +| **`get_file_summary`** | 파일 구조, 함수, 임포트, 복잡도를 분석 (심층 인덱스 필요) | + +### 🔄 **모니터링 & 자동 새로고침** +| 도구 | 설명 | +|------|------| +| **`get_file_watcher_status`** | 파일 워처 상태와 구성을 확인 | +| **`configure_file_watcher`** | 자동 새로고침 설정 (활성/비활성, 지연 시간, 추가 제외 패턴) | + +### 🛠️ **시스템 & 유지 관리** +| 도구 | 설명 | +|------|------| +| **`create_temp_directory`** | 인덱스 저장용 임시 디렉터리를 생성 | +| **`check_temp_directory`** | 인덱스 저장 위치와 권한을 확인 | +| **`clear_settings`** | 모든 설정과 캐시 데이터를 초기화 | +| **`refresh_search_tools`** | 사용 가능한 검색 도구를 재검색 (ugrep, ripgrep 등) | + +## 사용 예시 + +### 🧭 **빠른 시작 워크플로** + +**1. 프로젝트 초기화** +``` +프로젝트 경로를 /Users/dev/my-react-app 으로 설정해줘 +``` +*프로젝트를 설정하고 얕은 인덱스를 생성합니다.* + +**2. 프로젝트 구조 탐색** +``` +src/components 안의 TypeScript 컴포넌트 파일을 모두 찾아줘 +``` +*사용 도구: `find_files` (`src/components/**/*.tsx`)* + +**3. 핵심 파일 분석** +``` +src/api/userService.ts 요약을 알려줘 +``` +*사용 도구: `get_file_summary` (함수, 임포트, 복잡도 표시)* +*팁: `needs_deep_index` 응답이 나오면 먼저 `build_deep_index`를 실행하세요.* + +### 🔍 **고급 검색 예시** + +
+코드 패턴 검색 + +``` +"get.*Data"에 해당하는 함수 호출을 정규식으로 찾아줘 +``` +*예: `getData()`, `getUserData()`, `getFormData()`* + +
+ +
+퍼지 함수 검색 + +``` +'authUser'와 유사한 인증 관련 함수를 찾아줘 +``` +*예: `authenticateUser`, `authUserToken`, `userAuthCheck`* + +
+ +
+언어별 검색 + +``` +Python 파일에서만 "API_ENDPOINT" 를 찾아줘 +``` +*`search_code_advanced` + `file_pattern="*.py"`* + +
+ +
+자동 새로고침 설정 + +``` +파일 변경 시 자동으로 인덱스를 새로고침하도록 설정해줘 +``` +*`configure_file_watcher`로 활성화 및 지연 시간 설정* + +
+ +
+프로젝트 유지 관리 + +``` +새 컴포넌트를 추가했어. 프로젝트 인덱스를 다시 빌드해줘 +``` +*`refresh_index`로 빠르게 얕은 인덱스를 업데이트* + +
+ +## 문제 해결 + +### 🔄 **자동 새로고침이 동작하지 않을 때** +- 환경 문제로 `watchdog`가 빠졌다면 설치: `pip install watchdog` +- 수동 새로고침: 변경 후 `refresh_index` 도구 실행 +- 워처 상태 확인: `get_file_watcher_status` 도구로 활성 여부 점검 + +## 개발 & 기여 + +### 🛠️ **소스에서 실행하기** +```bash +git clone https://github.com/johnhuang316/code-index-mcp.git +cd code-index-mcp +uv sync +uv run code-index-mcp +``` + +### 🧪 **디버깅 도구** +```bash +npx @modelcontextprotocol/inspector uvx code-index-mcp +``` + +### 🤝 **기여 안내** +Pull Request를 언제든 환영합니다. 변경 사항과 테스트 방법을 함께 공유해주세요. + +--- + +### 📄 **라이선스** +[MIT License](LICENSE) + +### 🌍 **번역본** +- [English](README.md) +- [繁體中文](README_zh.md) +- [日本語](README_ja.md) diff --git a/README_zh.md b/README_zh.md index aefb24a..1e9c5ae 100644 --- a/README_zh.md +++ b/README_zh.md @@ -22,56 +22,94 @@ **適用於:**程式碼審查、重構、文件生成、除錯協助和架構分析。 +## 快速開始 + +### 🚀 **推薦設定(大多數使用者)** + +與任何 MCP 相容應用程式開始的最簡單方式: + +**前置需求:** Python 3.10+ 和 [uv](https://github.com/astral-sh/uv) + +1. **新增到您的 MCP 設定** (例如 `claude_desktop_config.json` 或 `~/.claude.json`): + ```json + { + "mcpServers": { + "code-index": { + "command": "uvx", + "args": ["code-index-mcp"] + } + } + } + ``` + +2. **重新啟動應用程式** – `uvx` 會自動處理安裝和執行 + +3. **開始使用**(向您的 AI 助理提供這些提示): + ``` + 設定專案路徑為 /Users/dev/my-react-app + 在這個專案中找到所有 TypeScript 檔案 + 搜尋「authentication」相關函數 + 分析主要的 App.tsx 檔案 + ``` + +## 典型使用場景 + +**程式碼審查**:「找出所有使用舊 API 的地方」 +**重構協助**:「這個函數在哪裡被呼叫?」 +**學習專案**:「顯示這個 React 專案的主要元件」 +**除錯協助**:「搜尋所有錯誤處理相關的程式碼」 + ## 主要特性 ### 🔍 **智慧搜尋與分析** +- **雙策略架構**:7 種核心語言使用專業化 Tree-sitter 解析,50+ 種檔案類型使用備用策略 +- **直接 Tree-sitter 整合**:專業化語言無正則表達式備用 - 快速失敗並提供清晰錯誤訊息 - **進階搜尋**:自動偵測並使用最佳工具(ugrep、ripgrep、ag 或 grep) -- **正規表達式支援**:完整的正規表達式模式匹配,具備 ReDoS 攻擊防護 -- **模糊搜尋**:真正的編輯距離模糊匹配(ugrep)或詞邊界模式匹配 -- **檔案分析**:深入了解結構、匯入、類別、方法和複雜度指標 +- **通用檔案支援**:從進階 AST 解析到基本檔案索引的全面覆蓋 +- **檔案分析**:執行 `build_deep_index` 後深入了解結構、匯入、類別、方法和複雜度指標 ### 🗂️ **多語言支援** -- **主流語言**:Java、Python、JavaScript/TypeScript、C/C++、Go、Rust、C# -- **行動開發**:Swift、Kotlin、Objective-C/C++、React Native -- **網頁前端**:Vue、React、Svelte、Astro、HTML、CSS、SCSS -- **資料庫**:SQL (MySQL、PostgreSQL、SQLite)、NoSQL、存儲過程、遷移腳本 -- **腳本語言**:Ruby、PHP、Shell、PowerShell、Bash -- **系統語言**:C/C++、Rust、Go、Zig -- **JVM 生態系**:Java、Kotlin、Scala、Groovy -- **其他語言**:Lua、Perl、R、MATLAB、配置檔案 -- **共 50+ 種檔案類型** - [查看完整列表](#支援的檔案類型) +- **7 種語言使用 Tree-sitter AST 解析**:Python、JavaScript、TypeScript、Java、Go、Objective-C、Zig +- **50+ 種檔案類型使用備用策略**:C/C++、Rust、Ruby、PHP 和所有其他程式語言 +- **文件與配置檔案**:Markdown、JSON、YAML、XML 適當處理 +- **網頁前端**:Vue、React、Svelte、HTML、CSS、SCSS +- **資料庫**:SQL 變體、NoSQL、存儲過程、遷移腳本 +- **配置檔案**:JSON、YAML、XML、Markdown +- **[查看完整列表](#支援的檔案類型)** + +### ⚡ **即時監控與自動刷新** +- **檔案監控器**:檔案變更時自動更新索引 +- **跨平台**:原生作業系統檔案系統監控 +- **智慧處理**:批次處理快速變更以防止過度重建 +- **淺層索引更新**:監控檔案變更並維持檔案清單最新;需要符號資料時請執行 `build_deep_index` ### ⚡ **效能與效率** -- **智慧索引**:遞迴掃描並智慧篩選建構目錄 +- **Tree-sitter AST 解析**:原生語法解析以實現準確的符號提取 - **持久快取**:儲存索引以實現超快速的後續存取 -- **延遲載入**:僅在需要時偵測工具以優化啟動速度 -- **記憶體高效**:針對大型程式碼庫的智慧快取策略 +- **智慧篩選**:智能排除建構目錄和暫存檔案 +- **記憶體高效**:針對大型程式碼庫優化 +- **直接依賴**:無備用機制 - 快速失敗並提供清晰錯誤訊息 ## 支援的檔案類型
📁 程式語言(點擊展開) -**系統與低階語言:** -- C/C++ (`.c`, `.cpp`, `.h`, `.hpp`) -- Rust (`.rs`) -- Zig (`.zig`) -- Go (`.go`) - -**物件導向語言:** -- Java (`.java`) -- C# (`.cs`) -- Kotlin (`.kt`) -- Scala (`.scala`) -- Objective-C/C++ (`.m`, `.mm`) -- Swift (`.swift`) - -**腳本與動態語言:** -- Python (`.py`) -- JavaScript/TypeScript (`.js`, `.ts`, `.jsx`, `.tsx`, `.mjs`, `.cjs`) -- Ruby (`.rb`) -- PHP (`.php`) -- Shell (`.sh`, `.bash`) +**專業化 Tree-sitter 策略語言:** +- **Python** (`.py`, `.pyw`) - 完整 AST 分析,包含類別/方法提取和呼叫追蹤 +- **JavaScript** (`.js`, `.jsx`, `.mjs`, `.cjs`) - ES6+ 類別和函數解析使用 Tree-sitter +- **TypeScript** (`.ts`, `.tsx`) - 完整類型感知符號提取,包含介面 +- **Java** (`.java`) - 完整類別階層、方法簽名和呼叫關係 +- **Go** (`.go`) - 結構方法、接收者類型和函數分析 +- **Objective-C** (`.m`, `.mm`) - 類別/實例方法區分,使用 +/- 標記法 +- **Zig** (`.zig`, `.zon`) - 函數和結構解析使用 Tree-sitter AST + +**所有其他程式語言:** +所有其他程式語言使用 **備用解析策略**,提供基本檔案索引和元資料提取。包括: +- **系統與低階語言:** C/C++ (`.c`, `.cpp`, `.h`, `.hpp`)、Rust (`.rs`) +- **物件導向語言:** C# (`.cs`)、Kotlin (`.kt`)、Scala (`.scala`)、Swift (`.swift`) +- **腳本與動態語言:** Ruby (`.rb`)、PHP (`.php`)、Shell (`.sh`, `.bash`) +- **以及 40+ 種檔案類型** - 全部通過備用策略處理進行基本索引
@@ -128,7 +166,7 @@ 在任何相容 MCP 的應用程式中開始使用的最簡單方法: -**先決條件:** Python 3.10+ 和 [uv](https://github.com/astral-sh/uv) 已安裝 +**先決條件:** Python 3.10+ 和 [uv](https://github.com/astral-sh/uv) 1. **新增到您的 MCP 配置**(例如 `claude_desktop_config.json` 或 `~/.claude.json`): ```json @@ -161,7 +199,7 @@ "mcpServers": { "code-index": { "command": "uv", - "args": ["run", "code_index_mcp"] + "args": ["run", "code-index-mcp"] } } } @@ -169,7 +207,7 @@ 3. **使用 MCP Inspector 除錯:** ```bash - npx @modelcontextprotocol/inspector uv run code_index_mcp + npx @modelcontextprotocol/inspector uv run code-index-mcp ```
@@ -195,21 +233,31 @@ pip install code-index-mcp
+ ## 可用工具 ### 🏗️ **專案管理** | 工具 | 描述 | |------|------| | **`set_project_path`** | 為專案目錄初始化索引 | -| **`refresh_index`** | 在檔案變更後重建專案索引 | +| **`refresh_index`** | 在檔案變更後重建淺層檔案索引 | +| **`build_deep_index`** | 產生供深度分析使用的完整符號索引 | | **`get_settings_info`** | 檢視目前專案配置和狀態 | +*需要符號層級資料時,請執行 `build_deep_index`;預設的淺層索引提供快速檔案探索。* + ### 🔍 **搜尋與探索** | 工具 | 描述 | |------|------| | **`search_code_advanced`** | 智慧搜尋,支援正規表達式、模糊匹配和檔案篩選 | | **`find_files`** | 使用萬用字元模式尋找檔案(例如 `**/*.py`) | -| **`get_file_summary`** | 分析檔案結構、函式、匯入和複雜度 | +| **`get_file_summary`** | 分析檔案結構、函式、匯入和複雜度(需要深度索引) | + +### 🔄 **監控與自動刷新** +| 工具 | 描述 | +|------|------| +| **`get_file_watcher_status`** | 檢查檔案監控器狀態和配置 | +| **`configure_file_watcher`** | 啟用/停用自動刷新並配置設定 | ### 🛠️ **系統與維護** | 工具 | 描述 | @@ -240,6 +288,7 @@ pip install code-index-mcp 給我 src/api/userService.ts 的摘要 ``` *使用:`get_file_summary` 顯示函式、匯入和複雜度* +*提示:若收到 `needs_deep_index` 回應,請先執行 `build_deep_index`。* ### 🔍 **進階搜尋範例** @@ -273,6 +322,16 @@ pip install code-index-mcp +
+自動刷新配置 + +``` +配置檔案變更時的自動索引更新 +``` +*使用:`configure_file_watcher` 啟用/停用監控並設定防抖時間* + +
+
專案維護 @@ -283,6 +342,15 @@ pip install code-index-mcp
+## 故障排除 + +### 🔄 **自動刷新無法運作** + +如果檔案變更時自動索引更新無法運作,請嘗試: +- `pip install watchdog`(可能解決環境隔離問題) +- 使用手動刷新:在檔案變更後呼叫 `refresh_index` 工具 +- 檢查檔案監視器狀態:使用 `get_file_watcher_status` 驗證監控是否處於活動狀態 + ## 開發與貢獻 ### 🔧 **從原始碼建構** @@ -290,7 +358,7 @@ pip install code-index-mcp git clone https://github.com/johnhuang316/code-index-mcp.git cd code-index-mcp uv sync -uv run code_index_mcp +uv run code-index-mcp ``` ### 🐛 **除錯** @@ -308,4 +376,4 @@ npx @modelcontextprotocol/inspector uvx code-index-mcp ### 🌐 **翻譯** - [English](README.md) -- [日本語](README_ja.md) \ No newline at end of file +- [日本語](README_ja.md) diff --git a/RELEASE_NOTE.txt b/RELEASE_NOTE.txt new file mode 100644 index 0000000..8a744bb --- /dev/null +++ b/RELEASE_NOTE.txt @@ -0,0 +1,7 @@ +## 2.4.1 - Search Filtering Alignment + +### Highlights +- Code search now shares the central FileFilter blacklist, keeping results consistent with indexing (no more `node_modules` noise). +- CLI search strategies emit the appropriate exclusion flags automatically (ripgrep, ugrep, ag, grep). +- Basic fallback search prunes excluded directories during traversal, avoiding unnecessary IO. +- Added regression coverage for the new filtering behaviour (`tests/search/test_search_filters.py`). diff --git a/pyproject.toml b/pyproject.toml index 3c80757..428e2d3 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "code-index-mcp" -version = "0.5.0" +version = "2.4.1" description = "Code indexing and analysis tools for LLMs using MCP" readme = "README.md" requires-python = ">=3.10" @@ -14,6 +14,14 @@ authors = [ ] dependencies = [ "mcp>=0.3.0", + "watchdog>=3.0.0", + "tree-sitter>=0.20.0", + "tree-sitter-javascript>=0.20.0", + "tree-sitter-typescript>=0.20.0", + "tree-sitter-java>=0.20.0", + "tree-sitter-zig>=0.20.0", + "pathspec>=0.12.1", + "msgpack>=1.0.0", ] [project.urls] diff --git a/requirements.txt b/requirements.txt index a1da66e..1a80b2f 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1 +1,10 @@ mcp>=0.3.0 +watchdog>=3.0.0 +protobuf>=4.21.0 +tree-sitter>=0.20.0 +tree-sitter-javascript>=0.20.0 +tree-sitter-typescript>=0.20.0 +tree-sitter-java>=0.20.0 +tree-sitter-zig>=0.20.0 +pathspec>=0.12.1 +libclang>=16.0.0 diff --git a/run.py b/run.py index b07486f..1303bfe 100644 --- a/run.py +++ b/run.py @@ -4,7 +4,6 @@ """ import sys import os -import traceback # Add src directory to path src_path = os.path.join(os.path.dirname(os.path.abspath(__file__)), 'src') @@ -14,15 +13,7 @@ from code_index_mcp.server import main if __name__ == "__main__": - print("Starting Code Index MCP server...", file=sys.stderr) - print(f"Added path: {src_path}", file=sys.stderr) main() -except ImportError as e: - print(f"Import Error: {e}", file=sys.stderr) - print(f"Current sys.path: {sys.path}", file=sys.stderr) - print("Traceback:", file=sys.stderr) - traceback.print_exc(file=sys.stderr) -except Exception as e: - print(f"Error starting server: {e}", file=sys.stderr) - print("Traceback:", file=sys.stderr) - traceback.print_exc(file=sys.stderr) +except Exception: + # Exit silently on failure without printing any messages + raise SystemExit(1) diff --git a/src/code_index_mcp/__init__.py b/src/code_index_mcp/__init__.py index 06cbc6e..f47ee02 100644 --- a/src/code_index_mcp/__init__.py +++ b/src/code_index_mcp/__init__.py @@ -3,4 +3,5 @@ A Model Context Protocol server for code indexing, searching, and analysis. 
""" -__version__ = "0.4.1" +__version__ = "2.4.1" + diff --git a/src/code_index_mcp/analyzers/__init__.py b/src/code_index_mcp/analyzers/__init__.py deleted file mode 100644 index 6060599..0000000 --- a/src/code_index_mcp/analyzers/__init__.py +++ /dev/null @@ -1,22 +0,0 @@ -"""Language analyzers for code analysis.""" - -from .base_analyzer import LanguageAnalyzer -from .analyzer_factory import AnalyzerFactory -from .analysis_result import AnalysisResult, Symbol -from .python_analyzer import PythonAnalyzer -from .javascript_analyzer import JavaScriptAnalyzer -from .java_analyzer import JavaAnalyzer -from .objective_c_analyzer import ObjectiveCAnalyzer -from .default_analyzer import DefaultAnalyzer - -__all__ = [ - 'LanguageAnalyzer', - 'AnalyzerFactory', - 'AnalysisResult', - 'Symbol', - 'PythonAnalyzer', - 'JavaScriptAnalyzer', - 'JavaAnalyzer', - 'ObjectiveCAnalyzer', - 'DefaultAnalyzer', -] diff --git a/src/code_index_mcp/analyzers/analysis_result.py b/src/code_index_mcp/analyzers/analysis_result.py deleted file mode 100644 index 4788f8a..0000000 --- a/src/code_index_mcp/analyzers/analysis_result.py +++ /dev/null @@ -1,96 +0,0 @@ -"""Standardized analysis result structure.""" - -from dataclasses import dataclass, field -from typing import Dict, List, Any, Optional - - -@dataclass -class Symbol: - """Represents a code symbol (function, class, etc.).""" - name: str - line: int - symbol_type: str # 'function', 'class', 'import', 'variable', etc. - metadata: Dict[str, Any] = field(default_factory=dict) - - -@dataclass -class AnalysisResult: - """Standardized result structure for all analyzers.""" - # Basic file information - file_path: str - line_count: int - size_bytes: int - extension: str - analysis_type: str - - # Symbols found in the file - symbols: Dict[str, List[Symbol]] = field(default_factory=dict) - - # Summary counts - counts: Dict[str, int] = field(default_factory=dict) - - # Language-specific metadata - metadata: Dict[str, Any] = field(default_factory=dict) - - # Error information if analysis failed - error: Optional[str] = None - - def add_symbol(self, symbol_type: str, name: str, line: int, metadata: Dict[str, Any] = None): - """Add a symbol to the result.""" - if symbol_type not in self.symbols: - self.symbols[symbol_type] = [] - - symbol = Symbol( - name=name, - line=line, - symbol_type=symbol_type, - metadata=metadata or {} - ) - self.symbols[symbol_type].append(symbol) - - # Update counts - count_key = f"{symbol_type}_count" - self.counts[count_key] = self.counts.get(count_key, 0) + 1 - - def get_symbols(self, symbol_type: str) -> List[Symbol]: - """Get symbols of a specific type.""" - return self.symbols.get(symbol_type, []) - - def get_count(self, symbol_type: str) -> int: - """Get count of symbols of a specific type.""" - return self.counts.get(f"{symbol_type}_count", 0) - - def to_dict(self) -> Dict[str, Any]: - """Convert to dictionary for backwards compatibility.""" - result = { - "file_path": self.file_path, - "line_count": self.line_count, - "size_bytes": self.size_bytes, - "extension": self.extension, - "analysis_type": self.analysis_type, - } - - # Add error if present - if self.error: - result["error"] = self.error - return result - - # Add symbol lists (backwards compatibility) - for symbol_type, symbols in self.symbols.items(): - if symbol_type == "import": - # Special handling for imports - return strings for backwards compatibility - result["imports"] = [s.name for s in symbols] - else: - # Return list of dicts for other symbols - result[f"{symbol_type}s"] 
= [ - {"line": s.line, "name": s.name, **s.metadata} - for s in symbols - ] - - # Add counts - result.update(self.counts) - - # Add metadata - result.update(self.metadata) - - return result \ No newline at end of file diff --git a/src/code_index_mcp/analyzers/analyzer_factory.py b/src/code_index_mcp/analyzers/analyzer_factory.py deleted file mode 100644 index 70a7a52..0000000 --- a/src/code_index_mcp/analyzers/analyzer_factory.py +++ /dev/null @@ -1,80 +0,0 @@ -"""Factory for creating language-specific analyzers.""" - -from typing import Dict, Type, Optional -from .base_analyzer import LanguageAnalyzer -from .default_analyzer import DefaultAnalyzer -from .python_analyzer import PythonAnalyzer -from .javascript_analyzer import JavaScriptAnalyzer -from .java_analyzer import JavaAnalyzer -from .objective_c_analyzer import ObjectiveCAnalyzer - - -class AnalyzerFactory: - """Factory class for creating language-specific analyzers.""" - - _analyzers: Dict[str, Type[LanguageAnalyzer]] = {} - - @classmethod - def register(cls, extensions: list[str], analyzer_class: Type[LanguageAnalyzer]) -> None: - """ - Register an analyzer for specific file extensions. - - Args: - extensions: List of file extensions (e.g., ['.py', '.pyx']) - analyzer_class: The analyzer class to register - """ - for extension in extensions: - cls._analyzers[extension.lower()] = analyzer_class - - @classmethod - def get_analyzer(cls, extension: str) -> LanguageAnalyzer: - """ - Get an analyzer instance for the given file extension. - - Args: - extension: The file extension (e.g., '.py') - - Returns: - Language analyzer instance, or DefaultAnalyzer if not found - """ - extension = extension.lower() - analyzer_class = cls._analyzers.get(extension, DefaultAnalyzer) - # Create instance - return analyzer_class() - - @classmethod - def get_supported_extensions(cls) -> list[str]: - """ - Get all supported file extensions. - - Returns: - List of all registered extensions - """ - return list(cls._analyzers.keys()) - - @classmethod - def is_extension_supported(cls, extension: str) -> bool: - """ - Check if an extension has a specific analyzer. - - Args: - extension: The file extension to check - - Returns: - True if a specific analyzer exists for the extension - """ - return extension.lower() in cls._analyzers - - -# Initialize factory with built-in analyzers -def _initialize_factory(): - """Initialize the factory with built-in analyzers.""" - # Register analyzers - AnalyzerFactory.register(['.py'], PythonAnalyzer) - AnalyzerFactory.register(['.js', '.jsx', '.ts', '.tsx', '.mjs', '.cjs'], JavaScriptAnalyzer) - AnalyzerFactory.register(['.java'], JavaAnalyzer) - AnalyzerFactory.register(['.m', '.mm'], ObjectiveCAnalyzer) - - -# Initialize on import -_initialize_factory() diff --git a/src/code_index_mcp/analyzers/base_analyzer.py b/src/code_index_mcp/analyzers/base_analyzer.py deleted file mode 100644 index 55a9c7c..0000000 --- a/src/code_index_mcp/analyzers/base_analyzer.py +++ /dev/null @@ -1,92 +0,0 @@ -"""Base analyzer interface for language-specific code analysis.""" - -from abc import ABC, abstractmethod -from typing import Dict, Any, List, Optional -import os -import re -from .analysis_result import AnalysisResult - - -class LanguageAnalyzer(ABC): - """Abstract base class for language-specific code analyzers.""" - - @abstractmethod - def analyze(self, content: str, file_path: str, full_path: str = None) -> AnalysisResult: - """ - Analyze the content of a file and return structured information. 
- - Args: - content: The file content as a string - file_path: The relative path of the file - full_path: The absolute path of the file (optional) - - Returns: - AnalysisResult containing structured analysis information - """ - - - def _count_lines(self, content: str) -> int: - """Count the number of lines in the content.""" - return len(content.splitlines()) - - def _get_file_size(self, content: str, full_path: str = None) -> int: - """Get the file size in bytes.""" - if full_path: - try: - return os.path.getsize(full_path) - except (OSError, IOError): - pass - # Fallback to content size in bytes - return len(content.encode('utf-8')) - - def _filter_comments_and_empty_lines(self, lines: List[str], comment_patterns: List[str] = None) -> List[str]: - """Filter out comments and empty lines.""" - if comment_patterns is None: - comment_patterns = ['//', '#', '/*', '*', '--'] - - filtered_lines = [] - in_multiline_comment = False - - for line in lines: - stripped = line.strip() - - # Skip empty lines - if not stripped: - continue - - # Handle multiline comments - if '/*' in stripped: - in_multiline_comment = True - if '*/' in stripped: - in_multiline_comment = False - continue - if in_multiline_comment: - continue - - # Skip single line comments - is_comment = False - for pattern in comment_patterns: - if stripped.startswith(pattern): - is_comment = True - break - - if not is_comment: - filtered_lines.append(stripped) - - return filtered_lines - - # Constants for ReDoS protection - MAX_PATTERN_LENGTH = 500 - MAX_WILDCARD_COUNT = 10 - - def _safe_regex_match(self, pattern: str, text: str) -> Optional[re.Match]: - """Safely match regex pattern with timeout protection.""" - try: - # Simple pattern validation to prevent ReDoS - if (len(pattern) > self.MAX_PATTERN_LENGTH or - pattern.count('*') > self.MAX_WILDCARD_COUNT or - pattern.count('+') > self.MAX_WILDCARD_COUNT): - return None - return re.match(pattern, text) - except re.error: - return None diff --git a/src/code_index_mcp/analyzers/default_analyzer.py b/src/code_index_mcp/analyzers/default_analyzer.py deleted file mode 100644 index a9e2588..0000000 --- a/src/code_index_mcp/analyzers/default_analyzer.py +++ /dev/null @@ -1,23 +0,0 @@ -"""Default analyzer for basic file information.""" - -import os -from typing import Dict, Any -from .base_analyzer import LanguageAnalyzer -from .analysis_result import AnalysisResult - - -class DefaultAnalyzer(LanguageAnalyzer): - """Default analyzer that provides basic file information.""" - - def analyze(self, content: str, file_path: str, full_path: str = None) -> AnalysisResult: - """Provide basic file analysis.""" - _, ext = os.path.splitext(file_path) - - return AnalysisResult( - file_path=file_path, - line_count=self._count_lines(content), - size_bytes=self._get_file_size(content, full_path), - extension=ext, - analysis_type="basic" - ) - diff --git a/src/code_index_mcp/analyzers/java_analyzer.py b/src/code_index_mcp/analyzers/java_analyzer.py deleted file mode 100644 index d937b10..0000000 --- a/src/code_index_mcp/analyzers/java_analyzer.py +++ /dev/null @@ -1,77 +0,0 @@ -"""Java language analyzer.""" - -import os -import re -from typing import Dict, Any -from .base_analyzer import LanguageAnalyzer -from .analysis_result import AnalysisResult - - -class JavaAnalyzer(LanguageAnalyzer): - """Analyzer for Java files.""" - - def __init__(self): - """Initialize with compiled regex patterns for performance.""" - self.import_pattern = re.compile(r'^import\s+([\w.]+);') - self.class_pattern = 
re.compile(r'^(public\s+|protected\s+|private\s+)?(static\s+)?(abstract\s+)?(final\s+)?class\s+(\w+)') - self.method_pattern = re.compile(r'^(public|protected|private|static|final|abstract|synchronized|native|strictfp|\s)+[\w<>\[\]]+\s+(\w+)\s*\([^)]*\)') - self.field_pattern = re.compile(r'^(public|protected|private|static|final|transient|volatile|\s)+[\w<>\[\]]+\s+(\w+)\s*(=|;)') - - def analyze(self, content: str, file_path: str, full_path: str = None) -> AnalysisResult: - """Analyze Java file content.""" - lines = content.splitlines() - - # Create result object - _, ext = os.path.splitext(file_path) - result = AnalysisResult( - file_path=file_path, - line_count=self._count_lines(content), - size_bytes=self._get_file_size(content, full_path), - extension=ext, - analysis_type="java" - ) - - # Java-specific analysis using pre-compiled patterns - - in_multiline_comment = False - - for i, line in enumerate(lines): - line = line.strip() - - # Skip comments and empty lines - if not line or line.startswith('//'): - continue - - # Handle multiline comments - if '/*' in line: - in_multiline_comment = True - if '*/' in line: - in_multiline_comment = False - continue - if in_multiline_comment: - continue - - # Check for imports - import_match = self.import_pattern.match(line) - if import_match: - result.add_symbol("import", import_match.group(1), i + 1) - - # Check for class definitions - class_match = self.class_pattern.match(line) - if class_match: - modifiers = [m for m in class_match.groups()[:4] if m and m.strip()] - result.add_symbol("class", class_match.group(5), i + 1, - {"modifiers": modifiers}) - - # Check for method definitions - method_match = self.method_pattern.match(line) - if method_match and not line.strip().endswith(';'): - result.add_symbol("function", method_match.group(2), i + 1) - - # Check for field definitions - field_match = self.field_pattern.match(line) - if field_match and not line.strip().startswith('//'): - result.add_symbol("field", field_match.group(2), i + 1) - - return result - diff --git a/src/code_index_mcp/analyzers/javascript_analyzer.py b/src/code_index_mcp/analyzers/javascript_analyzer.py deleted file mode 100644 index 52dc690..0000000 --- a/src/code_index_mcp/analyzers/javascript_analyzer.py +++ /dev/null @@ -1,82 +0,0 @@ -"""JavaScript/TypeScript language analyzer.""" - -import os -from typing import Dict, Any -from .base_analyzer import LanguageAnalyzer -from .analysis_result import AnalysisResult - - -class JavaScriptAnalyzer(LanguageAnalyzer): - """Analyzer for JavaScript and TypeScript files.""" - - def analyze(self, content: str, file_path: str, full_path: str = None) -> AnalysisResult: - """Analyze JavaScript/TypeScript file content.""" - lines = content.splitlines() - - # Create result object - _, ext = os.path.splitext(file_path) - result = AnalysisResult( - file_path=file_path, - line_count=self._count_lines(content), - size_bytes=self._get_file_size(content, full_path), - extension=ext, - analysis_type="javascript" - ) - - # JavaScript/TypeScript-specific analysis - - # Simplified patterns for better performance and safety - # Using simpler string matching instead of complex regex - - for i, line in enumerate(lines): - line = line.strip() - - # Skip empty lines and comments - if not line or line.startswith('//') or line.startswith('/*') or line.startswith('*'): - continue - - # Check for imports (simplified) - if (line.startswith('import ') or line.startswith('export import') or - 'require(' in line or line.startswith('import(')): - 
result.add_symbol("import", line, i + 1) - - # Check for exports (simplified) - if (line.startswith('export ') or line.startswith('module.exports')): - result.add_symbol("export", line, i + 1) - - # Check for class definitions (simplified) - if 'class ' in line and ('export class' in line or line.startswith('class ')): - # Extract class name - parts = line.split('class ')[1] if 'class ' in line else '' - if parts: - class_name = parts.split(' ')[0].split('{')[0].split('(')[0].strip() - if class_name: - result.add_symbol("class", class_name, i + 1) - - # Check for function definitions (simplified) - if ('function ' in line or '=>' in line) and not line.endswith(';'): - func_name = "" - if 'function ' in line: - parts = line.split('function ')[1] if 'function ' in line else '' - if parts: - func_name = parts.split('(')[0].strip() - elif '=>' in line and ('const ' in line or 'let ' in line or 'var ' in line): - # Arrow function - for keyword in ['const ', 'let ', 'var ']: - if keyword in line: - parts = line.split(keyword)[1] - func_name = parts.split('=')[0].strip() - break - - if func_name and func_name.isidentifier(): - result.add_symbol("function", func_name, i + 1) - - # Check for constants (simplified) - if line.startswith('const ') and '=' in line: - parts = line.split('const ')[1] - const_name = parts.split('=')[0].strip() - if const_name and const_name.isidentifier(): - result.add_symbol("constant", const_name, i + 1) - - return result - diff --git a/src/code_index_mcp/analyzers/objective_c_analyzer.py b/src/code_index_mcp/analyzers/objective_c_analyzer.py deleted file mode 100644 index 93bec07..0000000 --- a/src/code_index_mcp/analyzers/objective_c_analyzer.py +++ /dev/null @@ -1,86 +0,0 @@ -"""Objective-C language analyzer.""" - -import os -import re -from typing import Dict, Any -from .base_analyzer import LanguageAnalyzer -from .analysis_result import AnalysisResult - - -class ObjectiveCAnalyzer(LanguageAnalyzer): - """Analyzer for Objective-C files.""" - - def __init__(self): - """Initialize with compiled regex patterns for performance.""" - self.import_pattern = re.compile(r'^#import\s+["<]([^">]+)[">]') - self.interface_pattern = re.compile(r'^@interface\s+(\w+)(?:\s*:\s*(\w+))?') - self.implementation_pattern = re.compile(r'^@implementation\s+(\w+)') - self.method_pattern = re.compile(r'^[-+]\s*\([^)]+\)\s*(\w+)') - self.property_pattern = re.compile(r'^@property\s*\([^)]*\)\s*[\w\s*]+\s*(\w+)') - - def analyze(self, content: str, file_path: str, full_path: str = None) -> AnalysisResult: - """Analyze Objective-C file content.""" - lines = content.splitlines() - - # Create result object - _, ext = os.path.splitext(file_path) - result = AnalysisResult( - file_path=file_path, - line_count=self._count_lines(content), - size_bytes=self._get_file_size(content, full_path), - extension=ext, - analysis_type="objective-c" - ) - - # Objective-C specific analysis using pre-compiled patterns - - in_interface = False - in_implementation = False - - for i, line in enumerate(lines): - line = line.strip() - - # Skip empty lines and comments - if not line or line.startswith('//'): - continue - - # Check for imports - import_match = self.import_pattern.match(line) - if import_match: - result.add_symbol("import", import_match.group(1), i + 1) - - # Check for interface definitions - interface_match = self.interface_pattern.match(line) - if interface_match: - superclass = interface_match.group(2) if interface_match.group(2) else None - result.add_symbol("interface", interface_match.group(1), i + 
1, - {"superclass": superclass}) - in_interface = True - in_implementation = False - - # Check for implementation definitions - implementation_match = self.implementation_pattern.match(line) - if implementation_match: - result.add_symbol("implementation", implementation_match.group(1), i + 1) - in_interface = False - in_implementation = True - - # Check for method definitions - method_match = self.method_pattern.match(line) - if method_match and (in_interface or in_implementation): - method_type = "instance" if line.startswith('-') else "class" - result.add_symbol("function", method_match.group(1), i + 1, - {"type": method_type}) - - # Check for property definitions - property_match = self.property_pattern.match(line) - if property_match and in_interface: - result.add_symbol("property", property_match.group(1), i + 1) - - # Reset context on @end - if line == '@end': - in_interface = False - in_implementation = False - - return result - diff --git a/src/code_index_mcp/analyzers/python_analyzer.py b/src/code_index_mcp/analyzers/python_analyzer.py deleted file mode 100644 index f1a139b..0000000 --- a/src/code_index_mcp/analyzers/python_analyzer.py +++ /dev/null @@ -1,49 +0,0 @@ -"""Python language analyzer.""" - -import os -from typing import Dict, Any -from .base_analyzer import LanguageAnalyzer -from .analysis_result import AnalysisResult - - -class PythonAnalyzer(LanguageAnalyzer): - """Analyzer for Python files.""" - - def analyze(self, content: str, file_path: str, full_path: str = None) -> AnalysisResult: - """Analyze Python file content.""" - lines = content.splitlines() - - # Create result object - _, ext = os.path.splitext(file_path) - result = AnalysisResult( - file_path=file_path, - line_count=self._count_lines(content), - size_bytes=self._get_file_size(content, full_path), - extension=ext, - analysis_type="python" - ) - - # Python-specific analysis - for i, line in enumerate(lines): - line = line.strip() - - # Skip empty lines and comments - if not line or line.startswith('#'): - continue - - # Check for imports - if line.startswith('import ') or line.startswith('from '): - result.add_symbol("import", line, i + 1) - - # Check for class definitions - if line.startswith('class '): - class_name = line.replace('class ', '').split('(')[0].split(':')[0].strip() - result.add_symbol("class", class_name, i + 1) - - # Check for function definitions - if line.startswith('def '): - func_name = line.replace('def ', '').split('(')[0].strip() - result.add_symbol("function", func_name, i + 1) - - return result - diff --git a/src/code_index_mcp/constants.py b/src/code_index_mcp/constants.py index cacfdd2..159e31a 100644 --- a/src/code_index_mcp/constants.py +++ b/src/code_index_mcp/constants.py @@ -5,5 +5,114 @@ # Directory and file names SETTINGS_DIR = "code_indexer" CONFIG_FILE = "config.json" -INDEX_FILE = "file_index.pickle" -CACHE_FILE = "content_cache.pickle" \ No newline at end of file +INDEX_FILE = "index.json" # JSON index file (deep index) +INDEX_FILE_SHALLOW = "index.shallow.json" # Minimal shallow index (file list) + +# Supported file extensions for code analysis +# This is the authoritative list used by both old and new indexing systems +SUPPORTED_EXTENSIONS = [ + # Core programming languages + '.py', '.pyw', # Python + '.js', '.jsx', '.ts', '.tsx', # JavaScript/TypeScript + '.mjs', '.cjs', # Modern JavaScript + '.java', # Java + '.c', '.cpp', '.h', '.hpp', # C/C++ + '.cxx', '.cc', '.hxx', '.hh', # C++ variants + '.cs', # C# + '.go', # Go + '.m', '.mm', # Objective-C + '.rb', # Ruby + 
'.php', # PHP + '.swift', # Swift + '.kt', '.kts', # Kotlin + '.rs', # Rust + '.scala', # Scala + '.sh', '.bash', '.zsh', # Shell scripts + '.ps1', # PowerShell + '.bat', '.cmd', # Windows batch + '.r', '.R', # R + '.pl', '.pm', # Perl + '.lua', # Lua + '.dart', # Dart + '.hs', # Haskell + '.ml', '.mli', # OCaml + '.fs', '.fsx', # F# + '.clj', '.cljs', # Clojure + '.vim', # Vim script + '.zig', '.zon', # Zig + + # Web and markup + '.html', '.htm', # HTML + '.css', '.scss', '.sass', # Stylesheets + '.less', '.stylus', '.styl', # Style languages + '.md', '.mdx', # Markdown + '.json', '.jsonc', # JSON + '.xml', # XML + '.yml', '.yaml', # YAML + + # Frontend frameworks + '.vue', # Vue.js + '.svelte', # Svelte + '.astro', # Astro + + # Template engines + '.hbs', '.handlebars', # Handlebars + '.ejs', # EJS + '.pug', # Pug + + # Database and SQL + '.sql', '.ddl', '.dml', # SQL + '.mysql', '.postgresql', '.psql', # Database-specific SQL + '.sqlite', '.mssql', '.oracle', # More databases + '.ora', '.db2', # Oracle and DB2 + '.proc', '.procedure', # Stored procedures + '.func', '.function', # Functions + '.view', '.trigger', '.index', # Database objects + '.migration', '.seed', '.fixture', # Migration files + '.schema', # Schema files + '.cql', '.cypher', '.sparql', # NoSQL query languages + '.gql', # GraphQL + '.liquibase', '.flyway', # Migration tools +] + +# Centralized filtering configuration +FILTER_CONFIG = { + "exclude_directories": { + # Version control + '.git', '.svn', '.hg', '.bzr', + + # Package managers & dependencies + 'node_modules', '__pycache__', '.venv', 'venv', + 'vendor', 'bower_components', + + # Build outputs + 'dist', 'build', 'target', 'out', 'bin', 'obj', + + # IDE & editors + '.idea', '.vscode', '.vs', '.sublime-workspace', + + # Testing & coverage + '.pytest_cache', '.coverage', '.tox', '.nyc_output', + 'coverage', 'htmlcov', + + # OS artifacts + '.DS_Store', 'Thumbs.db', 'desktop.ini' + }, + + "exclude_files": { + # Temporary files + '*.tmp', '*.temp', '*.swp', '*.swo', + + # Backup files + '*.bak', '*~', '*.orig', + + # Log files + '*.log', + + # Lock files + 'package-lock.json', 'yarn.lock', 'Pipfile.lock' + }, + + "supported_extensions": SUPPORTED_EXTENSIONS +} + diff --git a/src/code_index_mcp/indexing/__init__.py b/src/code_index_mcp/indexing/__init__.py new file mode 100644 index 0000000..e779911 --- /dev/null +++ b/src/code_index_mcp/indexing/__init__.py @@ -0,0 +1,32 @@ +""" +Code indexing utilities for the MCP server. + +This module provides simple JSON-based indexing optimized for LLM consumption. 
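+
+Typical entry point (illustrative; the path is hypothetical):
+
+    from code_index_mcp.indexing import get_index_manager
+
+    manager = get_index_manager()
+    manager.set_project_path('/path/to/project')
+    manager.build_index()
+    print(manager.find_files('**/*.py'))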
+""" + +# Import utility functions that are still used +from .qualified_names import ( + generate_qualified_name, + normalize_file_path +) + +# New JSON-based indexing system +from .json_index_builder import JSONIndexBuilder, IndexMetadata +from .json_index_manager import JSONIndexManager, get_index_manager +from .shallow_index_manager import ShallowIndexManager, get_shallow_index_manager +from .deep_index_manager import DeepIndexManager +from .models import SymbolInfo, FileInfo + +__all__ = [ + 'generate_qualified_name', + 'normalize_file_path', + 'JSONIndexBuilder', + 'JSONIndexManager', + 'get_index_manager', + 'ShallowIndexManager', + 'get_shallow_index_manager', + 'DeepIndexManager', + 'SymbolInfo', + 'FileInfo', + 'IndexMetadata' +] \ No newline at end of file diff --git a/src/code_index_mcp/indexing/deep_index_manager.py b/src/code_index_mcp/indexing/deep_index_manager.py new file mode 100644 index 0000000..6558703 --- /dev/null +++ b/src/code_index_mcp/indexing/deep_index_manager.py @@ -0,0 +1,46 @@ +""" +Deep Index Manager - Wrapper around JSONIndexManager for deep indexing. + +This class provides a clear semantic separation from the shallow manager. +It delegates to the existing JSONIndexManager (symbols + files JSON index). +""" + +from __future__ import annotations + +from typing import Optional, Dict, Any, List + +from .json_index_manager import JSONIndexManager + + +class DeepIndexManager: + """Thin wrapper over JSONIndexManager to expose deep-index API.""" + + def __init__(self) -> None: + self._mgr = JSONIndexManager() + + # Expose a subset of API to keep callers simple + def set_project_path(self, project_path: str) -> bool: + return self._mgr.set_project_path(project_path) + + def build_index(self, force_rebuild: bool = False) -> bool: + return self._mgr.build_index(force_rebuild=force_rebuild) + + def load_index(self) -> bool: + return self._mgr.load_index() + + def refresh_index(self) -> bool: + return self._mgr.refresh_index() + + def find_files(self, pattern: str = "*") -> List[str]: + return self._mgr.find_files(pattern) + + def get_file_summary(self, file_path: str) -> Optional[Dict[str, Any]]: + return self._mgr.get_file_summary(file_path) + + def get_index_stats(self) -> Dict[str, Any]: + return self._mgr.get_index_stats() + + def cleanup(self) -> None: + self._mgr.cleanup() + + diff --git a/src/code_index_mcp/indexing/index_provider.py b/src/code_index_mcp/indexing/index_provider.py new file mode 100644 index 0000000..660bb8d --- /dev/null +++ b/src/code_index_mcp/indexing/index_provider.py @@ -0,0 +1,125 @@ +""" +Index provider interface definitions. + +Defines standard interfaces for all index access, ensuring consistency across different implementations. +""" + +from typing import List, Optional, Dict, Any, Protocol +from dataclasses import dataclass + +from .models import SymbolInfo, FileInfo + + +@dataclass +class IndexMetadata: + """Standard index metadata structure.""" + version: str + format_type: str + created_at: float + last_updated: float + file_count: int + project_root: str + tool_version: str + + +class IIndexProvider(Protocol): + """ + Standard index provider interface. + + All index implementations must follow this interface to ensure consistent access patterns. + """ + + def get_file_list(self) -> List[FileInfo]: + """ + Get list of all indexed files. + + Returns: + List of file information objects + """ + ... + + def get_file_info(self, file_path: str) -> Optional[FileInfo]: + """ + Get information for a specific file. 
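+
+        Implementations are expected to accept paths normalized to forward
+        slashes, relative to the project root.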
+ + Args: + file_path: Relative file path + + Returns: + File information, or None if file is not in index + """ + ... + + def query_symbols(self, file_path: str) -> List[SymbolInfo]: + """ + Query symbol information in a file. + + Args: + file_path: Relative file path + + Returns: + List of symbol information objects + """ + ... + + def search_files(self, pattern: str) -> List[str]: + """ + Search files by pattern. + + Args: + pattern: Glob pattern or regular expression + + Returns: + List of matching file paths + """ + ... + + def get_metadata(self) -> IndexMetadata: + """ + Get index metadata. + + Returns: + Index metadata information + """ + ... + + def is_available(self) -> bool: + """ + Check if index is available. + + Returns: + True if index is available and functional + """ + ... + + +class IIndexManager(Protocol): + """ + Index manager interface. + + Defines standard interface for index lifecycle management. + """ + + def initialize(self) -> bool: + """Initialize the index manager.""" + ... + + def get_provider(self) -> Optional[IIndexProvider]: + """Get the current active index provider.""" + ... + + def refresh_index(self, force: bool = False) -> bool: + """Refresh the index.""" + ... + + def save_index(self) -> bool: + """Save index state.""" + ... + + def clear_index(self) -> None: + """Clear index state.""" + ... + + def get_index_status(self) -> Dict[str, Any]: + """Get index status information.""" + ... diff --git a/src/code_index_mcp/indexing/json_index_builder.py b/src/code_index_mcp/indexing/json_index_builder.py new file mode 100644 index 0000000..c12d694 --- /dev/null +++ b/src/code_index_mcp/indexing/json_index_builder.py @@ -0,0 +1,430 @@ +""" +JSON Index Builder - Clean implementation using Strategy pattern. + +This replaces the monolithic parser implementation with a clean, +maintainable Strategy pattern architecture. +""" + +import logging +import os +import time +from concurrent.futures import ProcessPoolExecutor, ThreadPoolExecutor, as_completed +from dataclasses import dataclass, asdict +from pathlib import Path +from typing import Dict, List, Optional, Any, Tuple + +from .strategies import StrategyFactory +from .models import SymbolInfo, FileInfo + +logger = logging.getLogger(__name__) + + +@dataclass +class IndexMetadata: + """Metadata for the JSON index.""" + project_path: str + indexed_files: int + index_version: str + timestamp: str + languages: List[str] + total_symbols: int = 0 + specialized_parsers: int = 0 + fallback_files: int = 0 + + +class JSONIndexBuilder: + """ + Main index builder using Strategy pattern for language parsing. + + This class orchestrates the index building process by: + 1. Discovering files in the project + 2. Using StrategyFactory to get appropriate parsers + 3. Extracting symbols and metadata + 4. 
Assembling the final JSON index + """ + + def __init__(self, project_path: str, additional_excludes: Optional[List[str]] = None): + from ..utils import FileFilter + + # Input validation + if not isinstance(project_path, str): + raise ValueError(f"Project path must be a string, got {type(project_path)}") + + project_path = project_path.strip() + if not project_path: + raise ValueError("Project path cannot be empty") + + if not os.path.isdir(project_path): + raise ValueError(f"Project path does not exist: {project_path}") + + self.project_path = project_path + self.in_memory_index: Optional[Dict[str, Any]] = None + self.strategy_factory = StrategyFactory() + self.file_filter = FileFilter(additional_excludes) + + logger.info(f"Initialized JSON index builder for {project_path}") + strategy_info = self.strategy_factory.get_strategy_info() + logger.info(f"Available parsing strategies: {len(strategy_info)} types") + + # Log specialized vs fallback coverage + specialized = len(self.strategy_factory.get_specialized_extensions()) + fallback = len(self.strategy_factory.get_fallback_extensions()) + logger.info(f"Specialized parsers: {specialized} extensions, Fallback coverage: {fallback} extensions") + + def _process_file(self, file_path: str, specialized_extensions: set) -> Optional[Tuple[Dict, Dict, str, bool]]: + """ + Process a single file - designed for parallel execution. + + Args: + file_path: Path to the file to process + specialized_extensions: Set of extensions with specialized parsers + + Returns: + Tuple of (symbols, file_info, language, is_specialized) or None on error + """ + try: + with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: + content = f.read() + + ext = Path(file_path).suffix.lower() + rel_path = os.path.relpath(file_path, self.project_path).replace('\\', '/') + + # Get appropriate strategy + strategy = self.strategy_factory.get_strategy(ext) + + # Track strategy usage + is_specialized = ext in specialized_extensions + + # Parse file using strategy + symbols, file_info = strategy.parse_file(rel_path, content) + + logger.debug(f"Parsed {rel_path}: {len(symbols)} symbols ({file_info.language})") + + return (symbols, {rel_path: file_info}, file_info.language, is_specialized) + + except Exception as e: + logger.warning(f"Error processing {file_path}: {e}") + return None + + def build_index(self, parallel: bool = True, max_workers: Optional[int] = None) -> Dict[str, Any]: + """ + Build the complete index using Strategy pattern with parallel processing. 
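+
+        Illustrative usage (hypothetical project path):
+
+            builder = JSONIndexBuilder('/path/to/project')
+            index = builder.build_index(parallel=True, max_workers=4)
+            print(index['metadata']['total_symbols'])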
+ + Args: + parallel: Whether to use parallel processing (default: True) + max_workers: Maximum number of worker processes/threads (default: CPU count) + + Returns: + Complete JSON index with metadata, symbols, and file information + """ + logger.info(f"Building JSON index using Strategy pattern (parallel={parallel})...") + start_time = time.time() + + all_symbols = {} + all_files = {} + languages = set() + specialized_count = 0 + fallback_count = 0 + + # Get specialized extensions for tracking + specialized_extensions = set(self.strategy_factory.get_specialized_extensions()) + + # Get list of files to process + files_to_process = self._get_supported_files() + total_files = len(files_to_process) + + if total_files == 0: + logger.warning("No files to process") + return self._create_empty_index() + + logger.info(f"Processing {total_files} files...") + + if parallel and total_files > 1: + # Use ThreadPoolExecutor for I/O-bound file reading + # ProcessPoolExecutor has issues with strategy sharing + if max_workers is None: + max_workers = min(os.cpu_count() or 4, total_files) + + logger.info(f"Using parallel processing with {max_workers} workers") + + with ThreadPoolExecutor(max_workers=max_workers) as executor: + # Submit all tasks + future_to_file = { + executor.submit(self._process_file, file_path, specialized_extensions): file_path + for file_path in files_to_process + } + + # Process completed tasks + processed = 0 + for future in as_completed(future_to_file): + file_path = future_to_file[future] + result = future.result() + + if result: + symbols, file_info_dict, language, is_specialized = result + all_symbols.update(symbols) + all_files.update(file_info_dict) + languages.add(language) + + if is_specialized: + specialized_count += 1 + else: + fallback_count += 1 + + processed += 1 + if processed % 100 == 0: + logger.debug(f"Processed {processed}/{total_files} files") + else: + # Sequential processing + logger.info("Using sequential processing") + for file_path in files_to_process: + result = self._process_file(file_path, specialized_extensions) + if result: + symbols, file_info_dict, language, is_specialized = result + all_symbols.update(symbols) + all_files.update(file_info_dict) + languages.add(language) + + if is_specialized: + specialized_count += 1 + else: + fallback_count += 1 + + # Build index metadata + metadata = IndexMetadata( + project_path=self.project_path, + indexed_files=len(all_files), + index_version="2.0.0-strategy", + timestamp=time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()), + languages=sorted(list(languages)), + total_symbols=len(all_symbols), + specialized_parsers=specialized_count, + fallback_files=fallback_count + ) + + # Assemble final index + index = { + "metadata": asdict(metadata), + "symbols": {k: asdict(v) for k, v in all_symbols.items()}, + "files": {k: asdict(v) for k, v in all_files.items()} + } + + # Cache in memory + self.in_memory_index = index + + elapsed = time.time() - start_time + logger.info(f"Built index with {len(all_symbols)} symbols from {len(all_files)} files in {elapsed:.2f}s") + logger.info(f"Languages detected: {sorted(languages)}") + logger.info(f"Strategy usage: {specialized_count} specialized, {fallback_count} fallback") + + return index + + def _create_empty_index(self) -> Dict[str, Any]: + """Create an empty index structure.""" + metadata = IndexMetadata( + project_path=self.project_path, + indexed_files=0, + index_version="2.0.0-strategy", + timestamp=time.strftime("%Y-%m-%dT%H:%M:%SZ", time.gmtime()), + languages=[], + 
total_symbols=0, + specialized_parsers=0, + fallback_files=0 + ) + + return { + "metadata": asdict(metadata), + "symbols": {}, + "files": {} + } + + def get_index(self) -> Optional[Dict[str, Any]]: + """Get the current in-memory index.""" + return self.in_memory_index + + def clear_index(self): + """Clear the in-memory index.""" + self.in_memory_index = None + logger.debug("Cleared in-memory index") + + def _get_supported_files(self) -> List[str]: + """ + Get all supported files in the project using centralized filtering. + + Returns: + List of file paths that can be parsed + """ + supported_files = [] + base_path = Path(self.project_path) + + try: + for root, dirs, files in os.walk(self.project_path): + # Filter directories in-place using centralized logic + dirs[:] = [d for d in dirs if not self.file_filter.should_exclude_directory(d)] + + # Filter files using centralized logic + for file in files: + file_path = Path(root) / file + if self.file_filter.should_process_path(file_path, base_path): + supported_files.append(str(file_path)) + + except Exception as e: + logger.error(f"Error scanning directory {self.project_path}: {e}") + + logger.debug(f"Found {len(supported_files)} supported files") + return supported_files + + def build_shallow_file_list(self) -> List[str]: + """ + Build a minimal shallow index consisting of relative file paths only. + + This method does not read file contents. It enumerates supported files + using centralized filtering and returns normalized relative paths with + forward slashes for cross-platform consistency. + + Returns: + List of relative file paths (using '/'). + """ + try: + absolute_files = self._get_supported_files() + result: List[str] = [] + for abs_path in absolute_files: + rel_path = os.path.relpath(abs_path, self.project_path).replace('\\', '/') + # Normalize leading './' + if rel_path.startswith('./'): + rel_path = rel_path[2:] + result.append(rel_path) + return result + except Exception as e: + logger.error(f"Failed to build shallow file list: {e}") + return [] + + def save_index(self, index: Dict[str, Any], index_path: str) -> bool: + """ + Save index to disk. + + Args: + index: Index data to save + index_path: Path where to save the index + + Returns: + True if successful, False otherwise + """ + try: + import json + with open(index_path, 'w', encoding='utf-8') as f: + json.dump(index, f, indent=2, ensure_ascii=False) + logger.info(f"Saved index to {index_path}") + return True + except Exception as e: + logger.error(f"Failed to save index to {index_path}: {e}") + return False + + def load_index(self, index_path: str) -> Optional[Dict[str, Any]]: + """ + Load index from disk. + + Args: + index_path: Path to the index file + + Returns: + Index data if successful, None otherwise + """ + try: + if not os.path.exists(index_path): + logger.debug(f"Index file not found: {index_path}") + return None + + import json + with open(index_path, 'r', encoding='utf-8') as f: + index = json.load(f) + + # Cache in memory + self.in_memory_index = index + logger.info(f"Loaded index from {index_path}") + return index + + except Exception as e: + logger.error(f"Failed to load index from {index_path}: {e}") + return None + + def get_parsing_statistics(self) -> Dict[str, Any]: + """ + Get detailed statistics about parsing capabilities. 
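+
+        Illustrative shape of the returned dictionary (counts are made up):
+
+            {
+                "total_strategies": 12,
+                "specialized_extensions": 10,
+                "fallback_extensions": 70,
+                "strategy_details": {...}
+            }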
+
+        Returns:
+            Dictionary with parsing statistics and strategy information
+        """
+        strategy_info = self.strategy_factory.get_strategy_info()
+
+        return {
+            "total_strategies": len(strategy_info),
+            "specialized_languages": [lang for lang in strategy_info.keys() if not lang.startswith('fallback_')],
+            "fallback_languages": [lang.replace('fallback_', '') for lang in strategy_info.keys() if lang.startswith('fallback_')],
+            "total_extensions": len(self.strategy_factory.get_all_supported_extensions()),
+            "specialized_extensions": len(self.strategy_factory.get_specialized_extensions()),
+            "fallback_extensions": len(self.strategy_factory.get_fallback_extensions()),
+            "strategy_details": strategy_info
+        }
+
+    def get_file_symbols(self, file_path: str) -> List[Dict[str, Any]]:
+        """
+        Get symbols for a specific file.
+
+        Args:
+            file_path: Relative path to the file
+
+        Returns:
+            List of symbols in the file
+        """
+        if not self.in_memory_index:
+            logger.warning("Index not loaded")
+            return []
+
+        try:
+            # Normalize file path
+            file_path = file_path.replace('\\', '/')
+            if file_path.startswith('./'):
+                file_path = file_path[2:]
+
+            # Get file info
+            file_info = self.in_memory_index["files"].get(file_path)
+            if not file_info:
+                logger.warning(f"File not found in index: {file_path}")
+                return []
+
+            # Work directly with global symbols for this file
+            global_symbols = self.in_memory_index.get("symbols", {})
+            result = []
+
+            # Find all symbols for this file directly from global symbols
+            for symbol_id, symbol_data in global_symbols.items():
+                symbol_file = symbol_data.get("file", "").replace("\\\\", "/")
+
+                # Check if this symbol belongs to our file
+                if symbol_file == file_path:
+                    symbol_type = symbol_data.get("type", "unknown")
+                    symbol_name = symbol_id.split("::")[-1]  # Extract symbol name from ID
+
+                    # Create symbol info
+                    symbol_info = {
+                        "name": symbol_name,
+                        "called_by": symbol_data.get("called_by", []),
+                        "line": symbol_data.get("line"),
+                        "signature": symbol_data.get("signature")
+                    }
+
+                    # Keep functions, methods, and classes; skip other symbol types
+                    if symbol_type in ["function", "method", "class"]:
+                        result.append(symbol_info)
+
+            # Sort by line number for consistent ordering (missing lines sort first)
+            result.sort(key=lambda x: x.get("line") or 0)
+
+            return result
+
+        except Exception as e:
+            logger.error(f"Error getting file symbols for {file_path}: {e}")
+            return []
diff --git a/src/code_index_mcp/indexing/json_index_manager.py b/src/code_index_mcp/indexing/json_index_manager.py
new file mode 100644
index 0000000..ec320e4
--- /dev/null
+++ b/src/code_index_mcp/indexing/json_index_manager.py
@@ -0,0 +1,465 @@
+"""
+JSON Index Manager - Manages the lifecycle of the JSON-based index.
+
+This replaces the SCIP unified_index_manager with a simpler approach
+focused on fast JSON-based indexing and querying.
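+
+Index files are stored under the system temp directory, in a folder keyed by
+an MD5 hash of the project path (see set_project_path).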
+""" + +import hashlib +import json +import logging +import os +import re +import tempfile +import threading +import fnmatch +from pathlib import Path +from typing import Dict, List, Optional, Any + +from .json_index_builder import JSONIndexBuilder +from ..constants import SETTINGS_DIR, INDEX_FILE, INDEX_FILE_SHALLOW + +logger = logging.getLogger(__name__) + + +class JSONIndexManager: + """Manages JSON-based code index lifecycle and storage.""" + + def __init__(self): + self.project_path: Optional[str] = None + self.index_builder: Optional[JSONIndexBuilder] = None + self.temp_dir: Optional[str] = None + self.index_path: Optional[str] = None + self.shallow_index_path: Optional[str] = None + self._shallow_file_list: Optional[List[str]] = None + self._lock = threading.RLock() + logger.info("Initialized JSON Index Manager") + + def set_project_path(self, project_path: str) -> bool: + """Set the project path and initialize index storage.""" + with self._lock: + try: + # Input validation + if not project_path or not isinstance(project_path, str): + logger.error(f"Invalid project path: {project_path}") + return False + + project_path = project_path.strip() + if not project_path: + logger.error("Project path cannot be empty") + return False + + if not os.path.isdir(project_path): + logger.error(f"Project path does not exist: {project_path}") + return False + + self.project_path = project_path + self.index_builder = JSONIndexBuilder(project_path) + + # Create temp directory for index storage + project_hash = hashlib.md5(project_path.encode()).hexdigest()[:12] + self.temp_dir = os.path.join(tempfile.gettempdir(), SETTINGS_DIR, project_hash) + os.makedirs(self.temp_dir, exist_ok=True) + + self.index_path = os.path.join(self.temp_dir, INDEX_FILE) + self.shallow_index_path = os.path.join(self.temp_dir, INDEX_FILE_SHALLOW) + + logger.info(f"Set project path: {project_path}") + logger.info(f"Index storage: {self.index_path}") + return True + + except Exception as e: + logger.error(f"Failed to set project path: {e}") + return False + + def build_index(self, force_rebuild: bool = False) -> bool: + """Build or rebuild the index.""" + with self._lock: + if not self.index_builder or not self.project_path: + logger.error("Index builder not initialized") + return False + + try: + # Check if we need to rebuild + if not force_rebuild and self._is_index_fresh(): + logger.info("Index is fresh, skipping rebuild") + return True + + logger.info("Building JSON index...") + index = self.index_builder.build_index() + + # Save to disk + self.index_builder.save_index(index, self.index_path) + + logger.info(f"Successfully built index with {len(index['symbols'])} symbols") + return True + + except Exception as e: + logger.error(f"Failed to build index: {e}") + return False + + def load_index(self) -> bool: + """Load existing index from disk.""" + with self._lock: + if not self.index_builder or not self.index_path: + logger.error("Index manager not initialized") + return False + + try: + index = self.index_builder.load_index(self.index_path) + if index: + logger.info(f"Loaded index with {len(index['symbols'])} symbols") + return True + else: + logger.warning("No existing index found") + return False + + except Exception as e: + logger.error(f"Failed to load index: {e}") + return False + + def build_shallow_index(self) -> bool: + """Build and save the minimal shallow index (file list).""" + with self._lock: + if not self.index_builder or not self.project_path or not self.shallow_index_path: + logger.error("Index builder not 
initialized for shallow index") + return False + + try: + file_list = self.index_builder.build_shallow_file_list() + # Persist as a JSON array for minimal overhead + with open(self.shallow_index_path, 'w', encoding='utf-8') as f: + json.dump(file_list, f, ensure_ascii=False) + self._shallow_file_list = file_list + logger.info(f"Saved shallow index with {len(file_list)} files to {self.shallow_index_path}") + return True + except Exception as e: + logger.error(f"Failed to build shallow index: {e}") + return False + + def load_shallow_index(self) -> bool: + """Load shallow index (file list) from disk into memory.""" + with self._lock: + try: + if not self.shallow_index_path or not os.path.exists(self.shallow_index_path): + logger.warning("No existing shallow index found") + return False + with open(self.shallow_index_path, 'r', encoding='utf-8') as f: + data = json.load(f) + if not isinstance(data, list): + logger.error("Shallow index format invalid (expected list)") + return False + # Normalize paths + normalized = [] + for p in data: + if isinstance(p, str): + q = p.replace('\\\\', '/').replace('\\', '/') + if q.startswith('./'): + q = q[2:] + normalized.append(q) + self._shallow_file_list = normalized + logger.info(f"Loaded shallow index with {len(normalized)} files") + return True + except Exception as e: + logger.error(f"Failed to load shallow index: {e}") + return False + + def refresh_index(self) -> bool: + """Refresh the index (rebuild and reload).""" + with self._lock: + logger.info("Refreshing index...") + if self.build_index(force_rebuild=True): + return self.load_index() + return False + + def find_files(self, pattern: str = "*") -> List[str]: + """ + Find files matching a glob pattern using the SHALLOW file list only. + + Notes: + - '*' does not cross '/' + - '**' matches across directories + - Always sources from the shallow index for consistency and speed + """ + with self._lock: + # Input validation + if not isinstance(pattern, str): + logger.error(f"Pattern must be a string, got {type(pattern)}") + return [] + + pattern = pattern.strip() + if not pattern: + pattern = "*" + + # Normalize to forward slashes + norm_pattern = pattern.replace('\\\\', '/').replace('\\', '/') + + # Build glob regex: '*' does not cross '/', '**' crosses directories + regex = self._compile_glob_regex(norm_pattern) + + # Always use shallow index for file discovery + try: + if self._shallow_file_list is None: + # Try load existing shallow index; if missing, build then load + if not self.load_shallow_index(): + # If still not available, attempt to build + if self.build_shallow_index(): + self.load_shallow_index() + + files = list(self._shallow_file_list or []) + + if norm_pattern == "*": + return files + + return [f for f in files if regex.match(f) is not None] + + except Exception as e: + logger.error(f"Error finding files: {e}") + return [] + + def get_file_summary(self, file_path: str) -> Optional[Dict[str, Any]]: + """ + Get summary information for a file. + + This method attempts to retrieve comprehensive file information including + symbol counts, functions, classes, methods, and imports. If the index + is not loaded, it will attempt auto-initialization to restore from the + most recent index state. 
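+
+        Illustrative return value (field contents are made up):
+
+            {
+                "file_path": "src/app.py",
+                "language": "python",
+                "line_count": 120,
+                "symbol_count": 8,
+                "functions": [...], "classes": [...], "methods": [...],
+                "imports": [...], "exports": []
+            }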
+
+        Args:
+            file_path: Relative path to the file
+
+        Returns:
+            Dictionary containing file summary information, or None if not found
+        """
+        with self._lock:
+            # Input validation
+            if not isinstance(file_path, str):
+                logger.error(f"File path must be a string, got {type(file_path)}")
+                return None
+
+            file_path = file_path.strip()
+            if not file_path:
+                logger.error("File path cannot be empty")
+                return None
+
+            # Try to load cached index if not ready
+            if not self.index_builder or not self.index_builder.in_memory_index:
+                if not self._try_load_cached_index():
+                    logger.warning("Index not loaded and no cached index available")
+                    return None
+
+            try:
+                # Normalize file path
+                file_path = file_path.replace('\\', '/')
+                if file_path.startswith('./'):
+                    file_path = file_path[2:]
+
+                # Get file info
+                file_info = self.index_builder.in_memory_index["files"].get(file_path)
+                if not file_info:
+                    logger.warning(f"File not found in index: {file_path}")
+                    return None
+
+                # Get symbols in file
+                symbols = self.index_builder.get_file_symbols(file_path)
+
+                # Categorize symbols by signature
+                functions = []
+                classes = []
+                methods = []
+
+                for s in symbols:
+                    signature = s.get("signature", "")
+                    if signature:
+                        if signature.startswith("def ") and "::" in signature:
+                            # Method: contains class context
+                            methods.append(s)
+                        elif signature.startswith("def "):
+                            # Function: starts with def but no class context
+                            functions.append(s)
+                        elif signature.startswith("class "):
+                            # Class: signature starts with class
+                            classes.append(s)
+                        else:
+                            # Default to function for unknown signatures
+                            functions.append(s)
+                    else:
+                        # No signature - try to infer from name patterns or default to function
+                        name = s.get("name", "")
+                        if name and name[0].isupper():
+                            # Capitalized names are likely classes
+                            classes.append(s)
+                        else:
+                            # Default to function
+                            functions.append(s)
+
+                return {
+                    "file_path": file_path,
+                    "language": file_info["language"],
+                    "line_count": file_info["line_count"],
+                    "symbol_count": len(symbols),
+                    "functions": functions,
+                    "classes": classes,
+                    "methods": methods,
+                    "imports": file_info.get("imports", []),
+                    "exports": file_info.get("exports", [])
+                }
+
+            except Exception as e:
+                logger.error(f"Error getting file summary: {e}")
+                return None
+
+    def get_index_stats(self) -> Dict[str, Any]:
+        """Get statistics about the current index."""
+        with self._lock:
+            if not self.index_builder or not self.index_builder.in_memory_index:
+                return {"status": "not_loaded"}
+
+            try:
+                index = self.index_builder.in_memory_index
+                metadata = index["metadata"]
+
+                symbol_counts = {}
+                for symbol_data in index["symbols"].values():
+                    symbol_type = symbol_data.get("type", "unknown")
+                    symbol_counts[symbol_type] = symbol_counts.get(symbol_type, 0) + 1
+
+                return {
+                    "status": "loaded",
+                    "project_path": metadata["project_path"],
+                    "indexed_files": metadata["indexed_files"],
+                    "total_symbols": len(index["symbols"]),
+                    "symbol_types": symbol_counts,
+                    "languages": metadata["languages"],
+                    "index_version": metadata["index_version"],
+                    "timestamp": metadata["timestamp"]
+                }
+
+            except Exception as e:
+                logger.error(f"Error getting index stats: {e}")
+                return {"status": "error", "error": str(e)}
+
+    def _is_index_fresh(self) -> bool:
+        """Check if the current index is fresh."""
+        if not self.index_path or not os.path.exists(self.index_path):
+            return False
+
+        try:
+            from code_index_mcp.utils.file_filter import FileFilter as _FileFilter  # pylint: disable=C0415
+            file_filter = _FileFilter()
+
+            # 
Simple freshness check - index exists and is recent + index_mtime = os.path.getmtime(self.index_path) + base_path = Path(self.project_path) + + # Check if any source files are newer than index + for root, dirs, files in os.walk(self.project_path): + # Filter directories using centralized logic + dirs[:] = [d for d in dirs if not file_filter.should_exclude_directory(d)] + + for file in files: + file_path = Path(root) / file + if file_filter.should_process_path(file_path, base_path): + if os.path.getmtime(str(file_path)) > index_mtime: + return False + + return True + + except Exception as e: + logger.warning(f"Error checking index freshness: {e}") + return False + + def _try_load_cached_index(self, expected_project_path: Optional[str] = None) -> bool: + """ + Try to load a cached index file if available. + + This is a simplified version of auto-initialization that only loads + a cached index if we can verify it matches the expected project. + + Args: + expected_project_path: Optional path to verify against cached index + + Returns: + True if cached index was loaded successfully, False otherwise. + """ + try: + # First try to load from current index_path if set + if self.index_path and os.path.exists(self.index_path): + return self.load_index() + + # If expected project path provided, try to find its cache + if expected_project_path: + project_hash = hashlib.md5(expected_project_path.encode()).hexdigest()[:12] + temp_dir = os.path.join(tempfile.gettempdir(), SETTINGS_DIR, project_hash) + index_path = os.path.join(temp_dir, INDEX_FILE) + + if os.path.exists(index_path): + # Verify the cached index matches the expected project + with open(index_path, 'r', encoding='utf-8') as f: + index_data = json.load(f) + cached_project = index_data.get('metadata', {}).get('project_path') + + if cached_project == expected_project_path: + self.temp_dir = temp_dir + self.index_path = index_path + return self.load_index() + else: + logger.warning(f"Cached index project mismatch: {cached_project} != {expected_project_path}") + + return False + + except Exception as e: + logger.debug(f"Failed to load cached index: {e}") + return False + + def cleanup(self): + """Clean up resources.""" + with self._lock: + self.project_path = None + self.index_builder = None + self.temp_dir = None + self.index_path = None + logger.info("Cleaned up JSON Index Manager") + + @staticmethod + def _compile_glob_regex(pattern: str) -> re.Pattern: + """ + Compile a glob pattern where '*' does not match '/', and '**' matches across directories. 
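+        The compiled regex is anchored with '^' and '$', so a pattern must
+        match the entire relative path, not just a prefix.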
+ + Examples: + src/*.py -> direct children .py under src + **/*.py -> .py at any depth + """ + # Translate glob to regex + i = 0 + out = [] + special = ".^$+{}[]|()" + while i < len(pattern): + c = pattern[i] + if c == '*': + if i + 1 < len(pattern) and pattern[i + 1] == '*': + # '**' -> match across directories + out.append('.*') + i += 2 + continue + else: + out.append('[^/]*') + elif c == '?': + out.append('[^/]') + elif c in special: + out.append('\\' + c) + else: + out.append(c) + i += 1 + regex_str = '^' + ''.join(out) + '$' + return re.compile(regex_str) + + +# Global instance +_index_manager = JSONIndexManager() + + +def get_index_manager() -> JSONIndexManager: + """Get the global index manager instance.""" + return _index_manager diff --git a/src/code_index_mcp/indexing/models/__init__.py b/src/code_index_mcp/indexing/models/__init__.py new file mode 100644 index 0000000..b120a34 --- /dev/null +++ b/src/code_index_mcp/indexing/models/__init__.py @@ -0,0 +1,8 @@ +""" +Model classes for the indexing system. +""" + +from .symbol_info import SymbolInfo +from .file_info import FileInfo + +__all__ = ['SymbolInfo', 'FileInfo'] \ No newline at end of file diff --git a/src/code_index_mcp/indexing/models/file_info.py b/src/code_index_mcp/indexing/models/file_info.py new file mode 100644 index 0000000..0678774 --- /dev/null +++ b/src/code_index_mcp/indexing/models/file_info.py @@ -0,0 +1,24 @@ +""" +FileInfo model for representing file metadata. +""" + +from dataclasses import dataclass +from typing import Dict, List, Optional, Any + + +@dataclass +class FileInfo: + """Information about a source code file.""" + + language: str # programming language + line_count: int # total lines in file + symbols: Dict[str, List[str]] # symbol categories (functions, classes, etc.) + imports: List[str] # imported modules/packages + exports: Optional[List[str]] = None # exported symbols (for JS/TS modules) + package: Optional[str] = None # package name (for Java, Go, etc.) + docstring: Optional[str] = None # file-level documentation + + def __post_init__(self): + """Initialize mutable defaults.""" + if self.exports is None: + self.exports = [] \ No newline at end of file diff --git a/src/code_index_mcp/indexing/models/symbol_info.py b/src/code_index_mcp/indexing/models/symbol_info.py new file mode 100644 index 0000000..1659330 --- /dev/null +++ b/src/code_index_mcp/indexing/models/symbol_info.py @@ -0,0 +1,23 @@ +""" +SymbolInfo model for representing code symbols. +""" + +from dataclasses import dataclass +from typing import Optional, List + + +@dataclass +class SymbolInfo: + """Information about a code symbol (function, class, method, etc.).""" + + type: str # function, class, method, interface, etc. + file: str # file path where symbol is defined + line: int # line number where symbol starts + signature: Optional[str] = None # function/method signature + docstring: Optional[str] = None # documentation string + called_by: Optional[List[str]] = None # list of symbols that call this symbol + + def __post_init__(self): + """Initialize mutable defaults.""" + if self.called_by is None: + self.called_by = [] \ No newline at end of file diff --git a/src/code_index_mcp/indexing/qualified_names.py b/src/code_index_mcp/indexing/qualified_names.py new file mode 100644 index 0000000..18e108c --- /dev/null +++ b/src/code_index_mcp/indexing/qualified_names.py @@ -0,0 +1,49 @@ +""" +Qualified name generation utilities. 
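+
+For example (illustrative), generate_qualified_name('src/app/main.py', 'run')
+returns 'src.app.main.run'; with namespace='Cli' it returns
+'src.app.main.Cli.run'.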
+""" +import os +from typing import Optional + + +def normalize_file_path(file_path: str) -> str: + """ + Normalize a file path to use forward slashes and relative paths. + + Args: + file_path: The file path to normalize + + Returns: + Normalized file path + """ + # Convert to forward slashes and make relative + normalized = file_path.replace('\\', '/') + + # Remove leading slash if present + if normalized.startswith('/'): + normalized = normalized[1:] + + return normalized + + +def generate_qualified_name(file_path: str, symbol_name: str, namespace: Optional[str] = None) -> str: + """ + Generate a qualified name for a symbol. + + Args: + file_path: Path to the file containing the symbol + symbol_name: Name of the symbol + namespace: Optional namespace/module context + + Returns: + Qualified name for the symbol + """ + normalized_path = normalize_file_path(file_path) + + # Remove file extension for module-like name + base_name = os.path.splitext(normalized_path)[0] + module_path = base_name.replace('/', '.') + + if namespace: + return f"{module_path}.{namespace}.{symbol_name}" + else: + return f"{module_path}.{symbol_name}" \ No newline at end of file diff --git a/src/code_index_mcp/indexing/shallow_index_manager.py b/src/code_index_mcp/indexing/shallow_index_manager.py new file mode 100644 index 0000000..530c593 --- /dev/null +++ b/src/code_index_mcp/indexing/shallow_index_manager.py @@ -0,0 +1,155 @@ +""" +Shallow Index Manager - Manages a minimal file-list-only index. + +This manager builds and loads a shallow index consisting of relative file +paths only. It is optimized for fast initialization and filename-based +search/browsing. Content parsing and symbol extraction are not performed. +""" + +from __future__ import annotations + +import hashlib +import json +import logging +import os +import tempfile +import threading +from typing import List, Optional +import re + +from .json_index_builder import JSONIndexBuilder +from ..constants import SETTINGS_DIR, INDEX_FILE_SHALLOW + +logger = logging.getLogger(__name__) + + +class ShallowIndexManager: + """Manage shallow (file-list) index lifecycle and storage.""" + + def __init__(self) -> None: + self.project_path: Optional[str] = None + self.index_builder: Optional[JSONIndexBuilder] = None + self.temp_dir: Optional[str] = None + self.index_path: Optional[str] = None + self._file_list: Optional[List[str]] = None + self._lock = threading.RLock() + + def set_project_path(self, project_path: str) -> bool: + with self._lock: + try: + if not isinstance(project_path, str) or not project_path.strip(): + logger.error("Invalid project path for shallow index") + return False + project_path = project_path.strip() + if not os.path.isdir(project_path): + logger.error(f"Project path does not exist: {project_path}") + return False + + self.project_path = project_path + self.index_builder = JSONIndexBuilder(project_path) + + project_hash = hashlib.md5(project_path.encode()).hexdigest()[:12] + self.temp_dir = os.path.join(tempfile.gettempdir(), SETTINGS_DIR, project_hash) + os.makedirs(self.temp_dir, exist_ok=True) + self.index_path = os.path.join(self.temp_dir, INDEX_FILE_SHALLOW) + return True + except Exception as e: # noqa: BLE001 - centralized logging + logger.error(f"Failed to set project path (shallow): {e}") + return False + + def build_index(self) -> bool: + """Build and persist the shallow file list index.""" + with self._lock: + if not self.index_builder or not self.index_path: + logger.error("ShallowIndexManager not initialized") + return False + 
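                # The shallow index is a plain JSON array of relative paths,
+                # so each build simply rewrites the whole file.
+            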
try: + file_list = self.index_builder.build_shallow_file_list() + with open(self.index_path, 'w', encoding='utf-8') as f: + json.dump(file_list, f, ensure_ascii=False) + self._file_list = file_list + logger.info(f"Built shallow index with {len(file_list)} files") + return True + except Exception as e: # noqa: BLE001 + logger.error(f"Failed to build shallow index: {e}") + return False + + def load_index(self) -> bool: + """Load shallow index from disk to memory.""" + with self._lock: + try: + if not self.index_path or not os.path.exists(self.index_path): + return False + with open(self.index_path, 'r', encoding='utf-8') as f: + data = json.load(f) + if isinstance(data, list): + # Normalize slashes/prefix + normalized: List[str] = [] + for p in data: + if isinstance(p, str): + q = p.replace('\\\\', '/').replace('\\', '/') + if q.startswith('./'): + q = q[2:] + normalized.append(q) + self._file_list = normalized + return True + return False + except Exception as e: # noqa: BLE001 + logger.error(f"Failed to load shallow index: {e}") + return False + + def get_file_list(self) -> List[str]: + with self._lock: + return list(self._file_list or []) + + def find_files(self, pattern: str = "*") -> List[str]: + with self._lock: + if not isinstance(pattern, str): + return [] + norm = (pattern.strip() or "*").replace('\\\\','/').replace('\\','/') + regex = self._compile_glob_regex(norm) + files = self._file_list or [] + if norm == "*": + return list(files) + return [f for f in files if regex.match(f) is not None] + + @staticmethod + def _compile_glob_regex(pattern: str) -> re.Pattern: + i = 0 + out = [] + special = ".^$+{}[]|()" + while i < len(pattern): + c = pattern[i] + if c == '*': + if i + 1 < len(pattern) and pattern[i + 1] == '*': + out.append('.*') + i += 2 + continue + else: + out.append('[^/]*') + elif c == '?': + out.append('[^/]') + elif c in special: + out.append('\\' + c) + else: + out.append(c) + i += 1 + return re.compile('^' + ''.join(out) + '$') + + def cleanup(self) -> None: + with self._lock: + self.project_path = None + self.index_builder = None + self.temp_dir = None + self.index_path = None + self._file_list = None + + +# Global singleton +_shallow_manager = ShallowIndexManager() + + +def get_shallow_index_manager() -> ShallowIndexManager: + return _shallow_manager + + diff --git a/src/code_index_mcp/indexing/strategies/__init__.py b/src/code_index_mcp/indexing/strategies/__init__.py new file mode 100644 index 0000000..0f51274 --- /dev/null +++ b/src/code_index_mcp/indexing/strategies/__init__.py @@ -0,0 +1,8 @@ +""" +Parsing strategies for different programming languages. +""" + +from .base_strategy import ParsingStrategy +from .strategy_factory import StrategyFactory + +__all__ = ['ParsingStrategy', 'StrategyFactory'] \ No newline at end of file diff --git a/src/code_index_mcp/indexing/strategies/base_strategy.py b/src/code_index_mcp/indexing/strategies/base_strategy.py new file mode 100644 index 0000000..691dce0 --- /dev/null +++ b/src/code_index_mcp/indexing/strategies/base_strategy.py @@ -0,0 +1,87 @@ +""" +Abstract base class for language parsing strategies. 
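+
+Minimal sketch of a concrete strategy (hypothetical 'foo' language; not part
+of the codebase):
+
+    class FooParsingStrategy(ParsingStrategy):
+        def get_language_name(self) -> str:
+            return "foo"
+
+        def get_supported_extensions(self) -> List[str]:
+            return ['.foo']
+
+        def parse_file(self, file_path: str, content: str):
+            file_info = FileInfo(language="foo",
+                                 line_count=len(content.splitlines()),
+                                 symbols={"functions": [], "classes": []},
+                                 imports=[])
+            return {}, file_info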
+""" + +import os +from abc import ABC, abstractmethod +from typing import Dict, List, Tuple, Optional +from ..models import SymbolInfo, FileInfo + + +class ParsingStrategy(ABC): + """Abstract base class for language parsing strategies.""" + + @abstractmethod + def get_language_name(self) -> str: + """Return the language name this strategy handles.""" + + @abstractmethod + def get_supported_extensions(self) -> List[str]: + """Return list of file extensions this strategy supports.""" + + @abstractmethod + def parse_file(self, file_path: str, content: str) -> Tuple[Dict[str, SymbolInfo], FileInfo]: + """ + Parse file content and extract symbols. + + Args: + file_path: Path to the file being parsed + content: File content as string + + Returns: + Tuple of (symbols_dict, file_info) + - symbols_dict: Maps symbol_id -> SymbolInfo + - file_info: FileInfo with metadata about the file + """ + + def _create_symbol_id(self, file_path: str, symbol_name: str) -> str: + """ + Create a unique symbol ID. + + Args: + file_path: Path to the file containing the symbol + symbol_name: Name of the symbol + + Returns: + Unique symbol identifier in format "relative_path::symbol_name" + """ + relative_path = self._get_relative_path(file_path) + return f"{relative_path}::{symbol_name}" + + def _get_relative_path(self, file_path: str) -> str: + """Convert absolute file path to relative path.""" + parts = file_path.replace('\\', '/').split('/') + + # Priority order: test > src (outermost project roots first) + for root_dir in ['test', 'src']: + if root_dir in parts: + root_index = parts.index(root_dir) + relative_parts = parts[root_index:] + return '/'.join(relative_parts) + + # Fallback: use just filename + return os.path.basename(file_path) + + def _extract_line_number(self, content: str, symbol_position: int) -> int: + """ + Extract line number from character position in content. + + Args: + content: File content + symbol_position: Character position in content + + Returns: + Line number (1-based) + """ + return content[:symbol_position].count('\n') + 1 + + def _get_file_name(self, file_path: str) -> str: + """Get just the filename from a full path.""" + return os.path.basename(file_path) + + def _safe_extract_text(self, content: str, start: int, end: int) -> str: + """Safely extract text from content, handling bounds.""" + try: + return content[start:end].strip() + except (IndexError, TypeError): + return "" diff --git a/src/code_index_mcp/indexing/strategies/fallback_strategy.py b/src/code_index_mcp/indexing/strategies/fallback_strategy.py new file mode 100644 index 0000000..21653bd --- /dev/null +++ b/src/code_index_mcp/indexing/strategies/fallback_strategy.py @@ -0,0 +1,46 @@ +""" +Fallback parsing strategy for unsupported languages and file types. 
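+
+It records file-level metadata (language, line count) without symbol
+extraction; for known document types it adds a single 'file' symbol so the
+file itself is discoverable.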
+""" + +import os +from typing import Dict, List, Tuple +from .base_strategy import ParsingStrategy +from ..models import SymbolInfo, FileInfo + + +class FallbackParsingStrategy(ParsingStrategy): + """Fallback parser for unsupported languages and file types.""" + + def __init__(self, language_name: str = "unknown"): + self.language_name = language_name + + def get_language_name(self) -> str: + return self.language_name + + def get_supported_extensions(self) -> List[str]: + return [] # Fallback supports any extension + + def parse_file(self, file_path: str, content: str) -> Tuple[Dict[str, SymbolInfo], FileInfo]: + """Basic parsing: extract file information without symbol parsing.""" + symbols = {} + + # For document files, we can at least index their existence + file_info = FileInfo( + language=self.language_name, + line_count=len(content.splitlines()), + symbols={"functions": [], "classes": []}, + imports=[] + ) + + # For document files (e.g. .md, .txt, .json), we can add a symbol representing the file itself + if self.language_name in ['markdown', 'text', 'json', 'yaml', 'xml', 'config', 'css', 'html']: + filename = os.path.basename(file_path) + symbol_id = self._create_symbol_id(file_path, f"file:{filename}") + symbols[symbol_id] = SymbolInfo( + type="file", + file=file_path, + line=1, + signature=f"{self.language_name} file: {filename}" + ) + + return symbols, file_info diff --git a/src/code_index_mcp/indexing/strategies/go_strategy.py b/src/code_index_mcp/indexing/strategies/go_strategy.py new file mode 100644 index 0000000..b3a95cb --- /dev/null +++ b/src/code_index_mcp/indexing/strategies/go_strategy.py @@ -0,0 +1,164 @@ +""" +Go parsing strategy using regex patterns. +""" + +import re +from typing import Dict, List, Tuple, Optional +from .base_strategy import ParsingStrategy +from ..models import SymbolInfo, FileInfo + + +class GoParsingStrategy(ParsingStrategy): + """Go-specific parsing strategy using regex patterns.""" + + def get_language_name(self) -> str: + return "go" + + def get_supported_extensions(self) -> List[str]: + return ['.go'] + + def parse_file(self, file_path: str, content: str) -> Tuple[Dict[str, SymbolInfo], FileInfo]: + """Parse Go file using regex patterns.""" + symbols = {} + functions = [] + classes = [] # Go doesn't have classes, but we'll track structs/interfaces + imports = [] + package = None + + lines = content.splitlines() + + for i, line in enumerate(lines): + line = line.strip() + + # Package declaration + if line.startswith('package '): + package = line.split('package ')[1].strip() + + # Import statements + elif line.startswith('import '): + import_match = re.search(r'import\s+"([^"]+)"', line) + if import_match: + imports.append(import_match.group(1)) + + # Function declarations + elif line.startswith('func '): + func_match = re.match(r'func\s+(\w+)\s*\(', line) + if func_match: + func_name = func_match.group(1) + symbol_id = self._create_symbol_id(file_path, func_name) + symbols[symbol_id] = SymbolInfo( + type="function", + file=file_path, + line=i + 1, + signature=line + ) + functions.append(func_name) + + # Method declarations (func (receiver) methodName) + method_match = re.match(r'func\s+\([^)]+\)\s+(\w+)\s*\(', line) + if method_match: + method_name = method_match.group(1) + symbol_id = self._create_symbol_id(file_path, method_name) + symbols[symbol_id] = SymbolInfo( + type="method", + file=file_path, + line=i + 1, + signature=line + ) + functions.append(method_name) + + # Struct declarations + elif re.match(r'type\s+\w+\s+struct\s*\{', 
line):
+                struct_match = re.match(r'type\s+(\w+)\s+struct', line)
+                if struct_match:
+                    struct_name = struct_match.group(1)
+                    symbol_id = self._create_symbol_id(file_path, struct_name)
+                    symbols[symbol_id] = SymbolInfo(
+                        type="struct",
+                        file=file_path,
+                        line=i + 1
+                    )
+                    classes.append(struct_name)
+
+            # Interface declarations
+            elif re.match(r'type\s+\w+\s+interface\s*\{', line):
+                interface_match = re.match(r'type\s+(\w+)\s+interface', line)
+                if interface_match:
+                    interface_name = interface_match.group(1)
+                    symbol_id = self._create_symbol_id(file_path, interface_name)
+                    symbols[symbol_id] = SymbolInfo(
+                        type="interface",
+                        file=file_path,
+                        line=i + 1
+                    )
+                    classes.append(interface_name)
+
+        # Phase 2: Add call relationship analysis
+        self._analyze_go_calls(content, symbols, file_path)
+
+        file_info = FileInfo(
+            language=self.get_language_name(),
+            line_count=len(lines),
+            symbols={"functions": functions, "classes": classes},
+            imports=imports,
+            package=package
+        )
+
+        return symbols, file_info
+
+    def _analyze_go_calls(self, content: str, symbols: Dict[str, SymbolInfo], file_path: str):
+        """Analyze Go function calls for relationships."""
+        lines = content.splitlines()
+        current_function = None
+        is_function_declaration_line = False
+
+        for line in lines:
+            line = line.strip()
+
+            # Track current function context
+            if line.startswith('func '):
+                func_name = self._extract_go_function_name(line)
+                if func_name:
+                    current_function = self._create_symbol_id(file_path, func_name)
+                    is_function_declaration_line = True
+            else:
+                is_function_declaration_line = False
+
+            # Find function calls: functionName() or obj.methodName()
+            # Skip the function declaration line itself to avoid false self-calls
+            if current_function and not is_function_declaration_line and ('(' in line and ')' in line):
+                called_functions = self._extract_go_called_functions(line)
+                for called_func in called_functions:
+                    # Match the called name against each symbol's unqualified name
+                    for symbol_id, symbol_info in symbols.items():
+                        if called_func == symbol_id.split("::")[-1]:
+                            if current_function not in symbol_info.called_by:
+                                symbol_info.called_by.append(current_function)
+
+    def _extract_go_function_name(self, line: str) -> Optional[str]:
+        """Extract function name from Go function declaration."""
+        try:
+            # func functionName(...) or func (receiver) methodName(...)
+            match = re.match(r'func\s+(?:\([^)]*\)\s+)?(\w+)\s*\(', line)
+            if match:
+                return match.group(1)
+        except Exception:
+            pass
+        return None
+
+    def _extract_go_called_functions(self, line: str) -> List[str]:
+        """Extract function names that are being called in this line."""
+        called_functions = []
+
+        # Find patterns like: functionName( or obj.methodName(
+        patterns = [
+            r'(\w+)\s*\(',    # functionName(
+            r'\.(\w+)\s*\(',  # .methodName(
+        ]
+
+        for pattern in patterns:
+            matches = re.findall(pattern, line)
+            called_functions.extend(matches)
+
+        return called_functions
diff --git a/src/code_index_mcp/indexing/strategies/java_strategy.py b/src/code_index_mcp/indexing/strategies/java_strategy.py
new file mode 100644
index 0000000..af2ff8e
--- /dev/null
+++ b/src/code_index_mcp/indexing/strategies/java_strategy.py
@@ -0,0 +1,209 @@
+"""
+Java parsing strategy using tree-sitter - Optimized single-pass version.
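+
+The traversal keeps a symbol_lookup dict (name -> symbol_id) so that
+method_invocation nodes can be resolved in O(1) instead of rescanning all
+symbols for every call site.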
+""" + +import logging +from typing import Dict, List, Tuple, Optional, Set +from .base_strategy import ParsingStrategy +from ..models import SymbolInfo, FileInfo + +logger = logging.getLogger(__name__) + +import tree_sitter +from tree_sitter_java import language + + +class JavaParsingStrategy(ParsingStrategy): + """Java-specific parsing strategy - Single Pass Optimized.""" + + def __init__(self): + self.java_language = tree_sitter.Language(language()) + + def get_language_name(self) -> str: + return "java" + + def get_supported_extensions(self) -> List[str]: + return ['.java'] + + def parse_file(self, file_path: str, content: str) -> Tuple[Dict[str, SymbolInfo], FileInfo]: + """Parse Java file using tree-sitter with single-pass optimization.""" + symbols = {} + functions = [] + classes = [] + imports = [] + package = None + + # Symbol lookup index for O(1) access + symbol_lookup = {} # name -> symbol_id mapping + + parser = tree_sitter.Parser(self.java_language) + + try: + tree = parser.parse(content.encode('utf8')) + + # Extract package info first + for node in tree.root_node.children: + if node.type == 'package_declaration': + package = self._extract_java_package(node, content) + break + + # Single-pass traversal that handles everything + context = TraversalContext( + content=content, + file_path=file_path, + symbols=symbols, + functions=functions, + classes=classes, + imports=imports, + symbol_lookup=symbol_lookup + ) + + self._traverse_node_single_pass(tree.root_node, context) + + except Exception as e: + logger.warning(f"Error parsing Java file {file_path}: {e}") + + file_info = FileInfo( + language=self.get_language_name(), + line_count=len(content.splitlines()), + symbols={"functions": functions, "classes": classes}, + imports=imports, + package=package + ) + + return symbols, file_info + + def _traverse_node_single_pass(self, node, context: 'TraversalContext', + current_class: Optional[str] = None, + current_method: Optional[str] = None): + """Single-pass traversal that extracts symbols and analyzes calls.""" + + # Handle class declarations + if node.type == 'class_declaration': + name = self._get_java_class_name(node, context.content) + if name: + symbol_id = self._create_symbol_id(context.file_path, name) + symbol_info = SymbolInfo( + type="class", + file=context.file_path, + line=node.start_point[0] + 1 + ) + context.symbols[symbol_id] = symbol_info + context.symbol_lookup[name] = symbol_id + context.classes.append(name) + + # Traverse class body with updated context + for child in node.children: + self._traverse_node_single_pass(child, context, current_class=name, current_method=current_method) + return + + # Handle method declarations + elif node.type == 'method_declaration': + name = self._get_java_method_name(node, context.content) + if name: + # Build full method name with class context + if current_class: + full_name = f"{current_class}.{name}" + else: + full_name = name + + symbol_id = self._create_symbol_id(context.file_path, full_name) + symbol_info = SymbolInfo( + type="method", + file=context.file_path, + line=node.start_point[0] + 1, + signature=self._get_java_method_signature(node, context.content) + ) + context.symbols[symbol_id] = symbol_info + context.symbol_lookup[full_name] = symbol_id + context.symbol_lookup[name] = symbol_id # Also index by method name alone + context.functions.append(full_name) + + # Traverse method body with updated context + for child in node.children: + self._traverse_node_single_pass(child, context, current_class=current_class, + 
current_method=symbol_id)
+            return
+
+        # Handle method invocations (calls)
+        elif node.type == 'method_invocation':
+            if current_method:
+                called_method = self._get_called_method_name(node, context.content)
+                if called_method:
+                    # Use O(1) lookup instead of O(n) iteration
+                    if called_method in context.symbol_lookup:
+                        symbol_id = context.symbol_lookup[called_method]
+                        symbol_info = context.symbols[symbol_id]
+                        if current_method not in symbol_info.called_by:
+                            symbol_info.called_by.append(current_method)
+                    else:
+                        # Try to find method with class prefix
+                        for name, sid in context.symbol_lookup.items():
+                            if name.endswith(f".{called_method}"):
+                                symbol_info = context.symbols[sid]
+                                if current_method not in symbol_info.called_by:
+                                    symbol_info.called_by.append(current_method)
+                                break
+
+        # Handle import declarations
+        elif node.type == 'import_declaration':
+            import_text = context.content[node.start_byte:node.end_byte]
+            # Extract the import path (remove 'import' keyword and semicolon)
+            import_path = import_text.replace('import', '').replace(';', '').strip()
+            if import_path:
+                context.imports.append(import_path)
+
+        # Continue traversing children for other node types
+        for child in node.children:
+            self._traverse_node_single_pass(child, context, current_class=current_class,
+                                            current_method=current_method)
+
+    def _get_java_class_name(self, node, content: str) -> Optional[str]:
+        for child in node.children:
+            if child.type == 'identifier':
+                return content[child.start_byte:child.end_byte]
+        return None
+
+    def _get_java_method_name(self, node, content: str) -> Optional[str]:
+        for child in node.children:
+            if child.type == 'identifier':
+                return content[child.start_byte:child.end_byte]
+        return None
+
+    def _get_java_method_signature(self, node, content: str) -> str:
+        return content[node.start_byte:node.end_byte].split('\n')[0].strip()
+
+    def _extract_java_package(self, node, content: str) -> Optional[str]:
+        for child in node.children:
+            if child.type == 'scoped_identifier':
+                return content[child.start_byte:child.end_byte]
+        return None
+
+    def _get_called_method_name(self, node, content: str) -> Optional[str]:
+        """Extract called method name from method invocation node."""
+        # Prefer the grammar's 'name' field: in obj.method() the first
+        # identifier child is the receiver, not the method being invoked
+        name_node = node.child_by_field_name('name')
+        if name_node is not None:
+            return content[name_node.start_byte:name_node.end_byte]
+        # Fallback: the method name is the rightmost identifier child
+        identifiers = [c for c in node.children if c.type == 'identifier']
+        if identifiers:
+            return content[identifiers[-1].start_byte:identifiers[-1].end_byte]
+        return None
+
+
+class TraversalContext:
+    """Context object to pass state during single-pass traversal."""
+
+    def __init__(self, content: str, file_path: str, symbols: Dict,
+                 functions: List, classes: List, imports: List, symbol_lookup: Dict):
+        self.content = content
+        self.file_path = file_path
+        self.symbols = symbols
+        self.functions = functions
+        self.classes = classes
+        self.imports = imports
+        self.symbol_lookup = symbol_lookup
\ No newline at end of file
diff --git a/src/code_index_mcp/indexing/strategies/javascript_strategy.py b/src/code_index_mcp/indexing/strategies/javascript_strategy.py
new file mode 100644
index 0000000..63c78f7
--- /dev/null
+++ b/src/code_index_mcp/indexing/strategies/javascript_strategy.py
@@ -0,0 +1,154 @@
+"""
+JavaScript parsing strategy
using tree-sitter. +""" + +import logging +from typing import Dict, List, Tuple, Optional +import tree_sitter +from tree_sitter_javascript import language +from .base_strategy import ParsingStrategy +from ..models import SymbolInfo, FileInfo + +logger = logging.getLogger(__name__) + + +class JavaScriptParsingStrategy(ParsingStrategy): + """JavaScript-specific parsing strategy using tree-sitter.""" + + def __init__(self): + self.js_language = tree_sitter.Language(language()) + + def get_language_name(self) -> str: + return "javascript" + + def get_supported_extensions(self) -> List[str]: + return ['.js', '.jsx', '.mjs', '.cjs'] + + def parse_file(self, file_path: str, content: str) -> Tuple[Dict[str, SymbolInfo], FileInfo]: + """Parse JavaScript file using tree-sitter.""" + symbols = {} + functions = [] + classes = [] + imports = [] + exports = [] + + parser = tree_sitter.Parser(self.js_language) + tree = parser.parse(content.encode('utf8')) + self._traverse_js_node(tree.root_node, content, file_path, symbols, functions, classes, imports, exports) + + file_info = FileInfo( + language=self.get_language_name(), + line_count=len(content.splitlines()), + symbols={"functions": functions, "classes": classes}, + imports=imports, + exports=exports + ) + + return symbols, file_info + + def _traverse_js_node(self, node, content: str, file_path: str, symbols: Dict[str, SymbolInfo], + functions: List[str], classes: List[str], imports: List[str], exports: List[str]): + """Traverse JavaScript AST node.""" + if node.type == 'function_declaration': + name = self._get_function_name(node, content) + if name: + symbol_id = self._create_symbol_id(file_path, name) + signature = self._get_js_function_signature(node, content) + symbols[symbol_id] = SymbolInfo( + type="function", + file=file_path, + line=node.start_point[0] + 1, + signature=signature + ) + functions.append(name) + + # Handle arrow functions and function expressions in lexical declarations (const/let) + elif node.type in ['lexical_declaration', 'variable_declaration']: + # Look for const/let/var name = arrow_function or function_expression + for child in node.children: + if child.type == 'variable_declarator': + name_node = None + value_node = None + for declarator_child in child.children: + if declarator_child.type == 'identifier': + name_node = declarator_child + elif declarator_child.type in ['arrow_function', 'function_expression', 'function']: + value_node = declarator_child + + if name_node and value_node: + name = content[name_node.start_byte:name_node.end_byte] + symbol_id = self._create_symbol_id(file_path, name) + # Create signature from the declaration + signature = content[child.start_byte:child.end_byte].split('\n')[0].strip() + symbols[symbol_id] = SymbolInfo( + type="function", + file=file_path, + line=child.start_point[0] + 1, # Use child position, not parent + signature=signature + ) + functions.append(name) + + elif node.type == 'class_declaration': + name = self._get_class_name(node, content) + if name: + symbol_id = self._create_symbol_id(file_path, name) + symbols[symbol_id] = SymbolInfo( + type="class", + file=file_path, + line=node.start_point[0] + 1 + ) + classes.append(name) + + elif node.type == 'method_definition': + method_name = self._get_method_name(node, content) + class_name = self._find_parent_class(node, content) + if method_name and class_name: + full_name = f"{class_name}.{method_name}" + symbol_id = self._create_symbol_id(file_path, full_name) + signature = self._get_js_function_signature(node, content) + 
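+                # Example: a source like `class Cart { add(item) {} }` is
+                # recorded here under the combined name "Cart.add" (names are
+                # illustrative, not from this repository), so methods remain
+                # distinguishable from free functions in the index.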
symbols[symbol_id] = SymbolInfo( + type="method", + file=file_path, + line=node.start_point[0] + 1, + signature=signature + ) + # Add method to functions list for consistency + functions.append(full_name) + + # Continue traversing children + for child in node.children: + self._traverse_js_node(child, content, file_path, symbols, functions, classes, imports, exports) + + def _get_function_name(self, node, content: str) -> Optional[str]: + """Extract function name from tree-sitter node.""" + for child in node.children: + if child.type == 'identifier': + return content[child.start_byte:child.end_byte] + return None + + def _get_class_name(self, node, content: str) -> Optional[str]: + """Extract class name from tree-sitter node.""" + for child in node.children: + if child.type == 'identifier': + return content[child.start_byte:child.end_byte] + return None + + def _get_method_name(self, node, content: str) -> Optional[str]: + """Extract method name from tree-sitter node.""" + for child in node.children: + if child.type == 'property_identifier': + return content[child.start_byte:child.end_byte] + return None + + def _find_parent_class(self, node, content: str) -> Optional[str]: + """Find the parent class of a method.""" + parent = node.parent + while parent: + if parent.type == 'class_declaration': + return self._get_class_name(parent, content) + parent = parent.parent + return None + + def _get_js_function_signature(self, node, content: str) -> str: + """Extract JavaScript function signature.""" + return content[node.start_byte:node.end_byte].split('\n')[0].strip() diff --git a/src/code_index_mcp/indexing/strategies/objective_c_strategy.py b/src/code_index_mcp/indexing/strategies/objective_c_strategy.py new file mode 100644 index 0000000..4226f1c --- /dev/null +++ b/src/code_index_mcp/indexing/strategies/objective_c_strategy.py @@ -0,0 +1,154 @@ +""" +Objective-C parsing strategy using regex patterns. 
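+
+Sketch of expected output (file name and snippet are illustrative only,
+given the regex heuristics below):
+
+    strategy = ObjectiveCParsingStrategy()
+    symbols, info = strategy.parse_file(
+        "Greeter.m",
+        '@implementation Greeter\n- (void)greet { }\n@end',
+    )
+    # symbols would include "Greeter.m::Greeter.greet" as a method entry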
+""" + +import re +from typing import Dict, List, Tuple, Optional +from .base_strategy import ParsingStrategy +from ..models import SymbolInfo, FileInfo + + +class ObjectiveCParsingStrategy(ParsingStrategy): + """Objective-C parsing strategy using regex patterns.""" + + def get_language_name(self) -> str: + return "objective-c" + + def get_supported_extensions(self) -> List[str]: + return ['.m', '.mm'] + + def parse_file(self, file_path: str, content: str) -> Tuple[Dict[str, SymbolInfo], FileInfo]: + """Parse Objective-C file using regex patterns.""" + symbols = {} + functions = [] + classes = [] + imports = [] + + lines = content.splitlines() + current_class = None + + for i, line in enumerate(lines): + line = line.strip() + + # Import statements + if line.startswith('#import ') or line.startswith('#include '): + import_match = re.search(r'#(?:import|include)\s+[<"]([^>"]+)[>"]', line) + if import_match: + imports.append(import_match.group(1)) + + # Interface declarations + elif line.startswith('@interface '): + interface_match = re.match(r'@interface\s+(\w+)', line) + if interface_match: + class_name = interface_match.group(1) + current_class = class_name + symbol_id = self._create_symbol_id(file_path, class_name) + symbols[symbol_id] = SymbolInfo( + type="class", + file=file_path, + line=i + 1 + ) + classes.append(class_name) + + # Implementation declarations + elif line.startswith('@implementation '): + impl_match = re.match(r'@implementation\s+(\w+)', line) + if impl_match: + current_class = impl_match.group(1) + + # Method declarations + elif line.startswith(('- (', '+ (')): + method_match = re.search(r'[+-]\s*\([^)]+\)\s*(\w+)', line) + if method_match: + method_name = method_match.group(1) + full_name = f"{current_class}.{method_name}" if current_class else method_name + symbol_id = self._create_symbol_id(file_path, full_name) + symbols[symbol_id] = SymbolInfo( + type="method", + file=file_path, + line=i + 1, + signature=line + ) + functions.append(full_name) + + # C function declarations + elif re.match(r'\w+.*\s+\w+\s*\([^)]*\)\s*\{?', line) and not line.startswith(('if', 'for', 'while')): + func_match = re.search(r'\s(\w+)\s*\([^)]*\)', line) + if func_match: + func_name = func_match.group(1) + symbol_id = self._create_symbol_id(file_path, func_name) + symbols[symbol_id] = SymbolInfo( + type="function", + file=file_path, + line=i + 1, + signature=line + ) + functions.append(func_name) + + # End of class + elif line == '@end': + current_class = None + + # Phase 2: Add call relationship analysis + self._analyze_objc_calls(content, symbols, file_path) + + file_info = FileInfo( + language=self.get_language_name(), + line_count=len(lines), + symbols={"functions": functions, "classes": classes}, + imports=imports + ) + + return symbols, file_info + + def _analyze_objc_calls(self, content: str, symbols: Dict[str, SymbolInfo], file_path: str): + """Analyze Objective-C method calls for relationships.""" + lines = content.splitlines() + current_function = None + + for i, line in enumerate(lines): + original_line = line + line = line.strip() + + # Track current method context + if line.startswith('- (') or line.startswith('+ ('): + func_name = self._extract_objc_method_name(line) + if func_name: + current_function = self._create_symbol_id(file_path, func_name) + + # Find method calls: [obj methodName] or functionName() + if current_function and ('[' in line and ']' in line or ('(' in line and ')' in line)): + called_functions = self._extract_objc_called_functions(line) + for called_func in 
called_functions:
+                    # Find the called function in symbols and add relationship;
+                    # exact-match the final name component (or a ".method"
+                    # suffix) instead of substring matching, which over-matches
+                    for symbol_id, symbol_info in symbols.items():
+                        symbol_name = symbol_id.split("::")[-1]
+                        if symbol_name == called_func or symbol_name.endswith(f".{called_func}"):
+                            if current_function not in symbol_info.called_by:
+                                symbol_info.called_by.append(current_function)
+
+    def _extract_objc_method_name(self, line: str) -> Optional[str]:
+        """Extract method name from Objective-C method declaration."""
+        # - (returnType)methodName:(params) or + (returnType)methodName
+        match = re.search(r'[+-]\s*\([^)]*\)\s*(\w+)', line)
+        return match.group(1) if match else None
+
+    def _extract_objc_called_functions(self, line: str) -> List[str]:
+        """Extract method names that are being called in this line."""
+        called_functions = []
+
+        # Find patterns like: [obj methodName] or functionName(
+        patterns = [
+            r'\[\s*\w+\s+(\w+)\s*[\]:]',  # [obj methodName]
+            r'(\w+)\s*\(',                # functionName(
+        ]
+
+        for pattern in patterns:
+            matches = re.findall(pattern, line)
+            called_functions.extend(matches)
+
+        return called_functions
diff --git a/src/code_index_mcp/indexing/strategies/python_strategy.py b/src/code_index_mcp/indexing/strategies/python_strategy.py
new file mode 100644
index 0000000..a09d00c
--- /dev/null
+++ b/src/code_index_mcp/indexing/strategies/python_strategy.py
@@ -0,0 +1,264 @@
+"""
+Python parsing strategy using AST - Optimized single-pass version.
+"""
+
+import ast
+import logging
+from typing import Dict, List, Tuple, Optional, Set
+from .base_strategy import ParsingStrategy
+from ..models import SymbolInfo, FileInfo
+
+logger = logging.getLogger(__name__)
+
+
+class PythonParsingStrategy(ParsingStrategy):
+    """Python-specific parsing strategy using Python's built-in AST - Single Pass Optimized."""
+
+    def get_language_name(self) -> str:
+        return "python"
+
+    def get_supported_extensions(self) -> List[str]:
+        return ['.py', '.pyw']
+
+    def parse_file(self, file_path: str, content: str) -> Tuple[Dict[str, SymbolInfo], FileInfo]:
+        """Parse Python file using AST with single-pass optimization."""
+        symbols = {}
+        functions = []
+        classes = []
+        imports = []
+
+        try:
+            tree = ast.parse(content)
+            # Single-pass visitor that handles everything at once
+            visitor = SinglePassVisitor(symbols, functions, classes, imports, file_path)
+            visitor.visit(tree)
+        except SyntaxError as e:
+            logger.warning(f"Syntax error in Python file {file_path}: {e}")
+        except Exception as e:
+            logger.warning(f"Error parsing Python file {file_path}: {e}")
+
+        file_info = FileInfo(
+            language=self.get_language_name(),
+            line_count=len(content.splitlines()),
+            symbols={"functions": functions, "classes": classes},
+            imports=imports
+        )
+
+        return symbols, file_info
+
+
+class SinglePassVisitor(ast.NodeVisitor):
+    """Single-pass AST visitor that extracts symbols and analyzes calls in one traversal."""
+
+    def __init__(self, symbols: Dict[str, SymbolInfo], functions: List[str],
+                 classes: List[str], imports: List[str], file_path: str):
+        self.symbols = symbols
+        self.functions = functions
+        self.classes = classes
+        self.imports = imports
+        self.file_path = file_path
+
+        # Context tracking for call analysis
+        self.current_function_stack = []
+        self.current_class = None
+
+        # Symbol lookup index for O(1) access
+        self.symbol_lookup = {}  # name -> symbol_id mapping for fast lookups
+
+        # Track processed nodes to avoid duplicates
+        self.processed_nodes: Set[int] = set()
+
+    def visit_ClassDef(self, node: ast.ClassDef):
+        """Visit class definition - extract symbol and analyze in single pass."""
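+        # Worked example (illustrative names): for `class Greeter:` containing
+        # `def hello(self):`, this records "<file>::Greeter" here and
+        # "<file>::Greeter.hello" via _handle_method below, in one traversal.
+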
class_name = node.name + symbol_id = self._create_symbol_id(self.file_path, class_name) + + # Extract docstring + docstring = ast.get_docstring(node) + + # Create symbol info + symbol_info = SymbolInfo( + type="class", + file=self.file_path, + line=node.lineno, + docstring=docstring + ) + + # Store in symbols and lookup index + self.symbols[symbol_id] = symbol_info + self.symbol_lookup[class_name] = symbol_id + self.classes.append(class_name) + + # Track class context for method processing + old_class = self.current_class + self.current_class = class_name + + # Process class body (including methods) + for child in node.body: + if isinstance(child, ast.FunctionDef): + self._handle_method(child, class_name) + else: + # Visit other nodes in class body + self.visit(child) + + # Restore previous class context + self.current_class = old_class + + def visit_FunctionDef(self, node: ast.FunctionDef): + """Visit function definition - extract symbol and track context.""" + # Skip if this is a method (already handled by ClassDef) + if self.current_class: + return + + # Skip if already processed + node_id = id(node) + if node_id in self.processed_nodes: + return + self.processed_nodes.add(node_id) + + func_name = node.name + symbol_id = self._create_symbol_id(self.file_path, func_name) + + # Extract function signature and docstring + signature = self._extract_function_signature(node) + docstring = ast.get_docstring(node) + + # Create symbol info + symbol_info = SymbolInfo( + type="function", + file=self.file_path, + line=node.lineno, + signature=signature, + docstring=docstring + ) + + # Store in symbols and lookup index + self.symbols[symbol_id] = symbol_info + self.symbol_lookup[func_name] = symbol_id + self.functions.append(func_name) + + # Track function context for call analysis + function_id = f"{self.file_path}::{func_name}" + self.current_function_stack.append(function_id) + + # Visit function body to analyze calls + self.generic_visit(node) + + # Pop function from stack + self.current_function_stack.pop() + + def _handle_method(self, node: ast.FunctionDef, class_name: str): + """Handle method definition within a class.""" + method_name = f"{class_name}.{node.name}" + method_symbol_id = self._create_symbol_id(self.file_path, method_name) + + method_signature = self._extract_function_signature(node) + method_docstring = ast.get_docstring(node) + + # Create symbol info + symbol_info = SymbolInfo( + type="method", + file=self.file_path, + line=node.lineno, + signature=method_signature, + docstring=method_docstring + ) + + # Store in symbols and lookup index + self.symbols[method_symbol_id] = symbol_info + self.symbol_lookup[method_name] = method_symbol_id + self.symbol_lookup[node.name] = method_symbol_id # Also index by method name alone + self.functions.append(method_name) + + # Track method context for call analysis + function_id = f"{self.file_path}::{method_name}" + self.current_function_stack.append(function_id) + + # Visit method body to analyze calls + for child in node.body: + self.visit(child) + + # Pop method from stack + self.current_function_stack.pop() + + def visit_Import(self, node: ast.Import): + """Handle import statements.""" + for alias in node.names: + self.imports.append(alias.name) + self.generic_visit(node) + + def visit_ImportFrom(self, node: ast.ImportFrom): + """Handle from...import statements.""" + if node.module: + for alias in node.names: + self.imports.append(f"{node.module}.{alias.name}") + self.generic_visit(node) + + def visit_Call(self, node: ast.Call): + 
"""Visit function call and record relationship using O(1) lookup.""" + if not self.current_function_stack: + self.generic_visit(node) + return + + try: + # Get the function name being called + called_function = None + + if isinstance(node.func, ast.Name): + # Direct function call: function_name() + called_function = node.func.id + elif isinstance(node.func, ast.Attribute): + # Method call: obj.method() or module.function() + called_function = node.func.attr + + if called_function: + # Get the current calling function + caller_function = self.current_function_stack[-1] + + # Use O(1) lookup instead of O(n) iteration + # First try exact match + if called_function in self.symbol_lookup: + symbol_id = self.symbol_lookup[called_function] + symbol_info = self.symbols[symbol_id] + if symbol_info.type in ["function", "method"]: + if caller_function not in symbol_info.called_by: + symbol_info.called_by.append(caller_function) + else: + # Try method name match for any class + for name, symbol_id in self.symbol_lookup.items(): + if name.endswith(f".{called_function}"): + symbol_info = self.symbols[symbol_id] + if symbol_info.type in ["function", "method"]: + if caller_function not in symbol_info.called_by: + symbol_info.called_by.append(caller_function) + break + except Exception: + # Silently handle parsing errors for complex call patterns + pass + + # Continue visiting child nodes + self.generic_visit(node) + + def _create_symbol_id(self, file_path: str, symbol_name: str) -> str: + """Create a unique symbol ID.""" + return f"{file_path}::{symbol_name}" + + def _extract_function_signature(self, node: ast.FunctionDef) -> str: + """Extract function signature from AST node.""" + # Build basic signature + args = [] + + # Regular arguments + for arg in node.args.args: + args.append(arg.arg) + + # Varargs (*args) + if node.args.vararg: + args.append(f"*{node.args.vararg.arg}") + + # Keyword arguments (**kwargs) + if node.args.kwarg: + args.append(f"**{node.args.kwarg.arg}") + + signature = f"def {node.name}({', '.join(args)}):" + return signature \ No newline at end of file diff --git a/src/code_index_mcp/indexing/strategies/strategy_factory.py b/src/code_index_mcp/indexing/strategies/strategy_factory.py new file mode 100644 index 0000000..c7116d9 --- /dev/null +++ b/src/code_index_mcp/indexing/strategies/strategy_factory.py @@ -0,0 +1,201 @@ +""" +Strategy factory for creating appropriate parsing strategies. 
+""" + +import threading +from typing import Dict, List +from .base_strategy import ParsingStrategy +from .python_strategy import PythonParsingStrategy +from .javascript_strategy import JavaScriptParsingStrategy +from .typescript_strategy import TypeScriptParsingStrategy +from .java_strategy import JavaParsingStrategy +from .go_strategy import GoParsingStrategy +from .objective_c_strategy import ObjectiveCParsingStrategy +from .zig_strategy import ZigParsingStrategy +from .fallback_strategy import FallbackParsingStrategy + + +class StrategyFactory: + """Factory for creating appropriate parsing strategies.""" + + def __init__(self): + # Initialize all strategies with thread safety + self._strategies: Dict[str, ParsingStrategy] = {} + self._initialized = False + self._lock = threading.RLock() + self._initialize_strategies() + + # File type mappings for fallback parser + self._file_type_mappings = { + # Web and markup + '.html': 'html', '.htm': 'html', + '.css': 'css', '.scss': 'css', '.sass': 'css', + '.less': 'css', '.stylus': 'css', '.styl': 'css', + '.md': 'markdown', '.mdx': 'markdown', + '.json': 'json', '.jsonc': 'json', + '.xml': 'xml', + '.yml': 'yaml', '.yaml': 'yaml', + + # Frontend frameworks + '.vue': 'vue', + '.svelte': 'svelte', + '.astro': 'astro', + + # Template engines + '.hbs': 'handlebars', '.handlebars': 'handlebars', + '.ejs': 'ejs', + '.pug': 'pug', + + # Database and SQL + '.sql': 'sql', '.ddl': 'sql', '.dml': 'sql', + '.mysql': 'sql', '.postgresql': 'sql', '.psql': 'sql', + '.sqlite': 'sql', '.mssql': 'sql', '.oracle': 'sql', + '.ora': 'sql', '.db2': 'sql', + '.proc': 'sql', '.procedure': 'sql', + '.func': 'sql', '.function': 'sql', + '.view': 'sql', '.trigger': 'sql', '.index': 'sql', + '.migration': 'sql', '.seed': 'sql', '.fixture': 'sql', + '.schema': 'sql', + '.cql': 'sql', '.cypher': 'sql', '.sparql': 'sql', + '.gql': 'graphql', + '.liquibase': 'sql', '.flyway': 'sql', + + # Config and text files + '.txt': 'text', + '.ini': 'config', '.cfg': 'config', '.conf': 'config', + '.toml': 'config', + '.properties': 'config', + '.env': 'config', + '.gitignore': 'config', + '.dockerignore': 'config', + '.editorconfig': 'config', + + # Other programming languages (will use fallback) + '.c': 'c', '.cpp': 'cpp', '.h': 'h', '.hpp': 'hpp', + '.cxx': 'cpp', '.cc': 'cpp', '.hxx': 'hpp', '.hh': 'hpp', + '.cs': 'csharp', + '.rb': 'ruby', + '.php': 'php', + '.swift': 'swift', + '.kt': 'kotlin', '.kts': 'kotlin', + '.rs': 'rust', + '.scala': 'scala', + '.sh': 'shell', '.bash': 'shell', '.zsh': 'shell', + '.ps1': 'powershell', + '.bat': 'batch', '.cmd': 'batch', + '.r': 'r', '.R': 'r', + '.pl': 'perl', '.pm': 'perl', + '.lua': 'lua', + '.dart': 'dart', + '.hs': 'haskell', + '.ml': 'ocaml', '.mli': 'ocaml', + '.fs': 'fsharp', '.fsx': 'fsharp', + '.clj': 'clojure', '.cljs': 'clojure', + '.vim': 'vim', + } + + def _initialize_strategies(self): + """Initialize all parsing strategies with thread safety.""" + with self._lock: + if self._initialized: + return + + try: + # Python + python_strategy = PythonParsingStrategy() + for ext in python_strategy.get_supported_extensions(): + self._strategies[ext] = python_strategy + + # JavaScript + js_strategy = JavaScriptParsingStrategy() + for ext in js_strategy.get_supported_extensions(): + self._strategies[ext] = js_strategy + + # TypeScript + ts_strategy = TypeScriptParsingStrategy() + for ext in ts_strategy.get_supported_extensions(): + self._strategies[ext] = ts_strategy + + # Java + java_strategy = JavaParsingStrategy() + for ext in 
java_strategy.get_supported_extensions(): + self._strategies[ext] = java_strategy + + # Go + go_strategy = GoParsingStrategy() + for ext in go_strategy.get_supported_extensions(): + self._strategies[ext] = go_strategy + + # Objective-C + objc_strategy = ObjectiveCParsingStrategy() + for ext in objc_strategy.get_supported_extensions(): + self._strategies[ext] = objc_strategy + + # Zig + zig_strategy = ZigParsingStrategy() + for ext in zig_strategy.get_supported_extensions(): + self._strategies[ext] = zig_strategy + + self._initialized = True + + except Exception as e: + # Reset state on failure to allow retry + self._strategies.clear() + self._initialized = False + raise e + + def get_strategy(self, file_extension: str) -> ParsingStrategy: + """ + Get appropriate strategy for file extension. + + Args: + file_extension: File extension (e.g., '.py', '.js') + + Returns: + Appropriate parsing strategy + """ + with self._lock: + # Ensure initialization is complete + if not self._initialized: + self._initialize_strategies() + + # Check for specialized strategies first + if file_extension in self._strategies: + return self._strategies[file_extension] + + # Use fallback strategy with appropriate language name + language_name = self._file_type_mappings.get(file_extension, 'unknown') + return FallbackParsingStrategy(language_name) + + def get_all_supported_extensions(self) -> List[str]: + """Get all supported extensions across strategies.""" + specialized = list(self._strategies.keys()) + fallback = list(self._file_type_mappings.keys()) + return specialized + fallback + + def get_specialized_extensions(self) -> List[str]: + """Get extensions that have specialized parsers.""" + return list(self._strategies.keys()) + + def get_fallback_extensions(self) -> List[str]: + """Get extensions that use fallback parsing.""" + return list(self._file_type_mappings.keys()) + + def get_strategy_info(self) -> Dict[str, List[str]]: + """Get information about available strategies.""" + info = {} + + # Group extensions by strategy type + for ext, strategy in self._strategies.items(): + strategy_name = strategy.get_language_name() + if strategy_name not in info: + info[strategy_name] = [] + info[strategy_name].append(ext) + + # Add fallback info + fallback_languages = set(self._file_type_mappings.values()) + for lang in fallback_languages: + extensions = [ext for ext, mapped_lang in self._file_type_mappings.items() if mapped_lang == lang] + info[f"fallback_{lang}"] = extensions + + return info diff --git a/src/code_index_mcp/indexing/strategies/typescript_strategy.py b/src/code_index_mcp/indexing/strategies/typescript_strategy.py new file mode 100644 index 0000000..05ed04d --- /dev/null +++ b/src/code_index_mcp/indexing/strategies/typescript_strategy.py @@ -0,0 +1,251 @@ +""" +TypeScript parsing strategy using tree-sitter - Optimized single-pass version. 
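+
+Illustrative behavior (names are examples, not fixtures from this repo):
+
+    strategy = TypeScriptParsingStrategy()
+    symbols, info = strategy.parse_file(
+        "app.ts",
+        "interface User { id: number }\nclass Api { get() { } }",
+    )
+    # info.symbols["classes"] lists "User" and "Api"; functions gets "Api.get"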
+""" + +import logging +from typing import Dict, List, Tuple, Optional, Set +from .base_strategy import ParsingStrategy +from ..models import SymbolInfo, FileInfo + +logger = logging.getLogger(__name__) + +import tree_sitter +from tree_sitter_typescript import language_typescript + + +class TypeScriptParsingStrategy(ParsingStrategy): + """TypeScript-specific parsing strategy using tree-sitter - Single Pass Optimized.""" + + def __init__(self): + self.ts_language = tree_sitter.Language(language_typescript()) + + def get_language_name(self) -> str: + return "typescript" + + def get_supported_extensions(self) -> List[str]: + return ['.ts', '.tsx'] + + def parse_file(self, file_path: str, content: str) -> Tuple[Dict[str, SymbolInfo], FileInfo]: + """Parse TypeScript file using tree-sitter with single-pass optimization.""" + symbols = {} + functions = [] + classes = [] + imports = [] + exports = [] + + # Symbol lookup index for O(1) access + symbol_lookup = {} # name -> symbol_id mapping + + parser = tree_sitter.Parser(self.ts_language) + tree = parser.parse(content.encode('utf8')) + + # Single-pass traversal that handles everything + context = TraversalContext( + content=content, + file_path=file_path, + symbols=symbols, + functions=functions, + classes=classes, + imports=imports, + exports=exports, + symbol_lookup=symbol_lookup + ) + + self._traverse_node_single_pass(tree.root_node, context) + + file_info = FileInfo( + language=self.get_language_name(), + line_count=len(content.splitlines()), + symbols={"functions": functions, "classes": classes}, + imports=imports, + exports=exports + ) + + return symbols, file_info + + def _traverse_node_single_pass(self, node, context: 'TraversalContext', + current_function: Optional[str] = None, + current_class: Optional[str] = None): + """Single-pass traversal that extracts symbols and analyzes calls.""" + + # Handle function declarations + if node.type == 'function_declaration': + name = self._get_function_name(node, context.content) + if name: + symbol_id = self._create_symbol_id(context.file_path, name) + signature = self._get_ts_function_signature(node, context.content) + symbol_info = SymbolInfo( + type="function", + file=context.file_path, + line=node.start_point[0] + 1, + signature=signature + ) + context.symbols[symbol_id] = symbol_info + context.symbol_lookup[name] = symbol_id + context.functions.append(name) + + # Traverse function body with updated context + func_context = f"{context.file_path}::{name}" + for child in node.children: + self._traverse_node_single_pass(child, context, current_function=func_context, + current_class=current_class) + return + + # Handle class declarations + elif node.type == 'class_declaration': + name = self._get_class_name(node, context.content) + if name: + symbol_id = self._create_symbol_id(context.file_path, name) + symbol_info = SymbolInfo( + type="class", + file=context.file_path, + line=node.start_point[0] + 1 + ) + context.symbols[symbol_id] = symbol_info + context.symbol_lookup[name] = symbol_id + context.classes.append(name) + + # Traverse class body with updated context + for child in node.children: + self._traverse_node_single_pass(child, context, current_function=current_function, + current_class=name) + return + + # Handle interface declarations + elif node.type == 'interface_declaration': + name = self._get_interface_name(node, context.content) + if name: + symbol_id = self._create_symbol_id(context.file_path, name) + symbol_info = SymbolInfo( + type="interface", + file=context.file_path, + 
line=node.start_point[0] + 1 + ) + context.symbols[symbol_id] = symbol_info + context.symbol_lookup[name] = symbol_id + context.classes.append(name) # Group interfaces with classes + + # Traverse interface body with updated context + for child in node.children: + self._traverse_node_single_pass(child, context, current_function=current_function, + current_class=name) + return + + # Handle method definitions + elif node.type == 'method_definition': + method_name = self._get_method_name(node, context.content) + if method_name and current_class: + full_name = f"{current_class}.{method_name}" + symbol_id = self._create_symbol_id(context.file_path, full_name) + signature = self._get_ts_function_signature(node, context.content) + symbol_info = SymbolInfo( + type="method", + file=context.file_path, + line=node.start_point[0] + 1, + signature=signature + ) + context.symbols[symbol_id] = symbol_info + context.symbol_lookup[full_name] = symbol_id + context.symbol_lookup[method_name] = symbol_id # Also index by method name alone + context.functions.append(full_name) + + # Traverse method body with updated context + method_context = f"{context.file_path}::{full_name}" + for child in node.children: + self._traverse_node_single_pass(child, context, current_function=method_context, + current_class=current_class) + return + + # Handle function calls + elif node.type == 'call_expression' and current_function: + # Extract the function being called + called_function = None + if node.children: + func_node = node.children[0] + if func_node.type == 'identifier': + # Direct function call + called_function = context.content[func_node.start_byte:func_node.end_byte] + elif func_node.type == 'member_expression': + # Method call (obj.method or this.method) + for child in func_node.children: + if child.type == 'property_identifier': + called_function = context.content[child.start_byte:child.end_byte] + break + + # Add relationship using O(1) lookup + if called_function: + if called_function in context.symbol_lookup: + symbol_id = context.symbol_lookup[called_function] + symbol_info = context.symbols[symbol_id] + if current_function not in symbol_info.called_by: + symbol_info.called_by.append(current_function) + else: + # Try to find method with class prefix + for name, sid in context.symbol_lookup.items(): + if name.endswith(f".{called_function}"): + symbol_info = context.symbols[sid] + if current_function not in symbol_info.called_by: + symbol_info.called_by.append(current_function) + break + + # Handle import declarations + elif node.type == 'import_statement': + import_text = context.content[node.start_byte:node.end_byte] + context.imports.append(import_text) + + # Handle export declarations + elif node.type in ['export_statement', 'export_default_declaration']: + export_text = context.content[node.start_byte:node.end_byte] + context.exports.append(export_text) + + # Continue traversing children for other node types + for child in node.children: + self._traverse_node_single_pass(child, context, current_function=current_function, + current_class=current_class) + + def _get_function_name(self, node, content: str) -> Optional[str]: + """Extract function name from tree-sitter node.""" + for child in node.children: + if child.type == 'identifier': + return content[child.start_byte:child.end_byte] + return None + + def _get_class_name(self, node, content: str) -> Optional[str]: + """Extract class name from tree-sitter node.""" + for child in node.children: + if child.type == 'identifier': + return 
content[child.start_byte:child.end_byte] + return None + + def _get_interface_name(self, node, content: str) -> Optional[str]: + """Extract interface name from tree-sitter node.""" + for child in node.children: + if child.type == 'type_identifier': + return content[child.start_byte:child.end_byte] + return None + + def _get_method_name(self, node, content: str) -> Optional[str]: + """Extract method name from tree-sitter node.""" + for child in node.children: + if child.type == 'property_identifier': + return content[child.start_byte:child.end_byte] + return None + + def _get_ts_function_signature(self, node, content: str) -> str: + """Extract TypeScript function signature.""" + return content[node.start_byte:node.end_byte].split('\n')[0].strip() + + +class TraversalContext: + """Context object to pass state during single-pass traversal.""" + + def __init__(self, content: str, file_path: str, symbols: Dict, + functions: List, classes: List, imports: List, exports: List, symbol_lookup: Dict): + self.content = content + self.file_path = file_path + self.symbols = symbols + self.functions = functions + self.classes = classes + self.imports = imports + self.exports = exports + self.symbol_lookup = symbol_lookup \ No newline at end of file diff --git a/src/code_index_mcp/indexing/strategies/zig_strategy.py b/src/code_index_mcp/indexing/strategies/zig_strategy.py new file mode 100644 index 0000000..658ca2b --- /dev/null +++ b/src/code_index_mcp/indexing/strategies/zig_strategy.py @@ -0,0 +1,99 @@ +""" +Zig parsing strategy using tree-sitter. +""" + +import logging +from typing import Dict, List, Tuple, Optional +from .base_strategy import ParsingStrategy +from ..models import SymbolInfo, FileInfo + +logger = logging.getLogger(__name__) + +import tree_sitter +from tree_sitter_zig import language + + +class ZigParsingStrategy(ParsingStrategy): + """Zig parsing strategy using tree-sitter.""" + + def __init__(self): + self.zig_language = tree_sitter.Language(language()) + + def get_language_name(self) -> str: + return "zig" + + def get_supported_extensions(self) -> List[str]: + return ['.zig', '.zon'] + + def parse_file(self, file_path: str, content: str) -> Tuple[Dict[str, SymbolInfo], FileInfo]: + """Parse Zig file using tree-sitter.""" + return self._tree_sitter_parse(file_path, content) + + + def _tree_sitter_parse(self, file_path: str, content: str) -> Tuple[Dict[str, SymbolInfo], FileInfo]: + """Parse Zig file using tree-sitter.""" + symbols = {} + functions = [] + classes = [] + imports = [] + + parser = tree_sitter.Parser(self.zig_language) + tree = parser.parse(content.encode('utf8')) + + # Phase 1: Extract symbols using tree-sitter + self._traverse_zig_node(tree.root_node, content, file_path, symbols, functions, classes, imports) + + file_info = FileInfo( + language=self.get_language_name(), + line_count=len(content.splitlines()), + symbols={"functions": functions, "classes": classes}, + imports=imports + ) + + return symbols, file_info + + def _traverse_zig_node(self, node, content: str, file_path: str, symbols: Dict, functions: List, classes: List, imports: List): + """Traverse Zig AST node and extract symbols.""" + if node.type == 'function_declaration': + func_name = self._extract_zig_function_name_from_node(node, content) + if func_name: + line_number = self._extract_line_number(content, node.start_byte) + symbol_id = self._create_symbol_id(file_path, func_name) + symbols[symbol_id] = SymbolInfo( + type="function", + file=file_path, + line=line_number, + 
signature=self._safe_extract_text(content, node.start_byte, node.end_byte).split('\n')[0].strip()
+                )
+                functions.append(func_name)
+
+        elif node.type in ['struct_declaration', 'union_declaration', 'enum_declaration']:
+            type_name = self._extract_zig_type_name_from_node(node, content)
+            if type_name:
+                line_number = self._extract_line_number(content, node.start_byte)
+                symbol_id = self._create_symbol_id(file_path, type_name)
+                symbols[symbol_id] = SymbolInfo(
+                    type=node.type.replace('_declaration', ''),
+                    file=file_path,
+                    line=line_number
+                )
+                classes.append(type_name)
+
+        # Recurse through children
+        for child in node.children:
+            self._traverse_zig_node(child, content, file_path, symbols, functions, classes, imports)
+
+    def _extract_zig_function_name_from_node(self, node, content: str) -> Optional[str]:
+        """Extract function name from tree-sitter node."""
+        for child in node.children:
+            if child.type == 'identifier':
+                return self._safe_extract_text(content, child.start_byte, child.end_byte)
+        return None
+
+    def _extract_zig_type_name_from_node(self, node, content: str) -> Optional[str]:
+        """Extract type name from tree-sitter node."""
+        for child in node.children:
+            if child.type == 'identifier':
+                return self._safe_extract_text(content, child.start_byte, child.end_byte)
+        return None
+
diff --git a/src/code_index_mcp/project_settings.py b/src/code_index_mcp/project_settings.py
index 94b9da3..d3c3965 100644
--- a/src/code_index_mcp/project_settings.py
+++ b/src/code_index_mcp/project_settings.py
@@ -6,15 +6,16 @@
 """
 import os
 import json
-import shutil
-import pickle
+
+
 import tempfile
 import hashlib
-import subprocess
+
 from datetime import datetime
+
 from .constants import (
-    SETTINGS_DIR, CONFIG_FILE, INDEX_FILE, CACHE_FILE
+    SETTINGS_DIR, CONFIG_FILE, INDEX_FILE
 )
 from .search.base import SearchStrategy
 from .search.ugrep import UgrepStrategy
@@ -45,8 +46,8 @@ def _get_available_strategies() -> list[SearchStrategy]:
             strategy = strategy_class()
             if strategy.is_available():
                 available.append(strategy)
-        except Exception as e:
-            print(f"Error initializing strategy {strategy_class.__name__}: {e}")
+        except Exception:
+            pass
     return available
@@ -69,36 +70,40 @@ def __init__(self, base_path, skip_load=False):
         try:
             # Get system temporary directory
             system_temp = tempfile.gettempdir()
-            print(f"System temporary directory: {system_temp}")
             # Check if the system temporary directory exists and is writable
             if not os.path.exists(system_temp):
-                print(f"Warning: System temporary directory does not exist: {system_temp}")
-                # Try using current directory as fallback
-                system_temp = os.getcwd()
-                print(f"Using current directory as fallback: {system_temp}")
+                # Try using project directory as fallback if available
+                if base_path and os.path.exists(base_path):
+                    system_temp = base_path
+                else:
+                    # Use user's home directory as last resort
+                    system_temp = os.path.expanduser("~")
             if not os.access(system_temp, os.W_OK):
-                print(f"Warning: No write access to system temporary directory: {system_temp}")
-                # Try using current directory as fallback
-                system_temp = os.getcwd()
-                print(f"Using current directory as fallback: {system_temp}")
+                # Try using project directory as fallback if available
+                if base_path and os.path.exists(base_path) and os.access(base_path, os.W_OK):
+                    system_temp = base_path
+                else:
+                    # Use user's home directory as last resort
+                    system_temp = os.path.expanduser("~")
             # Create code_indexer directory
             temp_base_dir = os.path.join(system_temp, SETTINGS_DIR)
-            print(f"Code indexer directory path: {temp_base_dir}")
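+            # Fallback chain: system temp dir -> project directory -> user home;
+            # failures stay silent so stdout remains free for the MCP protocol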
if not os.path.exists(temp_base_dir): - print(f"Creating code indexer directory: {temp_base_dir}") os.makedirs(temp_base_dir, exist_ok=True) - print(f"Code indexer directory created: {temp_base_dir}") else: - print(f"Code indexer directory already exists: {temp_base_dir}") - except Exception as e: - print(f"Error setting up temporary directory: {e}") - # If unable to create temporary directory, use .code_indexer in current directory - temp_base_dir = os.path.join(os.getcwd(), ".code_indexer") - print(f"Using fallback directory: {temp_base_dir}") + pass + except Exception: + # If unable to create temporary directory, use .code_indexer in project directory if available + if base_path and os.path.exists(base_path): + temp_base_dir = os.path.join(base_path, ".code_indexer") + + else: + # Use home directory as last resort + temp_base_dir = os.path.join(os.path.expanduser("~"), ".code_indexer") + if not os.path.exists(temp_base_dir): os.makedirs(temp_base_dir, exist_ok=True) @@ -108,52 +113,56 @@ def __init__(self, base_path, skip_load=False): # Use hash of project path as unique identifier path_hash = hashlib.md5(base_path.encode()).hexdigest() self.settings_path = os.path.join(temp_base_dir, path_hash) - print(f"Using project-specific directory: {self.settings_path}") else: # If no base path provided, use a default directory self.settings_path = os.path.join(temp_base_dir, "default") - print(f"Using default directory: {self.settings_path}") self.ensure_settings_dir() - except Exception as e: - print(f"Error setting up project settings: {e}") - # If error occurs, use .code_indexer in current directory as fallback - fallback_dir = os.path.join(os.getcwd(), ".code_indexer", - "default" if not base_path else hashlib.md5(base_path.encode()).hexdigest()) - print(f"Using fallback directory: {fallback_dir}") + except Exception: + # If error occurs, use .code_indexer in project or home directory as fallback + if base_path and os.path.exists(base_path): + fallback_dir = os.path.join(base_path, ".code_indexer", + hashlib.md5(base_path.encode()).hexdigest()) + else: + fallback_dir = os.path.join(os.path.expanduser("~"), ".code_indexer", + "default" if not base_path else hashlib.md5(base_path.encode()).hexdigest()) + self.settings_path = fallback_dir if not os.path.exists(fallback_dir): os.makedirs(fallback_dir, exist_ok=True) def ensure_settings_dir(self): """Ensure settings directory exists""" - print(f"Checking project settings directory: {self.settings_path}") try: if not os.path.exists(self.settings_path): - print(f"Creating project settings directory: {self.settings_path}") # Create directory structure os.makedirs(self.settings_path, exist_ok=True) - print(f"Project settings directory created: {self.settings_path}") else: - print(f"Project settings directory already exists: {self.settings_path}") + pass # Check if directory is writable if not os.access(self.settings_path, os.W_OK): - print(f"Warning: No write access to project settings directory: {self.settings_path}") - # If directory is not writable, use .code_indexer in current directory as fallback - fallback_dir = os.path.join(os.getcwd(), ".code_indexer", - os.path.basename(self.settings_path)) - print(f"Using fallback directory: {fallback_dir}") + # If directory is not writable, use .code_indexer in project or home directory as fallback + if self.base_path and os.path.exists(self.base_path) and os.access(self.base_path, os.W_OK): + fallback_dir = os.path.join(self.base_path, ".code_indexer", + os.path.basename(self.settings_path)) + else: + 
fallback_dir = os.path.join(os.path.expanduser("~"), ".code_indexer",
+                                            os.path.basename(self.settings_path))
+
                 self.settings_path = fallback_dir
                 if not os.path.exists(fallback_dir):
                     os.makedirs(fallback_dir, exist_ok=True)
-        except Exception as e:
-            print(f"Error ensuring settings directory: {e}")
-            # If unable to create settings directory, use .code_indexer in current directory
-            fallback_dir = os.path.join(os.getcwd(), ".code_indexer",
-                                      "default" if not self.base_path else hashlib.md5(self.base_path.encode()).hexdigest())
-            print(f"Using fallback directory: {fallback_dir}")
+        except Exception:
+            # If unable to create settings directory, use .code_indexer in project or home directory
+            if self.base_path and os.path.exists(self.base_path):
+                fallback_dir = os.path.join(self.base_path, ".code_indexer",
+                                            hashlib.md5(self.base_path.encode()).hexdigest())
+            else:
+                fallback_dir = os.path.join(os.path.expanduser("~"), ".code_indexer",
+                                            "default" if not self.base_path else hashlib.md5(self.base_path.encode()).hexdigest())
+
             self.settings_path = fallback_dir
             if not os.path.exists(fallback_dir):
                 os.makedirs(fallback_dir, exist_ok=True)
@@ -165,34 +174,13 @@ def get_config_path(self):
             # Ensure directory exists
             os.makedirs(os.path.dirname(path), exist_ok=True)
             return path
-        except Exception as e:
-            print(f"Error getting config path: {e}")
-            # If error occurs, use file in current directory as fallback
-            return os.path.join(os.getcwd(), CONFIG_FILE)
-
-    def get_index_path(self):
-        """Get the path to the index file"""
-        try:
-            path = os.path.join(self.settings_path, INDEX_FILE)
-            # Ensure directory exists
-            os.makedirs(os.path.dirname(path), exist_ok=True)
-            return path
-        except Exception as e:
-            print(f"Error getting index path: {e}")
-            # If error occurs, use file in current directory as fallback
-            return os.path.join(os.getcwd(), INDEX_FILE)
+        except Exception:
+            # If error occurs, use file in project or home directory as fallback
+            if self.base_path and os.path.exists(self.base_path):
+                return os.path.join(self.base_path, CONFIG_FILE)
+            else:
+                return os.path.join(os.path.expanduser("~"), CONFIG_FILE)
+
+    def get_index_path(self):
+        """Get the path to the index file"""
+        try:
+            path = os.path.join(self.settings_path, INDEX_FILE)
+            # Ensure directory exists
+            os.makedirs(os.path.dirname(path), exist_ok=True)
+            return path
+        except Exception:
+            # Fall back to the project or home directory; save_index and
+            # load_index below still call this helper, so it must stay defined
+            if self.base_path and os.path.exists(self.base_path):
+                return os.path.join(self.base_path, INDEX_FILE)
+            else:
+                return os.path.join(os.path.expanduser("~"), INDEX_FILE)
-    def get_cache_path(self):
-        """Get the path to the cache file"""
-        try:
-            path = os.path.join(self.settings_path, CACHE_FILE)
-            # Ensure directory exists
-            os.makedirs(os.path.dirname(path), exist_ok=True)
-            return path
-        except Exception as e:
-            print(f"Error getting cache path: {e}")
-            # If error occurs, use file in current directory as fallback
-            return os.path.join(os.getcwd(), CACHE_FILE)
 
     def _get_timestamp(self):
         """Get current timestamp"""
@@ -215,10 +203,9 @@ def save_config(self, config):
             with open(config_path, 'w', encoding='utf-8') as f:
                 json.dump(config, f, indent=2, ensure_ascii=False)
-            print(f"Config saved to: {config_path}")
+
             return config
-        except Exception as e:
-            print(f"Error saving config: {e}")
+        except Exception:
             return config
 
     def load_config(self):
@@ -237,218 +224,157 @@
         try:
             with open(config_path, 'r', encoding='utf-8') as f:
                 config = json.load(f)
-                print(f"Config loaded from: {config_path}")
                 return config
-        except (json.JSONDecodeError, UnicodeDecodeError) as e:
-            print(f"Error parsing config file: {e}")
+        except (json.JSONDecodeError, UnicodeDecodeError):
             # If file is corrupted, return empty dict
             return {}
         else:
-            print(f"Config file does not exist: {config_path}")
+            pass
         return {}
-        except Exception as e:
-            print(f"Error loading config: {e}")
+        except Exception:
             return {}
 
-    def save_index(self, file_index):
-        """Save file index
+    def
save_index(self, index_data): + """Save code index in JSON format Args: - file_index (dict): File index data + index_data: Index data as dictionary or JSON string """ try: index_path = self.get_index_path() - print(f"Saving index to: {index_path}") # Ensure directory exists dir_path = os.path.dirname(index_path) if not os.path.exists(dir_path): - print(f"Creating directory: {dir_path}") os.makedirs(dir_path, exist_ok=True) # Check if directory is writable if not os.access(dir_path, os.W_OK): - print(f"Warning: Directory is not writable: {dir_path}") - # Use current directory as fallback - index_path = os.path.join(os.getcwd(), INDEX_FILE) - print(f"Using fallback path: {index_path}") + # Use project or home directory as fallback + if self.base_path and os.path.exists(self.base_path): + index_path = os.path.join(self.base_path, INDEX_FILE) + else: + index_path = os.path.join(os.path.expanduser("~"), INDEX_FILE) + + + # Convert to JSON string if it's an object with to_json method + if hasattr(index_data, 'to_json'): + json_data = index_data.to_json() + elif isinstance(index_data, str): + json_data = index_data + else: + # Assume it's a dictionary and convert to JSON + json_data = json.dumps(index_data, indent=2, default=str) - with open(index_path, 'wb') as f: - pickle.dump(file_index, f) + with open(index_path, 'w', encoding='utf-8') as f: + f.write(json_data) - print(f"Index saved successfully to: {index_path}") - except Exception as e: - print(f"Error saving index: {e}") - # Try saving to current directory + + except Exception: + # Try saving to project or home directory try: - fallback_path = os.path.join(os.getcwd(), INDEX_FILE) - print(f"Trying fallback path: {fallback_path}") - with open(fallback_path, 'wb') as f: - pickle.dump(file_index, f) - print(f"Index saved to fallback path: {fallback_path}") - except Exception as e2: - print(f"Error saving index to fallback path: {e2}") - + if self.base_path and os.path.exists(self.base_path): + fallback_path = os.path.join(self.base_path, INDEX_FILE) + else: + fallback_path = os.path.join(os.path.expanduser("~"), INDEX_FILE) + + + # Convert to JSON string if it's an object with to_json method + if hasattr(index_data, 'to_json'): + json_data = index_data.to_json() + elif isinstance(index_data, str): + json_data = index_data + else: + json_data = json.dumps(index_data, indent=2, default=str) + + with open(fallback_path, 'w', encoding='utf-8') as f: + f.write(json_data) + except Exception: + pass def load_index(self): - """Load file index + """Load code index from JSON format Returns: - dict: File index data, or empty dict if file doesn't exist + dict: Index data, or None if file doesn't exist or has errors """ - # If skip_load is set, return empty dict directly + # If skip_load is set, return None directly if self.skip_load: - return {} + return None try: index_path = self.get_index_path() if os.path.exists(index_path): try: - with open(index_path, 'rb') as f: - index = pickle.load(f) - print(f"Index loaded successfully from: {index_path}") - return index - except (pickle.PickleError, EOFError) as e: - print(f"Error parsing index file: {e}") - # If file is corrupted, return empty dict - return {} - except Exception as e: - print(f"Unexpected error loading index: {e}") - return {} + with open(index_path, 'r', encoding='utf-8') as f: + index_data = json.load(f) + return index_data + except (json.JSONDecodeError, UnicodeDecodeError): + # If file is corrupted, return None + return None + except Exception: + return None else: - # Try loading from 
current directory - fallback_path = os.path.join(os.getcwd(), INDEX_FILE) - if os.path.exists(fallback_path): - print(f"Trying fallback path: {fallback_path}") - try: - with open(fallback_path, 'rb') as f: - index = pickle.load(f) - print(f"Index loaded from fallback path: {fallback_path}") - return index - except Exception as e: - print(f"Error loading index from fallback path: {e}") - - return {} - except Exception as e: - print(f"Error in load_index: {e}") - return {} - - def save_cache(self, content_cache): - """Save content cache - - Args: - content_cache (dict): Content cache data - """ - try: - cache_path = self.get_cache_path() - print(f"Saving cache to: {cache_path}") - - # Ensure directory exists - dir_path = os.path.dirname(cache_path) - if not os.path.exists(dir_path): - print(f"Creating directory: {dir_path}") - os.makedirs(dir_path, exist_ok=True) - - # Check if directory is writable - if not os.access(dir_path, os.W_OK): - print(f"Warning: Directory is not writable: {dir_path}") - # Use current directory as fallback - cache_path = os.path.join(os.getcwd(), CACHE_FILE) - print(f"Using fallback path: {cache_path}") - - with open(cache_path, 'wb') as f: - pickle.dump(content_cache, f) - - print(f"Cache saved successfully to: {cache_path}") - except Exception as e: - print(f"Error saving cache: {e}") - # Try saving to current directory - try: - fallback_path = os.path.join(os.getcwd(), CACHE_FILE) - print(f"Trying fallback path: {fallback_path}") - with open(fallback_path, 'wb') as f: - pickle.dump(content_cache, f) - print(f"Cache saved to fallback path: {fallback_path}") - except Exception as e2: - print(f"Error saving cache to fallback path: {e2}") + # Try loading from project or home directory + if self.base_path and os.path.exists(self.base_path): + fallback_path = os.path.join(self.base_path, INDEX_FILE) + else: + fallback_path = os.path.join(os.path.expanduser("~"), INDEX_FILE) + if os.path.exists(fallback_path): + try: + with open(fallback_path, 'r', encoding='utf-8') as f: + index_data = json.load(f) + return index_data + except Exception: + pass + return None + except Exception: + return None - def load_cache(self): - """Load content cache - Returns: - dict: Content cache data, or empty dict if file doesn't exist - """ - # If skip_load is set, return empty dict directly - if self.skip_load: - return {} + def cleanup_legacy_files(self) -> None: + """Clean up any legacy index files found.""" try: - cache_path = self.get_cache_path() - - if os.path.exists(cache_path): - try: - with open(cache_path, 'rb') as f: - cache = pickle.load(f) - print(f"Cache loaded successfully from: {cache_path}") - return cache - except (pickle.PickleError, EOFError) as e: - print(f"Error parsing cache file: {e}") - # If file is corrupted, return empty dict - return {} - except Exception as e: - print(f"Unexpected error loading cache: {e}") - return {} - else: - # Try loading from current directory - fallback_path = os.path.join(os.getcwd(), CACHE_FILE) - if os.path.exists(fallback_path): - print(f"Trying fallback path: {fallback_path}") + legacy_files = [ + os.path.join(self.settings_path, "file_index.pickle"), + os.path.join(self.settings_path, "content_cache.pickle"), + os.path.join(self.settings_path, INDEX_FILE) # Legacy JSON + ] + + for legacy_file in legacy_files: + if os.path.exists(legacy_file): try: - with open(fallback_path, 'rb') as f: - cache = pickle.load(f) - print(f"Cache loaded from fallback path: {fallback_path}") - return cache - except Exception as e: - print(f"Error loading 
cache from fallback path: {e}") - - return {} - except Exception as e: - print(f"Error in load_cache: {e}") - return {} + os.remove(legacy_file) + except Exception: + pass + except Exception: + pass def clear(self): - """Clear all settings and cache files""" + """Clear config and index files""" try: - print(f"Clearing settings directory: {self.settings_path}") if os.path.exists(self.settings_path): # Check if directory is writable if not os.access(self.settings_path, os.W_OK): - print(f"Warning: Directory is not writable: {self.settings_path}") return - # Delete all files in the directory - try: - for filename in os.listdir(self.settings_path): - file_path = os.path.join(self.settings_path, filename) - try: - if os.path.isfile(file_path): - os.unlink(file_path) - print(f"Deleted file: {file_path}") - elif os.path.isdir(file_path): - shutil.rmtree(file_path) - print(f"Deleted directory: {file_path}") - except Exception as e: - print(f"Error deleting {file_path}: {e}") - except Exception as e: - print(f"Error listing directory: {e}") + # Delete specific files only (config.json and index.json) + files_to_delete = [CONFIG_FILE, INDEX_FILE] - print(f"Settings directory cleared successfully") - else: - print(f"Settings directory does not exist: {self.settings_path}") - except Exception as e: - print(f"Error clearing settings: {e}") + for filename in files_to_delete: + file_path = os.path.join(self.settings_path, filename) + try: + if os.path.isfile(file_path): + os.unlink(file_path) + except Exception: + pass + else: + pass + except Exception: + pass def get_stats(self): """Get statistics for the settings directory @@ -456,7 +382,6 @@ def get_stats(self): dict: Dictionary containing file sizes and update times """ try: - print(f"Getting stats for settings directory: {self.settings_path}") stats = { 'settings_path': self.settings_path, @@ -465,7 +390,7 @@ def get_stats(self): 'writable': os.access(self.settings_path, os.W_OK) if os.path.exists(self.settings_path) else False, 'files': {}, 'temp_dir': tempfile.gettempdir(), - 'current_dir': os.getcwd() + 'base_path': self.base_path } if stats['exists'] and stats['is_directory']: @@ -475,7 +400,7 @@ def get_stats(self): stats['all_files'] = all_files # Get details for specific files - for filename in [CONFIG_FILE, INDEX_FILE, CACHE_FILE]: + for filename in [CONFIG_FILE, INDEX_FILE]: file_path = os.path.join(self.settings_path, filename) if os.path.exists(file_path): try: @@ -496,19 +421,21 @@ def get_stats(self): stats['list_error'] = str(e) # Check fallback path - fallback_dir = os.path.join(os.getcwd(), ".code_indexer") + if self.base_path and os.path.exists(self.base_path): + fallback_dir = os.path.join(self.base_path, ".code_indexer") + else: + fallback_dir = os.path.join(os.path.expanduser("~"), ".code_indexer") stats['fallback_path'] = fallback_dir stats['fallback_exists'] = os.path.exists(fallback_dir) stats['fallback_is_directory'] = os.path.isdir(fallback_dir) if os.path.exists(fallback_dir) else False return stats except Exception as e: - print(f"Error getting stats: {e}") return { 'error': str(e), 'settings_path': self.settings_path, 'temp_dir': tempfile.gettempdir(), - 'current_dir': os.getcwd() + 'base_path': self.base_path } def get_search_tools_config(self): @@ -530,13 +457,58 @@ def get_preferred_search_tool(self) -> SearchStrategy | None: """ if not self.available_strategies: self.refresh_available_strategies() - + return self.available_strategies[0] if self.available_strategies else None def refresh_available_strategies(self): """ 
Force a refresh of the available search tools list. """ - print("Refreshing available search strategies...") + self.available_strategies = _get_available_strategies() - print(f"Available strategies found: {[s.name for s in self.available_strategies]}") + + + def get_file_watcher_config(self) -> dict: + """ + Get file watcher specific configuration. + + Returns: + dict: File watcher configuration with defaults + """ + config = self.load_config() + default_config = { + "enabled": True, + "debounce_seconds": 6.0, + "additional_exclude_patterns": [], + "monitored_extensions": [], # Empty = use all supported extensions + "exclude_patterns": [ + ".git", ".svn", ".hg", + "node_modules", "__pycache__", ".venv", "venv", + ".DS_Store", "Thumbs.db", + "dist", "build", "target", ".idea", ".vscode", + ".pytest_cache", ".coverage", ".tox", + "bin", "obj" + ] + } + + # Merge with loaded config + file_watcher_config = config.get("file_watcher", {}) + for key, default_value in default_config.items(): + if key not in file_watcher_config: + file_watcher_config[key] = default_value + + return file_watcher_config + + def update_file_watcher_config(self, updates: dict) -> None: + """ + Update file watcher configuration. + + Args: + updates: Dictionary of configuration updates + """ + config = self.load_config() + if "file_watcher" not in config: + config["file_watcher"] = self.get_file_watcher_config() + + config["file_watcher"].update(updates) + self.save_config(config) diff --git a/src/code_index_mcp/search/ag.py b/src/code_index_mcp/search/ag.py index e2506a2..aa3eb33 100644 --- a/src/code_index_mcp/search/ag.py +++ b/src/code_index_mcp/search/ag.py @@ -27,7 +27,8 @@ def search( context_lines: int = 0, file_pattern: Optional[str] = None, fuzzy: bool = False, - regex: bool = False + regex: bool = False, + max_line_length: Optional[int] = None ) -> Dict[str, List[Tuple[int, str]]]: """ Execute a search using The Silver Searcher (ag). @@ -40,6 +41,7 @@ def search( file_pattern: File pattern to filter fuzzy: Enable word boundary matching (not true fuzzy search) regex: Enable regex pattern matching + max_line_length: Optional. Limit the length of lines when context_lines is used """ # ag prints line numbers and groups by file by default, which is good. # --noheading is used to be consistent with other tools' output format. @@ -93,6 +95,26 @@ def search( cmd.extend(['-G', regex_pattern]) + processed_patterns = set() + exclude_dirs = getattr(self, 'exclude_dirs', []) + exclude_file_patterns = getattr(self, 'exclude_file_patterns', []) + + for directory in exclude_dirs: + normalized = directory.strip() + if not normalized or normalized in processed_patterns: + continue + cmd.extend(['--ignore', normalized]) + processed_patterns.add(normalized) + + for pattern in exclude_file_patterns: + normalized = pattern.strip() + if not normalized or normalized in processed_patterns: + continue + if normalized.startswith('!'): + normalized = normalized[1:] + cmd.extend(['--ignore', normalized]) + processed_patterns.add(normalized) + # Add -- to treat pattern as a literal argument, preventing injection cmd.append('--') cmd.append(search_pattern) @@ -116,10 +138,10 @@ def search( if process.returncode > 1: raise RuntimeError(f"ag failed with exit code {process.returncode}: {process.stderr}") - return parse_search_output(process.stdout, base_path) + return parse_search_output(process.stdout, base_path, max_line_length) except FileNotFoundError: raise RuntimeError("'ag' (The Silver Searcher) not found. 
Please install it and ensure it's in your PATH.") except Exception as e: # Re-raise other potential exceptions like permission errors - raise RuntimeError(f"An error occurred while running ag: {e}") + raise RuntimeError(f"An error occurred while running ag: {e}") diff --git a/src/code_index_mcp/search/base.py b/src/code_index_mcp/search/base.py index 33cb9bd..5e4c63b 100644 --- a/src/code_index_mcp/search/base.py +++ b/src/code_index_mcp/search/base.py @@ -10,15 +10,25 @@ import subprocess import sys from abc import ABC, abstractmethod -from typing import Dict, List, Optional, Tuple, Any +from typing import Any, Dict, List, Optional, Tuple, TYPE_CHECKING -def parse_search_output(output: str, base_path: str) -> Dict[str, List[Tuple[int, str]]]: +from ..indexing.qualified_names import normalize_file_path + +if TYPE_CHECKING: # pragma: no cover + from ..utils.file_filter import FileFilter + +def parse_search_output( + output: str, + base_path: str, + max_line_length: Optional[int] = None +) -> Dict[str, List[Tuple[int, str]]]: """ Parse the output of command-line search tools (grep, ag, rg). Args: output: The raw output from the command-line tool. base_path: The base path of the project to make file paths relative. + max_line_length: Optional maximum line length to truncate long lines. Returns: A dictionary where keys are file paths and values are lists of (line_number, line_content) tuples. @@ -31,25 +41,52 @@ def parse_search_output(output: str, base_path: str) -> Dict[str, List[Tuple[int if not line.strip(): continue try: - # Handle Windows paths which might have a drive letter, e.g., C: + # Try to parse as a matched line first (format: path:linenum:content) parts = line.split(':', 2) - if sys.platform == "win32" and len(parts[0]) == 1 and parts[1].startswith('\\'): - # Re-join drive letter with the rest of the path + + # Check if this might be a context line (format: path-linenum-content) + # Context lines use '-' as separator in grep/ag output + if len(parts) < 3 and '-' in line: + # Try to parse as context line + # Match pattern: path-linenum-content or path-linenum-\tcontent + match = re.match(r'^(.*?)-(\d+)[-\t](.*)$', line) + if match: + file_path_abs = match.group(1) + line_number_str = match.group(2) + content = match.group(3) + else: + # If regex doesn't match, skip this line + continue + elif sys.platform == "win32" and len(parts) >= 3 and len(parts[0]) == 1 and parts[1].startswith('\\'): + # Handle Windows paths with drive letter (e.g., C:\path\file.txt) file_path_abs = f"{parts[0]}:{parts[1]}" line_number_str = parts[2].split(':', 1)[0] - content = parts[2].split(':', 1)[1] - else: + content = parts[2].split(':', 1)[1] if ':' in parts[2] else parts[2] + elif len(parts) >= 3: + # Standard format: path:linenum:content file_path_abs = parts[0] line_number_str = parts[1] content = parts[2] + else: + # Line doesn't match any expected format + continue line_number = int(line_number_str) - # Make the file path relative to the base_path - relative_path = os.path.relpath(file_path_abs, normalized_base_path) + # If the path is already relative (doesn't start with /), keep it as is + # Otherwise, make it relative to the base_path + if os.path.isabs(file_path_abs): + relative_path = os.path.relpath(file_path_abs, normalized_base_path) + else: + # Path is already relative, use it as is + relative_path = file_path_abs # Normalize path separators for consistency - relative_path = relative_path.replace('\\', '/') + relative_path = normalize_file_path(relative_path) + + # Truncate content if it 
exceeds max_line_length + if max_line_length and len(content) > max_line_length: + content = content[:max_line_length] + '... (truncated)' if relative_path not in results: results[relative_path] = [] @@ -100,22 +137,45 @@ def is_safe_regex_pattern(pattern: str) -> bool: Returns: True if the pattern looks like a safe regex, False otherwise """ - # Allow basic regex operators that are commonly used and safe - safe_regex_chars = ['|', '(', ')', '[', ']', '^', '$'] + # Strong indicators of regex intent + strong_regex_indicators = ['|', '(', ')', '[', ']', '^', '$'] - # Check if pattern contains any regex metacharacters - has_regex_chars = any(char in pattern for char in safe_regex_chars) + # Weaker indicators that need context + weak_regex_indicators = ['.', '*', '+', '?'] - # Basic safety check - avoid obviously dangerous patterns - dangerous_patterns = [ - r'(.+)+', # Nested quantifiers - r'(.*)*', # Nested stars - r'(.{0,})+', # Potential ReDoS patterns - ] + # Check for strong regex indicators + has_strong_regex = any(char in pattern for char in strong_regex_indicators) - has_dangerous_patterns = any(dangerous in pattern for dangerous in dangerous_patterns) + # Check for weak indicators with context + has_weak_regex = any(char in pattern for char in weak_regex_indicators) - return has_regex_chars and not has_dangerous_patterns + # If has strong indicators, likely regex + if has_strong_regex: + # Still check for dangerous patterns + dangerous_patterns = [ + r'(.+)+', # Nested quantifiers + r'(.*)*', # Nested stars + r'(.{0,})+', # Potential ReDoS patterns + ] + + has_dangerous_patterns = any(dangerous in pattern for dangerous in dangerous_patterns) + return not has_dangerous_patterns + + # If only weak indicators, need more context + if has_weak_regex: + # Patterns like ".*", ".+", "file.*py" look like regex + # But "file.txt", "test.py" look like literal filenames + regex_like_patterns = [ + r'\.\*', # .* + r'\.\+', # .+ + r'\.\w*\*', # .something* + r'\*\.', # *. + r'\w+\.\*\w*', # word.*word + ] + + return any(re.search(regex_pattern, pattern) for regex_pattern in regex_like_patterns) + + return False class SearchStrategy(ABC): @@ -125,6 +185,16 @@ class SearchStrategy(ABC): Each strategy is responsible for searching code using a specific tool or method. """ + def configure_excludes(self, file_filter: Optional['FileFilter']) -> None: + """Configure shared exclusion settings for the strategy.""" + self.file_filter = file_filter + if file_filter: + self.exclude_dirs = sorted(set(file_filter.exclude_dirs)) + self.exclude_file_patterns = sorted(set(file_filter.exclude_files)) + else: + self.exclude_dirs = [] + self.exclude_file_patterns = [] + @property @abstractmethod def name(self) -> str: @@ -150,7 +220,8 @@ def search( context_lines: int = 0, file_pattern: Optional[str] = None, fuzzy: bool = False, - regex: bool = False + regex: bool = False, + max_line_length: Optional[int] = None ) -> Dict[str, List[Tuple[int, str]]]: """ Execute a search using the specific strategy. @@ -168,4 +239,3 @@ def search( A dictionary mapping filenames to lists of (line_number, line_content) tuples. """ pass - diff --git a/src/code_index_mcp/search/basic.py b/src/code_index_mcp/search/basic.py index 57aab77..9ef1846 100644 --- a/src/code_index_mcp/search/basic.py +++ b/src/code_index_mcp/search/basic.py @@ -1,9 +1,10 @@ """ Basic, pure-Python search strategy. 
""" +import fnmatch import os import re -import fnmatch +from pathlib import Path from typing import Dict, List, Optional, Tuple from .base import SearchStrategy, create_word_boundary_pattern, is_safe_regex_pattern @@ -46,7 +47,8 @@ def search( context_lines: int = 0, file_pattern: Optional[str] = None, fuzzy: bool = False, - regex: bool = False + regex: bool = False, + max_line_length: Optional[int] = None ) -> Dict[str, List[Tuple[int, str]]]: """ Execute a basic, line-by-line search. @@ -60,6 +62,7 @@ def search( file_pattern: File pattern to filter fuzzy: Enable word boundary matching regex: Enable regex pattern matching + max_line_length: Optional. Limit the length of lines when context_lines is used """ results: Dict[str, List[Tuple[int, str]]] = {} @@ -81,28 +84,38 @@ def search( except re.error as e: raise ValueError(f"Invalid regex pattern: {pattern}, error: {e}") - for root, _, files in os.walk(base_path): + file_filter = getattr(self, 'file_filter', None) + base = Path(base_path) + + for root, dirs, files in os.walk(base_path): + if file_filter: + dirs[:] = [d for d in dirs if not file_filter.should_exclude_directory(d)] + for file in files: - # Improved file pattern matching with glob support if file_pattern and not self._matches_pattern(file, file_pattern): continue - file_path = os.path.join(root, file) + file_path = Path(root) / file + + if file_filter and not file_filter.should_process_path(file_path, base): + continue + rel_path = os.path.relpath(file_path, base_path) - + try: with open(file_path, 'r', encoding='utf-8', errors='ignore') as f: for line_num, line in enumerate(f, 1): if search_regex.search(line): + content = line.rstrip('\n') + if max_line_length and len(content) > max_line_length: + content = content[:max_line_length] + '... (truncated)' + if rel_path not in results: results[rel_path] = [] - # Strip newline for consistent output - results[rel_path].append((line_num, line.rstrip('\n'))) + results[rel_path].append((line_num, content)) except (UnicodeDecodeError, PermissionError, OSError): - # Ignore files that can't be opened or read due to encoding/permission issues continue except Exception: - # Ignore any other unexpected exceptions to maintain robustness continue - return results \ No newline at end of file + return results diff --git a/src/code_index_mcp/search/grep.py b/src/code_index_mcp/search/grep.py index cd2d18e..f24c469 100644 --- a/src/code_index_mcp/search/grep.py +++ b/src/code_index_mcp/search/grep.py @@ -32,7 +32,8 @@ def search( context_lines: int = 0, file_pattern: Optional[str] = None, fuzzy: bool = False, - regex: bool = False + regex: bool = False, + max_line_length: Optional[int] = None ) -> Dict[str, List[Tuple[int, str]]]: """ Execute a search using standard grep. @@ -45,6 +46,7 @@ def search( file_pattern: File pattern to filter fuzzy: Enable word boundary matching regex: Enable regex pattern matching + max_line_length: Optional. 
Limit the length of lines when context_lines is used """ # -r: recursive, -n: line number cmd = ['grep', '-r', '-n'] @@ -81,6 +83,27 @@ def search( # Note: grep's --include uses glob patterns, not regex cmd.append(f'--include={file_pattern}') + exclude_dirs = getattr(self, 'exclude_dirs', []) + exclude_file_patterns = getattr(self, 'exclude_file_patterns', []) + + processed_dirs = set() + for directory in exclude_dirs: + normalized = directory.strip() + if not normalized or normalized in processed_dirs: + continue + cmd.append(f'--exclude-dir={normalized}') + processed_dirs.add(normalized) + + processed_files = set() + for pattern in exclude_file_patterns: + normalized = pattern.strip() + if not normalized or normalized in processed_files: + continue + if normalized.startswith('!'): + normalized = normalized[1:] + cmd.append(f'--exclude={normalized}') + processed_files.add(normalized) + # Add -- to treat pattern as a literal argument, preventing injection cmd.append('--') cmd.append(search_pattern) @@ -102,9 +125,9 @@ def search( if process.returncode > 1: raise RuntimeError(f"grep failed with exit code {process.returncode}: {process.stderr}") - return parse_search_output(process.stdout, base_path) + return parse_search_output(process.stdout, base_path, max_line_length) except FileNotFoundError: raise RuntimeError("'grep' not found. Please install it and ensure it's in your PATH.") except Exception as e: - raise RuntimeError(f"An error occurred while running grep: {e}") + raise RuntimeError(f"An error occurred while running grep: {e}") diff --git a/src/code_index_mcp/search/ripgrep.py b/src/code_index_mcp/search/ripgrep.py index 39c4e58..8a5c325 100644 --- a/src/code_index_mcp/search/ripgrep.py +++ b/src/code_index_mcp/search/ripgrep.py @@ -27,7 +27,8 @@ def search( context_lines: int = 0, file_pattern: Optional[str] = None, fuzzy: bool = False, - regex: bool = False + regex: bool = False, + max_line_length: Optional[int] = None ) -> Dict[str, List[Tuple[int, str]]]: """ Execute a search using ripgrep. @@ -40,8 +41,9 @@ def search( file_pattern: File pattern to filter fuzzy: Enable word boundary matching (not true fuzzy search) regex: Enable regex pattern matching + max_line_length: Optional. 
Limit the length of lines when context_lines is used """ - cmd = ['rg', '--line-number', '--no-heading', '--color=never'] + cmd = ['rg', '--line-number', '--no-heading', '--color=never', '--no-ignore'] if not case_sensitive: cmd.append('--ignore-case') @@ -67,6 +69,31 @@ def search( if file_pattern: cmd.extend(['--glob', file_pattern]) + exclude_dirs = getattr(self, 'exclude_dirs', []) + exclude_file_patterns = getattr(self, 'exclude_file_patterns', []) + + processed_patterns = set() + + for directory in exclude_dirs: + normalized = directory.strip() + if not normalized or normalized in processed_patterns: + continue + cmd.extend(['--glob', f'!**/{normalized}/**']) + processed_patterns.add(normalized) + + for pattern in exclude_file_patterns: + normalized = pattern.strip() + if not normalized or normalized in processed_patterns: + continue + if normalized.startswith('!'): + glob_pattern = normalized + elif any(ch in normalized for ch in '*?[') or '/' in normalized: + glob_pattern = f'!{normalized}' + else: + glob_pattern = f'!**/{normalized}' + cmd.extend(['--glob', glob_pattern]) + processed_patterns.add(normalized) + # Add -- to treat pattern as a literal argument, preventing injection cmd.append('--') cmd.append(search_pattern) @@ -87,10 +114,10 @@ def search( if process.returncode > 1: raise RuntimeError(f"ripgrep failed with exit code {process.returncode}: {process.stderr}") - return parse_search_output(process.stdout, base_path) + return parse_search_output(process.stdout, base_path, max_line_length) except FileNotFoundError: raise RuntimeError("ripgrep (rg) not found. Please install it and ensure it's in your PATH.") except Exception as e: # Re-raise other potential exceptions like permission errors - raise RuntimeError(f"An error occurred while running ripgrep: {e}") + raise RuntimeError(f"An error occurred while running ripgrep: {e}") diff --git a/src/code_index_mcp/search/ugrep.py b/src/code_index_mcp/search/ugrep.py index 69f2cc4..d4302c1 100644 --- a/src/code_index_mcp/search/ugrep.py +++ b/src/code_index_mcp/search/ugrep.py @@ -27,7 +27,8 @@ def search( context_lines: int = 0, file_pattern: Optional[str] = None, fuzzy: bool = False, - regex: bool = False + regex: bool = False, + max_line_length: Optional[int] = None ) -> Dict[str, List[Tuple[int, str]]]: """ Execute a search using the 'ug' command-line tool. @@ -40,11 +41,12 @@ def search( file_pattern: File pattern to filter fuzzy: Enable true fuzzy search (ugrep native support) regex: Enable regex pattern matching + max_line_length: Optional. 
Limit the length of lines when context_lines is used """ if not self.is_available(): return {"error": "ugrep (ug) command not found."} - cmd = ['ug', '--line-number', '--no-heading'] + cmd = ['ug', '-r', '--line-number', '--no-heading'] if fuzzy: # ugrep has native fuzzy search support @@ -65,7 +67,31 @@ def search( cmd.extend(['-A', str(context_lines), '-B', str(context_lines)]) if file_pattern: - cmd.extend(['-g', file_pattern]) # Correct parameter for file patterns + cmd.extend(['--include', file_pattern]) + + processed_patterns = set() + exclude_dirs = getattr(self, 'exclude_dirs', []) + exclude_file_patterns = getattr(self, 'exclude_file_patterns', []) + + for directory in exclude_dirs: + normalized = directory.strip() + if not normalized or normalized in processed_patterns: + continue + cmd.extend(['--ignore', f'**/{normalized}/**']) + processed_patterns.add(normalized) + + for pattern in exclude_file_patterns: + normalized = pattern.strip() + if not normalized or normalized in processed_patterns: + continue + if normalized.startswith('!'): + ignore_pattern = normalized[1:] + elif any(ch in normalized for ch in '*?[') or '/' in normalized: + ignore_pattern = normalized + else: + ignore_pattern = f'**/{normalized}' + cmd.extend(['--ignore', ignore_pattern]) + processed_patterns.add(normalized) # Add '--' to treat pattern as a literal argument, preventing injection cmd.append('--') @@ -89,7 +115,7 @@ def search( error_output = process.stderr.strip() return {"error": f"ugrep execution failed with code {process.returncode}", "details": error_output} - return parse_search_output(process.stdout, base_path) + return parse_search_output(process.stdout, base_path, max_line_length) except FileNotFoundError: return {"error": "ugrep (ug) command not found. Please ensure it's installed and in your PATH."} diff --git a/src/code_index_mcp/server.py b/src/code_index_mcp/server.py index f5fea88..2d1eb80 100644 --- a/src/code_index_mcp/server.py +++ b/src/code_index_mcp/server.py @@ -3,58 +3,55 @@ This MCP server allows LLMs to index, search, and analyze code from a project directory. It provides tools for file discovery, content retrieval, and code analysis. + +This version uses a service-oriented architecture where MCP decorators delegate +to domain-specific services for business logic. 
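The exclusion wiring added across `ripgrep.py`, `grep.py`, `ag.py`, and `ugrep.py` in the hunks above translates one shared exclude list into each tool's native flags; note that ripgrep also gains `--no-ignore`, so these explicit globs become the sole exclusion mechanism rather than layering on top of `.gitignore`. A minimal sketch of the ripgrep branch, mirroring the logic in the hunk above (illustrative only; the real code additionally strips whitespace and deduplicates patterns):

```python
def ripgrep_exclude_globs(exclude_dirs: list[str], exclude_files: list[str]) -> list[str]:
    """Translate shared exclude settings into rg --glob arguments."""
    args: list[str] = []
    for directory in exclude_dirs:
        # Directories become negated recursive globs.
        args.extend(['--glob', f'!**/{directory}/**'])
    for pattern in exclude_files:
        if pattern.startswith('!'):
            args.extend(['--glob', pattern])           # already negated
        elif any(ch in pattern for ch in '*?[') or '/' in pattern:
            args.extend(['--glob', f'!{pattern}'])     # glob or path: negate as-is
        else:
            args.extend(['--glob', f'!**/{pattern}'])  # bare filename: exclude anywhere
    return args

# ripgrep_exclude_globs(['node_modules'], ['*.min.js', 'Thumbs.db'])
# → ['--glob', '!**/node_modules/**', '--glob', '!*.min.js', '--glob', '!**/Thumbs.db']
```

grep expresses the same settings as `--exclude-dir=`/`--exclude=` flags, ag as repeated `--ignore` flags, and ugrep as `--ignore` globs.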
""" + # Standard library imports -import fnmatch -import json -import os import sys -import tempfile -import time +import logging from contextlib import asynccontextmanager from dataclasses import dataclass -from typing import AsyncIterator, Dict, List, Optional, Tuple, Any +from typing import AsyncIterator, Dict, Any, List # Third-party imports -from mcp import types from mcp.server.fastmcp import FastMCP, Context # Local imports -from .analyzers.analyzer_factory import AnalyzerFactory -from .constants import SETTINGS_DIR from .project_settings import ProjectSettings - -# MCP server will be created after lifespan manager is defined - -# In-memory references (will be loaded from persistent storage) -file_index = {} -code_content_cache = {} -supported_extensions = [ - '.py', '.js', '.ts', '.jsx', '.tsx', '.java', '.c', '.cpp', '.h', '.hpp', - '.cs', '.go', '.rb', '.php', '.swift', '.kt', '.rs', '.scala', '.sh', - '.bash', '.html', '.css', '.scss', '.md', '.json', '.xml', '.yml', '.yaml', '.zig', - # Frontend frameworks - '.vue', '.svelte', '.mjs', '.cjs', - # Style languages - '.less', '.sass', '.stylus', '.styl', - # Template engines - '.hbs', '.handlebars', '.ejs', '.pug', - # Modern frontend - '.astro', '.mdx', - # Objective-C - '.m', '.mm', - # Database and SQL - '.sql', '.ddl', '.dml', '.mysql', '.postgresql', '.psql', '.sqlite', - '.mssql', '.oracle', '.ora', '.db2', - # Database objects - '.proc', '.procedure', '.func', '.function', '.view', '.trigger', '.index', - # Database frameworks and tools - '.migration', '.seed', '.fixture', '.schema', - # NoSQL and modern databases - '.cql', '.cypher', '.sparql', '.gql', - # Database migration tools - '.liquibase', '.flyway' -] +from .services import ( + SearchService, FileService, SettingsService, FileWatcherService +) +from .services.settings_service import manage_temp_directory +from .services.file_discovery_service import FileDiscoveryService +from .services.project_management_service import ProjectManagementService +from .services.index_management_service import IndexManagementService +from .services.code_intelligence_service import CodeIntelligenceService +from .services.system_management_service import SystemManagementService +from .utils import ( + handle_mcp_resource_errors, handle_mcp_tool_errors +) + +# Setup logging without writing to files +def setup_indexing_performance_logging(): + """Setup logging (stderr only); remove any file-based logging.""" + + root_logger = logging.getLogger() + root_logger.handlers.clear() + + formatter = logging.Formatter('%(asctime)s - %(name)s - %(levelname)s - %(message)s') + + # stderr for errors only + stderr_handler = logging.StreamHandler(sys.stderr) + stderr_handler.setFormatter(formatter) + stderr_handler.setLevel(logging.ERROR) + + root_logger.addHandler(stderr_handler) + root_logger.setLevel(logging.DEBUG) + +# Initialize logging (no file handlers) +setup_indexing_performance_logging() @dataclass class CodeIndexerContext: @@ -62,40 +59,31 @@ class CodeIndexerContext: base_path: str settings: ProjectSettings file_count: int = 0 + file_watcher_service: FileWatcherService = None @asynccontextmanager -async def indexer_lifespan(server: FastMCP) -> AsyncIterator[CodeIndexerContext]: +async def indexer_lifespan(_server: FastMCP) -> AsyncIterator[CodeIndexerContext]: """Manage the lifecycle of the Code Indexer MCP server.""" # Don't set a default path, user must explicitly set project path base_path = "" # Empty string to indicate no path is set - print("Initializing Code Indexer MCP server...") - 
# Initialize settings manager with skip_load=True to skip loading files settings = ProjectSettings(base_path, skip_load=True) - # Initialize context + # Initialize context - file watcher will be initialized later when project path is set context = CodeIndexerContext( base_path=base_path, - settings=settings + settings=settings, + file_watcher_service=None ) - # Initialize global variables - global file_index, code_content_cache - try: - print("Server ready. Waiting for user to set project path...") # Provide context to the server yield context finally: - # Only save index and cache if project path has been set - if context.base_path and file_index: - print(f"Saving index for project: {context.base_path}") - settings.save_index(file_index) - - if context.base_path and code_content_cache: - print(f"Saving cache for project: {context.base_path}") - settings.save_cache(code_content_cache) + # Stop file watcher if it was started + if context.file_watcher_service: + context.file_watcher_service.stop_monitoring() # Create the MCP server with lifespan manager mcp = FastMCP("CodeIndexer", lifespan=indexer_lifespan, dependencies=["pathlib"]) @@ -103,282 +91,59 @@ async def indexer_lifespan(server: FastMCP) -> AsyncIterator[CodeIndexerContext] # ----- RESOURCES ----- @mcp.resource("config://code-indexer") +@handle_mcp_resource_errors def get_config() -> str: """Get the current configuration of the Code Indexer.""" ctx = mcp.get_context() - - # Get the base path from context - base_path = ctx.request_context.lifespan_context.base_path - - # Check if base_path is set - if not base_path: - return json.dumps({ - "status": "not_configured", - "message": "Project path not set. Please use set_project_path to set a project directory first.", - "supported_extensions": supported_extensions - }, indent=2) - - # Get file count - file_count = ctx.request_context.lifespan_context.file_count - - # Get settings stats - settings = ctx.request_context.lifespan_context.settings - settings_stats = settings.get_stats() - - config = { - "base_path": base_path, - "supported_extensions": supported_extensions, - "file_count": file_count, - "settings_directory": settings.settings_path, - "settings_stats": settings_stats - } - - return json.dumps(config, indent=2) + return ProjectManagementService(ctx).get_project_config() @mcp.resource("files://{file_path}") +@handle_mcp_resource_errors def get_file_content(file_path: str) -> str: """Get the content of a specific file.""" ctx = mcp.get_context() + # Use FileService for simple file reading - this is appropriate for a resource + return FileService(ctx).get_file_content(file_path) - # Get the base path from context - base_path = ctx.request_context.lifespan_context.base_path - - # Check if base_path is set - if not base_path: - return "Error: Project path not set. Please use set_project_path to set a project directory first." - - # Handle absolute paths (especially Windows paths starting with drive letters) - if os.path.isabs(file_path) or (len(file_path) > 1 and file_path[1] == ':'): - # Absolute paths are not allowed via this endpoint - return f"Error: Absolute file paths like '{file_path}' are not allowed. Please use paths relative to the project root." 
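The inline validation being deleted here (it continues just below) checked containment with `real_full_path.startswith(real_base_path)`. That idiom has a known pitfall: `/home/user/project-backup` starts with `/home/user/project`, so a sibling directory would pass the check. Wherever the check lands in the service layer — its new home is not shown in this diff — a `commonpath`-based test avoids the problem. A hedged sketch:

```python
import os

def is_within_project(base_path: str, relative_path: str) -> bool:
    """Return True only if relative_path resolves inside base_path.

    commonpath avoids the prefix bug where '/proj-backup'.startswith('/proj')
    is True even though it names a sibling directory.
    """
    real_base = os.path.realpath(base_path)
    real_target = os.path.realpath(os.path.join(real_base, relative_path))
    try:
        return os.path.commonpath([real_base, real_target]) == real_base
    except ValueError:  # e.g. paths on different Windows drives
        return False
```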
- - # Normalize the file path - norm_path = os.path.normpath(file_path) - - # Check for path traversal attempts - if "..\\" in norm_path or "../" in norm_path or norm_path.startswith(".."): - return f"Error: Invalid file path: {file_path} (directory traversal not allowed)" - - # Construct the full path and verify it's within the project bounds - full_path = os.path.join(base_path, norm_path) - real_full_path = os.path.realpath(full_path) - real_base_path = os.path.realpath(base_path) - - if not real_full_path.startswith(real_base_path): - return f"Error: Access denied. File path must be within project directory." - - try: - with open(full_path, 'r', encoding='utf-8') as f: - content = f.read() - - # Cache the content for faster retrieval later - code_content_cache[norm_path] = content - - return content - except UnicodeDecodeError: - return f"Error: File {file_path} appears to be a binary file or uses unsupported encoding." - except (FileNotFoundError, PermissionError, OSError) as e: - return f"Error reading file: {e}" - -@mcp.resource("structure://project") -def get_project_structure() -> str: - """Get the structure of the project as a JSON tree.""" - ctx = mcp.get_context() - - # Get the base path from context - base_path = ctx.request_context.lifespan_context.base_path - - # Check if base_path is set - if not base_path: - return json.dumps({ - "status": "not_configured", - "message": "Project path not set. Please use set_project_path to set a project directory first." - }, indent=2) - - # Check if we need to refresh the index - if not file_index: - _index_project(base_path) - # Update file count in context - ctx.request_context.lifespan_context.file_count = _count_files(file_index) - # Save updated index - ctx.request_context.lifespan_context.settings.save_index(file_index) - - return json.dumps(file_index, indent=2) - -@mcp.resource("settings://stats") -def get_settings_stats() -> str: - """Get statistics about the settings directory and files.""" - ctx = mcp.get_context() - - # Get settings manager from context - settings = ctx.request_context.lifespan_context.settings - - # Get settings stats - stats = settings.get_stats() - - return json.dumps(stats, indent=2) - -# ----- AUTO-REFRESH HELPERS ----- - -REFRESH_RATE_LIMIT_SECONDS = 30 - -# Memory cache for refresh time (loaded once per server session) -_cached_last_refresh_time = None - -def _get_last_refresh_time(ctx: Context) -> float: - """Get last refresh time, with memory cache for performance.""" - global _cached_last_refresh_time - - # Load from config only once per server session - if _cached_last_refresh_time is None: - config = ctx.request_context.lifespan_context.settings.load_config() - _cached_last_refresh_time = config.get('last_auto_refresh_time', 0.0) - - return _cached_last_refresh_time - -def _should_auto_refresh(ctx: Context) -> bool: - """Check if auto-refresh is allowed based on 30-second rate limit.""" - last_refresh_time = _get_last_refresh_time(ctx) - current_time = time.time() - return (current_time - last_refresh_time) >= REFRESH_RATE_LIMIT_SECONDS - -def _update_last_refresh_time(ctx: Context) -> None: - """Update refresh time in both memory cache and persistent config.""" - global _cached_last_refresh_time - current_time = time.time() - - # Update memory cache immediately for performance - _cached_last_refresh_time = current_time - - # Persist to config for stateless client support - config = ctx.request_context.lifespan_context.settings.load_config() - config['last_auto_refresh_time'] = current_time - 
ctx.request_context.lifespan_context.settings.save_config(config) - -def _get_remaining_refresh_time(ctx: Context) -> int: - """Get remaining seconds until next refresh is allowed.""" - last_refresh_time = _get_last_refresh_time(ctx) - current_time = time.time() - elapsed = current_time - last_refresh_time - remaining = max(0, REFRESH_RATE_LIMIT_SECONDS - elapsed) - return int(remaining) +# Removed: structure://project resource - not necessary for most workflows +# Removed: settings://stats resource - this information is available via get_settings_info() tool +# and is more of a debugging/technical detail rather than context AI needs # ----- TOOLS ----- @mcp.tool() +@handle_mcp_tool_errors(return_type='str') def set_project_path(path: str, ctx: Context) -> str: """Set the base project path for indexing.""" - # Validate and normalize path - try: - norm_path = os.path.normpath(path) - abs_path = os.path.abspath(norm_path) - - if not os.path.exists(abs_path): - return f"Error: Path does not exist: {abs_path}" - - if not os.path.isdir(abs_path): - return f"Error: Path is not a directory: {abs_path}" - - # Clear existing in-memory index and cache - global file_index, code_content_cache - file_index.clear() - code_content_cache.clear() - - # Update the base path in context - ctx.request_context.lifespan_context.base_path = abs_path - - # Create a new settings manager for the new path (don't skip loading files) - ctx.request_context.lifespan_context.settings = ProjectSettings(abs_path, skip_load=False) - - # Print the settings path for debugging - settings_path = ctx.request_context.lifespan_context.settings.settings_path - print(f"Project settings path: {settings_path}") - - # Try to load existing index and cache - print(f"Project path set to: {abs_path}") - print(f"Attempting to load existing index and cache...") - - # Try to load index - loaded_index = None - try: - loaded_index = ctx.request_context.lifespan_context.settings.load_index() - except Exception as e: - print(f"Could not load existing index, it might be an old format. A new index will be created. Error: {e}") - - if loaded_index: - print(f"Existing index found and loaded successfully") - file_index = loaded_index - file_count = _count_files(file_index) - ctx.request_context.lifespan_context.file_count = file_count - - # Try to load cache - loaded_cache = ctx.request_context.lifespan_context.settings.load_cache() - if loaded_cache: - print(f"Existing cache found and loaded successfully") - code_content_cache.update(loaded_cache) - - # Get search capabilities info - search_tool = ctx.request_context.lifespan_context.settings.get_preferred_search_tool() - - if search_tool is None: - search_info = " Basic search available." - else: - search_info = f" Advanced search enabled ({search_tool.name})." - - return f"Project path set to: {abs_path}. 
Loaded existing index with {file_count} files.{search_info}" - else: - print(f"No existing index found, creating new index...") - - # If no existing index, create a new one - file_count = _index_project(abs_path) - ctx.request_context.lifespan_context.file_count = file_count - - # Save the new index - ctx.request_context.lifespan_context.settings.save_index(file_index) - - # Save project config - config = { - "base_path": abs_path, - "supported_extensions": supported_extensions, - "last_indexed": ctx.request_context.lifespan_context.settings.load_config().get('last_indexed', None) - } - ctx.request_context.lifespan_context.settings.save_config(config) - - # Get search capabilities info (this will trigger lazy detection) - search_tool = ctx.request_context.lifespan_context.settings.get_preferred_search_tool() - - if search_tool is None: - search_info = " Basic search available." - else: - search_info = f" Advanced search enabled ({search_tool.name})." - - return f"Project path set to: {abs_path}. Indexed {file_count} files.{search_info}" - except (OSError, ValueError, RuntimeError) as e: - return f"Error setting project path: {e}" + return ProjectManagementService(ctx).initialize_project(path) @mcp.tool() +@handle_mcp_tool_errors(return_type='dict') def search_code_advanced( - pattern: str, + pattern: str, ctx: Context, case_sensitive: bool = True, context_lines: int = 0, - file_pattern: Optional[str] = None, + file_pattern: str = None, fuzzy: bool = False, - regex: bool = False + regex: bool = None, + max_line_length: int = None ) -> Dict[str, Any]: """ Search for a code pattern in the project using an advanced, fast tool. - - This tool automatically selects the best available command-line search tool + + This tool automatically selects the best available command-line search tool (like ugrep, ripgrep, ag, or grep) for maximum performance. - + Args: pattern: The search pattern. Can be literal text or regex (see regex parameter). case_sensitive: Whether the search should be case-sensitive. context_lines: Number of lines to show before and after the match. - file_pattern: A glob pattern to filter files to search in (e.g., "*.py", "*.js", "test_*.py"). + file_pattern: A glob pattern to filter files to search in + (e.g., "*.py", "*.js", "test_*.py"). + max_line_length: Optional. Default None (no limit). Limits the length of lines when context_lines is used. All search tools now handle glob patterns consistently: - - ugrep: Uses glob patterns (*.py, *.{js,ts}) + - ugrep: Uses glob patterns (*.py, *.{js,ts}) - ripgrep: Uses glob patterns (*.py, *.{js,ts}) - ag (Silver Searcher): Automatically converts globs to regex patterns - grep: Basic glob pattern matching @@ -386,127 +151,57 @@ def search_code_advanced( fuzzy: If True, enables fuzzy/partial matching behavior varies by search tool: - ugrep: Native fuzzy search with --fuzzy flag (true edit-distance fuzzy search) - ripgrep, ag, grep, basic: Word boundary pattern matching (not true fuzzy search) - IMPORTANT: Only ugrep provides true fuzzy search. Other tools use word boundary + IMPORTANT: Only ugrep provides true fuzzy search. Other tools use word boundary matching which allows partial matches at word boundaries. For exact literal matches, set fuzzy=False (default and recommended). - regex: If True, enables regex pattern matching. Use this for patterns like "ERROR|WARN". - The pattern will be validated for safety to prevent ReDoS attacks. - If False (default), uses literal string search. 
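The replacement docstring just below introduces a tri-state `regex` parameter; when it is left as `None`, detection presumably falls to the `is_safe_regex_pattern` helper reworked in `base.py` earlier in this diff (the helper is real, the wiring through `SearchService` is an assumption). A few calls with the outcomes that implementation produces:

```python
from code_index_mcp.search.base import is_safe_regex_pattern

is_safe_regex_pattern("ERROR|WARN")   # True  – '|' is a strong regex indicator
is_safe_regex_pattern(".*Error")      # True  – '.*' matches a regex-like shape
is_safe_regex_pattern("file.txt")     # False – a lone '.' reads as a literal filename
is_safe_regex_pattern("(.+)+suffix")  # False – nested quantifier, rejected as ReDoS risk
```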
- + regex: Controls regex pattern matching behavior: + - If True, enables regex pattern matching + - If False, forces literal string search + - If None (default), automatically detects regex patterns and enables regex for patterns like "ERROR|WARN" + The pattern will always be validated for safety to prevent ReDoS attacks. + Returns: A dictionary containing the search results or an error message. - - """ - base_path = ctx.request_context.lifespan_context.base_path - if not base_path: - return {"error": "Project path not set. Please use set_project_path first."} - - settings = ctx.request_context.lifespan_context.settings - strategy = settings.get_preferred_search_tool() - - if not strategy: - return {"error": "No search strategies available. This is unexpected."} - print(f"Using search strategy: {strategy.name}") - - try: - results = strategy.search( - pattern=pattern, - base_path=base_path, - case_sensitive=case_sensitive, - context_lines=context_lines, - file_pattern=file_pattern, - fuzzy=fuzzy, - regex=regex - ) - return {"results": results} - except Exception as e: - return {"error": f"Search failed using '{strategy.name}': {e}"} + """ + return SearchService(ctx).search_code( + pattern=pattern, + case_sensitive=case_sensitive, + context_lines=context_lines, + file_pattern=file_pattern, + fuzzy=fuzzy, + regex=regex, + max_line_length=max_line_length + ) @mcp.tool() -def find_files(pattern: str, ctx: Context) -> Dict[str, Any]: +@handle_mcp_tool_errors(return_type='list') +def find_files(pattern: str, ctx: Context) -> List[str]: """ - Find files matching a glob pattern. Auto-refreshes index if no results found. - + Find files matching a glob pattern using pre-built file index. + Use when: - - Looking for files by pattern (e.g., "*.py", "test_*.js", "src/**/*.ts") + - Looking for files by pattern (e.g., "*.py", "test_*.js") + - Searching by filename only (e.g., "README.md" finds all README files) - Checking if specific files exist in the project - Getting file lists for further analysis - - Auto-refresh behavior: - - If no files found, automatically refreshes index once and retries - - Rate limited to once every 30 seconds to avoid excessive refreshes - - Manual refresh_index tool is always available without rate limits - + + Pattern matching: + - Supports both full path and filename-only matching + - Uses standard glob patterns (*, ?, []) + - Fast lookup using in-memory file index + - Uses forward slashes consistently across all platforms + Args: - pattern: Glob pattern to match files (e.g., "*.py", "test_*.js") - + pattern: Glob pattern to match files (e.g., "*.py", "test_*.js", "README.md") + Returns: - Dictionary with files list and status information + List of file paths matching the pattern """ - base_path = ctx.request_context.lifespan_context.base_path - - # Check if base_path is set - if not base_path: - return { - "error": "Project path not set. 
Please use set_project_path to set a project directory first.", - "files": [] - } - - # Check if we need to index the project initially - if not file_index: - _index_project(base_path) - ctx.request_context.lifespan_context.file_count = _count_files(file_index) - ctx.request_context.lifespan_context.settings.save_index(file_index) - - # First search attempt - matching_files = [] - for file_path, _ in _get_all_files(file_index): - if fnmatch.fnmatch(file_path, pattern): - matching_files.append(file_path) - - # If no results found, try auto-refresh once (with rate limiting) - if not matching_files: - if _should_auto_refresh(ctx): - # Perform full re-index - file_index.clear() - _index_project(base_path) - ctx.request_context.lifespan_context.file_count = _count_files(file_index) - ctx.request_context.lifespan_context.settings.save_index(file_index) - - # Update last refresh time - _update_last_refresh_time(ctx) - - # Search again after refresh - for file_path, _ in _get_all_files(file_index): - if fnmatch.fnmatch(file_path, pattern): - matching_files.append(file_path) - - if matching_files: - return { - "files": matching_files, - "status": f"✅ Found {len(matching_files)} files after refresh" - } - else: - return { - "files": [], - "status": "⚠️ No files found even after refresh" - } - else: - # Rate limited - remaining_time = _get_remaining_refresh_time(ctx) - return { - "files": [], - "status": f"⚠️ No files found - Rate limited. Try again in {remaining_time} seconds" - } - - # Return successful results - return { - "files": matching_files, - "status": f"✅ Found {len(matching_files)} files" - } + return FileDiscoveryService(ctx).find_files(pattern) @mcp.tool() +@handle_mcp_tool_errors(return_type='dict') def get_file_summary(file_path: str, ctx: Context) -> Dict[str, Any]: """ Get a summary of a specific file, including: @@ -515,355 +210,99 @@ def get_file_summary(file_path: str, ctx: Context) -> Dict[str, Any]: - Import statements - Basic complexity metrics """ - base_path = ctx.request_context.lifespan_context.base_path - - # Check if base_path is set - if not base_path: - return {"error": "Project path not set. 
Please use set_project_path to set a project directory first."} - - # Normalize the file path - norm_path = os.path.normpath(file_path) - if norm_path.startswith('..'): - return {"error": f"Invalid file path: {file_path}"} - - full_path = os.path.join(base_path, norm_path) - - try: - # Get file content - if norm_path in code_content_cache: - content = code_content_cache[norm_path] - else: - with open(full_path, 'r', encoding='utf-8') as f: - content = f.read() - code_content_cache[norm_path] = content - # Save the updated cache - ctx.request_context.lifespan_context.settings.save_cache(code_content_cache) - - # Basic file info - lines = content.splitlines() - line_count = len(lines) - - # File extension for language-specific analysis - _, ext = os.path.splitext(norm_path) - - summary = { - "file_path": norm_path, - "line_count": line_count, - "size_bytes": os.path.getsize(full_path), - "extension": ext, - } - - # Use the new analyzer framework for language-specific analysis - try: - analyzer = AnalyzerFactory.get_analyzer(ext) - if analyzer is None: - summary["error"] = "No analyzer available for this file type" - summary["basic_info"] = True - return summary - - analysis_result = analyzer.analyze(content, norm_path, full_path) - - # Convert to dict for backwards compatibility - return analysis_result.to_dict() - except Exception as e: - # Fallback to basic summary if analyzer fails - summary["error"] = f"Analysis failed: {str(e)}" - summary["basic_info"] = True - return summary - except (OSError, UnicodeDecodeError, ValueError) as e: - return {"error": f"Error analyzing file: {e}"} + return CodeIntelligenceService(ctx).analyze_file(file_path) @mcp.tool() +@handle_mcp_tool_errors(return_type='str') def refresh_index(ctx: Context) -> str: """ Manually refresh the project index when files have been added/removed/moved. - + Use when: - - Files were added, deleted, or moved outside the editor - - After git operations (checkout, merge, pull) that change files + - File watcher is disabled or unavailable + - After large-scale operations (git checkout, merge, pull) that change many files + - When you want immediate index rebuild without waiting for file watcher debounce - When find_files results seem incomplete or outdated - - For immediate refresh without waiting for auto-refresh rate limits - + - For troubleshooting suspected index synchronization issues + Important notes for LLMs: - - This tool bypasses the 30-second rate limit that applies to auto-refresh - - Always available for immediate use when you know files have changed + - Always available as backup when file watcher is not working - Performs full project re-indexing for complete accuracy - Use when you suspect the index is stale after file system changes - + - **Call this after programmatic file modifications if file watcher seems unresponsive** + - Complements the automatic file watcher system + Returns: Success message with total file count """ - base_path = ctx.request_context.lifespan_context.base_path - - # Check if base_path is set - if not base_path: - return "Error: Project path not set. Please use set_project_path to set a project directory first." 
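Every tool in this file is now wrapped in `handle_mcp_tool_errors(return_type=...)`, imported from `.utils`; its implementation is not part of this diff. A hypothetical sketch of the shape such a decorator takes, with `return_type` selecting the error payload (`'str'`, `'dict'`, and `'list'` all appear in this file) — the names and behavior here are assumptions, not the project's actual code:

```python
import functools
from typing import Any, Callable

def handle_mcp_tool_errors(return_type: str = 'str') -> Callable:
    """Hypothetical sketch: convert uncaught tool exceptions into typed payloads."""
    def decorator(func: Callable) -> Callable:
        @functools.wraps(func)
        def wrapper(*args: Any, **kwargs: Any) -> Any:
            try:
                return func(*args, **kwargs)
            except Exception as exc:  # tool boundary: never let exceptions escape
                message = f"Error in {func.__name__}: {exc}"
                if return_type == 'dict':
                    return {"error": message}
                if return_type == 'list':
                    return []  # callers expecting a list get an empty result
                return message
        return wrapper
    return decorator
```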
- - # Clear existing index - global file_index - file_index.clear() + return IndexManagementService(ctx).rebuild_index() - # Re-index the project - file_count = _index_project(base_path) - ctx.request_context.lifespan_context.file_count = file_count - - # Save the updated index - ctx.request_context.lifespan_context.settings.save_index(file_index) - - # Update the last indexed timestamp in config - config = ctx.request_context.lifespan_context.settings.load_config() - ctx.request_context.lifespan_context.settings.save_config({ - **config, - 'last_indexed': ctx.request_context.lifespan_context.settings._get_timestamp() - }) - - # Update auto-refresh timer to prevent immediate auto-refresh after manual refresh - _update_last_refresh_time(ctx) +@mcp.tool() +@handle_mcp_tool_errors(return_type='str') +def build_deep_index(ctx: Context) -> str: + """ + Build the deep index (full symbol extraction) for the current project. - return f"Project re-indexed. Found {file_count} files." + This performs a complete re-index and loads it into memory. + """ + return IndexManagementService(ctx).rebuild_deep_index() @mcp.tool() +@handle_mcp_tool_errors(return_type='dict') def get_settings_info(ctx: Context) -> Dict[str, Any]: """Get information about the project settings.""" - base_path = ctx.request_context.lifespan_context.base_path - - # Check if base_path is set - if not base_path: - # Even if base_path is not set, we can still show the temp directory - temp_dir = os.path.join(tempfile.gettempdir(), SETTINGS_DIR) - return { - "status": "not_configured", - "message": "Project path not set. Please use set_project_path to set a project directory first.", - "temp_directory": temp_dir, - "temp_directory_exists": os.path.exists(temp_dir) - } - - settings = ctx.request_context.lifespan_context.settings - - # Get config - config = settings.load_config() - - # Get stats - stats = settings.get_stats() - - # Get temp directory - temp_dir = os.path.join(tempfile.gettempdir(), SETTINGS_DIR) - - return { - "settings_directory": settings.settings_path, - "temp_directory": temp_dir, - "temp_directory_exists": os.path.exists(temp_dir), - "config": config, - "stats": stats, - "exists": os.path.exists(settings.settings_path) - } + return SettingsService(ctx).get_settings_info() @mcp.tool() +@handle_mcp_tool_errors(return_type='dict') def create_temp_directory() -> Dict[str, Any]: """Create the temporary directory used for storing index data.""" - temp_dir = os.path.join(tempfile.gettempdir(), SETTINGS_DIR) - - result = { - "temp_directory": temp_dir, - "existed_before": os.path.exists(temp_dir), - } - - try: - # Use ProjectSettings to handle directory creation consistently - temp_settings = ProjectSettings("", skip_load=True) - - result["created"] = not result["existed_before"] - result["exists_now"] = os.path.exists(temp_dir) - result["is_directory"] = os.path.isdir(temp_dir) - except Exception as e: - result["error"] = str(e) - - return result + return manage_temp_directory('create') @mcp.tool() +@handle_mcp_tool_errors(return_type='dict') def check_temp_directory() -> Dict[str, Any]: """Check the temporary directory used for storing index data.""" - temp_dir = os.path.join(tempfile.gettempdir(), SETTINGS_DIR) - - result = { - "temp_directory": temp_dir, - "exists": os.path.exists(temp_dir), - "is_directory": os.path.isdir(temp_dir) if os.path.exists(temp_dir) else False, - "temp_root": tempfile.gettempdir(), - } - - # If the directory exists, list its contents - if result["exists"] and result["is_directory"]: - try: - 
contents = os.listdir(temp_dir) - result["contents"] = contents - result["subdirectories"] = [] - - # Check each subdirectory - for item in contents: - item_path = os.path.join(temp_dir, item) - if os.path.isdir(item_path): - subdir_info = { - "name": item, - "path": item_path, - "contents": os.listdir(item_path) if os.path.exists(item_path) else [] - } - result["subdirectories"].append(subdir_info) - except (OSError, PermissionError) as e: - result["error"] = str(e) - - return result + return manage_temp_directory('check') @mcp.tool() +@handle_mcp_tool_errors(return_type='str') def clear_settings(ctx: Context) -> str: """Clear all settings and cached data.""" - settings = ctx.request_context.lifespan_context.settings - settings.clear() - return "Project settings, index, and cache have been cleared." + return SettingsService(ctx).clear_all_settings() @mcp.tool() +@handle_mcp_tool_errors(return_type='str') def refresh_search_tools(ctx: Context) -> str: """ Manually re-detect the available command-line search tools on the system. This is useful if you have installed a new tool (like ripgrep) after starting the server. """ - settings = ctx.request_context.lifespan_context.settings - settings.refresh_available_strategies() - - config = settings.get_search_tools_config() - - return f"Search tools refreshed. Available: {config['available_tools']}. Preferred: {config['preferred_tool']}." - + return SearchService(ctx).refresh_search_tools() -# ----- PROMPTS ----- - -@mcp.prompt() -def analyze_code(file_path: str = "", query: str = "") -> list[types.PromptMessage]: - """Prompt for analyzing code in the project.""" - messages = [ - types.PromptMessage(role="user", content=types.TextContent(type="text", text=f"""I need you to analyze some code from my project. - -{f'Please analyze the file: {file_path}' if file_path else ''} -{f'I want to understand: {query}' if query else ''} - -First, let me give you some context about the project structure. Then, I'll provide the code to analyze. -""")), - types.PromptMessage(role="assistant", content=types.TextContent(type="text", text="I'll help you analyze the code. Let me first examine the project structure to get a better understanding of the codebase.")) - ] - return messages - -@mcp.prompt() -def code_search(query: str = "") -> types.TextContent: - """Prompt for searching code in the project.""" - search_text = f"\"query\"" if not query else f"\"{query}\"" - return types.TextContent(type="text", text=f"""I need to search through my codebase for {search_text}. - -Please help me find all occurrences of this query and explain what each match means in its context. -Focus on the most relevant files and provide a brief explanation of how each match is used in the code. - -If there are too many results, prioritize the most important ones and summarize the patterns you see.""") - -@mcp.prompt() -def set_project() -> list[types.PromptMessage]: - """Prompt for setting the project path.""" - messages = [ - types.PromptMessage(role="user", content=types.TextContent(type="text", text=""" - I need to analyze code from a project, but I haven't set the project path yet. Please help me set up the project path and index the code. - - First, I need to specify which project directory to analyze. - """)), - types.PromptMessage(role="assistant", content=types.TextContent(type="text", text=""" - Before I can help you analyze any code, we need to set up the project path. This is a required first step. - - Please provide the full path to your project folder. 
For example: - - Windows: "C:/Users/username/projects/my-project" - - macOS/Linux: "/home/username/projects/my-project" - - Once you provide the path, I'll use the `set_project_path` tool to configure the code analyzer to work with your project. - """)) - ] - return messages - -# ----- HELPER FUNCTIONS ----- +@mcp.tool() +@handle_mcp_tool_errors(return_type='dict') +def get_file_watcher_status(ctx: Context) -> Dict[str, Any]: + """Get file watcher service status and statistics.""" + return SystemManagementService(ctx).get_file_watcher_status() -def _index_project(base_path: str) -> int: - """ - Create an index of the project files. - Returns the number of files indexed. - """ - file_count = 0 - file_index.clear() - - for root, dirs, files in os.walk(base_path): - # Skip hidden directories and common build/dependency directories - dirs[:] = [d for d in dirs if not d.startswith('.') and - d not in ['node_modules', 'venv', '__pycache__', 'build', 'dist']] - - # Create relative path from base_path - rel_path = os.path.relpath(root, base_path) - current_dir = file_index - - # Skip the '.' directory (base_path itself) - if rel_path != '.': - # Split the path and navigate/create the tree - path_parts = rel_path.replace('\\', '/').split('/') - for part in path_parts: - if part not in current_dir: - current_dir[part] = {"type": "directory", "children": {}} - current_dir = current_dir[part]["children"] - - # Add files to current directory - for file in files: - # Skip hidden files and files with unsupported extensions - _, ext = os.path.splitext(file) - if file.startswith('.') or ext not in supported_extensions: - continue - - # Store file information - file_path = os.path.join(rel_path, file).replace('\\', '/') - if rel_path == '.': - file_path = file - - current_dir[file] = { - "type": "file", - "path": file_path, - "ext": ext - } - file_count += 1 - - return file_count - -def _count_files(directory: Dict) -> int: - """ - Count the number of files in the index. - """ - count = 0 - for name, value in directory.items(): - if isinstance(value, dict): - if value.get("type") == "file": - count += 1 - elif value.get("type") == "directory": - count += _count_files(value.get("children", {})) - return count - -def _get_all_files(directory: Dict, prefix: str = "") -> List[Tuple[str, Dict]]: - """Recursively get all files from the index.""" - all_files = [] - for name, item in directory.items(): - current_path = os.path.join(prefix, name).replace('\\', '/') - if item.get('type') == 'file': - all_files.append((current_path, item)) - elif item.get('type') == 'directory': - all_files.extend(_get_all_files(item.get('children', {}), current_path)) - return all_files +@mcp.tool() +@handle_mcp_tool_errors(return_type='str') +def configure_file_watcher( + ctx: Context, + enabled: bool = None, + debounce_seconds: float = None, + additional_exclude_patterns: list = None +) -> str: + """Configure file watcher service settings.""" + return SystemManagementService(ctx).configure_file_watcher(enabled, debounce_seconds, additional_exclude_patterns) +# ----- PROMPTS ----- +# Removed: analyze_code, code_search, set_project prompts def main(): """Main function to run the MCP server.""" - # Run the server. Tools are discovered automatically via decorators. 
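One behavioral note before the entry point: `configure_file_watcher` above persists partial updates, and `get_file_watcher_config` (earlier in this diff, in `project_settings.py`) merges the stored section with defaults key by key — stored keys win, missing keys fall back. A compact restatement of that merge rule (defaults abbreviated; the full exclude-pattern list is omitted):

```python
WATCHER_DEFAULTS = {
    "enabled": True,
    "debounce_seconds": 6.0,
    "additional_exclude_patterns": [],
    "monitored_extensions": [],  # empty list = all supported extensions
}

def merged_watcher_config(stored: dict) -> dict:
    """Key-by-key merge, as in get_file_watcher_config: stored values win."""
    section = dict(stored.get("file_watcher", {}))
    for key, default in WATCHER_DEFAULTS.items():
        section.setdefault(key, default)
    return section

# merged_watcher_config({"file_watcher": {"debounce_seconds": 2.0}})
# → {'debounce_seconds': 2.0, 'enabled': True, 'additional_exclude_patterns': [], ...}
```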
     mcp.run()
 
 if __name__ == '__main__':
-    # Set path to project root
-    sys.path.append(os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
     main()
diff --git a/src/code_index_mcp/services/__init__.py b/src/code_index_mcp/services/__init__.py
new file mode 100644
index 0000000..7694446
--- /dev/null
+++ b/src/code_index_mcp/services/__init__.py
@@ -0,0 +1,48 @@
+"""
+Service layer for the Code Index MCP server.
+
+This package contains domain-specific services that handle the business logic
+for different areas of functionality:
+
+- ProjectManagementService: Project initialization and lifecycle management
+- IndexManagementService: Index rebuilds and status monitoring
+- FileDiscoveryService: Pattern-based file discovery
+- CodeIntelligenceService: File analysis and code intelligence
+- SystemManagementService: File watcher status and system configuration
+- SearchService: Code search operations and search tool management
+- FileService: File operations, content retrieval, and analysis
+- SettingsService: Settings management and directory operations
+
+Each service follows a consistent pattern:
+- Constructor accepts MCP Context parameter
+- Methods correspond to MCP entry points
+- Clear domain boundaries with no cross-service dependencies
+- Shared utilities accessed through utils module
+- Meaningful exceptions raised for error conditions
+"""
+
+# New Three-Layer Architecture Services
+from .base_service import BaseService
+from .project_management_service import ProjectManagementService
+from .index_management_service import IndexManagementService
+from .file_discovery_service import FileDiscoveryService
+from .code_intelligence_service import CodeIntelligenceService
+from .system_management_service import SystemManagementService
+from .search_service import SearchService  # Already follows clean architecture
+from .settings_service import SettingsService
+
+# Simple Services
+from .file_service import FileService  # Simple file reading for resources
+from .file_watcher_service import FileWatcherService  # Low-level service, still needed
+
+__all__ = [
+    # New Architecture
+    'BaseService',
+    'ProjectManagementService',
+    'IndexManagementService',
+    'FileDiscoveryService',
+    'CodeIntelligenceService',
+    'SystemManagementService',
+    'SearchService',
+    'SettingsService',
+
+    # Simple Services
+    'FileService',  # Simple file reading for resources
+    'FileWatcherService'  # Keep as low-level service
]
\ No newline at end of file
diff --git a/src/code_index_mcp/services/base_service.py b/src/code_index_mcp/services/base_service.py
new file mode 100644
index 0000000..a29e6bf
--- /dev/null
+++ b/src/code_index_mcp/services/base_service.py
@@ -0,0 +1,140 @@
+"""
+Base service class providing common functionality for all services.
+
+This module defines the base service pattern that all domain services inherit from,
+ensuring consistent behavior and shared functionality across the service layer.
+"""
+
+from abc import ABC
+from typing import Optional
+from mcp.server.fastmcp import Context
+
+from ..utils import ContextHelper, ValidationHelper
+
+
+class BaseService(ABC):
+    """
+    Base class for all MCP services.
+
+    This class provides common functionality that all services need:
+    - Context management through ContextHelper
+    - Common validation patterns
+    - Shared error checking methods
+
+    All domain services should inherit from this class to ensure
+    consistent behavior and access to shared utilities.
+    """
+
+    def __init__(self, ctx: Context):
+        """
+        Initialize the base service.
+
+        Args:
+            ctx: The MCP Context object containing request and lifespan context
+        """
+        self.ctx = ctx
+        self.helper = ContextHelper(ctx)
+
+    def _validate_project_setup(self) -> Optional[str]:
+        """
+        Validate that the project is properly set up.
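+
+        A typical guard in a service method (sketch; this is the same pattern
+        `_require_project_setup` wraps below)::
+
+            error = self._validate_project_setup()
+            if error:
+                raise ValueError(error)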
+ + This method checks if the base path is set and valid, which is + required for most operations. + + Returns: + Error message if project is not set up properly, None if valid + """ + return self.helper.get_base_path_error() + + def _require_project_setup(self) -> None: + """ + Ensure project is set up, raising an exception if not. + + This is a convenience method for operations that absolutely + require a valid project setup. + + Raises: + ValueError: If project is not properly set up + """ + error = self._validate_project_setup() + if error: + raise ValueError(error) + + def _validate_file_path(self, file_path: str) -> Optional[str]: + """ + Validate a file path for security and accessibility. + + Args: + file_path: The file path to validate + + Returns: + Error message if validation fails, None if valid + """ + return ValidationHelper.validate_file_path(file_path, self.helper.base_path) + + def _require_valid_file_path(self, file_path: str) -> None: + """ + Ensure file path is valid, raising an exception if not. + + Args: + file_path: The file path to validate + + Raises: + ValueError: If file path is invalid + """ + error = self._validate_file_path(file_path) + if error: + raise ValueError(error) + + @property + def base_path(self) -> str: + """ + Convenient access to the base project path. + + Returns: + The base project path + """ + return self.helper.base_path + + @property + def settings(self): + """ + Convenient access to the project settings. + + Returns: + The ProjectSettings instance + """ + return self.helper.settings + + @property + def file_count(self) -> int: + """ + Convenient access to the current file count. + + Returns: + The number of indexed files + """ + return self.helper.file_count + + @property + def index_provider(self): + """ + Convenient access to the unified index provider. + + Returns: + The current IIndexProvider instance, or None if not available + """ + if self.helper.index_manager: + return self.helper.index_manager.get_provider() + return None + + @property + def index_manager(self): + """ + Convenient access to the index manager. + + Returns: + The index manager instance, or None if not available + """ + return self.helper.index_manager diff --git a/src/code_index_mcp/services/code_intelligence_service.py b/src/code_index_mcp/services/code_intelligence_service.py new file mode 100644 index 0000000..af0f1a2 --- /dev/null +++ b/src/code_index_mcp/services/code_intelligence_service.py @@ -0,0 +1,104 @@ +""" +Code Intelligence Service - Business logic for code analysis and understanding. + +This service handles the business logic for analyzing code files using the new +JSON-based indexing system optimized for LLM consumption. +""" + +import logging +import os +from typing import Dict, Any + +from .base_service import BaseService +from ..tools.filesystem import FileSystemTool +from ..indexing import get_index_manager + +logger = logging.getLogger(__name__) + + +class CodeIntelligenceService(BaseService): + """ + Business service for code analysis and intelligence using JSON indexing. + + This service provides comprehensive code analysis using the optimized + JSON-based indexing system for fast LLM-friendly responses. + """ + + def __init__(self, ctx): + super().__init__(ctx) + self._filesystem_tool = FileSystemTool() + + def analyze_file(self, file_path: str) -> Dict[str, Any]: + """ + Analyze a file and return comprehensive intelligence. 
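+
+        Typical call from the MCP layer (the path argument is illustrative)::
+
+            summary = CodeIntelligenceService(ctx).analyze_file("src/app.py")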
+ + This is the main business method that orchestrates the file analysis + workflow, choosing the best analysis strategy and providing rich + insights about the code. + + Args: + file_path: Path to the file to analyze (relative to project root) + + Returns: + Dictionary with comprehensive file analysis + + Raises: + ValueError: If file path is invalid or analysis fails + """ + # Business validation + self._validate_analysis_request(file_path) + + # Use the global index manager + index_manager = get_index_manager() + + # Debug logging + logger.info(f"Getting file summary for: {file_path}") + logger.info(f"Index manager state - Project path: {index_manager.project_path}") + logger.info(f"Index manager state - Has builder: {index_manager.index_builder is not None}") + if index_manager.index_builder: + logger.info(f"Index manager state - Has index: {index_manager.index_builder.in_memory_index is not None}") + + # Get file summary from JSON index + summary = index_manager.get_file_summary(file_path) + logger.info(f"Summary result: {summary is not None}") + + # If deep index isn't available yet, return a helpful hint instead of error + if not summary: + return { + "status": "needs_deep_index", + "message": "Deep index not available. Please run build_deep_index before calling get_file_summary.", + "file_path": file_path + } + + return summary + + def _validate_analysis_request(self, file_path: str) -> None: + """ + Validate the file analysis request according to business rules. + + Args: + file_path: File path to validate + + Raises: + ValueError: If validation fails + """ + # Business rule: Project must be set up OR auto-initialization must be possible + if self.base_path: + # Standard validation if project is set up in context + self._require_valid_file_path(file_path) + full_path = os.path.join(self.base_path, file_path) + if not os.path.exists(full_path): + raise ValueError(f"File does not exist: {file_path}") + else: + # Allow proceeding if auto-initialization might work + # The index manager will handle project discovery + logger.info("Project not set in context, relying on index auto-initialization") + + # Basic file path validation only + if not file_path or '..' in file_path: + raise ValueError(f"Invalid file path: {file_path}") + + + + + diff --git a/src/code_index_mcp/services/file_discovery_service.py b/src/code_index_mcp/services/file_discovery_service.py new file mode 100644 index 0000000..d777511 --- /dev/null +++ b/src/code_index_mcp/services/file_discovery_service.py @@ -0,0 +1,78 @@ +""" +File Discovery Service - Business logic for intelligent file discovery. + +This service handles the business logic for finding files using the new +JSON-based indexing system optimized for LLM consumption. +""" + +from typing import Dict, Any, List, Optional +from dataclasses import dataclass + +from .base_service import BaseService +from ..indexing import get_shallow_index_manager + + +@dataclass +class FileDiscoveryResult: + """Business result for file discovery operations.""" + files: List[str] + total_count: int + pattern_used: str + search_strategy: str + metadata: Dict[str, Any] + + +class FileDiscoveryService(BaseService): + """ + Business service for intelligent file discovery using JSON indexing. + + This service provides fast file discovery using the optimized JSON + indexing system for efficient LLM-oriented responses. 
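+
+    A minimal usage sketch (pattern and limit are illustrative)::
+
+        files = FileDiscoveryService(ctx).find_files("*.py", max_results=50)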
+ """ + + def __init__(self, ctx): + super().__init__(ctx) + self._index_manager = get_shallow_index_manager() + + def find_files(self, pattern: str, max_results: Optional[int] = None) -> List[str]: + """ + Find files matching the given pattern using JSON indexing. + + Args: + pattern: Glob pattern to search for (e.g., "*.py", "test_*.js") + max_results: Maximum number of results to return (None for no limit) + + Returns: + List of file paths matching the pattern + + Raises: + ValueError: If pattern is invalid or project not set up + """ + # Business validation + self._validate_discovery_request(pattern) + + # Get files from JSON index + files = self._index_manager.find_files(pattern) + + # Apply max_results limit if specified + if max_results and len(files) > max_results: + files = files[:max_results] + + return files + + def _validate_discovery_request(self, pattern: str) -> None: + """ + Validate the file discovery request according to business rules. + + Args: + pattern: Pattern to validate + + Raises: + ValueError: If validation fails + """ + # Ensure project is set up + self._require_project_setup() + + # Validate pattern + if not pattern or not pattern.strip(): + raise ValueError("Search pattern cannot be empty") diff --git a/src/code_index_mcp/services/file_service.py b/src/code_index_mcp/services/file_service.py new file mode 100644 index 0000000..480d6f0 --- /dev/null +++ b/src/code_index_mcp/services/file_service.py @@ -0,0 +1,62 @@ +""" +File Service - Simple file reading service for MCP resources. + +This service provides simple file content reading functionality for MCP resources. +Complex file analysis has been moved to CodeIntelligenceService. + +Usage: +- get_file_content() - used by files://{file_path} resource +""" + +import os +from .base_service import BaseService + + +class FileService(BaseService): + """ + Simple service for file content reading. + + This service handles basic file reading operations for MCP resources. + Complex analysis functionality has been moved to CodeIntelligenceService. + """ + + def get_file_content(self, file_path: str) -> str: + """ + Get file content for MCP resource. + + Args: + file_path: Path to the file (relative to project root) + + Returns: + File content as string + + Raises: + ValueError: If project is not set up or path is invalid + FileNotFoundError: If file is not found or readable + """ + self._require_project_setup() + self._require_valid_file_path(file_path) + + # Build full path + full_path = os.path.join(self.base_path, file_path) + + try: + # Try UTF-8 first (most common) + with open(full_path, 'r', encoding='utf-8') as f: + return f.read() + except UnicodeDecodeError: + # Try other encodings if UTF-8 fails + encodings = ['utf-8-sig', 'latin-1', 'cp1252', 'iso-8859-1'] + for encoding in encodings: + try: + with open(full_path, 'r', encoding=encoding) as f: + return f.read() + except UnicodeDecodeError: + continue + + raise ValueError( + f"Could not decode file {file_path}. File may have " + f"unsupported encoding." + ) from None + except (FileNotFoundError, PermissionError, OSError) as e: + raise FileNotFoundError(f"Error reading file: {e}") from e diff --git a/src/code_index_mcp/services/file_watcher_service.py b/src/code_index_mcp/services/file_watcher_service.py new file mode 100644 index 0000000..c2ef64c --- /dev/null +++ b/src/code_index_mcp/services/file_watcher_service.py @@ -0,0 +1,418 @@ +""" +File Watcher Service for automatic index rebuilds. 
+ +This module provides file system monitoring capabilities that automatically +trigger index rebuilds when relevant files are modified, created, or deleted. +It uses the watchdog library for cross-platform file system event monitoring. +""" +# pylint: disable=missing-function-docstring # Fallback stub methods don't need docstrings + +import logging +import os +import traceback +from threading import Timer +from typing import Optional, Callable, List +from pathlib import Path + +try: + from watchdog.observers import Observer + from watchdog.events import FileSystemEventHandler, FileSystemEvent + WATCHDOG_AVAILABLE = True +except ImportError: + # Fallback classes for when watchdog is not available + class Observer: + """Fallback Observer class when watchdog library is not available.""" + def __init__(self): + pass + def schedule(self, *args, **kwargs): + pass + def start(self): + pass + def stop(self): + pass + def join(self, *args, **kwargs): + pass + def is_alive(self): + return False + + class FileSystemEventHandler: + """Fallback FileSystemEventHandler class when watchdog library is not available.""" + def __init__(self): + pass + + class FileSystemEvent: + """Fallback FileSystemEvent class when watchdog library is not available.""" + def __init__(self): + self.is_directory = False + self.src_path = "" + self.event_type = "" + + WATCHDOG_AVAILABLE = False + +from .base_service import BaseService +from ..constants import SUPPORTED_EXTENSIONS + + +class FileWatcherService(BaseService): + """ + Service for monitoring file system changes and triggering index rebuilds. + + This service uses the watchdog library to monitor file system events and + automatically triggers background index rebuilds when relevant files change. + It includes intelligent debouncing to batch rapid changes and filtering + to only monitor relevant file types. + """ + MAX_RESTART_ATTEMPTS = 3 + + def __init__(self, ctx): + """ + Initialize the file watcher service. + + Args: + ctx: The MCP Context object + """ + super().__init__(ctx) + self.logger = logging.getLogger(__name__) + self.observer: Optional[Observer] = None + self.event_handler: Optional[DebounceEventHandler] = None + self.is_monitoring = False + self.restart_attempts = 0 + self.rebuild_callback: Optional[Callable] = None + + # Check if watchdog is available + if not WATCHDOG_AVAILABLE: + self.logger.warning("Watchdog library not available - file watcher disabled") + + def start_monitoring(self, rebuild_callback: Callable) -> bool: + """ + Start file system monitoring. 
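+
+        Wiring sketch (the callback is whatever rebuilds your index and
+        returns a bool; the names are illustrative)::
+
+            watcher.start_monitoring(rebuild_callback=lambda: manager.build_index())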
+ + Args: + rebuild_callback: Function to call when rebuild is needed + + Returns: + True if monitoring started successfully, False otherwise + """ + if not WATCHDOG_AVAILABLE: + self.logger.warning("Cannot start file watcher - watchdog library not available") + return False + + if self.is_monitoring: + self.logger.debug("File watcher already monitoring") + return True + + # Validate project setup + error = self._validate_project_setup() + if error: + self.logger.error("Cannot start file watcher: %s", error) + return False + + self.rebuild_callback = rebuild_callback + + # Get debounce seconds from config + config = self.settings.get_file_watcher_config() + debounce_seconds = config.get('debounce_seconds', 6.0) + + try: + self.observer = Observer() + self.event_handler = DebounceEventHandler( + debounce_seconds=debounce_seconds, + rebuild_callback=self.rebuild_callback, + base_path=Path(self.base_path), + logger=self.logger + ) + + # Log detailed Observer setup + watch_path = str(self.base_path) + self.logger.debug("Scheduling Observer for path: %s", watch_path) + + self.observer.schedule( + self.event_handler, + watch_path, + recursive=True + ) + + # Log Observer start + self.logger.debug("Starting Observer...") + self.observer.start() + self.is_monitoring = True + self.restart_attempts = 0 + + # Log Observer thread info + if hasattr(self.observer, '_thread'): + self.logger.debug("Observer thread: %s", self.observer._thread) + + # Verify observer is actually running + if self.observer.is_alive(): + self.logger.info( + "File watcher started successfully", + extra={ + "debounce_seconds": debounce_seconds, + "monitored_path": str(self.base_path), + "supported_extensions": len(SUPPORTED_EXTENSIONS) + } + ) + + # Add diagnostic test - create a test event to verify Observer works + self.logger.debug("Observer thread is alive: %s", self.observer.is_alive()) + self.logger.debug("Monitored path exists: %s", os.path.exists(str(self.base_path))) + self.logger.debug("Event handler is set: %s", self.event_handler is not None) + + # Log current directory for comparison + current_dir = os.getcwd() + self.logger.debug("Current working directory: %s", current_dir) + self.logger.debug("Are paths same: %s", os.path.normpath(current_dir) == os.path.normpath(str(self.base_path))) + + return True + else: + self.logger.error("File watcher failed to start - Observer not alive") + return False + + except Exception as e: + self.logger.warning("Failed to start file watcher: %s", e) + self.logger.info("Falling back to reactive index refresh") + return False + + def stop_monitoring(self) -> None: + """ + Stop file system monitoring and cleanup all resources. 
+
+        This method ensures complete cleanup of:
+        - Observer thread
+        - Event handler
+        - Debounce timers
+        - Monitoring state
+        """
+        if not self.observer and not self.is_monitoring:
+            # Already stopped or never started
+            return
+
+        self.logger.info("Stopping file watcher monitoring...")
+
+        try:
+            # Step 1: Stop the observer first
+            if self.observer:
+                self.logger.debug("Stopping observer...")
+                self.observer.stop()
+
+            # Step 2: Cancel any active debounce timer
+            if self.event_handler and self.event_handler.debounce_timer:
+                self.logger.debug("Cancelling debounce timer...")
+                self.event_handler.debounce_timer.cancel()
+
+            # Steps 3-4 only apply when an observer was actually created
+            if self.observer:
+                # Step 3: Wait for observer thread to finish (with timeout)
+                self.logger.debug("Waiting for observer thread to finish...")
+                self.observer.join(timeout=5.0)
+
+                # Step 4: Check if thread actually finished
+                if self.observer.is_alive():
+                    self.logger.warning("Observer thread did not stop within timeout")
+                else:
+                    self.logger.debug("Observer thread stopped successfully")
+
+            # Step 5: Clear all references
+            self.observer = None
+            self.event_handler = None
+            self.rebuild_callback = None
+            self.is_monitoring = False
+
+            self.logger.info("File watcher stopped and cleaned up successfully")
+
+        except Exception as e:
+            self.logger.error("Error stopping file watcher: %s", e)
+
+            # Force cleanup even if there were errors
+            self.observer = None
+            self.event_handler = None
+            self.rebuild_callback = None
+            self.is_monitoring = False
+
+    def is_active(self) -> bool:
+        """
+        Check if file watcher is actively monitoring.
+
+        Returns:
+            True if actively monitoring, False otherwise
+        """
+        return bool(self.is_monitoring and
+                    self.observer and
+                    self.observer.is_alive())
+
+    def restart_observer(self) -> bool:
+        """
+        Attempt to restart the file system observer.
+
+        Returns:
+            True if restart successful, False otherwise
+        """
+        if self.restart_attempts >= self.MAX_RESTART_ATTEMPTS:
+            self.logger.error("Max restart attempts reached, file watcher disabled")
+            return False
+
+        self.logger.info("Attempting to restart file watcher (attempt %d)",
+                         self.restart_attempts + 1)
+        self.restart_attempts += 1
+
+        # Stop current observer if running
+        if self.observer:
+            try:
+                self.observer.stop()
+                self.observer.join(timeout=2.0)
+            except Exception as e:
+                self.logger.warning("Error stopping observer during restart: %s", e)
+
+        # Start new observer
+        try:
+            self.observer = Observer()
+            self.observer.schedule(
+                self.event_handler,
+                str(self.base_path),
+                recursive=True
+            )
+            self.observer.start()
+            self.is_monitoring = True
+
+            self.logger.info("File watcher restarted successfully")
+            return True
+
+        except Exception as e:
+            self.logger.error("Failed to restart file watcher: %s", e)
+            return False
+
+    def get_status(self) -> dict:
+        """
+        Get current file watcher status information.
+
+        Returns:
+            Dictionary containing status information
+        """
+        # Get current debounce seconds from config
+        config = self.settings.get_file_watcher_config()
+        debounce_seconds = config.get('debounce_seconds', 6.0)
+
+        return {
+            "available": WATCHDOG_AVAILABLE,
+            "active": self.is_active(),
+            "monitoring": self.is_monitoring,
+            "restart_attempts": self.restart_attempts,
+            "debounce_seconds": debounce_seconds,
+            "base_path": self.base_path if self.base_path else None,
+            "observer_alive": self.observer.is_alive() if self.observer else False
+        }
+
+
+class DebounceEventHandler(FileSystemEventHandler):
+    """
+    File system event handler with debouncing capability.
+
+    This handler filters file system events to only relevant files and
+    implements a debounce mechanism to batch rapid changes into single
+    rebuild operations.
+    """
+
+    def __init__(self, debounce_seconds: float, rebuild_callback: Callable,
+                 base_path: Path, logger: logging.Logger,
+                 additional_excludes: Optional[List[str]] = None):
+        """
+        Initialize the debounce event handler.
+
+        Args:
+            debounce_seconds: Number of seconds to wait before triggering rebuild
+            rebuild_callback: Function to call when rebuild is needed
+            base_path: Base project path for filtering
+            logger: Logger instance for debug messages
+            additional_excludes: Additional patterns to exclude
+        """
+        from ..utils import FileFilter
+
+        super().__init__()
+        self.debounce_seconds = debounce_seconds
+        self.rebuild_callback = rebuild_callback
+        self.base_path = base_path
+        self.debounce_timer: Optional[Timer] = None
+        self.logger = logger
+
+        # Use centralized file filtering
+        self.file_filter = FileFilter(additional_excludes)
+
+    def on_any_event(self, event: FileSystemEvent) -> None:
+        """
+        Handle any file system event.
+
+        Args:
+            event: The file system event
+        """
+        # Check if event should be processed
+        should_process = self.should_process_event(event)
+
+        if should_process:
+            self.logger.info("File changed: %s - %s", event.event_type, event.src_path)
+            self.reset_debounce_timer()
+        else:
+            # Only log at debug level for filtered events
+            self.logger.debug("Filtered: %s - %s", event.event_type, event.src_path)
+
+    def should_process_event(self, event: FileSystemEvent) -> bool:
+        """
+        Determine if event should trigger index rebuild using centralized filtering.
+
+        Args:
+            event: The file system event to evaluate
+
+        Returns:
+            True if event should trigger rebuild, False otherwise
+        """
+        # Skip directory events
+        if event.is_directory:
+            self.logger.debug("Skipping directory event: %s", event.src_path)
+            return False
+
+        # Select path to check: dest_path for moves, src_path for others
+        if event.event_type == 'moved':
+            if not hasattr(event, 'dest_path'):
+                return False
+            target_path = event.dest_path
+        else:
+            target_path = event.src_path
+
+        # Use centralized filtering logic
+        try:
+            path = Path(target_path)
+            should_process = self.file_filter.should_process_path(path, self.base_path)
+
+            # Skip temporary files using centralized logic
+            if not should_process or self.file_filter.is_temporary_file(path):
+                return False
+
+            return True
+        except Exception:
+            return False
+
+    def reset_debounce_timer(self) -> None:
+        """Reset the debounce timer, canceling any existing timer."""
+        if self.debounce_timer:
+            self.debounce_timer.cancel()
+
+        self.debounce_timer = Timer(
+            self.debounce_seconds,
+            self.trigger_rebuild
+        )
+        self.debounce_timer.start()
+
+    def trigger_rebuild(self) -> None:
+        """Trigger index rebuild after debounce period."""
+        self.logger.info("File changes detected, triggering rebuild")
+
+        if self.rebuild_callback:
+            try:
+                self.rebuild_callback()
+            except Exception as e:
+                self.logger.error("Rebuild callback failed: %s", e)
+                self.logger.error("Traceback: %s", traceback.format_exc())
+        else:
+            self.logger.warning("No rebuild callback configured")
diff --git a/src/code_index_mcp/services/index_management_service.py b/src/code_index_mcp/services/index_management_service.py
new file mode 100644
index 0000000..f56c760
--- /dev/null
+++ b/src/code_index_mcp/services/index_management_service.py
@@ -0,0 +1,198 @@
+"""
+Index Management Service - Business logic for
index lifecycle management.
+
+This service handles the business logic for index rebuilding, status monitoring,
+and index-related operations using the new JSON-based indexing system.
+"""
+import time
+import logging
+import os
+import json
+
+from typing import Dict, Any
+from dataclasses import dataclass
+
+from .base_service import BaseService
+from ..indexing import get_index_manager, get_shallow_index_manager, DeepIndexManager
+
+logger = logging.getLogger(__name__)
+
+
+@dataclass
+class IndexRebuildResult:
+    """Business result for index rebuild operations."""
+    file_count: int
+    rebuild_time: float
+    status: str
+    message: str
+
+
+class IndexManagementService(BaseService):
+    """
+    Business service for index lifecycle management.
+
+    This service orchestrates index management workflows using the new
+    JSON-based indexing system for optimal LLM performance.
+    """
+
+    def __init__(self, ctx):
+        super().__init__(ctx)
+        # Deep manager (symbols/files, legacy JSON index manager)
+        self._index_manager = get_index_manager()
+        # Shallow manager (file-list only) for default workflows
+        self._shallow_manager = get_shallow_index_manager()
+        # Optional wrapper for explicit deep builds
+        self._deep_wrapper = DeepIndexManager()
+
+    def rebuild_index(self) -> str:
+        """
+        Rebuild the project index (DEFAULT: shallow file list).
+
+        For deep/symbol rebuilds, use the build_deep_index() tool instead.
+
+        Returns:
+            Success message with rebuild information
+
+        Raises:
+            ValueError: If project not set up or rebuild fails
+        """
+        # Business validation
+        self._validate_rebuild_request()
+
+        # Shallow rebuild only (fast path)
+        if not self._shallow_manager.set_project_path(self.base_path):
+            raise RuntimeError("Failed to set project path (shallow) in index manager")
+        if not self._shallow_manager.build_index():
+            raise RuntimeError("Failed to rebuild shallow index")
+
+        try:
+            count = len(self._shallow_manager.get_file_list())
+        except Exception:
+            count = 0
+        return f"Shallow index re-built with {count} files."
+
+    def get_rebuild_status(self) -> Dict[str, Any]:
+        """
+        Get current index rebuild status information.
+
+        Returns:
+            Dictionary with rebuild status and metadata
+        """
+        # Check if project is set up
+        if not self.base_path:
+            return {
+                'status': 'not_initialized',
+                'message': 'Project not initialized',
+                'is_rebuilding': False
+            }
+
+        # Get index stats from the new JSON system
+        stats = self._index_manager.get_index_stats()
+
+        return {
+            'status': 'ready' if stats.get('status') == 'loaded' else 'needs_rebuild',
+            'index_available': stats.get('status') == 'loaded',
+            'is_rebuilding': False,
+            'project_path': self.base_path,
+            'file_count': stats.get('indexed_files', 0),
+            'total_symbols': stats.get('total_symbols', 0),
+            'symbol_types': stats.get('symbol_types', {}),
+            'languages': stats.get('languages', [])
+        }
+
+    def _validate_rebuild_request(self) -> None:
+        """
+        Validate the index rebuild request according to business rules.
+
+        Raises:
+            ValueError: If validation fails
+        """
+        # Business rule: Project must be set up
+        self._require_project_setup()
+
+    def _execute_rebuild_workflow(self) -> IndexRebuildResult:
+        """
+        Execute the core index rebuild business workflow.
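+
+        Shape of the result (values are illustrative)::
+
+            IndexRebuildResult(file_count=1234, rebuild_time=2.5,
+                               status='success', message='Index rebuilt ...')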
+ + Returns: + IndexRebuildResult with rebuild data + """ + start_time = time.time() + + # Set project path in index manager + if not self._index_manager.set_project_path(self.base_path): + raise RuntimeError("Failed to set project path in index manager") + + # Rebuild the index + if not self._index_manager.refresh_index(): + raise RuntimeError("Failed to rebuild index") + + # Get stats for result + stats = self._index_manager.get_index_stats() + file_count = stats.get('indexed_files', 0) + + rebuild_time = time.time() - start_time + + return IndexRebuildResult( + file_count=file_count, + rebuild_time=rebuild_time, + status='success', + message=f"Index rebuilt successfully with {file_count} files" + ) + + + def _format_rebuild_result(self, result: IndexRebuildResult) -> str: + """ + Format the rebuild result according to business requirements. + + Args: + result: Rebuild result data + + Returns: + Formatted result string for MCP response + """ + return f"Project re-indexed. Found {result.file_count} files." + + def build_shallow_index(self) -> str: + """ + Build and persist the shallow index (file list only). + + Returns: + Success message including file count if available. + + Raises: + ValueError/RuntimeError on validation or build failure + """ + # Ensure project is set up + self._require_project_setup() + + # Initialize manager with current base path + if not self._shallow_manager.set_project_path(self.base_path): + raise RuntimeError("Failed to set project path in index manager") + + # Build shallow index + if not self._shallow_manager.build_index(): + raise RuntimeError("Failed to build shallow index") + + # Try to report count + count = 0 + try: + shallow_path = getattr(self._shallow_manager, 'index_path', None) + if shallow_path and os.path.exists(shallow_path): + with open(shallow_path, 'r', encoding='utf-8') as f: + data = json.load(f) + if isinstance(data, list): + count = len(data) + except Exception as e: # noqa: BLE001 - safe fallback to zero + logger.debug(f"Unable to read shallow index count: {e}") + + return f"Shallow index built{f' with {count} files' if count else ''}." + + def rebuild_deep_index(self) -> str: + """Rebuild the deep index using the original workflow.""" + # Business validation + self._validate_rebuild_request() + + # Deep rebuild via existing workflow + result = self._execute_rebuild_workflow() + return self._format_rebuild_result(result) diff --git a/src/code_index_mcp/services/project_management_service.py b/src/code_index_mcp/services/project_management_service.py new file mode 100644 index 0000000..c0f3a63 --- /dev/null +++ b/src/code_index_mcp/services/project_management_service.py @@ -0,0 +1,411 @@ +""" +Project Management Service - Business logic for project lifecycle management. + +This service handles the business logic for project initialization, configuration, +and lifecycle management using the new JSON-based indexing system. 
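+
+Typical entry point (a sketch; ``ctx`` comes from the MCP runtime)::
+
+    message = ProjectManagementService(ctx).initialize_project("/path/to/project")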
+""" +import logging +from typing import Dict, Any +from dataclasses import dataclass +from contextlib import contextmanager + +from .base_service import BaseService +from ..utils.response_formatter import ResponseFormatter +from ..constants import SUPPORTED_EXTENSIONS +from ..indexing import get_index_manager, get_shallow_index_manager + +logger = logging.getLogger(__name__) + + +@dataclass +class ProjectInitializationResult: + """Business result for project initialization operations.""" + project_path: str + file_count: int + index_source: str # 'loaded_existing' or 'built_new' + search_capabilities: str + monitoring_status: str + message: str + + +class ProjectManagementService(BaseService): + """ + Business service for project lifecycle management. + + This service orchestrates project initialization workflows by composing + technical tools to achieve business goals like setting up projects, + managing configurations, and coordinating system components. + """ + + def __init__(self, ctx): + super().__init__(ctx) + # Deep index manager (legacy full index) + self._index_manager = get_index_manager() + # Shallow index manager (default for initialization) + self._shallow_manager = get_shallow_index_manager() + from ..tools.config import ProjectConfigTool + self._config_tool = ProjectConfigTool() + # Import FileWatcherTool locally to avoid circular import + from ..tools.monitoring import FileWatcherTool + self._watcher_tool = FileWatcherTool(ctx) + + + @contextmanager + def _noop_operation(self, *_args, **_kwargs): + yield + + def initialize_project(self, path: str) -> str: + """ + Initialize a project with comprehensive business logic. + + This is the main business method that orchestrates the project + initialization workflow, handling validation, cleanup, setup, + and coordination of all project components. + + Args: + path: Project directory path to initialize + + Returns: + Success message with project information + + Raises: + ValueError: If path is invalid or initialization fails + """ + # Business validation + self._validate_initialization_request(path) + + # Business workflow: Execute initialization + result = self._execute_initialization_workflow(path) + + # Business result formatting + return self._format_initialization_result(result) + + def _validate_initialization_request(self, path: str) -> None: + """ + Validate the project initialization request according to business rules. + + Args: + path: Project path to validate + + Raises: + ValueError: If validation fails + """ + # Business rule: Path must be valid + error = self._config_tool.validate_project_path(path) + if error: + raise ValueError(error) + + def _execute_initialization_workflow(self, path: str) -> ProjectInitializationResult: + """ + Execute the core project initialization business workflow. 
+
+        Args:
+            path: Project path to initialize
+
+        Returns:
+            ProjectInitializationResult with initialization data
+        """
+        # Business step 1: Initialize config tool
+        self._config_tool.initialize_settings(path)
+
+        # Normalize path for consistent processing
+        normalized_path = self._config_tool.normalize_project_path(path)
+
+        # Business step 2: Clean up existing project state
+        self._cleanup_existing_project()
+
+        # Business step 3: Initialize shallow index by default (fast path)
+        index_result = self._initialize_shallow_index_manager(normalized_path)
+
+        # Business step 3.1: Store index manager in context for other services
+        self.helper.update_index_manager(self._index_manager)
+
+        # Business step 4: Setup file monitoring
+        monitoring_result = self._setup_file_monitoring(normalized_path)
+
+        # Business step 5: Update system state
+        self._update_project_state(normalized_path, index_result['file_count'])
+
+        # Business step 6: Get search capabilities info
+        search_info = self._get_search_capabilities_info()
+
+        return ProjectInitializationResult(
+            project_path=normalized_path,
+            file_count=index_result['file_count'],
+            index_source=index_result['source'],
+            search_capabilities=search_info,
+            monitoring_status=monitoring_result,
+            message=f"Project initialized: {normalized_path}"
+        )
+
+    def _cleanup_existing_project(self) -> None:
+        """Business logic to clean up existing project state."""
+        with self._noop_operation():
+            # Stop existing file monitoring
+            self._watcher_tool.stop_existing_watcher()
+
+            # Clear existing index cache
+            self.helper.clear_index_cache()
+
+    def _initialize_json_index_manager(self, project_path: str) -> Dict[str, Any]:
+        """
+        Business logic to initialize JSON index manager.
+
+        Args:
+            project_path: Project path
+
+        Returns:
+            Dictionary with initialization results
+        """
+        # Set project path in index manager
+        if not self._index_manager.set_project_path(project_path):
+            raise RuntimeError(f"Failed to set project path: {project_path}")
+
+        # Update context
+        self.helper.update_base_path(project_path)
+
+        # Try to load existing index or build new one
+        if self._index_manager.load_index():
+            source = "loaded_existing"
+        else:
+            if not self._index_manager.build_index():
+                raise RuntimeError("Failed to build index")
+            source = "built_new"
+
+        # Get stats
+        stats = self._index_manager.get_index_stats()
+        file_count = stats.get('indexed_files', 0)
+
+        return {
+            'file_count': file_count,
+            'source': source,
+            'total_symbols': stats.get('total_symbols', 0),
+            'languages': stats.get('languages', [])
+        }
+
+    def _initialize_shallow_index_manager(self, project_path: str) -> Dict[str, Any]:
+        """
+        Business logic to initialize the shallow index manager by default.
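+
+        Returns a dict of the form (the count is illustrative)::
+
+            {'file_count': 1234, 'source': 'loaded_existing',
+             'total_symbols': 0, 'languages': []}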
+
+        Args:
+            project_path: Project path
+
+        Returns:
+            Dictionary with initialization results
+        """
+        # Set project path in shallow manager
+        if not self._shallow_manager.set_project_path(project_path):
+            raise RuntimeError(f"Failed to set project path (shallow): {project_path}")
+
+        # Update context
+        self.helper.update_base_path(project_path)
+
+        # Try to load existing shallow index or build new one
+        if self._shallow_manager.load_index():
+            source = "loaded_existing"
+        else:
+            if not self._shallow_manager.build_index():
+                raise RuntimeError("Failed to build shallow index")
+            source = "built_new"
+
+        # Determine file count from shallow list
+        try:
+            files = self._shallow_manager.get_file_list()
+            file_count = len(files)
+        except Exception:  # noqa: BLE001 - safe fallback
+            file_count = 0
+
+        return {
+            'file_count': file_count,
+            'source': source,
+            'total_symbols': 0,
+            'languages': []
+        }
+
+    def _is_valid_existing_index(self, index_data: Dict[str, Any]) -> bool:
+        """
+        Business rule to determine if existing index is valid and usable.
+
+        Args:
+            index_data: Index data to validate
+
+        Returns:
+            True if index is valid and usable, False otherwise
+        """
+        if not index_data or not isinstance(index_data, dict):
+            return False
+
+        # Business rule: Must have new format metadata
+        if 'index_metadata' not in index_data:
+            return False
+
+        # Business rule: Must be a compatible version. Compare numerically,
+        # since a plain string comparison would mis-order e.g. '10.0' vs '3.0'.
+        version = index_data.get('index_metadata', {}).get('version', '')
+        try:
+            return tuple(int(part) for part in version.split('.')) >= (3, 0)
+        except ValueError:
+            return False
+
+    def _load_existing_index(self, index_data: Dict[str, Any]) -> Dict[str, Any]:
+        """
+        Business logic to load and use existing index.
+
+        Args:
+            index_data: Existing index data
+
+        Returns:
+            Dictionary with loading results
+        """
+        # Note: Legacy index loading is now handled by UnifiedIndexManager.
+        # This method is kept for backward compatibility.
+
+        # Extract file count from metadata
+        file_count = index_data.get('project_metadata', {}).get('total_files', 0)
+
+        return {
+            'file_count': file_count,
+            'source': 'loaded_existing'
+        }
+
+    def _setup_file_monitoring(self, project_path: str) -> str:
+        """
+        Business logic to setup file monitoring for the project.
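+
+        Contract sketch::
+
+            status = self._setup_file_monitoring("/path/to/project")
+            # -> 'monitoring_active' | 'monitoring_failed' | 'monitoring_error'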
+
+        Args:
+            project_path: Project path to monitor
+
+        Returns:
+            String describing monitoring setup result
+        """
+        try:
+            # Create rebuild callback that rebuilds the shallow index on changes
+            def rebuild_callback():
+                logger.info("File watcher triggered rebuild callback")
+                try:
+                    logger.debug(f"Starting shallow index rebuild for: {project_path}")
+                    if not self._shallow_manager.set_project_path(project_path):
+                        logger.warning("Shallow manager set_project_path failed")
+                        return False
+                    if self._shallow_manager.build_index():
+                        files = self._shallow_manager.get_file_list()
+                        logger.info(f"File watcher shallow rebuild completed successfully - {len(files)} files")
+                        return True
+                    logger.warning("File watcher shallow rebuild failed")
+                    return False
+                except Exception:
+                    logger.exception("File watcher shallow rebuild failed")
+                    return False
+
+            # Start monitoring using watcher tool
+            success = self._watcher_tool.start_monitoring(project_path, rebuild_callback)
+
+            if success:
+                # Store watcher in context for later access
+                self._watcher_tool.store_in_context()
+                return "monitoring_active"
+            else:
+                self._watcher_tool.record_error("Failed to start file monitoring")
+                return "monitoring_failed"
+
+        except Exception as e:
+            error_msg = f"File monitoring setup failed: {e}"
+            self._watcher_tool.record_error(error_msg)
+            return "monitoring_error"
+
+    def _update_project_state(self, project_path: str, file_count: int) -> None:
+        """Business logic to update system state after project initialization."""
+        # Update context with file count
+        self.helper.update_file_count(file_count)
+
+    def _get_search_capabilities_info(self) -> str:
+        """Business logic to get search capabilities information."""
+        search_info = self._config_tool.get_search_tool_info()
+
+        if search_info['available']:
+            return f"Advanced search enabled ({search_info['name']})"
+        else:
+            return "Basic search available"
+
+    def _format_initialization_result(self, result: ProjectInitializationResult) -> str:
+        """
+        Format the initialization result according to business requirements.
+
+        Args:
+            result: Initialization result data
+
+        Returns:
+            Formatted result string for MCP response
+        """
+        if result.index_source == 'unified_manager':
+            message = (f"Project path set to: {result.project_path}. "
+                       f"Initialized unified index with {result.file_count} files. "
+                       f"{result.search_capabilities}.")
+        elif result.index_source == 'failed':
+            message = (f"Project path set to: {result.project_path}. "
+                       f"Index initialization failed. Some features may be limited. "
+                       f"{result.search_capabilities}.")
+        else:
+            message = (f"Project path set to: {result.project_path}. "
+                       f"Indexed {result.file_count} files. "
+                       f"{result.search_capabilities}.")
+
+        if result.monitoring_status != "monitoring_active":
+            message += " (File monitoring unavailable - use manual refresh)"
+
+        return message
+
+    def get_project_config(self) -> str:
+        """
+        Get the current project configuration for MCP resource.
+
+        Returns:
+            JSON formatted configuration string
+        """
+        # Check if project is configured
+        if not self.helper.base_path:
+            config_data = {
+                "status": "not_configured",
+                "message": ("Project path not set.
Please use set_project_path " + "to set a project directory first."), + "supported_extensions": SUPPORTED_EXTENSIONS + } + return ResponseFormatter.config_response(config_data) + + # Get settings stats + settings_stats = self.helper.settings.get_stats() if self.helper.settings else {} + + config_data = { + "base_path": self.helper.base_path, + "supported_extensions": SUPPORTED_EXTENSIONS, + "file_count": self.helper.file_count, + "settings_directory": self.helper.settings.settings_path if self.helper.settings else "", + "settings_stats": settings_stats + } + + return ResponseFormatter.config_response(config_data) + + # Removed: get_project_structure; the project structure resource is deprecated diff --git a/src/code_index_mcp/services/search_service.py b/src/code_index_mcp/services/search_service.py new file mode 100644 index 0000000..a2c2799 --- /dev/null +++ b/src/code_index_mcp/services/search_service.py @@ -0,0 +1,172 @@ +""" +Search service for the Code Index MCP server. + +This service handles code search operations, search tool management, +and search strategy selection. +""" + +from pathlib import Path +from typing import Any, Dict, List, Optional + +from .base_service import BaseService +from ..utils import FileFilter, ResponseFormatter, ValidationHelper +from ..search.base import is_safe_regex_pattern + + +class SearchService(BaseService): + """Service for managing code search operations.""" + + def __init__(self, ctx): + super().__init__(ctx) + self.file_filter = self._create_file_filter() + + def search_code( # pylint: disable=too-many-arguments + self, + pattern: str, + case_sensitive: bool = True, + context_lines: int = 0, + file_pattern: Optional[str] = None, + fuzzy: bool = False, + regex: Optional[bool] = None, + max_line_length: Optional[int] = None + ) -> Dict[str, Any]: + """Search for code patterns in the project.""" + self._require_project_setup() + + if regex is None: + regex = is_safe_regex_pattern(pattern) + + error = ValidationHelper.validate_search_pattern(pattern, regex) + if error: + raise ValueError(error) + + if file_pattern: + error = ValidationHelper.validate_glob_pattern(file_pattern) + if error: + raise ValueError(f"Invalid file pattern: {error}") + + if not self.settings: + raise ValueError("Settings not available") + + strategy = self.settings.get_preferred_search_tool() + if not strategy: + raise ValueError("No search strategies available") + + self._configure_strategy(strategy) + + try: + results = strategy.search( + pattern=pattern, + base_path=self.base_path, + case_sensitive=case_sensitive, + context_lines=context_lines, + file_pattern=file_pattern, + fuzzy=fuzzy, + regex=regex, + max_line_length=max_line_length + ) + filtered = self._filter_results(results) + return ResponseFormatter.search_results_response(filtered) + except Exception as exc: + raise ValueError(f"Search failed using '{strategy.name}': {exc}") from exc + + def refresh_search_tools(self) -> str: + """Refresh the available search tools.""" + if not self.settings: + raise ValueError("Settings not available") + + self.settings.refresh_available_strategies() + config = self.settings.get_search_tools_config() + + available = config['available_tools'] + preferred = config['preferred_tool'] + return f"Search tools refreshed. Available: {available}. Preferred: {preferred}." 
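+
+    # Typical tool-level flow (a sketch; the pattern and filter are illustrative):
+    #     SearchService(ctx).search_code("TODO", file_pattern="*.py")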
+ + def get_search_capabilities(self) -> Dict[str, Any]: + """Get information about search capabilities and available tools.""" + if not self.settings: + return {"error": "Settings not available"} + + config = self.settings.get_search_tools_config() + + capabilities = { + "available_tools": config.get('available_tools', []), + "preferred_tool": config.get('preferred_tool', 'basic'), + "supports_regex": True, + "supports_fuzzy": True, + "supports_case_sensitivity": True, + "supports_context_lines": True, + "supports_file_patterns": True + } + + return capabilities + + def _configure_strategy(self, strategy) -> None: + """Apply shared exclusion configuration to the strategy if supported.""" + configure = getattr(strategy, 'configure_excludes', None) + if not configure: + return + + try: + configure(self.file_filter) + except Exception: # pragma: no cover - defensive fallback + pass + + def _create_file_filter(self) -> FileFilter: + """Build a shared file filter drawing from project settings.""" + additional_dirs: List[str] = [] + additional_file_patterns: List[str] = [] + + settings = self.settings + if settings: + try: + config = settings.get_file_watcher_config() + except Exception: # pragma: no cover - fallback if config fails + config = {} + + for key in ('exclude_patterns', 'additional_exclude_patterns'): + patterns = config.get(key) or [] + for pattern in patterns: + if not isinstance(pattern, str): + continue + normalized = pattern.strip() + if not normalized: + continue + additional_dirs.append(normalized) + additional_file_patterns.append(normalized) + + file_filter = FileFilter(additional_dirs or None) + + if additional_file_patterns: + file_filter.exclude_files.update(additional_file_patterns) + + return file_filter + + def _filter_results(self, results: Dict[str, Any]) -> Dict[str, Any]: + """Filter out matches that reside under excluded paths.""" + if not isinstance(results, dict) or not results: + return results + + if 'error' in results or not self.file_filter or not self.base_path: + return results + + base_path = Path(self.base_path) + filtered: Dict[str, Any] = {} + + for rel_path, matches in results.items(): + if not isinstance(rel_path, str): + continue + + normalized = Path(rel_path.replace('\\', '/')) + try: + absolute = (base_path / normalized).resolve() + except Exception: # pragma: no cover - invalid path safety + continue + + try: + if self.file_filter.should_process_path(absolute, base_path): + filtered[rel_path] = matches + except Exception: # pragma: no cover - defensive fallback + continue + + return filtered diff --git a/src/code_index_mcp/services/settings_service.py b/src/code_index_mcp/services/settings_service.py new file mode 100644 index 0000000..bd641c4 --- /dev/null +++ b/src/code_index_mcp/services/settings_service.py @@ -0,0 +1,191 @@ +""" +Settings management service for the Code Index MCP server. + +This service handles settings information, statistics, +temporary directory management, and settings cleanup operations. +""" + +import os +import tempfile +from typing import Dict, Any + +from .base_service import BaseService +from ..utils import ResponseFormatter +from ..constants import SETTINGS_DIR +from ..project_settings import ProjectSettings +from ..indexing import get_index_manager + + +def manage_temp_directory(action: str) -> Dict[str, Any]: + """ + Manage temporary directory operations. + + This is a standalone function that doesn't require project context. + Handles the logic for create_temp_directory and check_temp_directory MCP tools. 
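+
+    Example (sketch)::
+
+        info = manage_temp_directory('check')
+        # info['exists'], info['contents'] and info['subdirectories'] describe
+        # the settings temp directory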
+
+    Args:
+        action: The action to perform ('create' or 'check')
+
+    Returns:
+        Dictionary with directory information and operation results
+
+    Raises:
+        ValueError: If action is invalid or operation fails
+    """
+    if action not in ['create', 'check']:
+        raise ValueError(f"Invalid action: {action}. Must be 'create' or 'check'")
+
+    # Try to get the actual temp directory from index manager, fall back to default
+    try:
+        index_manager = get_index_manager()
+        temp_dir = (index_manager.temp_dir if index_manager.temp_dir
+                    else os.path.join(tempfile.gettempdir(), SETTINGS_DIR))
+    except Exception:
+        temp_dir = os.path.join(tempfile.gettempdir(), SETTINGS_DIR)
+
+    if action == 'create':
+        existed_before = os.path.exists(temp_dir)
+
+        try:
+            # Use ProjectSettings to handle directory creation consistently
+            ProjectSettings("", skip_load=True)
+
+            result = ResponseFormatter.directory_info_response(
+                temp_directory=temp_dir,
+                exists=os.path.exists(temp_dir),
+                is_directory=os.path.isdir(temp_dir)
+            )
+            result["existed_before"] = existed_before
+            result["created"] = not existed_before
+
+            return result
+
+        except (OSError, IOError, ValueError) as e:
+            return ResponseFormatter.directory_info_response(
+                temp_directory=temp_dir,
+                exists=False,
+                error=str(e)
+            )
+
+    else:  # action == 'check'
+        result = ResponseFormatter.directory_info_response(
+            temp_directory=temp_dir,
+            exists=os.path.exists(temp_dir),
+            is_directory=os.path.isdir(temp_dir) if os.path.exists(temp_dir) else False
+        )
+        result["temp_root"] = tempfile.gettempdir()
+
+        # If the directory exists, list its contents
+        if result["exists"] and result["is_directory"]:
+            try:
+                contents = os.listdir(temp_dir)
+                result["contents"] = contents
+                result["subdirectories"] = []
+
+                # Check each subdirectory
+                for item in contents:
+                    item_path = os.path.join(temp_dir, item)
+                    if os.path.isdir(item_path):
+                        subdir_info = {
+                            "name": item,
+                            "path": item_path,
+                            "contents": os.listdir(item_path) if os.path.exists(item_path) else []
+                        }
+                        result["subdirectories"].append(subdir_info)
+
+            except (OSError, PermissionError) as e:
+                result["error"] = str(e)
+
+        return result
+
+
+class SettingsService(BaseService):
+    """
+    Service for managing settings and directory operations.
+
+    This service handles:
+    - Settings information and statistics
+    - Temporary directory management
+    - Settings cleanup operations
+    - Configuration data access
+    """
+
+    def get_settings_info(self) -> Dict[str, Any]:
+        """
+        Get comprehensive settings information.
+
+        Handles the logic for get_settings_info MCP tool.
+
+        Returns:
+            Dictionary with settings directory, config, stats, and status information
+        """
+        temp_dir = os.path.join(tempfile.gettempdir(), SETTINGS_DIR)
+
+        # Get the actual index directory from the index manager
+        index_manager = get_index_manager()
+        actual_temp_dir = index_manager.temp_dir if index_manager.temp_dir else temp_dir
+
+        # Check if base_path is set
+        if not self.base_path:
+            return ResponseFormatter.settings_info_response(
+                settings_directory="",
+                temp_directory=actual_temp_dir,
+                temp_directory_exists=os.path.exists(actual_temp_dir),
+                config={},
+                stats={},
+                exists=False,
+                status="not_configured",
+                message="Project path not set. Please use set_project_path to set a "
+                        "project directory first."
+ ) + + # Get config and stats + config = self.settings.load_config() if self.settings else {} + stats = self.settings.get_stats() if self.settings else {} + settings_directory = actual_temp_dir + exists = os.path.exists(settings_directory) if settings_directory else False + + return ResponseFormatter.settings_info_response( + settings_directory=settings_directory, + temp_directory=actual_temp_dir, + temp_directory_exists=os.path.exists(actual_temp_dir), + config=config, + stats=stats, + exists=exists + ) + + + + def clear_all_settings(self) -> str: + """ + Clear all settings and cached data. + + Handles the logic for clear_settings MCP tool. + + Returns: + Success message confirming settings were cleared + """ + if self.settings: + self.settings.clear() + + return "Project settings, index, and cache have been cleared." + + def get_settings_stats(self) -> str: + """ + Get settings statistics as JSON string. + + Handles the logic for settings://stats MCP resource. + + Returns: + JSON formatted settings statistics + """ + if not self.settings: + stats_data = {"error": "Settings not available"} + else: + stats_data = self.settings.get_stats() + + return ResponseFormatter.stats_response(stats_data) diff --git a/src/code_index_mcp/services/system_management_service.py b/src/code_index_mcp/services/system_management_service.py new file mode 100644 index 0000000..8cb420d --- /dev/null +++ b/src/code_index_mcp/services/system_management_service.py @@ -0,0 +1,407 @@ +""" +System Management Service - Business logic for system configuration and monitoring. + +This service handles the business logic for system management operations including +file watcher status, configuration management, and system health monitoring. +It composes technical tools to achieve business goals. +""" + +from typing import Dict, Any, Optional +from dataclasses import dataclass +from .index_management_service import IndexManagementService +from .base_service import BaseService +# FileWatcherTool will be imported locally to avoid circular import +from ..tools.config import ProjectConfigTool, SettingsTool + + +@dataclass +class FileWatcherStatus: + """Business result for file watcher status operations.""" + available: bool + active: bool + status: str + message: Optional[str] + error_info: Optional[Dict[str, Any]] + configuration: Dict[str, Any] + rebuild_status: Dict[str, Any] + recommendations: list[str] + + +class SystemManagementService(BaseService): + """ + Business service for system configuration and monitoring. + + This service orchestrates system management workflows by composing + technical tools to achieve business goals like monitoring file watchers, + managing configurations, and providing system health insights. + """ + + def __init__(self, ctx): + super().__init__(ctx) + # Import FileWatcherTool locally to avoid circular import + from ..tools.monitoring import FileWatcherTool + self._watcher_tool = FileWatcherTool(ctx) + self._config_tool = ProjectConfigTool() + self._settings_tool = SettingsTool() + + def get_file_watcher_status(self) -> Dict[str, Any]: + """ + Get comprehensive file watcher status with business intelligence. + + This is the main business method that orchestrates the file watcher + status workflow, analyzing system state, providing recommendations, + and formatting comprehensive status information. 
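+
+        The keys mirror the FileWatcherStatus dataclass (sketch)::
+
+            {'available': True, 'active': True, 'status': 'active',
+             'configuration': {...}, 'rebuild_status': {...},
+             'recommendations': [...]}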
+ + Returns: + Dictionary with comprehensive file watcher status + """ + # Business workflow: Analyze system state + status_result = self._analyze_file_watcher_state() + + # Business result formatting + return self._format_status_result(status_result) + + def configure_file_watcher(self, enabled: Optional[bool] = None, + debounce_seconds: Optional[float] = None, + additional_exclude_patterns: Optional[list] = None) -> str: + """ + Configure file watcher settings with business validation. + + Args: + enabled: Whether to enable file watcher + debounce_seconds: Debounce time in seconds + additional_exclude_patterns: Additional patterns to exclude + + Returns: + Success message with configuration details + + Raises: + ValueError: If configuration is invalid + """ + # Business validation + self._validate_configuration_request(enabled, debounce_seconds, additional_exclude_patterns) + + # Business workflow: Apply configuration + result = self._apply_file_watcher_configuration(enabled, debounce_seconds, additional_exclude_patterns) + + return result + + def _analyze_file_watcher_state(self) -> FileWatcherStatus: + """ + Business logic to analyze comprehensive file watcher state. + + Returns: + FileWatcherStatus with complete analysis + """ + # Business step 1: Check for error conditions + error_info = self._check_for_watcher_errors() + if error_info: + return self._create_error_status(error_info) + + # Business step 2: Check initialization state + watcher_service = self._watcher_tool.get_from_context() + if not watcher_service: + return self._create_not_initialized_status() + + # Business step 3: Get active status + return self._create_active_status(watcher_service) + + def _check_for_watcher_errors(self) -> Optional[Dict[str, Any]]: + """ + Business logic to check for file watcher error conditions. + + Returns: + Error information dictionary or None if no errors + """ + # Check context for recorded errors + if hasattr(self.ctx.request_context.lifespan_context, 'file_watcher_error'): + return self.ctx.request_context.lifespan_context.file_watcher_error + + return None + + def _create_error_status(self, error_info: Dict[str, Any]) -> FileWatcherStatus: + """ + Business logic to create error status with recommendations. + + Args: + error_info: Error information from context + + Returns: + FileWatcherStatus for error condition + """ + # Get configuration if available + configuration = self._get_file_watcher_configuration() + + # Get rebuild status + rebuild_status = self._get_rebuild_status() + + # Business logic: Generate error-specific recommendations + recommendations = [ + "Use refresh_index tool for manual updates", + "File watcher auto-refresh is disabled due to errors", + "Consider restarting the project or checking system permissions" + ] + + return FileWatcherStatus( + available=True, + active=False, + status="error", + message=error_info.get('message', 'File watcher error occurred'), + error_info=error_info, + configuration=configuration, + rebuild_status=rebuild_status, + recommendations=recommendations + ) + + def _create_not_initialized_status(self) -> FileWatcherStatus: + """ + Business logic to create not-initialized status. 
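The three status branches follow a strict precedence: recorded errors win, then the missing-service case, then the live service. A condensed, runnable restatement of that decision order (the function name is ours, not part of the diff):

```python
from typing import Any, Dict, Optional


def classify_watcher_state(error_info: Optional[Dict[str, Any]],
                           watcher_service: Optional[object]) -> str:
    """Condensed restatement of _analyze_file_watcher_state's decision order."""
    if error_info is not None:      # step 1: recorded errors take precedence
        return "error"
    if watcher_service is None:     # step 2: nothing stored in context yet
        return "not_initialized"
    return "active"                 # step 3: defer to the live service's status


assert classify_watcher_state({"status": "failed"}, None) == "error"
assert classify_watcher_state(None, None) == "not_initialized"
```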
+ + Returns: + FileWatcherStatus for not-initialized condition + """ + # Get basic configuration + configuration = self._get_file_watcher_configuration() + + # Get rebuild status + rebuild_status = self._get_rebuild_status() + + # Business logic: Generate initialization recommendations + recommendations = [ + "Use set_project_path tool to initialize file watcher", + "File monitoring will be enabled after project initialization" + ] + + return FileWatcherStatus( + available=True, + active=False, + status="not_initialized", + message="File watcher service not initialized. Set project path to enable auto-refresh.", + error_info=None, + configuration=configuration, + rebuild_status=rebuild_status, + recommendations=recommendations + ) + + def _create_active_status(self, watcher_service) -> FileWatcherStatus: + """ + Business logic to create active status with comprehensive information. + + Args: + watcher_service: Active file watcher service + + Returns: + FileWatcherStatus for active condition + """ + # Get detailed status from watcher service + watcher_status = watcher_service.get_status() + + # Get configuration + configuration = self._get_file_watcher_configuration() + + # Get rebuild status + rebuild_status = self._get_rebuild_status() + + # Business logic: Generate status-specific recommendations + recommendations = self._generate_active_recommendations(watcher_status) + + return FileWatcherStatus( + available=watcher_status.get('available', True), + active=watcher_status.get('active', False), + status=watcher_status.get('status', 'active'), + message=watcher_status.get('message'), + error_info=None, + configuration=configuration, + rebuild_status=rebuild_status, + recommendations=recommendations + ) + + def _get_file_watcher_configuration(self) -> Dict[str, Any]: + """ + Business logic to get file watcher configuration safely. + + Returns: + Configuration dictionary + """ + try: + # Try to get from project settings + if (hasattr(self.ctx.request_context.lifespan_context, 'settings') and + self.ctx.request_context.lifespan_context.settings): + return self.ctx.request_context.lifespan_context.settings.get_file_watcher_config() + + # Fallback to default configuration + return { + 'enabled': True, + 'debounce_seconds': 6.0, + 'additional_exclude_patterns': [], + 'note': 'Default configuration - project not fully initialized' + } + + except Exception as e: + return { + 'error': f'Could not load configuration: {e}', + 'enabled': True, + 'debounce_seconds': 6.0 + } + + def _get_rebuild_status(self) -> Dict[str, Any]: + """ + Business logic to get index rebuild status safely. + + Returns: + Rebuild status dictionary + """ + try: + index_service = IndexManagementService(self.ctx) + return index_service.get_rebuild_status() + + except Exception as e: + return { + 'status': 'unknown', + 'error': f'Could not get rebuild status: {e}' + } + + def _generate_active_recommendations(self, watcher_status: Dict[str, Any]) -> list[str]: + """ + Business logic to generate recommendations for active file watcher. 
+ + Args: + watcher_status: Current watcher status + + Returns: + List of recommendations + """ + recommendations = [] + + if watcher_status.get('active', False): + recommendations.append("File watcher is active - automatic index updates enabled") + recommendations.append("Files will be re-indexed automatically when changed") + else: + recommendations.append("File watcher is available but not active") + recommendations.append("Use refresh_index for manual updates") + + # Add performance recommendations + restart_attempts = watcher_status.get('restart_attempts', 0) + if restart_attempts > 0: + recommendations.append(f"File watcher has restarted {restart_attempts} times - monitor for stability") + + return recommendations + + def _validate_configuration_request(self, enabled: Optional[bool], + debounce_seconds: Optional[float], + additional_exclude_patterns: Optional[list]) -> None: + """ + Business validation for file watcher configuration. + + Args: + enabled: Enable flag + debounce_seconds: Debounce time + additional_exclude_patterns: Exclude patterns + + Raises: + ValueError: If validation fails + """ + # Business rule: Enabled flag must be boolean if provided + if enabled is not None and not isinstance(enabled, bool): + raise ValueError("Enabled flag must be a boolean value") + + # Business rule: Debounce seconds must be reasonable + if debounce_seconds is not None: + if debounce_seconds < 0.1: + raise ValueError("Debounce seconds must be at least 0.1") + if debounce_seconds > 300: # 5 minutes + raise ValueError("Debounce seconds cannot exceed 300 (5 minutes)") + + # Business rule: Exclude patterns must be valid + if additional_exclude_patterns is not None: + if not isinstance(additional_exclude_patterns, list): + raise ValueError("Additional exclude patterns must be a list") + + for pattern in additional_exclude_patterns: + if not isinstance(pattern, str): + raise ValueError("All exclude patterns must be strings") + if not pattern.strip(): + raise ValueError("Exclude patterns cannot be empty") + + def _apply_file_watcher_configuration(self, enabled: Optional[bool], + debounce_seconds: Optional[float], + additional_exclude_patterns: Optional[list]) -> str: + """ + Business logic to apply file watcher configuration. 
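A usage sketch for the configuration path, assuming `ctx` is an initialized MCP context whose project settings are already loaded; the messages shown are the ones produced by the validation and apply helpers above:

```python
from code_index_mcp.services.system_management_service import SystemManagementService

ctx = ...  # an active MCP Context (assumed, not constructed here)
service = SystemManagementService(ctx)

print(service.configure_file_watcher(enabled=True, debounce_seconds=2.0,
                                     additional_exclude_patterns=["*.log"]))
# -> "File watcher configuration updated: enabled=True, debounce=2.0s, exclude_patterns=1. ..."

try:
    service.configure_file_watcher(debounce_seconds=0.01)
except ValueError as exc:
    print(exc)  # "Debounce seconds must be at least 0.1"
```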
+ + Args: + enabled: Enable flag + debounce_seconds: Debounce time + additional_exclude_patterns: Exclude patterns + + Returns: + Success message + + Raises: + ValueError: If configuration cannot be applied + """ + # Business rule: Settings must be available + if (not hasattr(self.ctx.request_context.lifespan_context, 'settings') or + not self.ctx.request_context.lifespan_context.settings): + raise ValueError("Settings not available - project path not set") + + settings = self.ctx.request_context.lifespan_context.settings + + # Build updates dictionary + updates = {} + if enabled is not None: + updates["enabled"] = enabled + if debounce_seconds is not None: + updates["debounce_seconds"] = debounce_seconds + if additional_exclude_patterns is not None: + updates["additional_exclude_patterns"] = additional_exclude_patterns + + if not updates: + return "No configuration changes specified" + + # Apply configuration + settings.update_file_watcher_config(updates) + + # Business logic: Generate informative result message + changes_summary = [] + if 'enabled' in updates: + changes_summary.append(f"enabled={updates['enabled']}") + if 'debounce_seconds' in updates: + changes_summary.append(f"debounce={updates['debounce_seconds']}s") + if 'additional_exclude_patterns' in updates: + pattern_count = len(updates['additional_exclude_patterns']) + changes_summary.append(f"exclude_patterns={pattern_count}") + + changes_str = ", ".join(changes_summary) + + return (f"File watcher configuration updated: {changes_str}. " + f"Restart may be required for changes to take effect.") + + def _format_status_result(self, status_result: FileWatcherStatus) -> Dict[str, Any]: + """ + Format the status result according to business requirements. + + Args: + status_result: Status analysis result + + Returns: + Formatted result dictionary for MCP response + """ + result = { + 'available': status_result.available, + 'active': status_result.active, + 'status': status_result.status, + 'configuration': status_result.configuration, + 'rebuild_status': status_result.rebuild_status, + 'recommendations': status_result.recommendations + } + + # Add optional fields + if status_result.message: + result['message'] = status_result.message + + if status_result.error_info: + result['error'] = status_result.error_info + result['manual_refresh_required'] = True + + return result diff --git a/src/code_index_mcp/tools/__init__.py b/src/code_index_mcp/tools/__init__.py new file mode 100644 index 0000000..f69d664 --- /dev/null +++ b/src/code_index_mcp/tools/__init__.py @@ -0,0 +1,19 @@ +""" +Tool Layer - Technical components for the Code Index MCP server. + +This package contains pure technical components that provide specific +capabilities without business logic. These tools are composed by the +business layer to achieve business goals. +""" + +from .filesystem import FileMatchingTool, FileSystemTool +from .config import ProjectConfigTool, SettingsTool +from .monitoring import FileWatcherTool + +__all__ = [ + 'FileMatchingTool', + 'FileSystemTool', + 'ProjectConfigTool', + 'SettingsTool', + 'FileWatcherTool' +] diff --git a/src/code_index_mcp/tools/config/__init__.py b/src/code_index_mcp/tools/config/__init__.py new file mode 100644 index 0000000..12d4304 --- /dev/null +++ b/src/code_index_mcp/tools/config/__init__.py @@ -0,0 +1,8 @@ +""" +Configuration Tools - Technical components for configuration management. 
+""" + +from .project_config_tool import ProjectConfigTool +from .settings_tool import SettingsTool + +__all__ = ['ProjectConfigTool', 'SettingsTool'] diff --git a/src/code_index_mcp/tools/config/project_config_tool.py b/src/code_index_mcp/tools/config/project_config_tool.py new file mode 100644 index 0000000..c2738dd --- /dev/null +++ b/src/code_index_mcp/tools/config/project_config_tool.py @@ -0,0 +1,308 @@ +""" +Project Configuration Tool - Pure technical component for project configuration operations. + +This tool handles low-level project configuration operations without any business logic. +""" + +import os +from typing import Dict, Any, Optional +from pathlib import Path + +from ...project_settings import ProjectSettings + + +class ProjectConfigTool: + """ + Pure technical component for project configuration operations. + + This tool provides low-level configuration management capabilities + without any business logic or decision making. + """ + + def __init__(self): + self._settings: Optional[ProjectSettings] = None + self._project_path: Optional[str] = None + + def initialize_settings(self, project_path: str) -> ProjectSettings: + """ + Initialize project settings for the given path. + + Args: + project_path: Absolute path to the project directory + + Returns: + ProjectSettings instance + + Raises: + ValueError: If project path is invalid + """ + if not Path(project_path).exists(): + raise ValueError(f"Project path does not exist: {project_path}") + + if not Path(project_path).is_dir(): + raise ValueError(f"Project path is not a directory: {project_path}") + + self._project_path = project_path + self._settings = ProjectSettings(project_path, skip_load=False) + + return self._settings + + def load_existing_index(self) -> Optional[Dict[str, Any]]: + """ + Load existing index data if available. + + Returns: + Index data dictionary or None if not available + + Raises: + RuntimeError: If settings not initialized + """ + if not self._settings: + raise RuntimeError("Settings not initialized. Call initialize_settings() first.") + + try: + return self._settings.load_index() + except Exception: + return None + + def save_project_config(self, config_data: Dict[str, Any]) -> None: + """ + Save project configuration data. + + Args: + config_data: Configuration data to save + + Raises: + RuntimeError: If settings not initialized + """ + if not self._settings: + raise RuntimeError("Settings not initialized") + + self._settings.save_config(config_data) + + def save_index_data(self, index_data: Dict[str, Any]) -> None: + """ + Save index data to persistent storage. + + Args: + index_data: Index data to save + + Raises: + RuntimeError: If settings not initialized + """ + if not self._settings: + raise RuntimeError("Settings not initialized") + + self._settings.save_index(index_data) + + def check_index_version(self) -> bool: + """ + Check if JSON index is the latest version. 
+ + Returns: + True if JSON index exists and is recent, False if needs rebuild + + Raises: + RuntimeError: If settings not initialized + """ + if not self._settings: + raise RuntimeError("Settings not initialized") + + # Check if JSON index exists and is fresh + from ...indexing import get_index_manager + index_manager = get_index_manager() + + # Set project path if available + if self._settings.base_path: + index_manager.set_project_path(self._settings.base_path) + stats = index_manager.get_index_stats() + return stats.get('status') == 'loaded' + + return False + + def cleanup_legacy_files(self) -> None: + """ + Clean up legacy index files. + + Raises: + RuntimeError: If settings not initialized + """ + if not self._settings: + raise RuntimeError("Settings not initialized") + + self._settings.cleanup_legacy_files() + + def get_search_tool_info(self) -> Dict[str, Any]: + """ + Get information about available search tools. + + Returns: + Dictionary with search tool information + + Raises: + RuntimeError: If settings not initialized + """ + if not self._settings: + raise RuntimeError("Settings not initialized") + + search_tool = self._settings.get_preferred_search_tool() + return { + 'available': search_tool is not None, + 'name': search_tool.name if search_tool else None, + 'description': "Advanced search enabled" if search_tool else "Basic search available" + } + + def get_file_watcher_config(self) -> Dict[str, Any]: + """ + Get file watcher configuration. + + Returns: + File watcher configuration dictionary + + Raises: + RuntimeError: If settings not initialized + """ + if not self._settings: + raise RuntimeError("Settings not initialized") + + return self._settings.get_file_watcher_config() + + def create_default_config(self, project_path: str) -> Dict[str, Any]: + """ + Create default project configuration. + + Args: + project_path: Project path for the configuration + + Returns: + Default configuration dictionary + """ + from ...utils import FileFilter + + file_filter = FileFilter() + return { + "base_path": project_path, + "supported_extensions": list(file_filter.supported_extensions), + "last_indexed": None, + "file_watcher": self.get_file_watcher_config() if self._settings else {} + } + + def validate_project_path(self, path: str) -> Optional[str]: + """ + Validate project path. + + Args: + path: Path to validate + + Returns: + Error message if invalid, None if valid + """ + if not path or not path.strip(): + return "Project path cannot be empty" + + try: + norm_path = os.path.normpath(path) + abs_path = os.path.abspath(norm_path) + except (OSError, ValueError) as e: + return f"Invalid path format: {str(e)}" + + if not os.path.exists(abs_path): + return f"Path does not exist: {abs_path}" + + if not os.path.isdir(abs_path): + return f"Path is not a directory: {abs_path}" + + return None + + def normalize_project_path(self, path: str) -> str: + """ + Normalize and get absolute project path. + + Args: + path: Path to normalize + + Returns: + Normalized absolute path + """ + norm_path = os.path.normpath(path) + return os.path.abspath(norm_path) + + def get_settings_path(self) -> Optional[str]: + """ + Get the settings directory path. + + Returns: + Settings directory path or None if not initialized + """ + return self._settings.settings_path if self._settings else None + + def get_project_path(self) -> Optional[str]: + """ + Get the current project path. 
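The validate-then-normalize pattern returns error strings instead of raising, which keeps MCP tool responses uniform. A short sketch with a hypothetical input path:

```python
from code_index_mcp.tools.config import ProjectConfigTool

tool = ProjectConfigTool()
candidate = "./my-project"  # hypothetical user input

# Validation reports problems as a message instead of an exception, so the
# MCP layer can surface the string directly as a tool response.
error = tool.validate_project_path(candidate)
if error:
    print(f"Error: {error}")
else:
    print(tool.normalize_project_path(candidate))
```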
+ + Returns: + Project path or None if not set + """ + return self._project_path + + def get_basic_project_structure(self, project_path: str) -> Dict[str, Any]: + """ + Get basic project directory structure. + + Args: + project_path: Path to analyze + + Returns: + Basic directory structure dictionary + """ + from ...utils import FileFilter + + file_filter = FileFilter() + + def build_tree(path: str, max_depth: int = 3, current_depth: int = 0) -> Dict[str, Any]: + """Build directory tree with limited depth using centralized filtering.""" + if current_depth >= max_depth: + return {"type": "directory", "truncated": True} + + try: + items = [] + path_obj = Path(path) + + for item in sorted(path_obj.iterdir()): + if item.is_dir(): + # Use centralized directory filtering + if not file_filter.should_exclude_directory(item.name): + items.append({ + "name": item.name, + "type": "directory", + "children": build_tree(str(item), max_depth, current_depth + 1) + }) + else: + # Use centralized file filtering + if not file_filter.should_exclude_file(item): + items.append({ + "name": item.name, + "type": "file", + "size": item.stat().st_size if item.exists() else 0 + }) + + return {"type": "directory", "children": items} + + except (OSError, PermissionError): + return {"type": "directory", "error": "Access denied"} + + try: + root_name = Path(project_path).name + structure = { + "name": root_name, + "path": project_path, + "type": "directory", + "children": build_tree(project_path)["children"] + } + return structure + + except Exception as e: + return { + "error": f"Failed to build project structure: {e}", + "path": project_path + } diff --git a/src/code_index_mcp/tools/config/settings_tool.py b/src/code_index_mcp/tools/config/settings_tool.py new file mode 100644 index 0000000..51fe0dc --- /dev/null +++ b/src/code_index_mcp/tools/config/settings_tool.py @@ -0,0 +1,100 @@ +""" +Settings Tool - Pure technical component for settings operations. + +This tool handles low-level settings operations without any business logic. +""" + +import os +import tempfile +from typing import Dict, Any + +from ...constants import SETTINGS_DIR + + +class SettingsTool: + """ + Pure technical component for settings operations. + + This tool provides low-level settings management capabilities + without any business logic or decision making. + """ + + def __init__(self): + pass + + def get_temp_directory_path(self) -> str: + """ + Get the path to the temporary directory for settings. + + Returns: + Path to the temporary settings directory + """ + return os.path.join(tempfile.gettempdir(), SETTINGS_DIR) + + def create_temp_directory(self) -> Dict[str, Any]: + """ + Create the temporary directory for settings. + + Returns: + Dictionary with creation results + """ + temp_dir = self.get_temp_directory_path() + existed_before = os.path.exists(temp_dir) + + try: + os.makedirs(temp_dir, exist_ok=True) + + return { + "temp_directory": temp_dir, + "exists": os.path.exists(temp_dir), + "is_directory": os.path.isdir(temp_dir), + "existed_before": existed_before, + "created": not existed_before + } + + except (OSError, IOError) as e: + return { + "temp_directory": temp_dir, + "exists": False, + "error": str(e) + } + + def check_temp_directory(self) -> Dict[str, Any]: + """ + Check the status of the temporary directory. 
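The `build_tree` helper above caps recursion depth rather than walking the whole tree. A stripped-down standalone version with the same shape (filtering and permission handling omitted):

```python
from pathlib import Path


def shallow_tree(path: Path, max_depth: int = 3) -> dict:
    """Depth-limited walk in the spirit of build_tree above (no filtering)."""
    if max_depth == 0:
        return {"type": "directory", "truncated": True}
    children = []
    for item in sorted(path.iterdir()):
        if item.is_dir():
            # Recurse with a smaller budget; deep subtrees get truncated.
            children.append({"name": item.name, **shallow_tree(item, max_depth - 1)})
        else:
            children.append({"name": item.name, "type": "file",
                             "size": item.stat().st_size})
    return {"type": "directory", "children": children}
```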
+
+        Returns:
+            Dictionary with directory status information
+        """
+        temp_dir = self.get_temp_directory_path()
+
+        result = {
+            "temp_directory": temp_dir,
+            "temp_root": tempfile.gettempdir(),
+            "exists": os.path.exists(temp_dir),
+            "is_directory": os.path.isdir(temp_dir) if os.path.exists(temp_dir) else False
+        }
+
+        # If the directory exists, list its contents
+        if result["exists"] and result["is_directory"]:
+            try:
+                contents = os.listdir(temp_dir)
+                result["contents"] = contents
+                result["subdirectories"] = []
+
+                # Check each subdirectory
+                for item in contents:
+                    item_path = os.path.join(temp_dir, item)
+                    if os.path.isdir(item_path):
+                        subdir_info = {
+                            "name": item,
+                            "path": item_path,
+                            "contents": os.listdir(item_path) if os.path.exists(item_path) else []
+                        }
+                        result["subdirectories"].append(subdir_info)
+
+            except (OSError, PermissionError) as e:
+                result["error"] = str(e)
+
+        return result
+
diff --git a/src/code_index_mcp/tools/filesystem/__init__.py b/src/code_index_mcp/tools/filesystem/__init__.py
new file mode 100644
index 0000000..e8f9798
--- /dev/null
+++ b/src/code_index_mcp/tools/filesystem/__init__.py
@@ -0,0 +1,8 @@
+"""
+Filesystem Tools - Technical components for file system operations.
+"""
+
+from .file_matching_tool import FileMatchingTool
+from .file_system_tool import FileSystemTool
+
+__all__ = ['FileMatchingTool', 'FileSystemTool']
diff --git a/src/code_index_mcp/tools/filesystem/file_matching_tool.py b/src/code_index_mcp/tools/filesystem/file_matching_tool.py
new file mode 100644
index 0000000..22ebdf6
--- /dev/null
+++ b/src/code_index_mcp/tools/filesystem/file_matching_tool.py
@@ -0,0 +1,215 @@
+"""
+File Matching Tool - Pure technical component for pattern matching operations.
+
+This tool handles file pattern matching without any business logic.
+It provides technical capabilities for finding files based on various patterns.
+"""
+
+import fnmatch
+from typing import List, Set
+from pathlib import Path
+
+# FileInfo defined locally for file matching operations
+from dataclasses import dataclass
+
+@dataclass(frozen=True)
+class FileInfo:
+    """File information structure (frozen so instances are hashable in sets)."""
+    relative_path: str
+    language: str
+
+
+class FileMatchingTool:
+    """
+    Pure technical component for file pattern matching.
+
+    This tool provides low-level pattern matching capabilities without
+    any business logic. It can match files using glob patterns, regex,
+    or other matching strategies.
+    """
+
+    def __init__(self):
+        pass
+
+    def match_glob_pattern(self, files: List[FileInfo], pattern: str) -> List[FileInfo]:
+        """
+        Match files using glob pattern.
+
+        Args:
+            files: List of FileInfo objects to search through
+            pattern: Glob pattern (e.g., "*.py", "test_*.js", "src/**/*.ts")
+
+        Returns:
+            List of FileInfo objects that match the pattern
+        """
+        if not pattern:
+            return files
+
+        matched_files = []
+
+        for file_info in files:
+            # Try matching against full path
+            if fnmatch.fnmatch(file_info.relative_path, pattern):
+                matched_files.append(file_info)
+                continue
+
+            # Try matching against just the filename
+            filename = Path(file_info.relative_path).name
+            if fnmatch.fnmatch(filename, pattern):
+                matched_files.append(file_info)
+
+        return matched_files
+
+    def match_multiple_patterns(self, files: List[FileInfo], patterns: List[str]) -> List[FileInfo]:
+        """
+        Match files using multiple glob patterns (OR logic).
+ + Args: + files: List of FileInfo objects to search through + patterns: List of glob patterns + + Returns: + List of FileInfo objects that match any of the patterns + """ + if not patterns: + return files + + matched_files = set() + + for pattern in patterns: + pattern_matches = self.match_glob_pattern(files, pattern) + matched_files.update(pattern_matches) + + return list(matched_files) + + def match_by_language(self, files: List[FileInfo], languages: List[str]) -> List[FileInfo]: + """ + Match files by programming language. + + Args: + files: List of FileInfo objects to search through + languages: List of language names (e.g., ["python", "javascript"]) + + Returns: + List of FileInfo objects with matching languages + """ + if not languages: + return files + + # Normalize language names for comparison + normalized_languages = {lang.lower() for lang in languages} + + matched_files = [] + for file_info in files: + if file_info.language.lower() in normalized_languages: + matched_files.append(file_info) + + return matched_files + + def match_by_directory(self, files: List[FileInfo], directory_patterns: List[str]) -> List[FileInfo]: + """ + Match files by directory patterns. + + Args: + files: List of FileInfo objects to search through + directory_patterns: List of directory patterns (e.g., ["src/*", "test/**"]) + + Returns: + List of FileInfo objects in matching directories + """ + if not directory_patterns: + return files + + matched_files = [] + + for file_info in files: + file_dir = str(Path(file_info.relative_path).parent) + + for dir_pattern in directory_patterns: + if fnmatch.fnmatch(file_dir, dir_pattern): + matched_files.append(file_info) + break + + return matched_files + + def exclude_patterns(self, files: List[FileInfo], exclude_patterns: List[str]) -> List[FileInfo]: + """ + Exclude files matching the given patterns. + + Args: + files: List of FileInfo objects to filter + exclude_patterns: List of patterns to exclude + + Returns: + List of FileInfo objects that don't match any exclude pattern + """ + if not exclude_patterns: + return files + + filtered_files = [] + + for file_info in files: + should_exclude = False + + for exclude_pattern in exclude_patterns: + if (fnmatch.fnmatch(file_info.relative_path, exclude_pattern) or + fnmatch.fnmatch(Path(file_info.relative_path).name, exclude_pattern)): + should_exclude = True + break + + if not should_exclude: + filtered_files.append(file_info) + + return filtered_files + + def sort_by_relevance(self, files: List[FileInfo], pattern: str) -> List[FileInfo]: + """ + Sort files by relevance to the search pattern. 
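A sketch of multi-pattern matching; the set-based union in `match_multiple_patterns` is what motivates making `FileInfo` a frozen (hashable) dataclass, so a file matching several patterns is still returned once:

```python
from code_index_mcp.tools.filesystem import FileMatchingTool
from code_index_mcp.tools.filesystem.file_matching_tool import FileInfo

files = [
    FileInfo("src/app.py", "python"),
    FileInfo("src/app.test.ts", "typescript"),
    FileInfo("docs/readme.md", "markdown"),
]

matcher = FileMatchingTool()
hits = matcher.match_multiple_patterns(files, ["*.py", "*.ts"])

# OR logic: a file matching any pattern is kept, and duplicates are collapsed.
assert {f.relative_path for f in hits} == {"src/app.py", "src/app.test.ts"}
```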
+ + Args: + files: List of FileInfo objects to sort + pattern: Original search pattern for relevance scoring + + Returns: + List of FileInfo objects sorted by relevance (most relevant first) + """ + def relevance_score(file_info: FileInfo) -> int: + """Calculate relevance score for a file.""" + score = 0 + filename = Path(file_info.relative_path).name + + # Exact filename match gets highest score + if filename == pattern: + score += 100 + + # Filename starts with pattern + elif filename.startswith(pattern.replace('*', '')): + score += 50 + + # Pattern appears in filename + elif pattern.replace('*', '') in filename: + score += 25 + + # Shorter paths are generally more relevant + path_depth = len(Path(file_info.relative_path).parts) + score += max(0, 10 - path_depth) + + return score + + return sorted(files, key=relevance_score, reverse=True) + + def limit_results(self, files: List[FileInfo], max_results: int) -> List[FileInfo]: + """ + Limit the number of results returned. + + Args: + files: List of FileInfo objects + max_results: Maximum number of results to return + + Returns: + List of FileInfo objects limited to max_results + """ + if max_results <= 0: + return files + + return files[:max_results] diff --git a/src/code_index_mcp/tools/filesystem/file_system_tool.py b/src/code_index_mcp/tools/filesystem/file_system_tool.py new file mode 100644 index 0000000..fac9f5d --- /dev/null +++ b/src/code_index_mcp/tools/filesystem/file_system_tool.py @@ -0,0 +1,234 @@ +""" +File System Tool - Pure technical component for file system operations. + +This tool handles low-level file system operations without any business logic. +""" + +import os +from typing import Dict, Any, Optional +from pathlib import Path + + +class FileSystemTool: + """ + Pure technical component for file system operations. + + This tool provides low-level file system capabilities without + any business logic or decision making. + """ + + def __init__(self): + pass + + def get_file_stats(self, file_path: str) -> Dict[str, Any]: + """ + Get basic file system statistics for a file. + + Args: + file_path: Absolute path to the file + + Returns: + Dictionary with file statistics + + Raises: + FileNotFoundError: If file doesn't exist + OSError: If file cannot be accessed + """ + if not os.path.exists(file_path): + raise FileNotFoundError(f"File not found: {file_path}") + + try: + stat_info = os.stat(file_path) + path_obj = Path(file_path) + + return { + 'size_bytes': stat_info.st_size, + 'modified_time': stat_info.st_mtime, + 'created_time': stat_info.st_ctime, + 'is_file': path_obj.is_file(), + 'is_directory': path_obj.is_dir(), + 'extension': path_obj.suffix, + 'name': path_obj.name, + 'parent': str(path_obj.parent) + } + + except OSError as e: + raise OSError(f"Cannot access file {file_path}: {e}") from e + + def read_file_content(self, file_path: str) -> str: + """ + Read file content with intelligent encoding detection. 
+ + Args: + file_path: Absolute path to the file + + Returns: + File content as string + + Raises: + FileNotFoundError: If file doesn't exist + ValueError: If file cannot be decoded + """ + if not os.path.exists(file_path): + raise FileNotFoundError(f"File not found: {file_path}") + + # Try UTF-8 first (most common) + try: + with open(file_path, 'r', encoding='utf-8') as f: + return f.read() + except UnicodeDecodeError: + pass + + # Try other common encodings + encodings = ['utf-8-sig', 'latin-1', 'cp1252', 'iso-8859-1'] + for encoding in encodings: + try: + with open(file_path, 'r', encoding=encoding) as f: + return f.read() + except UnicodeDecodeError: + continue + + raise ValueError(f"Could not decode file {file_path} with any supported encoding") + + def count_lines(self, file_path: str) -> int: + """ + Count the number of lines in a file. + + Args: + file_path: Absolute path to the file + + Returns: + Number of lines in the file + + Raises: + FileNotFoundError: If file doesn't exist + """ + try: + content = self.read_file_content(file_path) + return len(content.splitlines()) + except Exception: + # If we can't read the file, return 0 + return 0 + + def detect_language_from_extension(self, file_path: str) -> str: + """ + Detect programming language from file extension. + + Args: + file_path: Path to the file + + Returns: + Language name or 'unknown' + """ + extension = Path(file_path).suffix.lower() + + lang_map = { + '.py': 'python', + '.js': 'javascript', + '.jsx': 'javascript', + '.ts': 'typescript', + '.tsx': 'typescript', + '.java': 'java', + '.cpp': 'cpp', + '.cxx': 'cpp', + '.cc': 'cpp', + '.c': 'c', + '.h': 'c', + '.hpp': 'cpp', + '.hxx': 'cpp', + '.cs': 'csharp', + '.go': 'go', + '.rs': 'rust', + '.php': 'php', + '.rb': 'ruby', + '.swift': 'swift', + '.kt': 'kotlin', + '.scala': 'scala', + '.m': 'objc', + '.mm': 'objc', + '.html': 'html', + '.htm': 'html', + '.css': 'css', + '.scss': 'scss', + '.sass': 'sass', + '.less': 'less', + '.json': 'json', + '.xml': 'xml', + '.yaml': 'yaml', + '.yml': 'yaml', + '.md': 'markdown', + '.txt': 'text', + '.sh': 'shell', + '.bash': 'shell', + '.zsh': 'shell', + '.fish': 'shell', + '.ps1': 'powershell', + '.bat': 'batch', + '.cmd': 'batch' + } + + return lang_map.get(extension, 'unknown') + + def is_text_file(self, file_path: str) -> bool: + """ + Check if a file is likely a text file. + + Args: + file_path: Path to the file + + Returns: + True if file appears to be text, False otherwise + """ + try: + # Try to read a small portion of the file + with open(file_path, 'rb') as f: + chunk = f.read(1024) + + # Check for null bytes (common in binary files) + if b'\x00' in chunk: + return False + + # Try to decode as UTF-8 + try: + chunk.decode('utf-8') + return True + except UnicodeDecodeError: + # Try other encodings + for encoding in ['latin-1', 'cp1252']: + try: + chunk.decode(encoding) + return True + except UnicodeDecodeError: + continue + + return False + + except Exception: + return False + + def get_file_size_category(self, file_path: str) -> str: + """ + Categorize file size for analysis purposes. 
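The extension map is effectively case-insensitive because the suffix is lower-cased before lookup, and unknown extensions fall back to `'unknown'`. A quick check, assuming the package import path from this diff:

```python
from code_index_mcp.tools.filesystem import FileSystemTool

fs = FileSystemTool()
for name in ("main.rs", "App.TSX", "notes.xyz"):
    print(name, "->", fs.detect_language_from_extension(name))
# main.rs -> rust
# App.TSX -> typescript  (suffix is lower-cased before the lookup)
# notes.xyz -> unknown
```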
+ + Args: + file_path: Path to the file + + Returns: + Size category: 'small', 'medium', 'large', or 'very_large' + """ + try: + size = os.path.getsize(file_path) + + if size < 1024: # < 1KB + return 'tiny' + elif size < 10 * 1024: # < 10KB + return 'small' + elif size < 100 * 1024: # < 100KB + return 'medium' + elif size < 1024 * 1024: # < 1MB + return 'large' + else: + return 'very_large' + + except Exception: + return 'unknown' diff --git a/src/code_index_mcp/tools/monitoring/__init__.py b/src/code_index_mcp/tools/monitoring/__init__.py new file mode 100644 index 0000000..6da231e --- /dev/null +++ b/src/code_index_mcp/tools/monitoring/__init__.py @@ -0,0 +1,7 @@ +""" +Monitoring Tools - Technical components for file monitoring operations. +""" + +from .file_watcher_tool import FileWatcherTool + +__all__ = ['FileWatcherTool'] \ No newline at end of file diff --git a/src/code_index_mcp/tools/monitoring/file_watcher_tool.py b/src/code_index_mcp/tools/monitoring/file_watcher_tool.py new file mode 100644 index 0000000..3671952 --- /dev/null +++ b/src/code_index_mcp/tools/monitoring/file_watcher_tool.py @@ -0,0 +1,134 @@ +""" +File Watcher Tool - Pure technical component for file monitoring operations. + +This tool handles low-level file watching operations without any business logic. +""" + +import time +from typing import Optional, Callable +from ...utils import ContextHelper +from ...services.file_watcher_service import FileWatcherService + + +class FileWatcherTool: + """ + Pure technical component for file monitoring operations. + + This tool provides low-level file watching capabilities without + any business logic or decision making. + """ + + def __init__(self, ctx): + self._ctx = ctx + self._file_watcher_service: Optional[FileWatcherService] = None + + + def create_watcher(self) -> FileWatcherService: + """ + Create a new file watcher service instance. + + Returns: + FileWatcherService instance + """ + self._file_watcher_service = FileWatcherService(self._ctx) + return self._file_watcher_service + + def start_monitoring(self, project_path: str, rebuild_callback: Callable) -> bool: + """ + Start file monitoring for the given project path. + + Args: + project_path: Path to monitor + rebuild_callback: Callback function for rebuild events + + Returns: + True if monitoring started successfully, False otherwise + """ + if not self._file_watcher_service: + self._file_watcher_service = self.create_watcher() + + # Validate that the project path matches the expected base path + helper = ContextHelper(self._ctx) + if helper.base_path and helper.base_path != project_path: + pass + + return self._file_watcher_service.start_monitoring(rebuild_callback) + + def stop_monitoring(self) -> None: + """Stop file monitoring if active.""" + if self._file_watcher_service: + self._file_watcher_service.stop_monitoring() + + def is_monitoring_active(self) -> bool: + """ + Check if file monitoring is currently active. + + Returns: + True if monitoring is active, False otherwise + """ + return (self._file_watcher_service is not None and + self._file_watcher_service.is_active()) + + def get_monitoring_status(self) -> dict: + """ + Get current monitoring status. 
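A lifecycle sketch for `FileWatcherTool`, assuming an active MCP context `ctx` and a hypothetical rebuild callback; `record_error` is the failure path that `get_file_watcher_status` later surfaces:

```python
from code_index_mcp.tools.monitoring import FileWatcherTool

ctx = ...  # an active MCP Context (assumed, not constructed here)
watcher = FileWatcherTool(ctx)
watcher.create_watcher()


def rebuild_index() -> None:
    ...  # e.g. delegate to IndexManagementService (illustrative)


if watcher.start_monitoring("/work/my-project", rebuild_index):
    watcher.store_in_context()   # later callers find it via get_from_context()
else:
    watcher.record_error("file watcher failed to start")
```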
+ + Returns: + Dictionary with monitoring status information + """ + if not self._file_watcher_service: + return { + 'active': False, + 'available': True, + 'status': 'not_initialized' + } + + return self._file_watcher_service.get_status() + + def store_in_context(self) -> None: + """Store the file watcher service in the MCP context.""" + if (self._file_watcher_service and + hasattr(self._ctx.request_context.lifespan_context, '__dict__')): + self._ctx.request_context.lifespan_context.file_watcher_service = self._file_watcher_service + + def get_from_context(self) -> Optional[FileWatcherService]: + """ + Get existing file watcher service from context. + + Returns: + FileWatcherService instance or None if not found + """ + if hasattr(self._ctx.request_context.lifespan_context, 'file_watcher_service'): + return self._ctx.request_context.lifespan_context.file_watcher_service + return None + + def stop_existing_watcher(self) -> None: + """Stop any existing file watcher from context.""" + existing_watcher = self.get_from_context() + if existing_watcher: + + existing_watcher.stop_monitoring() + # Clear reference + if hasattr(self._ctx.request_context.lifespan_context, '__dict__'): + self._ctx.request_context.lifespan_context.file_watcher_service = None + + + def record_error(self, error_message: str) -> None: + """ + Record file watcher error in context for status reporting. + + Args: + error_message: Error message to record + """ + error_info = { + 'status': 'failed', + 'message': f'{error_message}. Auto-refresh disabled. Please use manual refresh.', + 'timestamp': time.time(), + 'manual_refresh_required': True + } + + # Store error in context for status reporting + if hasattr(self._ctx.request_context.lifespan_context, '__dict__'): + self._ctx.request_context.lifespan_context.file_watcher_error = error_info + + diff --git a/src/code_index_mcp/utils/__init__.py b/src/code_index_mcp/utils/__init__.py new file mode 100644 index 0000000..cd3fb92 --- /dev/null +++ b/src/code_index_mcp/utils/__init__.py @@ -0,0 +1,25 @@ +""" +Utility modules for the Code Index MCP server. + +This package contains shared utilities used across services: +- error_handler: Decorator-based error handling for MCP entry points +- context_helper: Context access utilities and helpers +- validation: Common validation logic +- response_formatter: Response formatting utilities +""" + +from .error_handler import handle_mcp_errors, handle_mcp_resource_errors, handle_mcp_tool_errors +from .context_helper import ContextHelper +from .validation import ValidationHelper +from .response_formatter import ResponseFormatter +from .file_filter import FileFilter + +__all__ = [ + 'handle_mcp_errors', + 'handle_mcp_resource_errors', + 'handle_mcp_tool_errors', + 'ContextHelper', + 'ValidationHelper', + 'ResponseFormatter', + 'FileFilter' +] \ No newline at end of file diff --git a/src/code_index_mcp/utils/context_helper.py b/src/code_index_mcp/utils/context_helper.py new file mode 100644 index 0000000..1ed5fa6 --- /dev/null +++ b/src/code_index_mcp/utils/context_helper.py @@ -0,0 +1,169 @@ +""" +Context access utilities and helpers. + +This module provides convenient access to MCP Context data and common +operations that services need to perform with the context. +""" + +import os +from typing import Optional +from mcp.server.fastmcp import Context + +from ..project_settings import ProjectSettings + + +class ContextHelper: + """ + Helper class for convenient access to MCP Context data. 
+ + This class wraps the MCP Context object and provides convenient properties + and methods for accessing commonly needed data like base_path, settings, etc. + """ + + def __init__(self, ctx: Context): + """ + Initialize the context helper. + + Args: + ctx: The MCP Context object + """ + self.ctx = ctx + + @property + def base_path(self) -> str: + """ + Get the base project path from the context. + + Returns: + The base project path, or empty string if not set + """ + try: + return self.ctx.request_context.lifespan_context.base_path + except AttributeError: + return "" + + @property + def settings(self) -> Optional[ProjectSettings]: + """ + Get the project settings from the context. + + Returns: + The ProjectSettings instance, or None if not available + """ + try: + return self.ctx.request_context.lifespan_context.settings + except AttributeError: + return None + + @property + def file_count(self) -> int: + """ + Get the current file count from the context. + + Returns: + The number of indexed files, or 0 if not available + """ + try: + return self.ctx.request_context.lifespan_context.file_count + except AttributeError: + return 0 + + @property + def index_manager(self): + """ + Get the unified index manager from the context. + + Returns: + The UnifiedIndexManager instance, or None if not available + """ + try: + return getattr(self.ctx.request_context.lifespan_context, 'index_manager', None) + except AttributeError: + return None + + def validate_base_path(self) -> bool: + """ + Check if the base path is set and valid. + + Returns: + True if base path is set and exists, False otherwise + """ + base_path = self.base_path + return bool(base_path and os.path.exists(base_path)) + + def get_base_path_error(self) -> Optional[str]: + """ + Get an error message if base path is not properly set. + + Returns: + Error message string if base path is invalid, None if valid + """ + if not self.base_path: + return ("Project path not set. Please use set_project_path to set a " + "project directory first.") + + if not os.path.exists(self.base_path): + return f"Project path does not exist: {self.base_path}" + + if not os.path.isdir(self.base_path): + return f"Project path is not a directory: {self.base_path}" + + return None + + def update_file_count(self, count: int) -> None: + """ + Update the file count in the context. + + Args: + count: The new file count + """ + try: + self.ctx.request_context.lifespan_context.file_count = count + except AttributeError: + pass # Context not available or doesn't support this operation + + def update_base_path(self, path: str) -> None: + """ + Update the base path in the context. + + Args: + path: The new base path + """ + try: + self.ctx.request_context.lifespan_context.base_path = path + except AttributeError: + pass # Context not available or doesn't support this operation + + def update_settings(self, settings: ProjectSettings) -> None: + """ + Update the settings in the context. + + Args: + settings: The new ProjectSettings instance + """ + try: + self.ctx.request_context.lifespan_context.settings = settings + except AttributeError: + pass # Context not available or doesn't support this operation + + def clear_index_cache(self) -> None: + """ + Clear the index through the unified index manager. + """ + try: + if self.index_manager: + self.index_manager.clear_index() + except AttributeError: + pass + + def update_index_manager(self, index_manager) -> None: + """ + Update the index manager in the context. 
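The properties swallow `AttributeError`, so they are safe to call before the lifespan context is populated. A guard-style usage sketch (again assuming an active `ctx`):

```python
from code_index_mcp.utils import ContextHelper

ctx = ...  # an active MCP Context (assumed)
helper = ContextHelper(ctx)

# Safe even on a half-initialized context: base_path falls back to "".
if not helper.validate_base_path():
    raise ValueError(helper.get_base_path_error() or "Project path not set")

print(f"Indexed files: {helper.file_count}")
```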
+ + Args: + index_manager: The new UnifiedIndexManager instance + """ + try: + self.ctx.request_context.lifespan_context.index_manager = index_manager + except AttributeError: + pass # Context not available or doesn't support this operation diff --git a/src/code_index_mcp/utils/error_handler.py b/src/code_index_mcp/utils/error_handler.py new file mode 100644 index 0000000..e596886 --- /dev/null +++ b/src/code_index_mcp/utils/error_handler.py @@ -0,0 +1,103 @@ +""" +Decorator-based error handling for MCP entry points. + +This module provides consistent error handling across all MCP tools, resources, and prompts. +""" + +import functools +import json +from typing import Any, Callable, Dict, Union + + +def handle_mcp_errors(return_type: str = 'str') -> Callable: + """ + Decorator to handle exceptions in MCP entry points consistently. + + This decorator catches all exceptions and formats them according to the expected + return type, providing consistent error responses across all MCP entry points. + + Args: + return_type: The expected return type format + - 'str': Returns error as string format "Error: {message}" + - 'dict': Returns error as dict format {"error": "Operation failed: {message}"} + - 'json': Returns error as JSON string with dict format + + Returns: + Decorator function that wraps MCP entry points with error handling + + Example: + @mcp.tool() + @handle_mcp_errors(return_type='str') + def set_project_path(path: str, ctx: Context) -> str: + from ..services.project_management_service import ProjectManagementService + return ProjectManagementService(ctx).initialize_project(path) + + @mcp.tool() + @handle_mcp_errors(return_type='dict') + def search_code_advanced(pattern: str, ctx: Context, **kwargs) -> Dict[str, Any]: + return SearchService(ctx).search_code(pattern, **kwargs) + """ + def decorator(func: Callable) -> Callable: + @functools.wraps(func) + def wrapper(*args, **kwargs) -> Union[str, Dict[str, Any]]: + try: + return func(*args, **kwargs) + except Exception as e: + error_message = str(e) + + if return_type == 'dict': + return {"error": f"Operation failed: {error_message}"} + elif return_type == 'json': + return json.dumps({"error": f"Operation failed: {error_message}"}) + else: # return_type == 'str' (default) + return f"Error: {error_message}" + + return wrapper + return decorator + + +def handle_mcp_resource_errors(func: Callable) -> Callable: + """ + Specialized error handler for MCP resources that always return strings. + + This is a convenience decorator specifically for @mcp.resource decorated functions + which always return string responses. + + Args: + func: The MCP resource function to wrap + + Returns: + Wrapped function with error handling + + Example: + @mcp.resource("config://code-indexer") + @handle_mcp_resource_errors + def get_config() -> str: + ctx = mcp.get_context() + from ..services.project_management_service import ProjectManagementService + return ProjectManagementService(ctx).get_project_config() + """ + return handle_mcp_errors(return_type='str')(func) + + +def handle_mcp_tool_errors(return_type: str = 'str') -> Callable: + """ + Specialized error handler for MCP tools with flexible return types. + + This is a convenience decorator specifically for @mcp.tool decorated functions + which may return either strings or dictionaries. 
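The decorator is easiest to see on a toy function; this sketch uses only `handle_mcp_errors` as defined above:

```python
from code_index_mcp.utils import handle_mcp_errors


@handle_mcp_errors(return_type='dict')
def divide(a: float, b: float) -> dict:
    return {"result": a / b}


print(divide(6, 2))   # {'result': 3.0}
print(divide(1, 0))   # {'error': 'Operation failed: division by zero'}
```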
+ + Args: + return_type: The expected return type ('str' or 'dict') + + Returns: + Decorator function for MCP tools + + Example: + @mcp.tool() + @handle_mcp_tool_errors(return_type='dict') + def find_files(pattern: str, ctx: Context) -> Dict[str, Any]: + from ..services.file_discovery_service import FileDiscoveryService + return FileDiscoveryService(ctx).find_files(pattern) + """ + return handle_mcp_errors(return_type=return_type) diff --git a/src/code_index_mcp/utils/file_filter.py b/src/code_index_mcp/utils/file_filter.py new file mode 100644 index 0000000..5cd9938 --- /dev/null +++ b/src/code_index_mcp/utils/file_filter.py @@ -0,0 +1,177 @@ +""" +Centralized file filtering logic for the Code Index MCP server. + +This module provides unified filtering capabilities used across all components +that need to determine which files and directories should be processed or excluded. +""" + +import fnmatch +from pathlib import Path +from typing import List, Optional, Set + +from ..constants import FILTER_CONFIG + + +class FileFilter: + """Centralized file filtering logic.""" + + def __init__(self, additional_excludes: Optional[List[str]] = None): + """ + Initialize the file filter. + + Args: + additional_excludes: Additional directory patterns to exclude + """ + self.exclude_dirs = set(FILTER_CONFIG["exclude_directories"]) + self.exclude_files = set(FILTER_CONFIG["exclude_files"]) + self.supported_extensions = set(FILTER_CONFIG["supported_extensions"]) + + # Add user-defined exclusions + if additional_excludes: + self.exclude_dirs.update(additional_excludes) + + def should_exclude_directory(self, dir_name: str) -> bool: + """ + Check if directory should be excluded from processing. + + Args: + dir_name: Directory name to check + + Returns: + True if directory should be excluded, False otherwise + """ + # Skip hidden directories except for specific allowed ones + if dir_name.startswith('.') and dir_name not in {'.env', '.gitignore'}: + return True + + # Check against exclude patterns + return dir_name in self.exclude_dirs + + def should_exclude_file(self, file_path: Path) -> bool: + """ + Check if file should be excluded from processing. + + Args: + file_path: Path object for the file to check + + Returns: + True if file should be excluded, False otherwise + """ + # Extension check - only process supported file types + if file_path.suffix.lower() not in self.supported_extensions: + return True + + # Hidden files (except specific allowed ones) + if file_path.name.startswith('.') and file_path.name not in {'.gitignore', '.env'}: + return True + + # Filename pattern check using glob patterns + for pattern in self.exclude_files: + if fnmatch.fnmatch(file_path.name, pattern): + return True + + return False + + def should_process_path(self, path: Path, base_path: Path) -> bool: + """ + Unified path processing logic to determine if a file should be processed. 
+ + Args: + path: File path to check + base_path: Project base path for relative path calculation + + Returns: + True if file should be processed, False otherwise + """ + try: + # Ensure we're working with absolute paths + if not path.is_absolute(): + path = base_path / path + + # Get relative path from base + relative_path = path.relative_to(base_path) + + # Check each path component for excluded directories + for part in relative_path.parts[:-1]: # Exclude filename + if self.should_exclude_directory(part): + return False + + # Check file itself + return not self.should_exclude_file(path) + + except (ValueError, OSError): + # Path not relative to base_path or other path errors + return False + + def is_supported_file_type(self, file_path: Path) -> bool: + """ + Check if file type is supported for indexing. + + Args: + file_path: Path to check + + Returns: + True if file type is supported, False otherwise + """ + return file_path.suffix.lower() in self.supported_extensions + + def is_temporary_file(self, file_path: Path) -> bool: + """ + Check if file appears to be a temporary file. + + Args: + file_path: Path to check + + Returns: + True if file appears temporary, False otherwise + """ + name = file_path.name + + # Common temporary file patterns + temp_patterns = ['*.tmp', '*.temp', '*.swp', '*.swo', '*~'] + + for pattern in temp_patterns: + if fnmatch.fnmatch(name, pattern): + return True + + # Files ending in .bak or .orig + if name.endswith(('.bak', '.orig')): + return True + + return False + + def filter_file_list(self, files: List[str], base_path: str) -> List[str]: + """ + Filter a list of file paths, keeping only those that should be processed. + + Args: + files: List of file paths (absolute or relative) + base_path: Project base path + + Returns: + Filtered list of file paths that should be processed + """ + base = Path(base_path) + filtered = [] + + for file_path_str in files: + file_path = Path(file_path_str) + if self.should_process_path(file_path, base): + filtered.append(file_path_str) + + return filtered + + def get_exclude_summary(self) -> dict: + """ + Get summary of current exclusion configuration. + + Returns: + Dictionary with exclusion configuration details + """ + return { + "exclude_directories_count": len(self.exclude_dirs), + "exclude_files_count": len(self.exclude_files), + "supported_extensions_count": len(self.supported_extensions), + "exclude_directories": sorted(self.exclude_dirs), + "exclude_files": sorted(self.exclude_files) + } \ No newline at end of file diff --git a/src/code_index_mcp/utils/response_formatter.py b/src/code_index_mcp/utils/response_formatter.py new file mode 100644 index 0000000..aad65ce --- /dev/null +++ b/src/code_index_mcp/utils/response_formatter.py @@ -0,0 +1,364 @@ +""" +Response formatting utilities for the MCP server. + +This module provides consistent response formatting functions used across +services to ensure uniform response structures and formats. +""" + +import json +from typing import Any, Dict, List, Optional, Union + +from ..indexing.qualified_names import generate_qualified_name + + +class ResponseFormatter: + """ + Helper class for formatting responses consistently across services. + + This class provides static methods for formatting different types of + responses in a consistent manner. 
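A usage sketch for `FileFilter`; which extensions pass depends on `FILTER_CONFIG`, so treat the concrete results here as assumptions rather than guarantees:

```python
from pathlib import Path

from code_index_mcp.utils import FileFilter

ffilter = FileFilter(additional_excludes=["vendor"])
base = Path("/work/my-project")  # hypothetical project root

# should_process_path() walks each path component through the directory
# exclusions, then applies the file-level checks.
print(ffilter.should_process_path(Path("src/app.py"), base))          # True, assuming .py is supported
print(ffilter.should_process_path(Path("vendor/lib/util.py"), base))  # False: "vendor" is excluded
```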
+ """ + + @staticmethod + def _resolve_qualified_names_in_relationships( + file_path: str, + relationship_list: List[str], + duplicate_names: set, + index_cache: Optional[Dict[str, Any]] = None + ) -> List[str]: + """ + Convert simple names to qualified names when duplicates exist. + + Args: + file_path: Current file path for context + relationship_list: List of function/class names that may need qualification + duplicate_names: Set of names that have duplicates in the project + index_cache: Optional index cache for duplicate detection + + Returns: + List with qualified names where duplicates exist + """ + if not relationship_list or not duplicate_names: + return relationship_list + + qualified_list = [] + for name in relationship_list: + if name in duplicate_names: + # Convert to qualified name if this name has duplicates + if index_cache and 'files' in index_cache: + # Try to find the actual file where this name is defined + # For now, we'll use the current file path as context + qualified_name = generate_qualified_name(file_path, name) + qualified_list.append(qualified_name) + else: + # Fallback: keep original name if we can't resolve + qualified_list.append(name) + else: + # No duplicates, keep original name + qualified_list.append(name) + + return qualified_list + + @staticmethod + def _get_duplicate_names_from_index(index_cache: Optional[Dict[str, Any]] = None) -> Dict[str, set]: + """ + Extract duplicate function and class names from index cache. + + Args: + index_cache: Optional index cache + + Returns: + Dictionary with 'functions' and 'classes' sets of duplicate names + """ + duplicates = {'functions': set(), 'classes': set()} + + if not index_cache: + return duplicates + + # Duplicate detection functionality removed - was legacy code + # Return empty duplicates as this feature is no longer used + + return duplicates + + @staticmethod + def success_response(message: str, data: Optional[Dict[str, Any]] = None) -> Dict[str, Any]: + """ + Format a successful operation response. + + Args: + message: Success message + data: Optional additional data to include + + Returns: + Formatted success response dictionary + """ + response = {"status": "success", "message": message} + if data: + response.update(data) + return response + + @staticmethod + def error_response(message: str, error_code: Optional[str] = None) -> Dict[str, Any]: + """ + Format an error response. + + Args: + message: Error message + error_code: Optional error code for categorization + + Returns: + Formatted error response dictionary + """ + response = {"error": message} + if error_code: + response["error_code"] = error_code + return response + + @staticmethod + def file_list_response(files: List[str], status_message: str) -> Dict[str, Any]: + """ + Format a file list response for find_files operations. + + Args: + files: List of file paths + status_message: Status message describing the operation result + + Returns: + Formatted file list response + """ + return { + "files": files, + "status": status_message + } + + @staticmethod + def search_results_response(results: List[Dict[str, Any]]) -> Dict[str, Any]: + """ + Format search results response. + + Args: + results: List of search result dictionaries + + Returns: + Formatted search results response + """ + return { + "results": results + } + + @staticmethod + def config_response(config_data: Dict[str, Any]) -> str: + """ + Format configuration data as JSON string. 
+ + Args: + config_data: Configuration data dictionary + + Returns: + JSON formatted configuration string + """ + return json.dumps(config_data, indent=2) + + @staticmethod + def stats_response(stats_data: Dict[str, Any]) -> str: + """ + Format statistics data as JSON string. + + Args: + stats_data: Statistics data dictionary + + Returns: + JSON formatted statistics string + """ + return json.dumps(stats_data, indent=2) + + @staticmethod + def file_summary_response( + file_path: str, + line_count: int, + size_bytes: int, + extension: str, + language: str = "unknown", + functions: Optional[Union[List[str], List[Dict[str, Any]]]] = None, + classes: Optional[Union[List[str], List[Dict[str, Any]]]] = None, + imports: Optional[Union[List[str], List[Dict[str, Any]]]] = None, + language_specific: Optional[Dict[str, Any]] = None, + error: Optional[str] = None, + index_cache: Optional[Dict[str, Any]] = None + ) -> Dict[str, Any]: + """ + Format file summary response from index data. + + Args: + file_path: Path to the file + line_count: Number of lines in the file + size_bytes: File size in bytes + extension: File extension + language: Programming language detected + functions: List of function names (strings) or complete function objects (dicts) + classes: List of class names (strings) or complete class objects (dicts) + imports: List of import statements (strings) or complete import objects (dicts) + language_specific: Language-specific analysis data + error: Error message if analysis failed + index_cache: Optional index cache for duplicate name resolution + + Returns: + Formatted file summary response + """ + # Get duplicate names from index for qualified name resolution + duplicate_names = ResponseFormatter._get_duplicate_names_from_index(index_cache) + + # Handle backward compatibility for functions + processed_functions = [] + if functions: + for func in functions: + if isinstance(func, str): + # Legacy format - convert string to basic object + processed_functions.append({"name": func}) + elif isinstance(func, dict): + # New format - use complete object and resolve qualified names in relationships + processed_func = func.copy() + + # Resolve qualified names in relationship fields + if 'calls' in processed_func and isinstance(processed_func['calls'], list): + processed_func['calls'] = ResponseFormatter._resolve_qualified_names_in_relationships( + file_path, processed_func['calls'], duplicate_names['functions'], index_cache + ) + + if 'called_by' in processed_func and isinstance(processed_func['called_by'], list): + processed_func['called_by'] = ResponseFormatter._resolve_qualified_names_in_relationships( + file_path, processed_func['called_by'], duplicate_names['functions'], index_cache + ) + + processed_functions.append(processed_func) + + # Handle backward compatibility for classes + processed_classes = [] + if classes: + for cls in classes: + if isinstance(cls, str): + # Legacy format - convert string to basic object + processed_classes.append({"name": cls}) + elif isinstance(cls, dict): + # New format - use complete object and resolve qualified names in relationships + processed_cls = cls.copy() + + # Resolve qualified names in relationship fields + if 'instantiated_by' in processed_cls and isinstance(processed_cls['instantiated_by'], list): + processed_cls['instantiated_by'] = ResponseFormatter._resolve_qualified_names_in_relationships( + file_path, processed_cls['instantiated_by'], duplicate_names['functions'], index_cache + ) + + processed_classes.append(processed_cls) + + # Handle 
backward compatibility for imports + processed_imports = [] + if imports: + for imp in imports: + if isinstance(imp, str): + # Legacy format - convert string to basic object + processed_imports.append({"module": imp, "import_type": "unknown"}) + elif isinstance(imp, dict): + # New format - use complete object + processed_imports.append(imp) + + response = { + "file_path": file_path, + "line_count": line_count, + "size_bytes": size_bytes, + "extension": extension, + "language": language, + "functions": processed_functions, + "classes": processed_classes, + "imports": processed_imports, + "language_specific": language_specific or {} + } + + if error: + response["error"] = error + + return response + + @staticmethod + def directory_info_response( + temp_directory: str, + exists: bool, + is_directory: bool = False, + contents: Optional[List[str]] = None, + subdirectories: Optional[List[Dict[str, Any]]] = None, + error: Optional[str] = None + ) -> Dict[str, Any]: + """ + Format directory information response. + + Args: + temp_directory: Path to the directory + exists: Whether the directory exists + is_directory: Whether the path is a directory + contents: List of directory contents + subdirectories: List of subdirectory information + error: Error message if operation failed + + Returns: + Formatted directory info response + """ + response = { + "temp_directory": temp_directory, + "exists": exists, + "is_directory": is_directory + } + + if contents is not None: + response["contents"] = contents + + if subdirectories is not None: + response["subdirectories"] = subdirectories + + if error: + response["error"] = error + + return response + + @staticmethod + def settings_info_response( + settings_directory: str, + temp_directory: str, + temp_directory_exists: bool, + config: Dict[str, Any], + stats: Dict[str, Any], + exists: bool, + status: str = "configured", + message: Optional[str] = None + ) -> Dict[str, Any]: + """ + Format settings information response. + + Args: + settings_directory: Path to settings directory + temp_directory: Path to temp directory + temp_directory_exists: Whether temp directory exists + config: Configuration data + stats: Statistics data + exists: Whether settings directory exists + status: Status of the configuration + message: Optional status message + + Returns: + Formatted settings info response + """ + response = { + "settings_directory": settings_directory, + "temp_directory": temp_directory, + "temp_directory_exists": temp_directory_exists, + "config": config, + "stats": stats, + "exists": exists + } + + if status != "configured": + response["status"] = status + + if message: + response["message"] = message + + return response \ No newline at end of file diff --git a/src/code_index_mcp/utils/validation.py b/src/code_index_mcp/utils/validation.py new file mode 100644 index 0000000..6f7eaf9 --- /dev/null +++ b/src/code_index_mcp/utils/validation.py @@ -0,0 +1,207 @@ +""" +Common validation logic for the MCP server. + +This module provides shared validation functions used across services +to ensure consistent validation behavior and reduce code duplication. +""" + +import os +import re +import fnmatch +from typing import Optional, List + +from ..indexing.qualified_names import normalize_file_path + + +class ValidationHelper: + """ + Helper class containing common validation logic. + + This class provides static methods for common validation operations + that are used across multiple services. 
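+
+    A minimal usage sketch (the method is defined below; the example paths
+    are illustrative only):
+
+        error = ValidationHelper.validate_file_path("src/app.py", "/path/to/project")
+        if error:
+            raise ValueError(error)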
+ """ + + @staticmethod + def validate_file_path(file_path: str, base_path: str) -> Optional[str]: + """ + Validate a file path for security and accessibility. + + This method checks for: + - Path traversal attempts + - Absolute path usage (not allowed) + - Path existence within base directory + + Args: + file_path: The file path to validate (should be relative) + base_path: The base project directory path + + Returns: + Error message if validation fails, None if valid + """ + if not file_path: + return "File path cannot be empty" + + if not base_path: + return "Base path not set" + + # Handle absolute paths (especially Windows paths starting with drive letters) + if os.path.isabs(file_path) or (len(file_path) > 1 and file_path[1] == ':'): + return (f"Absolute file paths like '{file_path}' are not allowed. " + "Please use paths relative to the project root.") + + # Normalize the file path + norm_path = os.path.normpath(file_path) + + # Check for path traversal attempts + if "..\\" in norm_path or "../" in norm_path or norm_path.startswith(".."): + return f"Invalid file path: {file_path} (directory traversal not allowed)" + + # Construct the full path and verify it's within the project bounds + full_path = os.path.join(base_path, norm_path) + real_full_path = os.path.realpath(full_path) + real_base_path = os.path.realpath(base_path) + + if not real_full_path.startswith(real_base_path): + return "Access denied. File path must be within project directory." + + return None + + @staticmethod + def validate_directory_path(dir_path: str) -> Optional[str]: + """ + Validate a directory path for project initialization. + + Args: + dir_path: The directory path to validate + + Returns: + Error message if validation fails, None if valid + """ + if not dir_path: + return "Directory path cannot be empty" + + # Normalize and get absolute path + try: + norm_path = os.path.normpath(dir_path) + abs_path = os.path.abspath(norm_path) + except (OSError, ValueError) as e: + return f"Invalid path format: {str(e)}" + + if not os.path.exists(abs_path): + return f"Path does not exist: {abs_path}" + + if not os.path.isdir(abs_path): + return f"Path is not a directory: {abs_path}" + + return None + + @staticmethod + def validate_glob_pattern(pattern: str) -> Optional[str]: + """ + Validate a glob pattern for file searching. + + Args: + pattern: The glob pattern to validate + + Returns: + Error message if validation fails, None if valid + """ + if not pattern: + return "Pattern cannot be empty" + + # Check for potentially dangerous patterns + if pattern.startswith('/') or pattern.startswith('\\'): + return "Pattern cannot start with path separator" + + # Test if the pattern is valid by trying to compile it + try: + # This will raise an exception if the pattern is malformed + fnmatch.translate(pattern) + except (ValueError, TypeError) as e: + return f"Invalid glob pattern: {str(e)}" + + return None + + @staticmethod + def validate_search_pattern(pattern: str, regex: bool = False) -> Optional[str]: + """ + Validate a search pattern for code searching. 
+ + Args: + pattern: The search pattern to validate + regex: Whether the pattern is a regex pattern + + Returns: + Error message if validation fails, None if valid + """ + if not pattern: + return "Search pattern cannot be empty" + + if regex: + # Basic regex validation - check for potentially dangerous patterns + try: + re.compile(pattern) + except re.error as e: + return f"Invalid regex pattern: {str(e)}" + + # Check for potentially expensive regex patterns (basic ReDoS protection) + dangerous_patterns = [ + r'\(\?\=.*\)\+', # Positive lookahead with quantifier + r'\(\?\!.*\)\+', # Negative lookahead with quantifier + r'\(\?\<\=.*\)\+', # Positive lookbehind with quantifier + r'\(\?\<\!.*\)\+', # Negative lookbehind with quantifier + ] + + for dangerous in dangerous_patterns: + if re.search(dangerous, pattern): + return "Potentially dangerous regex pattern detected" + + return None + + @staticmethod + def validate_file_extensions(extensions: List[str]) -> Optional[str]: + """ + Validate a list of file extensions. + + Args: + extensions: List of file extensions to validate + + Returns: + Error message if validation fails, None if valid + """ + if not extensions: + return "Extensions list cannot be empty" + + for ext in extensions: + if not isinstance(ext, str): + return "All extensions must be strings" + + if not ext.startswith('.'): + return f"Extension '{ext}' must start with a dot" + + if len(ext) < 2: + return f"Extension '{ext}' is too short" + + return None + + @staticmethod + def sanitize_file_path(file_path: str) -> str: + """ + Sanitize a file path by normalizing separators and removing dangerous elements. + + Args: + file_path: The file path to sanitize + + Returns: + Sanitized file path + """ + if not file_path: + return "" + + # Normalize path separators and structure + sanitized = normalize_file_path(file_path) + + # Remove any leading slashes to ensure relative path + sanitized = sanitized.lstrip('/') + + return sanitized \ No newline at end of file diff --git a/test/sample-projects/java/user-management/src/main/java/com/example/usermanagement/services/UserManager.java b/test/sample-projects/java/user-management/src/main/java/com/example/usermanagement/services/UserManager.java index 6058822..ca32e11 100644 --- a/test/sample-projects/java/user-management/src/main/java/com/example/usermanagement/services/UserManager.java +++ b/test/sample-projects/java/user-management/src/main/java/com/example/usermanagement/services/UserManager.java @@ -480,4 +480,9 @@ private void saveUsersToFile() { logger.error("Error saving users to file: {}", storagePath, e); } } + + // CI marker method to verify auto-reindex on change + public String ciAddedSymbolMarker() { + return "ci_symbol_java"; + } } \ No newline at end of file diff --git a/test/sample-projects/javascript/user-management/src/services/UserService.js b/test/sample-projects/javascript/user-management/src/services/UserService.js index b91d6c4..5dfde99 100644 --- a/test/sample-projects/javascript/user-management/src/services/UserService.js +++ b/test/sample-projects/javascript/user-management/src/services/UserService.js @@ -489,4 +489,5 @@ class UserService { } } -module.exports = UserService; \ No newline at end of file +module.exports = UserService; +// AUTO_REINDEX_MARKER: ci_auto_reindex_test_token_js \ No newline at end of file diff --git a/test/sample-projects/python/user_management/models/user.py b/test/sample-projects/python/user_management/models/user.py index 2bd0de7..83e1f13 100644 --- 
a/test/sample-projects/python/user_management/models/user.py +++ b/test/sample-projects/python/user_management/models/user.py @@ -175,4 +175,6 @@ def __str__(self) -> str: def __repr__(self) -> str: """Developer representation of user.""" - return f"User(username='{self.username}', name='{self.name}', role={self.role})" \ No newline at end of file + return f"User(username='{self.username}', name='{self.name}', role={self.role})" + +# AUTO_REINDEX_MARKER: ci_auto_reindex_test_token \ No newline at end of file diff --git a/test/sample-projects/python/user_management/services/user_manager.py b/test/sample-projects/python/user_management/services/user_manager.py index f546502..05ca4bc 100644 --- a/test/sample-projects/python/user_management/services/user_manager.py +++ b/test/sample-projects/python/user_management/services/user_manager.py @@ -215,4 +215,8 @@ def __iter__(self): def __str__(self) -> str: """String representation of UserManager.""" - return f"UserManager(users: {len(self._users)})" \ No newline at end of file + return f"UserManager(users: {len(self._users)})" + + # CI marker method to verify auto-reindex on change + def _ci_added_symbol_marker(self) -> str: + return "ci_symbol_python" \ No newline at end of file diff --git a/test/sample-projects/typescript/user-management/src/services/UserService.ts b/test/sample-projects/typescript/user-management/src/services/UserService.ts index 0536e54..962d300 100644 --- a/test/sample-projects/typescript/user-management/src/services/UserService.ts +++ b/test/sample-projects/typescript/user-management/src/services/UserService.ts @@ -514,4 +514,5 @@ export class UserService { } } -export default UserService; \ No newline at end of file +export default UserService; +// AUTO_REINDEX_MARKER: ci_auto_reindex_test_token_ts \ No newline at end of file diff --git a/test/sample-projects/zig/code-index-example/build.zig b/test/sample-projects/zig/code-index-example/build.zig new file mode 100644 index 0000000..ea52f56 --- /dev/null +++ b/test/sample-projects/zig/code-index-example/build.zig @@ -0,0 +1,156 @@ +const std = @import("std"); + +// Although this function looks imperative, it does not perform the build +// directly and instead it mutates the build graph (`b`) that will be then +// executed by an external runner. The functions in `std.Build` implement a DSL +// for defining build steps and express dependencies between them, allowing the +// build runner to parallelize the build automatically (and the cache system to +// know when a step doesn't need to be re-run). +pub fn build(b: *std.Build) void { + // Standard target options allow the person running `zig build` to choose + // what target to build for. Here we do not override the defaults, which + // means any target is allowed, and the default is native. Other options + // for restricting supported target set are available. + const target = b.standardTargetOptions(.{}); + // Standard optimization options allow the person running `zig build` to select + // between Debug, ReleaseSafe, ReleaseFast, and ReleaseSmall. Here we do not + // set a preferred release mode, allowing the user to decide how to optimize. + const optimize = b.standardOptimizeOption(.{}); + // It's also possible to define more custom flags to toggle optional features + // of this build script using `b.option()`. All defined flags (including + // target and optimize options) will be listed when running `zig build --help` + // in this directory. 
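+    //
+    // A sketch of such a flag (not wired into this build script; the flag
+    // name and default are illustrative):
+    //
+    //   const enable_tracing = b.option(bool, "tracing", "Enable tracing output") orelse false;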
+
+    // This creates a module, which represents a collection of source files alongside
+    // some compilation options, such as optimization mode and linked system libraries.
+    // Zig modules are the preferred way of making Zig code available to consumers.
+    // addModule defines a module that we intend to make available for importing
+    // to our consumers. We must give it a name because a Zig package can expose
+    // multiple modules and consumers will need to be able to specify which
+    // module they want to access.
+    const mod = b.addModule("code_index_example", .{
+        // The root source file is the "entry point" of this module. Users of
+        // this module will only be able to access public declarations contained
+        // in this file, which means that if you have declarations that you
+        // intend to expose to consumers that were defined in other files part
+        // of this module, you will have to make sure to re-export them from
+        // the root file.
+        .root_source_file = b.path("src/root.zig"),
+        // Later on we'll use this module as the root module of a test executable
+        // which requires us to specify a target.
+        .target = target,
+    });
+
+    // Here we define an executable. An executable needs to have a root module
+    // which needs to expose a `main` function. While we could add a main function
+    // to the module defined above, it's sometimes preferable to split business
+    // logic and the CLI into two separate modules.
+    //
+    // If your goal is to create a Zig library for others to use, consider if
+    // it might benefit from also exposing a CLI tool. A parser library for a
+    // data serialization format could also bundle a CLI syntax checker, for example.
+    //
+    // If instead your goal is to create an executable, consider if users might
+    // be interested in also being able to embed the core functionality of your
+    // program in their own executable in order to avoid the overhead involved in
+    // subprocessing your CLI tool.
+    //
+    // If neither case applies to you, feel free to delete the declaration you
+    // don't need and to put everything under a single module.
+    const exe = b.addExecutable(.{
+        .name = "code_index_example",
+        .root_module = b.createModule(.{
+            // b.createModule defines a new module just like b.addModule but,
+            // unlike b.addModule, it does not expose the module to consumers of
+            // this package, which is why in this case we don't have to give it a name.
+            .root_source_file = b.path("src/main.zig"),
+            // Target and optimization levels must be explicitly wired in when
+            // defining an executable or library (in the root module), and you
+            // can also hardcode a specific target for an executable or library
+            // definition if desirable (e.g. firmware for embedded devices).
+            .target = target,
+            .optimize = optimize,
+            // List of modules available for import in source files part of the
+            // root module.
+            .imports = &.{
+                // Here "code_index_example" is the name you will use in your source code to
+                // import this module (e.g. `@import("code_index_example")`). The name is
+                // repeated because you are allowed to rename your imports, which
+                // can be extremely useful in case of collisions (which can happen
+                // importing modules from different packages).
+                .{ .name = "code_index_example", .module = mod },
+            },
+        }),
+    });
+
+    // This declares intent for the executable to be installed into the
+    // install prefix when running `zig build` (i.e. when executing the default
+    // step). 
By default the install prefix is `zig-out/` but can be overridden
+    // by passing `--prefix` or `-p`.
+    b.installArtifact(exe);
+
+    // This creates a top level step. Top level steps have a name and can be
+    // invoked by name when running `zig build` (e.g. `zig build run`).
+    // This will evaluate the `run` step rather than the default step.
+    // For a top level step to actually do something, it must depend on other
+    // steps (e.g. a Run step, as we will see in a moment).
+    const run_step = b.step("run", "Run the app");
+
+    // This creates a RunArtifact step in the build graph. A RunArtifact step
+    // invokes an executable compiled by Zig. Steps will only be executed by the
+    // runner if invoked directly by the user (in the case of top level steps)
+    // or if another step depends on it, so it's up to you to define when and
+    // how this Run step will be executed. In our case we want to run it when
+    // the user runs `zig build run`, so we create a dependency link.
+    const run_cmd = b.addRunArtifact(exe);
+    run_step.dependOn(&run_cmd.step);
+
+    // By making the run step depend on the default step, it will be run from the
+    // installation directory rather than directly from within the cache directory.
+    run_cmd.step.dependOn(b.getInstallStep());
+
+    // This allows the user to pass arguments to the application in the build
+    // command itself, like this: `zig build run -- arg1 arg2 etc`
+    if (b.args) |args| {
+        run_cmd.addArgs(args);
+    }
+
+    // Creates an executable that will run `test` blocks from the provided module.
+    // Here `mod` needs to define a target, which is why earlier we made sure to
+    // set the relevant field.
+    const mod_tests = b.addTest(.{
+        .root_module = mod,
+    });
+
+    // A run step that will run the test executable.
+    const run_mod_tests = b.addRunArtifact(mod_tests);
+
+    // Creates an executable that will run `test` blocks from the executable's
+    // root module. Note that test executables only test one module at a time,
+    // hence why we have to create two separate ones.
+    const exe_tests = b.addTest(.{
+        .root_module = exe.root_module,
+    });
+
+    // A run step that will run the second test executable.
+    const run_exe_tests = b.addRunArtifact(exe_tests);
+
+    // A top level step for running all tests. dependOn can be called multiple
+    // times and since the two run steps do not depend on one another, this will
+    // make the two of them run in parallel.
+    const test_step = b.step("test", "Run tests");
+    test_step.dependOn(&run_mod_tests.step);
+    test_step.dependOn(&run_exe_tests.step);
+
+    // Just like flags, top level steps are also listed in the `--help` menu.
+    //
+    // The Zig build system is entirely implemented in userland, which means
+    // that it cannot hook into private compiler APIs. All compilation work
+    // orchestrated by the build system will result in other Zig compiler
+    // subcommands being invoked with the right flags defined. You can observe
+    // these invocations when one fails (or you pass a flag to increase
+    // verbosity) to validate assumptions and diagnose problems.
+    //
+    // Lastly, the Zig build system is relatively simple and self-contained,
+    // and reading its source code will allow you to master it.
+}
diff --git a/test/sample-projects/zig/code-index-example/build.zig.zon b/test/sample-projects/zig/code-index-example/build.zig.zon
new file mode 100644
index 0000000..f8f4ad1
--- /dev/null
+++ b/test/sample-projects/zig/code-index-example/build.zig.zon
@@ -0,0 +1,81 @@
+.{
+    // This is the default name used by packages depending on this one. 
For
+    // example, when a user runs `zig fetch --save <url>`, this field is used
+    // as the key in the `dependencies` table. Although the user can choose a
+    // different name, most users will stick with this provided value.
+    //
+    // It is redundant to include "zig" in this name because it is already
+    // within the Zig package namespace.
+    .name = .code_index_example,
+    // This is a [Semantic Version](https://semver.org/).
+    // In a future version of Zig it will be used for package deduplication.
+    .version = "0.0.0",
+    // Together with name, this represents a globally unique package
+    // identifier. This field is generated by the Zig toolchain when the
+    // package is first created, and then *never changes*. This allows
+    // unambiguous detection of one package being an updated version of
+    // another.
+    //
+    // When forking a Zig project, this id should be regenerated (delete the
+    // field and run `zig build`) if the upstream project is still maintained.
+    // Otherwise, the fork is *hostile*, attempting to take control over the
+    // original project's identity. Thus it is recommended to leave the comment
+    // on the following line intact, so that it shows up in code reviews that
+    // modify the field.
+    .fingerprint = 0x995c7acfb423849b, // Changing this has security and trust implications.
+    // Tracks the earliest Zig version that the package considers to be a
+    // supported use case.
+    .minimum_zig_version = "0.15.0-dev.1507+e25168d01",
+    // This field is optional.
+    // Each dependency must either provide a `url` and `hash`, or a `path`.
+    // `zig build --fetch` can be used to fetch all dependencies of a package, recursively.
+    // Once all dependencies are fetched, `zig build` no longer requires
+    // internet connectivity.
+    .dependencies = .{
+        // See `zig fetch --save <url>` for a command-line interface for adding dependencies.
+        //.example = .{
+        //    // When updating this field to a new URL, be sure to delete the corresponding
+        //    // `hash`, otherwise you are communicating that you expect to find the old hash at
+        //    // the new URL. If the contents of a URL change this will result in a hash mismatch
+        //    // which will prevent zig from using it.
+        //    .url = "https://example.com/foo.tar.gz",
+        //
+        //    // This is computed from the file contents of the directory of files that is
+        //    // obtained after fetching `url` and applying the inclusion rules given by
+        //    // `paths`.
+        //    //
+        //    // This field is the source of truth; packages do not come from a `url`; they
+        //    // come from a `hash`. `url` is just one of many possible mirrors for how to
+        //    // obtain a package matching this `hash`.
+        //    //
+        //    // Uses the [multihash](https://multiformats.io/multihash/) format.
+        //    .hash = "...",
+        //
+        //    // When this is provided, the package is found in a directory relative to the
+        //    // build root. In this case the package's hash is irrelevant and therefore not
+        //    // computed. This field and `url` are mutually exclusive.
+        //    .path = "foo",
+        //
+        //    // When this is set to `true`, a package is declared to be lazily
+        //    // fetched. This makes the dependency only get fetched if it is
+        //    // actually used.
+        //    .lazy = false,
+        //},
+    },
+    // Specifies the set of files and directories that are included in this package.
+    // Only files and directories listed here are included in the `hash` that
+    // is computed for this package. Only files listed here will remain on disk
+    // when using the zig package manager. 
As a rule of thumb, one should list + // files required for compilation plus any license(s). + // Paths are relative to the build root. Use the empty string (`""`) to refer to + // the build root itself. + // A directory listed here means that all files within, recursively, are included. + .paths = .{ + "build.zig", + "build.zig.zon", + "src", + // For example... + //"LICENSE", + //"README.md", + }, +} diff --git a/test/sample-projects/zig/code-index-example/src/main.zig b/test/sample-projects/zig/code-index-example/src/main.zig new file mode 100644 index 0000000..792cfc1 --- /dev/null +++ b/test/sample-projects/zig/code-index-example/src/main.zig @@ -0,0 +1,45 @@ +const std = @import("std"); +const builtin = @import("builtin"); +const testing = @import("testing"); +const code_index_example = @import("code_index_example"); +const utils = @import("./utils.zig"); +const math_utils = @import("./math.zig"); + +pub fn main() !void { + // Prints to stderr, ignoring potential errors. + std.debug.print("All your {s} are belong to us.\n", .{"codebase"}); + try code_index_example.bufferedPrint(); + + // Test our custom utilities + const result = utils.processData("Hello, World!"); + std.debug.print("Processed result: {s}\n", .{result}); + + // Test math utilities + const sum = math_utils.calculateSum(10, 20); + std.debug.print("Sum: {}\n", .{sum}); + + // Platform-specific code + if (builtin.os.tag == .windows) { + std.debug.print("Running on Windows\n", .{}); + } else { + std.debug.print("Running on Unix-like system\n", .{}); + } +} + +test "simple test" { + var list = std.ArrayList(i32).init(std.testing.allocator); + defer list.deinit(); // Try commenting this out and see if zig detects the memory leak! + try list.append(42); + try std.testing.expectEqual(@as(i32, 42), list.pop()); +} + +test "fuzz example" { + const Context = struct { + fn testOne(context: @This(), input: []const u8) anyerror!void { + _ = context; + // Try passing `--fuzz` to `zig build test` and see if it manages to fail this test case! + try std.testing.expect(!std.mem.eql(u8, "canyoufindme", input)); + } + }; + try std.testing.fuzz(Context{}, Context.testOne, .{}); +} diff --git a/test/sample-projects/zig/code-index-example/src/math.zig b/test/sample-projects/zig/code-index-example/src/math.zig new file mode 100644 index 0000000..dba7420 --- /dev/null +++ b/test/sample-projects/zig/code-index-example/src/math.zig @@ -0,0 +1,262 @@ +//! 
Mathematical utility functions and data structures +const std = @import("std"); +const math = @import("math"); +const testing = @import("testing"); + +// Mathematical constants +pub const PI: f64 = 3.14159265358979323846; +pub const E: f64 = 2.71828182845904523536; +pub const GOLDEN_RATIO: f64 = 1.61803398874989484820; + +// Complex number representation +pub const Complex = struct { + real: f64, + imag: f64, + + pub fn init(real: f64, imag: f64) Complex { + return Complex{ .real = real, .imag = imag }; + } + + pub fn add(self: Complex, other: Complex) Complex { + return Complex{ + .real = self.real + other.real, + .imag = self.imag + other.imag, + }; + } + + pub fn multiply(self: Complex, other: Complex) Complex { + return Complex{ + .real = self.real * other.real - self.imag * other.imag, + .imag = self.real * other.imag + self.imag * other.real, + }; + } + + pub fn magnitude(self: Complex) f64 { + return @sqrt(self.real * self.real + self.imag * self.imag); + } + + pub fn conjugate(self: Complex) Complex { + return Complex{ .real = self.real, .imag = -self.imag }; + } +}; + +// Point in 2D space +pub const Point2D = struct { + x: f64, + y: f64, + + pub fn init(x: f64, y: f64) Point2D { + return Point2D{ .x = x, .y = y }; + } + + pub fn distance(self: Point2D, other: Point2D) f64 { + const dx = self.x - other.x; + const dy = self.y - other.y; + return @sqrt(dx * dx + dy * dy); + } + + pub fn midpoint(self: Point2D, other: Point2D) Point2D { + return Point2D{ + .x = (self.x + other.x) / 2.0, + .y = (self.y + other.y) / 2.0, + }; + } +}; + +// Statistics utilities +pub const Statistics = struct { + pub fn mean(values: []const f64) f64 { + if (values.len == 0) return 0.0; + + var sum: f64 = 0.0; + for (values) |value| { + sum += value; + } + + return sum / @as(f64, @floatFromInt(values.len)); + } + + pub fn median(values: []const f64, buffer: []f64) f64 { + if (values.len == 0) return 0.0; + + // Copy to buffer and sort + for (values, 0..) 
|value, i| { + buffer[i] = value; + } + std.sort.insertionSort(f64, buffer[0..values.len], {}, std.sort.asc(f64)); + + const n = values.len; + if (n % 2 == 1) { + return buffer[n / 2]; + } else { + return (buffer[n / 2 - 1] + buffer[n / 2]) / 2.0; + } + } + + pub fn standardDeviation(values: []const f64) f64 { + if (values.len <= 1) return 0.0; + + const avg = mean(values); + var sum_sq_diff: f64 = 0.0; + + for (values) |value| { + const diff = value - avg; + sum_sq_diff += diff * diff; + } + + return @sqrt(sum_sq_diff / @as(f64, @floatFromInt(values.len - 1))); + } +}; + +// Basic math functions +pub fn factorial(n: u32) u64 { + if (n <= 1) return 1; + return @as(u64, n) * factorial(n - 1); +} + +pub fn fibonacci(n: u32) u64 { + if (n <= 1) return n; + return fibonacci(n - 1) + fibonacci(n - 2); +} + +pub fn gcd(a: u32, b: u32) u32 { + if (b == 0) return a; + return gcd(b, a % b); +} + +pub fn lcm(a: u32, b: u32) u32 { + return (a * b) / gcd(a, b); +} + +pub fn isPrime(n: u32) bool { + if (n < 2) return false; + if (n == 2) return true; + if (n % 2 == 0) return false; + + var i: u32 = 3; + while (i * i <= n) : (i += 2) { + if (n % i == 0) return false; + } + + return true; +} + +// Function used by main.zig +pub fn calculateSum(a: i32, b: i32) i32 { + return a + b; +} + +pub fn power(base: f64, exponent: i32) f64 { + if (exponent == 0) return 1.0; + if (exponent < 0) return 1.0 / power(base, -exponent); + + var result: f64 = 1.0; + var exp = exponent; + var b = base; + + while (exp > 0) { + if (exp % 2 == 1) { + result *= b; + } + b *= b; + exp /= 2; + } + + return result; +} + +// Matrix operations (2x2 for simplicity) +pub const Matrix2x2 = struct { + data: [2][2]f64, + + pub fn init(a: f64, b: f64, c: f64, d: f64) Matrix2x2 { + return Matrix2x2{ + .data = [_][2]f64{ + [_]f64{ a, b }, + [_]f64{ c, d }, + }, + }; + } + + pub fn multiply(self: Matrix2x2, other: Matrix2x2) Matrix2x2 { + return Matrix2x2{ + .data = [_][2]f64{ + [_]f64{ + self.data[0][0] * other.data[0][0] + self.data[0][1] * other.data[1][0], + self.data[0][0] * other.data[0][1] + self.data[0][1] * other.data[1][1], + }, + [_]f64{ + self.data[1][0] * other.data[0][0] + self.data[1][1] * other.data[1][0], + self.data[1][0] * other.data[0][1] + self.data[1][1] * other.data[1][1], + }, + }, + }; + } + + pub fn determinant(self: Matrix2x2) f64 { + return self.data[0][0] * self.data[1][1] - self.data[0][1] * self.data[1][0]; + } +}; + +// Tests +test "complex number operations" { + const z1 = Complex.init(3.0, 4.0); + const z2 = Complex.init(1.0, 2.0); + + const sum = z1.add(z2); + try std.testing.expectEqual(@as(f64, 4.0), sum.real); + try std.testing.expectEqual(@as(f64, 6.0), sum.imag); + + const magnitude = z1.magnitude(); + try std.testing.expectApproxEqAbs(@as(f64, 5.0), magnitude, 0.0001); +} + +test "point distance calculation" { + const p1 = Point2D.init(0.0, 0.0); + const p2 = Point2D.init(3.0, 4.0); + + const dist = p1.distance(p2); + try std.testing.expectApproxEqAbs(@as(f64, 5.0), dist, 0.0001); +} + +test "factorial calculation" { + try std.testing.expectEqual(@as(u64, 1), factorial(0)); + try std.testing.expectEqual(@as(u64, 1), factorial(1)); + try std.testing.expectEqual(@as(u64, 120), factorial(5)); +} + +test "fibonacci sequence" { + try std.testing.expectEqual(@as(u64, 0), fibonacci(0)); + try std.testing.expectEqual(@as(u64, 1), fibonacci(1)); + try std.testing.expectEqual(@as(u64, 13), fibonacci(7)); +} + +test "prime number detection" { + try std.testing.expect(isPrime(2)); + try 
std.testing.expect(isPrime(17)); + try std.testing.expect(!isPrime(4)); + try std.testing.expect(!isPrime(1)); +} + +test "statistics calculations" { + const values = [_]f64{ 1.0, 2.0, 3.0, 4.0, 5.0 }; + + const avg = Statistics.mean(&values); + try std.testing.expectEqual(@as(f64, 3.0), avg); + + var buffer: [10]f64 = undefined; + const med = Statistics.median(&values, &buffer); + try std.testing.expectEqual(@as(f64, 3.0), med); +} + +test "matrix operations" { + const m1 = Matrix2x2.init(1.0, 2.0, 3.0, 4.0); + const m2 = Matrix2x2.init(5.0, 6.0, 7.0, 8.0); + + const product = m1.multiply(m2); + try std.testing.expectEqual(@as(f64, 19.0), product.data[0][0]); + try std.testing.expectEqual(@as(f64, 22.0), product.data[0][1]); + + const det = m1.determinant(); + try std.testing.expectEqual(@as(f64, -2.0), det); +} \ No newline at end of file diff --git a/test/sample-projects/zig/code-index-example/src/root.zig b/test/sample-projects/zig/code-index-example/src/root.zig new file mode 100644 index 0000000..1cc95e3 --- /dev/null +++ b/test/sample-projects/zig/code-index-example/src/root.zig @@ -0,0 +1,135 @@ +//! By convention, root.zig is the root source file when making a library. +const std = @import("std"); +const fmt = @import("fmt"); +const mem = @import("mem"); +const json = @import("json"); + +// Define custom types and structures +pub const Config = struct { + name: []const u8, + version: u32, + debug: bool, + + pub fn init(name: []const u8, version: u32) Config { + return Config{ + .name = name, + .version = version, + .debug = false, + }; + } + + pub fn setDebug(self: *Config, debug: bool) void { + self.debug = debug; + } +}; + +pub const ErrorType = enum { + None, + InvalidInput, + OutOfMemory, + NetworkError, + + pub fn toString(self: ErrorType) []const u8 { + return switch (self) { + .None => "No error", + .InvalidInput => "Invalid input", + .OutOfMemory => "Out of memory", + .NetworkError => "Network error", + }; + } +}; + +// Global constants +pub const VERSION: u32 = 1; +pub const MAX_BUFFER_SIZE: usize = 4096; +var global_config: Config = undefined; + +pub fn bufferedPrint() !void { + // Stdout is for the actual output of your application, for example if you + // are implementing gzip, then only the compressed bytes should be sent to + // stdout, not any debugging messages. + var stdout_buffer: [1024]u8 = undefined; + var stdout_writer = std.fs.File.stdout().writer(&stdout_buffer); + const stdout = &stdout_writer.interface; + + try stdout.print("Run `zig build test` to run the tests.\n", .{}); + + try stdout.flush(); // Don't forget to flush! 
+} + +pub fn add(a: i32, b: i32) i32 { + return a + b; +} + +pub fn multiply(a: i32, b: i32) i32 { + return a * b; +} + +pub fn processConfig(config: *const Config) !void { + std.debug.print("Processing config: {s} v{}\n", .{ config.name, config.version }); + if (config.debug) { + std.debug.print("Debug mode enabled\n", .{}); + } +} + +pub fn handleError(err: ErrorType) void { + std.debug.print("Error: {s}\n", .{err.toString()}); +} + +// Advanced function with error handling +pub fn parseNumber(input: []const u8) !i32 { + if (input.len == 0) { + return error.InvalidInput; + } + + return std.fmt.parseInt(i32, input, 10) catch |err| switch (err) { + error.InvalidCharacter => error.InvalidInput, + error.Overflow => error.OutOfMemory, + else => err, + }; +} + +// Generic function +pub fn swap(comptime T: type, a: *T, b: *T) void { + const temp = a.*; + a.* = b.*; + b.* = temp; +} + +test "basic add functionality" { + try std.testing.expect(add(3, 7) == 10); +} + +test "config initialization" { + var config = Config.init("test-app", 1); + try std.testing.expectEqualStrings("test-app", config.name); + try std.testing.expectEqual(@as(u32, 1), config.version); + try std.testing.expectEqual(false, config.debug); + + config.setDebug(true); + try std.testing.expectEqual(true, config.debug); +} + +test "error type handling" { + const err = ErrorType.InvalidInput; + try std.testing.expectEqualStrings("Invalid input", err.toString()); +} + +test "number parsing" { + const result = try parseNumber("42"); + try std.testing.expectEqual(@as(i32, 42), result); + + // Test error case + const invalid_result = parseNumber(""); + try std.testing.expectError(error.InvalidInput, invalid_result); +} + +test "generic swap function" { + var a: i32 = 10; + var b: i32 = 20; + + swap(i32, &a, &b); + + try std.testing.expectEqual(@as(i32, 20), a); + try std.testing.expectEqual(@as(i32, 10), b); +} diff --git a/test/sample-projects/zig/code-index-example/src/utils.zig b/test/sample-projects/zig/code-index-example/src/utils.zig new file mode 100644 index 0000000..eab54ce --- /dev/null +++ b/test/sample-projects/zig/code-index-example/src/utils.zig @@ -0,0 +1,169 @@ +//! Utility functions for string processing and data manipulation +const std = @import("std"); +const mem = @import("mem"); +const ascii = @import("ascii"); + +// Constants for utility functions +pub const DEFAULT_BUFFER_SIZE: usize = 256; +pub const MAX_STRING_LENGTH: usize = 1024; + +// Custom error types +pub const UtilError = error{ + BufferTooSmall, + InvalidString, + ProcessingFailed, +}; + +// String processing utilities +pub const StringProcessor = struct { + buffer: []u8, + allocator: std.mem.Allocator, + + pub fn init(allocator: std.mem.Allocator, buffer_size: usize) !StringProcessor { + const buffer = try allocator.alloc(u8, buffer_size); + return StringProcessor{ + .buffer = buffer, + .allocator = allocator, + }; + } + + pub fn deinit(self: *StringProcessor) void { + self.allocator.free(self.buffer); + } + + pub fn toUpperCase(self: *StringProcessor, input: []const u8) ![]const u8 { + if (input.len > self.buffer.len) { + return UtilError.BufferTooSmall; + } + + for (input, 0..) |char, i| { + self.buffer[i] = std.ascii.toUpper(char); + } + + return self.buffer[0..input.len]; + } + + pub fn reverse(self: *StringProcessor, input: []const u8) ![]const u8 { + if (input.len > self.buffer.len) { + return UtilError.BufferTooSmall; + } + + for (input, 0..) 
|char, i| { + self.buffer[input.len - 1 - i] = char; + } + + return self.buffer[0..input.len]; + } +}; + +// Data validation functions +pub fn validateEmail(email: []const u8) bool { + if (email.len == 0) return false; + + var has_at = false; + var has_dot = false; + + for (email) |char| { + if (char == '@') { + if (has_at) return false; // Multiple @ symbols + has_at = true; + } else if (char == '.') { + has_dot = true; + } + } + + return has_at and has_dot; +} + +pub fn isValidIdentifier(identifier: []const u8) bool { + if (identifier.len == 0) return false; + + // First character must be letter or underscore + if (!std.ascii.isAlphabetic(identifier[0]) and identifier[0] != '_') { + return false; + } + + // Rest must be alphanumeric or underscore + for (identifier[1..]) |char| { + if (!std.ascii.isAlphanumeric(char) and char != '_') { + return false; + } + } + + return true; +} + +// Simple string processing function used by main.zig +pub fn processData(input: []const u8) []const u8 { + return if (input.len > 0) "Processed!" else "Empty input"; +} + +// Array utilities +pub fn findMax(numbers: []const i32) ?i32 { + if (numbers.len == 0) return null; + + var max = numbers[0]; + for (numbers[1..]) |num| { + if (num > max) { + max = num; + } + } + + return max; +} + +pub fn bubbleSort(numbers: []i32) void { + const n = numbers.len; + if (n <= 1) return; + + var i: usize = 0; + while (i < n - 1) : (i += 1) { + var j: usize = 0; + while (j < n - i - 1) : (j += 1) { + if (numbers[j] > numbers[j + 1]) { + const temp = numbers[j]; + numbers[j] = numbers[j + 1]; + numbers[j + 1] = temp; + } + } + } +} + +// Tests +test "string processor initialization" { + var processor = try StringProcessor.init(std.testing.allocator, 100); + defer processor.deinit(); + + const result = try processor.toUpperCase("hello"); + try std.testing.expectEqualStrings("HELLO", result); +} + +test "email validation" { + try std.testing.expect(validateEmail("test@example.com")); + try std.testing.expect(!validateEmail("invalid-email")); + try std.testing.expect(!validateEmail("")); +} + +test "identifier validation" { + try std.testing.expect(isValidIdentifier("valid_id")); + try std.testing.expect(isValidIdentifier("_private")); + try std.testing.expect(!isValidIdentifier("123invalid")); + try std.testing.expect(!isValidIdentifier("")); +} + +test "find maximum in array" { + const numbers = [_]i32{ 3, 1, 4, 1, 5, 9, 2, 6 }; + const max = findMax(&numbers); + try std.testing.expectEqual(@as(?i32, 9), max); + + const empty: []const i32 = &[_]i32{}; + try std.testing.expectEqual(@as(?i32, null), findMax(empty)); +} + +test "bubble sort" { + var numbers = [_]i32{ 64, 34, 25, 12, 22, 11, 90 }; + bubbleSort(&numbers); + + const expected = [_]i32{ 11, 12, 22, 25, 34, 64, 90 }; + try std.testing.expectEqualSlices(i32, &expected, &numbers); +} \ No newline at end of file diff --git a/tests/search/test_search_filters.py b/tests/search/test_search_filters.py new file mode 100644 index 0000000..787461d --- /dev/null +++ b/tests/search/test_search_filters.py @@ -0,0 +1,52 @@ +"""Tests covering shared search filtering behaviour.""" +import os +from types import SimpleNamespace +from unittest.mock import patch +from pathlib import Path as _TestPath +import sys + +ROOT = _TestPath(__file__).resolve().parents[2] +SRC_PATH = ROOT / 'src' +if str(SRC_PATH) not in sys.path: + sys.path.insert(0, str(SRC_PATH)) + +from code_index_mcp.search.basic import BasicSearchStrategy +from code_index_mcp.search.ripgrep import RipgrepStrategy +from 
code_index_mcp.utils.file_filter import FileFilter + + +def test_basic_strategy_skips_excluded_directories(tmp_path): + base = tmp_path + src_dir = base / "src" + src_dir.mkdir() + (src_dir / 'app.js').write_text("const db = 'mongo';\n") + + node_modules_dir = base / "node_modules" / "pkg" + node_modules_dir.mkdir(parents=True) + (node_modules_dir / 'index.js').write_text("// mongo dependency\n") + + strategy = BasicSearchStrategy() + strategy.configure_excludes(FileFilter()) + + results = strategy.search("mongo", str(base), case_sensitive=False) + + included_path = os.path.join("src", "app.js") + excluded_path = os.path.join("node_modules", "pkg", "index.js") + + assert included_path in results + assert excluded_path not in results + + +@patch("code_index_mcp.search.ripgrep.subprocess.run") +def test_ripgrep_strategy_adds_exclude_globs(mock_run, tmp_path): + mock_run.return_value = SimpleNamespace(returncode=0, stdout="", stderr="") + + strategy = RipgrepStrategy() + strategy.configure_excludes(FileFilter()) + + strategy.search("mongo", str(tmp_path)) + + cmd = mock_run.call_args[0][0] + glob_args = [cmd[i + 1] for i, arg in enumerate(cmd) if arg == '--glob' and i + 1 < len(cmd)] + + assert any(value.startswith('!**/node_modules/') for value in glob_args) diff --git a/uv.lock b/uv.lock index a24321f..08294cf 100644 --- a/uv.lock +++ b/uv.lock @@ -49,14 +49,32 @@ wheels = [ [[package]] name = "code-index-mcp" -version = "0.5.0" +version = "2.4.1" source = { editable = "." } dependencies = [ { name = "mcp" }, + { name = "msgpack" }, + { name = "pathspec" }, + { name = "tree-sitter" }, + { name = "tree-sitter-java" }, + { name = "tree-sitter-javascript" }, + { name = "tree-sitter-typescript" }, + { name = "tree-sitter-zig" }, + { name = "watchdog" }, ] [package.metadata] -requires-dist = [{ name = "mcp", specifier = ">=0.3.0" }] +requires-dist = [ + { name = "mcp", specifier = ">=0.3.0" }, + { name = "msgpack", specifier = ">=1.0.0" }, + { name = "pathspec", specifier = ">=0.12.1" }, + { name = "tree-sitter", specifier = ">=0.20.0" }, + { name = "tree-sitter-java", specifier = ">=0.20.0" }, + { name = "tree-sitter-javascript", specifier = ">=0.20.0" }, + { name = "tree-sitter-typescript", specifier = ">=0.20.0" }, + { name = "tree-sitter-zig", specifier = ">=0.20.0" }, + { name = "watchdog", specifier = ">=3.0.0" }, +] [[package]] name = "colorama" @@ -150,6 +168,63 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e8/0e/885f156ade60108e67bf044fada5269da68e29d758a10b0c513f4d85dd76/mcp-1.4.1-py3-none-any.whl", hash = "sha256:a7716b1ec1c054e76f49806f7d96113b99fc1166fc9244c2c6f19867cb75b593", size = 72448 }, ] +[[package]] +name = "msgpack" +version = "1.1.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/45/b1/ea4f68038a18c77c9467400d166d74c4ffa536f34761f7983a104357e614/msgpack-1.1.1.tar.gz", hash = "sha256:77b79ce34a2bdab2594f490c8e80dd62a02d650b91a75159a63ec413b8d104cd", size = 173555 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/33/52/f30da112c1dc92cf64f57d08a273ac771e7b29dea10b4b30369b2d7e8546/msgpack-1.1.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:353b6fc0c36fde68b661a12949d7d49f8f51ff5fa019c1e47c87c4ff34b080ed", size = 81799 }, + { url = "https://files.pythonhosted.org/packages/e4/35/7bfc0def2f04ab4145f7f108e3563f9b4abae4ab0ed78a61f350518cc4d2/msgpack-1.1.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:79c408fcf76a958491b4e3b103d1c417044544b68e96d06432a189b43d1215c8", size = 78278 
}, + { url = "https://files.pythonhosted.org/packages/e8/c5/df5d6c1c39856bc55f800bf82778fd4c11370667f9b9e9d51b2f5da88f20/msgpack-1.1.1-cp310-cp310-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:78426096939c2c7482bf31ef15ca219a9e24460289c00dd0b94411040bb73ad2", size = 402805 }, + { url = "https://files.pythonhosted.org/packages/20/8e/0bb8c977efecfe6ea7116e2ed73a78a8d32a947f94d272586cf02a9757db/msgpack-1.1.1-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8b17ba27727a36cb73aabacaa44b13090feb88a01d012c0f4be70c00f75048b4", size = 408642 }, + { url = "https://files.pythonhosted.org/packages/59/a1/731d52c1aeec52006be6d1f8027c49fdc2cfc3ab7cbe7c28335b2910d7b6/msgpack-1.1.1-cp310-cp310-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:7a17ac1ea6ec3c7687d70201cfda3b1e8061466f28f686c24f627cae4ea8efd0", size = 395143 }, + { url = "https://files.pythonhosted.org/packages/2b/92/b42911c52cda2ba67a6418ffa7d08969edf2e760b09015593c8a8a27a97d/msgpack-1.1.1-cp310-cp310-musllinux_1_2_aarch64.whl", hash = "sha256:88d1e966c9235c1d4e2afac21ca83933ba59537e2e2727a999bf3f515ca2af26", size = 395986 }, + { url = "https://files.pythonhosted.org/packages/61/dc/8ae165337e70118d4dab651b8b562dd5066dd1e6dd57b038f32ebc3e2f07/msgpack-1.1.1-cp310-cp310-musllinux_1_2_i686.whl", hash = "sha256:f6d58656842e1b2ddbe07f43f56b10a60f2ba5826164910968f5933e5178af75", size = 402682 }, + { url = "https://files.pythonhosted.org/packages/58/27/555851cb98dcbd6ce041df1eacb25ac30646575e9cd125681aa2f4b1b6f1/msgpack-1.1.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:96decdfc4adcbc087f5ea7ebdcfd3dee9a13358cae6e81d54be962efc38f6338", size = 406368 }, + { url = "https://files.pythonhosted.org/packages/d4/64/39a26add4ce16f24e99eabb9005e44c663db00e3fce17d4ae1ae9d61df99/msgpack-1.1.1-cp310-cp310-win32.whl", hash = "sha256:6640fd979ca9a212e4bcdf6eb74051ade2c690b862b679bfcb60ae46e6dc4bfd", size = 65004 }, + { url = "https://files.pythonhosted.org/packages/7d/18/73dfa3e9d5d7450d39debde5b0d848139f7de23bd637a4506e36c9800fd6/msgpack-1.1.1-cp310-cp310-win_amd64.whl", hash = "sha256:8b65b53204fe1bd037c40c4148d00ef918eb2108d24c9aaa20bc31f9810ce0a8", size = 71548 }, + { url = "https://files.pythonhosted.org/packages/7f/83/97f24bf9848af23fe2ba04380388216defc49a8af6da0c28cc636d722502/msgpack-1.1.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:71ef05c1726884e44f8b1d1773604ab5d4d17729d8491403a705e649116c9558", size = 82728 }, + { url = "https://files.pythonhosted.org/packages/aa/7f/2eaa388267a78401f6e182662b08a588ef4f3de6f0eab1ec09736a7aaa2b/msgpack-1.1.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:36043272c6aede309d29d56851f8841ba907a1a3d04435e43e8a19928e243c1d", size = 79279 }, + { url = "https://files.pythonhosted.org/packages/f8/46/31eb60f4452c96161e4dfd26dbca562b4ec68c72e4ad07d9566d7ea35e8a/msgpack-1.1.1-cp311-cp311-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:a32747b1b39c3ac27d0670122b57e6e57f28eefb725e0b625618d1b59bf9d1e0", size = 423859 }, + { url = "https://files.pythonhosted.org/packages/45/16/a20fa8c32825cc7ae8457fab45670c7a8996d7746ce80ce41cc51e3b2bd7/msgpack-1.1.1-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:8a8b10fdb84a43e50d38057b06901ec9da52baac6983d3f709d8507f3889d43f", size = 429975 }, + { url = 
"https://files.pythonhosted.org/packages/86/ea/6c958e07692367feeb1a1594d35e22b62f7f476f3c568b002a5ea09d443d/msgpack-1.1.1-cp311-cp311-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:ba0c325c3f485dc54ec298d8b024e134acf07c10d494ffa24373bea729acf704", size = 413528 }, + { url = "https://files.pythonhosted.org/packages/75/05/ac84063c5dae79722bda9f68b878dc31fc3059adb8633c79f1e82c2cd946/msgpack-1.1.1-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:88daaf7d146e48ec71212ce21109b66e06a98e5e44dca47d853cbfe171d6c8d2", size = 413338 }, + { url = "https://files.pythonhosted.org/packages/69/e8/fe86b082c781d3e1c09ca0f4dacd457ede60a13119b6ce939efe2ea77b76/msgpack-1.1.1-cp311-cp311-musllinux_1_2_i686.whl", hash = "sha256:d8b55ea20dc59b181d3f47103f113e6f28a5e1c89fd5b67b9140edb442ab67f2", size = 422658 }, + { url = "https://files.pythonhosted.org/packages/3b/2b/bafc9924df52d8f3bb7c00d24e57be477f4d0f967c0a31ef5e2225e035c7/msgpack-1.1.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4a28e8072ae9779f20427af07f53bbb8b4aa81151054e882aee333b158da8752", size = 427124 }, + { url = "https://files.pythonhosted.org/packages/a2/3b/1f717e17e53e0ed0b68fa59e9188f3f610c79d7151f0e52ff3cd8eb6b2dc/msgpack-1.1.1-cp311-cp311-win32.whl", hash = "sha256:7da8831f9a0fdb526621ba09a281fadc58ea12701bc709e7b8cbc362feabc295", size = 65016 }, + { url = "https://files.pythonhosted.org/packages/48/45/9d1780768d3b249accecc5a38c725eb1e203d44a191f7b7ff1941f7df60c/msgpack-1.1.1-cp311-cp311-win_amd64.whl", hash = "sha256:5fd1b58e1431008a57247d6e7cc4faa41c3607e8e7d4aaf81f7c29ea013cb458", size = 72267 }, + { url = "https://files.pythonhosted.org/packages/e3/26/389b9c593eda2b8551b2e7126ad3a06af6f9b44274eb3a4f054d48ff7e47/msgpack-1.1.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:ae497b11f4c21558d95de9f64fff7053544f4d1a17731c866143ed6bb4591238", size = 82359 }, + { url = "https://files.pythonhosted.org/packages/ab/65/7d1de38c8a22cf8b1551469159d4b6cf49be2126adc2482de50976084d78/msgpack-1.1.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:33be9ab121df9b6b461ff91baac6f2731f83d9b27ed948c5b9d1978ae28bf157", size = 79172 }, + { url = "https://files.pythonhosted.org/packages/0f/bd/cacf208b64d9577a62c74b677e1ada005caa9b69a05a599889d6fc2ab20a/msgpack-1.1.1-cp312-cp312-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:6f64ae8fe7ffba251fecb8408540c34ee9df1c26674c50c4544d72dbf792e5ce", size = 425013 }, + { url = "https://files.pythonhosted.org/packages/4d/ec/fd869e2567cc9c01278a736cfd1697941ba0d4b81a43e0aa2e8d71dab208/msgpack-1.1.1-cp312-cp312-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:a494554874691720ba5891c9b0b39474ba43ffb1aaf32a5dac874effb1619e1a", size = 426905 }, + { url = "https://files.pythonhosted.org/packages/55/2a/35860f33229075bce803a5593d046d8b489d7ba2fc85701e714fc1aaf898/msgpack-1.1.1-cp312-cp312-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:cb643284ab0ed26f6957d969fe0dd8bb17beb567beb8998140b5e38a90974f6c", size = 407336 }, + { url = "https://files.pythonhosted.org/packages/8c/16/69ed8f3ada150bf92745fb4921bd621fd2cdf5a42e25eb50bcc57a5328f0/msgpack-1.1.1-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:d275a9e3c81b1093c060c3837e580c37f47c51eca031f7b5fb76f7b8470f5f9b", size = 409485 }, + { url = "https://files.pythonhosted.org/packages/c6/b6/0c398039e4c6d0b2e37c61d7e0e9d13439f91f780686deb8ee64ecf1ae71/msgpack-1.1.1-cp312-cp312-musllinux_1_2_i686.whl", hash = 
"sha256:4fd6b577e4541676e0cc9ddc1709d25014d3ad9a66caa19962c4f5de30fc09ef", size = 412182 }, + { url = "https://files.pythonhosted.org/packages/b8/d0/0cf4a6ecb9bc960d624c93effaeaae75cbf00b3bc4a54f35c8507273cda1/msgpack-1.1.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:bb29aaa613c0a1c40d1af111abf025f1732cab333f96f285d6a93b934738a68a", size = 419883 }, + { url = "https://files.pythonhosted.org/packages/62/83/9697c211720fa71a2dfb632cad6196a8af3abea56eece220fde4674dc44b/msgpack-1.1.1-cp312-cp312-win32.whl", hash = "sha256:870b9a626280c86cff9c576ec0d9cbcc54a1e5ebda9cd26dab12baf41fee218c", size = 65406 }, + { url = "https://files.pythonhosted.org/packages/c0/23/0abb886e80eab08f5e8c485d6f13924028602829f63b8f5fa25a06636628/msgpack-1.1.1-cp312-cp312-win_amd64.whl", hash = "sha256:5692095123007180dca3e788bb4c399cc26626da51629a31d40207cb262e67f4", size = 72558 }, + { url = "https://files.pythonhosted.org/packages/a1/38/561f01cf3577430b59b340b51329803d3a5bf6a45864a55f4ef308ac11e3/msgpack-1.1.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:3765afa6bd4832fc11c3749be4ba4b69a0e8d7b728f78e68120a157a4c5d41f0", size = 81677 }, + { url = "https://files.pythonhosted.org/packages/09/48/54a89579ea36b6ae0ee001cba8c61f776451fad3c9306cd80f5b5c55be87/msgpack-1.1.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:8ddb2bcfd1a8b9e431c8d6f4f7db0773084e107730ecf3472f1dfe9ad583f3d9", size = 78603 }, + { url = "https://files.pythonhosted.org/packages/a0/60/daba2699b308e95ae792cdc2ef092a38eb5ee422f9d2fbd4101526d8a210/msgpack-1.1.1-cp313-cp313-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:196a736f0526a03653d829d7d4c5500a97eea3648aebfd4b6743875f28aa2af8", size = 420504 }, + { url = "https://files.pythonhosted.org/packages/20/22/2ebae7ae43cd8f2debc35c631172ddf14e2a87ffcc04cf43ff9df9fff0d3/msgpack-1.1.1-cp313-cp313-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9d592d06e3cc2f537ceeeb23d38799c6ad83255289bb84c2e5792e5a8dea268a", size = 423749 }, + { url = "https://files.pythonhosted.org/packages/40/1b/54c08dd5452427e1179a40b4b607e37e2664bca1c790c60c442c8e972e47/msgpack-1.1.1-cp313-cp313-manylinux_2_5_i686.manylinux1_i686.manylinux_2_17_i686.manylinux2014_i686.whl", hash = "sha256:4df2311b0ce24f06ba253fda361f938dfecd7b961576f9be3f3fbd60e87130ac", size = 404458 }, + { url = "https://files.pythonhosted.org/packages/2e/60/6bb17e9ffb080616a51f09928fdd5cac1353c9becc6c4a8abd4e57269a16/msgpack-1.1.1-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:e4141c5a32b5e37905b5940aacbc59739f036930367d7acce7a64e4dec1f5e0b", size = 405976 }, + { url = "https://files.pythonhosted.org/packages/ee/97/88983e266572e8707c1f4b99c8fd04f9eb97b43f2db40e3172d87d8642db/msgpack-1.1.1-cp313-cp313-musllinux_1_2_i686.whl", hash = "sha256:b1ce7f41670c5a69e1389420436f41385b1aa2504c3b0c30620764b15dded2e7", size = 408607 }, + { url = "https://files.pythonhosted.org/packages/bc/66/36c78af2efaffcc15a5a61ae0df53a1d025f2680122e2a9eb8442fed3ae4/msgpack-1.1.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:4147151acabb9caed4e474c3344181e91ff7a388b888f1e19ea04f7e73dc7ad5", size = 424172 }, + { url = "https://files.pythonhosted.org/packages/8c/87/a75eb622b555708fe0427fab96056d39d4c9892b0c784b3a721088c7ee37/msgpack-1.1.1-cp313-cp313-win32.whl", hash = "sha256:500e85823a27d6d9bba1d057c871b4210c1dd6fb01fbb764e37e4e8847376323", size = 65347 }, + { url = "https://files.pythonhosted.org/packages/ca/91/7dc28d5e2a11a5ad804cf2b7f7a5fcb1eb5a4966d66a5d2b41aee6376543/msgpack-1.1.1-cp313-cp313-win_amd64.whl", hash = 
"sha256:6d489fba546295983abd142812bda76b57e33d0b9f5d5b71c09a583285506f69", size = 72341 }, +] + +[[package]] +name = "pathspec" +version = "0.12.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/ca/bc/f35b8446f4531a7cb215605d100cd88b7ac6f44ab3fc94870c120ab3adbf/pathspec-0.12.1.tar.gz", hash = "sha256:a482d51503a1ab33b1c67a6c3813a26953dbdc71c31dacaef9a838c4e29f5712", size = 51043 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/cc/20/ff623b09d963f88bfde16306a54e12ee5ea43e9b597108672ff3a408aad6/pathspec-0.12.1-py3-none-any.whl", hash = "sha256:a0d503e138a4c123b27490a4f7beda6a01c6f288df0e4a8b79c7eb0dc7b4cc08", size = 31191 }, +] + [[package]] name = "pydantic" version = "2.10.6" @@ -295,6 +370,109 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a0/4b/528ccf7a982216885a1ff4908e886b8fb5f19862d1962f56a3fce2435a70/starlette-0.46.1-py3-none-any.whl", hash = "sha256:77c74ed9d2720138b25875133f3a2dae6d854af2ec37dceb56aef370c1d8a227", size = 71995 }, ] +[[package]] +name = "tree-sitter" +version = "0.25.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/89/2b/02a642e67605b9dd59986b00d13a076044dede04025a243f0592ac79d68c/tree-sitter-0.25.1.tar.gz", hash = "sha256:cd761ad0e4d1fc88a4b1b8083bae06d4f973acf6f5f29bbf13ea9609c1dec9c1", size = 177874 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e0/6c/6160ca15926d11a6957d8bee887f477f3c1d9bc5272c863affc0b50b9cff/tree_sitter-0.25.1-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:a15d62ffdb095d509bda8c140c1ddd0cc80f0c67f92b87fcc96cd242dc0c71ea", size = 146692 }, + { url = "https://files.pythonhosted.org/packages/81/4a/e5eb39fe73a514a13bf94acee97925de296d673dace00557763cbbdc938f/tree_sitter-0.25.1-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:1d938f0a1ffad1206a1a569b0501345eeca81cae0a4487bb485e53768b02f24e", size = 141015 }, + { url = "https://files.pythonhosted.org/packages/63/22/c8e3ba245e5cdb8c951482028a7ee99d141302047b708dc9d670f0fafd85/tree_sitter-0.25.1-cp310-cp310-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ba8cea296de5dcb384b9a15cf526985ac8339c81da51c7e29a251d82071f5ee9", size = 599462 }, + { url = "https://files.pythonhosted.org/packages/c2/91/c866c3d278ee86354fd81fd055b5d835c510b0e9af07e1cf7e48e2f946b0/tree_sitter-0.25.1-cp310-cp310-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:387fd2bd8657d69e877618dc199c18e2d6fe073b8f5c59e23435f3baee4ee10a", size = 627062 }, + { url = "https://files.pythonhosted.org/packages/90/96/ac010f72778dae60381ab5fcca9651ac72647d582db0b027ca6c56116920/tree_sitter-0.25.1-cp310-cp310-musllinux_1_2_x86_64.whl", hash = "sha256:afa49e51f82b58ae2c1291d6b79ca31e0fb36c04bd9a20d89007472edfb70136", size = 623788 }, + { url = "https://files.pythonhosted.org/packages/0e/29/190bdfd54a564a2e43a702884ad5679f4578c481a46161f9f335dd390a70/tree_sitter-0.25.1-cp310-cp310-win_amd64.whl", hash = "sha256:77be45f666adf284914510794b41100decccd71dba88010c03dc2bb0d653acec", size = 127253 }, + { url = "https://files.pythonhosted.org/packages/da/60/7daca5ccf65fb204c9f2cc2907db6aeaf1cb42aa605427580c17a38a53b3/tree_sitter-0.25.1-cp310-cp310-win_arm64.whl", hash = "sha256:72badac2de4e81ae0df5efe14ec5003bd4df3e48e7cf84dbd9df3a54599ba371", size = 113930 }, + { url = 
"https://files.pythonhosted.org/packages/17/dc/0dabb75d249108fb9062d6e9e791e4ad8e9ae5c095e06dd8af770bc07902/tree_sitter-0.25.1-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:33a8fbaeb2b5049cf5318306ab8b16ab365828b2b21ee13678c29e0726a1d27a", size = 146696 }, + { url = "https://files.pythonhosted.org/packages/da/d0/b7305a05d65dbcfce7a97a93252bf7384f09800866e9de55a625c76e0257/tree_sitter-0.25.1-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:797bbbc686d8d3722d25ee0108ad979bda6ad3e1025859ce2ee290e517816bd4", size = 141014 }, + { url = "https://files.pythonhosted.org/packages/84/d0/d0d8bd13c44ef6379499712a3f5e3930e7db11e5c8eb2af8655e288597a3/tree_sitter-0.25.1-cp311-cp311-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:629fc2ae3f5954b0f6a7b42ee3fcd8f34b68ea161e9f02fa5bf709cbbac996d3", size = 604339 }, + { url = "https://files.pythonhosted.org/packages/c5/13/22869a6da25ffe2dfff922712605e72a9c3481109a93f4218bea1bc65f35/tree_sitter-0.25.1-cp311-cp311-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:4257018c42a33a7935a5150d678aac05c6594347d6a6e6dbdf7e2ef4ae985213", size = 631593 }, + { url = "https://files.pythonhosted.org/packages/ec/0c/f4590fc08422768fc57456a85c932888a02e7a13540574859308611be1cf/tree_sitter-0.25.1-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:4027854c9feee2a3bb99642145ba04ce95d75bd17e292911c93a488cb28d0a04", size = 629265 }, + { url = "https://files.pythonhosted.org/packages/a7/a8/ee9305ce9a7417715cbf038fdcc4fdb6042e30065c9837bdcf36be440388/tree_sitter-0.25.1-cp311-cp311-win_amd64.whl", hash = "sha256:183faaedcee5f0a3ba39257fa81749709d5eb7cf92c2c050b36ff38468d1774c", size = 127210 }, + { url = "https://files.pythonhosted.org/packages/48/64/6a39882f534373873ef3dba8a1a8f47dc3bfb39ee63784eac2e789b404c4/tree_sitter-0.25.1-cp311-cp311-win_arm64.whl", hash = "sha256:6a3800235535a2532ce392ed0d8e6f698ee010e73805bdeac2f249da8246bab6", size = 113928 }, + { url = "https://files.pythonhosted.org/packages/45/79/6dea0c098879d99f41ba919da1ea46e614fb4bf9c4d591450061aeec6fcb/tree_sitter-0.25.1-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:9362a202144075b54f7c9f07e0b0e44a61eed7ee19e140c506b9e64c1d21ed58", size = 146928 }, + { url = "https://files.pythonhosted.org/packages/15/30/8002f4e76c7834a6101895ff7524ea29ab4f1f1da1270260ef52e2319372/tree_sitter-0.25.1-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:593f22529f34dd04de02f56ea6d7c2c8ec99dfab25b58be893247c1090dedd60", size = 140802 }, + { url = "https://files.pythonhosted.org/packages/38/ec/d297ad9d4a4b26f551a5ca49afe48fdbcb20f058c2eff8d8463ad6c0eed1/tree_sitter-0.25.1-cp312-cp312-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:ebb6849f76e1cbfa223303fa680da533d452e378d5fe372598e4752838ca7929", size = 606762 }, + { url = "https://files.pythonhosted.org/packages/4a/1c/05a623cfb420b10d5f782d4ec064cf00fbfa9c21b8526ca4fd042f80acff/tree_sitter-0.25.1-cp312-cp312-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:034d4544bb0f82e449033d76dd083b131c3f9ecb5e37d3475f80ae55e8f382bd", size = 634632 }, + { url = "https://files.pythonhosted.org/packages/c5/e0/f05fd5a2331c16d428efb8eef32dfb80dc6565438146e34e9a235ecd7925/tree_sitter-0.25.1-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:46a9b721560070f2f980105266e28a17d3149485582cdba14d66dca14692e932", size = 630756 }, + { url = 
"https://files.pythonhosted.org/packages/b2/fc/79f3c5d53d1721b95ab6cda0368192a4f1d367e3a5ff7ac21d77e9841782/tree_sitter-0.25.1-cp312-cp312-win_amd64.whl", hash = "sha256:9a5c522b1350a626dc1cbc5dc203133caeaa114d3f65e400445e8b02f18b343b", size = 127157 }, + { url = "https://files.pythonhosted.org/packages/24/b7/07c4e3f71af0096db6c2ecd83e7d61584e3891c79cb39b208082312d1d60/tree_sitter-0.25.1-cp312-cp312-win_arm64.whl", hash = "sha256:43e7b8e83f9fc29ca62e7d2aa8c38e3fa806ff3fc65e0d501d18588dc1509888", size = 113910 }, + { url = "https://files.pythonhosted.org/packages/3f/d3/bfb08aab9c7daed2715f303cc017329e3512bb77678cc28829681decadd2/tree_sitter-0.25.1-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:ae1eebc175e6a50b38b0e0385cdc26e92ac0bff9b32ee1c0619bbbf6829d57ea", size = 146920 }, + { url = "https://files.pythonhosted.org/packages/f9/36/7f897c50489c38665255579646fca8191e1b9e5a29ac9cf11022e42e1e2b/tree_sitter-0.25.1-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:9e0ae03c4f132f1bffb2bc40b1bb28742785507da693ab04da8531fe534ada9c", size = 140782 }, + { url = "https://files.pythonhosted.org/packages/16/e6/85012113899296b8e0789ae94f562d3971d7d3df989e8bec6128749394e1/tree_sitter-0.25.1-cp313-cp313-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:acf571758be0a71046a61a0936cb815f15b13e0ae7ec6d08398e4aa1560b371d", size = 607590 }, + { url = "https://files.pythonhosted.org/packages/49/93/605b08dc4cf76d08cfacebc30a88467c6526ea5c94592c25240518e38b71/tree_sitter-0.25.1-cp313-cp313-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:632910847e3f8ae35841f92cba88a9a1b8bc56ecc1514a5affebf7951fa0fc0a", size = 635553 }, + { url = "https://files.pythonhosted.org/packages/ce/27/123667f756bb32168507c940db9040104c606fbb0214397d3c20cf985073/tree_sitter-0.25.1-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:a99ecef7771afb118b2a8435c8ba67ea7a085c60d5d33dc0a4794ed882e5f7df", size = 630844 }, + { url = "https://files.pythonhosted.org/packages/2f/53/180b0ed74153a3c9a23967f54774d5930c2e0b67671ae4ca0d4d35ba18ac/tree_sitter-0.25.1-cp313-cp313-win_amd64.whl", hash = "sha256:c1d6393454d1f9d4195c74e40a487640cd4390cd4aee90837485f932a1a0f40c", size = 127159 }, + { url = "https://files.pythonhosted.org/packages/32/fb/b8b7b5122ac4a80cd689a5023f2416910e10f9534ace1cdf0020a315d40d/tree_sitter-0.25.1-cp313-cp313-win_arm64.whl", hash = "sha256:c1d2dbf7d12426b71ff49739f599c355f4de338a5c0ab994de2a1d290f6e0b20", size = 113920 }, + { url = "https://files.pythonhosted.org/packages/70/8c/cb851da552baf4215baf96443e5e9e39095083a95bc05c4444e640fe0fe8/tree_sitter-0.25.1-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:32cee52264d9ecf98885fcac0185ac63e16251b31dd8b4a3b8d8071173405f8f", size = 146775 }, + { url = "https://files.pythonhosted.org/packages/f3/59/002c89df1e8f1664b82023e5d0c06de97fff5c2a2e33dce1a241c8909758/tree_sitter-0.25.1-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:ae024d8ccfef51e61c44a81af7a48670601430701c24f450bea10f4b4effd8d1", size = 140787 }, + { url = "https://files.pythonhosted.org/packages/39/48/c9e6deb88f3c7f16963ef205e5b8e3ea7f5effd048b4515d09738c7b032b/tree_sitter-0.25.1-cp314-cp314-manylinux2014_aarch64.manylinux_2_17_aarch64.manylinux_2_28_aarch64.whl", hash = "sha256:d025c56c393cea660df9ef33ca60329952a1f8ee6212d21b2b390dfec08a3874", size = 609173 }, + { url = 
"https://files.pythonhosted.org/packages/53/a8/b782576d7ea081a87285d974005155da03b6d0c66283fe1e3a5e0dd4bd98/tree_sitter-0.25.1-cp314-cp314-manylinux2014_x86_64.manylinux_2_17_x86_64.manylinux_2_28_x86_64.whl", hash = "sha256:044aa23ea14f337809821bea7467f33f4c6d351739dca76ba0cbe4d0154d8662", size = 635994 }, + { url = "https://files.pythonhosted.org/packages/70/0a/c5b6c9cdb7bd4bf0c3d2bd494fcf356acc53f8e63007dc2a836d95bbe964/tree_sitter-0.25.1-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:1863d96704eb002df4ad3b738294ae8bd5dcf8cefb715da18bff6cb2d33d978e", size = 630944 }, + { url = "https://files.pythonhosted.org/packages/12/2a/d0b097157c2d487f5e6293dae2c106ec9ede792a6bb780249e81432e754d/tree_sitter-0.25.1-cp314-cp314-win_amd64.whl", hash = "sha256:a40a481e28e1afdbc455932d61e49ffd4163aafa83f4a3deb717524a7786197e", size = 130831 }, + { url = "https://files.pythonhosted.org/packages/ce/33/3591e7b22dd49f46ae4fdee1db316ecefd0486cae880c5b497a55f0ccb24/tree_sitter-0.25.1-cp314-cp314-win_arm64.whl", hash = "sha256:f7b68f584336b39b2deab9896b629dddc3c784170733d3409f01fe825e9c04eb", size = 117376 }, +] + +[[package]] +name = "tree-sitter-java" +version = "0.23.5" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/fa/dc/eb9c8f96304e5d8ae1663126d89967a622a80937ad2909903569ccb7ec8f/tree_sitter_java-0.23.5.tar.gz", hash = "sha256:f5cd57b8f1270a7f0438878750d02ccc79421d45cca65ff284f1527e9ef02e38", size = 138121 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/67/21/b3399780b440e1567a11d384d0ebb1aea9b642d0d98becf30fa55c0e3a3b/tree_sitter_java-0.23.5-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:355ce0308672d6f7013ec913dee4a0613666f4cda9044a7824240d17f38209df", size = 58926 }, + { url = "https://files.pythonhosted.org/packages/57/ef/6406b444e2a93bc72a04e802f4107e9ecf04b8de4a5528830726d210599c/tree_sitter_java-0.23.5-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:24acd59c4720dedad80d548fe4237e43ef2b7a4e94c8549b0ca6e4c4d7bf6e69", size = 62288 }, + { url = "https://files.pythonhosted.org/packages/4e/6c/74b1c150d4f69c291ab0b78d5dd1b59712559bbe7e7daf6d8466d483463f/tree_sitter_java-0.23.5-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:9401e7271f0b333df39fc8a8336a0caf1b891d9a2b89ddee99fae66b794fc5b7", size = 85533 }, + { url = "https://files.pythonhosted.org/packages/29/09/e0d08f5c212062fd046db35c1015a2621c2631bc8b4aae5740d7adb276ad/tree_sitter_java-0.23.5-cp39-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:370b204b9500b847f6d0c5ad584045831cee69e9a3e4d878535d39e4a7e4c4f1", size = 84033 }, + { url = "https://files.pythonhosted.org/packages/43/56/7d06b23ddd09bde816a131aa504ee11a1bbe87c6b62ab9b2ed23849a3382/tree_sitter_java-0.23.5-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:aae84449e330363b55b14a2af0585e4e0dae75eb64ea509b7e5b0e1de536846a", size = 82564 }, + { url = "https://files.pythonhosted.org/packages/da/d6/0528c7e1e88a18221dbd8ccee3825bf274b1fa300f745fd74eb343878043/tree_sitter_java-0.23.5-cp39-abi3-win_amd64.whl", hash = "sha256:1ee45e790f8d31d416bc84a09dac2e2c6bc343e89b8a2e1d550513498eedfde7", size = 60650 }, + { url = "https://files.pythonhosted.org/packages/72/57/5bab54d23179350356515526fff3cc0f3ac23bfbc1a1d518a15978d4880e/tree_sitter_java-0.23.5-cp39-abi3-win_arm64.whl", hash = "sha256:402efe136104c5603b429dc26c7e75ae14faaca54cfd319ecc41c8f2534750f4", size = 59059 }, +] + +[[package]] +name = "tree-sitter-javascript" +version = 
"0.23.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/cd/dc/1c55c33cc6bbe754359b330534cf9f261c1b9b2c26ddf23aef3c5fa67759/tree_sitter_javascript-0.23.1.tar.gz", hash = "sha256:b2059ce8b150162cda05a457ca3920450adbf915119c04b8c67b5241cd7fcfed", size = 110058 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/20/d3/c67d7d49967344b51208ad19f105233be1afdf07d3dcb35b471900265227/tree_sitter_javascript-0.23.1-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:6ca583dad4bd79d3053c310b9f7208cd597fd85f9947e4ab2294658bb5c11e35", size = 59333 }, + { url = "https://files.pythonhosted.org/packages/a5/db/ea0ee1547679d1750e80a0c4bc60b3520b166eeaf048764cfdd1ba3fd5e5/tree_sitter_javascript-0.23.1-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:94100e491a6a247aa4d14caf61230c171b6376c863039b6d9cd71255c2d815ec", size = 61071 }, + { url = "https://files.pythonhosted.org/packages/67/6e/07c4857e08be37bfb55bfb269863df8ec908b2f6a3f1893cd852b893ecab/tree_sitter_javascript-0.23.1-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:5a6bc1055b061c5055ec58f39ee9b2e9efb8e6e0ae970838af74da0afb811f0a", size = 96999 }, + { url = "https://files.pythonhosted.org/packages/5f/f5/4de730afe8b9422845bc2064020a8a8f49ebd1695c04261c38d1b3e3edec/tree_sitter_javascript-0.23.1-cp39-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:056dc04fb6b24293f8c5fec43c14e7e16ba2075b3009c643abf8c85edc4c7c3c", size = 94020 }, + { url = "https://files.pythonhosted.org/packages/77/0a/f980520da86c4eff8392867840a945578ef43372c9d4a37922baa6b121fe/tree_sitter_javascript-0.23.1-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:a11ca1c0f736da42967586b568dff8a465ee148a986c15ebdc9382806e0ce871", size = 92927 }, + { url = "https://files.pythonhosted.org/packages/ff/5c/36a98d512aa1d1082409d6b7eda5d26b820bd4477a54100ad9f62212bc55/tree_sitter_javascript-0.23.1-cp39-abi3-win_amd64.whl", hash = "sha256:041fa22b34250ea6eb313d33104d5303f79504cb259d374d691e38bbdc49145b", size = 58824 }, + { url = "https://files.pythonhosted.org/packages/dc/79/ceb21988e6de615355a63eebcf806cd2a0fe875bec27b429d58b63e7fb5f/tree_sitter_javascript-0.23.1-cp39-abi3-win_arm64.whl", hash = "sha256:eb28130cd2fb30d702d614cbf61ef44d1c7f6869e7d864a9cc17111e370be8f7", size = 57027 }, +] + +[[package]] +name = "tree-sitter-typescript" +version = "0.23.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/1e/fc/bb52958f7e399250aee093751e9373a6311cadbe76b6e0d109b853757f35/tree_sitter_typescript-0.23.2.tar.gz", hash = "sha256:7b167b5827c882261cb7a50dfa0fb567975f9b315e87ed87ad0a0a3aedb3834d", size = 773053 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/28/95/4c00680866280e008e81dd621fd4d3f54aa3dad1b76b857a19da1b2cc426/tree_sitter_typescript-0.23.2-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:3cd752d70d8e5371fdac6a9a4df9d8924b63b6998d268586f7d374c9fba2a478", size = 286677 }, + { url = "https://files.pythonhosted.org/packages/8f/2f/1f36fda564518d84593f2740d5905ac127d590baf5c5753cef2a88a89c15/tree_sitter_typescript-0.23.2-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:c7cc1b0ff5d91bac863b0e38b1578d5505e718156c9db577c8baea2557f66de8", size = 302008 }, + { url = "https://files.pythonhosted.org/packages/96/2d/975c2dad292aa9994f982eb0b69cc6fda0223e4b6c4ea714550477d8ec3a/tree_sitter_typescript-0.23.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = 
"sha256:4b1eed5b0b3a8134e86126b00b743d667ec27c63fc9de1b7bb23168803879e31", size = 351987 }, + { url = "https://files.pythonhosted.org/packages/49/d1/a71c36da6e2b8a4ed5e2970819b86ef13ba77ac40d9e333cb17df6a2c5db/tree_sitter_typescript-0.23.2-cp39-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e96d36b85bcacdeb8ff5c2618d75593ef12ebaf1b4eace3477e2bdb2abb1752c", size = 344960 }, + { url = "https://files.pythonhosted.org/packages/7f/cb/f57b149d7beed1a85b8266d0c60ebe4c46e79c9ba56bc17b898e17daf88e/tree_sitter_typescript-0.23.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:8d4f0f9bcb61ad7b7509d49a1565ff2cc363863644a234e1e0fe10960e55aea0", size = 340245 }, + { url = "https://files.pythonhosted.org/packages/8b/ab/dd84f0e2337296a5f09749f7b5483215d75c8fa9e33738522e5ed81f7254/tree_sitter_typescript-0.23.2-cp39-abi3-win_amd64.whl", hash = "sha256:3f730b66396bc3e11811e4465c41ee45d9e9edd6de355a58bbbc49fa770da8f9", size = 278015 }, + { url = "https://files.pythonhosted.org/packages/9f/e4/81f9a935789233cf412a0ed5fe04c883841d2c8fb0b7e075958a35c65032/tree_sitter_typescript-0.23.2-cp39-abi3-win_arm64.whl", hash = "sha256:05db58f70b95ef0ea126db5560f3775692f609589ed6f8dd0af84b7f19f1cbb7", size = 274052 }, +] + +[[package]] +name = "tree-sitter-zig" +version = "1.1.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/5c/97/75967b81460e0ce999de4736b9ac189dcd5ad1c85aabcc398ba529f4838e/tree_sitter_zig-1.1.2.tar.gz", hash = "sha256:da24db16df92f7fcfa34448e06a14b637b1ff985f7ce2ee19183c489e187a92e", size = 194084 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/b3/c6/db41d3f6c7c0174db56d9122a2a4d8b345c377ca87268e76557b2879675e/tree_sitter_zig-1.1.2-cp39-abi3-macosx_10_9_x86_64.whl", hash = "sha256:e7542354a5edba377b5692b2add4f346501306d455e192974b7e76bf1a61a282", size = 61900 }, + { url = "https://files.pythonhosted.org/packages/5a/78/93d32fea98b3b031bc0fbec44e27f2b8cc1a1a8ff5a99dfb1a8f85b11d43/tree_sitter_zig-1.1.2-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:daa2cdd7c1a2d278f2a917c85993adb6e84d37778bfc350ee9e342872e7f8be2", size = 67837 }, + { url = "https://files.pythonhosted.org/packages/40/45/ef5afd6b79bd58731dae2cf61ff7960dd616737397db4d2e926457ff24b7/tree_sitter_zig-1.1.2-cp39-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:1962e95067ac5ee784daddd573f828ef32f15e9c871967df6833d3d389113eae", size = 83391 }, + { url = "https://files.pythonhosted.org/packages/78/02/275523eb05108d83e154f52c7255763bac8b588ae14163563e19479322a7/tree_sitter_zig-1.1.2-cp39-abi3-manylinux_2_5_x86_64.manylinux1_x86_64.manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:e924509dcac5a6054da357e3d6bcf37ea82984ee1d2a376569753d32f61ea8bb", size = 82323 }, + { url = "https://files.pythonhosted.org/packages/ef/e9/ff3c11097e37d4d899155c8fbdf7531063b6d15ee252b2e01ce0063f0218/tree_sitter_zig-1.1.2-cp39-abi3-musllinux_1_2_x86_64.whl", hash = "sha256:d8f463c370cdd71025b8d40f90e21e8fc25c7394eb64ebd53b1e566d712a3a68", size = 81383 }, + { url = "https://files.pythonhosted.org/packages/ab/5c/f5fb2ce355bbd381e647b04e8b2078a4043e663b6df6145d87550d3c3fe5/tree_sitter_zig-1.1.2-cp39-abi3-win_amd64.whl", hash = "sha256:7b94f00a0e69231ac4ebf0aa763734b9b5637e0ff13634ebfe6d13fadece71e9", size = 65105 }, + { url = "https://files.pythonhosted.org/packages/34/8d/c0a481cc7bba9d39c533dd3098463854b5d3c4e6134496d9d83cd1331e51/tree_sitter_zig-1.1.2-cp39-abi3-win_arm64.whl", hash = 
"sha256:88152ebeaeca1431a6fc943a8b391fee6f6a8058f17435015135157735061ddf", size = 63219 }, +] + [[package]] name = "typing-extensions" version = "4.12.2" @@ -317,3 +495,36 @@ sdist = { url = "https://files.pythonhosted.org/packages/4b/4d/938bd85e5bf2edeec wheels = [ { url = "https://files.pythonhosted.org/packages/61/14/33a3a1352cfa71812a3a21e8c9bfb83f60b0011f5e36f2b1399d51928209/uvicorn-0.34.0-py3-none-any.whl", hash = "sha256:023dc038422502fa28a09c7a30bf2b6991512da7dcdb8fd35fe57cfc154126f4", size = 62315 }, ] + +[[package]] +name = "watchdog" +version = "6.0.0" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/db/7d/7f3d619e951c88ed75c6037b246ddcf2d322812ee8ea189be89511721d54/watchdog-6.0.0.tar.gz", hash = "sha256:9ddf7c82fda3ae8e24decda1338ede66e1c99883db93711d8fb941eaa2d8c282", size = 131220 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/0c/56/90994d789c61df619bfc5ce2ecdabd5eeff564e1eb47512bd01b5e019569/watchdog-6.0.0-cp310-cp310-macosx_10_9_universal2.whl", hash = "sha256:d1cdb490583ebd691c012b3d6dae011000fe42edb7a82ece80965b42abd61f26", size = 96390 }, + { url = "https://files.pythonhosted.org/packages/55/46/9a67ee697342ddf3c6daa97e3a587a56d6c4052f881ed926a849fcf7371c/watchdog-6.0.0-cp310-cp310-macosx_10_9_x86_64.whl", hash = "sha256:bc64ab3bdb6a04d69d4023b29422170b74681784ffb9463ed4870cf2f3e66112", size = 88389 }, + { url = "https://files.pythonhosted.org/packages/44/65/91b0985747c52064d8701e1075eb96f8c40a79df889e59a399453adfb882/watchdog-6.0.0-cp310-cp310-macosx_11_0_arm64.whl", hash = "sha256:c897ac1b55c5a1461e16dae288d22bb2e412ba9807df8397a635d88f671d36c3", size = 89020 }, + { url = "https://files.pythonhosted.org/packages/e0/24/d9be5cd6642a6aa68352ded4b4b10fb0d7889cb7f45814fb92cecd35f101/watchdog-6.0.0-cp311-cp311-macosx_10_9_universal2.whl", hash = "sha256:6eb11feb5a0d452ee41f824e271ca311a09e250441c262ca2fd7ebcf2461a06c", size = 96393 }, + { url = "https://files.pythonhosted.org/packages/63/7a/6013b0d8dbc56adca7fdd4f0beed381c59f6752341b12fa0886fa7afc78b/watchdog-6.0.0-cp311-cp311-macosx_10_9_x86_64.whl", hash = "sha256:ef810fbf7b781a5a593894e4f439773830bdecb885e6880d957d5b9382a960d2", size = 88392 }, + { url = "https://files.pythonhosted.org/packages/d1/40/b75381494851556de56281e053700e46bff5b37bf4c7267e858640af5a7f/watchdog-6.0.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:afd0fe1b2270917c5e23c2a65ce50c2a4abb63daafb0d419fde368e272a76b7c", size = 89019 }, + { url = "https://files.pythonhosted.org/packages/39/ea/3930d07dafc9e286ed356a679aa02d777c06e9bfd1164fa7c19c288a5483/watchdog-6.0.0-cp312-cp312-macosx_10_13_universal2.whl", hash = "sha256:bdd4e6f14b8b18c334febb9c4425a878a2ac20efd1e0b231978e7b150f92a948", size = 96471 }, + { url = "https://files.pythonhosted.org/packages/12/87/48361531f70b1f87928b045df868a9fd4e253d9ae087fa4cf3f7113be363/watchdog-6.0.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:c7c15dda13c4eb00d6fb6fc508b3c0ed88b9d5d374056b239c4ad1611125c860", size = 88449 }, + { url = "https://files.pythonhosted.org/packages/5b/7e/8f322f5e600812e6f9a31b75d242631068ca8f4ef0582dd3ae6e72daecc8/watchdog-6.0.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:6f10cb2d5902447c7d0da897e2c6768bca89174d0c6e1e30abec5421af97a5b0", size = 89054 }, + { url = "https://files.pythonhosted.org/packages/68/98/b0345cabdce2041a01293ba483333582891a3bd5769b08eceb0d406056ef/watchdog-6.0.0-cp313-cp313-macosx_10_13_universal2.whl", hash = 
"sha256:490ab2ef84f11129844c23fb14ecf30ef3d8a6abafd3754a6f75ca1e6654136c", size = 96480 }, + { url = "https://files.pythonhosted.org/packages/85/83/cdf13902c626b28eedef7ec4f10745c52aad8a8fe7eb04ed7b1f111ca20e/watchdog-6.0.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:76aae96b00ae814b181bb25b1b98076d5fc84e8a53cd8885a318b42b6d3a5134", size = 88451 }, + { url = "https://files.pythonhosted.org/packages/fe/c4/225c87bae08c8b9ec99030cd48ae9c4eca050a59bf5c2255853e18c87b50/watchdog-6.0.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a175f755fc2279e0b7312c0035d52e27211a5bc39719dd529625b1930917345b", size = 89057 }, + { url = "https://files.pythonhosted.org/packages/30/ad/d17b5d42e28a8b91f8ed01cb949da092827afb9995d4559fd448d0472763/watchdog-6.0.0-pp310-pypy310_pp73-macosx_10_15_x86_64.whl", hash = "sha256:c7ac31a19f4545dd92fc25d200694098f42c9a8e391bc00bdd362c5736dbf881", size = 87902 }, + { url = "https://files.pythonhosted.org/packages/5c/ca/c3649991d140ff6ab67bfc85ab42b165ead119c9e12211e08089d763ece5/watchdog-6.0.0-pp310-pypy310_pp73-macosx_11_0_arm64.whl", hash = "sha256:9513f27a1a582d9808cf21a07dae516f0fab1cf2d7683a742c498b93eedabb11", size = 88380 }, + { url = "https://files.pythonhosted.org/packages/a9/c7/ca4bf3e518cb57a686b2feb4f55a1892fd9a3dd13f470fca14e00f80ea36/watchdog-6.0.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:7607498efa04a3542ae3e05e64da8202e58159aa1fa4acddf7678d34a35d4f13", size = 79079 }, + { url = "https://files.pythonhosted.org/packages/5c/51/d46dc9332f9a647593c947b4b88e2381c8dfc0942d15b8edc0310fa4abb1/watchdog-6.0.0-py3-none-manylinux2014_armv7l.whl", hash = "sha256:9041567ee8953024c83343288ccc458fd0a2d811d6a0fd68c4c22609e3490379", size = 79078 }, + { url = "https://files.pythonhosted.org/packages/d4/57/04edbf5e169cd318d5f07b4766fee38e825d64b6913ca157ca32d1a42267/watchdog-6.0.0-py3-none-manylinux2014_i686.whl", hash = "sha256:82dc3e3143c7e38ec49d61af98d6558288c415eac98486a5c581726e0737c00e", size = 79076 }, + { url = "https://files.pythonhosted.org/packages/ab/cc/da8422b300e13cb187d2203f20b9253e91058aaf7db65b74142013478e66/watchdog-6.0.0-py3-none-manylinux2014_ppc64.whl", hash = "sha256:212ac9b8bf1161dc91bd09c048048a95ca3a4c4f5e5d4a7d1b1a7d5752a7f96f", size = 79077 }, + { url = "https://files.pythonhosted.org/packages/2c/3b/b8964e04ae1a025c44ba8e4291f86e97fac443bca31de8bd98d3263d2fcf/watchdog-6.0.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:e3df4cbb9a450c6d49318f6d14f4bbc80d763fa587ba46ec86f99f9e6876bb26", size = 79078 }, + { url = "https://files.pythonhosted.org/packages/62/ae/a696eb424bedff7407801c257d4b1afda455fe40821a2be430e173660e81/watchdog-6.0.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:2cce7cfc2008eb51feb6aab51251fd79b85d9894e98ba847408f662b3395ca3c", size = 79077 }, + { url = "https://files.pythonhosted.org/packages/b5/e8/dbf020b4d98251a9860752a094d09a65e1b436ad181faf929983f697048f/watchdog-6.0.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:20ffe5b202af80ab4266dcd3e91aae72bf2da48c0d33bdb15c66658e685e94e2", size = 79078 }, + { url = "https://files.pythonhosted.org/packages/07/f6/d0e5b343768e8bcb4cda79f0f2f55051bf26177ecd5651f84c07567461cf/watchdog-6.0.0-py3-none-win32.whl", hash = "sha256:07df1fdd701c5d4c8e55ef6cf55b8f0120fe1aef7ef39a1c6fc6bc2e606d517a", size = 79065 }, + { url = "https://files.pythonhosted.org/packages/db/d9/c495884c6e548fce18a8f40568ff120bc3a4b7b99813081c8ac0c936fa64/watchdog-6.0.0-py3-none-win_amd64.whl", hash = "sha256:cbafb470cf848d93b5d013e2ecb245d4aa1c8fd0504e863ccefa32445359d680", size = 79070 }, + 
{ url = "https://files.pythonhosted.org/packages/33/e8/e40370e6d74ddba47f002a32919d91310d6074130fe4e17dabcafc15cbf1/watchdog-6.0.0-py3-none-win_ia64.whl", hash = "sha256:a1914259fa9e1454315171103c6a30961236f508b9b623eae470268bbcc6a22f", size = 79067 }, +] +