9
9
import json
10
10
import logging
11
11
import os
12
+ import re
12
13
import tempfile
13
14
import threading
14
15
import fnmatch
15
16
from pathlib import Path
16
17
from typing import Dict , List , Optional , Any
17
18
18
19
from .json_index_builder import JSONIndexBuilder
19
- from ..constants import SETTINGS_DIR , INDEX_FILE
20
+ from ..constants import SETTINGS_DIR , INDEX_FILE , INDEX_FILE_SHALLOW
20
21
21
22
logger = logging .getLogger (__name__ )
22
23
@@ -29,6 +30,8 @@ def __init__(self):
29
30
self .index_builder : Optional [JSONIndexBuilder ] = None
30
31
self .temp_dir : Optional [str ] = None
31
32
self .index_path : Optional [str ] = None
33
+ self .shallow_index_path : Optional [str ] = None
34
+ self ._shallow_file_list : Optional [List [str ]] = None
32
35
self ._lock = threading .RLock ()
33
36
logger .info ("Initialized JSON Index Manager" )
34
37
@@ -59,6 +62,7 @@ def set_project_path(self, project_path: str) -> bool:
59
62
os .makedirs (self .temp_dir , exist_ok = True )
60
63
61
64
self .index_path = os .path .join (self .temp_dir , INDEX_FILE )
65
+ self .shallow_index_path = os .path .join (self .temp_dir , INDEX_FILE_SHALLOW )
62
66
63
67
logger .info (f"Set project path: { project_path } " )
64
68
logger .info (f"Index storage: { self .index_path } " )
@@ -114,6 +118,52 @@ def load_index(self) -> bool:
114
118
logger .error (f"Failed to load index: { e } " )
115
119
return False
116
120
121
def build_shallow_index(self) -> bool:
    """Generate the shallow index (a flat file list) and write it to disk.

    The list is obtained from ``self.index_builder.build_shallow_file_list()``,
    serialized as a JSON array to ``self.shallow_index_path`` and cached in
    ``self._shallow_file_list``.

    Returns:
        True when the list was built, saved and cached; False when the
        builder, project path or shallow index path is unset, or when any
        step raised an exception (the error is logged).
    """
    with self._lock:
        prerequisites_met = (
            self.index_builder and self.project_path and self.shallow_index_path
        )
        if not prerequisites_met:
            logger.error("Index builder not initialized for shallow index")
            return False

        try:
            paths = self.index_builder.build_shallow_file_list()
            # A bare JSON array keeps the on-disk representation minimal.
            with open(self.shallow_index_path, 'w', encoding='utf-8') as out:
                json.dump(paths, out, ensure_ascii=False)
            # Only cache the list once it has been written successfully.
            self._shallow_file_list = paths
            logger.info(f"Saved shallow index with {len(paths)} files to {self.shallow_index_path}")
        except Exception as exc:
            logger.error(f"Failed to build shallow index: {exc}")
            return False
        return True
139
+
140
def load_shallow_index(self) -> bool:
    """Read the persisted shallow index (file list) and cache it in memory.

    The file at ``self.shallow_index_path`` must contain a JSON array.
    String entries are normalized to forward slashes with any leading
    ``./`` removed; non-string entries are dropped.

    Returns:
        True when the list was loaded into ``self._shallow_file_list``;
        False when the file is missing, is not a JSON array, or reading
        it raised an exception (the error is logged).
    """
    with self._lock:
        try:
            path = self.shallow_index_path
            if not path or not os.path.exists(path):
                logger.warning("No existing shallow index found")
                return False

            with open(path, 'r', encoding='utf-8') as src:
                entries = json.load(src)

            if not isinstance(entries, list):
                logger.error("Shallow index format invalid (expected list)")
                return False

            def _to_forward_slashes(raw):
                # Collapse doubled backslashes first so they become one '/'.
                cleaned = raw.replace('\\\\', '/').replace('\\', '/')
                return cleaned[2:] if cleaned.startswith('./') else cleaned

            cleaned_paths = [
                _to_forward_slashes(entry)
                for entry in entries
                if isinstance(entry, str)
            ]
            self._shallow_file_list = cleaned_paths
            logger.info(f"Loaded shallow index with {len(cleaned_paths)} files")
            return True
        except Exception as exc:
            logger.error(f"Failed to load shallow index: {exc}")
            return False
166
+
117
167
def refresh_index (self ) -> bool :
118
168
"""Refresh the index (rebuild and reload)."""
119
169
with self ._lock :
@@ -123,7 +173,14 @@ def refresh_index(self) -> bool:
123
173
return False
124
174
125
175
def find_files (self , pattern : str = "*" ) -> List [str ]:
126
- """Find files matching a pattern."""
176
+ """
177
+ Find files matching a glob pattern using the SHALLOW file list only.
178
+
179
+ Notes:
180
+ - '*' does not cross '/'
181
+ - '**' matches across directories
182
+ - Always sources from the shallow index for consistency and speed
183
+ """
127
184
with self ._lock :
128
185
# Input validation
129
186
if not isinstance (pattern , str ):
@@ -134,18 +191,27 @@ def find_files(self, pattern: str = "*") -> List[str]:
134
191
if not pattern :
135
192
pattern = "*"
136
193
137
- if not self .index_builder or not self .index_builder .in_memory_index :
138
- logger .warning ("Index not loaded" )
139
- return []
194
+ # Normalize to forward slashes
195
+ norm_pattern = pattern .replace ('\\ \\ ' , '/' ).replace ('\\ ' , '/' )
196
+
197
+ # Build glob regex: '*' does not cross '/', '**' crosses directories
198
+ regex = self ._compile_glob_regex (norm_pattern )
140
199
200
+ # Always use shallow index for file discovery
141
201
try :
142
- files = list (self .index_builder .in_memory_index ["files" ].keys ())
202
+ if self ._shallow_file_list is None :
203
+ # Try load existing shallow index; if missing, build then load
204
+ if not self .load_shallow_index ():
205
+ # If still not available, attempt to build
206
+ if self .build_shallow_index ():
207
+ self .load_shallow_index ()
143
208
144
- if pattern == "*" :
209
+ files = list (self ._shallow_file_list or [])
210
+
211
+ if norm_pattern == "*" :
145
212
return files
146
213
147
- # Simple pattern matching
148
- return [f for f in files if fnmatch .fnmatch (f , pattern )]
214
+ return [f for f in files if regex .match (f ) is not None ]
149
215
150
216
except Exception as e :
151
217
logger .error (f"Error finding files: { e } " )
@@ -356,6 +422,39 @@ def cleanup(self):
356
422
self .index_path = None
357
423
logger .info ("Cleaned up JSON Index Manager" )
358
424
425
@staticmethod
def _compile_glob_regex(pattern: str) -> re.Pattern:
    """Compile a glob pattern into an anchored regular expression.

    Translation rules:
      - ``*``   matches any run of characters except ``/``.
      - ``?``   matches exactly one character except ``/``.
      - ``**/`` at the start of a path segment matches zero or more
        whole directories, so ``**/*.py`` also matches ``a.py`` at the
        root.
      - any other ``**`` matches any run of characters, ``/`` included.
      - every other character is matched literally.

    Examples:
        src/*.py     -> .py files directly under src
        **/*.py      -> .py files at any depth, including the root
        src/**/*.py  -> .py files anywhere under src, including src itself

    Args:
        pattern: Glob pattern using ``/`` as the path separator.

    Returns:
        A compiled pattern anchored with ``^`` and ``$``.
    """
    pieces = []
    length = len(pattern)
    pos = 0
    while pos < length:
        ch = pattern[pos]
        if ch == '*':
            if pattern.startswith('**', pos):
                at_segment_start = pos == 0 or pattern[pos - 1] == '/'
                if at_segment_start and pattern.startswith('/', pos + 2):
                    # '**/' spans zero or more directories; making the
                    # whole group optional lets root-level files match.
                    pieces.append('(?:.*/)?')
                    pos += 3
                else:
                    pieces.append('.*')
                    pos += 2
                continue
            pieces.append('[^/]*')
        elif ch == '?':
            pieces.append('[^/]')
        else:
            # re.escape covers every regex metacharacter (backslash
            # included), not just a hand-maintained subset.
            pieces.append(re.escape(ch))
        pos += 1
    return re.compile('^' + ''.join(pieces) + '$')
457
+
359
458
360
459
# Global instance
361
460
_index_manager = JSONIndexManager ()
@@ -364,4 +463,3 @@ def cleanup(self):
364
463
def get_index_manager() -> JSONIndexManager:
    """Return the shared module-level JSONIndexManager instance."""
    return _index_manager
367
-
0 commit comments