@@ -26,30 +26,45 @@ class FileSystemStats:
26
26
class FileSystemNode (Source ): # pylint: disable=too-many-instance-attributes
27
27
"""Base class for filesystem nodes (files, directories, symlinks)."""
28
28
29
# Required fields - use None defaults and validate in __post_init__
name: str | None = None
path_str: str | None = None
path: "Path | None" = None

# Optional fields with sensible defaults
size: int = 0
file_count: int = 0
dir_count: int = 0
depth: int = 0
children: list[FileSystemNode] = field(default_factory=list)

# Display type name used in content headers (instead of fragile string
# manipulation of the class name). Subclasses override the default.
# NOTE(review): this is annotated as a plain `str`, so if the class is a
# dataclass it is treated as an instance field and `__init__` parameter rather
# than a true class attribute; `typing.ClassVar[str]` would avoid that -
# confirm which is intended.
_display_type: str = "NODE"
43
+
44
def __post_init__(self) -> None:
    """Validate required fields after initialization.

    Raises
    ------
    ValueError
        If ``name``, ``path_str`` or ``path`` is still ``None``. The fields
        are checked in that order and the first missing one is reported.

    """
    # Only ``None`` is rejected; other falsy values such as "" are accepted.
    for required in ("name", "path_str", "path"):
        if getattr(self, required) is None:
            msg = f"FileSystemNode requires {required!r} field"
            raise ValueError(msg)
37
52
38
53
@property
def tree(self) -> str:
    """Return the name of this node, or an empty string when it has none."""
    return self.name or ""
42
57
43
58
def render_tree(self, prefix: str = "", *, is_last: bool = True) -> list[str]:
    """Return default tree representation with just the name.

    Parameters
    ----------
    prefix : str
        Text placed before the branch connector.
    is_last : bool
        Selects the closing connector ("└── ") when true, otherwise the
        continuing connector ("├── ").

    Returns
    -------
    list[str]
        A single-element list holding the rendered line.

    """
    current_prefix = "└── " if is_last else "├── "
    # A missing name renders as an empty label rather than the text "None".
    return [f"{prefix}{current_prefix}{self.name or ''}"]
47
62
48
63
def sort_children (self ) -> None :
49
64
"""Sort the children nodes of a directory according to a specific order."""
50
65
51
66
def _sort_key (child : FileSystemNode ) -> tuple [int , str ]:
52
- name = child .name .lower ()
67
+ name = ( child .name or "" ) .lower ()
53
68
# Each child knows its own sort priority - polymorphism!
54
69
priority = child .get_sort_priority ()
55
70
if priority == 0 and (name == "readme" or name .startswith ("readme." )):
@@ -75,26 +90,60 @@ def content_string(self) -> str:
75
90
A string representation of the node's content.
76
91
77
92
"""
78
- type_name = self .__class__ .__name__ .upper ().replace ("FILESYSTEM" , "" )
93
+ # Use class attribute instead of fragile string manipulation
94
+ type_name = self ._display_type
79
95
80
96
parts = [
81
97
SEPARATOR ,
82
- f"{ type_name } : { str (self .path_str ).replace (os .sep , '/' )} " ,
98
+ f"{ type_name } : { str (self .path_str or '' ).replace (os .sep , '/' )} " ,
83
99
SEPARATOR ,
84
100
f"{ self .content } " ,
85
101
]
86
102
87
103
return "\n " .join (parts ) + "\n \n "
88
104
89
105
def get_content(self) -> str:
    """Return file content with proper encoding detection.

    Returns
    -------
    str
        The decoded text of the file at ``self.path``, or a placeholder /
        error string when there is no path, the file is a notebook that fails
        to process, the first chunk cannot be read, is empty, does not decode
        as UTF-8, or no preferred encoding decodes it.

    """
    if not self.path:
        return "Error: No path specified"

    # Project helpers are imported lazily, and only once a path is known to
    # exist, so the no-path guard above does not depend on them.
    from gitingest.utils.file_utils import _decodes, _get_preferred_encodings, _read_chunk
    from gitingest.utils.notebook import process_notebook

    # Handle notebook files specially
    if self.path.suffix == ".ipynb":
        try:
            return process_notebook(self.path)
        except Exception as exc:
            # Deliberate best-effort: any notebook failure becomes a message.
            return f"Error processing notebook: {exc}"

    # Read a chunk to check if it's binary or text
    chunk = _read_chunk(self.path)

    if chunk is None:
        return "Error reading file"

    if chunk == b"":
        return "[Empty file]"

    # A sample that does not decode as UTF-8 is reported as binary.
    if not _decodes(chunk, "utf-8"):
        return "[Binary file]"

    # Find the first encoding that decodes the sample
    good_enc: str | None = next(
        (enc for enc in _get_preferred_encodings() if _decodes(chunk, encoding=enc)),
        None,
    )

    if good_enc is None:
        return "Error: Unable to decode file with available encodings"

    # Only the sample was checked above, so reading the full file can still
    # fail to decode; report that instead of raising.
    try:
        with self.path.open(encoding=good_enc) as fp:
            return fp.read()
    except (OSError, UnicodeDecodeError) as exc:
        return f"Error reading file with {good_enc!r}: {exc}"
98
147
99
148
def get_summary_info (self ) -> str :
100
149
"""Return summary information. Override in subclasses."""
@@ -110,11 +159,7 @@ def gather_contents(self) -> str:
110
159
111
160
def get_display_name(self) -> str:
    """Get display name for tree view. Override in subclasses.

    Returns the node's name, or an empty string when the name is unset.
    """
    return self.name or ""
118
163
119
164
@property
120
165
def content (self ) -> str :
@@ -125,14 +170,16 @@ def content(self) -> str:
125
170
@dataclass
126
171
class FileSystemFile (FileSystemNode ):
127
172
"""Represents a file in the filesystem."""
173
+
174
+ _display_type : str = "FILE"
128
175
129
176
def get_sort_priority(self) -> int:
    """Files have priority 0 for sorting."""
    return 0
132
179
133
180
def get_summary_info(self) -> str:
    """Return file summary information.

    The summary has two lines: the file name (empty when unset) and the
    number of lines in ``self.content``, formatted with thousands separators.
    """
    line_count = len(self.content.splitlines())
    return f"File: {self.name or ''}\nLines: {line_count:,}\n"
136
183
137
184
def is_single_file (self ) -> bool :
138
185
"""Files are single files."""
@@ -141,14 +188,15 @@ def is_single_file(self) -> bool:
141
188
def render_tree(self, prefix: str = "", *, is_last: bool = True) -> list[str]:
    """Render the tree representation of this file.

    Returns a single-element list: ``prefix``, then the branch connector
    chosen by ``is_last``, then the file name (empty when unset).
    """
    current_prefix = "└── " if is_last else "├── "
    return [f"{prefix}{current_prefix}{self.name or ''}"]
145
192
146
193
147
194
@dataclass
148
195
class FileSystemDirectory (FileSystemNode ):
149
196
"""Represents a directory in the filesystem."""
150
197
151
198
file_count_total : int = 0
199
+ _display_type : str = "DIRECTORY"
152
200
153
201
def get_content (self ) -> str :
154
202
"""Directories cannot have content."""
@@ -165,17 +213,13 @@ def gather_contents(self) -> str:
165
213
166
214
def get_display_name(self) -> str:
    """Directories get a trailing slash.

    An unset name yields just "/".
    """
    return (self.name or "") + "/"
173
217
174
218
def render_tree (self , prefix : str = "" , * , is_last : bool = True ) -> list [str ]:
175
219
"""Render the tree representation of this directory."""
176
220
lines = []
177
221
current_prefix = "└── " if is_last else "├── "
178
- display_name = self .name + "/"
222
+ display_name = ( self .name or "" ) + "/"
179
223
lines .append (f"{ prefix } { current_prefix } { display_name } " )
180
224
if hasattr (self , "children" ) and self .children :
181
225
new_prefix = prefix + (" " if is_last else "│ " )
@@ -195,13 +239,14 @@ class GitRepository(FileSystemDirectory):
195
239
"""A directory that contains a .git folder, representing a Git repository."""
196
240
197
241
git_info : dict = field (default_factory = dict ) # Store git metadata like branch, commit, etc.
242
+ _display_type : str = "GIT_REPOSITORY"
198
243
199
244
def render_tree (self , prefix : str = "" , * , is_last : bool = True ) -> list [str ]:
200
245
"""Render the tree representation of this git repository."""
201
246
lines = []
202
247
current_prefix = "└── " if is_last else "├── "
203
248
# Mark as git repo in the tree
204
- display_name = f"{ self .name } / (git repository)"
249
+ display_name = f"{ self .name or '' } / (git repository)"
205
250
lines .append (f"{ prefix } { current_prefix } { display_name } " )
206
251
if hasattr (self , "children" ) and self .children :
207
252
new_prefix = prefix + (" " if is_last else "│ " )
@@ -216,18 +261,18 @@ class FileSystemSymlink(FileSystemNode):
216
261
"""Represents a symbolic link in the filesystem."""
217
262
218
263
target : str = ""
219
- # Add symlink-specific fields if needed
264
+ _display_type : str = "SYMLINK"
220
265
221
266
def get_content(self) -> str:
    """Symlinks content is what they point to (the ``target`` attribute)."""
    return self.target
224
269
225
270
def get_display_name(self) -> str:
    """Symlinks show target.

    The arrow and target are always appended, even when the target is empty.
    """
    return f"{self.name or ''} -> {self.target}"
228
273
229
274
def render_tree(self, prefix: str = "", *, is_last: bool = True) -> list[str]:
    """Render the tree representation of this symlink.

    Returns a single-element list. The label is "name -> target" when a
    target is set, otherwise just the name (empty when unset).
    """
    current_prefix = "└── " if is_last else "├── "
    label = self.name or ""
    if self.target:
        label = f"{label} -> {self.target}"
    return [f"{prefix}{current_prefix}{label}"]
0 commit comments