@@ -26,45 +26,47 @@ class FileSystemStats:
26
26
class FileSystemNode (Source ): # pylint: disable=too-many-instance-attributes
27
27
"""Base class for filesystem nodes (files, directories, symlinks)."""
28
28
29
- name : str = ""
30
- path_str : str = ""
31
- path : Path | None = None
29
+ # Required fields - use None defaults and validate in __post_init__
30
+ name : str | None = None
31
+ path_str : str | None = None
32
+ path : "Path | None" = None
33
+
34
+ # Optional fields with sensible defaults
32
35
size : int = 0
33
36
file_count : int = 0
34
37
dir_count : int = 0
35
38
depth : int = 0
36
39
children : list [FileSystemNode ] = field (default_factory = list )
37
-
38
- @property
39
- def tree (self ) -> str :
40
- """Return the name of this node."""
41
- return self .name
42
-
43
- def render_tree (self , prefix : str = "" , * , is_last : bool = True ) -> list [str ]:
44
- """Return default tree representation with just the name."""
45
- current_prefix = "└── " if is_last else "├── "
46
- return [f"{ prefix } { current_prefix } { self .name } " ]
40
+
41
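+ # Display type name read by content_string (replaces fragile class-name string manipulation). NOTE(review): because it is annotated, a dataclass would treat this as an init field rather than a plain class attribute - consider ClassVar[str]; confirm.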
42
+ _display_type : str = "NODE"
43
+
44
+ def __post_init__ (self ) -> None :
45
+ """Validate required fields after initialization."""
46
+ if self .name is None :
47
+ raise ValueError ("FileSystemNode requires 'name' field" )
48
+ if self .path_str is None :
49
+ raise ValueError ("FileSystemNode requires 'path_str' field" )
50
+ if self .path is None :
51
+ raise ValueError ("FileSystemNode requires 'path' field" )
47
52
48
53
def sort_children (self ) -> None :
49
54
"""Sort the children nodes of a directory according to a specific order."""
50
55
51
56
def _sort_key (child : FileSystemNode ) -> tuple [int , str ]:
52
- name = child .name .lower ()
53
- # Each child knows its own sort priority - polymorphism!
54
- priority = child .get_sort_priority ()
55
- if priority == 0 and (name == "readme" or name .startswith ("readme." )):
56
- return (0 , name )
57
- if priority == 0 : # Files
57
+ name = (child .name or "" ).lower ()
58
+ # Sort ranks: README files (0), other files (1), dot-prefixed files (2), other nodes (3), dot-prefixed other nodes (4)
59
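+ # Match on the class name instead of isinstance(). NOTE(review): FileSystemFile is declared later in this same file, so a circular import seems unlikely - confirm.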
60
+ if child .__class__ .__name__ == "FileSystemFile" :
61
+ priority = 0
62
+ if name == "readme" or name .startswith ("readme." ):
63
+ return (0 , name )
58
64
return (1 if not name .startswith ("." ) else 2 , name )
59
65
# Directories, symlinks, etc.
60
66
return (3 if not name .startswith ("." ) else 4 , name )
61
67
62
68
self .children .sort (key = _sort_key )
63
69
64
- def get_sort_priority (self ) -> int :
65
- """Return sort priority. Override in subclasses."""
66
- return 1 # Default: not a file
67
-
68
70
@property
69
71
def content_string (self ) -> str :
70
72
"""Return the content of the node as a string, including path and content.
@@ -75,46 +77,60 @@ def content_string(self) -> str:
75
77
A string representation of the node's content.
76
78
77
79
"""
78
- type_name = self .__class__ .__name__ .upper ().replace ("FILESYSTEM" , "" )
80
+ # Use class attribute instead of fragile string manipulation
81
+ type_name = self ._display_type
79
82
80
83
parts = [
81
84
SEPARATOR ,
82
- f"{ type_name } : { str (self .path_str ).replace (os .sep , '/' )} " ,
85
+ f"{ type_name } : { str (self .path_str or '' ).replace (os .sep , '/' )} " ,
83
86
SEPARATOR ,
84
87
f"{ self .content } " ,
85
88
]
86
89
87
90
return "\n " .join (parts ) + "\n \n "
88
91
89
92
def get_content (self ) -> str :
90
- """Return file content. Override in subclasses for specific behavior."""
91
- if self .path is None :
93
+ """Return file content with proper encoding detection."""
94
+ from gitingest .utils .file_utils import _decodes , _get_preferred_encodings , _read_chunk
95
+ from gitingest .utils .notebook import process_notebook
96
+
97
+ if not self .path :
92
98
return "Error: No path specified"
93
99
94
- try :
95
- return self .path .read_text (encoding = "utf-8" )
96
- except Exception as e :
97
- return f"Error reading content of { self .name } : { e } "
100
+ # Handle notebook files specially
101
+ if self .path .suffix == ".ipynb" :
102
+ try :
103
+ return process_notebook (self .path )
104
+ except Exception as exc :
105
+ return f"Error processing notebook: { exc } "
106
+
107
+ # Read a chunk to check if it's binary or text
108
+ chunk = _read_chunk (self .path )
98
109
99
- def get_summary_info (self ) -> str :
100
- """Return summary information. Override in subclasses."""
101
- return ""
110
+ if chunk is None :
111
+ return "Error reading file"
102
112
103
- def is_single_file (self ) -> bool :
104
- """Return whether this node represents a single file."""
105
- return False
113
+ if chunk == b"" :
114
+ return "[Empty file]"
106
115
107
- def gather_contents ( self ) -> str :
108
- """Gather file contents. Override in subclasses."""
109
- return self . content_string
116
+ # Treat the file as binary when the sampled chunk does not decode as UTF-8
117
+ if not _decodes ( chunk , "utf-8" ):
118
+ return "[Binary file]"
110
119
111
- def get_display_name (self ) -> str :
112
- """Get display name for tree view. Override in subclasses."""
113
- return self .name
120
+ # Find the first encoding that decodes the sample
121
+ good_enc : str | None = next (
122
+ (enc for enc in _get_preferred_encodings () if _decodes (chunk , encoding = enc )),
123
+ None ,
124
+ )
114
125
115
- def has_children (self ) -> bool :
116
- """Return whether this node has children to display."""
117
- return False
126
+ if good_enc is None :
127
+ return "Error: Unable to decode file with available encodings"
128
+
129
+ try :
130
+ with self .path .open (encoding = good_enc ) as fp :
131
+ return fp .read ()
132
+ except (OSError , UnicodeDecodeError ) as exc :
133
+ return f"Error reading file with { good_enc !r} : { exc } "
118
134
119
135
@property
120
136
def content (self ) -> str :
@@ -125,109 +141,38 @@ def content(self) -> str:
125
141
@dataclass
126
142
class FileSystemFile (FileSystemNode ):
127
143
"""Represents a file in the filesystem."""
128
-
129
- def get_sort_priority (self ) -> int :
130
- """Files have priority 0 for sorting."""
131
- return 0
132
-
133
- def get_summary_info (self ) -> str :
134
- """Return file summary information."""
135
- return f"File: { self .name } \n Lines: { len (self .content .splitlines ()):,} \n "
136
-
137
- def is_single_file (self ) -> bool :
138
- """Files are single files."""
139
- return True
140
-
141
- def render_tree (self , prefix : str = "" , * , is_last : bool = True ) -> list [str ]:
142
- """Render the tree representation of this file."""
143
- current_prefix = "└── " if is_last else "├── "
144
- return [f"{ prefix } { current_prefix } { self .name } " ]
144
+
145
+ _display_type : str = "FILE"
145
146
146
147
147
148
@dataclass
148
149
class FileSystemDirectory (FileSystemNode ):
149
150
"""Represents a directory in the filesystem."""
150
151
151
152
file_count_total : int = 0
153
+ _display_type : str = "DIRECTORY"
152
154
153
155
def get_content (self ) -> str :
154
156
"""Directories cannot have content."""
155
157
msg = "Cannot read content of a directory node"
156
158
raise ValueError (msg )
157
159
158
- def get_summary_info (self ) -> str :
159
- """Return directory summary information."""
160
- return f"Files analyzed: { self .file_count } \n "
161
-
162
- def gather_contents (self ) -> str :
163
- """Recursively gather contents of all files under this directory."""
164
- return "\n " .join (child .gather_contents () for child in self .children )
165
-
166
- def get_display_name (self ) -> str :
167
- """Directories get a trailing slash."""
168
- return self .name + "/"
169
-
170
- def has_children (self ) -> bool :
171
- """Directories have children if the list is not empty."""
172
- return bool (self .children )
173
-
174
- def render_tree (self , prefix : str = "" , * , is_last : bool = True ) -> list [str ]:
175
- """Render the tree representation of this directory."""
176
- lines = []
177
- current_prefix = "└── " if is_last else "├── "
178
- display_name = self .name + "/"
179
- lines .append (f"{ prefix } { current_prefix } { display_name } " )
180
- if hasattr (self , "children" ) and self .children :
181
- new_prefix = prefix + (" " if is_last else "│ " )
182
- for i , child in enumerate (self .children ):
183
- is_last_child = i == len (self .children ) - 1
184
- lines .extend (child .render_tree (prefix = new_prefix , is_last = is_last_child ))
185
- return lines
186
-
187
- @property
188
- def tree (self ) -> str :
189
- """Return the tree representation of this directory."""
190
- return "\n " .join (self .render_tree ())
191
-
192
160
193
161
@dataclass
194
162
class GitRepository (FileSystemDirectory ):
195
163
"""A directory that contains a .git folder, representing a Git repository."""
196
164
197
165
git_info : dict = field (default_factory = dict ) # Store git metadata like branch, commit, etc.
198
-
199
- def render_tree (self , prefix : str = "" , * , is_last : bool = True ) -> list [str ]:
200
- """Render the tree representation of this git repository."""
201
- lines = []
202
- current_prefix = "└── " if is_last else "├── "
203
- # Mark as git repo in the tree
204
- display_name = f"{ self .name } / (git repository)"
205
- lines .append (f"{ prefix } { current_prefix } { display_name } " )
206
- if hasattr (self , "children" ) and self .children :
207
- new_prefix = prefix + (" " if is_last else "│ " )
208
- for i , child in enumerate (self .children ):
209
- is_last_child = i == len (self .children ) - 1
210
- lines .extend (child .render_tree (prefix = new_prefix , is_last = is_last_child ))
211
- return lines
166
+ _display_type : str = "GIT_REPOSITORY"
212
167
213
168
214
169
@dataclass
215
170
class FileSystemSymlink (FileSystemNode ):
216
171
"""Represents a symbolic link in the filesystem."""
217
172
218
173
target : str = ""
219
- # Add symlink-specific fields if needed
174
+ _display_type : str = "SYMLINK"
220
175
221
176
def get_content (self ) -> str :
222
177
"""Symlinks content is what they point to."""
223
178
return self .target
224
-
225
- def get_display_name (self ) -> str :
226
- """Symlinks show target."""
227
- return f"{ self .name } -> { self .target } "
228
-
229
- def render_tree (self , prefix : str = "" , * , is_last : bool = True ) -> list [str ]:
230
- """Render the tree representation of this symlink."""
231
- current_prefix = "└── " if is_last else "├── "
232
- display_name = f"{ self .name } -> { self .target } " if self .target else self .name
233
- return [f"{ prefix } { current_prefix } { display_name } " ]
0 commit comments