Skip to content

Commit 26ccecd

Browse files
committed
simplify: use direct node.children check instead of has_children() method
Even simpler approach: - Replace node.has_children() with direct if node.children: - Remove unnecessary has_children() methods from all classes - Pythonic and direct - empty lists are falsy, non-empty are truthy - Less code, same functionality This is the most straightforward way to check for children in Python.
1 parent 5efe631 commit 26ccecd

File tree

2 files changed

+88
-128
lines changed

2 files changed

+88
-128
lines changed

src/gitingest/output_formatter.py

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -41,9 +41,14 @@ def format_node(node: FileSystemNode, query: IngestionQuery) -> tuple[str, str,
4141
A tuple containing the summary, directory structure, and file contents.
4242
4343
"""
44-
is_single_file = node.is_single_file()
44+
is_single_file = isinstance(node, FileSystemFile)
4545
summary = _create_summary_prefix(query, single_file=is_single_file)
46-
summary += node.get_summary_info()
46+
47+
# Add summary info based on node type
48+
if isinstance(node, FileSystemDirectory):
49+
summary += f"Files analyzed: {node.file_count}\n"
50+
elif isinstance(node, FileSystemFile):
51+
summary += f"File: {node.name or ''}\nLines: {len(node.content.splitlines()):,}\n"
4752

4853
tree = "Directory structure:\n" + _create_tree_structure(query, node=node)
4954

@@ -113,7 +118,12 @@ def _gather_file_contents(node: FileSystemNode) -> str:
113118
The concatenated content of all files under the given node.
114119
115120
"""
116-
return node.gather_contents()
121+
if isinstance(node, FileSystemDirectory):
122+
# Recursively gather contents of all files under the current directory
123+
return "\n".join(_gather_file_contents(child) for child in node.children)
124+
else:
125+
# File, symlink, etc. - return content_string
126+
return node.content_string
117127

118128

119129
def _create_tree_structure(
@@ -152,11 +162,16 @@ def _create_tree_structure(
152162
tree_str = ""
153163
current_prefix = "└── " if is_last else "├── "
154164

155-
# Get the display name (handles directory slash, symlink target, etc.)
156-
display_name = node.get_display_name()
165+
# Get display name based on node type
166+
display_name = node.name or ""
167+
if isinstance(node, FileSystemDirectory):
168+
display_name += "/"
169+
elif isinstance(node, FileSystemSymlink):
170+
display_name += f" -> {node.target}"
171+
157172
tree_str += f"{prefix}{current_prefix}{display_name}\n"
158173

159-
if node.has_children():
174+
if node.children:
160175
prefix += " " if is_last else "│ "
161176
for i, child in enumerate(node.children):
162177
tree_str += _create_tree_structure(query, node=child, prefix=prefix, is_last=i == len(node.children) - 1)

src/gitingest/schemas/filesystem.py

Lines changed: 67 additions & 122 deletions
Original file line numberDiff line numberDiff line change
@@ -26,45 +26,47 @@ class FileSystemStats:
2626
class FileSystemNode(Source): # pylint: disable=too-many-instance-attributes
2727
"""Base class for filesystem nodes (files, directories, symlinks)."""
2828

29-
name: str = ""
30-
path_str: str = ""
31-
path: Path | None = None
29+
# Required fields - use None defaults and validate in __post_init__
30+
name: str | None = None
31+
path_str: str | None = None
32+
path: "Path | None" = None
33+
34+
# Optional fields with sensible defaults
3235
size: int = 0
3336
file_count: int = 0
3437
dir_count: int = 0
3538
depth: int = 0
3639
children: list[FileSystemNode] = field(default_factory=list)
37-
38-
@property
39-
def tree(self) -> str:
40-
"""Return the name of this node."""
41-
return self.name
42-
43-
def render_tree(self, prefix: str = "", *, is_last: bool = True) -> list[str]:
44-
"""Return default tree representation with just the name."""
45-
current_prefix = "└── " if is_last else "├── "
46-
return [f"{prefix}{current_prefix}{self.name}"]
40+
41+
# Class attribute for display type name (instead of fragile string manipulation)
42+
_display_type: str = "NODE"
43+
44+
def __post_init__(self) -> None:
45+
"""Validate required fields after initialization."""
46+
if self.name is None:
47+
raise ValueError("FileSystemNode requires 'name' field")
48+
if self.path_str is None:
49+
raise ValueError("FileSystemNode requires 'path_str' field")
50+
if self.path is None:
51+
raise ValueError("FileSystemNode requires 'path' field")
4752

4853
def sort_children(self) -> None:
4954
"""Sort the children nodes of a directory according to a specific order."""
5055

5156
def _sort_key(child: FileSystemNode) -> tuple[int, str]:
52-
name = child.name.lower()
53-
# Each child knows its own sort priority - polymorphism!
54-
priority = child.get_sort_priority()
55-
if priority == 0 and (name == "readme" or name.startswith("readme.")):
56-
return (0, name)
57-
if priority == 0: # Files
57+
name = (child.name or "").lower()
58+
# Files have priority 0, others have priority 1
59+
# Use string comparison to avoid circular import
60+
if child.__class__.__name__ == "FileSystemFile":
61+
priority = 0
62+
if name == "readme" or name.startswith("readme."):
63+
return (0, name)
5864
return (1 if not name.startswith(".") else 2, name)
5965
# Directories, symlinks, etc.
6066
return (3 if not name.startswith(".") else 4, name)
6167

6268
self.children.sort(key=_sort_key)
6369

64-
def get_sort_priority(self) -> int:
65-
"""Return sort priority. Override in subclasses."""
66-
return 1 # Default: not a file
67-
6870
@property
6971
def content_string(self) -> str:
7072
"""Return the content of the node as a string, including path and content.
@@ -75,46 +77,60 @@ def content_string(self) -> str:
7577
A string representation of the node's content.
7678
7779
"""
78-
type_name = self.__class__.__name__.upper().replace("FILESYSTEM", "")
80+
# Use class attribute instead of fragile string manipulation
81+
type_name = self._display_type
7982

8083
parts = [
8184
SEPARATOR,
82-
f"{type_name}: {str(self.path_str).replace(os.sep, '/')}",
85+
f"{type_name}: {str(self.path_str or '').replace(os.sep, '/')}",
8386
SEPARATOR,
8487
f"{self.content}",
8588
]
8689

8790
return "\n".join(parts) + "\n\n"
8891

8992
def get_content(self) -> str:
90-
"""Return file content. Override in subclasses for specific behavior."""
91-
if self.path is None:
93+
"""Return file content with proper encoding detection."""
94+
from gitingest.utils.file_utils import _decodes, _get_preferred_encodings, _read_chunk
95+
from gitingest.utils.notebook import process_notebook
96+
97+
if not self.path:
9298
return "Error: No path specified"
9399

94-
try:
95-
return self.path.read_text(encoding="utf-8")
96-
except Exception as e:
97-
return f"Error reading content of {self.name}: {e}"
100+
# Handle notebook files specially
101+
if self.path.suffix == ".ipynb":
102+
try:
103+
return process_notebook(self.path)
104+
except Exception as exc:
105+
return f"Error processing notebook: {exc}"
106+
107+
# Read a chunk to check if it's binary or text
108+
chunk = _read_chunk(self.path)
98109

99-
def get_summary_info(self) -> str:
100-
"""Return summary information. Override in subclasses."""
101-
return ""
110+
if chunk is None:
111+
return "Error reading file"
102112

103-
def is_single_file(self) -> bool:
104-
"""Return whether this node represents a single file."""
105-
return False
113+
if chunk == b"":
114+
return "[Empty file]"
106115

107-
def gather_contents(self) -> str:
108-
"""Gather file contents. Override in subclasses."""
109-
return self.content_string
116+
# Check if it's binary
117+
if not _decodes(chunk, "utf-8"):
118+
return "[Binary file]"
110119

111-
def get_display_name(self) -> str:
112-
"""Get display name for tree view. Override in subclasses."""
113-
return self.name
120+
# Find the first encoding that decodes the sample
121+
good_enc: str | None = next(
122+
(enc for enc in _get_preferred_encodings() if _decodes(chunk, encoding=enc)),
123+
None,
124+
)
114125

115-
def has_children(self) -> bool:
116-
"""Return whether this node has children to display."""
117-
return False
126+
if good_enc is None:
127+
return "Error: Unable to decode file with available encodings"
128+
129+
try:
130+
with self.path.open(encoding=good_enc) as fp:
131+
return fp.read()
132+
except (OSError, UnicodeDecodeError) as exc:
133+
return f"Error reading file with {good_enc!r}: {exc}"
118134

119135
@property
120136
def content(self) -> str:
@@ -125,109 +141,38 @@ def content(self) -> str:
125141
@dataclass
126142
class FileSystemFile(FileSystemNode):
127143
"""Represents a file in the filesystem."""
128-
129-
def get_sort_priority(self) -> int:
130-
"""Files have priority 0 for sorting."""
131-
return 0
132-
133-
def get_summary_info(self) -> str:
134-
"""Return file summary information."""
135-
return f"File: {self.name}\nLines: {len(self.content.splitlines()):,}\n"
136-
137-
def is_single_file(self) -> bool:
138-
"""Files are single files."""
139-
return True
140-
141-
def render_tree(self, prefix: str = "", *, is_last: bool = True) -> list[str]:
142-
"""Render the tree representation of this file."""
143-
current_prefix = "└── " if is_last else "├── "
144-
return [f"{prefix}{current_prefix}{self.name}"]
144+
145+
_display_type: str = "FILE"
145146

146147

147148
@dataclass
148149
class FileSystemDirectory(FileSystemNode):
149150
"""Represents a directory in the filesystem."""
150151

151152
file_count_total: int = 0
153+
_display_type: str = "DIRECTORY"
152154

153155
def get_content(self) -> str:
154156
"""Directories cannot have content."""
155157
msg = "Cannot read content of a directory node"
156158
raise ValueError(msg)
157159

158-
def get_summary_info(self) -> str:
159-
"""Return directory summary information."""
160-
return f"Files analyzed: {self.file_count}\n"
161-
162-
def gather_contents(self) -> str:
163-
"""Recursively gather contents of all files under this directory."""
164-
return "\n".join(child.gather_contents() for child in self.children)
165-
166-
def get_display_name(self) -> str:
167-
"""Directories get a trailing slash."""
168-
return self.name + "/"
169-
170-
def has_children(self) -> bool:
171-
"""Directories have children if the list is not empty."""
172-
return bool(self.children)
173-
174-
def render_tree(self, prefix: str = "", *, is_last: bool = True) -> list[str]:
175-
"""Render the tree representation of this directory."""
176-
lines = []
177-
current_prefix = "└── " if is_last else "├── "
178-
display_name = self.name + "/"
179-
lines.append(f"{prefix}{current_prefix}{display_name}")
180-
if hasattr(self, "children") and self.children:
181-
new_prefix = prefix + (" " if is_last else "│ ")
182-
for i, child in enumerate(self.children):
183-
is_last_child = i == len(self.children) - 1
184-
lines.extend(child.render_tree(prefix=new_prefix, is_last=is_last_child))
185-
return lines
186-
187-
@property
188-
def tree(self) -> str:
189-
"""Return the tree representation of this directory."""
190-
return "\n".join(self.render_tree())
191-
192160

193161
@dataclass
194162
class GitRepository(FileSystemDirectory):
195163
"""A directory that contains a .git folder, representing a Git repository."""
196164

197165
git_info: dict = field(default_factory=dict) # Store git metadata like branch, commit, etc.
198-
199-
def render_tree(self, prefix: str = "", *, is_last: bool = True) -> list[str]:
200-
"""Render the tree representation of this git repository."""
201-
lines = []
202-
current_prefix = "└── " if is_last else "├── "
203-
# Mark as git repo in the tree
204-
display_name = f"{self.name}/ (git repository)"
205-
lines.append(f"{prefix}{current_prefix}{display_name}")
206-
if hasattr(self, "children") and self.children:
207-
new_prefix = prefix + (" " if is_last else "│ ")
208-
for i, child in enumerate(self.children):
209-
is_last_child = i == len(self.children) - 1
210-
lines.extend(child.render_tree(prefix=new_prefix, is_last=is_last_child))
211-
return lines
166+
_display_type: str = "GIT_REPOSITORY"
212167

213168

214169
@dataclass
215170
class FileSystemSymlink(FileSystemNode):
216171
"""Represents a symbolic link in the filesystem."""
217172

218173
target: str = ""
219-
# Add symlink-specific fields if needed
174+
_display_type: str = "SYMLINK"
220175

221176
def get_content(self) -> str:
222177
"""Symlinks content is what they point to."""
223178
return self.target
224-
225-
def get_display_name(self) -> str:
226-
"""Symlinks show target."""
227-
return f"{self.name} -> {self.target}"
228-
229-
def render_tree(self, prefix: str = "", *, is_last: bool = True) -> list[str]:
230-
"""Render the tree representation of this symlink."""
231-
current_prefix = "└── " if is_last else "├── "
232-
display_name = f"{self.name} -> {self.target}" if self.target else self.name
233-
return [f"{prefix}{current_prefix}{display_name}"]

0 commit comments

Comments
 (0)