Skip to content

feat: use gitpython for git stuff #504

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 11 commits into from
Aug 11, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ repos:
boto3>=1.28.0,
click>=8.0.0,
'fastapi[standard]>=0.109.1',
gitpython>=3.1.0,
httpx,
loguru>=0.7.0,
pathspec>=0.12.1,
Expand Down Expand Up @@ -144,6 +145,7 @@ repos:
boto3>=1.28.0,
click>=8.0.0,
'fastapi[standard]>=0.109.1',
gitpython>=3.1.0,
httpx,
loguru>=0.7.0,
pathspec>=0.12.1,
Expand Down
2 changes: 1 addition & 1 deletion Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@ COPY src/ ./src/

RUN set -eux; \
pip install --no-cache-dir --upgrade pip; \
pip install --no-cache-dir --timeout 1000 .[server]
pip install --no-cache-dir --timeout 1000 .[server,mcp]

# Stage 2: Runtime image
FROM python:3.13.5-slim@sha256:4c2cf9917bd1cbacc5e9b07320025bdb7cdf2df7b0ceaccb55e9dd7e30987419
Expand Down
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@ readme = {file = "README.md", content-type = "text/markdown" }
requires-python = ">= 3.8"
dependencies = [
"click>=8.0.0",
"gitpython>=3.1.0",
"httpx",
"loguru>=0.7.0",
"pathspec>=0.12.1",
Expand Down
113 changes: 83 additions & 30 deletions src/gitingest/clone.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,16 +5,17 @@
from pathlib import Path
from typing import TYPE_CHECKING

import git

from gitingest.config import DEFAULT_TIMEOUT
from gitingest.utils.git_utils import (
check_repo_exists,
checkout_partial_clone,
create_git_auth_header,
create_git_command,
create_git_repo,
ensure_git_installed,
git_auth_context,
is_github_host,
resolve_commit,
run_command,
)
from gitingest.utils.logging_config import get_logger
from gitingest.utils.os_utils import ensure_directory_exists_or_create
Expand Down Expand Up @@ -46,6 +47,8 @@ async def clone_repo(config: CloneConfig, *, token: str | None = None) -> None:
------
ValueError
If the repository is not found, if the provided URL is invalid, or if the token format is invalid.
RuntimeError
If Git operations fail during the cloning process.

"""
# Extract and validate query parameters
Expand Down Expand Up @@ -83,41 +86,91 @@ async def clone_repo(config: CloneConfig, *, token: str | None = None) -> None:
commit = await resolve_commit(config, token=token)
logger.debug("Resolved commit", extra={"commit": commit})

clone_cmd = ["git"]
if token and is_github_host(url):
clone_cmd += ["-c", create_git_auth_header(token, url=url)]

clone_cmd += ["clone", "--single-branch", "--no-checkout", "--depth=1"]
if partial_clone:
clone_cmd += ["--filter=blob:none", "--sparse"]

clone_cmd += [url, local_path]

# Clone the repository
logger.info("Executing git clone command", extra={"command": " ".join([*clone_cmd[:-1], "<url>", local_path])})
await run_command(*clone_cmd)
logger.info("Git clone completed successfully")
# Clone the repository using GitPython with proper authentication
logger.info("Executing git clone operation", extra={"url": "<redacted>", "local_path": local_path})
try:
clone_kwargs = {
"single_branch": True,
"no_checkout": True,
"depth": 1,
}

with git_auth_context(url, token) as (git_cmd, auth_url):
if partial_clone:
# For partial clones, use git.Git() with filter and sparse options
cmd_args = ["--single-branch", "--no-checkout", "--depth=1"]
cmd_args.extend(["--filter=blob:none", "--sparse"])
cmd_args.extend([auth_url, local_path])
git_cmd.clone(*cmd_args)
elif token and is_github_host(url):
# For authenticated GitHub repos, use git_cmd with auth URL
cmd_args = ["--single-branch", "--no-checkout", "--depth=1", auth_url, local_path]
git_cmd.clone(*cmd_args)
else:
# For non-authenticated repos, use the standard GitPython method
git.Repo.clone_from(url, local_path, **clone_kwargs)

logger.info("Git clone completed successfully")
except git.GitCommandError as exc:
msg = f"Git clone failed: {exc}"
raise RuntimeError(msg) from exc

# Checkout the subpath if it is a partial clone
if partial_clone:
logger.info("Setting up partial clone for subpath", extra={"subpath": config.subpath})
await checkout_partial_clone(config, token=token)
logger.debug("Partial clone setup completed")

git = create_git_command(["git"], local_path, url, token)
# Perform post-clone operations
await _perform_post_clone_operations(config, local_path, url, token, commit)

# Ensure the commit is locally available
logger.debug("Fetching specific commit", extra={"commit": commit})
await run_command(*git, "fetch", "--depth=1", "origin", commit)
logger.info("Git clone operation completed successfully", extra={"local_path": local_path})

# Write the work-tree at that commit
logger.info("Checking out commit", extra={"commit": commit})
await run_command(*git, "checkout", commit)

# Update submodules
if config.include_submodules:
logger.info("Updating submodules")
await run_command(*git, "submodule", "update", "--init", "--recursive", "--depth=1")
logger.debug("Submodules updated successfully")
async def _perform_post_clone_operations(
    config: CloneConfig,
    local_path: str,
    url: str,
    token: str | None,
    commit: str,
) -> None:
    """Perform post-clone operations like fetching, checkout, and submodule updates.

    The GitPython calls used here are synchronous and spawn ``git`` subprocesses that
    may wait on the network, so they are executed in the event loop's default executor
    instead of directly inside the coroutine. This keeps the event loop responsive
    while the repository is being fetched and checked out.

    Parameters
    ----------
    config : CloneConfig
        The configuration for cloning the repository.
    local_path : str
        The local path where the repository was cloned.
    url : str
        The repository URL.
    token : str | None
        GitHub personal access token (PAT) for accessing private repositories.
    commit : str
        The commit SHA to checkout.

    Raises
    ------
    RuntimeError
        If any Git operation fails.

    """
    # Function-scope imports keep this block self-contained; both modules are stdlib.
    import asyncio
    import contextvars

    def _run_git_steps() -> None:
        """Run the blocking fetch / checkout / submodule sequence."""
        repo = create_git_repo(local_path, url, token)

        # Ensure the commit is locally available
        logger.debug("Fetching specific commit", extra={"commit": commit})
        repo.git.fetch("--depth=1", "origin", commit)

        # Write the work-tree at that commit
        logger.info("Checking out commit", extra={"commit": commit})
        repo.git.checkout(commit)

        # Update submodules
        if config.include_submodules:
            logger.info("Updating submodules")
            repo.git.submodule("update", "--init", "--recursive", "--depth=1")
            logger.debug("Submodules updated successfully")

    # Copy the current context so context-local state (e.g. logging context) is
    # visible in the worker thread, as ``asyncio.to_thread`` would do on Python 3.9+.
    ctx = contextvars.copy_context()
    loop = asyncio.get_running_loop()
    try:
        # Exceptions raised in the worker thread are re-raised here by ``await``.
        await loop.run_in_executor(None, ctx.run, _run_git_steps)
    except git.GitCommandError as exc:
        msg = f"Git operation failed: {exc}"
        raise RuntimeError(msg) from exc
Loading
Loading