From 247aa3cc3941dfb819ac1f59884a5a79e6f8870e Mon Sep 17 00:00:00 2001 From: Nicolas Iragne Date: Sun, 13 Jul 2025 13:06:11 +0200 Subject: [PATCH 1/3] chore: add .env to gitignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 61d8642b..0c4be6e1 100644 --- a/.gitignore +++ b/.gitignore @@ -38,3 +38,6 @@ tmp/ # Project-specific files history.txt digest.txt + +# Environment variables +.env From 98e1d83cf18c3351bd9a12e6f13a11d890659995 Mon Sep 17 00:00:00 2001 From: Nicolas IRAGNE Date: Thu, 10 Jul 2025 15:43:04 +0200 Subject: [PATCH 2/3] feat: implement prometheus exporter --- .gitignore | 1 + pyproject.toml | 1 + requirements.txt | 1 + src/server/main.py | 4 ++- src/server/routers/__init__.py | 3 +- src/server/routers/ingest.py | 13 +++++++-- src/server/routers/metrics.py | 51 ++++++++++++++++++++++++++++++++++ 7 files changed, 70 insertions(+), 4 deletions(-) create mode 100644 src/server/routers/metrics.py diff --git a/.gitignore b/.gitignore index 0c4be6e1..9d61bef3 100644 --- a/.gitignore +++ b/.gitignore @@ -10,6 +10,7 @@ Thumbs.db # Python virtual-envs & tooling .venv*/ +venv/ .python-version __pycache__/ *.egg-info/ diff --git a/pyproject.toml b/pyproject.toml index fb78abab..e70e7caa 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,6 +16,7 @@ dependencies = [ "tiktoken>=0.7.0", # Support for o200k_base encoding "typing_extensions>= 4.0.0; python_version < '3.10'", "uvicorn>=0.11.7", # Minimum safe release (https://osv.dev/vulnerability/PYSEC-2020-150) + "prometheus-client", ] license = {file = "LICENSE"} diff --git a/requirements.txt b/requirements.txt index f9f9b50a..b6418950 100644 --- a/requirements.txt +++ b/requirements.txt @@ -8,3 +8,4 @@ slowapi starlette>=0.40.0 # Vulnerable to https://osv.dev/vulnerability/GHSA-f96h-pmfr-66vw tiktoken>=0.7.0 # Support for o200k_base encoding uvicorn>=0.11.7 # Vulnerable to https://osv.dev/vulnerability/PYSEC-2020-150 +prometheus-client \ No newline at end of file diff --git a/src/server/main.py b/src/server/main.py index 09904256..2e9959f5 100644 --- a/src/server/main.py +++ b/src/server/main.py @@ -12,7 +12,7 @@ from slowapi.errors import RateLimitExceeded from starlette.middleware.trustedhost import TrustedHostMiddleware -from server.routers import dynamic, index, ingest +from server.routers import dynamic, index, ingest, metrics from server.server_config import templates from server.server_utils import lifespan, limiter, rate_limit_exception_handler @@ -159,6 +159,8 @@ def openapi_json() -> JSONResponse: # Include routers for modular endpoints +if os.getenv("GITINGEST_PROMETHEUS_TOKEN") is not None: + app.include_router(metrics) app.include_router(index) app.include_router(ingest) app.include_router(dynamic) diff --git a/src/server/routers/__init__.py b/src/server/routers/__init__.py index 307ed16c..1a0136b4 100644 --- a/src/server/routers/__init__.py +++ b/src/server/routers/__init__.py @@ -3,5 +3,6 @@ from server.routers.dynamic import router as dynamic from server.routers.index import router as index from server.routers.ingest import router as ingest +from server.routers.metrics import router as metrics -__all__ = ["dynamic", "index", "ingest"] +__all__ = ["dynamic", "index", "ingest", "metrics"] diff --git a/src/server/routers/ingest.py b/src/server/routers/ingest.py index 117161bf..514db272 100644 --- a/src/server/routers/ingest.py +++ b/src/server/routers/ingest.py @@ -2,6 +2,7 @@ from fastapi import APIRouter, HTTPException, Request, status from fastapi.responses import FileResponse, JSONResponse +from prometheus_client import Counter from gitingest.config import TMP_BASE_PATH from server.models import IngestRequest @@ -9,6 +10,8 @@ from server.server_config import MAX_DISPLAY_SIZE from server.server_utils import limiter +ingest_counter = Counter("gitingest_ingest_total", "Number of ingests", ["status", "url"]) + router = APIRouter() @@ -33,13 +36,16 @@ async def api_ingest( - **JSONResponse**: Success response with ingestion results or error response with appropriate HTTP status code """ - return await _perform_ingestion( + response = await _perform_ingestion( input_text=ingest_request.input_text, max_file_size=ingest_request.max_file_size, pattern_type=ingest_request.pattern_type, pattern=ingest_request.pattern, token=ingest_request.token, ) + # limit URL to 255 characters + ingest_counter.labels(status=response.status_code, url=ingest_request.input_text[:255]).inc() + return response @router.get("/api/{user}/{repository}", responses=COMMON_INGEST_RESPONSES) @@ -72,13 +78,16 @@ async def api_ingest_get( **Returns** - **JSONResponse**: Success response with ingestion results or error response with appropriate HTTP status code """ - return await _perform_ingestion( + response = await _perform_ingestion( input_text=f"{user}/{repository}", max_file_size=max_file_size, pattern_type=pattern_type, pattern=pattern, token=token or None, ) + # limit URL to 255 characters + ingest_counter.labels(status=response.status_code, url=f"{user}/{repository}"[:255]).inc() + return response @router.get("/api/download/file/{ingest_id}", response_class=FileResponse) diff --git a/src/server/routers/metrics.py b/src/server/routers/metrics.py new file mode 100644 index 00000000..b596ee00 --- /dev/null +++ b/src/server/routers/metrics.py @@ -0,0 +1,51 @@ +"""Prometheus metrics endpoint for the API.""" + +import os +from fastapi import APIRouter, HTTPException, Request, status +from fastapi.responses import HTMLResponse +from prometheus_client import REGISTRY, generate_latest + +router = APIRouter() + +@router.get("/metrics") +async def metrics(request: Request) -> HTMLResponse: + """Serve Prometheus metrics with token authentication. + + This endpoint requires the GITINGEST_PROMETHEUS_TOKEN to be provided + in the Authorization header as 'Bearer '. + + Parameters + ---------- + request : Request + The incoming HTTP request containing headers + + Returns + ------- + HTMLResponse + Prometheus metrics in text format + + Raises + ------ + HTTPException + 401 if no token provided or invalid token + """ + # Get the expected token from environment + expected_token = os.getenv("GITINGEST_PROMETHEUS_TOKEN") + if not expected_token: + raise HTTPException( + status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, + ) + # Check Authorization header + auth_header = request.headers.get("Authorization") + if not auth_header: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + ) + + # Extract and verify token + provided_token = auth_header[7:] # Remove "Bearer " prefix + if provided_token != expected_token: + raise HTTPException( + status_code=status.HTTP_401_UNAUTHORIZED, + ) + return HTMLResponse(content=generate_latest(REGISTRY), status_code=200, media_type="text/plain") From 9a1cfe2964e1283799cc82ddfcef787b6220f07e Mon Sep 17 00:00:00 2001 From: mickael Date: Sun, 13 Jul 2025 15:49:18 +0200 Subject: [PATCH 3/3] refactor: decouple Prometheus metrics endpoint into a separate server --- .pre-commit-config.yaml | 2 ++ Dockerfile | 1 + requirements.txt | 2 +- src/server/main.py | 17 ++++++++-- src/server/metrics_server.py | 57 ++++++++++++++++++++++++++++++++++ src/server/routers/__init__.py | 3 +- src/server/routers/metrics.py | 51 ------------------------------ src/server/server_config.py | 7 ++++- 8 files changed, 82 insertions(+), 58 deletions(-) create mode 100644 src/server/metrics_server.py delete mode 100644 src/server/routers/metrics.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index f38e4d1f..f2d0072a 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -117,6 +117,7 @@ repos: 'fastapi[standard]>=0.109.1', httpx, pathspec>=0.12.1, + prometheus-client, pydantic, pytest-asyncio, pytest-mock, @@ -138,6 +139,7 @@ repos: 'fastapi[standard]>=0.109.1', httpx, pathspec>=0.12.1, + prometheus-client, pydantic, pytest-asyncio, pytest-mock, diff --git a/Dockerfile b/Dockerfile index 90ae4134..1fb2357f 100644 --- a/Dockerfile +++ b/Dockerfile @@ -43,4 +43,5 @@ RUN set -eux; \ USER appuser EXPOSE 8000 +EXPOSE 9090 CMD ["python", "-m", "uvicorn", "server.main:app", "--host", "0.0.0.0", "--port", "8000"] diff --git a/requirements.txt b/requirements.txt index b6418950..bfec694e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -2,10 +2,10 @@ click>=8.0.0 fastapi[standard]>=0.109.1 # Vulnerable to https://osv.dev/vulnerability/PYSEC-2024-38 httpx pathspec>=0.12.1 +prometheus-client pydantic python-dotenv slowapi starlette>=0.40.0 # Vulnerable to https://osv.dev/vulnerability/GHSA-f96h-pmfr-66vw tiktoken>=0.7.0 # Support for o200k_base encoding uvicorn>=0.11.7 # Vulnerable to https://osv.dev/vulnerability/PYSEC-2020-150 -prometheus-client \ No newline at end of file diff --git a/src/server/main.py b/src/server/main.py index 2e9959f5..5bdd2e15 100644 --- a/src/server/main.py +++ b/src/server/main.py @@ -3,6 +3,7 @@ from __future__ import annotations import os +import threading from pathlib import Path from dotenv import load_dotenv @@ -12,7 +13,8 @@ from slowapi.errors import RateLimitExceeded from starlette.middleware.trustedhost import TrustedHostMiddleware -from server.routers import dynamic, index, ingest, metrics +from server.metrics_server import start_metrics_server +from server.routers import dynamic, index, ingest from server.server_config import templates from server.server_utils import lifespan, limiter, rate_limit_exception_handler @@ -26,6 +28,17 @@ # Register the custom exception handler for rate limits app.add_exception_handler(RateLimitExceeded, rate_limit_exception_handler) +# Start metrics server in a separate thread if enabled +if os.getenv("GITINGEST_METRICS_ENABLED", "false").lower() == "true": + metrics_host = os.getenv("GITINGEST_METRICS_HOST", "127.0.0.1") + metrics_port = int(os.getenv("GITINGEST_METRICS_PORT", "9090")) + metrics_thread = threading.Thread( + target=start_metrics_server, + args=(metrics_host, metrics_port), + daemon=True, + ) + metrics_thread.start() + # Mount static files dynamically to serve CSS, JS, and other static assets static_dir = Path(__file__).parent.parent / "static" @@ -159,8 +172,6 @@ def openapi_json() -> JSONResponse: # Include routers for modular endpoints -if os.getenv("GITINGEST_PROMETHEUS_TOKEN") is not None: - app.include_router(metrics) app.include_router(index) app.include_router(ingest) app.include_router(dynamic) diff --git a/src/server/metrics_server.py b/src/server/metrics_server.py new file mode 100644 index 00000000..1de3d022 --- /dev/null +++ b/src/server/metrics_server.py @@ -0,0 +1,57 @@ +"""Prometheus metrics server running on a separate port.""" + +import logging + +import uvicorn +from fastapi import FastAPI +from fastapi.responses import HTMLResponse +from prometheus_client import REGISTRY, generate_latest + +# Create a logger for this module +logger = logging.getLogger(__name__) + +# Create a separate FastAPI app for metrics +metrics_app = FastAPI( + title="Gitingest Metrics", + description="Prometheus metrics for Gitingest", + docs_url=None, + redoc_url=None, +) + + +@metrics_app.get("/metrics") +async def metrics() -> HTMLResponse: + """Serve Prometheus metrics without authentication. + + This endpoint is only accessible from the local network. + + Returns + ------- + HTMLResponse + Prometheus metrics in text format + + """ + return HTMLResponse( + content=generate_latest(REGISTRY), + status_code=200, + media_type="text/plain", + ) + + +def start_metrics_server(host: str = "127.0.0.1", port: int = 9090) -> None: + """Start the metrics server on a separate port. + + Parameters + ---------- + host : str + The host to bind to (default: 127.0.0.1 for local network only) + port : int + The port to bind to (default: 9090) + + Returns + ------- + None + + """ + logger.info("Starting metrics server on %s:%s", host, port) + uvicorn.run(metrics_app, host=host, port=port) diff --git a/src/server/routers/__init__.py b/src/server/routers/__init__.py index 1a0136b4..307ed16c 100644 --- a/src/server/routers/__init__.py +++ b/src/server/routers/__init__.py @@ -3,6 +3,5 @@ from server.routers.dynamic import router as dynamic from server.routers.index import router as index from server.routers.ingest import router as ingest -from server.routers.metrics import router as metrics -__all__ = ["dynamic", "index", "ingest", "metrics"] +__all__ = ["dynamic", "index", "ingest"] diff --git a/src/server/routers/metrics.py b/src/server/routers/metrics.py deleted file mode 100644 index b596ee00..00000000 --- a/src/server/routers/metrics.py +++ /dev/null @@ -1,51 +0,0 @@ -"""Prometheus metrics endpoint for the API.""" - -import os -from fastapi import APIRouter, HTTPException, Request, status -from fastapi.responses import HTMLResponse -from prometheus_client import REGISTRY, generate_latest - -router = APIRouter() - -@router.get("/metrics") -async def metrics(request: Request) -> HTMLResponse: - """Serve Prometheus metrics with token authentication. - - This endpoint requires the GITINGEST_PROMETHEUS_TOKEN to be provided - in the Authorization header as 'Bearer '. - - Parameters - ---------- - request : Request - The incoming HTTP request containing headers - - Returns - ------- - HTMLResponse - Prometheus metrics in text format - - Raises - ------ - HTTPException - 401 if no token provided or invalid token - """ - # Get the expected token from environment - expected_token = os.getenv("GITINGEST_PROMETHEUS_TOKEN") - if not expected_token: - raise HTTPException( - status_code=status.HTTP_500_INTERNAL_SERVER_ERROR, - ) - # Check Authorization header - auth_header = request.headers.get("Authorization") - if not auth_header: - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - ) - - # Extract and verify token - provided_token = auth_header[7:] # Remove "Bearer " prefix - if provided_token != expected_token: - raise HTTPException( - status_code=status.HTTP_401_UNAUTHORIZED, - ) - return HTMLResponse(content=generate_latest(REGISTRY), status_code=200, media_type="text/plain") diff --git a/src/server/server_config.py b/src/server/server_config.py index 99ef5c91..0257db8b 100644 --- a/src/server/server_config.py +++ b/src/server/server_config.py @@ -2,6 +2,8 @@ from __future__ import annotations +from pathlib import Path + from fastapi.templating import Jinja2Templates MAX_DISPLAY_SIZE: int = 300_000 @@ -19,4 +21,7 @@ {"name": "ApiAnalytics", "url": "https://github.com/tom-draper/api-analytics"}, ] -templates = Jinja2Templates(directory="server/templates") + +# Use absolute path to templates directory +templates_dir = Path(__file__).parent / "templates" +templates = Jinja2Templates(directory=templates_dir)