diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000..f970781 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,8 @@ +# .coveragerc to control coverage.py + +[report] +# Regexes for lines to exclude from consideration +exclude_also = + # Don't complain if non-runnable code isn't run: + if __name__ == .__main__.: + if TYPE_CHECKING: diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index d553e49..cd9d07e 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -2,19 +2,20 @@ name: Lint on: [push, pull_request, workflow_dispatch] +permissions: {} + env: FORCE_COLOR: 1 PIP_DISABLE_PIP_VERSION_CHECK: 1 -permissions: - contents: read - jobs: lint: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 + with: + persist-credentials: false - uses: actions/setup-python@v5 with: python-version: "3.x" diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index d74e607..b64e8a3 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -1,34 +1,80 @@ name: Test -on: [push, pull_request, workflow_dispatch] +on: + push: + pull_request: + workflow_dispatch: -permissions: - contents: read +permissions: {} env: FORCE_COLOR: 1 jobs: - test: + integration: + name: Integration test + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + - uses: actions/setup-python@v5 + with: + python-version: "3.13" + + - name: Set up requirements + run: | + python -m pip install --upgrade pip + python -m pip install -r requirements.txt + + - name: Build documentation + run: > + python ./build_docs.py + --quick + --build-root ./build_root + --www-root ./www + --log-directory ./logs + --group "$(id -g)" + --skip-cache-invalidation + --languages en + --branches 3.14 + + - name: Upload documentation + uses: actions/upload-artifact@v4 + with: + name: www-root + path: ./www + retention-days: 2 + + unit: + name: Unit tests runs-on: ${{ matrix.os }} strategy: fail-fast: 
false matrix: - python-version: ["3.10", "3.11", "3.12", "3.13"] - os: [ubuntu-latest] + python-version: ["3.13", "3.14"] + os: [windows-latest, macos-latest, ubuntu-latest] steps: - - uses: actions/checkout@v4 + - uses: actions/checkout@v4 + with: + persist-credentials: false + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + allow-prereleases: true - - name: Set up Python ${{ matrix.python-version }} - uses: actions/setup-python@v5 - with: - python-version: ${{ matrix.python-version }} - allow-prereleases: true + - name: Install uv + uses: hynek/setup-cached-uv@v2 - - name: Install uv - uses: hynek/setup-cached-uv@v2 + - name: Tox tests + run: uvx --with tox-uv tox -e py - - name: Tox tests - run: | - uvx --with tox-uv tox -e py + - name: Upload coverage + uses: codecov/codecov-action@v5 + with: + flags: ${{ matrix.os }} + name: Python ${{ matrix.python-version }} + token: ${{ secrets.CODECOV_ORG_TOKEN }} diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml index c3d2f5f..869a979 100644 --- a/.pre-commit-config.yaml +++ b/.pre-commit-config.yaml @@ -1,6 +1,6 @@ repos: - repo: https://github.com/pre-commit/pre-commit-hooks - rev: v4.6.0 + rev: v5.0.0 hooks: - id: check-added-large-files - id: check-case-conflict @@ -14,35 +14,48 @@ repos: - id: trailing-whitespace - repo: https://github.com/astral-sh/ruff-pre-commit - rev: v0.6.7 + rev: v0.11.5 hooks: + - id: ruff + args: [--fix] - id: ruff-format - repo: https://github.com/python-jsonschema/check-jsonschema - rev: 0.29.2 + rev: 0.31.1 hooks: - id: check-github-workflows - repo: https://github.com/rhysd/actionlint - rev: v1.7.1 + rev: v1.7.7 hooks: - id: actionlint + - repo: https://github.com/woodruffw/zizmor-pre-commit + rev: v1.3.1 + hooks: + - id: zizmor + - repo: https://github.com/tox-dev/pyproject-fmt - rev: 2.2.3 + rev: v2.5.0 hooks: - id: pyproject-fmt - repo: 
https://github.com/abravalheri/validate-pyproject - rev: v0.19 + rev: v0.23 hooks: - id: validate-pyproject - repo: https://github.com/tox-dev/tox-ini-fmt - rev: 1.4.0 + rev: 1.5.0 hooks: - id: tox-ini-fmt + - repo: https://github.com/rbubley/mirrors-prettier + rev: v3.5.1 + hooks: + - id: prettier + files: templates/switchers.js + - repo: meta hooks: - id: check-hooks-apply diff --git a/.ruff.toml b/.ruff.toml new file mode 100644 index 0000000..47cbf74 --- /dev/null +++ b/.ruff.toml @@ -0,0 +1,34 @@ +target-version = "py313" # Pin Ruff to Python 3.13 +line-length = 88 +output-format = "full" + +[format] +preview = true +docstring-code-format = true + +[lint] +preview = true +select = [ + "C4", # flake8-comprehensions + "B", # flake8-bugbear + "E", # pycodestyle + "F", # pyflakes + "FA", # flake8-future-annotations + "FLY", # flynt + "G", # flake8-logging-format + "I", # isort + "N", # pep8-naming + "PERF", # perflint + "PGH", # pygrep-hooks + "PT", # flake8-pytest-style + "TC", # flake8-type-checking + "UP", # pyupgrade + "W", # pycodestyle +] +ignore = [ + "E501", # Ignore line length errors (we use auto-formatting) +] + +[lint.flake8-type-checking] +exempt-modules = [] +strict = true diff --git a/README.md b/README.md index 767014c..8f914c6 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,13 @@ +# docsbuild-scripts + +[GitHub Actions status](https://github.com/python/docsbuild-scripts/actions/workflows/test.yml) +[Codecov](https://codecov.io/gh/python/docsbuild-scripts) + This repository contains scripts for automatically building the Python documentation on [docs.python.org](https://docs.python.org). -# How to test it? +## How to test it? The following command should build all maintained versions and translations in `./www`, beware it can take a few hours: @@ -12,10 +17,10 @@ python3 ./build_docs.py --quick --build-root ./build_root --www-root ./www --log ``` If you don't need to build all translations of all branches, add -`--language en --branch main`. 
+`--languages en --branches main`. -# Check current version +## Check current version Install `tools_requirements.txt` then run `python check_versions.py ../cpython/` (pointing to a real CPython clone) to see which version @@ -34,9 +39,9 @@ of Sphinx we're using where: 3.9 ø sphinx==2.4.4 needs_sphinx='1.8' 3.10 ø sphinx==3.4.3 needs_sphinx='3.2' 3.11 ø sphinx~=7.2.0 needs_sphinx='4.2' - 3.12 ø sphinx~=8.1.0 needs_sphinx='7.2.6' - 3.13 ø sphinx~=8.1.0 needs_sphinx='7.2.6' - 3.14 ø sphinx~=8.1.0 needs_sphinx='7.2.6' + 3.12 ø sphinx~=8.2.0 needs_sphinx='8.2.0' + 3.13 ø sphinx~=8.2.0 needs_sphinx='8.2.0' + 3.14 ø sphinx~=8.2.0 needs_sphinx='8.2.0' ========= ============= ================== ==================== Sphinx build as seen on docs.python.org: @@ -47,7 +52,24 @@ of Sphinx we're using where: 3.9 2.4.4 2.4.4 2.4.4 2.4.4 2.4.4 2.4.4 2.4.4 2.4.4 2.4.4 2.4.4 2.4.4 2.4.4 2.4.4 3.10 3.4.3 3.4.3 3.4.3 3.4.3 3.4.3 3.4.3 3.4.3 3.4.3 3.4.3 3.4.3 3.4.3 3.4.3 3.4.3 3.11 7.2.6 7.2.6 7.2.6 7.2.6 7.2.6 7.2.6 7.2.6 7.2.6 7.2.6 7.2.6 7.2.6 7.2.6 7.2.6 - 3.12 8.1.3 8.1.3 8.1.3 8.1.3 8.1.3 8.1.3 8.1.3 8.1.3 8.1.3 8.1.3 8.1.3 8.1.3 8.1.3 - 3.13 8.1.3 8.1.3 8.1.3 8.1.3 8.1.3 8.1.3 8.1.3 8.1.3 8.1.3 8.1.3 8.1.3 8.1.3 8.1.3 - 3.14 8.1.3 8.1.3 8.1.3 8.1.3 8.1.3 8.1.3 8.1.3 8.1.3 8.1.3 8.1.3 8.1.3 8.1.3 8.1.3 + 3.12 8.2.3 8.2.3 8.2.3 8.2.3 8.2.3 8.2.3 8.2.3 8.2.3 8.2.3 8.2.3 8.2.3 8.2.3 8.2.3 + 3.13 8.2.3 8.2.3 8.2.3 8.2.3 8.2.3 8.2.3 8.2.3 8.2.3 8.2.3 8.2.3 8.2.3 8.2.3 8.2.3 + 3.14 8.2.3 8.2.3 8.2.3 8.2.3 8.2.3 8.2.3 8.2.3 8.2.3 8.2.3 8.2.3 8.2.3 8.2.3 8.2.3 ========= ===== ===== ===== ===== ===== ===== ===== ===== ======= ===== ===== ======= ======= + +## Manually rebuild a branch + +Docs for [feature and bugfix branches](https://devguide.python.org/versions/) are +automatically built from a cron. + +Manual rebuilds are needed for new security releases, +and to add the end-of-life banner for newly end-of-life branches. 
+ +To manually rebuild a branch, for example 3.11: + +```shell +ssh docs.nyc1.psf.io +sudo su --shell=/bin/bash docsbuild +screen -DUR # Rejoin screen session if it exists, otherwise create a new one +/srv/docsbuild/venv/bin/python /srv/docsbuild/scripts/build_docs.py --force --branches 3.11 +``` diff --git a/build_docs.py b/build_docs.py index 658ba63..c75f096 100755 --- a/build_docs.py +++ b/build_docs.py @@ -5,6 +5,26 @@ Without any arguments builds docs for all active versions and languages. +Environment variables for: + +- `SENTRY_DSN` (Error reporting) +- `FASTLY_SERVICE_ID` / `FASTLY_TOKEN` (CDN purges) +- `PYTHON_DOCS_ENABLE_ANALYTICS` (Enable Plausible for online docs) + +are read from the site configuration path for your platform +(/etc/xdg/docsbuild-scripts on Linux) if available, +and can be overridden by writing a file to the user config dir +for your platform ($HOME/.config/docsbuild-scripts on Linux). +The contents of the file are parsed as TOML: + +```toml +[env] +SENTRY_DSN = "https://0a0a0a0a0a0a0a0a0a0a0a@sentry.io/69420" +FASTLY_SERVICE_ID = "deadbeefdeadbeefdead" +FASTLY_TOKEN = "secureme!" +PYTHON_DOCS_ENABLE_ANALYTICS = "1" +``` + Languages are stored in `config.toml` while versions are discovered from the devguide. 
@@ -22,36 +42,43 @@ from __future__ import annotations -from argparse import ArgumentParser, Namespace -from collections.abc import Sequence -from contextlib import suppress, contextmanager -from dataclasses import dataclass +import argparse +import concurrent.futures +import dataclasses +import datetime as dt import filecmp import json import logging import logging.handlers -from functools import total_ordering -from os import getenv, readlink +import os import re import shlex import shutil +import stat import subprocess import sys +import venv from bisect import bisect_left as bisect -from datetime import datetime as dt, timezone +from contextlib import contextmanager, suppress from pathlib import Path from string import Template from time import perf_counter, sleep -from typing import Iterable, Literal from urllib.parse import urljoin import jinja2 +import platformdirs import tomlkit import urllib3 import zc.lockfile +TYPE_CHECKING = False +if TYPE_CHECKING: + from collections.abc import Collection, Iterator, Sequence, Set + from typing import Literal + try: - from os import EX_OK, EX_SOFTWARE as EX_FAILURE + from os import EX_OK + from os import EX_SOFTWARE as EX_FAILURE except ImportError: EX_OK, EX_FAILURE = 0, 1 @@ -65,10 +92,68 @@ HERE = Path(__file__).resolve().parent -@total_ordering +@dataclasses.dataclass(frozen=True, slots=True) +class Versions: + _seq: Sequence[Version] + + def __iter__(self) -> Iterator[Version]: + return iter(self._seq) + + def __reversed__(self) -> Iterator[Version]: + return reversed(self._seq) + + @classmethod + def from_json(cls, data: dict) -> Versions: + """Load versions from the devguide's JSON representation.""" + permitted = ", ".join(sorted(Version.STATUSES | Version.SYNONYMS.keys())) + + versions = [] + for name, release in data.items(): + branch = release["branch"] + status = release["status"] + status = Version.SYNONYMS.get(status, status) + if status not in Version.STATUSES: + msg = ( + f"Saw invalid version status 
{status!r}, " + f"expected to be one of {permitted}." + ) + raise ValueError(msg) + versions.append(Version(name=name, status=status, branch_or_tag=branch)) + + return cls(sorted(versions, key=Version.as_tuple)) + + def filter(self, branches: Sequence[str] = ()) -> Sequence[Version]: + """Filter the given versions. + + If *branches* is given, only *versions* matching *branches* are returned. + + Else all live versions are returned (this means no EOL and no + security-fixes branches). + """ + if branches: + branches = frozenset(branches) + return [v for v in self if {v.name, v.branch_or_tag} & branches] + return [v for v in self if v.status not in {"EOL", "security-fixes"}] + + @property + def current_stable(self) -> Version: + """Find the current stable CPython version.""" + return max((v for v in self if v.status == "stable"), key=Version.as_tuple) + + @property + def current_dev(self) -> Version: + """Find the current CPython version in development.""" + return max(self, key=Version.as_tuple) + + +@dataclasses.dataclass(frozen=True, kw_only=True, slots=True) class Version: """Represents a CPython version and its documentation build dependencies.""" + name: str + status: Literal["EOL", "security-fixes", "stable", "pre-release", "in development"] + branch_or_tag: str + STATUSES = {"EOL", "security-fixes", "stable", "pre-release", "in development"} # Those synonyms map branch status vocabulary found in the devguide @@ -81,22 +166,11 @@ class Version: "prerelease": "pre-release", } - def __init__(self, name, *, status, branch_or_tag=None): - status = self.SYNONYMS.get(status, status) - if status not in self.STATUSES: - raise ValueError( - "Version status expected to be one of: " - f"{', '.join(self.STATUSES|set(self.SYNONYMS.keys()))}, got {status!r}." 
- ) - self.name = name - self.branch_or_tag = branch_or_tag - self.status = status - - def __repr__(self): - return f"Version({self.name})" + def __eq__(self, other: Version) -> bool: + return self.name == other.name @property - def requirements(self): + def requirements(self) -> list[str]: """Generate the right requirements for this version. Since CPython 3.8 a Doc/requirements.txt file can be used. @@ -108,61 +182,60 @@ def requirements(self): See https://github.com/python/cpython/issues/91483 """ - if self.name == "3.5": - return ["jieba", "blurb", "sphinx==1.8.4", "jinja2<3.1", "docutils<=0.17.1"] - if self.name in {"3.7", "3.6", "2.7"}: - return ["jieba", "blurb", "sphinx==2.3.1", "jinja2<3.1", "docutils<=0.17.1"] - - return [ + dependencies = [ + "-rrequirements.txt", "jieba", # To improve zh search. "PyStemmer~=2.2.0", # To improve performance for word stemming. - "-rrequirements.txt", ] + if self.as_tuple() >= (3, 11): + return dependencies + if self.as_tuple() >= (3, 8): + # Restore the imghdr module for Python 3.8-3.10. 
+ return dependencies + ["standard-imghdr"] + + # Requirements/constraints for Python 3.7 and older, pre-requirements.txt + reqs = [ + "alabaster<0.7.12", + "blurb<1.2", + "docutils<=0.17.1", + "jieba", + "jinja2<3.1", + "python-docs-theme<=2023.3.1", + "sphinxcontrib-applehelp<=1.0.2", + "sphinxcontrib-devhelp<=1.0.2", + "sphinxcontrib-htmlhelp<=2.0", + "sphinxcontrib-jsmath<=1.0.1", + "sphinxcontrib-qthelp<=1.0.3", + "sphinxcontrib-serializinghtml<=1.1.5", + "standard-imghdr", + ] + if self.name in {"3.7", "3.6", "2.7"}: + return reqs + ["sphinx==2.3.1"] + if self.name == "3.5": + return reqs + ["sphinx==1.8.4", "standard-pipes"] + raise ValueError("unreachable") @property - def changefreq(self): + def changefreq(self) -> str: """Estimate this version change frequency, for the sitemap.""" return {"EOL": "never", "security-fixes": "yearly"}.get(self.status, "daily") - def as_tuple(self): + def as_tuple(self) -> tuple[int, ...]: """This version name as tuple, for easy comparisons.""" return version_to_tuple(self.name) @property - def url(self): + def url(self) -> str: """The doc URL of this version in production.""" return f"https://docs.python.org/{self.name}/" @property - def title(self): + def title(self) -> str: """The title of this version's doc, for the sidebar.""" return f"Python {self.name} ({self.status})" - @staticmethod - def filter(versions, branch=None): - """Filter the given versions. - - If *branch* is given, only *versions* matching *branch* are returned. - - Else all live versions are returned (this means no EOL and no - security-fixes branches). 
- """ - if branch: - return [v for v in versions if branch in (v.name, v.branch_or_tag)] - return [v for v in versions if v.status not in ("EOL", "security-fixes")] - - @staticmethod - def current_stable(versions): - """Find the current stable CPython version.""" - return max((v for v in versions if v.status == "stable"), key=Version.as_tuple) - - @staticmethod - def current_dev(versions): - """Find the current CPython version in development.""" - return max(versions, key=Version.as_tuple) - @property - def picker_label(self): + def picker_label(self) -> str: """Forge the label of a version picker.""" if self.status == "in development": return f"dev ({self.name})" @@ -170,50 +243,75 @@ def picker_label(self): return f"pre ({self.name})" return self.name - def setup_indexsidebar(self, versions: Sequence[Version], dest_path: Path): - """Build indexsidebar.html for Sphinx.""" - template_path = HERE / "templates" / "indexsidebar.html" - template = jinja2.Template(template_path.read_text(encoding="UTF-8")) - rendered_template = template.render( - current_version=self, - versions=versions[::-1], - ) - dest_path.write_text(rendered_template, encoding="UTF-8") - @classmethod - def from_json(cls, name, values): - """Loads a version from devguide's json representation.""" - return cls(name, status=values["status"], branch_or_tag=values["branch"]) +@dataclasses.dataclass(frozen=True, slots=True) +class Languages: + _seq: Sequence[Language] - def __eq__(self, other): - return self.name == other.name + def __iter__(self) -> Iterator[Language]: + return iter(self._seq) + + def __reversed__(self) -> Iterator[Language]: + return reversed(self._seq) + + @classmethod + def from_json(cls, defaults: dict, languages: dict) -> Languages: + default_translated_name = defaults.get("translated_name", "") + default_in_prod = defaults.get("in_prod", True) + default_sphinxopts = defaults.get("sphinxopts", []) + default_html_only = defaults.get("html_only", False) + langs = [ + Language( + 
iso639_tag=iso639_tag, + name=section["name"], + translated_name=section.get("translated_name", default_translated_name), + in_prod=section.get("in_prod", default_in_prod), + sphinxopts=section.get("sphinxopts", default_sphinxopts), + html_only=section.get("html_only", default_html_only), + ) + for iso639_tag, section in languages.items() + ] + return cls(langs) - def __gt__(self, other): - return self.as_tuple() > other.as_tuple() + def filter(self, language_tags: Sequence[str] = ()) -> Sequence[Language]: + """Filter a sequence of languages according to --languages.""" + if language_tags: + language_tags = frozenset(language_tags) + return [l for l in self if l.tag in language_tags] # NoQA: E741 + return list(self) -@dataclass(frozen=True, order=True) +@dataclasses.dataclass(order=True, frozen=True, kw_only=True) class Language: iso639_tag: str name: str + translated_name: str in_prod: bool - sphinxopts: tuple + sphinxopts: Sequence[str] html_only: bool = False @property - def tag(self): + def tag(self) -> str: return self.iso639_tag.replace("_", "-").lower() - @staticmethod - def filter(languages, language_tags=None): - """Filter a sequence of languages according to --languages.""" - if language_tags: - languages_dict = {language.tag: language for language in languages} - return [languages_dict[tag] for tag in language_tags] - return languages + @property + def is_translation(self) -> bool: + return self.tag != "en" + + @property + def locale_repo_url(self) -> str: + return f"https://github.com/python/python-docs-{self.tag}.git" + + @property + def switcher_label(self) -> str: + if self.translated_name: + return f"{self.name} | {self.translated_name}" + return self.name -def run(cmd, cwd=None) -> subprocess.CompletedProcess: +def run( + cmd: Sequence[str | Path], cwd: Path | None = None +) -> subprocess.CompletedProcess: """Like subprocess.run, with logging before and after the command execution.""" cmd = list(map(str, cmd)) cmdstring = shlex.join(cmd) @@ -239,7 
+337,7 @@ def run(cmd, cwd=None) -> subprocess.CompletedProcess: return result -def run_with_logging(cmd, cwd=None): +def run_with_logging(cmd: Sequence[str | Path], cwd: Path | None = None) -> None: """Like subprocess.check_call, with logging before the command execution.""" cmd = list(map(str, cmd)) logging.debug("Run: '%s'", shlex.join(cmd)) @@ -261,46 +359,38 @@ def run_with_logging(cmd, cwd=None): raise subprocess.CalledProcessError(return_code, cmd[0]) -def changed_files(left, right): - """Compute a list of different files between left and right, recursively. - Resulting paths are relative to left. - """ - changed = [] +def changed_files(left: Path, right: Path) -> int: + """Compute the number of different files in the two directory trees.""" - def traverse(dircmp_result): - base = Path(dircmp_result.left).relative_to(left) - for file in dircmp_result.diff_files: - changed.append(str(base / file)) - if file == "index.html": - changed.append(str(base) + "/") - for dircomp in dircmp_result.subdirs.values(): - traverse(dircomp) + def traverse(dircmp_result: filecmp.dircmp) -> int: + changed = len(dircmp_result.diff_files) + changed += sum(map(traverse, dircmp_result.subdirs.values())) + return changed - traverse(filecmp.dircmp(left, right)) - return changed + return traverse(filecmp.dircmp(left, right)) -@dataclass +@dataclasses.dataclass class Repository: """Git repository abstraction for our specific needs.""" remote: str directory: Path - def run(self, *args): + def run(self, *args: str) -> subprocess.CompletedProcess: """Run git command in the clone repository.""" return run(("git", "-C", self.directory) + args) - def get_ref(self, pattern): + def get_ref(self, pattern: str) -> str: """Return the reference of a given tag or branch.""" try: # Maybe it's a branch - return self.run("show-ref", "-s", "origin/" + pattern).stdout.strip() + return self.run("show-ref", "-s", f"origin/{pattern}").stdout.strip() except subprocess.CalledProcessError: # Maybe it's a tag 
- return self.run("show-ref", "-s", "tags/" + pattern).stdout.strip() + return self.run("show-ref", "-s", f"tags/{pattern}").stdout.strip() - def fetch(self): + def fetch(self) -> subprocess.CompletedProcess: """Try (and retry) to run git fetch.""" try: return self.run("fetch") @@ -309,35 +399,37 @@ def fetch(self): sleep(5) return self.run("fetch") - def switch(self, branch_or_tag): + def switch(self, branch_or_tag: str) -> None: """Reset and cleans the repository to the given branch or tag.""" self.run("reset", "--hard", self.get_ref(branch_or_tag), "--") self.run("clean", "-dfqx") - def clone(self): + def clone(self) -> bool: """Maybe clone the repository, if not already cloned.""" if (self.directory / ".git").is_dir(): return False # Already cloned logging.info("Cloning %s into %s", self.remote, self.directory) self.directory.mkdir(mode=0o775, parents=True, exist_ok=True) - run(["git", "clone", self.remote, self.directory]) + run(("git", "clone", self.remote, self.directory)) return True - def update(self): + def update(self) -> None: self.clone() or self.fetch() -def version_to_tuple(version): +def version_to_tuple(version: str) -> tuple[int, ...]: """Transform a version string to a tuple, for easy comparisons.""" return tuple(int(part) for part in version.split(".")) -def tuple_to_version(version_tuple): +def tuple_to_version(version_tuple: tuple[int, ...]) -> str: """Reverse version_to_tuple.""" return ".".join(str(part) for part in version_tuple) -def locate_nearest_version(available_versions, target_version): +def locate_nearest_version( + available_versions: Collection[str], target_version: str +) -> str: """Look for the nearest version of target_version in available_versions. Versions are to be given as tuples, like (3, 7) for 3.7. 
@@ -381,25 +473,13 @@ def edit(file: Path): temporary.rename(file) -def setup_switchers( - versions: Sequence[Version], languages: Sequence[Language], html_root: Path -): +def setup_switchers(script_content: bytes, html_root: Path) -> None: """Setup cross-links between CPython versions: - Cross-link various languages in a language switcher - Cross-link various versions in a version switcher """ - languages_map = dict(sorted((l.tag, l.name) for l in languages if l.in_prod)) - versions_map = {v.name: v.picker_label for v in reversed(versions)} - - switchers_template_file = HERE / "templates" / "switchers.js" switchers_path = html_root / "_static" / "switchers.js" - - template = Template(switchers_template_file.read_text(encoding="UTF-8")) - rendered_template = template.safe_substitute( - LANGUAGES=json.dumps(languages_map), - VERSIONS=json.dumps(versions_map), - ) - switchers_path.write_text(rendered_template, encoding="UTF-8") + switchers_path.write_bytes(script_content) for file in html_root.glob("**/*.html"): depth = len(file.relative_to(html_root).parts) - 1 @@ -414,65 +494,16 @@ def setup_switchers( ofile.write(line) -def copy_robots_txt( - www_root: Path, - group, - skip_cache_invalidation, - http: urllib3.PoolManager, -) -> None: - """Copy robots.txt to www_root.""" - if not www_root.exists(): - logging.info("Skipping copying robots.txt (www root does not even exist).") - return - logging.info("Copying robots.txt...") - template_path = HERE / "templates" / "robots.txt" - robots_path = www_root / "robots.txt" - shutil.copyfile(template_path, robots_path) - robots_path.chmod(0o775) - run(["chgrp", group, robots_path]) - if not skip_cache_invalidation: - purge(http, "robots.txt") - - -def build_sitemap( - versions: Iterable[Version], languages: Iterable[Language], www_root: Path, group -): - """Build a sitemap with all live versions and translations.""" - if not www_root.exists(): - logging.info("Skipping sitemap generation (www root does not even exist).") - 
return - logging.info("Starting sitemap generation...") - template_path = HERE / "templates" / "sitemap.xml" - template = jinja2.Template(template_path.read_text(encoding="UTF-8")) - rendered_template = template.render(languages=languages, versions=versions) - sitemap_path = www_root / "sitemap.xml" - sitemap_path.write_text(rendered_template + "\n", encoding="UTF-8") - sitemap_path.chmod(0o664) - run(["chgrp", group, sitemap_path]) - - -def build_404(www_root: Path, group): - """Build a nice 404 error page to display in case PDFs are not built yet.""" - if not www_root.exists(): - logging.info("Skipping 404 page generation (www root does not even exist).") - return - logging.info("Copying 404 page...") - not_found_file = www_root / "404.html" - shutil.copyfile(HERE / "templates" / "404.html", not_found_file) - not_found_file.chmod(0o664) - run(["chgrp", group, not_found_file]) - - -def head(text, lines=10): +def head(text: str, lines: int = 10) -> str: """Return the first *lines* lines from the given text.""" return "\n".join(text.split("\n")[:lines]) -def version_info(): +def version_info() -> None: """Handler for --version.""" try: platex_version = head( - subprocess.check_output(["platex", "--version"], universal_newlines=True), + subprocess.check_output(("platex", "--version"), text=True), lines=3, ) except FileNotFoundError: @@ -480,7 +511,7 @@ def version_info(): try: xelatex_version = head( - subprocess.check_output(["xelatex", "--version"], universal_newlines=True), + subprocess.check_output(("xelatex", "--version"), text=True), lines=2, ) except FileNotFoundError: @@ -499,118 +530,15 @@ def version_info(): ) -def parse_args(): - """Parse command-line arguments.""" - - parser = ArgumentParser( - description="Runs a build of the Python docs for various branches." 
- ) - parser.add_argument( - "--select-output", - choices=("no-html", "only-html", "only-html-en"), - help="Choose what outputs to build.", - ) - parser.add_argument( - "-q", - "--quick", - action="store_true", - help="Run a quick build (only HTML files).", - ) - parser.add_argument( - "-b", - "--branch", - metavar="3.12", - help="Version to build (defaults to all maintained branches).", - ) - parser.add_argument( - "-r", - "--build-root", - type=Path, - help="Path to a directory containing a checkout per branch.", - default=Path("/srv/docsbuild"), - ) - parser.add_argument( - "-w", - "--www-root", - type=Path, - help="Path where generated files will be copied.", - default=Path("/srv/docs.python.org"), - ) - parser.add_argument( - "--skip-cache-invalidation", - help="Skip Fastly cache invalidation.", - action="store_true", - ) - parser.add_argument( - "--group", - help="Group files on targets and www-root file should get.", - default="docs", - ) - parser.add_argument( - "--log-directory", - type=Path, - help="Directory used to store logs.", - default=Path("/var/log/docsbuild/"), - ) - parser.add_argument( - "--languages", - nargs="*", - help="Language translation, as a PEP 545 language tag like" - " 'fr' or 'pt-br'. 
" - "Builds all available languages by default.", - metavar="fr", - ) - parser.add_argument( - "--version", - action="store_true", - help="Get build_docs and dependencies version info", - ) - parser.add_argument( - "--theme", - default="python-docs-theme", - help="Python package to use for python-docs-theme: Useful to test branches:" - " --theme git+https://github.com/obulat/python-docs-theme@master", - ) - args = parser.parse_args() - if args.version: - version_info() - sys.exit(0) - del args.version - if args.log_directory: - args.log_directory = args.log_directory.resolve() - if args.build_root: - args.build_root = args.build_root.resolve() - if args.www_root: - args.www_root = args.www_root.resolve() - return args - - -def setup_logging(log_directory: Path, select_output: str | None): - """Setup logging to stderr if run by a human, or to a file if run from a cron.""" - log_format = "%(asctime)s %(levelname)s: %(message)s" - if sys.stderr.isatty(): - logging.basicConfig(format=log_format, stream=sys.stderr) - else: - log_directory.mkdir(parents=True, exist_ok=True) - if select_output is None: - filename = log_directory / "docsbuild.log" - else: - filename = log_directory / f"docsbuild-{select_output}.log" - handler = logging.handlers.WatchedFileHandler(filename) - handler.setFormatter(logging.Formatter(log_format)) - logging.getLogger().addHandler(handler) - logging.getLogger().setLevel(logging.DEBUG) - - -@dataclass +@dataclasses.dataclass class DocBuilder: """Builder for a CPython version and a language.""" version: Version - versions: Sequence[Version] language: Language - languages: Sequence[Language] cpython_repo: Repository + docs_by_version_content: bytes + switchers_content: bytes build_root: Path www_root: Path select_output: Literal["no-html", "only-html", "only-html-en"] | None @@ -618,10 +546,10 @@ class DocBuilder: group: str log_directory: Path skip_cache_invalidation: bool - theme: Path + theme: str @property - def html_only(self): + def 
html_only(self) -> bool: return ( self.select_output in {"only-html", "only-html-en"} or self.quick @@ -629,23 +557,23 @@ def html_only(self): ) @property - def includes_html(self): + def includes_html(self) -> bool: """Does the build we are running include HTML output?""" return self.select_output != "no-html" - def run(self, http: urllib3.PoolManager) -> bool: + def run(self, http: urllib3.PoolManager, force_build: bool) -> bool | None: """Build and publish a Python doc, for a language, and a version.""" start_time = perf_counter() - start_timestamp = dt.now(tz=timezone.utc).replace(microsecond=0) + start_timestamp = dt.datetime.now(tz=dt.UTC).replace(microsecond=0) logging.info("Running.") try: if self.language.html_only and not self.includes_html: logging.info("Skipping non-HTML build (language is HTML-only).") - return True + return None # skipped self.cpython_repo.switch(self.version.branch_or_tag) - if self.language.tag != "en": + if self.language.is_translation: self.clone_translation() - if trigger_reason := self.should_rebuild(): + if trigger_reason := self.should_rebuild(force_build): self.build_venv() self.build() self.copy_build_to_webroot(http) @@ -654,6 +582,8 @@ def run(self, http: urllib3.PoolManager) -> bool: build_duration=perf_counter() - start_time, trigger=trigger_reason, ) + else: + return None # skipped except Exception as err: logging.exception("Badly handled exception, human, please help.") if sentry_sdk: @@ -661,31 +591,28 @@ def run(self, http: urllib3.PoolManager) -> bool: return False return True + @property + def locale_dir(self) -> Path: + return self.build_root / self.version.name / "locale" + @property def checkout(self) -> Path: """Path to CPython git clone.""" return self.build_root / _checkout_name(self.select_output) - def clone_translation(self): + def clone_translation(self) -> None: self.translation_repo.update() self.translation_repo.switch(self.translation_branch) @property - def translation_repo(self): + def 
translation_repo(self) -> Repository: """See PEP 545 for translations repository naming convention.""" - locale_repo = f"https://github.com/python/python-docs-{self.language.tag}.git" - locale_clone_dir = ( - self.build_root - / self.version.name - / "locale" - / self.language.iso639_tag - / "LC_MESSAGES" - ) - return Repository(locale_repo, locale_clone_dir) + locale_clone_dir = self.locale_dir / self.language.iso639_tag / "LC_MESSAGES" + return Repository(self.language.locale_repo_url, locale_clone_dir) @property - def translation_branch(self): + def translation_branch(self) -> str: """Some CPython versions may be untranslated, being either too old or too new. @@ -698,36 +625,18 @@ def translation_branch(self): branches = re.findall(r"/([0-9]+\.[0-9]+)$", remote_branches, re.M) return locate_nearest_version(branches, self.version.name) - def build(self): + def build(self) -> None: """Build this version/language doc.""" logging.info("Build start.") start_time = perf_counter() sphinxopts = list(self.language.sphinxopts) - if self.language.tag != "en": - locale_dirs = self.build_root / self.version.name / "locale" - sphinxopts.extend( - ( - f"-D locale_dirs={locale_dirs}", - f"-D language={self.language.iso639_tag}", - "-D gettext_compact=0", - ) - ) - if self.language.tag == "ja": - # Since luatex doesn't support \ufffd, replace \ufffd with '?'. - # https://gist.github.com/zr-tex8r/e0931df922f38fbb67634f05dfdaf66b - # Luatex already fixed this issue, so we can remove this once Texlive - # is updated. 
- # (https://github.com/TeX-Live/luatex/commit/af5faf1) - subprocess.check_output( - "sed -i s/\N{REPLACEMENT CHARACTER}/?/g " - f"{locale_dirs}/ja/LC_MESSAGES/**/*.po", - shell=True, - ) - subprocess.check_output( - "sed -i s/\N{REPLACEMENT CHARACTER}/?/g " - f"{self.checkout}/Doc/**/*.rst", - shell=True, - ) + if self.language.is_translation: + sphinxopts.extend(( + f"-D locale_dirs={self.locale_dir}", + f"-D language={self.language.iso639_tag}", + "-D gettext_compact=0", + "-D translation_progress_classes=1", + )) if self.version.status == "EOL": sphinxopts.append("-D html_context.outdated=1") @@ -744,70 +653,95 @@ def build(self): blurb = self.venv / "bin" / "blurb" if self.includes_html: - # Disable CPython switchers, we handle them now: - run( - ["sed", "-i"] - + ([""] if sys.platform == "darwin" else []) - + ["s/ *-A switchers=1//", self.checkout / "Doc" / "Makefile"] - ) - self.version.setup_indexsidebar( - self.versions, - self.checkout / "Doc" / "tools" / "templates" / "indexsidebar.html", + site_url = self.version.url + if self.language.is_translation: + site_url += f"{self.language.tag}/" + # Define a tag to enable opengraph socialcards previews + # (used in Doc/conf.py and requires matplotlib) + sphinxopts += ( + "-t create-social-cards", + f"-D ogp_site_url={site_url}", ) - run_with_logging( - [ - "make", - "-C", - self.checkout / "Doc", - "PYTHON=" + str(python), - "SPHINXBUILD=" + str(sphinxbuild), - "BLURB=" + str(blurb), - "VENVDIR=" + str(self.venv), - "SPHINXOPTS=" + " ".join(sphinxopts), - "SPHINXERRORHANDLING=", - maketarget, - ] - ) - run(["mkdir", "-p", self.log_directory]) - run(["chgrp", "-R", self.group, self.log_directory]) + + if self.version.as_tuple() < (3, 8): + # Disable CPython switchers, we handle them now: + text = (self.checkout / "Doc" / "Makefile").read_text(encoding="utf-8") + text = text.replace(" -A switchers=1", "") + (self.checkout / "Doc" / "Makefile").write_text(text, encoding="utf-8") + + self.setup_indexsidebar() + 
run_with_logging(( + "make", + "-C", + self.checkout / "Doc", + f"PYTHON={python}", + f"SPHINXBUILD={sphinxbuild}", + f"BLURB={blurb}", + f"VENVDIR={self.venv}", + f"SPHINXOPTS={' '.join(sphinxopts)}", + "SPHINXERRORHANDLING=", + maketarget, + )) + self.log_directory.mkdir(parents=True, exist_ok=True) + chgrp(self.log_directory, group=self.group, recursive=True) if self.includes_html: setup_switchers( - self.versions, self.languages, self.checkout / "Doc" / "build" / "html" + self.switchers_content, self.checkout / "Doc" / "build" / "html" ) logging.info("Build done (%s).", format_seconds(perf_counter() - start_time)) - def build_venv(self): + def build_venv(self) -> None: """Build a venv for the specific Python version. So we can reuse them from builds to builds, while they contain different Sphinx versions. """ - venv_path = self.build_root / ("venv-" + self.version.name) - run([sys.executable, "-m", "venv", venv_path]) + requirements = list(self.version.requirements) + if self.includes_html: + # opengraph previews + requirements.append("matplotlib>=3") + + venv_path = self.build_root / f"venv-{self.version.name}" + venv.create(venv_path, symlinks=os.name != "nt", with_pip=True) run( - [venv_path / "bin" / "python", "-m", "pip", "install", "--upgrade"] - + ["--upgrade-strategy=eager"] - + [self.theme] - + self.version.requirements, + ( + venv_path / "bin" / "python", + "-m", + "pip", + "install", + "--upgrade", + "--upgrade-strategy=eager", + self.theme, + *requirements, + ), cwd=self.checkout / "Doc", ) - run([venv_path / "bin" / "python", "-m", "pip", "freeze", "--all"]) + run((venv_path / "bin" / "python", "-m", "pip", "freeze", "--all")) self.venv = venv_path + def setup_indexsidebar(self) -> None: + """Copy indexsidebar.html for Sphinx.""" + tmpl_src = HERE / "templates" + tmpl_dst = self.checkout / "Doc" / "tools" / "templates" + dbv_path = tmpl_dst / "_docs_by_version.html" + + shutil.copy(tmpl_src / "indexsidebar.html", tmpl_dst / "indexsidebar.html") + 
if self.version.status != "EOL": + dbv_path.write_bytes(self.docs_by_version_content) + else: + shutil.copy(tmpl_src / "_docs_by_version.html", dbv_path) + def copy_build_to_webroot(self, http: urllib3.PoolManager) -> None: """Copy a given build to the appropriate webroot with appropriate rights.""" logging.info("Publishing start.") start_time = perf_counter() self.www_root.mkdir(parents=True, exist_ok=True) - if self.language.tag == "en": + if not self.language.is_translation: target = self.www_root / self.version.name else: language_dir = self.www_root / self.language.tag language_dir.mkdir(parents=True, exist_ok=True) - try: - run(["chgrp", "-R", self.group, language_dir]) - except subprocess.CalledProcessError as err: - logging.warning("Can't change group of %s: %s", language_dir, str(err)) + chgrp(language_dir, group=self.group, recursive=True) language_dir.chmod(0o775) target = language_dir / self.version.name @@ -816,84 +750,47 @@ def copy_build_to_webroot(self, http: urllib3.PoolManager) -> None: target.chmod(0o775) except PermissionError as err: logging.warning("Can't change mod of %s: %s", target, str(err)) - try: - run(["chgrp", "-R", self.group, target]) - except subprocess.CalledProcessError as err: - logging.warning("Can't change group of %s: %s", target, str(err)) + chgrp(target, group=self.group, recursive=True) - changed = [] + changed = 0 if self.includes_html: # Copy built HTML files to webroot (default /srv/docs.python.org) - changed = changed_files(self.checkout / "Doc" / "build" / "html", target) + changed += changed_files(self.checkout / "Doc" / "build" / "html", target) logging.info("Copying HTML files to %s", target) - run( - [ - "chown", - "-R", - ":" + self.group, - self.checkout / "Doc" / "build" / "html/", - ] + chgrp( + self.checkout / "Doc" / "build" / "html/", + group=self.group, + recursive=True, ) - run(["chmod", "-R", "o+r", self.checkout / "Doc" / "build" / "html"]) - run( - [ - "find", - self.checkout / "Doc" / "build" / "html", 
- "-type", - "d", - "-exec", - "chmod", - "o+x", - "{}", - ";", - ] - ) - run( - [ - "rsync", - "-a", - "--delete-delay", - "--filter", - "P archives/", - str(self.checkout / "Doc" / "build" / "html") + "/", - target, - ] - ) - - if not self.quick: + chmod_make_readable(self.checkout / "Doc" / "build" / "html") + run(( + "rsync", + "-a", + "--delete-delay", + "--filter", + "P archives/", + str(self.checkout / "Doc" / "build" / "html") + "/", + target, + )) + + dist_dir = self.checkout / "Doc" / "dist" + if dist_dir.is_dir(): # Copy archive files to /archives/ logging.debug("Copying dist files.") - run( - [ - "chown", - "-R", - ":" + self.group, - self.checkout / "Doc" / "dist", - ] - ) - run( - [ - "chmod", - "-R", - "o+r", - self.checkout / "Doc" / "dist", - ] + chgrp(dist_dir, group=self.group, recursive=True) + chmod_make_readable(dist_dir) + archives_dir = target / "archives" + archives_dir.mkdir(parents=True, exist_ok=True) + archives_dir.chmod( + archives_dir.stat().st_mode | stat.S_IROTH | stat.S_IXOTH ) - run(["mkdir", "-m", "o+rx", "-p", target / "archives"]) - run(["chown", ":" + self.group, target / "archives"]) - run( - [ - "cp", - "-a", - *(self.checkout / "Doc" / "dist").glob("*"), - target / "archives", - ] - ) - changed.append("archives/") - for file in (target / "archives").iterdir(): - changed.append("archives/" + file.name) + chgrp(archives_dir, group=self.group) + changed += 1 + for dist_file in dist_dir.iterdir(): + shutil.copy2(dist_file, archives_dir / dist_file.name) + changed += 1 - logging.info("%s files changed", len(changed)) + logging.info("%s files changed", changed) if changed and not self.skip_cache_invalidation: surrogate_key = f"{self.language.tag}/{self.version.name}" purge_surrogate_key(http, surrogate_key) @@ -901,13 +798,13 @@ def copy_build_to_webroot(self, http: urllib3.PoolManager) -> None: "Publishing done (%s).", format_seconds(perf_counter() - start_time) ) - def should_rebuild(self): + def should_rebuild(self, force: 
bool) -> str | Literal[False]: state = self.load_state() if not state: logging.info("Should rebuild: no previous state found.") return "no previous state" cpython_sha = self.cpython_repo.run("rev-parse", "HEAD").stdout.strip() - if self.language.tag != "en": + if self.language.is_translation: translation_sha = self.translation_repo.run( "rev-parse", "HEAD" ).stdout.strip() @@ -929,6 +826,9 @@ def should_rebuild(self): cpython_sha, ) return "Doc/ has changed" + if force: + logging.info("Should rebuild: forced.") + return "forced" logging.info("Nothing changed, no rebuild needed.") return False @@ -944,7 +844,9 @@ def load_state(self) -> dict: except (KeyError, FileNotFoundError): return {} - def save_state(self, build_start: dt, build_duration: float, trigger: str): + def save_state( + self, build_start: dt.datetime, build_duration: float, trigger: str + ) -> None: """Save current CPython sha1 and current translation sha1. Using this we can deduce if a rebuild is needed or not. @@ -965,7 +867,7 @@ def save_state(self, build_start: dt, build_duration: float, trigger: str): "triggered_by": trigger, "cpython_sha": self.cpython_repo.run("rev-parse", "HEAD").stdout.strip(), } - if self.language.tag != "en": + if self.language.is_translation: state["translation_sha"] = self.translation_repo.run( "rev-parse", "HEAD" ).stdout.strip() @@ -977,201 +879,239 @@ def save_state(self, build_start: dt, build_duration: float, trigger: str): logging.info("Saved new rebuild state for %s: %s", key, table.as_string()) -def symlink( - www_root: Path, - language: Language, - directory: str, - name: str, - group: str, - skip_cache_invalidation: bool, - http: urllib3.PoolManager, +def chgrp( + path: Path, + /, + group: int | str | None, + *, + recursive: bool = False, + follow_symlinks: bool = True, ) -> None: - """Used by major_symlinks and dev_symlink to maintain symlinks.""" - if language.tag == "en": # English is rooted on /, no /en/ - path = www_root - else: - path = www_root / 
language.tag - link = path / name - directory_path = path / directory - if not directory_path.exists(): - return # No touching link, dest doc not built yet. + if sys.platform == "win32": + return - if not link.exists() or readlink(link) != directory: - # Link does not exist or points to the wrong target. - if link.exists(): - link.unlink() - link.symlink_to(directory) - run(["chown", "-h", f":{group}", str(link)]) - if not skip_cache_invalidation: - surrogate_key = f"{language.tag}/{name}" - purge_surrogate_key(http, surrogate_key) + from grp import getgrnam + try: + try: + group_id = int(group) + except ValueError: + group_id = getgrnam(group)[2] + except (LookupError, TypeError, ValueError): + return -def major_symlinks( - www_root: Path, - group: str, - versions: Iterable[Version], - languages: Iterable[Language], - skip_cache_invalidation: bool, - http: urllib3.PoolManager, -) -> None: - """Maintains the /2/ and /3/ symlinks for each language. + try: + os.chown(path, -1, group_id, follow_symlinks=follow_symlinks) + if recursive: + for p in path.rglob("*"): + os.chown(p, -1, group_id, follow_symlinks=follow_symlinks) + except OSError as err: + logging.warning("Can't change group of %s: %s", path, str(err)) + + +def chmod_make_readable(path: Path, /, mode: int = stat.S_IROTH) -> None: + if not path.is_dir(): + raise ValueError + + path.chmod(path.stat().st_mode | stat.S_IROTH | stat.S_IXOTH) # o+rx + for p in path.rglob("*"): + if p.is_dir(): + p.chmod(p.stat().st_mode | stat.S_IROTH | stat.S_IXOTH) # o+rx + else: + p.chmod(p.stat().st_mode | stat.S_IROTH) # o+r - Like: - - /3/ → /3.9/ - - /fr/3/ → /fr/3.9/ - - /es/3/ → /es/3.9/ - """ - logging.info("Creating major version symlinks...") - current_stable = Version.current_stable(versions).name - for language in languages: - symlink( - www_root, - language, - current_stable, - "3", - group, - skip_cache_invalidation, - http, - ) - symlink(www_root, language, "2.7", "2", group, skip_cache_invalidation, http) +def 
format_seconds(seconds: float) -> str: + hours, remainder = divmod(seconds, 3600) + minutes, seconds = divmod(remainder, 60) + hours, minutes, seconds = int(hours), int(minutes), round(seconds) -def dev_symlink( - www_root: Path, - group, - versions, - languages, - skip_cache_invalidation: bool, - http: urllib3.PoolManager, -) -> None: - """Maintains the /dev/ symlinks for each language. + match (hours, minutes, seconds): + case 0, 0, s: + return f"{s}s" + case 0, m, s: + return f"{m}m {s}s" + case h, m, s: + return f"{h}h {m}m {s}s" - Like: - - /dev/ → /3.11/ - - /fr/dev/ → /fr/3.11/ - - /es/dev/ → /es/3.11/ - """ - logging.info("Creating development version symlinks...") - current_dev = Version.current_dev(versions).name - for language in languages: - symlink( - www_root, - language, - current_dev, - "dev", - group, - skip_cache_invalidation, - http, - ) + raise ValueError("unreachable") -def purge(http: urllib3.PoolManager, *paths: Path | str) -> None: - """Remove one or many paths from docs.python.org's CDN. +def _checkout_name(select_output: str | None) -> str: + if select_output is not None: + return f"cpython-{select_output}" + return "cpython" - To be used when a file changes, so the CDN fetches the new one. - """ - base = "https://docs.python.org/" - for path in paths: - url = urljoin(base, str(path)) - logging.debug("Purging %s from CDN", url) - http.request("PURGE", url, timeout=30) +def main() -> int: + """Script entry point.""" + args = parse_args() + setup_logging(args.log_directory, args.select_output) + load_environment_variables() -def purge_surrogate_key(http: urllib3.PoolManager, surrogate_key: str) -> None: - """Remove paths from docs.python.org's CDN. 
+ if args.select_output is None: + return build_docs_with_lock(args, "build_docs.lock") + if args.select_output == "no-html": + return build_docs_with_lock(args, "build_docs_archives.lock") + if args.select_output == "only-html": + return build_docs_with_lock(args, "build_docs_html.lock") + if args.select_output == "only-html-en": + return build_docs_with_lock(args, "build_docs_html_en.lock") + return EX_FAILURE - All paths matching the given 'Surrogate-Key' will be removed. - This is set by the Nginx server for every language-version pair. - To be used when a directory changes, so the CDN fetches the new one. - https://www.fastly.com/documentation/reference/api/purging/#purge-tag - """ - service_id = getenv("FASTLY_SERVICE_ID", "__UNSET__") - fastly_key = getenv("FASTLY_TOKEN", "__UNSET__") +def parse_args() -> argparse.Namespace: + """Parse command-line arguments.""" - logging.info("Purging Surrogate-Key '%s' from CDN", surrogate_key) - http.request( - "POST", - f"https://api.fastly.com/service/{service_id}/purge/{surrogate_key}", - headers={"Fastly-Key": fastly_key}, - timeout=30, + parser = argparse.ArgumentParser( + description="Runs a build of the Python docs for various branches.", + allow_abbrev=False, + ) + parser.add_argument( + "--select-output", + choices=("no-html", "only-html", "only-html-en"), + help="Choose what outputs to build.", + ) + parser.add_argument( + "-q", + "--quick", + action="store_true", + help="Run a quick build (only HTML files).", + ) + parser.add_argument( + "-b", + "--branches", + nargs="*", + metavar="3.12", + help="Versions to build (defaults to all maintained branches).", + ) + parser.add_argument( + "-r", + "--build-root", + type=Path, + help="Path to a directory containing a checkout per branch.", + default=Path("/srv/docsbuild"), + ) + parser.add_argument( + "-w", + "--www-root", + type=Path, + help="Path where generated files will be copied.", + default=Path("/srv/docs.python.org"), + ) + parser.add_argument( + "--force", + 
action="store_true", + help="Always build the chosen languages and versions, " + "regardless of existing state.", + ) + parser.add_argument( + "--skip-cache-invalidation", + help="Skip Fastly cache invalidation.", + action="store_true", + ) + parser.add_argument( + "--group", + help="Group files on targets and www-root file should get.", + default="docs", + ) + parser.add_argument( + "--log-directory", + type=Path, + help="Directory used to store logs.", + default=Path("/var/log/docsbuild/"), + ) + parser.add_argument( + "--languages", + nargs="*", + help="Language translation, as a PEP 545 language tag like" + " 'fr' or 'pt-br'. " + "Builds all available languages by default.", + metavar="fr", + ) + parser.add_argument( + "--version", + action="store_true", + help="Get build_docs and dependencies version info", ) + parser.add_argument( + "--theme", + default="python-docs-theme", + help="Python package to use for python-docs-theme: Useful to test branches:" + " --theme git+https://github.com/obulat/python-docs-theme@master", + ) + args = parser.parse_args() + if args.version: + version_info() + sys.exit(0) + del args.version + if args.log_directory: + args.log_directory = args.log_directory.resolve() + if args.build_root: + args.build_root = args.build_root.resolve() + if args.www_root: + args.www_root = args.www_root.resolve() + return args -def proofread_canonicals( - www_root: Path, skip_cache_invalidation: bool, http: urllib3.PoolManager -) -> None: - """In www_root we check that all canonical links point to existing contents. 
+def setup_logging(log_directory: Path, select_output: str | None) -> None: + """Setup logging to stderr if run by a human, or to a file if run from a cron.""" + log_format = "%(asctime)s %(levelname)s: %(message)s" + if sys.stderr.isatty() or "CI" in os.environ: + logging.basicConfig(format=log_format, stream=sys.stderr) + else: + log_directory.mkdir(parents=True, exist_ok=True) + if select_output is None: + filename = log_directory / "docsbuild.log" + else: + filename = log_directory / f"docsbuild-{select_output}.log" + handler = logging.handlers.WatchedFileHandler(filename) + handler.setFormatter(logging.Formatter(log_format)) + logging.getLogger().addHandler(handler) + logging.getLogger().setLevel(logging.DEBUG) - It can happen that a canonical is "broken": - - /3.11/whatsnew/3.11.html typically would link to - /3/whatsnew/3.11.html, which may not exist yet. - """ - logging.info("Checking canonical links...") - canonical_re = re.compile( - """""" - ) - for file in www_root.glob("**/*.html"): - html = file.read_text(encoding="UTF-8", errors="surrogateescape") - canonical = canonical_re.search(html) - if not canonical: - continue - target = canonical.group(1) - if not (www_root / target).exists(): - logging.info("Removing broken canonical from %s to %s", file, target) - html = html.replace(canonical.group(0), "") - file.write_text(html, encoding="UTF-8", errors="surrogateescape") - if not skip_cache_invalidation: - purge(http, str(file).replace("/srv/docs.python.org/", "")) - - -def parse_versions_from_devguide(http: urllib3.PoolManager) -> list[Version]: - releases = http.request( - "GET", - "https://raw.githubusercontent.com/" - "python/devguide/main/include/release-cycle.json", - timeout=30, - ).json() - versions = [Version.from_json(name, release) for name, release in releases.items()] - versions.sort(key=Version.as_tuple) - return versions +def load_environment_variables() -> None: + dbs_user_config = platformdirs.user_config_path("docsbuild-scripts") + 
dbs_site_config = platformdirs.site_config_path("docsbuild-scripts") + if dbs_user_config.is_file(): + env_conf_file = dbs_user_config + elif dbs_site_config.is_file(): + env_conf_file = dbs_site_config + else: + logging.info( + "No environment variables configured. Configure in %s or %s.", + dbs_site_config, + dbs_user_config, + ) + return + logging.info("Reading environment variables from %s.", env_conf_file) + if env_conf_file == dbs_site_config: + logging.info("You can override settings in %s.", dbs_user_config) + elif dbs_site_config.is_file(): + logging.info("Overriding %s.", dbs_site_config) -def parse_languages_from_config() -> list[Language]: - """Read config.toml to discover languages to build.""" - config = tomlkit.parse((HERE / "config.toml").read_text(encoding="UTF-8")) - languages = [] - defaults = config["defaults"] - for iso639_tag, section in config["languages"].items(): - languages.append( - Language( - iso639_tag, - section["name"], - section.get("in_prod", defaults["in_prod"]), - sphinxopts=section.get("sphinxopts", defaults["sphinxopts"]), - html_only=section.get("html_only", defaults["html_only"]), - ) - ) - return languages + env_config = env_conf_file.read_text(encoding="utf-8") + for key, value in tomlkit.parse(env_config).get("env", {}).items(): + logging.debug("Setting %s in environment.", key) + os.environ[key] = value -def format_seconds(seconds: float) -> str: - hours, remainder = divmod(seconds, 3600) - minutes, seconds = divmod(remainder, 60) - hours, minutes, seconds = int(hours), int(minutes), round(seconds) +def build_docs_with_lock(args: argparse.Namespace, lockfile_name: str) -> int: + try: + lock = zc.lockfile.LockFile(HERE / lockfile_name) + except zc.lockfile.LockError: + logging.info("Another builder is running... 
dying...") + return EX_FAILURE - match (hours, minutes, seconds): - case 0, 0, s: - return f"{s}s" - case 0, m, s: - return f"{m}m {s}s" - case h, m, s: - return f"{h}h {m}m {s}s" + try: + return build_docs(args) + finally: + lock.close() -def build_docs(args) -> bool: +def build_docs(args: argparse.Namespace) -> int: """Build all docs (each language and each version).""" logging.info("Full build start.") start_time = perf_counter() @@ -1183,12 +1123,19 @@ def build_docs(args) -> bool: # This runs languages in config.toml order and versions newest first. todo = [ (version, language) - for version in Version.filter(versions, args.branch) - for language in reversed(Language.filter(languages, args.languages)) + for version in versions.filter(args.branches) + for language in reversed(languages.filter(args.languages)) ] - del args.branch + del args.branches del args.languages - all_built_successfully = True + force_build = args.force + del args.force + + docs_by_version_content = render_docs_by_version(versions).encode() + switchers_content = render_switchers(versions, languages) + + build_succeeded = set() + any_build_failed = False cpython_repo = Repository( "https://github.com/python/cpython.git", args.build_root / _checkout_name(args.select_output), @@ -1204,11 +1151,21 @@ def build_docs(args) -> bool: scope = sentry_sdk.get_isolation_scope() scope.set_tag("version", version.name) scope.set_tag("language", language.tag) - cpython_repo.update() + cpython_repo.update() builder = DocBuilder( - version, versions, language, languages, cpython_repo, **vars(args) + version, + language, + cpython_repo, + docs_by_version_content, + switchers_content, + **vars(args), ) - all_built_successfully &= builder.run(http) + built_successfully = builder.run(http, force_build=force_build) + if built_successfully: + build_succeeded.add((version.name, language.tag)) + elif built_successfully is not None: + any_build_failed = True + logging.root.handlers[0].setFormatter( 
logging.Formatter("%(asctime)s %(levelname)s: %(message)s") ) @@ -1221,62 +1178,263 @@ def build_docs(args) -> bool: args.skip_cache_invalidation, http, ) - major_symlinks( - args.www_root, - args.group, - versions, - languages, - args.skip_cache_invalidation, - http, - ) - dev_symlink( + make_symlinks( args.www_root, args.group, versions, languages, + build_succeeded, args.skip_cache_invalidation, http, ) - proofread_canonicals(args.www_root, args.skip_cache_invalidation, http) + if build_succeeded: + # Only check canonicals if at least one version was built. + proofread_canonicals(args.www_root, args.skip_cache_invalidation, http) logging.info("Full build done (%s).", format_seconds(perf_counter() - start_time)) - return all_built_successfully + return EX_FAILURE if any_build_failed else EX_OK -def _checkout_name(select_output: str | None) -> str: - if select_output is not None: - return f"cpython-{select_output}" - return "cpython" +def parse_versions_from_devguide(http: urllib3.PoolManager) -> Versions: + releases = http.request( + "GET", + "https://raw.githubusercontent.com/" + "python/devguide/main/include/release-cycle.json", + timeout=30, + ).json() + return Versions.from_json(releases) -def main(): - """Script entry point.""" - args = parse_args() - setup_logging(args.log_directory, args.select_output) +def parse_languages_from_config() -> Languages: + """Read config.toml to discover languages to build.""" + config = tomlkit.parse((HERE / "config.toml").read_text(encoding="UTF-8")) + return Languages.from_json(config["defaults"], config["languages"]) - if args.select_output is None: - build_docs_with_lock(args, "build_docs.lock") - elif args.select_output == "no-html": - build_docs_with_lock(args, "build_docs_archives.lock") - elif args.select_output == "only-html": - build_docs_with_lock(args, "build_docs_html.lock") - elif args.select_output == "only-html-en": - build_docs_with_lock(args, "build_docs_html_en.lock") +def render_docs_by_version(versions: 
Versions) -> str: + """Generate content for _docs_by_version.html.""" + links = [f'
{% trans %}Download these documents{% endtrans %}
-{% endraw %} -{% if current_version.status != "EOL" %} -{% raw %}