diff --git a/.coveragerc b/.coveragerc new file mode 100644 index 0000000..f970781 --- /dev/null +++ b/.coveragerc @@ -0,0 +1,8 @@ +# .coveragerc to control coverage.py + +[report] +# Regexes for lines to exclude from consideration +exclude_also = + # Don't complain if non-runnable code isn't run: + if __name__ == .__main__.: + if TYPE_CHECKING: diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..cd9d07e --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,23 @@ +name: Lint + +on: [push, pull_request, workflow_dispatch] + +permissions: {} + +env: + FORCE_COLOR: 1 + PIP_DISABLE_PIP_VERSION_CHECK: 1 + +jobs: + lint: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + - uses: actions/setup-python@v5 + with: + python-version: "3.x" + cache: pip + - uses: pre-commit/action@v3.0.1 diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 0000000..b64e8a3 --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,80 @@ +name: Test + +on: + push: + pull_request: + workflow_dispatch: + +permissions: {} + +env: + FORCE_COLOR: 1 + +jobs: + integration: + name: Integration test + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + - uses: actions/setup-python@v5 + with: + python-version: "3.13" + + - name: Set up requirements + run: | + python -m pip install --upgrade pip + python -m pip install -r requirements.txt + + - name: Build documentation + run: > + python ./build_docs.py + --quick + --build-root ./build_root + --www-root ./www + --log-directory ./logs + --group "$(id -g)" + --skip-cache-invalidation + --languages en + --branches 3.14 + + - name: Upload documentation + uses: actions/upload-artifact@v4 + with: + name: www-root + path: ./www + retention-days: 2 + + unit: + name: Unit tests + runs-on: ${{ matrix.os }} + strategy: + fail-fast: false + matrix: + python-version: ["3.13", "3.14"] + os: [windows-latest, macos-latest, ubuntu-latest] + + steps: + - uses: actions/checkout@v4 + with: + persist-credentials: false + + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + allow-prereleases: true + + - name: Install uv + uses: hynek/setup-cached-uv@v2 + + - name: Tox tests + run: uvx --with tox-uv tox -e py + + - name: Upload coverage + uses: codecov/codecov-action@v5 + with: + flags: ${{ matrix.os }} + name: Python ${{ matrix.python-version }} + token: ${{ secrets.CODECOV_ORG_TOKEN }} diff --git a/.gitignore b/.gitignore index 0a29939..c257236 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,10 @@ /build_root/ /logs/ /www/ +# temporary lock file created while building the docs +build_docs.lock +build_docs_archives.lock +build_docs_html.lock # Created by https://www.gitignore.io/api/python diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..869a979 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,65 @@ +repos: + - repo: https://github.com/pre-commit/pre-commit-hooks + rev: v5.0.0 + hooks: + - id: check-added-large-files + - id: check-case-conflict + - id: check-merge-conflict + - id: check-toml + - id: check-yaml + - id: debug-statements + - id: end-of-file-fixer + - id: forbid-submodules + - id: requirements-txt-fixer + - id: trailing-whitespace + + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.11.5 + hooks: + - id: ruff + args: [--fix] + - id: ruff-format + + - repo: https://github.com/python-jsonschema/check-jsonschema + rev: 0.31.1 + hooks: + - id: check-github-workflows + + - repo: https://github.com/rhysd/actionlint + rev: v1.7.7 + hooks: + - id: actionlint + + - repo: https://github.com/woodruffw/zizmor-pre-commit + rev: v1.3.1 + hooks: + - id: zizmor + + - repo: https://github.com/tox-dev/pyproject-fmt + rev: v2.5.0 + hooks: + - id: pyproject-fmt + + - repo: https://github.com/abravalheri/validate-pyproject + rev: v0.23 + hooks: + - id: validate-pyproject + + - repo: https://github.com/tox-dev/tox-ini-fmt + rev: 1.5.0 + hooks: + - id: tox-ini-fmt + + - repo: https://github.com/rbubley/mirrors-prettier + rev: v3.5.1 + hooks: + - id: prettier + files: templates/switchers.js + + - repo: meta + hooks: + - id: check-hooks-apply + - id: check-useless-excludes + +ci: + autoupdate_schedule: quarterly diff --git a/.ruff.toml b/.ruff.toml new file mode 100644 index 0000000..47cbf74 --- /dev/null +++ b/.ruff.toml @@ -0,0 +1,34 @@ +target-version = "py313" # Pin Ruff to Python 3.13 +line-length = 88 +output-format = "full" + +[format] +preview = true +docstring-code-format = true + +[lint] +preview = true +select = [ + "C4", # flake8-comprehensions + "B", # flake8-bugbear + "E", # pycodestyle + "F", # pyflakes + "FA", # flake8-future-annotations + "FLY", # flynt + "G", # flake8-logging-format + "I", # isort + "N", # pep8-naming + "PERF", # perflint + "PGH", # pygrep-hooks + "PT", # flake8-pytest-style + "TC", # flake8-type-checking + "UP", # pyupgrade + "W", # pycodestyle +] +ignore = [ + "E501", # Ignore line length errors (we use auto-formatting) +] + +[lint.flake8-type-checking] +exempt-modules = [] +strict = true diff --git a/README.md b/README.md index 3e70fa4..8ba7070 100644 --- a/README.md +++ b/README.md @@ -1,8 +1,13 @@ +# docsbuild-scripts + +[](https://github.com/python/docsbuild-scripts/actions/workflows/test.yml) +[](https://codecov.io/gh/python/docsbuild-scripts) + This repository contains scripts for automatically building the Python documentation on [docs.python.org](https://docs.python.org). -# How to test it? +## How to test it? The following command should build all maintained versions and translations in `./www`, beware it can take a few hours: @@ -12,45 +17,81 @@ python3 ./build_docs.py --quick --build-root ./build_root --www-root ./www --log ``` If you don't need to build all translations of all branches, add -`--language en --branch main`. +`--languages en --branches main`. + + +## Sphinx versions + +Sphinx configuration in various branches: -# Check current version +| version | requirements.txt | conf.py | +|-----------|--------------------|----------------------| +| 2.6 | ø | ø | +| 2.7 | ø | ø | +| 3.0 | ø | ø | +| 3.1 | ø | ø | +| 3.2 | ø | ø | +| 3.3 | ø | ø | +| 3.4 | ø | needs_sphinx='1.2' | +| 3.5 | ø | ø | +| 3.6 | ø | ø | +| 3.7 | ø | ø | +| 3.8 | ø | ø | +| 3.9 | sphinx==2.4.4 | needs_sphinx='1.8' | +| 3.10 | sphinx==3.4.3 | needs_sphinx='3.2' | +| 3.11 | sphinx~=7.2.0 | needs_sphinx='4.2' | +| 3.12 | sphinx~=8.2.0 | needs_sphinx='8.2.0' | +| 3.13 | sphinx~=8.2.0 | needs_sphinx='8.2.0' | +| 3.14 | sphinx~=8.2.0 | needs_sphinx='8.2.0' | +| 3.15 | sphinx~=8.2.0 | needs_sphinx='8.2.0' | + +Sphinx build as seen on docs.python.org: + +| version | el | en | es | fr | bn-in | id | it | ja | ko | pl | pt-br | ro | tr | uk | zh-cn | zh-tw | +|-----------|-------|-------|-------|-------|---------|-------|-------|-------|-------|-------|---------|-------|-------|-------|---------|---------| +| 2.6 | ø | 0.6.5 | ø | ø | ø | ø | ø | ø | ø | ø | ø | ø | ø | ø | ø | ø | +| 2.7 | ø | 2.3.1 | ø | 2.3.1 | ø | 2.3.1 | ø | 2.3.1 | 2.3.1 | ø | 2.3.1 | ø | ø | ø | 2.3.1 | 2.3.1 | +| 3.0 | ø | 0.6 | ø | ø | ø | ø | ø | ø | ø | ø | ø | ø | ø | ø | ø | ø | +| 3.1 | ø | 0.6.5 | ø | ø | ø | ø | ø | ø | ø | ø | ø | ø | ø | ø | ø | ø | +| 3.2 | ø | 1.0.7 | ø | ø | ø | ø | ø | ø | ø | ø | ø | ø | ø | ø | ø | ø | +| 3.3 | ø | 1.2 | ø | ø | ø | ø | ø | ø | ø | ø | ø | ø | ø | ø | ø | ø | +| 3.4 | ø | 1.2.3 | ø | ø | ø | ø | ø | ø | ø | ø | ø | ø | ø | ø | ø | ø | +| 3.5 | ø | 1.8.4 | 1.8.4 | 1.8.4 | ø | 1.8.4 | ø | 1.8.4 | 1.8.4 | 1.8.4 | 1.8.4 | ø | ø | ø | 1.8.4 | 1.8.4 | +| 3.6 | ø | 2.3.1 | 2.3.1 | 2.3.1 | ø | 2.3.1 | ø | 2.3.1 | 2.3.1 | 2.3.1 | 2.3.1 | ø | ø | ø | 2.3.1 | 2.3.1 | +| 3.7 | ø | 2.3.1 | 2.3.1 | 2.3.1 | ø | 2.3.1 | 2.3.1 | 2.3.1 | 2.3.1 | 2.3.1 | 2.3.1 | ø | 2.3.1 | 2.3.1 | 2.3.1 | 2.3.1 | +| 3.8 | ø | 2.4.4 | 2.4.4 | 2.4.4 | ø | 2.4.4 | 2.4.4 | 2.4.4 | 2.4.4 | 2.4.4 | 2.4.4 | ø | 2.4.4 | 2.4.4 | 2.4.4 | 2.4.4 | +| 3.9 | 2.4.4 | 2.4.4 | 2.4.4 | 2.4.4 | 2.4.4 | 2.4.4 | 2.4.4 | 2.4.4 | 2.4.4 | 2.4.4 | 2.4.4 | 2.4.4 | 2.4.4 | 2.4.4 | 2.4.4 | 2.4.4 | +| 3.10 | 3.4.3 | 3.4.3 | 3.4.3 | 3.4.3 | 3.4.3 | 3.4.3 | 3.4.3 | 3.4.3 | 3.4.3 | 3.4.3 | 3.4.3 | 3.4.3 | 3.4.3 | 3.4.3 | 3.4.3 | 3.4.3 | +| 3.11 | 7.2.6 | 7.2.6 | 7.2.6 | 7.2.6 | 7.2.6 | 7.2.6 | 7.2.6 | 7.2.6 | 7.2.6 | 7.2.6 | 7.2.6 | 7.2.6 | 7.2.6 | 7.2.6 | 7.2.6 | 7.2.6 | +| 3.12 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | +| 3.13 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | +| 3.14 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | +| 3.15 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | 8.2.3 | + Install `tools_requirements.txt` then run `python check_versions.py -../cpython/` (pointing to a real CPython clone) to see which version -of Sphinx we're using where: - - Sphinx configuration in various branches: - - ========= ============= ============= ================== ================== - version travis azure requirements.txt conf.py - ========= ============= ============= ================== ================== - 2.7 sphinx~=2.0.1 ø ø needs_sphinx='1.2' - 3.5 sphinx==1.8.2 ø ø needs_sphinx='1.8' - 3.6 sphinx==1.8.2 sphinx==1.8.2 ø needs_sphinx='1.2' - 3.7 ø ø ø ø - 3.8 ø ø sphinx==2.4.4 needs_sphinx='1.8' - 3.9 ø ø sphinx==2.4.4 needs_sphinx='1.8' - 3.1 ø ø sphinx==3.4.3 needs_sphinx='3.2' - 3.11 ø ø sphinx==4.5.0 needs_sphinx='4.2' - 3.12 ø ø sphinx==4.5.0 needs_sphinx='4.2' - 3.13 ø ø sphinx==6.2.1 needs_sphinx='4.2' - ========= ============= ============= ================== ================== - - Sphinx build as seen on docs.python.org: - - ========= ===== ===== ===== ===== ===== ===== ===== ===== ======= ===== ===== ======= ======= - version en es fr id it ja ko pl pt-br tr uk zh-cn zh-tw - ========= ===== ===== ===== ===== ===== ===== ===== ===== ======= ===== ===== ======= ======= - 2.7 ø 2.3.1 ø 2.3.1 2.3.1 ø 2.3.1 2.3.1 ø 2.3.1 2.3.1 ø 2.3.1 - 3.5 ø 1.8.4 1.8.4 1.8.4 1.8.4 ø 1.8.4 1.8.4 ø 1.8.4 1.8.4 1.8.4 1.8.4 - 3.6 ø 2.3.1 2.3.1 2.3.1 2.3.1 ø 2.3.1 2.3.1 ø 2.3.1 2.3.1 2.3.1 2.3.1 - 3.7 2.3.1 2.3.1 2.3.1 2.3.1 2.3.1 2.3.1 2.3.1 2.3.1 2.3.1 2.3.1 2.3.1 2.3.1 2.3.1 - 3.8 2.4.4 2.4.4 2.4.4 2.4.4 2.4.4 2.4.4 2.4.4 2.4.4 2.4.4 2.4.4 2.4.4 2.4.4 2.4.4 - 3.9 2.4.4 2.4.4 2.4.4 2.4.4 2.4.4 2.4.4 2.4.4 2.4.4 2.4.4 2.4.4 2.4.4 2.4.4 2.4.4 - 3.10 3.4.3 3.4.3 3.4.3 3.4.3 3.4.3 3.4.3 3.4.3 3.4.3 3.4.3 3.4.3 3.4.3 3.4.3 3.4.3 - 3.11 4.5.0 4.5.0 4.5.0 4.5.0 4.5.0 4.5.0 4.5.0 4.5.0 4.5.0 4.5.0 4.5.0 4.5.0 4.5.0 - 3.12 4.5.0 4.5.0 4.5.0 4.5.0 4.5.0 4.5.0 4.5.0 4.5.0 4.5.0 4.5.0 4.5.0 4.5.0 4.5.0 - 3.13 6.2.1 6.2.1 6.2.1 6.2.1 6.2.1 6.2.1 6.2.1 6.2.1 6.2.1 6.2.1 6.2.1 6.2.1 6.2.1 - ========= ===== ===== ===== ===== ===== ===== ===== ===== ======= ===== ===== ======= ======= +../cpython/` (pointing to a real CPython clone) to see which versions +of Sphinx we're using. + +Or run `tox -e cog` (with a clone at `../cpython`) to directly update these tables. + +## Manually rebuild a branch + +Docs for [feature and bugfix branches](https://devguide.python.org/versions/) are +automatically built from a cron. + +Manual rebuilds are needed for new security releases, +and to add the end-of-life banner for newly end-of-life branches. + +To manually rebuild a branch, for example 3.11: + +```shell +ssh docs.nyc1.psf.io +sudo su --shell=/bin/bash docsbuild +screen -DUR # Rejoin screen session if it exists, otherwise create a new one +/srv/docsbuild/venv/bin/python /srv/docsbuild/scripts/build_docs.py --force --branches 3.11 +``` diff --git a/build_docs.py b/build_docs.py index 212989e..973c9a0 100755 --- a/build_docs.py +++ b/build_docs.py @@ -5,55 +5,80 @@ Without any arguments builds docs for all active versions and languages. +Environment variables for: + +- `SENTRY_DSN` (Error reporting) +- `FASTLY_SERVICE_ID` / `FASTLY_TOKEN` (CDN purges) +- `PYTHON_DOCS_ENABLE_ANALYTICS` (Enable Plausible for online docs) + +are read from the site configuration path for your platform +(/etc/xdg/docsbuild-scripts on linux) if available, +and can be overriden by writing a file to the user config dir +for your platform ($HOME/.config/docsbuild-scripts on linux). +The contents of the file is parsed as toml: + +```toml +[env] +SENTRY_DSN = "https://0a0a0a0a0a0a0a0a0a0a0a@sentry.io/69420" +FASTLY_SERVICE_ID = "deadbeefdeadbeefdead" +FASTLY_TOKEN = "secureme!" +PYTHON_DOCS_ENABLE_ANALYTICS = "1" +``` + Languages are stored in `config.toml` while versions are discovered -from the devguide. +from peps.python.org (generated by `python-releases.toml`). -q selects "quick build", which means to build only HTML. -Translations are fetched from github repositories according to PEP -545. `--languages` allows to select translations, like `--languages -en` to just build the english documents. +Translations are fetched from GitHub repositories according to PEP +545. `--languages` allows selecting translations, like `--languages +en` to just build the English documents. -This script was originally created and by Georg Brandl in March -2010. +This script was originally created by Georg Brandl in March 2010. Modified by Benjamin Peterson to do CDN cache invalidation. Modified by Julien Palard to build translations. """ -from argparse import ArgumentParser -from contextlib import suppress, contextmanager -from dataclasses import dataclass +from __future__ import annotations + +import argparse +import concurrent.futures +import dataclasses +import datetime as dt import filecmp import json import logging import logging.handlers -from functools import total_ordering -from os import readlink -import platform +import os import re import shlex import shutil +import stat import subprocess import sys +import venv from bisect import bisect_left as bisect -from collections import OrderedDict -from datetime import datetime as dt, timezone +from contextlib import contextmanager, suppress from pathlib import Path from string import Template -from textwrap import indent from time import perf_counter, sleep -from typing import Iterable from urllib.parse import urljoin -import zc.lockfile import jinja2 -import requests +import platformdirs import tomlkit +import urllib3 +import zc.lockfile +TYPE_CHECKING = False +if TYPE_CHECKING: + from collections.abc import Collection, Iterator, Sequence, Set + from typing import Literal try: - from os import EX_OK, EX_SOFTWARE as EX_FAILURE + from os import EX_OK + from os import EX_SOFTWARE as EX_FAILURE except ImportError: EX_OK, EX_FAILURE = 0, 1 @@ -67,9 +92,67 @@ HERE = Path(__file__).resolve().parent -@total_ordering +@dataclasses.dataclass(frozen=True, slots=True) +class Versions: + _seq: Sequence[Version] + + def __iter__(self) -> Iterator[Version]: + return iter(self._seq) + + def __reversed__(self) -> Iterator[Version]: + return reversed(self._seq) + + @classmethod + def from_json(cls, data: dict) -> Versions: + """Load versions from the devguide's JSON representation.""" + permitted = ", ".join(sorted(Version.STATUSES | Version.SYNONYMS.keys())) + + versions = [] + for name, release in data.items(): + branch = release["branch"] + status = release["status"] + status = Version.SYNONYMS.get(status, status) + if status not in Version.STATUSES: + msg = ( + f"Saw invalid version status {status!r}, " + f"expected to be one of {permitted}." + ) + raise ValueError(msg) + versions.append(Version(name=name, status=status, branch_or_tag=branch)) + + return cls(sorted(versions, key=Version.as_tuple)) + + def filter(self, branches: Sequence[str] = ()) -> Sequence[Version]: + """Filter the given versions. + + If *branches* is given, only *versions* matching *branches* are returned. + + Else all live versions are returned (this means no EOL and no + security-fixes branches). + """ + if branches: + branches = frozenset(branches) + return [v for v in self if {v.name, v.branch_or_tag} & branches] + return [v for v in self if v.status not in {"EOL", "security-fixes"}] + + @property + def current_stable(self) -> Version: + """Find the current stable CPython version.""" + return max((v for v in self if v.status == "stable"), key=Version.as_tuple) + + @property + def current_dev(self) -> Version: + """Find the current CPython version in development.""" + return max(self, key=Version.as_tuple) + + +@dataclasses.dataclass(frozen=True, kw_only=True, slots=True) class Version: - """Represents a cpython version and its documentation builds dependencies.""" + """Represents a CPython version and its documentation build dependencies.""" + + name: str + status: Literal["EOL", "security-fixes", "stable", "pre-release", "in development"] + branch_or_tag: str STATUSES = {"EOL", "security-fixes", "stable", "pre-release", "in development"} @@ -83,22 +166,11 @@ class Version: "prerelease": "pre-release", } - def __init__(self, name, *, status, branch_or_tag=None): - status = self.SYNONYMS.get(status, status) - if status not in self.STATUSES: - raise ValueError( - "Version status expected to be one of: " - f"{', '.join(self.STATUSES|set(self.SYNONYMS.keys()))}, got {status!r}." - ) - self.name = name - self.branch_or_tag = branch_or_tag - self.status = status - - def __repr__(self): - return f"Version({self.name})" + def __eq__(self, other: Version) -> bool: + return self.name == other.name @property - def requirements(self): + def requirements(self) -> list[str]: """Generate the right requirements for this version. Since CPython 3.8 a Doc/requirements.txt file can be used. @@ -110,62 +182,60 @@ def requirements(self): See https://github.com/python/cpython/issues/91483 """ - if self.name == "3.5": - return ["jieba", "blurb", "sphinx==1.8.4", "jinja2<3.1", "docutils<=0.17.1"] - if self.name in ("3.7", "3.6", "2.7"): - return ["jieba", "blurb", "sphinx==2.3.1", "jinja2<3.1", "docutils<=0.17.1"] - if self.name == ("3.8", "3.9"): - return ["jieba", "blurb", "sphinx==2.4.4", "jinja2<3.1", "docutils<=0.17.1"] - - return [ - "jieba", # To improve zh search. + dependencies = [ "-rrequirements.txt", + "jieba", # To improve zh search. + "PyStemmer~=2.2.0", # To improve performance for word stemming. ] + if self.as_tuple() >= (3, 11): + return dependencies + if self.as_tuple() >= (3, 8): + # Restore the imghdr module for Python 3.8-3.10. + return dependencies + ["standard-imghdr"] + + # Requirements/constraints for Python 3.7 and older, pre-requirements.txt + reqs = [ + "alabaster<0.7.12", + "blurb<1.2", + "docutils<=0.17.1", + "jieba", + "jinja2<3.1", + "python-docs-theme<=2023.3.1", + "sphinxcontrib-applehelp<=1.0.2", + "sphinxcontrib-devhelp<=1.0.2", + "sphinxcontrib-htmlhelp<=2.0", + "sphinxcontrib-jsmath<=1.0.1", + "sphinxcontrib-qthelp<=1.0.3", + "sphinxcontrib-serializinghtml<=1.1.5", + "standard-imghdr", + ] + if self.name in {"3.7", "3.6", "2.7"}: + return reqs + ["sphinx==2.3.1"] + if self.name == "3.5": + return reqs + ["sphinx==1.8.4", "standard-pipes"] + raise ValueError("unreachable") @property - def changefreq(self): + def changefreq(self) -> str: """Estimate this version change frequency, for the sitemap.""" return {"EOL": "never", "security-fixes": "yearly"}.get(self.status, "daily") - def as_tuple(self): + def as_tuple(self) -> tuple[int, ...]: """This version name as tuple, for easy comparisons.""" return version_to_tuple(self.name) @property - def url(self): + def url(self) -> str: """The doc URL of this version in production.""" return f"https://docs.python.org/{self.name}/" @property - def title(self): + def title(self) -> str: """The title of this version's doc, for the sidebar.""" return f"Python {self.name} ({self.status})" - @staticmethod - def filter(versions, branch=None): - """Filter the given versions. - - If *branch* is given, only *versions* matching *branch* are returned. - - Else all live version are returned (this mean no EOL and no - security-fixes branches). - """ - if branch: - return [v for v in versions if branch in (v.name, v.branch_or_tag)] - return [v for v in versions if v.status not in ("EOL", "security-fixes")] - - @staticmethod - def current_stable(versions): - """Find the current stable cPython version.""" - return max((v for v in versions if v.status == "stable"), key=Version.as_tuple) - - @staticmethod - def current_dev(versions): - """Find the current de cPython version.""" - return max(versions, key=Version.as_tuple) - @property - def picker_label(self): + def picker_label(self) -> str: """Forge the label of a version picker.""" if self.status == "in development": return f"dev ({self.name})" @@ -173,60 +243,140 @@ def picker_label(self): return f"pre ({self.name})" return self.name - def setup_indexsidebar(self, versions, dest_path): - """Build indexsidebar.html for Sphinx.""" - with open( - HERE / "templates" / "indexsidebar.html", encoding="UTF-8" - ) as sidebar_template_file: - sidebar_template = jinja2.Template(sidebar_template_file.read()) - with open(dest_path, "w", encoding="UTF-8") as sidebar_file: - sidebar_file.write( - sidebar_template.render( - current_version=self, - versions=sorted( - versions, key=lambda v: version_to_tuple(v.name), reverse=True - ), - ) - ) - @classmethod - def from_json(cls, name, values): - """Loads a version from devguide's json representation.""" - return cls(name, status=values["status"], branch_or_tag=values["branch"]) +@dataclasses.dataclass(frozen=True, slots=True) +class Languages: + _seq: Sequence[Language] - def __eq__(self, other): - return self.name == other.name + def __iter__(self) -> Iterator[Language]: + return iter(self._seq) - def __gt__(self, other): - return self.as_tuple() > other.as_tuple() + def __reversed__(self) -> Iterator[Language]: + return reversed(self._seq) + + @classmethod + def from_json(cls, defaults: dict, languages: dict) -> Languages: + default_translated_name = defaults.get("translated_name", "") + default_in_prod = defaults.get("in_prod", True) + default_sphinxopts = defaults.get("sphinxopts", []) + default_html_only = defaults.get("html_only", False) + langs = [ + Language( + iso639_tag=iso639_tag, + name=section["name"], + translated_name=section.get("translated_name", default_translated_name), + in_prod=section.get("in_prod", default_in_prod), + sphinxopts=section.get("sphinxopts", default_sphinxopts), + html_only=section.get("html_only", default_html_only), + ) + for iso639_tag, section in languages.items() + ] + return cls(langs) + + def filter(self, language_tags: Sequence[str] = ()) -> Sequence[Language]: + """Filter a sequence of languages according to --languages.""" + if language_tags: + language_tags = frozenset(language_tags) + return [l for l in self if l.tag in language_tags] # NoQA: E741 + return list(self) -@dataclass(frozen=True, order=True) +@dataclasses.dataclass(order=True, frozen=True, kw_only=True) class Language: iso639_tag: str name: str + translated_name: str in_prod: bool - sphinxopts: tuple + sphinxopts: Sequence[str] html_only: bool = False @property - def tag(self): + def tag(self) -> str: return self.iso639_tag.replace("_", "-").lower() - @staticmethod - def filter(languages, language_tags=None): - """Filter a sequence of languages according to --languages.""" - if language_tags: - languages_dict = {language.tag: language for language in languages} - return [languages_dict[tag] for tag in language_tags] - return languages + @property + def switcher_label(self) -> str: + if self.translated_name: + return f"{self.name} | {self.translated_name}" + return self.name + + +@dataclasses.dataclass(frozen=True, kw_only=True, slots=True) +class BuildMetadata: + _version: Version + _language: Language + + @property + def sphinxopts(self) -> Sequence[str]: + return self._language.sphinxopts + + @property + def iso639_tag(self) -> str: + return self._language.iso639_tag + + @property + def html_only(self) -> bool: + return self._language.html_only or not self._language.in_prod + + @property + def url(self): + """The URL of this version in production.""" + if self.is_translation: + return f"https://docs.python.org/{self.version}/{self.language}/" + return f"https://docs.python.org/{self.version}/" + @property + def branch_or_tag(self) -> str: + return self._version.branch_or_tag + + @property + def status(self) -> str: + return self._version.status + + @property + def is_eol(self) -> bool: + return self._version.status == "EOL" + + @property + def dependencies(self) -> list[str]: + return self._version.requirements -def run(cmd, cwd=None) -> subprocess.CompletedProcess: + @property + def version(self): + return self._version.name + + @property + def version_tuple(self): + return self._version.as_tuple() + + @property + def language(self): + return self._language.tag + + @property + def is_translation(self): + return self.language != "en" + + @property + def slug(self) -> str: + return f"{self.language}/{self.version}" + + @property + def venv_name(self) -> str: + return f"venv-{self.version}" + + @property + def locale_repo_url(self) -> str: + return f"https://github.com/python/python-docs-{self.language}.git" + + +def run( + cmd: Sequence[str | Path], cwd: Path | None = None +) -> subprocess.CompletedProcess: """Like subprocess.run, with logging before and after the command execution.""" - cmd = [str(arg) for arg in cmd] + cmd = list(map(str, cmd)) cmdstring = shlex.join(cmd) - logging.debug("Run: %r", cmdstring) + logging.debug("Run: '%s'", cmdstring) result = subprocess.run( cmd, cwd=cwd, @@ -240,54 +390,68 @@ def run(cmd, cwd=None) -> subprocess.CompletedProcess: if result.returncode: # Log last 20 lines, those are likely the interesting ones. logging.error( - "Run: %r KO:\n%s", + "Run: '%s' KO:\n%s", cmdstring, - indent("\n".join(result.stdout.split("\n")[-20:]), " "), + "\n".join(f" {line}" for line in result.stdout.split("\n")[-20:]), ) result.check_returncode() return result -def changed_files(left, right): - """Compute a list of different files between left and right, recursively. - Resulting paths are relative to left. - """ - changed = [] +def run_with_logging(cmd: Sequence[str | Path], cwd: Path | None = None) -> None: + """Like subprocess.check_call, with logging before the command execution.""" + cmd = list(map(str, cmd)) + logging.debug("Run: '%s'", shlex.join(cmd)) + with subprocess.Popen( + cmd, + cwd=cwd, + stdin=subprocess.PIPE, + stderr=subprocess.STDOUT, + stdout=subprocess.PIPE, + encoding="utf-8", + ) as p: + try: + for line in p.stdout or (): + logging.debug(">>>> %s", line.rstrip()) + except: + p.kill() + raise + if return_code := p.poll(): + raise subprocess.CalledProcessError(return_code, cmd[0]) + - def traverse(dircmp_result): - base = Path(dircmp_result.left).relative_to(left) - for file in dircmp_result.diff_files: - changed.append(str(base / file)) - if file == "index.html": - changed.append(str(base) + "/") - for dircomp in dircmp_result.subdirs.values(): - traverse(dircomp) +def changed_files(left: Path, right: Path) -> int: + """Compute the number of different files in the two directory trees.""" - traverse(filecmp.dircmp(left, right)) - return changed + def traverse(dircmp_result: filecmp.dircmp) -> int: + changed = len(dircmp_result.diff_files) + changed += sum(map(traverse, dircmp_result.subdirs.values())) + return changed + return traverse(filecmp.dircmp(left, right)) -@dataclass + +@dataclasses.dataclass class Repository: """Git repository abstraction for our specific needs.""" remote: str directory: Path - def run(self, *args): + def run(self, *args: str) -> subprocess.CompletedProcess: """Run git command in the clone repository.""" return run(("git", "-C", self.directory) + args) - def get_ref(self, pattern): + def get_ref(self, pattern: str) -> str: """Return the reference of a given tag or branch.""" try: # Maybe it's a branch - return self.run("show-ref", "-s", "origin/" + pattern).stdout.strip() + return self.run("show-ref", "-s", f"origin/{pattern}").stdout.strip() except subprocess.CalledProcessError: # Maybe it's a tag - return self.run("show-ref", "-s", "tags/" + pattern).stdout.strip() + return self.run("show-ref", "-s", f"tags/{pattern}").stdout.strip() - def fetch(self): + def fetch(self) -> subprocess.CompletedProcess: """Try (and retry) to run git fetch.""" try: return self.run("fetch") @@ -296,35 +460,37 @@ def fetch(self): sleep(5) return self.run("fetch") - def switch(self, branch_or_tag): + def switch(self, branch_or_tag: str) -> None: """Reset and cleans the repository to the given branch or tag.""" self.run("reset", "--hard", self.get_ref(branch_or_tag), "--") self.run("clean", "-dfqx") - def clone(self): + def clone(self) -> bool: """Maybe clone the repository, if not already cloned.""" if (self.directory / ".git").is_dir(): return False # Already cloned logging.info("Cloning %s into %s", self.remote, self.directory) self.directory.mkdir(mode=0o775, parents=True, exist_ok=True) - run(["git", "clone", self.remote, self.directory]) + run(("git", "clone", self.remote, self.directory)) return True - def update(self): + def update(self) -> None: self.clone() or self.fetch() -def version_to_tuple(version): +def version_to_tuple(version: str) -> tuple[int, ...]: """Transform a version string to a tuple, for easy comparisons.""" return tuple(int(part) for part in version.split(".")) -def tuple_to_version(version_tuple): +def tuple_to_version(version_tuple: tuple[int, ...]) -> str: """Reverse version_to_tuple.""" return ".".join(str(part) for part in version_tuple) -def locate_nearest_version(available_versions, target_version): +def locate_nearest_version( + available_versions: Collection[str], target_version: str +) -> str: """Look for the nearest version of target_version in available_versions. Versions are to be given as tuples, like (3, 7) for 3.7. @@ -340,12 +506,7 @@ def locate_nearest_version(available_versions, target_version): '3.7' """ - available_versions_tuples = sorted( - [ - version_to_tuple(available_version) - for available_version in set(available_versions) - ] - ) + available_versions_tuples = sorted(map(version_to_tuple, set(available_versions))) target_version_tuple = version_to_tuple(target_version) try: found = available_versions_tuples[ @@ -360,7 +521,7 @@ def locate_nearest_version(available_versions, target_version): def edit(file: Path): """Context manager to edit a file "in place", use it as: - with edit("/etc/hosts") as i, o: + with edit("/etc/hosts") as (i, o): for line in i: o.write(line.replace("localhoat", "localhost")) """ @@ -373,49 +534,15 @@ def edit(file: Path): temporary.rename(file) -def setup_switchers( - versions: Iterable[Version], languages: Iterable[Language], html_root: Path -): - """Setup cross-links between cpython versions: +def setup_switchers(script_content: bytes, html_root: Path) -> None: + """Setup cross-links between CPython versions: - Cross-link various languages in a language switcher - Cross-link various versions in a version switcher """ - with open( - HERE / "templates" / "switchers.js", encoding="UTF-8" - ) as switchers_template_file: - template = Template(switchers_template_file.read()) switchers_path = html_root / "_static" / "switchers.js" - switchers_path.write_text( - template.safe_substitute( - { - "LANGUAGES": json.dumps( - OrderedDict( - sorted( - [ - (language.tag, language.name) - for language in languages - if language.in_prod - ] - ) - ) - ), - "VERSIONS": json.dumps( - OrderedDict( - [ - (version.name, version.picker_label) - for version in sorted( - versions, - key=lambda v: version_to_tuple(v.name), - reverse=True, - ) - ] - ) - ), - } - ), - encoding="UTF-8", - ) - for file in Path(html_root).glob("**/*.html"): + switchers_path.write_bytes(script_content) + + for file in html_root.glob("**/*.html"): depth = len(file.relative_to(html_root).parts) - 1 src = f"{'../' * depth}_static/switchers.js" script = f' \n' @@ -428,68 +555,16 @@ def setup_switchers( ofile.write(line) -def build_robots_txt( - versions: Iterable[Version], - languages: Iterable[Language], - www_root: Path, - group, - skip_cache_invalidation, -): - """Disallow crawl of EOL versions in robots.txt.""" - if not www_root.exists(): - logging.info("Skipping robots.txt generation (www root does not even exists).") - return - robots_file = www_root / "robots.txt" - with open(HERE / "templates" / "robots.txt", encoding="UTF-8") as template_file: - template = jinja2.Template(template_file.read()) - with open(robots_file, "w", encoding="UTF-8") as robots_txt_file: - robots_txt_file.write( - template.render(languages=languages, versions=versions) + "\n" - ) - robots_file.chmod(0o775) - run(["chgrp", group, robots_file]) - if not skip_cache_invalidation: - purge("robots.txt") - - -def build_sitemap( - versions: Iterable[Version], languages: Iterable[Language], www_root: Path, group -): - """Build a sitemap with all live versions and translations.""" - if not www_root.exists(): - logging.info("Skipping sitemap generation (www root does not even exists).") - return - with open(HERE / "templates" / "sitemap.xml", encoding="UTF-8") as template_file: - template = jinja2.Template(template_file.read()) - sitemap_file = www_root / "sitemap.xml" - sitemap_file.write_text( - template.render(languages=languages, versions=versions) + "\n", encoding="UTF-8" - ) - sitemap_file.chmod(0o664) - run(["chgrp", group, sitemap_file]) - - -def build_404(www_root: Path, group): - """Build a nice 404 error page to display in case PDFs are not built yet.""" - if not www_root.exists(): - logging.info("Skipping 404 page generation (www root does not even exists).") - return - not_found_file = www_root / "404.html" - shutil.copyfile(HERE / "templates" / "404.html", not_found_file) - not_found_file.chmod(0o664) - run(["chgrp", group, not_found_file]) - - -def head(text, lines=10): +def head(text: str, lines: int = 10) -> str: """Return the first *lines* lines from the given text.""" return "\n".join(text.split("\n")[:lines]) -def version_info(): +def version_info() -> None: """Handler for --version.""" try: platex_version = head( - subprocess.check_output(["platex", "--version"], universal_newlines=True), + subprocess.check_output(("platex", "--version"), text=True), lines=3, ) except FileNotFoundError: @@ -497,7 +572,7 @@ def version_info(): try: xelatex_version = head( - subprocess.check_output(["xelatex", "--version"], universal_newlines=True), + subprocess.check_output(("xelatex", "--version"), text=True), lines=2, ) except FileNotFoundError: @@ -516,145 +591,59 @@ def version_info(): ) -def parse_args(): - """Parse command-line arguments.""" - - parser = ArgumentParser( - description="Runs a build of the Python docs for various branches." - ) - parser.add_argument( - "-q", - "--quick", - action="store_true", - help="Make HTML files only (Makefile rules suffixed with -html).", - ) - parser.add_argument( - "-b", - "--branch", - metavar="3.12", - help="Version to build (defaults to all maintained branches).", - ) - parser.add_argument( - "-r", - "--build-root", - type=Path, - help="Path to a directory containing a checkout per branch.", - default=Path("/srv/docsbuild"), - ) - parser.add_argument( - "-w", - "--www-root", - type=Path, - help="Path where generated files will be copied.", - default=Path("/srv/docs.python.org"), - ) - parser.add_argument( - "--skip-cache-invalidation", - help="Skip fastly cache invalidation.", - action="store_true", - ) - parser.add_argument( - "--group", - help="Group files on targets and www-root file should get.", - default="docs", - ) - parser.add_argument( - "--log-directory", - type=Path, - help="Directory used to store logs.", - default=Path("/var/log/docsbuild/"), - ) - parser.add_argument( - "--languages", - nargs="*", - help="Language translation, as a PEP 545 language tag like" - " 'fr' or 'pt-br'. " - "Builds all available languages by default.", - metavar="fr", - ) - parser.add_argument( - "--version", - action="store_true", - help="Get build_docs and dependencies version info", - ) - parser.add_argument( - "--theme", - default="python-docs-theme", - help="Python package to use for python-docs-theme: Usefull to test branches:" - " --theme git+https://github.com/obulat/python-docs-theme@master", - ) - args = parser.parse_args() - if args.version: - version_info() - sys.exit(0) - del args.version - if args.log_directory: - args.log_directory = args.log_directory.resolve() - if args.build_root: - args.build_root = args.build_root.resolve() - if args.www_root: - args.www_root = args.www_root.resolve() - return args - - -def setup_logging(log_directory: Path): - """Setup logging to stderr if ran by a human, or to a file if ran from a cron.""" - if sys.stderr.isatty(): - logging.basicConfig( - format="%(asctime)s %(levelname)s: %(message)s", stream=sys.stderr - ) - else: - log_directory.mkdir(parents=True, exist_ok=True) - handler = logging.handlers.WatchedFileHandler(log_directory / "docsbuild.log") - handler.setFormatter( - logging.Formatter("%(asctime)s %(levelname)s: %(message)s") - ) - logging.getLogger().addHandler(handler) - logging.getLogger().setLevel(logging.DEBUG) - - -@dataclass +@dataclasses.dataclass class DocBuilder: - """Builder for a cpython version and a language.""" + """Builder for a CPython version and a language.""" - version: Version - versions: Iterable[Version] - language: Language - languages: Iterable[Language] + build_meta: BuildMetadata cpython_repo: Repository + docs_by_version_content: bytes + switchers_content: bytes build_root: Path www_root: Path + select_output: Literal["no-html", "only-html", "only-html-en"] | None quick: bool group: str log_directory: Path skip_cache_invalidation: bool - theme: Path + theme: str @property - def full_build(self): - """Tell if a full build is needed. - - A full build is slow, it builds pdf, txt, epub, texinfo, and - archives everything. + def html_only(self) -> bool: + return ( + self.select_output in {"only-html", "only-html-en"} + or self.quick + or self.build_meta.html_only + ) - A partial build only builds HTML and does not archive, it's - fast. - """ - return not self.quick and not self.language.html_only + @property + def includes_html(self) -> bool: + """Does the build we are running include HTML output?""" + return self.select_output != "no-html" - def run(self) -> bool: + def run(self, http: urllib3.PoolManager, force_build: bool) -> bool | None: """Build and publish a Python doc, for a language, and a version.""" start_time = perf_counter() + start_timestamp = dt.datetime.now(tz=dt.UTC).replace(microsecond=0) logging.info("Running.") try: - self.cpython_repo.switch(self.version.branch_or_tag) - if self.language.tag != "en": + if self.build_meta.html_only and not self.includes_html: + logging.info("Skipping non-HTML build (language is HTML-only).") + return None # skipped + self.cpython_repo.switch(self.build_meta.branch_or_tag) + if self.build_meta.is_translation: self.clone_translation() - if self.should_rebuild(): + if trigger_reason := self.should_rebuild(force_build): self.build_venv() self.build() - self.copy_build_to_webroot() - self.save_state(build_duration=perf_counter() - start_time) + self.copy_build_to_webroot(http) + self.save_state( + build_start=start_timestamp, + build_duration=perf_counter() - start_time, + trigger=trigger_reason, + ) + else: + return None # skipped except Exception as err: logging.exception("Badly handled exception, human, please help.") if sentry_sdk: @@ -662,32 +651,29 @@ def run(self) -> bool: return False return True + @property + def locale_dir(self) -> Path: + return self.build_root / self.build_meta.version / "locale" + @property def checkout(self) -> Path: - """Path to cpython git clone.""" - return self.build_root / "cpython" + """Path to CPython git clone.""" + return self.build_root / _checkout_name(self.select_output) - def clone_translation(self): + def clone_translation(self) -> None: self.translation_repo.update() self.translation_repo.switch(self.translation_branch) @property - def translation_repo(self): + def translation_repo(self) -> Repository: """See PEP 545 for translations repository naming convention.""" - locale_repo = f"https://github.com/python/python-docs-{self.language.tag}.git" - locale_clone_dir = ( - self.build_root - / self.version.name - / "locale" - / self.language.iso639_tag - / "LC_MESSAGES" - ) - return Repository(locale_repo, locale_clone_dir) + locale_clone_dir = self.locale_dir / self.build_meta.iso639_tag / "LC_MESSAGES" + return Repository(self.build_meta.locale_repo_url, locale_clone_dir) @property - def translation_branch(self): - """Some cpython versions may be untranslated, being either too old or + def translation_branch(self) -> str: + """Some CPython versions may be untranslated, being either too old or too new. This function looks for remote branches on the given repo, and @@ -697,230 +683,185 @@ def translation_branch(self): """ remote_branches = self.translation_repo.run("branch", "-r").stdout branches = re.findall(r"/([0-9]+\.[0-9]+)$", remote_branches, re.M) - return locate_nearest_version(branches, self.version.name) + return locate_nearest_version(branches, self.build_meta.version) - def build(self): + def build(self) -> None: """Build this version/language doc.""" logging.info("Build start.") - sphinxopts = list(self.language.sphinxopts) - sphinxopts.extend(["-q"]) - if self.language.tag != "en": - locale_dirs = self.build_root / self.version.name / "locale" - sphinxopts.extend( - ( - f"-D locale_dirs={locale_dirs}", - f"-D language={self.language.iso639_tag}", - "-D gettext_compact=0", - ) - ) - if self.language.tag == "ja": - # Since luatex doesn't support \ufffd, replace \ufffd with '?'. - # https://gist.github.com/zr-tex8r/e0931df922f38fbb67634f05dfdaf66b - # Luatex already fixed this issue, so we can remove this once Texlive - # is updated. - # (https://github.com/TeX-Live/luatex/commit/af5faf1) - subprocess.check_output( - "sed -i s/\N{REPLACEMENT CHARACTER}/?/g " - f"{locale_dirs}/ja/LC_MESSAGES/**/*.po", - shell=True, - ) - subprocess.check_output( - "sed -i s/\N{REPLACEMENT CHARACTER}/?/g " - f"{self.checkout}/Doc/**/*.rst", - shell=True, - ) - - if self.version.status == "EOL": + start_time = perf_counter() + sphinxopts = list(self.build_meta.sphinxopts) + if self.build_meta.is_translation: + sphinxopts.extend(( + f"-D locale_dirs={self.locale_dir}", + f"-D language={self.build_meta.iso639_tag}", + "-D gettext_compact=0", + "-D translation_progress_classes=1", + )) + + if self.build_meta.is_eol: sphinxopts.append("-D html_context.outdated=1") - maketarget = ( - "autobuild-" - + ( - "dev" - if self.version.status in ("in development", "pre-release") - else "stable" - ) - + ("" if self.full_build else "-html") - ) + + if self.build_meta.status in ("in development", "pre-release"): + maketarget = "autobuild-dev" + else: + maketarget = "autobuild-stable" + if self.html_only: + maketarget += "-html" logging.info("Running make %s", maketarget) python = self.venv / "bin" / "python" sphinxbuild = self.venv / "bin" / "sphinx-build" blurb = self.venv / "bin" / "blurb" - # Disable cpython switchers, we handle them now: - def is_mac(): - return platform.system() == 'Darwin' + if self.includes_html: + site_url = self.build_meta.url + # Define a tag to enable opengraph socialcards previews + # (used in Doc/conf.py and requires matplotlib) + sphinxopts += ( + "-t create-social-cards", + f"-D ogp_site_url={site_url}", + ) - run( - ["sed", "-i"] - + ([""] if is_mac() else []) - + ["s/ *-A switchers=1//", self.checkout / "Doc" / "Makefile"] - ) - self.version.setup_indexsidebar( - self.versions, - self.checkout / "Doc" / "tools" / "templates" / "indexsidebar.html", - ) - run( - [ - "make", - "-C", - self.checkout / "Doc", - "PYTHON=" + str(python), - "SPHINXBUILD=" + str(sphinxbuild), - "BLURB=" + str(blurb), - "VENVDIR=" + str(self.venv), - "SPHINXOPTS=" + " ".join(sphinxopts), - "SPHINXERRORHANDLING=", - maketarget, - ] - ) - run(["mkdir", "-p", self.log_directory]) - run(["chgrp", "-R", self.group, self.log_directory]) - setup_switchers( - self.versions, self.languages, self.checkout / "Doc" / "build" / "html" - ) - logging.info("Build done.") + if self.build_meta.version_tuple < (3, 8): + # Disable CPython switchers, we handle them now: + text = (self.checkout / "Doc" / "Makefile").read_text(encoding="utf-8") + text = text.replace(" -A switchers=1", "") + (self.checkout / "Doc" / "Makefile").write_text(text, encoding="utf-8") + + self.setup_indexsidebar() + run_with_logging(( + "make", + "-C", + self.checkout / "Doc", + f"PYTHON={python}", + f"SPHINXBUILD={sphinxbuild}", + f"BLURB={blurb}", + f"VENVDIR={self.venv}", + f"SPHINXOPTS={' '.join(sphinxopts)}", + "SPHINXERRORHANDLING=", + maketarget, + )) + self.log_directory.mkdir(parents=True, exist_ok=True) + chgrp(self.log_directory, group=self.group, recursive=True) + if self.includes_html: + setup_switchers( + self.switchers_content, self.checkout / "Doc" / "build" / "html" + ) + logging.info("Build done (%s).", format_seconds(perf_counter() - start_time)) - def build_venv(self): + def build_venv(self) -> None: """Build a venv for the specific Python version. So we can reuse them from builds to builds, while they contain different Sphinx versions. """ - venv_path = self.build_root / ("venv-" + self.version.name) - run([sys.executable, "-m", "venv", venv_path]) + requirements = list(self.build_meta.dependencies) + if self.includes_html: + # opengraph previews + requirements.append("matplotlib>=3") + + venv_path = self.build_root / self.build_meta.venv_name + venv.create(venv_path, symlinks=os.name != "nt", with_pip=True) run( - [venv_path / "bin" / "python", "-m", "pip", "install", "--upgrade"] - + [self.theme] - + self.version.requirements, + ( + venv_path / "bin" / "python", + "-m", + "pip", + "install", + "--upgrade", + "--upgrade-strategy=eager", + self.theme, + *requirements, + ), cwd=self.checkout / "Doc", ) - run([venv_path / "bin" / "python", "-m", "pip", "freeze", "--all"]) + run((venv_path / "bin" / "python", "-m", "pip", "freeze", "--all")) self.venv = venv_path - def copy_build_to_webroot(self): + def setup_indexsidebar(self) -> None: + """Copy indexsidebar.html for Sphinx.""" + tmpl_src = HERE / "templates" + tmpl_dst = self.checkout / "Doc" / "tools" / "templates" + dbv_path = tmpl_dst / "_docs_by_version.html" + + shutil.copy(tmpl_src / "indexsidebar.html", tmpl_dst / "indexsidebar.html") + if not self.build_meta.is_eol: + dbv_path.write_bytes(self.docs_by_version_content) + else: + shutil.copy(tmpl_src / "_docs_by_version.html", dbv_path) + + def copy_build_to_webroot(self, http: urllib3.PoolManager) -> None: """Copy a given build to the appropriate webroot with appropriate rights.""" logging.info("Publishing start.") + start_time = perf_counter() self.www_root.mkdir(parents=True, exist_ok=True) - if self.language.tag == "en": - target = self.www_root / self.version.name + if not self.build_meta.is_translation: + target = self.www_root / self.build_meta.version else: - language_dir = self.www_root / self.language.tag + language_dir = self.www_root / self.build_meta.language language_dir.mkdir(parents=True, exist_ok=True) - try: - run(["chgrp", "-R", self.group, language_dir]) - except subprocess.CalledProcessError as err: - logging.warning("Can't change group of %s: %s", language_dir, str(err)) + chgrp(language_dir, group=self.group, recursive=True) language_dir.chmod(0o775) - target = language_dir / self.version.name + target = language_dir / self.build_meta.version target.mkdir(parents=True, exist_ok=True) try: target.chmod(0o775) except PermissionError as err: logging.warning("Can't change mod of %s: %s", target, str(err)) - try: - run(["chgrp", "-R", self.group, target]) - except subprocess.CalledProcessError as err: - logging.warning("Can't change group of %s: %s", target, str(err)) - - changed = changed_files(self.checkout / "Doc" / "build" / "html", target) - logging.info("Copying HTML files to %s", target) - run( - [ - "chown", - "-R", - ":" + self.group, + chgrp(target, group=self.group, recursive=True) + + changed = 0 + if self.includes_html: + # Copy built HTML files to webroot (default /srv/docs.python.org) + changed += changed_files(self.checkout / "Doc" / "build" / "html", target) + logging.info("Copying HTML files to %s", target) + chgrp( self.checkout / "Doc" / "build" / "html/", - ] - ) - run(["chmod", "-R", "o+r", self.checkout / "Doc" / "build" / "html"]) - run( - [ - "find", - self.checkout / "Doc" / "build" / "html", - "-type", - "d", - "-exec", - "chmod", - "o+x", - "{}", - ";", - ] - ) - if self.full_build: - run( - [ - "rsync", - "-a", - "--delete-delay", - "--filter", - "P archives/", - str(self.checkout / "Doc" / "build" / "html") + "/", - target, - ] - ) - else: - run( - [ - "rsync", - "-a", - str(self.checkout / "Doc" / "build" / "html") + "/", - target, - ] + group=self.group, + recursive=True, ) - if self.full_build: + chmod_make_readable(self.checkout / "Doc" / "build" / "html") + run(( + "rsync", + "-a", + "--delete-delay", + "--filter", + "P archives/", + str(self.checkout / "Doc" / "build" / "html") + "/", + target, + )) + + dist_dir = self.checkout / "Doc" / "dist" + if dist_dir.is_dir(): + # Copy archive files to /archives/ logging.debug("Copying dist files.") - run( - [ - "chown", - "-R", - ":" + self.group, - self.checkout / "Doc" / "dist", - ] - ) - run( - [ - "chmod", - "-R", - "o+r", - self.checkout / "Doc" / "dist", - ] + chgrp(dist_dir, group=self.group, recursive=True) + chmod_make_readable(dist_dir) + archives_dir = target / "archives" + archives_dir.mkdir(parents=True, exist_ok=True) + archives_dir.chmod( + archives_dir.stat().st_mode | stat.S_IROTH | stat.S_IXOTH ) - run(["mkdir", "-m", "o+rx", "-p", target / "archives"]) - run(["chown", ":" + self.group, target / "archives"]) - run( - [ - "cp", - "-a", - *[ - str(dist) - for dist in (Path(self.checkout) / "Doc" / "dist").glob("*") - ], - target / "archives", - ] - ) - changed.append("archives/") - for file in (target / "archives").iterdir(): - changed.append("archives/" + file.name) + chgrp(archives_dir, group=self.group) + changed += 1 + for dist_file in dist_dir.iterdir(): + shutil.copy2(dist_file, archives_dir / dist_file.name) + changed += 1 - logging.info("%s files changed", len(changed)) + logging.info("%s files changed", changed) if changed and not self.skip_cache_invalidation: - targets_dir = str(self.www_root) - prefixes = run(["find", "-L", targets_dir, "-samefile", target]).stdout - prefixes = prefixes.replace(targets_dir + "/", "") - prefixes = [prefix + "/" for prefix in prefixes.split("\n") if prefix] - purge(*prefixes) - for prefix in prefixes: - purge(*[prefix + p for p in changed]) - logging.info("Publishing done") - - def should_rebuild(self): + purge_surrogate_key(http, self.build_meta.slug) + logging.info( + "Publishing done (%s).", format_seconds(perf_counter() - start_time) + ) + + def should_rebuild(self, force: bool) -> str | Literal[False]: state = self.load_state() if not state: logging.info("Should rebuild: no previous state found.") - return True + return "no previous state" cpython_sha = self.cpython_repo.run("rev-parse", "HEAD").stdout.strip() - if self.language.tag != "en": + if self.build_meta.is_translation: translation_sha = self.translation_repo.run( "rev-parse", "HEAD" ).stdout.strip() @@ -930,239 +871,626 @@ def should_rebuild(self): state["translation_sha"], translation_sha, ) - return True + return "new translations" if cpython_sha != state["cpython_sha"]: diff = self.cpython_repo.run( "diff", "--name-only", state["cpython_sha"], cpython_sha ).stdout - if "Doc/" in diff: + if "Doc/" in diff or "Misc/NEWS.d/" in diff: logging.info( "Should rebuild: Doc/ has changed (from %s to %s)", state["cpython_sha"], cpython_sha, ) - return True + return "Doc/ has changed" + if force: + logging.info("Should rebuild: forced.") + return "forced" logging.info("Nothing changed, no rebuild needed.") return False def load_state(self) -> dict: - state_file = self.build_root / "state.toml" + if self.select_output is not None: + state_file = self.build_root / f"state-{self.select_output}.toml" + else: + state_file = self.build_root / "state.toml" try: return tomlkit.loads(state_file.read_text(encoding="UTF-8"))[ - f"/{self.language.tag}/{self.version.name}/" + f"/{self.build_meta.slug}/" ] except (KeyError, FileNotFoundError): return {} - def save_state(self, build_duration: float): - """Save current cpython sha1 and current translation sha1. + def save_state( + self, build_start: dt.datetime, build_duration: float, trigger: str + ) -> None: + """Save current CPython sha1 and current translation sha1. Using this we can deduce if a rebuild is needed or not. """ - state_file = self.build_root / "state.toml" + if self.select_output is not None: + state_file = self.build_root / f"state-{self.select_output}.toml" + else: + state_file = self.build_root / "state.toml" try: states = tomlkit.parse(state_file.read_text(encoding="UTF-8")) except FileNotFoundError: states = tomlkit.document() - state = {} - state["cpython_sha"] = self.cpython_repo.run("rev-parse", "HEAD").stdout.strip() - if self.language.tag != "en": + key = f"/{self.build_meta.slug}/" + state = { + "last_build_start": build_start, + "last_build_duration": round(build_duration, 0), + "triggered_by": trigger, + "cpython_sha": self.cpython_repo.run("rev-parse", "HEAD").stdout.strip(), + } + if self.build_meta.is_translation: state["translation_sha"] = self.translation_repo.run( "rev-parse", "HEAD" ).stdout.strip() - state["last_build"] = dt.now(timezone.utc) - state["last_build_duration"] = build_duration - states[f"/{self.language.tag}/{self.version.name}/"] = state + states[key] = state state_file.write_text(tomlkit.dumps(states), encoding="UTF-8") + table = tomlkit.inline_table() + table |= state + logging.info("Saved new rebuild state for %s: %s", key, table.as_string()) -def symlink(www_root: Path, language: Language, directory: str, name: str, group: str, skip_cache_invalidation: bool): - """Used by major_symlinks and dev_symlink to maintain symlinks.""" - if language.tag == "en": # english is rooted on /, no /en/ - path = www_root - else: - path = www_root / language.tag - link = path / name - directory_path = path / directory - if not directory_path.exists(): - return # No touching link, dest doc not built yet. - if link.exists() and readlink(str(link)) == directory: - return # Link is already pointing to right doc. - if link.exists(): - link.unlink() - link.symlink_to(directory) - run(["chown", "-h", ":" + group, str(link)]) - if not skip_cache_invalidation: - purge_path(www_root, link) +def chgrp( + path: Path, + /, + group: int | str | None, + *, + recursive: bool = False, + follow_symlinks: bool = True, +) -> None: + if sys.platform == "win32": + return -def major_symlinks( - www_root: Path, group, versions: Iterable[Version], languages: Iterable[Language], skip_cache_invalidation: bool -): - """Maintains the /2/ and /3/ symlinks for each languages. + from grp import getgrnam - Like: - - /3/ → /3.9/ - - /fr/3/ → /fr/3.9/ - - /es/3/ → /es/3.9/ - """ - current_stable = Version.current_stable(versions).name - for language in languages: - symlink(www_root, language, current_stable, "3", group, skip_cache_invalidation) - symlink(www_root, language, "2.7", "2", group, skip_cache_invalidation) + try: + try: + group_id = int(group) + except ValueError: + group_id = getgrnam(group)[2] + except (LookupError, TypeError, ValueError): + return + try: + os.chown(path, -1, group_id, follow_symlinks=follow_symlinks) + if recursive: + for p in path.rglob("*"): + os.chown(p, -1, group_id, follow_symlinks=follow_symlinks) + except OSError as err: + logging.warning("Can't change group of %s: %s", path, str(err)) + + +def chmod_make_readable(path: Path, /, mode: int = stat.S_IROTH) -> None: + if not path.is_dir(): + raise ValueError + + path.chmod(path.stat().st_mode | stat.S_IROTH | stat.S_IXOTH) # o+rx + for p in path.rglob("*"): + if p.is_dir(): + p.chmod(p.stat().st_mode | stat.S_IROTH | stat.S_IXOTH) # o+rx + else: + p.chmod(p.stat().st_mode | stat.S_IROTH) # o+r -def dev_symlink(www_root: Path, group, versions, languages, skip_cache_invalidation: bool): - """Maintains the /dev/ symlinks for each languages. - Like: - - /dev/ → /3.11/ - - /fr/dev/ → /fr/3.11/ - - /es/dev/ → /es/3.11/ - """ - current_dev = Version.current_dev(versions).name - for language in languages: - symlink(www_root, language, current_dev, "dev", group, skip_cache_invalidation) +def format_seconds(seconds: float) -> str: + hours, remainder = divmod(seconds, 3600) + minutes, seconds = divmod(remainder, 60) + hours, minutes, seconds = int(hours), int(minutes), round(seconds) + match (hours, minutes, seconds): + case 0, 0, s: + return f"{s}s" + case 0, m, s: + return f"{m}m {s}s" + case h, m, s: + return f"{h}h {m}m {s}s" -def purge(*paths): - """Remove one or many paths from docs.python.org's CDN. + raise ValueError("unreachable") - To be used when a file change, so the CDN fetch the new one. - """ - base = "https://docs.python.org/" - for path in paths: - url = urljoin(base, str(path)) - logging.debug("Purging %s from CDN", url) - requests.request("PURGE", url, timeout=30) +def _checkout_name(select_output: str | None) -> str: + if select_output is not None: + return f"cpython-{select_output}" + return "cpython" -def purge_path(www_root: Path, path: Path): - """Recursively remove a path from docs.python.org's CDN. - To be used when a directory change, so the CDN fetch the new one. - """ - purge(*[file.relative_to(www_root) for file in path.glob("**/*")]) - purge(path.relative_to(www_root)) - purge(str(path.relative_to(www_root)) + "/") +def main() -> int: + """Script entry point.""" + args = parse_args() + setup_logging(args.log_directory, args.select_output) + load_environment_variables() + if args.select_output is None: + return build_docs_with_lock(args, "build_docs.lock") + if args.select_output == "no-html": + return build_docs_with_lock(args, "build_docs_archives.lock") + if args.select_output == "only-html": + return build_docs_with_lock(args, "build_docs_html.lock") + if args.select_output == "only-html-en": + return build_docs_with_lock(args, "build_docs_html_en.lock") + return EX_FAILURE -def proofread_canonicals(www_root: Path, skip_cache_invalidation: bool) -> None: - """In www_root we check that all canonical links point to existing contents. - It can happen that a canonical is "broken": +def parse_args() -> argparse.Namespace: + """Parse command-line arguments.""" - - /3.11/whatsnew/3.11.html typically would link to - /3/whatsnew/3.11.html, which may not exist yet. - """ - canonical_re = re.compile( - """""" + parser = argparse.ArgumentParser( + description="Runs a build of the Python docs for various branches.", + allow_abbrev=False, ) - for file in www_root.glob("**/*.html"): - html = file.read_text(encoding="UTF-8", errors="surrogateescape") - canonical = canonical_re.search(html) - if not canonical: - continue - target = canonical.group(1) - if not (www_root / target).exists(): - logging.info("Removing broken canonical from %s to %s", file, target) - html = html.replace(canonical.group(0), "") - file.write_text(html, encoding="UTF-8", errors="surrogateescape") - if not skip_cache_invalidation: - purge(str(file).replace("/srv/docs.python.org/", "")) - - -def parse_versions_from_devguide(): - releases = requests.get( - "https://raw.githubusercontent.com/" - "python/devguide/main/include/release-cycle.json", - timeout=30, - ).json() - return [Version.from_json(name, release) for name, release in releases.items()] + parser.suggest_on_error = True + parser.add_argument( + "--select-output", + choices=("no-html", "only-html", "only-html-en"), + help="Choose what outputs to build.", + ) + parser.add_argument( + "-q", + "--quick", + action="store_true", + help="Run a quick build (only HTML files).", + ) + parser.add_argument( + "-b", + "--branches", + nargs="*", + metavar="3.12", + help="Versions to build (defaults to all maintained branches).", + ) + parser.add_argument( + "-r", + "--build-root", + type=Path, + help="Path to a directory containing a checkout per branch.", + default=Path("/srv/docsbuild"), + ) + parser.add_argument( + "-w", + "--www-root", + type=Path, + help="Path where generated files will be copied.", + default=Path("/srv/docs.python.org"), + ) + parser.add_argument( + "--force", + action="store_true", + help="Always build the chosen languages and versions, " + "regardless of existing state.", + ) + parser.add_argument( + "--skip-cache-invalidation", + help="Skip Fastly cache invalidation.", + action="store_true", + ) + parser.add_argument( + "--group", + help="Group files on targets and www-root file should get.", + default="docs", + ) + parser.add_argument( + "--log-directory", + type=Path, + help="Directory used to store logs.", + default=Path("/var/log/docsbuild/"), + ) + parser.add_argument( + "--languages", + nargs="*", + help="Language translation, as a PEP 545 language tag like" + " 'fr' or 'pt-br'. " + "Builds all available languages by default.", + metavar="fr", + ) + parser.add_argument( + "--version", + action="store_true", + help="Get build_docs and dependencies version info", + ) + parser.add_argument( + "--theme", + default="python-docs-theme", + help="Python package to use for python-docs-theme: Useful to test branches:" + " --theme git+https://github.com/obulat/python-docs-theme@master", + ) + args = parser.parse_args() + if args.version: + version_info() + sys.exit(0) + del args.version + if args.log_directory: + args.log_directory = args.log_directory.resolve() + if args.build_root: + args.build_root = args.build_root.resolve() + if args.www_root: + args.www_root = args.www_root.resolve() + return args -def parse_languages_from_config(): - """Read config.toml to discover languages to build.""" - config = tomlkit.parse((HERE / "config.toml").read_text(encoding="UTF-8")) - languages = [] - defaults = config["defaults"] - for iso639_tag, section in config["languages"].items(): - languages.append( - Language( - iso639_tag, - section["name"], - section.get("in_prod", defaults["in_prod"]), - sphinxopts=section.get("sphinxopts", defaults["sphinxopts"]), - html_only=section.get("html_only", defaults["html_only"]), - ) +def setup_logging(log_directory: Path, select_output: str | None) -> None: + """Setup logging to stderr if run by a human, or to a file if run from a cron.""" + log_format = "%(asctime)s %(levelname)s: %(message)s" + if sys.stderr.isatty() or "CI" in os.environ: + logging.basicConfig(format=log_format, stream=sys.stderr) + else: + log_directory.mkdir(parents=True, exist_ok=True) + if select_output is None: + filename = log_directory / "docsbuild.log" + else: + filename = log_directory / f"docsbuild-{select_output}.log" + handler = logging.handlers.WatchedFileHandler(filename) + handler.setFormatter(logging.Formatter(log_format)) + logging.getLogger().addHandler(handler) + logging.getLogger().setLevel(logging.DEBUG) + + +def load_environment_variables() -> None: + dbs_user_config = platformdirs.user_config_path("docsbuild-scripts") + dbs_site_config = platformdirs.site_config_path("docsbuild-scripts") + if dbs_user_config.is_file(): + env_conf_file = dbs_user_config + elif dbs_site_config.is_file(): + env_conf_file = dbs_site_config + else: + logging.info( + "No environment variables configured. Configure in %s or %s.", + dbs_site_config, + dbs_user_config, ) - return languages + return + + logging.info("Reading environment variables from %s.", env_conf_file) + if env_conf_file == dbs_site_config: + logging.info("You can override settings in %s.", dbs_user_config) + elif dbs_site_config.is_file(): + logging.info("Overriding %s.", dbs_site_config) + env_config = env_conf_file.read_text(encoding="utf-8") + for key, value in tomlkit.parse(env_config).get("env", {}).items(): + logging.debug("Setting %s in environment.", key) + os.environ[key] = value -def build_docs(args) -> bool: - """Build all docs (each languages and each versions).""" - versions = parse_versions_from_devguide() + +def build_docs_with_lock(args: argparse.Namespace, lockfile_name: str) -> int: + try: + lock = zc.lockfile.LockFile(HERE / lockfile_name) + except zc.lockfile.LockError: + logging.info("Another builder is running... dying...") + return EX_FAILURE + + try: + return build_docs(args) + finally: + lock.close() + + +def build_docs(args: argparse.Namespace) -> int: + """Build all docs (each language and each version).""" + logging.info("Full build start.") + start_time = perf_counter() + http = urllib3.PoolManager() + versions = parse_versions_from_peps_site(http) languages = parse_languages_from_config() + # Reverse languages but not versions, because we take version-language + # pairs from the end of the list, effectively reversing it. + # This runs languages in config.toml order and versions newest first. todo = [ - (version, language) - for version in Version.filter(versions, args.branch) - for language in Language.filter(languages, args.languages) + BuildMetadata(_version=version, _language=language) + for version in versions.filter(args.branches) + for language in reversed(languages.filter(args.languages)) ] - del args.branch + del args.branches del args.languages - all_built_successfully = True + force_build = args.force + del args.force + + docs_by_version_content = render_docs_by_version(versions).encode() + switchers_content = render_switchers(versions, languages) + + build_succeeded = set() + any_build_failed = False cpython_repo = Repository( - "https://github.com/python/cpython.git", args.build_root / "cpython" + "https://github.com/python/cpython.git", + args.build_root / _checkout_name(args.select_output), ) - cpython_repo.update() while todo: - version, language = todo.pop() + build_props = todo.pop() logging.root.handlers[0].setFormatter( logging.Formatter( - f"%(asctime)s %(levelname)s {language.tag}/{version.name}: %(message)s" + f"%(asctime)s %(levelname)s {build_props.slug}: %(message)s" ) ) if sentry_sdk: - with sentry_sdk.configure_scope() as scope: - scope.set_tag("version", version.name) - scope.set_tag("language", language.tag) + scope = sentry_sdk.get_isolation_scope() + scope.set_tag("version", build_props.version) + scope.set_tag("language", build_props.language) + cpython_repo.update() builder = DocBuilder( - version, versions, language, languages, cpython_repo, **vars(args) + build_props, + cpython_repo, + docs_by_version_content, + switchers_content, + **vars(args), ) - all_built_successfully &= builder.run() + built_successfully = builder.run(http, force_build=force_build) + if built_successfully: + build_succeeded.add(build_props.slug) + elif built_successfully is not None: + any_build_failed = True + logging.root.handlers[0].setFormatter( logging.Formatter("%(asctime)s %(levelname)s: %(message)s") ) build_sitemap(versions, languages, args.www_root, args.group) build_404(args.www_root, args.group) - build_robots_txt( - versions, languages, args.www_root, args.group, args.skip_cache_invalidation + copy_robots_txt( + args.www_root, + args.group, + args.skip_cache_invalidation, + http, ) - major_symlinks(args.www_root, args.group, versions, languages, args.skip_cache_invalidation) - dev_symlink(args.www_root, args.group, versions, languages, args.skip_cache_invalidation) - proofread_canonicals(args.www_root, args.skip_cache_invalidation) + make_symlinks( + args.www_root, + args.group, + versions, + languages, + build_succeeded, + args.skip_cache_invalidation, + http, + ) + if build_succeeded: + # Only check canonicals if at least one version was built. + proofread_canonicals(args.www_root, args.skip_cache_invalidation, http) - return all_built_successfully + logging.info("Full build done (%s).", format_seconds(perf_counter() - start_time)) + return EX_FAILURE if any_build_failed else EX_OK -def main(): - """Script entry point.""" - args = parse_args() - setup_logging(args.log_directory) - try: - lock = zc.lockfile.LockFile(HERE / "build_docs.lock") - except zc.lockfile.LockError: - logging.info("Another builder is running... dying...") - return EX_FAILURE +def parse_versions_from_peps_site(http: urllib3.PoolManager) -> Versions: + releases = http.request( + "GET", + "https://peps.python.org/api/release-cycle.json", + timeout=30, + ).json() + return Versions.from_json(releases) - try: - return EX_OK if build_docs(args) else EX_FAILURE - finally: - lock.close() + +def parse_languages_from_config() -> Languages: + """Read config.toml to discover languages to build.""" + config = tomlkit.parse((HERE / "config.toml").read_text(encoding="UTF-8")) + return Languages.from_json(config["defaults"], config["languages"]) + + +def render_docs_by_version(versions: Versions) -> str: + """Generate content for _docs_by_version.html.""" + links = [f'
{% trans %}Download these documents{% endtrans %}
-{% endraw %} -{% if current_version.status != "EOL" %} -{% raw %}