From 76cdfb261b3dab72c0e64be1756a1e50d995c2d6 Mon Sep 17 00:00:00 2001 From: Julien Palard Date: Mon, 16 Nov 2020 22:40:28 +0100 Subject: [PATCH 1/2] Start a sitemap and a robots.txt. --- build_docs.py | 24 ++++++++++++++++++++++++ requirements.in | 1 + templates/robots.txt | 22 ++++++++++++++++++++++ templates/sitemap.xml | 28 ++++++++++++++++++++++++++++ 4 files changed, 75 insertions(+) create mode 100644 templates/robots.txt create mode 100644 templates/sitemap.xml diff --git a/build_docs.py b/build_docs.py index 4771518..982d69e 100755 --- a/build_docs.py +++ b/build_docs.py @@ -48,6 +48,8 @@ import sys from datetime import datetime +import jinja2 + HERE = Path(__file__).resolve().parent try: @@ -80,6 +82,10 @@ def __init__(self, name, branch, status, sphinx_version=DEFAULT_SPHINX_VERSION): self.status = status self.sphinx_version = sphinx_version + @property + def changefreq(self): + return {"EOL": "never", "security-fixes": "yearly"}.get(self.status, "daily") + @property def url(self): return "https://docs.python.org/{}/".format(self.name) @@ -484,6 +490,22 @@ def build_venv(build_root, version): return venv_path +def build_robots_txt(www_root): + with open(HERE / "templates" / "robots.txt") as robots_txt_template_file: + with open(os.path.join(www_root, "robots.txt"), "w") as robots_txt_file: + template = jinja2.Template(robots_txt_template_file.read()) + robots_txt_file.write( + template.render(languages=LANGUAGES, versions=VERSIONS) + ) + + +def build_sitemap(www_root): + with open(HERE / "templates" / "sitemap.xml") as sitemap_template_file: + with open(os.path.join(www_root, "sitemap.xml"), "w") as sitemap_file: + template = jinja2.Template(sitemap_template_file.read()) + sitemap_file.write(template.render(languages=LANGUAGES, versions=VERSIONS)) + + def copy_build_to_webroot( build_root, version, @@ -768,6 +790,8 @@ def main(): ) if sentry_sdk: sentry_sdk.capture_exception(err) + build_sitemap(args.www_root) + build_robots_txt(args.www_root) if __name__ == "__main__": diff --git a/requirements.in b/requirements.in index 9898221..b8b6a68 100644 --- a/requirements.in +++ b/requirements.in @@ -1 +1,2 @@ sentry-sdk +jinja2 diff --git a/templates/robots.txt b/templates/robots.txt new file mode 100644 index 0000000..c52e054 --- /dev/null +++ b/templates/robots.txt @@ -0,0 +1,22 @@ +Sitemap: https://docs.python.org/sitemap.xml + +# Prevent development and old documentation from showing up in search results. +User-agent: * +Disallow: /dev +Disallow: /release + +# Disallow EOL versions +Disallow: /2/ +Disallow: /2.0/ +Disallow: /2.1/ +Disallow: /2.2/ +Disallow: /2.3/ +Disallow: /2.4/ +Disallow: /2.5/ +Disallow: /2.6/ +Disallow: /2.7/ +Disallow: /3.0/ +Disallow: /3.1/ +Disallow: /3.2/ +Disallow: /3.3/ +Disallow: /3.4/ diff --git a/templates/sitemap.xml b/templates/sitemap.xml new file mode 100644 index 0000000..6ee5308 --- /dev/null +++ b/templates/sitemap.xml @@ -0,0 +1,28 @@ + + + {% for version in versions %} + + https://docs.python.org/{{ version.name }}/ + {% for language in languages -%} + + {% endfor -%} + {{ version.changefreq }} + + {% endfor %} + + https://docs.python.org/3/ + {% for language in languages -%} + + {% endfor -%} + daily} + + + + https://docs.python.org/dev/ + {% for language in languages -%} + + {% endfor -%} + daily + + From d4b9a0d12e75d5e8dd8aa7e680d732372af05aec Mon Sep 17 00:00:00 2001 From: Julien Palard Date: Mon, 16 Nov 2020 23:01:28 +0100 Subject: [PATCH 2/2] Don't show EOL and /dev/ in sitemap. --- build_docs.py | 6 ++++-- templates/sitemap.xml | 10 ++-------- 2 files changed, 6 insertions(+), 10 deletions(-) diff --git a/build_docs.py b/build_docs.py index 982d69e..fb61a47 100755 --- a/build_docs.py +++ b/build_docs.py @@ -495,7 +495,7 @@ def build_robots_txt(www_root): with open(os.path.join(www_root, "robots.txt"), "w") as robots_txt_file: template = jinja2.Template(robots_txt_template_file.read()) robots_txt_file.write( - template.render(languages=LANGUAGES, versions=VERSIONS) + template.render(languages=LANGUAGES, versions=VERSIONS) + "\n" ) @@ -503,7 +503,9 @@ def build_sitemap(www_root): with open(HERE / "templates" / "sitemap.xml") as sitemap_template_file: with open(os.path.join(www_root, "sitemap.xml"), "w") as sitemap_file: template = jinja2.Template(sitemap_template_file.read()) - sitemap_file.write(template.render(languages=LANGUAGES, versions=VERSIONS)) + sitemap_file.write( + template.render(languages=LANGUAGES, versions=VERSIONS) + "\n" + ) def copy_build_to_webroot( diff --git a/templates/sitemap.xml b/templates/sitemap.xml index 6ee5308..84487aa 100644 --- a/templates/sitemap.xml +++ b/templates/sitemap.xml @@ -2,6 +2,7 @@ {% for version in versions %} + {%- if version.status != "EOL" %} https://docs.python.org/{{ version.name }}/ {% for language in languages -%} @@ -9,20 +10,13 @@ {% endfor -%} {{ version.changefreq }} + {% endif -%} {% endfor %} https://docs.python.org/3/ {% for language in languages -%} {% endfor -%} - daily} - - - - https://docs.python.org/dev/ - {% for language in languages -%} - - {% endfor -%} daily