diff --git a/.copier-answers.yml b/.copier-answers.yml index c198014..57e594b 100644 --- a/.copier-answers.yml +++ b/.copier-answers.yml @@ -1,5 +1,5 @@ # Changes here will be overwritten by Copier. -_commit: 1.8.2 +_commit: 1.8.4 _src_path: gh:pawamoy/copier-uv author_email: dev@pawamoy.fr author_fullname: Timothée Mazzucotelli diff --git a/CHANGELOG.md b/CHANGELOG.md index e35332b..27f1fe8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -6,6 +6,18 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/) and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html). +## [0.3.0](https://github.com/pawamoy/mkdocs-llmstxt/releases/tag/0.3.0) - 2025-07-14 + +[Compare with 0.2.0](https://github.com/pawamoy/mkdocs-llmstxt/compare/0.2.0...0.3.0) + +### Features + +- Support file descriptions ([33f64b3](https://github.com/pawamoy/mkdocs-llmstxt/commit/33f64b306199218dbb34cd796e59113388a6c26c) by Logan). [Issue-6](https://github.com/pawamoy/mkdocs-llmstxt/issues/6), [PR-8](https://github.com/pawamoy/mkdocs-llmstxt/pull/8), Co-authored-by: Timothée Mazzucotelli + +### Bug Fixes + +- Support formatting Markdown tables ([f1fc875](https://github.com/pawamoy/mkdocs-llmstxt/commit/f1fc8757dcab95af7b645331ddc5f1f01888bc88) by Timothée Mazzucotelli). [Issue-13](https://github.com/pawamoy/mkdocs-llmstxt/issues/13) + ## [0.2.0](https://github.com/pawamoy/mkdocs-llmstxt/releases/tag/0.2.0) - 2025-04-08 [Compare with 0.1.0](https://github.com/pawamoy/mkdocs-llmstxt/compare/0.1.0...0.2.0) diff --git a/README.md b/README.md index b080569..8a80f73 100644 --- a/README.md +++ b/README.md @@ -31,8 +31,8 @@ plugins: markdown_description: Long description of my project. sections: Usage documentation: - - file1.md - - file2.md + - file1.md: Description of file1 + - file2.md # Descriptions are optional. ``` The resulting `/llms.txt` file will be available at the root of your documentation. With the previous example, it will be accessible at https://myproject.com/llms.txt and will contain the following: @@ -46,7 +46,7 @@ Long description of my project. ## Usage documentation -- [File1 title](https://myproject.com/file1.md) +- [File1 title](https://myproject.com/file1.md): Description of file1 - [File2 title](https://myproject.com/file2.md) ``` @@ -59,7 +59,7 @@ plugins: - llmstxt: sections: Usage documentation: - - index.md + - index.md: Main documentation page - usage/*.md ``` @@ -67,7 +67,7 @@ plugins: Although not explicitly written out in the https://llmstxt.org/ guidelines, it is common to output a `llms-full.txt` file with every page content expanded. This file can be generated by setting the `full_output` configuration value: -```markdown +```yaml title="mkdocs.yml" plugins: - llmstxt: full_output: llms-full.txt @@ -118,4 +118,4 @@ def preprocess(soup: BeautifulSoup, output: str) -> None: The `output` argument lets you modify the soup *depending on which file is being generated*. -Have a look at [our own cleaning function](https://pawamoy.github.io/mkdocs-llmstxt/reference/mkdocs_llmstxt/#mkdocs_llmstxt.autoclean) to get inspiration. +Have a look at [our own cleaning function](https://pawamoy.github.io/mkdocs-llmstxt/reference/api/#mkdocs_llmstxt.autoclean) to get inspiration. diff --git a/duties.py b/duties.py index 5a34476..bb56eb5 100644 --- a/duties.py +++ b/duties.py @@ -193,7 +193,7 @@ def coverage(ctx: Context) -> None: @duty -def test(ctx: Context, *cli_args: str, match: str = "") -> None: +def test(ctx: Context, *cli_args: str, match: str = "") -> None: # noqa: PT028 """Run the test suite. Parameters: diff --git a/mkdocs.yml b/mkdocs.yml index e76bff1..a0ba88c 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -139,7 +139,7 @@ plugins: Usage documentation: - index.md API reference: - - reference/*.md + - reference/api.md - git-revision-date-localized: enabled: !ENV [DEPLOY, false] enable_creation_date: true diff --git a/pyproject.toml b/pyproject.toml index febbe19..28b3e6d 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -33,6 +33,7 @@ dependencies = [ "beautifulsoup4>=4.12", "markdownify>=0.14", "mdformat>=0.7.21", + "mdformat-tables>=1.0", ] [project.urls] @@ -99,7 +100,7 @@ ci = [ "mkdocs>=1.6", "mkdocs-coverage>=1.0", "mkdocs-git-revision-date-localized-plugin>=1.2", - "mkdocs-llmstxt>=0.1", + "mkdocs-llmstxt>=0.2", "mkdocs-material>=9.5", "mkdocs-minify-plugin>=0.8", "mkdocs-section-index>=0.3", diff --git a/src/mkdocs_llmstxt/_internal/config.py b/src/mkdocs_llmstxt/_internal/config.py index a9f5877..c5fef5d 100644 --- a/src/mkdocs_llmstxt/_internal/config.py +++ b/src/mkdocs_llmstxt/_internal/config.py @@ -13,4 +13,12 @@ class _PluginConfig(BaseConfig): preprocess = mkconf.Optional(mkconf.File(exists=True)) markdown_description = mkconf.Optional(mkconf.Type(str)) full_output = mkconf.Optional(mkconf.Type(str)) - sections = mkconf.DictOfItems(mkconf.ListOfItems(mkconf.Type(str))) + sections = mkconf.DictOfItems( + # Each list item can either be: + # + # - a string representing the source file path (possibly with glob patterns) + # - a mapping where the single key is the file path and the value is its description. + # + # We therefore accept both `str` and `dict` values. + mkconf.ListOfItems(mkconf.Type((str, dict))), + ) diff --git a/src/mkdocs_llmstxt/_internal/plugin.py b/src/mkdocs_llmstxt/_internal/plugin.py index e89f2bb..570611a 100644 --- a/src/mkdocs_llmstxt/_internal/plugin.py +++ b/src/mkdocs_llmstxt/_internal/plugin.py @@ -36,6 +36,7 @@ class _MDPageInfo(NamedTuple): path_md: Path md_url: str content: str + description: str class MkdocsLLMsTxtPlugin(BasePlugin[_PluginConfig]): @@ -56,13 +57,21 @@ class MkdocsLLMsTxtPlugin(BasePlugin[_PluginConfig]): md_pages: dict[str, list[_MDPageInfo]] """Dictionary mapping section names to a list of page infos.""" - def _expand_inputs(self, inputs: list[str], page_uris: list[str]) -> list[str]: - expanded: list[str] = [] - for input_file in inputs: + _sections: dict[str, dict[str, str]] + + def _expand_inputs(self, inputs: list[str | dict[str, str]], page_uris: list[str]) -> dict[str, str]: + expanded: dict[str, str] = {} + for input_item in inputs: + if isinstance(input_item, dict): + input_file, description = next(iter(input_item.items())) + else: + input_file = input_item + description = "" if "*" in input_file: - expanded.extend(fnmatch.filter(page_uris, input_file)) + for match in fnmatch.filter(page_uris, input_file): + expanded[match] = description else: - expanded.append(input_file) + expanded[input_file] = description return expanded def on_config(self, config: MkDocsConfig) -> MkDocsConfig | None: @@ -81,6 +90,7 @@ def on_config(self, config: MkDocsConfig) -> MkDocsConfig | None: if config.site_url is None: raise ValueError("'site_url' must be set in the MkDocs configuration to be used with the 'llmstxt' plugin") self.mkdocs_config = config + # A `defaultdict` could be used, but we need to retain the same order between `config.sections` and `md_pages` # (which wouldn't be guaranteed when filling `md_pages` in `on_page_content()`). self.md_pages = {section: [] for section in self.config.sections} @@ -100,10 +110,10 @@ def on_files(self, files: Files, *, config: MkDocsConfig) -> Files | None: # no Modified collection or none. """ page_uris = list(files.src_uris) - - for section_name, file_list in list(self.config.sections.items()): - self.config.sections[section_name] = self._expand_inputs(file_list, page_uris=page_uris) - + self._sections = { + section_name: self._expand_inputs(file_list, page_uris=page_uris) # type: ignore[arg-type] + for section_name, file_list in self.config.sections.items() + } return files def on_page_content(self, html: str, *, page: Page, **kwargs: Any) -> str | None: # noqa: ARG002 @@ -115,8 +125,9 @@ def on_page_content(self, html: str, *, page: Page, **kwargs: Any) -> str | None html: The rendered HTML. page: The page object. """ - for section_name, file_list in self.config.sections.items(): - if page.file.src_uri in file_list: + src_uri = page.file.src_uri + for section_name, files in self._sections.items(): + if src_uri in files: path_md = Path(page.file.abs_dest_path).with_suffix(".md") page_md = _generate_page_markdown( html, @@ -138,10 +149,11 @@ def on_page_content(self, html: str, *, page: Page, **kwargs: Any) -> str | None self.md_pages[section_name].append( _MDPageInfo( - title=page.title if page.title is not None else page.file.src_uri, + title=page.title if page.title is not None else src_uri, path_md=path_md, md_url=md_url, content=page_md, + description=files[src_uri], ), ) @@ -169,10 +181,10 @@ def on_post_build(self, *, config: MkDocsConfig, **kwargs: Any) -> None: # noqa for section_name, file_list in self.md_pages.items(): markdown += f"## {section_name}\n\n" - for page_title, path_md, md_url, content in file_list: + for page_title, path_md, md_url, content, desc in file_list: path_md.write_text(content, encoding="utf8") _logger.debug(f"Generated MD file to {path_md}") - markdown += f"- [{page_title}]({md_url})\n" + markdown += f"- [{page_title}]({md_url}){(': ' + desc) if desc else ''}\n" markdown += "\n" output_file.write_text(markdown, encoding="utf8") @@ -225,4 +237,8 @@ def _generate_page_markdown( autoclean(soup) if preprocess: _preprocess(soup, preprocess, path) - return mdformat.text(_converter.convert_soup(soup), options={"wrap": "no"}) + return mdformat.text( + _converter.convert_soup(soup), + options={"wrap": "no"}, + extensions=("tables",), + ) diff --git a/tests/conftest.py b/tests/conftest.py index 3be27ba..517fe7f 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -1 +1,48 @@ """Configuration for the pytest test suite.""" + +from __future__ import annotations + +from pathlib import Path +from typing import TYPE_CHECKING + +import pytest +from mkdocs.config.defaults import MkDocsConfig + +if TYPE_CHECKING: + from mkdocs_llmstxt._internal.plugin import MkdocsLLMsTxtPlugin + + +@pytest.fixture(name="mkdocs_conf") +def fixture_mkdocs_conf(request: pytest.FixtureRequest, tmp_path: Path) -> MkDocsConfig: + """Yield a MkDocs configuration object.""" + while hasattr(request, "_parent_request") and hasattr(request._parent_request, "_parent_request"): + request = request._parent_request + params = getattr(request, "param", {}) + config = params.get("config", {}) + pages = params.get("pages", {}) + conf = MkDocsConfig() + conf.load_dict( + { + "site_name": "Test Project", + "site_url": "https://example.org/", + "site_dir": str(tmp_path / "site"), + "docs_dir": str(tmp_path / "docs"), + **config, + }, + ) + Path(conf.docs_dir).mkdir(exist_ok=True) + for page, content in pages.items(): + page_file = Path(conf.docs_dir, page) + page_file.parent.mkdir(exist_ok=True) + page_file.write_text(content) + assert conf.validate() == ([], []) + if "toc" not in conf.markdown_extensions: + # Guaranteed to be added by MkDocs. + conf.markdown_extensions.insert(0, "toc") + return conf + + +@pytest.fixture(name="plugin") +def fixture_plugin(mkdocs_conf: MkDocsConfig) -> MkdocsLLMsTxtPlugin: + """Return a plugin instance.""" + return mkdocs_conf.plugins["llmstxt"] # type: ignore[return-value] diff --git a/tests/test_plugin.py b/tests/test_plugin.py index b892496..ad85dab 100644 --- a/tests/test_plugin.py +++ b/tests/test_plugin.py @@ -1,11 +1,57 @@ """Tests for the plugin.""" +from pathlib import Path + import pytest -from duty.tools import mkdocs +from mkdocs.commands.build import build +from mkdocs.config.defaults import MkDocsConfig + + +@pytest.mark.parametrize( + "mkdocs_conf", + [ + { + "config": { + "plugins": [ + { + "llmstxt": { + "full_output": "llms-full.txt", + "sections": { + "Index": ["index.md"], + "Usage": [{"page1.md": "Some usage docs."}], + }, + }, + }, + ], + }, + "pages": { + "index.md": "# Hello world", + "page1.md": "# Usage\n\nSome paragraph.", + }, + }, + ], + indirect=["mkdocs_conf"], +) +def test_plugin(mkdocs_conf: MkDocsConfig) -> None: + """Test that page descriptions are correctly handled and included in output.""" + build(config=mkdocs_conf) + + llmstxt = Path(mkdocs_conf.site_dir, "llms.txt") + assert llmstxt.exists() + llmstxt_content = llmstxt.read_text() + assert "Some usage docs." in llmstxt_content + assert "Some paragraph." not in llmstxt_content + + llmsfulltxt = Path(mkdocs_conf.site_dir, "llms-full.txt") + assert llmsfulltxt.exists() + llmsfulltxt_content = llmsfulltxt.read_text() + assert "Some usage docs." not in llmsfulltxt_content + assert "Some paragraph." in llmsfulltxt_content + indexmd = Path(mkdocs_conf.site_dir, "index.md") + assert indexmd.exists() + assert "Hello world" in indexmd.read_text() -def test_plugin() -> None: - """Run the plugin.""" - with pytest.raises(expected_exception=SystemExit) as exc: - mkdocs.build()() - assert exc.value.code == 0 + page1md = Path(mkdocs_conf.site_dir, "page1/index.md") + assert page1md.exists() + assert "Some paragraph." in page1md.read_text()