From 41fac851fb62b4224e0400be46a3884708d185c7 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Tue, 12 Dec 2023 19:02:14 -0500 Subject: [PATCH 001/655] Avoid mktemp in tests, in straightforward cases The tempfile.mktemp function is deprecated, because of a race condition where the file may be concurrently created between when its name is generated and when it is opened. Other faciliies in the tempfile module overcome this by generating a name, attempting to create the file or directory in a way that guarantees failure if it already existed, and, in the occasional case that it did already exist, generating another name and trying again (stopping after a predefined limit). For further information on mktemp deprecation: - https://docs.python.org/3/library/tempfile.html#tempfile.mktemp - https://github.com/gitpython-developers/smmap/issues/41 The security risk of calls to mktemp in this project's test suite is low. However, it is still best to avoid using it, because it is deprecated, because it is (at least slightly) brittle, and because any use of mktemp looks like a potential security risk and thereby imposes a burden on working with the code (which could potentially be addressed with detailed comments analyzing why it is believed safe in particular cases, but this would typically be more verbose, and at least as challenging to add, as replacing mktemp with a better alternative). This commit replaces *some* uses of mktemp in the test suite: those where it is readily clear how to do so in a way that preserves the code's intent: - Where a name for a temporary directory is generated with mktemp and os.mkdir is called immediately, mkdtemp is now used. - Where a name for a temporary file that is not customized (such as with a prefix) is generated with mktemp, such that the code under test never uses the filename but only the already-open file-like object, TemporaryFile is now used. As the name isn't customized, the test code in these cases does not express an intent to allow the developer to inspect the file after a test failure, so even if the file wasn't guaranteed to be deleted with a finally block or context manager, it is fine to do so. TemporaryFile supports this use case well on all systems including Windows, and automatically deletes the file. - Where a name for a temporary file that *is* customized (such as with a prefix) to reflect the way the test uses it is generated with mktemp, and the test code does not attempt deterministic deletion of the file when an exception would make the test fail, NamedTemporaryFile with delete=False is now used. The original code to remove the file when the test succeeds is modified accordingly to do the same job, and also commented to explain that it is being handled this way to allow the file to be kept and examined when a test failure occurs. Other cases in the test suite should also be feasible to replace, but are left alone for now. Some of them are ambiguous in their intent, with respect to whether the file should be retained after a test failure. Others appear deliberately to avoid creating a file or directory where the code under test should do so, possibly to check that this is done properly. (One way to preserve that latter behavior, while avoiding the weakness of using mktemp and also avoiding inadverently reproducing that weakness by other means, could be to use a path in a temporary directory made for the test.) This commit also doesn't address the one use of mktemp in the code under test (i.e., outside the test suite, inside the git module). --- test/lib/helper.py | 3 +- test/performance/lib.py | 3 +- test/test_base.py | 5 ++-- test/test_reflog.py | 7 ++--- test/test_repo.py | 5 ++-- test/test_util.py | 64 ++++++++++++++++++++++------------------- 6 files changed, 42 insertions(+), 45 deletions(-) diff --git a/test/lib/helper.py b/test/lib/helper.py index 58d96534a..d662b632d 100644 --- a/test/lib/helper.py +++ b/test/lib/helper.py @@ -89,8 +89,7 @@ def with_rw_directory(func): @wraps(func) def wrapper(self): - path = tempfile.mktemp(prefix=func.__name__) - os.mkdir(path) + path = tempfile.mkdtemp(prefix=func.__name__) keep = False try: return func(self, path) diff --git a/test/performance/lib.py b/test/performance/lib.py index ceee6c2a1..d08e1027f 100644 --- a/test/performance/lib.py +++ b/test/performance/lib.py @@ -65,8 +65,7 @@ class TestBigRepoRW(TestBigRepoR): def setUp(self): self.gitrwrepo = None super().setUp() - dirname = tempfile.mktemp() - os.mkdir(dirname) + dirname = tempfile.mkdtemp() self.gitrwrepo = self.gitrorepo.clone(dirname, shared=True, bare=True, odbt=GitCmdObjectDB) self.puregitrwrepo = Repo(dirname, odbt=GitDB) diff --git a/test/test_base.py b/test/test_base.py index cdf82b74d..74f342071 100644 --- a/test/test_base.py +++ b/test/test_base.py @@ -68,12 +68,11 @@ def test_base_object(self): data = data_stream.read() assert data - tmpfilename = tempfile.mktemp(suffix="test-stream") - with open(tmpfilename, "wb+") as tmpfile: + with tempfile.NamedTemporaryFile(suffix="test-stream", delete=False) as tmpfile: self.assertEqual(item, item.stream_data(tmpfile)) tmpfile.seek(0) self.assertEqual(tmpfile.read(), data) - os.remove(tmpfilename) + os.remove(tmpfile.name) # Do it this way so we can inspect the file on failure. # END for each object type to create # Each has a unique sha. diff --git a/test/test_reflog.py b/test/test_reflog.py index 625466d40..1bd2e5dab 100644 --- a/test/test_reflog.py +++ b/test/test_reflog.py @@ -1,7 +1,7 @@ # This module is part of GitPython and is released under the # 3-Clause BSD License: https://opensource.org/license/bsd-3-clause/ -import os +import os.path as osp import tempfile from git.objects import IndexObject @@ -9,8 +9,6 @@ from test.lib import TestBase, fixture_path from git.util import Actor, rmtree, hex_to_bin -import os.path as osp - class TestRefLog(TestBase): def test_reflogentry(self): @@ -35,8 +33,7 @@ def test_reflogentry(self): def test_base(self): rlp_head = fixture_path("reflog_HEAD") rlp_master = fixture_path("reflog_master") - tdir = tempfile.mktemp(suffix="test_reflogs") - os.mkdir(tdir) + tdir = tempfile.mkdtemp(suffix="test_reflogs") rlp_master_ro = RefLog.path(self.rorepo.head) assert osp.isfile(rlp_master_ro) diff --git a/test/test_repo.py b/test/test_repo.py index 007b8ecb0..465bb2574 100644 --- a/test/test_repo.py +++ b/test/test_repo.py @@ -667,11 +667,10 @@ def test_tag_to_full_tag_path(self): self.assertEqual(value_errors, []) def test_archive(self): - tmpfile = tempfile.mktemp(suffix="archive-test") - with open(tmpfile, "wb") as stream: + with tempfile.NamedTemporaryFile("wb", suffix="archive-test", delete=False) as stream: self.rorepo.archive(stream, "0.1.6", path="doc") assert stream.tell() - os.remove(tmpfile) + os.remove(stream.name) # Do it this way so we can inspect the file on failure. @mock.patch.object(Git, "_call_process") def test_should_display_blame_information(self, git): diff --git a/test/test_util.py b/test/test_util.py index 1cb255cfe..6616e1067 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -359,48 +359,52 @@ def test_it_should_dashify(self): self.assertEqual("foo", dashify("foo")) def test_lock_file(self): - my_file = tempfile.mktemp() - lock_file = LockFile(my_file) - assert not lock_file._has_lock() - # Release lock we don't have - fine. - lock_file._release_lock() + with tempfile.TemporaryDirectory() as tdir: + my_file = os.path.join(tdir, "my-lock-file") + lock_file = LockFile(my_file) + assert not lock_file._has_lock() + # Release lock we don't have - fine. + lock_file._release_lock() - # Get lock. - lock_file._obtain_lock_or_raise() - assert lock_file._has_lock() + # Get lock. + lock_file._obtain_lock_or_raise() + assert lock_file._has_lock() - # Concurrent access. - other_lock_file = LockFile(my_file) - assert not other_lock_file._has_lock() - self.assertRaises(IOError, other_lock_file._obtain_lock_or_raise) + # Concurrent access. + other_lock_file = LockFile(my_file) + assert not other_lock_file._has_lock() + self.assertRaises(IOError, other_lock_file._obtain_lock_or_raise) - lock_file._release_lock() - assert not lock_file._has_lock() + lock_file._release_lock() + assert not lock_file._has_lock() - other_lock_file._obtain_lock_or_raise() - self.assertRaises(IOError, lock_file._obtain_lock_or_raise) + other_lock_file._obtain_lock_or_raise() + self.assertRaises(IOError, lock_file._obtain_lock_or_raise) - # Auto-release on destruction. - del other_lock_file - lock_file._obtain_lock_or_raise() - lock_file._release_lock() + # Auto-release on destruction. + del other_lock_file + lock_file._obtain_lock_or_raise() + lock_file._release_lock() def test_blocking_lock_file(self): - my_file = tempfile.mktemp() - lock_file = BlockingLockFile(my_file) - lock_file._obtain_lock() - - # Next one waits for the lock. - start = time.time() - wait_time = 0.1 - wait_lock = BlockingLockFile(my_file, 0.05, wait_time) - self.assertRaises(IOError, wait_lock._obtain_lock) - elapsed = time.time() - start + with tempfile.TemporaryDirectory() as tdir: + my_file = os.path.join(tdir, "my-lock-file") + lock_file = BlockingLockFile(my_file) + lock_file._obtain_lock() + + # Next one waits for the lock. + start = time.time() + wait_time = 0.1 + wait_lock = BlockingLockFile(my_file, 0.05, wait_time) + self.assertRaises(IOError, wait_lock._obtain_lock) + elapsed = time.time() - start + extra_time = 0.02 if os.name == "nt" or sys.platform == "cygwin": extra_time *= 6 # Without this, we get indeterministic failures on Windows. elif sys.platform == "darwin": extra_time *= 18 # The situation on macOS is similar, but with more delay. + self.assertLess(elapsed, wait_time + extra_time) def test_user_id(self): From 9e86053c885a27360d9d4699fafaef5e995d6ec5 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Tue, 12 Dec 2023 22:30:51 -0500 Subject: [PATCH 002/655] Replace the one use of mktemp in the git module This makes two related changes to git.index.util.TemporaryFileSwap: - Replace mktemp with mkstemp and then immediately closing the file. This avoids two possible name clashes: the usual name clash where the file may be created by another process between when mktemp generates the name and when the file is created, and the problem that mktemp does not check for files that contain the generated name as a part. The latter is specific to the use here, where a string generated by mktemp was manually concatenated to the base filename. This addresses that by passing the filename as the prefix for mkstemp to use. - Replace os.remove with os.replace and simplify. This is made necessary on Windows by the file already existing, due to mkstemp creating it. Deleting the file and allowing it to be recreated would reproduce the mktemp race condition in full (obscured and with extra steps). But os.replace supports an existing target file on all platforms. It is now also used in __exit__, where it allows the Windows check for the file to be removed, and (in any OS) better expresses the intent of the code to human readers. Furthermore, because one of the "look before you leap" checks in __exit__ is removed, the minor race condition in cleaning up the file is somewhat decreased. A small amount of related refactoring is included. The interface is not changed, even where it could be simplified (by letting __exit__ return None), and resource acquisition remains done on construction rather than in __enter__, because changing those aspects of the design could break some existing uses. Although the use of mktemp replaced here was in the git module and not the test suite, its use was to generate filenames for use in a directory that would ordinarily be under the user's control, such that the ability to carry out typical mktemp-related attacks would already require the ability to achieve the same goals more easily and reliably. Although TemporaryFileSwap is a public class that may be used directly by applications, it is only useful for making a temporary file in the same directory as an existing file. Thus the intended benefits of this change are mainly to code quality and a slight improvement in robustness. --- git/index/util.py | 16 ++++++---------- 1 file changed, 6 insertions(+), 10 deletions(-) diff --git a/git/index/util.py b/git/index/util.py index b1aaa58fd..1c3b1c4ad 100644 --- a/git/index/util.py +++ b/git/index/util.py @@ -3,6 +3,7 @@ """Index utilities.""" +import contextlib from functools import wraps import os import os.path as osp @@ -40,12 +41,10 @@ class TemporaryFileSwap: def __init__(self, file_path: PathLike) -> None: self.file_path = file_path - self.tmp_file_path = str(self.file_path) + tempfile.mktemp("", "", "") - # It may be that the source does not exist. - try: - os.rename(self.file_path, self.tmp_file_path) - except OSError: - pass + fd, self.tmp_file_path = tempfile.mkstemp(prefix=self.file_path, dir="") + os.close(fd) + with contextlib.suppress(OSError): # It may be that the source does not exist. + os.replace(self.file_path, self.tmp_file_path) def __enter__(self) -> "TemporaryFileSwap": return self @@ -57,10 +56,7 @@ def __exit__( exc_tb: Optional[TracebackType], ) -> bool: if osp.isfile(self.tmp_file_path): - if os.name == "nt" and osp.exists(self.file_path): - os.remove(self.file_path) - os.rename(self.tmp_file_path, self.file_path) - + os.replace(self.tmp_file_path, self.file_path) return False From 83b7ec61a1596971d4834e3c93a51b6efdf9e766 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Sun, 10 Dec 2023 03:40:28 -0500 Subject: [PATCH 003/655] Set up CodeQL This is an automatically generated CodeQL workflow for the project. It is not yet customized. --- .github/workflows/codeql.yml | 81 ++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 .github/workflows/codeql.yml diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml new file mode 100644 index 000000000..7f64e35cb --- /dev/null +++ b/.github/workflows/codeql.yml @@ -0,0 +1,81 @@ +# For most projects, this workflow file will not need changing; you simply need +# to commit it to your repository. +# +# You may wish to alter this file to override the set of languages analyzed, +# or to provide custom queries or build logic. +# +# ******** NOTE ******** +# We have attempted to detect the languages in your repository. Please check +# the `language` matrix defined below to confirm you have the correct set of +# supported CodeQL languages. +# +name: "CodeQL" + +on: + push: + branches: [ "main" ] + pull_request: + branches: [ "main" ] + schedule: + - cron: '27 10 * * 3' + +jobs: + analyze: + name: Analyze + # Runner size impacts CodeQL analysis time. To learn more, please see: + # - https://gh.io/recommended-hardware-resources-for-running-codeql + # - https://gh.io/supported-runners-and-hardware-resources + # - https://gh.io/using-larger-runners + # Consider using larger runners for possible analysis time improvements. + runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} + timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }} + permissions: + actions: read + contents: read + security-events: write + + strategy: + fail-fast: false + matrix: + language: [ 'python' ] + # CodeQL supports [ 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' ] + # Use only 'java-kotlin' to analyze code written in Java, Kotlin or both + # Use only 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both + # Learn more about CodeQL language support at https://aka.ms/codeql-docs/language-support + + steps: + - name: Checkout repository + uses: actions/checkout@v3 + + # Initializes the CodeQL tools for scanning. + - name: Initialize CodeQL + uses: github/codeql-action/init@v2 + with: + languages: ${{ matrix.language }} + # If you wish to specify custom queries, you can do so here or in a config file. + # By default, queries listed here will override any specified in a config file. + # Prefix the list here with "+" to use these queries and those in the config file. + + # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs + # queries: security-extended,security-and-quality + + + # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift). + # If this step fails, then you should remove it and run the build manually (see below) + - name: Autobuild + uses: github/codeql-action/autobuild@v2 + + # â„šī¸ Command-line programs to run using the OS shell. + # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun + + # If the Autobuild fails above, remove it and uncomment the following three lines. + # modify them (or add more) to build your code if your project, please refer to the EXAMPLE below for guidance. + + # - run: | + # echo "Run, Build Application using script" + # ./location_of_script_within_repo/buildscript.sh + + - name: Perform CodeQL Analysis + uses: github/codeql-action/analyze@v2 + with: + category: "/language:${{matrix.language}}" From 58547d82fa36165f611563c6bc449fde54dd3b2e Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Sun, 10 Dec 2023 03:41:16 -0500 Subject: [PATCH 004/655] Customize CodeQL - Run CodeQL on all branches. - Don't install Python dependencies. - Use v4 of actions/checkout. --- .github/workflows/codeql.yml | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 7f64e35cb..0d4d81efd 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -13,9 +13,7 @@ name: "CodeQL" on: push: - branches: [ "main" ] pull_request: - branches: [ "main" ] schedule: - cron: '27 10 * * 3' @@ -45,13 +43,14 @@ jobs: steps: - name: Checkout repository - uses: actions/checkout@v3 + uses: actions/checkout@v4 # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL uses: github/codeql-action/init@v2 with: languages: ${{ matrix.language }} + setup-python-dependencies: false # If you wish to specify custom queries, you can do so here or in a config file. # By default, queries listed here will override any specified in a config file. # Prefix the list here with "+" to use these queries and those in the config file. From d46c70d8059f20cab655720ec5792d18b71daad0 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 18 Dec 2023 13:11:25 +0000 Subject: [PATCH 005/655] Bump github/codeql-action from 2 to 3 Bumps [github/codeql-action](https://github.com/github/codeql-action) from 2 to 3. - [Release notes](https://github.com/github/codeql-action/releases) - [Changelog](https://github.com/github/codeql-action/blob/main/CHANGELOG.md) - [Commits](https://github.com/github/codeql-action/compare/v2...v3) --- updated-dependencies: - dependency-name: github/codeql-action dependency-type: direct:production update-type: version-update:semver-major ... Signed-off-by: dependabot[bot] --- .github/workflows/codeql.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/codeql.yml b/.github/workflows/codeql.yml index 0d4d81efd..ae5241898 100644 --- a/.github/workflows/codeql.yml +++ b/.github/workflows/codeql.yml @@ -47,7 +47,7 @@ jobs: # Initializes the CodeQL tools for scanning. - name: Initialize CodeQL - uses: github/codeql-action/init@v2 + uses: github/codeql-action/init@v3 with: languages: ${{ matrix.language }} setup-python-dependencies: false @@ -62,7 +62,7 @@ jobs: # Autobuild attempts to build any compiled languages (C/C++, C#, Go, Java, or Swift). # If this step fails, then you should remove it and run the build manually (see below) - name: Autobuild - uses: github/codeql-action/autobuild@v2 + uses: github/codeql-action/autobuild@v3 # â„šī¸ Command-line programs to run using the OS shell. # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun @@ -75,6 +75,6 @@ jobs: # ./location_of_script_within_repo/buildscript.sh - name: Perform CodeQL Analysis - uses: github/codeql-action/analyze@v2 + uses: github/codeql-action/analyze@v3 with: category: "/language:${{matrix.language}}" From b12a54a2fc7e988667f95145833a0dc8402b084d Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Mon, 18 Dec 2023 19:51:34 -0500 Subject: [PATCH 006/655] Use Path.touch to create files for rmtree tests It's not necessary to use `write_bytes(b"")`, because pathlib.Path has `touch()`. --- test/test_util.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/test/test_util.py b/test/test_util.py index 6616e1067..9408a91a0 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -46,7 +46,7 @@ def permission_error_tmpdir(tmp_path): """Fixture to test permissions errors in situations where they are not overcome.""" td = tmp_path / "testdir" td.mkdir() - (td / "x").write_bytes(b"") + (td / "x").touch() # Set up PermissionError on Windows, where we can't delete read-only files. (td / "x").chmod(stat.S_IRUSR) @@ -73,7 +73,7 @@ def test_deletes_nested_dir_with_files(self, tmp_path): td / "s" / "y", td / "s" / "z", ): - f.write_bytes(b"") + f.touch() try: rmtree(td) @@ -95,7 +95,7 @@ def test_deletes_dir_with_readonly_files(self, tmp_path): for d in td, td / "sub": d.mkdir() for f in td / "x", td / "sub" / "y": - f.write_bytes(b"") + f.touch() f.chmod(0) try: @@ -115,7 +115,7 @@ def test_avoids_changing_permissions_outside_tree(self, tmp_path): dir1 = tmp_path / "dir1" dir1.mkdir() - (dir1 / "file").write_bytes(b"") + (dir1 / "file").touch() (dir1 / "file").chmod(stat.S_IRUSR) old_mode = (dir1 / "file").stat().st_mode From 6a8ed70a6003c13800d908ab055038eb61ba4ce9 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Mon, 18 Dec 2023 20:00:56 -0500 Subject: [PATCH 007/655] Run test_env_vars_for_windows_tests only on Windows This skips the tests of how the HIDE_WINDOWS_KNOWN_ERRORS and HIDE_WINDOWS_FREEZE_ERRORS environment variables affect the same-named attributes of git.util, except when testing on Windows. These are parsed only to ever set a True value on Windows, but checking that this is the case is less important ever since git.util.rmtree was changed to not check HIDE_WINDOWS_KNOWN_ERRORS on other systems (and this is covered in other tests). Setting the variables to True on non-Windows systems would still have a bad effect on the tests themselves, some of which use them as skip or xfail conditions separate from the skipping logic in git.util.rmtree. However, this is effectively using them as part of the test suite (which they were initially meant for and which they may eventually go back to being, for #790), where they would not ordinarily have tests. The benefit and motivation for running these tests only on Windows is that the tests can be simplified, so that their parameter sets are no longer confusing. That change is also made here. --- test/test_util.py | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) diff --git a/test/test_util.py b/test/test_util.py index 9408a91a0..f3769088c 100644 --- a/test/test_util.py +++ b/test/test_util.py @@ -207,24 +207,28 @@ def _run_parse(name, value): ) return ast.literal_eval(output) + @pytest.mark.skipif( + os.name != "nt", + reason="These environment variables are only used on Windows.", + ) @pytest.mark.parametrize( "env_var_value, expected_truth_value", [ - (None, os.name == "nt"), # True on Windows when the environment variable is unset. + (None, True), # When the environment variable is unset. ("", False), (" ", False), ("0", False), - ("1", os.name == "nt"), + ("1", True), ("false", False), - ("true", os.name == "nt"), + ("true", True), ("False", False), - ("True", os.name == "nt"), + ("True", True), ("no", False), - ("yes", os.name == "nt"), + ("yes", True), ("NO", False), - ("YES", os.name == "nt"), + ("YES", True), (" no ", False), - (" yes ", os.name == "nt"), + (" yes ", True), ], ) @pytest.mark.parametrize( From 487a4fdb38efc82d30111e49f941a25cba4aa7a7 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Wed, 20 Dec 2023 22:39:11 -0500 Subject: [PATCH 008/655] Add a test for git.index.util.TemporaryFileSwap This is a general test for TemporaryFileSwap, but by being parametrized by the type of file_path, it reveals a regression introduced in 9e86053 (#1770). TemporaryFileSwap still works when file_path is a string, but is now broken when it is a Path. That worked before, and the type annotations document that it should be able to work. This is at least a bug because TemporaryFileSwap is public. (I am unsure whether, in practice, GitPython itself uses it in a way that sometimes passes a Path object as file_path. But code that uses GitPython may call it directly and pass Path.) --- test/test_index.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/test/test_index.py b/test/test_index.py index 2f97f0af8..c3f3b4fae 100644 --- a/test/test_index.py +++ b/test/test_index.py @@ -34,10 +34,11 @@ ) from git.index.fun import hook_path from git.index.typ import BaseIndexEntry, IndexEntry +from git.index.util import TemporaryFileSwap from git.objects import Blob -from test.lib import TestBase, fixture, fixture_path, with_rw_directory, with_rw_repo from git.util import Actor, hex_to_bin, rmtree from gitdb.base import IStream +from test.lib import TestBase, fixture, fixture_path, with_rw_directory, with_rw_repo HOOKS_SHEBANG = "#!/usr/bin/env sh\n" @@ -1087,3 +1088,25 @@ def test_index_add_pathlike(self, rw_repo): file.touch() rw_repo.index.add(file) + + +class TestIndexUtils: + @pytest.mark.parametrize("file_path_type", [str, Path]) + def test_temporary_file_swap(self, tmp_path, file_path_type): + file_path = tmp_path / "foo" + file_path.write_bytes(b"some data") + + with TemporaryFileSwap(file_path_type(file_path)) as ctx: + assert Path(ctx.file_path) == file_path + assert not file_path.exists() + + # Recreate it with new data, so we can observe that they're really separate. + file_path.write_bytes(b"other data") + + temp_file_path = Path(ctx.tmp_file_path) + assert temp_file_path.parent == file_path.parent + assert temp_file_path.name.startswith(file_path.name) + assert temp_file_path.read_bytes() == b"some data" + + assert not temp_file_path.exists() + assert file_path.read_bytes() == b"some data" # Not b"other data". From 1ddf953ea0d7625485ed02c90b03aae7f939ec46 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Wed, 20 Dec 2023 22:58:17 -0500 Subject: [PATCH 009/655] Fix TemporaryFileSwap bug when file_path is a Path This fixes the regression introduced in 9e86053 (#1770) where the file_path argument to TemporaryFileSwap.__init__ could no longer be a Path object. The change also makes this truer to the code from before #1770, still without the race condition fixed there, in that str was called on file_path then as well. However, it is not clear that this is a good thing, because this is not an idiomatic use of mkstemp. The reason the `prefix` cannot be a Path is that it is expected to be a filename prefix, with leading directories given in the `dir` argument. --- git/index/util.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git/index/util.py b/git/index/util.py index 1c3b1c4ad..03de80f71 100644 --- a/git/index/util.py +++ b/git/index/util.py @@ -41,7 +41,7 @@ class TemporaryFileSwap: def __init__(self, file_path: PathLike) -> None: self.file_path = file_path - fd, self.tmp_file_path = tempfile.mkstemp(prefix=self.file_path, dir="") + fd, self.tmp_file_path = tempfile.mkstemp(prefix=str(self.file_path), dir="") os.close(fd) with contextlib.suppress(OSError): # It may be that the source does not exist. os.replace(self.file_path, self.tmp_file_path) From b438c459d91243b685f540ccb1c124260e9d28b9 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Wed, 20 Dec 2023 23:29:14 -0500 Subject: [PATCH 010/655] Refactor TemporaryFileSwap.__init__ for clarity This changes the mkstemp call TemporaryFileSwap uses to atomically create (and thus reserve) the temporary file, so that it passes only a filename (i.e., basename) prefix as `prefix`, and passes all other path components (i.e., the directory) as `dir`. (If the original path has no directory, then `dir` is "" as before.) This makes the mkstemp call *slightly* more idiomatic. This also makes it clearer, as it is no longer using `prefix` for something that feels like it should be possible to pass as a Path object. Although mkstemp does not accept a Path as `prefix`, it does (as expected) accept one as `dir`. However, to keep the code simple, I'm passing str for both. The os.path.split function accepts both str and Path (since Python 3.6), and returns str objects, which are now used for the `dir` and `prefix` arguments to mkstemp. For unusual cases, this may technically not be a refactoring. For example, a file_path of "a/b//c" will be split into "a/b" and "c". If the automatically generated temporary file suffix is "xyz", then that results in a tmp_file_path of "a/b/cxyz" where "a/b//cxyz" would have been used before. The tmp_file_path attribute of a TemporaryFileSwap object is public (and used in filesystem calls). However, no guarantee has ever been given that the temporary file path have the original path as an exact string prefix. I believe the slightly weaker relationship I expressed in the recently introduced test_temporary_file_swap -- another file in the same directory, named with the original filename with more characters, consisting of equivalent path components in the same order -- has always been the intended one. Note that this slight possible variation does not apply to the file_path attribute. That attribute is always kept exactly as it was, both in its type and its value, and it always used unmodified in calls that access the filesystem. --- git/index/util.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/git/index/util.py b/git/index/util.py index 03de80f71..61039fe7c 100644 --- a/git/index/util.py +++ b/git/index/util.py @@ -41,7 +41,8 @@ class TemporaryFileSwap: def __init__(self, file_path: PathLike) -> None: self.file_path = file_path - fd, self.tmp_file_path = tempfile.mkstemp(prefix=str(self.file_path), dir="") + dirname, basename = osp.split(file_path) + fd, self.tmp_file_path = tempfile.mkstemp(prefix=basename, dir=dirname) os.close(fd) with contextlib.suppress(OSError): # It may be that the source does not exist. os.replace(self.file_path, self.tmp_file_path) From 4e91a6c7b521dc7d0331dbb5455e9c424d26d655 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Wed, 20 Dec 2023 23:56:55 -0500 Subject: [PATCH 011/655] Tweak formatting for `@pytest.mark.parametrize` This removes the "fmt: off" / "fmt: on" directives around the `@pytest.mark.parametrize` decoration on test_blob_filter, and reformats it with black, for consistency with other such decorations. The style used there, *if* it could be specified as a rule and thus used without "fmt:" directives, may be nicer than how black formats multi-line mark decorations. However, since that decoration was written, there have been a number of other such decorations, which have been in black style. This also removes the only (or only remaining?) "fmt:" directive in the codebase. As such, it should possibly have been done in #1760. --- test/test_blob_filter.py | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) diff --git a/test/test_blob_filter.py b/test/test_blob_filter.py index a91f211bf..ddd83079a 100644 --- a/test/test_blob_filter.py +++ b/test/test_blob_filter.py @@ -14,14 +14,15 @@ from git.types import PathLike -# fmt: off -@pytest.mark.parametrize('paths, path, expected_result', [ - ((Path("foo"),), Path("foo"), True), - ((Path("foo"),), Path("foo/bar"), True), - ((Path("foo/bar"),), Path("foo"), False), - ((Path("foo"), Path("bar")), Path("foo"), True), -]) -# fmt: on +@pytest.mark.parametrize( + "paths, path, expected_result", + [ + ((Path("foo"),), Path("foo"), True), + ((Path("foo"),), Path("foo/bar"), True), + ((Path("foo/bar"),), Path("foo"), False), + ((Path("foo"), Path("bar")), Path("foo"), True), + ], +) def test_blob_filter(paths: Sequence[PathLike], path: PathLike, expected_result: bool) -> None: """Test the blob filter.""" blob_filter = BlobFilter(paths) From f664a0b85dd6be4bb8fd16d7283fd1be0f81c8c8 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Thu, 21 Dec 2023 00:58:33 -0500 Subject: [PATCH 012/655] Add xfail marks to hook tests for WinBashStatus.Absent Precise xfail marks were added to commit hook tests in #1745, but in a few tests I didn't get it right for WinBashStatus.Absent. That is, on a Windows system that has no bash.exe at all: - More of the tests are unable to pass than have xfail marks. - One of the tests, test_commit_msg_hook_success, which does have an xfail mark for that, wrongly combines it with the xfail mark for WinBashStatus.Wsl. That test is the only one where the WSL bash.exe, even when a working WSL distribution is installed, is unable to pass. But using a single mark there is wrong, in part because the "reason" is not correct for both, but even more so because the exceptions they raise are different: AssertionError is raised when the WSL bash.exe is used in that test, but when bash.exe is altogether absent, HookExecutionError is raised. This fixes that by adding xfail marks for WinBashStatus.Absent where missing, and splitting test_commit_msg_hook_success's xfail mark that unsuccessfully tried to cover two conditions into two separate marks, each of which gives a correct reason and exception. This commit also rewords the xfail reason given for WslNoDistro, which was somewhat unclear and potentially ambiguous, to make it clearer. --- test/test_index.py | 20 +++++++++++++++----- 1 file changed, 15 insertions(+), 5 deletions(-) diff --git a/test/test_index.py b/test/test_index.py index 2f97f0af8..3f0990ecc 100644 --- a/test/test_index.py +++ b/test/test_index.py @@ -991,9 +991,14 @@ class Mocked: rel = index._to_relative_path(path) self.assertEqual(rel, os.path.relpath(path, root)) + @pytest.mark.xfail( + type(_win_bash_status) is WinBashStatus.Absent, + reason="Can't run a hook on Windows without bash.exe.", + rasies=HookExecutionError, + ) @pytest.mark.xfail( type(_win_bash_status) is WinBashStatus.WslNoDistro, - reason="Currently uses the bash.exe for WSL even with no WSL distro installed", + reason="Currently uses the bash.exe of WSL, even with no WSL distro installed", raises=HookExecutionError, ) @with_rw_repo("HEAD", bare=True) @@ -1004,7 +1009,7 @@ def test_pre_commit_hook_success(self, rw_repo): @pytest.mark.xfail( type(_win_bash_status) is WinBashStatus.WslNoDistro, - reason="Currently uses the bash.exe for WSL even with no WSL distro installed", + reason="Currently uses the bash.exe of WSL, even with no WSL distro installed", raises=AssertionError, ) @with_rw_repo("HEAD", bare=True) @@ -1030,13 +1035,18 @@ def test_pre_commit_hook_fail(self, rw_repo): raise AssertionError("Should have caught a HookExecutionError") @pytest.mark.xfail( - type(_win_bash_status) in {WinBashStatus.Absent, WinBashStatus.Wsl}, + type(_win_bash_status) is WinBashStatus.Absent, + reason="Can't run a hook on Windows without bash.exe.", + rasies=HookExecutionError, + ) + @pytest.mark.xfail( + type(_win_bash_status) is WinBashStatus.Wsl, reason="Specifically seems to fail on WSL bash (in spite of #1399)", raises=AssertionError, ) @pytest.mark.xfail( type(_win_bash_status) is WinBashStatus.WslNoDistro, - reason="Currently uses the bash.exe for WSL even with no WSL distro installed", + reason="Currently uses the bash.exe of WSL, even with no WSL distro installed", raises=HookExecutionError, ) @with_rw_repo("HEAD", bare=True) @@ -1054,7 +1064,7 @@ def test_commit_msg_hook_success(self, rw_repo): @pytest.mark.xfail( type(_win_bash_status) is WinBashStatus.WslNoDistro, - reason="Currently uses the bash.exe for WSL even with no WSL distro installed", + reason="Currently uses the bash.exe of WSL, even with no WSL distro installed", raises=AssertionError, ) @with_rw_repo("HEAD", bare=True) From e1486474a2381763e11932b3bb15e08f6e9faf53 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Thu, 21 Dec 2023 01:31:54 -0500 Subject: [PATCH 013/655] Add a direct test of run_commit_hook This has three benefits: - run_commit_hook is public, being listed in git.index.fun.__all__, so it makes sense for it to have its own test. - When investigating (future, or current xfail-covered) failure of functions that use run_commit_hook, it will be useful to compare the results of other tests that already do exist to that of a direct test of run_commit_hook. - When changing which bash.exe run_commit_hook selects to use on Windows (including to ameliorate the limitation covered by the WinBashStatus.WslNoDistro xfail marks, or to attempt other possible changes suggested in #1745), or even just to investigate the possibility of doing so, it will make sense to add tests like this test but for more specific conditions or edge cases. Having this typical-case test to compare to should be helpful both for writing such tests and for efficiently verifying that the conditions they test are really the triggers for any failures. --- test/test_index.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/test/test_index.py b/test/test_index.py index 3f0990ecc..afceda131 100644 --- a/test/test_index.py +++ b/test/test_index.py @@ -32,7 +32,7 @@ InvalidGitRepositoryError, UnmergedEntriesError, ) -from git.index.fun import hook_path +from git.index.fun import hook_path, run_commit_hook from git.index.typ import BaseIndexEntry, IndexEntry from git.objects import Blob from test.lib import TestBase, fixture, fixture_path, with_rw_directory, with_rw_repo @@ -991,6 +991,24 @@ class Mocked: rel = index._to_relative_path(path) self.assertEqual(rel, os.path.relpath(path, root)) + @pytest.mark.xfail( + type(_win_bash_status) is WinBashStatus.Absent, + reason="Can't run a hook on Windows without bash.exe.", + rasies=HookExecutionError, + ) + @pytest.mark.xfail( + type(_win_bash_status) is WinBashStatus.WslNoDistro, + reason="Currently uses the bash.exe of WSL, even with no WSL distro installed", + raises=HookExecutionError, + ) + @with_rw_repo("HEAD", bare=True) + def test_run_commit_hook(self, rw_repo): + index = rw_repo.index + _make_hook(index.repo.git_dir, "fake-hook", "echo 'ran fake hook' >output.txt") + run_commit_hook("fake-hook", index) + output = Path(rw_repo.git_dir, "output.txt").read_text(encoding="utf-8") + self.assertEqual(output, "ran fake hook\n") + @pytest.mark.xfail( type(_win_bash_status) is WinBashStatus.Absent, reason="Can't run a hook on Windows without bash.exe.", From 6e4cee4fa708465cf714e36a9dc8b9b6b94acc0c Mon Sep 17 00:00:00 2001 From: Stefan Gmeiner Date: Thu, 21 Dec 2023 21:33:37 +0100 Subject: [PATCH 014/655] Fix Items of type PathLike --- git/index/base.py | 2 +- test/test_index.py | 12 +++++++----- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/git/index/base.py b/git/index/base.py index 112ad3feb..cffb213a9 100644 --- a/git/index/base.py +++ b/git/index/base.py @@ -940,7 +940,7 @@ def _items_to_rela_paths( for item in items: if isinstance(item, (BaseIndexEntry, (Blob, Submodule))): paths.append(self._to_relative_path(item.path)) - elif isinstance(item, str): + elif isinstance(item, (str, os.PathLike)): paths.append(self._to_relative_path(item)) else: raise TypeError("Invalid item type: %r" % item) diff --git a/test/test_index.py b/test/test_index.py index 6b746b8b4..50d941e83 100644 --- a/test/test_index.py +++ b/test/test_index.py @@ -558,14 +558,16 @@ def test_index_mutation(self, rw_repo): def mixed_iterator(): count = 0 for entry in index.entries.values(): - type_id = count % 4 - if type_id == 0: # path + type_id = count % 5 + if type_id == 0: # path (str) yield entry.path - elif type_id == 1: # blob + elif type_id == 1: # path (PathLike) + yield Path(entry.path) + elif type_id == 2: # blob yield Blob(rw_repo, entry.binsha, entry.mode, entry.path) - elif type_id == 2: # BaseIndexEntry + elif type_id == 3: # BaseIndexEntry yield BaseIndexEntry(entry[:4]) - elif type_id == 3: # IndexEntry + elif type_id == 4: # IndexEntry yield entry else: raise AssertionError("Invalid Type") From 53e73830f64e13a88b0e246fb825944a6fe2848e Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Fri, 22 Dec 2023 00:57:14 -0500 Subject: [PATCH 015/655] Remove explicit PushInfo/FetchInfo inheritance from object I had missed these in f78587f (#1725). --- git/remote.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/git/remote.py b/git/remote.py index 4055dba2e..98a421b3a 100644 --- a/git/remote.py +++ b/git/remote.py @@ -130,7 +130,7 @@ def to_progress_instance( return progress -class PushInfo(IterableObj, object): +class PushInfo(IterableObj): """ Carries information about the result of a push operation of a single head:: @@ -300,7 +300,7 @@ def raise_if_error(self) -> None: raise self.error -class FetchInfo(IterableObj, object): +class FetchInfo(IterableObj): """ Carries information about the results of a fetch operation of a single head:: From 96fc3547cf62c5ade1477c24566c8b34254a1507 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Fri, 22 Dec 2023 01:55:30 -0500 Subject: [PATCH 016/655] Add tests for current Submodule.iter_items behavior Where the behavior is intended. In the case of an invalid hash (or IOError, which in Python 2 was a subclass of OSError but now is just another name for it), the behavior of just yielding no items may be unintuitive, since on most other errors an exception is raised. However, examining the code reveals this behavior is clearly intentional. Furthrmore, it may be reasonable for applications to rely on it, and it may be convenient in some situations. For backward compatibility, it probably can't be changed significantly. This adds tests that show both an error that does raise an error-representing exception -- a well-formed hash not present in the repository raising ValueError with a suitable message -- and an error that silently causes the iterator to yield zero items. --- test/test_submodule.py | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/test/test_submodule.py b/test/test_submodule.py index 4dc89f98f..5226d7a6e 100644 --- a/test/test_submodule.py +++ b/test/test_submodule.py @@ -688,6 +688,17 @@ def test_root_module(self, rwrepo): # gitdb: has either 1 or 2 submodules depending on the version. assert len(nsm.children()) >= 1 and nsmc.module_exists() + def test_iter_items_from_nonexistent_hash(self): + it = Submodule.iter_items(self.rorepo, "b4ecbfaa90c8be6ed6d9fb4e57cc824663ae15b4") + with self.assertRaisesRegex(ValueError, r"\bcould not be resolved\b"): + next(it) + + def test_iter_items_from_invalid_hash(self): + """Check legacy behavaior on BadName (also applies to IOError, i.e. OSError).""" + it = Submodule.iter_items(self.rorepo, "xyz") + with self.assertRaises(StopIteration): + next(it) + @with_rw_repo(k_no_subm_tag, bare=False) def test_first_submodule(self, rwrepo): assert len(list(rwrepo.iter_submodules())) == 0 From f5dc1c4713dfb937d45d10a595ac879d6e76481c Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Fri, 22 Dec 2023 02:16:28 -0500 Subject: [PATCH 017/655] Expand "invalid hash" test to assert normal StopIteration Returning an explicit value from a generator function causes that value to be bound to the `value` attribute of the StopIteration exception. This is available as the result of "yield from" when it is used as an expression; or by explicitly catching StopIteration, binding the StopIteration exception to a variable, and accessing the attribute. This feature of generators is rarely used. The `return iter([])` statement in Submodule.iter_items uses this feature, causing the resulting StopIteration exception object to have a `value` attribute that refers to a separate second iterator that also yields no values (#1779). From context, this behavior is clearly not the goal; a bare return statement should be used here (which has the same effect except for the `value` attribute of the StopIteration exception). The code had used a bare return prior to 82b131c (#1282), when `return` was changed to `return iter([])`. That was part of a change that added numerous type annotations. It looks like it was either a mistake, or possibly an attempt to work around an old bug in a static type checker. This commit extends the test_iter_items_from_invalid_hash test to assert that the `value` attribute of the StopIteration is its usual default value of None. This commit only extends the test; it does not fix the bug. --- test/test_submodule.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/test/test_submodule.py b/test/test_submodule.py index 5226d7a6e..993f6b57e 100644 --- a/test/test_submodule.py +++ b/test/test_submodule.py @@ -696,8 +696,9 @@ def test_iter_items_from_nonexistent_hash(self): def test_iter_items_from_invalid_hash(self): """Check legacy behavaior on BadName (also applies to IOError, i.e. OSError).""" it = Submodule.iter_items(self.rorepo, "xyz") - with self.assertRaises(StopIteration): + with self.assertRaises(StopIteration) as ctx: next(it) + self.assertIsNone(ctx.exception.value) @with_rw_repo(k_no_subm_tag, bare=False) def test_first_submodule(self, rwrepo): From c3c008c4971f9d4189dc08e88334a207ce14298c Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Fri, 22 Dec 2023 02:44:56 -0500 Subject: [PATCH 018/655] In Submodule.iter_items, don't attach second empty iterator This fixes the minor bug where a separate empty iterator was bound to the StopIteration exception raised as a result of returning from the generator function (#1779). This change does not cause what exceptions are raised from GitPython code in any situations, nor how many items any iterators yield. --- git/objects/submodule/base.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/git/objects/submodule/base.py b/git/objects/submodule/base.py index 651d9535c..49dfedf9a 100644 --- a/git/objects/submodule/base.py +++ b/git/objects/submodule/base.py @@ -1401,7 +1401,7 @@ def iter_items( pc = repo.commit(parent_commit) # Parent commit instance parser = cls._config_parser(repo, pc, read_only=True) except (IOError, BadName): - return iter([]) + return # END handle empty iterator for sms in parser.sections(): From dfee31f2100d7f4a653c69c4a5a505607fe328e1 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Fri, 22 Dec 2023 03:48:57 -0500 Subject: [PATCH 019/655] Improve self-documentation of IterableObj and related classes - Fill in the missing part of the explanation of why to favor iter_items over list_items in IterableObj and Iterable (#1775). - Move the explanation of how subclasses must treat arguments from the list_items methods to the iter_items methods, because the iter_items methdos are the ones that are abstract and must be overridden by a well-behaved subclass, and also because, since the iter_items methods are preferred for use, they should be the place where less duplicated shared documentation resides. - Subtantially reword the existing documentation for clarity, especially regarding the signifance of extra args and kwargs. - Put the iter_items method before (i.e. above) the list_items method (in each of the IterableObj and Iterable classes), because that method is the one that should be used more often, and because it is also now the one with the more detailed docstring. - Remove and old comment on a return type that said exactly the exact same thing as the annotation. - In Iterable, note deprecation more consistently (and thus in more places). - Rewrite the IterableClassWatcher class docstring to explain exactly what that metaclass achieves. --- git/util.py | 84 ++++++++++++++++++++++++++++++++--------------------- 1 file changed, 51 insertions(+), 33 deletions(-) diff --git a/git/util.py b/git/util.py index 0a5da7d71..5acc001f7 100644 --- a/git/util.py +++ b/git/util.py @@ -1183,7 +1183,8 @@ def __delitem__(self, index: Union[SupportsIndex, int, slice, str]) -> None: class IterableClassWatcher(type): - """Metaclass that watches.""" + """Metaclass that issues :class:`DeprecationWarning` when :class:`git.util.Iterable` + is subclassed.""" def __init__(cls, name: str, bases: Tuple, clsdict: Dict) -> None: for base in bases: @@ -1199,23 +1200,42 @@ def __init__(cls, name: str, bases: Tuple, clsdict: Dict) -> None: class Iterable(metaclass=IterableClassWatcher): - """Defines an interface for iterable items, so there is a uniform way to retrieve - and iterate items within the git repository.""" + """Deprecated, use :class:`IterableObj` instead. + + Defines an interface for iterable items, so there is a uniform way to retrieve + and iterate items within the git repository. + """ __slots__ = () _id_attribute_ = "attribute that most suitably identifies your instance" @classmethod - def list_items(cls, repo: "Repo", *args: Any, **kwargs: Any) -> Any: + def iter_items(cls, repo: "Repo", *args: Any, **kwargs: Any) -> Any: + # return typed to be compatible with subtypes e.g. Remote + """Deprecated, use :class:`IterableObj` instead. + + Find (all) items of this type. + + Subclasses can specify ``args`` and ``kwargs`` differently, and may use them for + filtering. However, when the method is called with no additional positional or + keyword arguments, subclasses are obliged to to yield all items. + + :return: Iterator yielding Items """ - Deprecated, use IterableObj instead. + raise NotImplementedError("To be implemented by Subclass") + + @classmethod + def list_items(cls, repo: "Repo", *args: Any, **kwargs: Any) -> Any: + """Deprecated, use :class:`IterableObj` instead. + + Find (all) items of this type and collect them into a list. - Find all items of this type - subclasses can specify args and kwargs differently. - If no args are given, subclasses are obliged to return all items if no additional - arguments arg given. + For more information about the arguments, see :meth:`list_items`. - :note: Favor the iter_items method as it will + :note: Favor the :meth:`iter_items` method as it will avoid eagerly collecting + all items. When there are many items, that can slow performance and increase + memory usage. :return: list(Item,...) list of item instances """ @@ -1223,15 +1243,6 @@ def list_items(cls, repo: "Repo", *args: Any, **kwargs: Any) -> Any: out_list.extend(cls.iter_items(repo, *args, **kwargs)) return out_list - @classmethod - def iter_items(cls, repo: "Repo", *args: Any, **kwargs: Any) -> Any: - # return typed to be compatible with subtypes e.g. Remote - """For more information about the arguments, see list_items. - - :return: Iterator yielding Items - """ - raise NotImplementedError("To be implemented by Subclass") - @runtime_checkable class IterableObj(Protocol): @@ -1246,13 +1257,30 @@ class IterableObj(Protocol): _id_attribute_: str @classmethod - def list_items(cls, repo: "Repo", *args: Any, **kwargs: Any) -> IterableList[T_IterableObj]: + @abstractmethod + def iter_items(cls, repo: "Repo", *args: Any, **kwargs: Any) -> Iterator[T_IterableObj]: + # Return-typed to be compatible with subtypes e.g. Remote. + """Find (all) items of this type. + + Subclasses can specify ``args`` and ``kwargs`` differently, and may use them for + filtering. However, when the method is called with no additional positional or + keyword arguments, subclasses are obliged to to yield all items. + + For more information about the arguments, see list_items. + + :return: Iterator yielding Items """ - Find all items of this type - subclasses can specify args and kwargs differently. - If no args are given, subclasses are obliged to return all items if no additional - arguments arg given. + raise NotImplementedError("To be implemented by Subclass") + + @classmethod + def list_items(cls, repo: "Repo", *args: Any, **kwargs: Any) -> IterableList[T_IterableObj]: + """Find (all) items of this type and collect them into a list. + + For more information about the arguments, see :meth:`list_items`. - :note: Favor the iter_items method as it will + :note: Favor the :meth:`iter_items` method as it will avoid eagerly collecting + all items. When there are many items, that can slow performance and increase + memory usage. :return: list(Item,...) list of item instances """ @@ -1260,16 +1288,6 @@ def list_items(cls, repo: "Repo", *args: Any, **kwargs: Any) -> IterableList[T_I out_list.extend(cls.iter_items(repo, *args, **kwargs)) return out_list - @classmethod - @abstractmethod - def iter_items(cls, repo: "Repo", *args: Any, **kwargs: Any) -> Iterator[T_IterableObj]: # Iterator[T_IterableObj]: - # Return-typed to be compatible with subtypes e.g. Remote. - """For more information about the arguments, see list_items. - - :return: Iterator yielding Items - """ - raise NotImplementedError("To be implemented by Subclass") - # } END classes From 94a85d1159e6374e901df4e3bd284cf55b623e66 Mon Sep 17 00:00:00 2001 From: Eliah Kagan Date: Fri, 22 Dec 2023 14:31:28 -0500 Subject: [PATCH 020/655] Convert constant and attribute comments to docstrings These are "non-reified docstrings" as described in #1734. They are not made part of the data model, but most editors will display them, including on symbols present in code of other projects that use the GitPython library. A number of these have already been added, but this adds what will probably be most of the remaining ones. For the most part, this doesn't create documentation where there wasn't any, but instead converts existing comments to "docstrings." In a handful of cases, they are expanded, reworded, or a docstring added. This also fixes some small style inconsistencies that were missed in #1725, and moves a comment that had become inadvertently displaced due to autoformatting to the item it was meant for. The major omission here is HIDE_WINDOWS_KNOWN_ERRORS and HIDE_WINDOWS_FREEZE_ERRORS. This doesn't convert the comments above them to "docstrings," for a few reasons. They are not specific to either of the symbols, they are oriented toward considerations that are not really relevant except when developing GitPython itself, and they are out of date. Also, because HIDE_WINDOWS_KNOWN_ERRORS is listed in __all__, increasing the level of documentation for it might be taken as a committment to preserve some aspect of its current behavior, which could interfere with progress on #790. So I've kept those comments as comments, and unchanged, for now. --- git/cmd.py | 25 ++++++++++++++++--------- git/config.py | 20 ++++++++++++-------- git/diff.py | 2 +- git/index/base.py | 6 ++++-- git/index/fun.py | 4 +++- git/objects/submodule/base.py | 11 ++++++----- git/repo/base.py | 31 ++++++++++++++++++++----------- git/util.py | 4 ++-- 8 files changed, 64 insertions(+), 39 deletions(-) diff --git a/git/cmd.py b/git/cmd.py index 27148d3d6..9518c2c8c 100644 --- a/git/cmd.py +++ b/git/cmd.py @@ -273,23 +273,30 @@ def __setstate__(self, d: Dict[str, Any]) -> None: # CONFIGURATION - git_exec_name = "git" # Default that should work on Linux and Windows. + git_exec_name = "git" + """Default git command that should work on Linux, Windows, and other systems.""" - # Enables debugging of GitPython's git commands. GIT_PYTHON_TRACE = os.environ.get("GIT_PYTHON_TRACE", False) + """Enables debugging of GitPython's git commands.""" - # If True, a shell will be used when executing git commands. - # This should only be desirable on Windows, see https://github.com/gitpython-developers/GitPython/pull/126 - # and check `git/test_repo.py:TestRepo.test_untracked_files()` TC for an example where it is required. - # Override this value using `Git.USE_SHELL = True`. USE_SHELL = False + """If True, a shell will be used when executing git commands. + + This should only be desirable on Windows, see https://github.com/gitpython-developers/GitPython/pull/126 + and check `git/test_repo.py:TestRepo.test_untracked_files()` TC for an example where it is required. + + Override this value using ``Git.USE_SHELL = True``. + """ - # Provide the full path to the git executable. Otherwise it assumes git is in the path. _git_exec_env_var = "GIT_PYTHON_GIT_EXECUTABLE" _refresh_env_var = "GIT_PYTHON_REFRESH" + GIT_PYTHON_GIT_EXECUTABLE = None - # Note that the git executable is actually found during the refresh step in - # the top level __init__. + """Provide the full path to the git executable. Otherwise it assumes git is in the path. + + Note that the git executable is actually found during the refresh step in + the top level ``__init__``. + """ @classmethod def refresh(cls, path: Union[None, PathLike] = None) -> bool: diff --git a/git/config.py b/git/config.py index 5708a7385..2730ddaf3 100644 --- a/git/config.py +++ b/git/config.py @@ -65,12 +65,14 @@ log.addHandler(logging.NullHandler()) -# The configuration level of a configuration file. CONFIG_LEVELS: ConfigLevels_Tup = ("system", "user", "global", "repository") +"""The configuration level of a configuration file.""" -# Section pattern to detect conditional includes. -# https://git-scm.com/docs/git-config#_conditional_includes CONDITIONAL_INCLUDE_REGEXP = re.compile(r"(?<=includeIf )\"(gitdir|gitdir/i|onbranch):(.+)\"") +"""Section pattern to detect conditional includes. + +See: https://git-scm.com/docs/git-config#_conditional_includes +""" class MetaParserBuilder(abc.ABCMeta): # noqa: B024 @@ -283,12 +285,14 @@ class GitConfigParser(cp.RawConfigParser, metaclass=MetaParserBuilder): """ # { Configuration - # The lock type determines the type of lock to use in new configuration readers. - # They must be compatible to the LockFile interface. - # A suitable alternative would be the BlockingLockFile t_lock = LockFile - re_comment = re.compile(r"^\s*[#;]") + """The lock type determines the type of lock to use in new configuration readers. + They must be compatible to the LockFile interface. + A suitable alternative would be the :class:`~git.util.BlockingLockFile`. + """ + + re_comment = re.compile(r"^\s*[#;]") # } END configuration optvalueonly_source = r"\s*(?P