diff --git a/Lib/fnmatch.py b/Lib/fnmatch.py index 865baea23467ea..5d38891c7da632 100644 --- a/Lib/fnmatch.py +++ b/Lib/fnmatch.py @@ -87,6 +87,8 @@ def _translate(pat, star, question_mark): res = [] add = res.append star_indices = [] + inside_range = False + question_mark_char = re.sub(r'\[|\]|\^', '', question_mark) i, n = 0, len(pat) while i < n: @@ -135,6 +137,9 @@ def _translate(pat, star, question_mark): if chunks[k-1][-1] > chunks[k][0]: chunks[k-1] = chunks[k-1][:-1] + chunks[k][1:] del chunks[k] + if len(chunks) > 1: + if question_mark_char: + inside_range = chunks[0][-1] <= question_mark_char <= chunks[-1][0] # Escape backslashes and hyphens for set difference (--). # Hyphens that create ranges shouldn't be escaped. stuff = '-'.join(s.replace('\\', r'\\').replace('-', r'\-') @@ -145,11 +150,16 @@ def _translate(pat, star, question_mark): add('(?!)') elif stuff == '!': # Negated empty range: match any character. - add('.') + add(question_mark) else: + negative_lookahead='' + if question_mark != '.' and inside_range: + add(f'(?![{question_mark_char}])') # Escape set operations (&&, ~~ and ||). stuff = _re_setops_sub(r'\\\1', stuff) if stuff[0] == '!': + if question_mark_char not in stuff and question_mark != '.': + stuff = f'^{question_mark_char}' + '^' + stuff[1:] stuff = '^' + stuff[1:] elif stuff[0] in ('^', '['): stuff = '\\' + stuff diff --git a/Lib/glob.py b/Lib/glob.py index 8879eff80415aa..f2a19167a82550 100644 --- a/Lib/glob.py +++ b/Lib/glob.py @@ -262,8 +262,6 @@ def escape(pathname): _special_parts = ('', '.', '..') _dir_open_flags = os.O_RDONLY | getattr(os, 'O_DIRECTORY', 0) _no_recurse_symlinks = object() - - def translate(pat, *, recursive=False, include_hidden=False, seps=None): """Translate a pathname with shell wildcards to a regular expression. 
diff --git a/Lib/test/test_fnmatch.py b/Lib/test/test_fnmatch.py index 9f360e1dc10f47..0066878b0b1c5f 100644 --- a/Lib/test/test_fnmatch.py +++ b/Lib/test/test_fnmatch.py @@ -236,6 +236,13 @@ def test_translate(self): self.assertEqual(translate('A*********'), r'(?s:A.*)\Z') self.assertEqual(translate('*********A'), r'(?s:.*A)\Z') self.assertEqual(translate('A*********?[?]?'), r'(?s:A.*.[?].)\Z') + self.assertEqual(translate('foo[%-0]bar'), r'(?s:foo[%-0]bar)\Z') + self.assertEqual(translate('foo[%-0][%-0[%-0]bar'), r'(?s:foo[%-0][%-0[%-0]bar)\Z') + self.assertEqual(translate('foo[/-/]bar'), r'(?s:foo[/-/]bar)\Z') + self.assertEqual(translate('foo[%-0][1-9]bar'), r'(?s:foo[%-0][1-9]bar)\Z') + self.assertEqual(translate('foo[%-/]bar'), r'(?s:foo[%-/]bar)\Z') + self.assertEqual(translate('foo?'), r'(?s:foo.)\Z') + self.assertEqual(translate('foo.'), r'(?s:foo\.)\Z') # fancy translation to prevent exponential-time match failure t = translate('**a*a****a') self.assertEqual(t, r'(?s:(?>.*?a)(?>.*?a).*a)\Z') diff --git a/Lib/test/test_glob.py b/Lib/test/test_glob.py index da73769c16e9af..0b52e7a0d18630 100644 --- a/Lib/test/test_glob.py +++ b/Lib/test/test_glob.py @@ -456,6 +456,14 @@ def test_translate_matching(self): self.assertIsNone(match(os.path.join('foo', '.bar'))) self.assertIsNotNone(match(os.path.join('foo', 'bar.txt'))) self.assertIsNone(match(os.path.join('foo', '.bar.txt'))) + match = re.compile(glob.translate('foo[%-0]bar', recursive=True)).match + self.assertIsNone(match(os.path.join('foo', 'bar'))) + match = re.compile(glob.translate('foo?bar', recursive=True)).match + self.assertIsNone(match('foo/bar')) + match = re.compile(glob.translate('foo.', recursive=True)).match + self.assertIsNone(match('foo/')) + match = re.compile(glob.translate('foo*', recursive=True)).match + self.assertIsNone(match('foo/')) def test_translate(self): def fn(pat): @@ -513,7 +521,17 @@ def fn(pat): return glob.translate(pat, recursive=True, include_hidden=True, seps=['/', '\\']) 
self.assertEqual(fn('foo/bar\\baz'), r'(?s:foo[/\\]bar[/\\]baz)\Z') self.assertEqual(fn('**/*'), r'(?s:(?:.+[/\\])?[^/\\]+)\Z') - + self.assertEqual(fn('foo[!a]bar'), r'(?s:foo[^/\\^a]bar)\Z') + self.assertEqual(fn('foo[%-0]bar'), r'(?s:foo(?![/\\])[%-0]bar)\Z') + self.assertEqual(fn('foo[%-0][1-9]bar'), r'(?s:foo(?![/\\])[%-0][1-9]bar)\Z') + self.assertEqual(fn('foo[0-%]bar'), r'(?s:foo(?!)bar)\Z') + self.assertEqual(fn('foo[^-'), r'(?s:foo\[\^\-)\Z') + self.assertEqual(fn('foo[/-/]bar'), r'(?s:foo\[[/\\]\-[/\\]\]bar)\Z') + self.assertEqual(fn('foo[%-/]bar'), r'(?s:foo\[%\-[/\\]\]bar)\Z') + self.assertEqual(fn('foo[/]bar'), r'(?s:foo\[[/\\]\]bar)\Z') + self.assertEqual(fn('foo[%-0][0-%[%-0]bar'), r'(?s:foo(?![/\\])[%-0](?![/\\])[\[%-0]bar)\Z') + self.assertEqual(fn('foo?'), r'(?s:foo[^/\\])\Z') + self.assertEqual(fn('foo.'), r'(?s:foo\.)\Z') if __name__ == "__main__": unittest.main() diff --git a/Misc/NEWS.d/next/Library/2025-03-08-23-26-50.gh-issue-130942.jxRMK_.rst b/Misc/NEWS.d/next/Library/2025-03-08-23-26-50.gh-issue-130942.jxRMK_.rst new file mode 100644 index 00000000000000..735e4e70382724 --- /dev/null +++ b/Misc/NEWS.d/next/Library/2025-03-08-23-26-50.gh-issue-130942.jxRMK_.rst @@ -0,0 +1,11 @@ +.. versionchanged:: next + :func:`glob.translate` now correctly handles ranges implicitly containing path + separators (for instance, ``[%-0]`` contains ``/``) either by adding a negative + lookahead (``(?!/)``) or by excluding the path separator (``^/``). In addition, + ranges including path separator literals are now correctly escaped, as specified by + POSIX. +.. versionchanged:: next + :func:`fnmatch.translate` does not treat path separator characters as having any + special meaning at all, so it still matches ranges implicitly containing path + separators (for instance, ``[%-0]`` contains ``/``) and ranges explicitly + containing path separators (for instance, ``[/-/]`` contains ``/``).