diff --git a/CHANGELOG.md b/CHANGELOG.md index 3c77841..3d6f080 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,5 +1,10 @@ # Changelog +## 0.0.8 (2024-02-09) + +* Check if Message.get_params return 3-tuple instead of str on parse_options_header [#79](https://github.com/Kludex/python-multipart/pull/79). +* Cleanup unused regex patterns [#82](https://github.com/Kludex/python-multipart/pull/82). + ## 0.0.7 (2024-02-03) * Refactor header option parser to use the standard library instead of a custom RegEx [#75](https://github.com/andrew-d/python-multipart/pull/75). diff --git a/docs_requirements.txt b/docs_requirements.txt index 44993e5..fccc00d 100644 --- a/docs_requirements.txt +++ b/docs_requirements.txt @@ -1,4 +1,4 @@ -Jinja2==2.11.3 +Jinja2==3.1.3 PyYAML==5.4 Pygments==2.15.0 Sphinx==1.2b1 diff --git a/multipart/__init__.py b/multipart/__init__.py index e8b163a..28c7ad6 100644 --- a/multipart/__init__.py +++ b/multipart/__init__.py @@ -2,7 +2,7 @@ __author__ = "Andrew Dunham" __license__ = "Apache" __copyright__ = "Copyright (c) 2012-2013, Andrew Dunham" -__version__ = "0.0.7" +__version__ = "0.0.8" from .multipart import ( diff --git a/multipart/multipart.py b/multipart/multipart.py index e1d10fc..73910da 100644 --- a/multipart/multipart.py +++ b/multipart/multipart.py @@ -2,7 +2,6 @@ from .exceptions import * import os -import re import sys import shutil import logging @@ -67,16 +66,6 @@ ord_char = lambda c: c join_bytes = lambda b: bytes(list(b)) -# These are regexes for parsing header values. -SPECIAL_CHARS = re.escape(b'()<>@,;:\\"/[]?={} \t') -QUOTED_STR = br'"(?:\\.|[^"])*"' -VALUE_STR = br'(?:[^' + SPECIAL_CHARS + br']+|' + QUOTED_STR + br')' -OPTION_RE_STR = ( - br'(?:;|^)\s*([^' + SPECIAL_CHARS + br']+)\s*=\s*(' + VALUE_STR + br')' -) -OPTION_RE = re.compile(OPTION_RE_STR) -QUOTE = b'"'[0] - def parse_options_header(value: Union[str, bytes]) -> Tuple[bytes, Dict[bytes, bytes]]: """ @@ -110,6 +99,11 @@ def parse_options_header(value: Union[str, bytes]) -> Tuple[bytes, Dict[bytes, b options = {} for param in params: key, value = param + # If the value returned from get_params() is a 3-tuple, the last + # element corresponds to the value. + # See: https://docs.python.org/3/library/email.compat32-message.html + if isinstance(value, tuple): + value = value[-1] # If the value is a filename, we need to fix a bug on IE6 that sends # the full file path instead of the filename. if key == 'filename': diff --git a/tests/test_multipart.py b/tests/test_multipart.py index 031515b..5cfacf4 100644 --- a/tests/test_multipart.py +++ b/tests/test_multipart.py @@ -270,6 +270,16 @@ def test_handles_ie6_bug(self): t, p = parse_options_header(b'text/plain; filename="C:\\this\\is\\a\\path\\file.txt"') self.assertEqual(p[b'filename'], b'file.txt') + + def test_redos_attack_header(self): + t, p = parse_options_header(b'application/x-www-form-urlencoded; !="\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\\') + # If vulnerable, this test wouldn't finish, the line above would hang + self.assertIn(b'"\\', p[b'!']) + + def test_handles_rfc_2231(self): + t, p = parse_options_header(b'text/plain; param*=us-ascii\'en-us\'encoded%20message') + + self.assertEqual(p[b'param'], b'encoded message') class TestBaseParser(unittest.TestCase):