Skip to content

Commit 0400d33

Browse files
committed
#16983: Apply Postel's law to encoded words inside quoted strings.
This applies only to the new parser. The old parser already decodes encoded words inside quoted strings, although it gets the whitespace wrong when it does so. This version of the patch handles only the most common case (a single encoded word surrounded by quotes), but I haven't seen any other variations of this in the wild yet, so it's good enough for now.
1 parent 905c8c3 commit 0400d33

File tree

4 files changed

+29
-0
lines changed

4 files changed

+29
-0
lines changed

Lib/email/_header_value_parser.py

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1559,6 +1559,13 @@ def get_bare_quoted_string(value):
15591559
while value and value[0] != '"':
15601560
if value[0] in WSP:
15611561
token, value = get_fws(value)
1562+
elif value[:2] == '=?':
1563+
try:
1564+
token, value = get_encoded_word(value)
1565+
bare_quoted_string.defects.append(errors.InvalidHeaderDefect(
1566+
"encoded word inside quoted string"))
1567+
except errors.HeaderParseError:
1568+
token, value = get_qcontent(value)
15621569
else:
15631570
token, value = get_qcontent(value)
15641571
bare_quoted_string.append(token)

Lib/test/test_email/test__header_value_parser.py

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -540,6 +540,15 @@ def test_get_bare_quoted_string_empty_quotes(self):
540540
self._test_get_x(parser.get_bare_quoted_string,
541541
'""', '""', '', [], '')
542542

543+
# Issue 16983: apply Postel's law to some bad encoding.
544+
def test_encoded_word_inside_quotes(self):
545+
self._test_get_x(parser.get_bare_quoted_string,
546+
'"=?utf-8?Q?not_really_valid?="',
547+
'"not really valid"',
548+
'not really valid',
549+
[errors.InvalidHeaderDefect],
550+
'')
551+
543552
# get_comment
544553

545554
def test_get_comment_only(self):

Lib/test/test_email/test_headerregistry.py

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1143,6 +1143,16 @@ class TestAddressHeader(TestHeaderBase):
11431143
'example.com',
11441144
None),
11451145

1146+
'rfc2047_atom_in_quoted_string_is_decoded':
1147+
('"=?utf-8?q?=C3=89ric?=" <foo@example.com>',
1148+
[errors.InvalidHeaderDefect],
1149+
'Éric <foo@example.com>',
1150+
'Éric',
1151+
'foo@example.com',
1152+
'foo',
1153+
'example.com',
1154+
None),
1155+
11461156
}
11471157

11481158
# XXX: Need many more examples, and in particular some with names in

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,9 @@ Core and Builtins
4848
Library
4949
-------
5050

51+
- Issue #16983: the new email header parsing code will now decode encoded words
52+
that are (incorrectly) surrounded by quotes, and register a defect.
53+
5154
- Issue #19772: email.generator no longer mutates the message object when
5255
doing a down-transform from 8bit to 7bit CTEs.
5356

0 commit comments

Comments
 (0)