Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[3.13] gh-121284: Fix email address header folding with parsed encoded-word (GH-122754) #131403

Merged
merged 1 commit into from
Mar 18, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 5 additions & 5 deletions Lib/email/_header_value_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -1053,7 +1053,7 @@ def get_fws(value):
fws = WhiteSpaceTerminal(value[:len(value)-len(newvalue)], 'fws')
return fws, newvalue

def get_encoded_word(value):
def get_encoded_word(value, terminal_type='vtext'):
""" encoded-word = "=?" charset "?" encoding "?" encoded-text "?="

"""
Expand Down Expand Up @@ -1092,7 +1092,7 @@ def get_encoded_word(value):
ew.append(token)
continue
chars, *remainder = _wsp_splitter(text, 1)
vtext = ValueTerminal(chars, 'vtext')
vtext = ValueTerminal(chars, terminal_type)
_validate_xtext(vtext)
ew.append(vtext)
text = ''.join(remainder)
Expand Down Expand Up @@ -1134,7 +1134,7 @@ def get_unstructured(value):
valid_ew = True
if value.startswith('=?'):
try:
token, value = get_encoded_word(value)
token, value = get_encoded_word(value, 'utext')
except _InvalidEwError:
valid_ew = False
except errors.HeaderParseError:
Expand Down Expand Up @@ -1163,7 +1163,7 @@ def get_unstructured(value):
# the parser to go in an infinite loop.
if valid_ew and rfc2047_matcher.search(tok):
tok, *remainder = value.partition('=?')
vtext = ValueTerminal(tok, 'vtext')
vtext = ValueTerminal(tok, 'utext')
_validate_xtext(vtext)
unstructured.append(vtext)
value = ''.join(remainder)
Expand Down Expand Up @@ -2813,7 +2813,7 @@ def _refold_parse_tree(parse_tree, *, policy):
continue
tstr = str(part)
if not want_encoding:
if part.token_type == 'ptext':
if part.token_type in ('ptext', 'vtext'):
# Encode if tstr contains special characters.
want_encoding = not SPECIALSNL.isdisjoint(tstr)
else:
Expand Down
25 changes: 25 additions & 0 deletions Lib/test/test_email/test__header_value_parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -3076,6 +3076,31 @@ def test_address_list_with_unicode_names_in_quotes(self):
'=?utf-8?q?H=C3=BCbsch?= Kaktus <beautiful@example.com>,\n'
' =?utf-8?q?bei=C3=9Ft_bei=C3=9Ft?= <biter@example.com>\n')

def test_address_list_with_specials_in_encoded_word(self):
    """Check refolding of display names whose encoded-word holds specials.

    Regression test for gh-121284: an encoded-word parsed from a
    structured header must remain encoded when it contains specials.
    Each case is parsed with ``parser.get_address_list`` and handed to
    ``self._test`` together with the expected folded text and a policy
    cloned with ``max_line_length=40``.
    """
    narrow_policy = self.policy.clone(max_line_length=40)
    # Pairs of (raw address-list value, expected folded output).
    expectations = (
        (
            '=?utf-8?q?A_v=C3=A9ry_long_name_with=2C_comma?= <to@example.com>',
            'A =?utf-8?q?v=C3=A9ry_long_name_with?=\n'
            ' =?utf-8?q?=2C?= comma <to@example.com>\n',
        ),
        (
            '=?utf-8?q?This_long_name_does_not_need_encoded=2Dword?= <to@example.com>',
            'This long name does not need\n'
            ' encoded-word <to@example.com>\n',
        ),
        (
            '"A véry long name with, comma" <to@example.com>',
            # Not the ideal place to fold, but the result is still valid.
            'A =?utf-8?q?v=C3=A9ry_long_name_with?=\n'
            ' =?utf-8?q?=2C?= comma <to@example.com>\n',
        ),
        (
            '"A véry long name containing a, comma" <to@example.com>',
            'A =?utf-8?q?v=C3=A9ry?= long name\n'
            ' containing =?utf-8?q?a=2C?= comma\n'
            ' <to@example.com>\n',
        ),
    )
    for header_value, expected in expectations:
        with self.subTest(to=header_value):
            # get_address_list returns a sequence; only its first item
            # (the parsed token) is passed on for folding.
            address_list = parser.get_address_list(header_value)[0]
            self._test(address_list, expected, policy=narrow_policy)

def test_address_list_with_list_separator_after_fold(self):
a = 'x' * 66 + '@example.com'
to = f'{a}, "Hübsch Kaktus" <beautiful@example.com>'
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
Fix a bug in the folding of RFC 2047 encoded-words when flattening an email message
using a modern email policy. Previously, when an encoded-word was too long
for a line, it would be decoded, split across lines, and re-encoded, but commas
and other special characters in the original text could be left unencoded and
unquoted. This could theoretically be used to spoof header lines using
a carefully constructed encoded-word if the resulting rendered email was
transmitted or re-parsed.
Loading