Skip to content

Commit 27e9de6

Browse files
committed
#20531: Revert e20f98a8ed71, the 3.4 version of the #19063 fix.
1 parent 790202d commit 27e9de6

File tree

5 files changed

+38
-78
lines changed

5 files changed

+38
-78
lines changed

Doc/library/email.message.rst

+1-7
Original file line number | Diff line number | Diff line change
@@ -196,13 +196,7 @@ Here are the methods of the :class:`Message` class:
196196

197197
Set the entire message object's payload to *payload*. It is the client's
198198
responsibility to ensure the payload invariants. Optional *charset* sets
199-
the message's character set; see :meth:`set_charset` for details. If
200-
*payload* is a string containing non-ASCII characters, *charset* is
201-
required.
202-
203-
.. versionchanged:: 3.4
204-
Previous to 3.4 *charset* was not required when *payload* was a
205-
non-ASCII string, but omitting it produced nonsense results.
199+
the message's default character set; see :meth:`set_charset` for details.
206200

207201
.. method:: set_charset(charset)
208202

Lib/email/charset.py

+18-11
Original file line number | Diff line number | Diff line change
@@ -378,19 +378,18 @@ def _get_encoder(self, header_bytes):
378378
return None
379379

380380
def body_encode(self, string):
381-
"""Body-encode a string, converting it first to bytes if needed.
381+
"""Body-encode a string by converting it first to bytes.
382382
383383
The type of encoding (base64 or quoted-printable) will be based on
384-
self.body_encoding. If body_encoding is None, we perform no CTE
385-
encoding (the CTE will be either 7bit or 8bit), we just encode the
386-
binary representation to ascii using the surrogateescape error handler,
387-
which will enable the Generators to produce the correct output.
384+
self.body_encoding. If body_encoding is None, we assume the
385+
output charset is a 7bit encoding, so re-encoding the decoded
386+
string using the ascii codec produces the correct string version
387+
of the content.
388388
"""
389-
if not string:
390-
return string
391-
if isinstance(string, str):
392-
string = string.encode(self.output_charset)
389+
# 7bit/8bit encodings return the string unchanged (modulo conversions)
393390
if self.body_encoding is BASE64:
391+
if isinstance(string, str):
392+
string = string.encode(self.output_charset)
394393
return email.base64mime.body_encode(string)
395394
elif self.body_encoding is QP:
396395
# quopromime.body_encode takes a string, but operates on it as if
@@ -399,7 +398,15 @@ def body_encode(self, string):
399398
# character set, then, we must turn it into pseudo bytes via the
400399
# latin1 charset, which will encode any byte as a single code point
401400
# between 0 and 255, which is what body_encode is expecting.
402-
string = string.decode('latin1')
401+
#
402+
# Note that this clause doesn't handle the case of a _payload that
403+
# is already bytes. It never did, and the semantics of _payload
404+
# being bytes has never been nailed down, so fixing that is a
405+
# longer term TODO.
406+
if isinstance(string, str):
407+
string = string.encode(self.output_charset).decode('latin1')
403408
return email.quoprimime.body_encode(string)
404409
else:
405-
return string.decode('ascii', 'surrogateescape')
410+
if isinstance(string, str):
411+
string = string.encode(self.output_charset).decode('ascii')
412+
return string

Lib/email/message.py

+4-18
Original file line number | Diff line number | Diff line change
@@ -301,23 +301,9 @@ def set_payload(self, payload, charset=None):
301301
Optional charset sets the message's default character set. See
302302
set_charset() for details.
303303
"""
304-
if hasattr(payload, 'encode'):
305-
if charset is None:
306-
try:
307-
payload.encode('ascii', 'surrogateescape')
308-
except UnicodeError:
309-
raise TypeError("charset argument must be specified"
310-
" when non-ASCII characters are used in the"
311-
" payload") from None
312-
self._payload = payload
313-
return
314-
if not isinstance(charset, Charset):
315-
charset = Charset(charset)
316-
payload = payload.encode(charset.output_charset)
317-
if hasattr(payload, 'decode'):
318-
self._payload = payload.decode('ascii', 'surrogateescape')
319-
else:
320-
self._payload = payload
304+
if isinstance(payload, bytes):
305+
payload = payload.decode('ascii', 'surrogateescape')
306+
self._payload = payload
321307
if charset is not None:
322308
self.set_charset(charset)
323309

@@ -356,7 +342,7 @@ def set_charset(self, charset):
356342
try:
357343
cte(self)
358344
except TypeError:
359-
self._payload = charset.body_encode(self.get_payload(decode=True))
345+
self._payload = charset.body_encode(self._payload)
360346
self.add_header('Content-Transfer-Encoding', cte)
361347

362348
def get_charset(self):

Lib/test/test_email/test_contentmanager.py

+3-2
Original file line number | Diff line number | Diff line change
@@ -208,11 +208,12 @@ def test_get_text_plain_bad_utf8_quoted_printable_ignore_errors(self):
208208
"Basìc tëxt.\n")
209209

210210
def test_get_text_plain_utf8_base64_recoverable_bad_CTE_data(self):
211-
m = self._bytes_msg(textwrap.dedent("""\
211+
m = self._str_msg(textwrap.dedent("""\
212212
Content-Type: text/plain; charset="utf8"
213213
Content-Transfer-Encoding: base64
214214
215-
QmFzw6xjIHTDq3h0Lgo""").encode('ascii') + b'\xFF=\n')
215+
QmFzw6xjIHTDq3h0Lgo\xFF=
216+
"""))
216217
self.assertEqual(raw_data_manager.get_content(m, errors='ignore'),
217218
"Basìc tëxt.\n")
218219

Lib/test/test_email/test_email.py

+12-40
Original file line number | Diff line number | Diff line change
@@ -92,44 +92,6 @@ def test_set_payload_with_charset(self):
9292
msg.set_payload('This is a string payload', charset)
9393
self.assertEqual(msg.get_charset().input_charset, 'iso-8859-1')
9494

95-
def test_set_payload_with_8bit_data_and_charset(self):
96-
data = b'\xd0\x90\xd0\x91\xd0\x92'
97-
charset = Charset('utf-8')
98-
msg = Message()
99-
msg.set_payload(data, charset)
100-
self.assertEqual(msg['content-transfer-encoding'], 'base64')
101-
self.assertEqual(msg.get_payload(decode=True), data)
102-
self.assertEqual(msg.get_payload(), '0JDQkdCS\n')
103-
104-
def test_set_payload_with_non_ascii_and_charset_body_encoding_none(self):
105-
data = b'\xd0\x90\xd0\x91\xd0\x92'
106-
charset = Charset('utf-8')
107-
charset.body_encoding = None # Disable base64 encoding
108-
msg = Message()
109-
msg.set_payload(data.decode('utf-8'), charset)
110-
self.assertEqual(msg['content-transfer-encoding'], '8bit')
111-
self.assertEqual(msg.get_payload(decode=True), data)
112-
113-
def test_set_payload_with_8bit_data_and_charset_body_encoding_none(self):
114-
data = b'\xd0\x90\xd0\x91\xd0\x92'
115-
charset = Charset('utf-8')
116-
charset.body_encoding = None # Disable base64 encoding
117-
msg = Message()
118-
msg.set_payload(data, charset)
119-
self.assertEqual(msg['content-transfer-encoding'], '8bit')
120-
self.assertEqual(msg.get_payload(decode=True), data)
121-
122-
def test_set_payload_to_list(self):
123-
msg = Message()
124-
msg.set_payload([])
125-
self.assertEqual(msg.get_payload(), [])
126-
127-
def test_set_payload_with_non_ascii_and_no_charset_raises(self):
128-
data = b'\xd0\x90\xd0\x91\xd0\x92'.decode('utf-8')
129-
msg = Message()
130-
with self.assertRaises(TypeError):
131-
msg.set_payload(data)
132-
13395
def test_get_charsets(self):
13496
eq = self.assertEqual
13597

@@ -596,10 +558,20 @@ def test_broken_base64_payload(self):
596558
self.assertIsInstance(msg.defects[0],
597559
errors.InvalidBase64CharactersDefect)
598560

561+
def test_broken_unicode_payload(self):
562+
# This test improves coverage but is not a compliance test.
563+
# The behavior in this situation is currently undefined by the API.
564+
x = 'this is a br\xf6ken thing to do'
565+
msg = Message()
566+
msg['content-type'] = 'text/plain'
567+
msg['content-transfer-encoding'] = '8bit'
568+
msg.set_payload(x)
569+
self.assertEqual(msg.get_payload(decode=True),
570+
bytes(x, 'raw-unicode-escape'))
571+
599572
def test_questionable_bytes_payload(self):
600573
# This test improves coverage but is not a compliance test,
601-
# since it involves poking inside the black box in a way
602-
# that actually breaks the model invariants.
574+
# since it involves poking inside the black box.
603575
x = 'this is a quéstionable thing to do'.encode('utf-8')
604576
msg = Message()
605577
msg['content-type'] = 'text/plain; charset="utf-8"'

0 commit comments

Comments
 (0)