Skip to content

Commit 1f7fffb

Browse files
committed
#2830: add html.escape() helper and move cgi.escape() uses in the standard library to it. It defaults to quote=True and also escapes single quotes, which makes casual use safer. The cgi.escape() interface is not touched, but emits a (silent) PendingDeprecationWarning.
1 parent 70543ac commit 1f7fffb

File tree

11 files changed

+94
-28
lines changed

11 files changed

+94
-28
lines changed

Doc/howto/webservers.rst

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -293,7 +293,7 @@ following WSGI-application::
293293
# -*- coding: UTF-8 -*-
294294

295295
import sys, os
296-
from cgi import escape
296+
from html import escape
297297
from flup.server.fcgi import WSGIServer
298298

299299
def app(environ, start_response):

Doc/library/cgi.rst

Lines changed: 7 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -328,9 +328,9 @@ algorithms implemented in this module in other circumstances.
328328
attribute value delimited by double quotes, as in ``<a href="...">``. Note
329329
that single quotes are never translated.
330330

331-
If the value to be quoted might include single- or double-quote characters,
332-
or both, consider using the :func:`~xml.sax.saxutils.quoteattr` function in the
333-
:mod:`xml.sax.saxutils` module instead.
331+
.. deprecated:: 3.2
332+
This function is unsafe because *quote* is false by default, and is
333+
therefore deprecated. Use :func:`html.escape` instead.
334334

335335

336336
.. _cgi-security:
@@ -508,8 +508,8 @@ Common problems and solutions
508508

509509
.. rubric:: Footnotes
510510

511-
.. [#] Note that some recent versions of the HTML specification do state what order the
512-
field values should be supplied in, but knowing whether a request was
513-
received from a conforming browser, or even from a browser at all, is tedious
514-
and error-prone.
511+
.. [#] Note that some recent versions of the HTML specification do state what
512+
order the field values should be supplied in, but knowing whether a request
513+
was received from a conforming browser, or even from a browser at all, is
514+
tedious and error-prone.
515515

Doc/library/html.rst

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,18 @@
1+
:mod:`html` --- HyperText Markup Language support
2+
=================================================
3+
4+
.. module:: html
5+
:synopsis: Helpers for manipulating HTML.
6+
7+
.. versionadded:: 3.2
8+
9+
10+
This module defines utilities to manipulate HTML.
11+
12+
.. function:: escape(s, quote=True)
13+
14+
Convert the characters ``&``, ``<`` and ``>`` in string *s* to HTML-safe
15+
sequences. Use this if you need to display text that might contain such
16+
characters in HTML. If the optional flag *quote* is true, the characters
17+
(``"``) and (``'``) are also translated; this helps for inclusion in an HTML
18+
attribute value delimited by quotes, as in ``<a href="...">``.

Doc/library/markup.rst

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,7 @@ definition of the Python bindings for the DOM and SAX interfaces.
2020

2121
.. toctree::
2222

23+
html.rst
2324
html.parser.rst
2425
html.entities.rst
2526
pyexpat.rst

Lib/cgi.py

Lines changed: 13 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -31,13 +31,13 @@
3131
# Imports
3232
# =======
3333

34-
from operator import attrgetter
3534
from io import StringIO
3635
import sys
3736
import os
3837
import urllib.parse
3938
import email.parser
4039
from warnings import warn
40+
import html
4141

4242
__all__ = ["MiniFieldStorage", "FieldStorage",
4343
"parse", "parse_qs", "parse_qsl", "parse_multipart",
@@ -800,8 +800,8 @@ def print_exception(type=None, value=None, tb=None, limit=None):
800800
list = traceback.format_tb(tb, limit) + \
801801
traceback.format_exception_only(type, value)
802802
print("<PRE>%s<B>%s</B></PRE>" % (
803-
escape("".join(list[:-1])),
804-
escape(list[-1]),
803+
html.escape("".join(list[:-1])),
804+
html.escape(list[-1]),
805805
))
806806
del tb
807807

@@ -812,7 +812,7 @@ def print_environ(environ=os.environ):
812812
print("<H3>Shell Environment:</H3>")
813813
print("<DL>")
814814
for key in keys:
815-
print("<DT>", escape(key), "<DD>", escape(environ[key]))
815+
print("<DT>", html.escape(key), "<DD>", html.escape(environ[key]))
816816
print("</DL>")
817817
print()
818818

@@ -825,10 +825,10 @@ def print_form(form):
825825
print("<P>No form fields.")
826826
print("<DL>")
827827
for key in keys:
828-
print("<DT>" + escape(key) + ":", end=' ')
828+
print("<DT>" + html.escape(key) + ":", end=' ')
829829
value = form[key]
830-
print("<i>" + escape(repr(type(value))) + "</i>")
831-
print("<DD>" + escape(repr(value)))
830+
print("<i>" + html.escape(repr(type(value))) + "</i>")
831+
print("<DD>" + html.escape(repr(value)))
832832
print("</DL>")
833833
print()
834834

@@ -839,9 +839,9 @@ def print_directory():
839839
try:
840840
pwd = os.getcwd()
841841
except os.error as msg:
842-
print("os.error:", escape(str(msg)))
842+
print("os.error:", html.escape(str(msg)))
843843
else:
844-
print(escape(pwd))
844+
print(html.escape(pwd))
845845
print()
846846

847847
def print_arguments():
@@ -899,16 +899,17 @@ def print_environ_usage():
899899
# =========
900900

901901
def escape(s, quote=None):
902-
'''Replace special characters "&", "<" and ">" to HTML-safe sequences.
903-
If the optional flag quote is true, the quotation mark character (")
904-
is also translated.'''
902+
"""Deprecated API."""
903+
warn("cgi.escape is deprecated, use html.escape instead",
904+
PendingDeprecationWarning, stacklevel=2)
905905
s = s.replace("&", "&amp;") # Must be done first!
906906
s = s.replace("<", "&lt;")
907907
s = s.replace(">", "&gt;")
908908
if quote:
909909
s = s.replace('"', "&quot;")
910910
return s
911911

912+
912913
def valid_boundary(s, _vb_pattern="^[ -~]{0,200}[!-~]$"):
913914
import re
914915
return re.match(_vb_pattern, s)

Lib/html/__init__.py

Lines changed: 20 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1 +1,20 @@
1-
# This directory is a Python package.
1+
"""
2+
General functions for HTML manipulation.
3+
"""
4+
5+
6+
_escape_map = {ord('&'): '&amp;', ord('<'): '&lt;', ord('>'): '&gt;'}
7+
_escape_map_full = {ord('&'): '&amp;', ord('<'): '&lt;', ord('>'): '&gt;',
8+
ord('"'): '&quot;', ord('\''): '&#x27;'}
9+
10+
# NB: this is a candidate for a bytes/string polymorphic interface
11+
12+
def escape(s, quote=True):
13+
"""
14+
Replace special characters "&", "<" and ">" with HTML-safe sequences.
15+
If the optional flag quote is true (the default), the quotation mark
16+
characters (") and (') are also translated.
17+
"""
18+
if quote:
19+
return s.translate(_escape_map_full)
20+
return s.translate(_escape_map)

Lib/http/server.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -84,7 +84,7 @@
8484

8585
__all__ = ["HTTPServer", "BaseHTTPRequestHandler"]
8686

87-
import cgi
87+
import html
8888
import email.message
8989
import email.parser
9090
import http.client
@@ -705,7 +705,7 @@ def list_directory(self, path):
705705
return None
706706
list.sort(key=lambda a: a.lower())
707707
r = []
708-
displaypath = cgi.escape(urllib.parse.unquote(self.path))
708+
displaypath = html.escape(urllib.parse.unquote(self.path))
709709
r.append('<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 3.2 Final//EN">')
710710
r.append("<html>\n<title>Directory listing for %s</title>\n" % displaypath)
711711
r.append("<body>\n<h2>Directory listing for %s</h2>\n" % displaypath)
@@ -721,7 +721,7 @@ def list_directory(self, path):
721721
displayname = name + "@"
722722
# Note: a link to a directory displays with @ and links with /
723723
r.append('<li><a href="%s">%s</a>\n'
724-
% (urllib.parse.quote(linkname), cgi.escape(displayname)))
724+
% (urllib.parse.quote(linkname), html.escape(displayname)))
725725
r.append("</ul>\n<hr>\n</body>\n</html>\n")
726726
enc = sys.getfilesystemencoding()
727727
encoded = ''.join(r).encode(enc)

Lib/lib2to3/tests/test_util.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -568,8 +568,8 @@ def test_beginning(self):
568568

569569
def test_from_import(self):
570570
node = parse('bar()')
571-
fixer_util.touch_import("cgi", "escape", node)
572-
self.assertEqual(str(node), 'from cgi import escape\nbar()\n\n')
571+
fixer_util.touch_import("html", "escape", node)
572+
self.assertEqual(str(node), 'from html import escape\nbar()\n\n')
573573

574574
def test_name_import(self):
575575
node = parse('bar()')

Lib/test/test_html.py

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
"""
2+
Tests for the html module functions.
3+
"""
4+
5+
import html
6+
import unittest
7+
from test.support import run_unittest
8+
9+
10+
class HtmlTests(unittest.TestCase):
11+
def test_escape(self):
12+
self.assertEqual(
13+
html.escape('\'<script>"&foo;"</script>\''),
14+
'&#x27;&lt;script&gt;&quot;&amp;foo;&quot;&lt;/script&gt;&#x27;')
15+
self.assertEqual(
16+
html.escape('\'<script>"&foo;"</script>\'', False),
17+
'\'&lt;script&gt;"&amp;foo;"&lt;/script&gt;\'')
18+
19+
20+
def test_main():
21+
run_unittest(HtmlTests)
22+
23+
if __name__ == '__main__':
24+
test_main()

Lib/test/test_xml_etree.py

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,7 @@
1212
# except if the test is specific to the Python implementation.
1313

1414
import sys
15-
import cgi
15+
import html
1616
import unittest
1717

1818
from test import support
@@ -1328,7 +1328,7 @@ def processinginstruction():
13281328
<p>Example.</p>
13291329
<xi:include href="{}"/>
13301330
</document>
1331-
""".format(cgi.escape(SIMPLE_XMLFILE, True))
1331+
""".format(html.escape(SIMPLE_XMLFILE, True))
13321332

13331333
def xinclude_loader(href, parse="xml", encoding=None):
13341334
try:

Misc/NEWS

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,9 @@ Core and Builtins
2424
Library
2525
-------
2626

27+
- Issue #2830: Add the ``html.escape()`` function, which quotes all problematic
28+
characters by default. Deprecate ``cgi.escape()``.
29+
2730
- Issue 9409: Fix the regex to match all kind of filenames, for interactive
2831
debugging in doctests.
2932

0 commit comments

Comments
 (0)