From 637e90b430f2dedf5810509e84c10141250ce46c Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Mon, 11 Jan 2021 17:35:44 +0200 Subject: [PATCH 01/10] Drop support for EOL Python 2.7 and 3.5 --- .appveyor.yml | 4 ---- .github/workflows/python-tox.yml | 2 +- .travis.yml | 5 +---- README.rst | 2 +- html5lib/_trie/_base.py | 5 +---- html5lib/_utils.py | 5 +---- html5lib/tests/support.py | 2 -- html5lib/treebuilders/dom.py | 5 +---- html5lib/treebuilders/etree_lxml.py | 8 ++------ parse.py | 9 ++------- requirements-test.txt | 6 ++---- setup.py | 12 ++++-------- tox.ini | 2 +- 13 files changed, 17 insertions(+), 50 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index a1a3e347..0b3c4e8a 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -3,10 +3,6 @@ environment: global: PATH: "C:\\Python27\\Scripts\\;%PATH%" matrix: - - TOXENV: py27-base - - TOXENV: py27-optional - - TOXENV: py35-base - - TOXENV: py35-optional - TOXENV: py36-base - TOXENV: py36-optional - TOXENV: py37-base diff --git a/.github/workflows/python-tox.yml b/.github/workflows/python-tox.yml index ec5cf636..9673f503 100644 --- a/.github/workflows/python-tox.yml +++ b/.github/workflows/python-tox.yml @@ -7,7 +7,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python: [2.7, 3.5, 3.6, 3.7, 3.8, pypy-2.7, pypy3] + python: [3.6, 3.7, 3.8, pypy3] steps: - uses: actions/checkout@v2 with: diff --git a/.travis.yml b/.travis.yml index d2d9e30e..a530faeb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,13 +1,10 @@ language: python python: - "pypy3" - - "pypy" + - "3.9" - "3.8" - "3.7" - "3.6" - - "3.5" - - "2.7" - - "3.9-dev" cache: pip diff --git a/README.rst b/README.rst index d367905d..fef6d315 100644 --- a/README.rst +++ b/README.rst @@ -91,7 +91,7 @@ More documentation is available at https://html5lib.readthedocs.io/. Installation ------------ -html5lib works on CPython 2.7+, CPython 3.5+ and PyPy. To install: +html5lib works on CPython 3.6+ and PyPy3. To install: .. 
code-block:: bash diff --git a/html5lib/_trie/_base.py b/html5lib/_trie/_base.py index 6b71975f..a9295a2b 100644 --- a/html5lib/_trie/_base.py +++ b/html5lib/_trie/_base.py @@ -1,9 +1,6 @@ from __future__ import absolute_import, division, unicode_literals -try: - from collections.abc import Mapping -except ImportError: # Python 2.7 - from collections import Mapping +from collections.abc import Mapping class Trie(Mapping): diff --git a/html5lib/_utils.py b/html5lib/_utils.py index 9ea57942..13d4c656 100644 --- a/html5lib/_utils.py +++ b/html5lib/_utils.py @@ -2,10 +2,7 @@ from types import ModuleType -try: - from collections.abc import Mapping -except ImportError: - from collections import Mapping +from collections.abc import Mapping from six import text_type, PY3 diff --git a/html5lib/tests/support.py b/html5lib/tests/support.py index 9cd5afbe..4a53dc18 100644 --- a/html5lib/tests/support.py +++ b/html5lib/tests/support.py @@ -150,8 +150,6 @@ def convertData(data): def errorMessage(input, expected, actual): msg = ("Input:\n%s\nExpected:\n%s\nReceived\n%s\n" % (repr(input), repr(expected), repr(actual))) - if sys.version_info[0] == 2: - msg = msg.encode("ascii", "backslashreplace") return msg diff --git a/html5lib/treebuilders/dom.py b/html5lib/treebuilders/dom.py index d8b53004..818a3343 100644 --- a/html5lib/treebuilders/dom.py +++ b/html5lib/treebuilders/dom.py @@ -1,10 +1,7 @@ from __future__ import absolute_import, division, unicode_literals -try: - from collections.abc import MutableMapping -except ImportError: # Python 2.7 - from collections import MutableMapping +from collections.abc import MutableMapping from xml.dom import minidom, Node import weakref diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py index e73de61a..d99a51a9 100644 --- a/html5lib/treebuilders/etree_lxml.py +++ b/html5lib/treebuilders/etree_lxml.py @@ -14,12 +14,8 @@ import warnings import re -import sys -try: - from collections.abc import MutableMapping -except ImportError: - from collections import MutableMapping +from collections.abc import MutableMapping from . 
import base from ..constants import DataLossWarning @@ -89,7 +85,7 @@ def serializeElement(element, indent=0): next_element = next_element.getnext() elif isinstance(element, str) or isinstance(element, bytes): # Text in a fragment - assert isinstance(element, str) or sys.version_info[0] == 2 + assert isinstance(element, str) rv.append("|%s\"%s\"" % (' ' * indent, element)) else: # Fragment case diff --git a/parse.py b/parse.py index e6806b46..f973fbb4 100755 --- a/parse.py +++ b/parse.py @@ -36,8 +36,7 @@ def parse(): pass elif f == '-': f = sys.stdin - if sys.version_info[0] >= 3: - encoding = None + encoding = None else: try: # Try opening from file system @@ -135,11 +134,7 @@ def printOutput(parser, document, opts): kwargs["sanitize"] = True tokens = treewalkers.getTreeWalker(opts.treebuilder)(document) - if sys.version_info[0] >= 3: - encoding = None - else: - encoding = "utf-8" - for text in serializer.HTMLSerializer(**kwargs).serialize(tokens, encoding=encoding): + for text in serializer.HTMLSerializer(**kwargs).serialize(tokens, encoding=None): sys.stdout.write(text) if not text.endswith('\n'): sys.stdout.write('\n') diff --git a/requirements-test.txt b/requirements-test.txt index 57f8f617..ce882670 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -2,9 +2,7 @@ tox>=3.15.1,<4 flake8>=3.8.1,<3.9 -pytest>=4.6.10,<5 ; python_version < '3' -pytest>=5.4.2,<7 ; python_version >= '3' +pytest>=5.4.2,<7 coverage>=5.1,<6 pytest-expect>=1.1.0,<2 -mock>=3.0.5,<4 ; python_version < '3.6' -mock>=4.0.2,<5 ; python_version >= '3.6' +mock>=4.0.2,<5 diff --git a/setup.py b/setup.py index f84c1284..33ab359d 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,6 @@ import ast import codecs -import sys from os.path import join, dirname from setuptools import setup, find_packages, __version__ as setuptools_version @@ -18,8 +17,7 @@ # _markerlib.default_environment() obtains its data from _VARS # and wraps it in another dict, but _markerlib_evaluate writes -# to the dict while it is iterating the keys, causing an error -# on Python 3 only. +# to the dict while it is iterating the keys, causing an error. # Replace _markerlib.default_environment to return a custom dict # that has all the necessary markers, and ignores any writes. @@ -32,7 +30,7 @@ def pop(self, i=-1): return self[i] -if _markerlib and sys.version_info[0] == 3: +if _markerlib: env = _markerlib.markers._VARS for key in list(env.keys()): new_key = key.replace('.', '_') @@ -63,13 +61,11 @@ def default_environment(): 'License :: OSI Approved :: MIT License', 'Operating System :: OS Independent', 'Programming Language :: Python', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3 :: Only', 'Programming Language :: Python :: Implementation :: CPython', 'Programming Language :: Python :: Implementation :: PyPy', 'Topic :: Software Development :: Libraries :: Python Modules', @@ -107,7 +103,7 @@ def default_environment(): 'six>=1.9', 'webencodings', ], - python_requires=">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*", + python_requires=">=3.6", extras_require={ # A conditional extra will only install these items when the extra is # requested and the condition matches. 
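Aside on the pattern this patch removes: the html5lib/_trie/_base.py, html5lib/_utils.py, html5lib/treebuilders/dom.py and html5lib/treebuilders/etree_lxml.py hunks above all delete the same Python 2 import shim for the collections ABCs (Mapping, MutableMapping). Below is a minimal sketch of that shim and the Python 3.6+-only import it collapses to; FrozenDict is a hypothetical toy class used only to exercise the import and is not html5lib code.

    # Pre-patch pattern: tolerate Python 2.7, where the ABCs still lived in
    # the top-level collections module.
    try:
        from collections.abc import Mapping  # Python 3.3+
    except ImportError:  # Python 2.7
        from collections import Mapping

    # Post-patch form, as now used in html5lib/_trie/_base.py:
    from collections.abc import Mapping


    class FrozenDict(Mapping):
        """Hypothetical read-only mapping; exists only to exercise the ABC."""

        def __init__(self, data):
            self._data = dict(data)

        def __getitem__(self, key):
            return self._data[key]

        def __iter__(self):
            return iter(self._data)

        def __len__(self):
            return len(self._data)


    assert isinstance(FrozenDict({"a": 1}), Mapping)

Dropping the fallback also future-proofs the code: importing the ABCs from the plain collections module has been deprecated since Python 3.3 and was removed in Python 3.10.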
diff --git a/tox.ini b/tox.ini index 16b8cf41..05ca59aa 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py{27,35,36,37,38,py,py3}-{base,six19,optional} +envlist = py{36,37,38,py3}-{base,six19,optional} [testenv] deps = From 2f1d6e09265e5fb05f137fa4cb7eb2264552a73a Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Mon, 24 May 2021 20:41:49 +0300 Subject: [PATCH 02/10] Upgrade Python syntax with pyupgrade --py3-plus --- debug-info.py | 2 - doc/conf.py | 3 +- html5lib/__init__.py | 1 - html5lib/_ihatexml.py | 4 +- html5lib/_inputstream.py | 26 +++++----- html5lib/_tokenizer.py | 6 +-- html5lib/_trie/__init__.py | 2 - html5lib/_trie/_base.py | 4 +- html5lib/_trie/py.py | 3 +- html5lib/_utils.py | 14 ++--- html5lib/constants.py | 2 - html5lib/filters/alphabeticalattributes.py | 2 - html5lib/filters/base.py | 5 +- html5lib/filters/inject_meta_charset.py | 2 - html5lib/filters/lint.py | 36 +++++++------ html5lib/filters/optionaltags.py | 2 - html5lib/filters/sanitizer.py | 7 ++- html5lib/filters/whitespace.py | 2 - html5lib/html5parser.py | 19 ++++--- html5lib/serializer.py | 9 ++-- html5lib/tests/__init__.py | 1 - html5lib/tests/conftest.py | 5 +- html5lib/tests/sanitizer.py | 4 +- html5lib/tests/support.py | 4 +- html5lib/tests/test_alphabeticalattributes.py | 2 - html5lib/tests/test_encoding.py | 6 +-- html5lib/tests/test_meta.py | 11 ++-- html5lib/tests/test_optionaltags_filter.py | 2 - html5lib/tests/test_parser2.py | 6 +-- html5lib/tests/test_sanitizer.py | 24 ++++----- html5lib/tests/test_serializer.py | 6 +-- html5lib/tests/test_stream.py | 8 ++- html5lib/tests/test_tokenizer2.py | 10 ++-- html5lib/tests/test_treeadapters.py | 2 - html5lib/tests/test_treewalkers.py | 12 ++--- html5lib/tests/test_whitespace_filter.py | 2 - html5lib/tests/tokenizer.py | 14 +++-- html5lib/tests/tokenizertotree.py | 4 +- html5lib/tests/tree_construction.py | 15 +++--- html5lib/treeadapters/__init__.py | 1 - html5lib/treeadapters/genshi.py | 6 +-- html5lib/treeadapters/sax.py | 2 - html5lib/treebuilders/__init__.py | 1 - html5lib/treebuilders/base.py | 17 +++---- html5lib/treebuilders/dom.py | 21 ++++---- html5lib/treebuilders/etree.py | 41 ++++++++------- html5lib/treebuilders/etree_lxml.py | 51 +++++++++---------- html5lib/treewalkers/__init__.py | 19 ++++--- html5lib/treewalkers/base.py | 12 ++--- html5lib/treewalkers/dom.py | 2 - html5lib/treewalkers/etree.py | 4 +- html5lib/treewalkers/etree_lxml.py | 9 ++-- html5lib/treewalkers/genshi.py | 18 +++---- parse.py | 4 +- setup.py | 2 - utils/entities.py | 6 +-- 56 files changed, 197 insertions(+), 308 deletions(-) diff --git a/debug-info.py b/debug-info.py index b47b8ebf..6e2a19bf 100644 --- a/debug-info.py +++ b/debug-info.py @@ -1,5 +1,3 @@ -from __future__ import print_function, unicode_literals - import platform import sys diff --git a/doc/conf.py b/doc/conf.py index 22ebab4f..d28655ac 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # # html5lib documentation build configuration file, created by # sphinx-quickstart on Wed May 8 00:04:49 2013. 
@@ -92,7 +91,7 @@ ] -class CExtMock(object): +class CExtMock: """Required for autodoc on readthedocs.org where you cannot build C extensions.""" def __init__(self, *args, **kwargs): pass diff --git a/html5lib/__init__.py b/html5lib/__init__.py index 7b854f99..d2c68855 100644 --- a/html5lib/__init__.py +++ b/html5lib/__init__.py @@ -20,7 +20,6 @@ * :func:`~.serializer.serialize` """ -from __future__ import absolute_import, division, unicode_literals from .html5parser import HTMLParser, parse, parseFragment from .treebuilders import getTreeBuilder diff --git a/html5lib/_ihatexml.py b/html5lib/_ihatexml.py index 3ff803c1..90757cbf 100644 --- a/html5lib/_ihatexml.py +++ b/html5lib/_ihatexml.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - import re import warnings @@ -184,7 +182,7 @@ def escapeRegexp(string): nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\\-'()+,./:=?;!*#@$_%]") -class InfosetFilter(object): +class InfosetFilter: replacementRegexp = re.compile(r"U[\dA-F]{5,5}") def __init__(self, diff --git a/html5lib/_inputstream.py b/html5lib/_inputstream.py index 0207dd21..b3a75452 100644 --- a/html5lib/_inputstream.py +++ b/html5lib/_inputstream.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from six import text_type from six.moves import http_client, urllib @@ -14,9 +12,9 @@ from . import _utils # Non-unicode versions of constants for use in the pre-parser -spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters]) -asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters]) -asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase]) +spaceCharactersBytes = frozenset(item.encode("ascii") for item in spaceCharacters) +asciiLettersBytes = frozenset(item.encode("ascii") for item in asciiLetters) +asciiUppercaseBytes = frozenset(item.encode("ascii") for item in asciiUppercase) spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"]) @@ -48,7 +46,7 @@ charsUntilRegEx = {} -class BufferedStream(object): +class BufferedStream: """Buffering for streams that do not have buffering of their own The buffer is implemented as a list of chunks on the assumption that @@ -86,7 +84,7 @@ def read(self, bytes): return self._readFromBuffer(bytes) def _bufferedBytes(self): - return sum([len(item) for item in self.buffer]) + return sum(len(item) for item in self.buffer) def _readStream(self, bytes): data = self.stream.read(bytes) @@ -131,9 +129,9 @@ def HTMLInputStream(source, **kwargs): isinstance(source.fp, http_client.HTTPResponse))): isUnicode = False elif hasattr(source, "read"): - isUnicode = isinstance(source.read(0), text_type) + isUnicode = isinstance(source.read(0), str) else: - isUnicode = isinstance(source, text_type) + isUnicode = isinstance(source, str) if isUnicode: encodings = [x for x in kwargs if x.endswith("_encoding")] @@ -145,7 +143,7 @@ def HTMLInputStream(source, **kwargs): return HTMLBinaryInputStream(source, **kwargs) -class HTMLUnicodeInputStream(object): +class HTMLUnicodeInputStream: """Provides a unicode stream of characters to the HTMLTokenizer. 
This class takes care of character encoding and removing or replacing @@ -325,7 +323,7 @@ def charsUntil(self, characters, opposite=False): if __debug__: for c in characters: assert(ord(c) < 128) - regex = "".join(["\\x%02x" % ord(c) for c in characters]) + regex = "".join("\\x%02x" % ord(c) for c in characters) if not opposite: regex = "^%s" % regex chars = charsUntilRegEx[(characters, opposite)] = re.compile("[%s]+" % regex) @@ -524,7 +522,7 @@ def changeEncoding(self, newEncoding): self.rawStream.seek(0) self.charEncoding = (newEncoding, "certain") self.reset() - raise _ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding)) + raise _ReparseException("Encoding changed from {} to {}".format(self.charEncoding[0], newEncoding)) def detectBOM(self): """Attempts to detect at BOM at the start of the stream. If @@ -673,7 +671,7 @@ def jumpTo(self, bytes): return True -class EncodingParser(object): +class EncodingParser: """Mini parser for detecting character encoding from meta elements""" def __init__(self, data): @@ -861,7 +859,7 @@ def getAttribute(self): attrValue.append(c) -class ContentAttrParser(object): +class ContentAttrParser: def __init__(self, data): assert isinstance(data, bytes) self.data = data diff --git a/html5lib/_tokenizer.py b/html5lib/_tokenizer.py index 4748a197..e3750c5c 100644 --- a/html5lib/_tokenizer.py +++ b/html5lib/_tokenizer.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from six import unichr as chr from collections import deque, OrderedDict @@ -24,7 +22,7 @@ attributeMap = OrderedDict -class HTMLTokenizer(object): +class HTMLTokenizer: """ This class takes care of tokenizing HTML. * self.currentToken @@ -50,7 +48,7 @@ def __init__(self, stream, parser=None, **kwargs): # The current token being created self.currentToken = None - super(HTMLTokenizer, self).__init__() + super().__init__() def __iter__(self): """ This is where the magic happens. 
diff --git a/html5lib/_trie/__init__.py b/html5lib/_trie/__init__.py index 07bad5d3..98a6841a 100644 --- a/html5lib/_trie/__init__.py +++ b/html5lib/_trie/__init__.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from .py import Trie __all__ = ["Trie"] diff --git a/html5lib/_trie/_base.py b/html5lib/_trie/_base.py index a9295a2b..6b2977b2 100644 --- a/html5lib/_trie/_base.py +++ b/html5lib/_trie/_base.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from collections.abc import Mapping @@ -8,7 +6,7 @@ class Trie(Mapping): def keys(self, prefix=None): # pylint:disable=arguments-differ - keys = super(Trie, self).keys() + keys = super().keys() if prefix is None: return set(keys) diff --git a/html5lib/_trie/py.py b/html5lib/_trie/py.py index c2ba3da7..2e1aa188 100644 --- a/html5lib/_trie/py.py +++ b/html5lib/_trie/py.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from six import text_type from bisect import bisect_left @@ -8,7 +7,7 @@ class Trie(ABCTrie): def __init__(self, data): - if not all(isinstance(x, text_type) for x in data.keys()): + if not all(isinstance(x, str) for x in data.keys()): raise TypeError("All keys must be strings") self._data = data diff --git a/html5lib/_utils.py b/html5lib/_utils.py index 13d4c656..f59eec1e 100644 --- a/html5lib/_utils.py +++ b/html5lib/_utils.py @@ -1,18 +1,10 @@ -from __future__ import absolute_import, division, unicode_literals - from types import ModuleType from collections.abc import Mapping from six import text_type, PY3 -if PY3: - import xml.etree.ElementTree as default_etree -else: - try: - import xml.etree.cElementTree as default_etree - except ImportError: - import xml.etree.ElementTree as default_etree +import xml.etree.ElementTree as default_etree __all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair", @@ -28,10 +20,10 @@ # escapes. try: _x = eval('"\\uD800"') # pylint:disable=eval-used - if not isinstance(_x, text_type): + if not isinstance(_x, str): # We need this with u"" because of http://bugs.jython.org/issue2039 _x = eval('u"\\uD800"') # pylint:disable=eval-used - assert isinstance(_x, text_type) + assert isinstance(_x, str) except Exception: supports_lone_surrogates = False else: diff --git a/html5lib/constants.py b/html5lib/constants.py index fe3e237c..3596ea21 100644 --- a/html5lib/constants.py +++ b/html5lib/constants.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - import string EOF = None diff --git a/html5lib/filters/alphabeticalattributes.py b/html5lib/filters/alphabeticalattributes.py index 5ba926e3..d96ad62a 100644 --- a/html5lib/filters/alphabeticalattributes.py +++ b/html5lib/filters/alphabeticalattributes.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from . 
import base from collections import OrderedDict diff --git a/html5lib/filters/base.py b/html5lib/filters/base.py index c7dbaed0..6937911d 100644 --- a/html5lib/filters/base.py +++ b/html5lib/filters/base.py @@ -1,7 +1,4 @@ -from __future__ import absolute_import, division, unicode_literals - - -class Filter(object): +class Filter: def __init__(self, source): self.source = source diff --git a/html5lib/filters/inject_meta_charset.py b/html5lib/filters/inject_meta_charset.py index aefb5c84..cfa469c3 100644 --- a/html5lib/filters/inject_meta_charset.py +++ b/html5lib/filters/inject_meta_charset.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from . import base diff --git a/html5lib/filters/lint.py b/html5lib/filters/lint.py index acd4d7a2..ff6c5bd7 100644 --- a/html5lib/filters/lint.py +++ b/html5lib/filters/lint.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from six import text_type from . import base @@ -23,7 +21,7 @@ def __init__(self, source, require_matching_tags=True): :arg require_matching_tags: whether or not to require matching tags """ - super(Filter, self).__init__(source) + super().__init__(source) self.require_matching_tags = require_matching_tags def __iter__(self): @@ -33,9 +31,9 @@ def __iter__(self): if type in ("StartTag", "EmptyTag"): namespace = token["namespace"] name = token["name"] - assert namespace is None or isinstance(namespace, text_type) + assert namespace is None or isinstance(namespace, str) assert namespace != "" - assert isinstance(name, text_type) + assert isinstance(name, str) assert name != "" assert isinstance(token["data"], dict) if (not namespace or namespace == namespaces["html"]) and name in voidElements: @@ -45,49 +43,49 @@ def __iter__(self): if type == "StartTag" and self.require_matching_tags: open_elements.append((namespace, name)) for (namespace, name), value in token["data"].items(): - assert namespace is None or isinstance(namespace, text_type) + assert namespace is None or isinstance(namespace, str) assert namespace != "" - assert isinstance(name, text_type) + assert isinstance(name, str) assert name != "" - assert isinstance(value, text_type) + assert isinstance(value, str) elif type == "EndTag": namespace = token["namespace"] name = token["name"] - assert namespace is None or isinstance(namespace, text_type) + assert namespace is None or isinstance(namespace, str) assert namespace != "" - assert isinstance(name, text_type) + assert isinstance(name, str) assert name != "" if (not namespace or namespace == namespaces["html"]) and name in voidElements: - assert False, "Void element reported as EndTag token: %(tag)s" % {"tag": name} + assert False, "Void element reported as EndTag token: {tag}".format(tag=name) elif self.require_matching_tags: start = open_elements.pop() assert start == (namespace, name) elif type == "Comment": data = token["data"] - assert isinstance(data, text_type) + assert isinstance(data, str) elif type in ("Characters", "SpaceCharacters"): data = token["data"] - assert isinstance(data, text_type) + assert isinstance(data, str) assert data != "" if type == "SpaceCharacters": assert data.strip(spaceCharacters) == "" elif type == "Doctype": name = token["name"] - assert name is None or isinstance(name, text_type) - assert token["publicId"] is None or isinstance(name, text_type) - assert token["systemId"] is None or isinstance(name, text_type) + assert name is None or isinstance(name, str) + assert token["publicId"] is None or isinstance(name, 
str) + assert token["systemId"] is None or isinstance(name, str) elif type == "Entity": - assert isinstance(token["name"], text_type) + assert isinstance(token["name"], str) elif type == "SerializerError": - assert isinstance(token["data"], text_type) + assert isinstance(token["data"], str) else: - assert False, "Unknown token type: %(type)s" % {"type": type} + assert False, "Unknown token type: {type}".format(type=type) yield token diff --git a/html5lib/filters/optionaltags.py b/html5lib/filters/optionaltags.py index 4a865012..f1c21118 100644 --- a/html5lib/filters/optionaltags.py +++ b/html5lib/filters/optionaltags.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from . import base diff --git a/html5lib/filters/sanitizer.py b/html5lib/filters/sanitizer.py index 70ef9066..4b753d30 100644 --- a/html5lib/filters/sanitizer.py +++ b/html5lib/filters/sanitizer.py @@ -6,7 +6,6 @@ if Bleach is unsuitable for your needs. """ -from __future__ import absolute_import, division, unicode_literals import re import warnings @@ -766,7 +765,7 @@ def __init__(self, hrefs--these are removed """ - super(Filter, self).__init__(source) + super().__init__(source) warnings.warn(_deprecation_msg, DeprecationWarning) @@ -874,8 +873,8 @@ def disallowed_token(self, token): assert token_type in ("StartTag", "EmptyTag") attrs = [] for (ns, name), v in token["data"].items(): - attrs.append(' %s="%s"' % (name if ns is None else "%s:%s" % (prefixes[ns], name), escape(v))) - token["data"] = "<%s%s>" % (token["name"], ''.join(attrs)) + attrs.append(' {}="{}"'.format(name if ns is None else "{}:{}".format(prefixes[ns], name), escape(v))) + token["data"] = "<{}{}>".format(token["name"], ''.join(attrs)) else: token["data"] = "<%s>" % token["name"] if token.get("selfClosing"): diff --git a/html5lib/filters/whitespace.py b/html5lib/filters/whitespace.py index 0d12584b..2f35f4a0 100644 --- a/html5lib/filters/whitespace.py +++ b/html5lib/filters/whitespace.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - import re from . import base diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py index 74d829d9..b4c51821 100644 --- a/html5lib/html5parser.py +++ b/html5lib/html5parser.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from six import with_metaclass, viewkeys import types @@ -83,7 +82,7 @@ def __new__(meta, classname, bases, classDict): return Decorated -class HTMLParser(object): +class HTMLParser: """HTML parser Generates a tree structure from a stream of (possibly malformed) HTML. 
@@ -423,7 +422,7 @@ def getMetaclass(use_metaclass, metaclass_func): return type # pylint:disable=unused-argument - class Phase(with_metaclass(getMetaclass(debug, log))): + class Phase(metaclass=getMetaclass(debug, log)): """Base class for helper object that implements each phase of processing """ __slots__ = ("parser", "tree", "__startTagCache", "__endTagCache") @@ -944,7 +943,7 @@ class InBodyPhase(Phase): __slots__ = ("processSpaceCharacters",) def __init__(self, *args, **kwargs): - super(InBodyPhase, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) # Set this to the default handler self.processSpaceCharacters = self.processSpaceCharactersNonPre @@ -1002,8 +1001,8 @@ def processCharacters(self, token): self.tree.insertText(token["data"]) # This must be bad for performance if (self.parser.framesetOK and - any([char not in spaceCharacters - for char in token["data"]])): + any(char not in spaceCharacters + for char in token["data"])): self.parser.framesetOK = False def processSpaceCharactersNonPre(self, token): @@ -1844,13 +1843,13 @@ class InTableTextPhase(Phase): __slots__ = ("originalPhase", "characterTokens") def __init__(self, *args, **kwargs): - super(InTableTextPhase, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) self.originalPhase = None self.characterTokens = [] def flushCharacters(self): - data = "".join([item["data"] for item in self.characterTokens]) - if any([item not in spaceCharacters for item in data]): + data = "".join(item["data"] for item in self.characterTokens) + if any(item not in spaceCharacters for item in data): token = {"type": tokenTypes["Characters"], "data": data} self.parser.phases["inTable"].insertText(token) elif data: @@ -2776,7 +2775,7 @@ def processEndTag(self, token): def adjust_attributes(token, replacements): - needs_adjustment = viewkeys(token['data']) & viewkeys(replacements) + needs_adjustment = token['data'].keys() & replacements.keys() if needs_adjustment: token['data'] = type(token['data'])((replacements.get(k, k), v) for k, v in token['data'].items()) diff --git a/html5lib/serializer.py b/html5lib/serializer.py index c66df683..bbcbf9d6 100644 --- a/html5lib/serializer.py +++ b/html5lib/serializer.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from six import text_type import re @@ -101,7 +100,7 @@ def serialize(input, tree="etree", encoding=None, **serializer_opts): return s.render(walker(input), encoding) -class HTMLSerializer(object): +class HTMLSerializer: # attribute quoting options quote_attr_values = "legacy" # be secure by default @@ -222,14 +221,14 @@ def __init__(self, **kwargs): self.strict = False def encode(self, string): - assert(isinstance(string, text_type)) + assert(isinstance(string, str)) if self.encoding: return string.encode(self.encoding, "htmlentityreplace") else: return string def encodeStrict(self, string): - assert(isinstance(string, text_type)) + assert(isinstance(string, str)) if self.encoding: return string.encode(self.encoding, "strict") else: @@ -278,7 +277,7 @@ def serialize(self, treewalker, encoding=None): quote_char = "'" else: quote_char = '"' - doctype += " %s%s%s" % (quote_char, token["systemId"], quote_char) + doctype += " {}{}{}".format(quote_char, token["systemId"], quote_char) doctype += ">" yield self.encodeStrict(doctype) diff --git a/html5lib/tests/__init__.py b/html5lib/tests/__init__.py index b8ce2de3..e69de29b 100644 --- a/html5lib/tests/__init__.py +++ b/html5lib/tests/__init__.py @@ -1 +0,0 @@ -from __future__ import 
absolute_import, division, unicode_literals diff --git a/html5lib/tests/conftest.py b/html5lib/tests/conftest.py index fffeb50c..62a7991b 100644 --- a/html5lib/tests/conftest.py +++ b/html5lib/tests/conftest.py @@ -1,4 +1,3 @@ -from __future__ import print_function import os.path import sys @@ -54,7 +53,7 @@ def pytest_configure(config): # Check for optional requirements req_file = os.path.join(_root, "requirements-optional.txt") if os.path.exists(req_file): - with open(req_file, "r") as fp: + with open(req_file) as fp: for line in fp: if (line.strip() and not (line.startswith("-r") or @@ -70,7 +69,7 @@ def pytest_configure(config): try: installed = pkg_resources.working_set.find(req) except pkg_resources.VersionConflict: - msgs.append("Outdated version of %s installed, need %s" % (req.name, spec)) + msgs.append("Outdated version of {} installed, need {}".format(req.name, spec)) else: if not installed: msgs.append("Need %s" % spec) diff --git a/html5lib/tests/sanitizer.py b/html5lib/tests/sanitizer.py index 16e53868..fb7fadf9 100644 --- a/html5lib/tests/sanitizer.py +++ b/html5lib/tests/sanitizer.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - import codecs import json @@ -18,7 +16,7 @@ def collect(self): class SanitizerTest(pytest.Item): def __init__(self, name, parent, test): - super(SanitizerTest, self).__init__(name, parent) + super().__init__(name, parent) self.obj = lambda: 1 # this is to hack around skipif needing a function! self.test = test diff --git a/html5lib/tests/support.py b/html5lib/tests/support.py index 4a53dc18..d51cae12 100644 --- a/html5lib/tests/support.py +++ b/html5lib/tests/support.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - # pylint:disable=wrong-import-position import os @@ -86,7 +84,7 @@ def __getitem__(self, key): return dict.get(self, key, self.default) -class TestData(object): +class TestData: def __init__(self, filename, newTestHeading="data", encoding="utf8"): if encoding is None: self.f = open(filename, mode="rb") diff --git a/html5lib/tests/test_alphabeticalattributes.py b/html5lib/tests/test_alphabeticalattributes.py index 7d5b8e0f..3b82c2b0 100644 --- a/html5lib/tests/test_alphabeticalattributes.py +++ b/html5lib/tests/test_alphabeticalattributes.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from collections import OrderedDict import pytest diff --git a/html5lib/tests/test_encoding.py b/html5lib/tests/test_encoding.py index 47c4814a..ddad9100 100644 --- a/html5lib/tests/test_encoding.py +++ b/html5lib/tests/test_encoding.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - import os import pytest @@ -9,7 +7,7 @@ def test_basic_prescan_length(): - data = "Caf\u00E9".encode('utf-8') + data = "Caf\u00E9".encode() pad = 1024 - len(data) + 1 data = data.replace(b"-a-", b"-" + (b"a" * pad) + b"-") assert len(data) == 1024 # Sanity @@ -18,7 +16,7 @@ def test_basic_prescan_length(): def test_parser_reparse(): - data = "Caf\u00E9".encode('utf-8') + data = "Caf\u00E9".encode() pad = 10240 - len(data) + 1 data = data.replace(b"-a-", b"-" + (b"a" * pad) + b"-") assert len(data) == 10240 # Sanity diff --git a/html5lib/tests/test_meta.py b/html5lib/tests/test_meta.py index dd02dd7f..8f49bb57 100644 --- a/html5lib/tests/test_meta.py +++ b/html5lib/tests/test_meta.py @@ -1,7 +1,5 @@ -from __future__ import absolute_import, division, unicode_literals - import six -from mock import Mock +from unittest.mock 
import Mock from . import support @@ -27,11 +25,8 @@ def test_errorMessage(): r = support.errorMessage(input, expected, actual) # Assertions! - if six.PY2: - assert b"Input:\n1\nExpected:\n2\nReceived\n3\n" == r - else: - assert six.PY3 - assert "Input:\n1\nExpected:\n2\nReceived\n3\n" == r + assert six.PY3 + assert "Input:\n1\nExpected:\n2\nReceived\n3\n" == r assert input.__repr__.call_count == 1 assert expected.__repr__.call_count == 1 diff --git a/html5lib/tests/test_optionaltags_filter.py b/html5lib/tests/test_optionaltags_filter.py index cd282149..1b054f40 100644 --- a/html5lib/tests/test_optionaltags_filter.py +++ b/html5lib/tests/test_optionaltags_filter.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from html5lib.filters.optionaltags import Filter diff --git a/html5lib/tests/test_parser2.py b/html5lib/tests/test_parser2.py index 879d2447..8f40bc60 100644 --- a/html5lib/tests/test_parser2.py +++ b/html5lib/tests/test_parser2.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from six import PY2, text_type import io @@ -39,7 +37,7 @@ def test_namespace_html_elements_0_etree(): doc = parse("", treebuilder="etree", namespaceHTMLElements=True) - assert doc.tag == "{%s}html" % (namespaces["html"],) + assert doc.tag == "{{{}}}html".format(namespaces["html"]) def test_namespace_html_elements_1_etree(): @@ -77,7 +75,7 @@ def test_debug_log(): if PY2: for i, log in enumerate(expected): - log = [x.encode("ascii") if isinstance(x, text_type) else x for x in log] + log = [x.encode("ascii") if isinstance(x, str) else x for x in log] expected[i] = tuple(log) assert parser.log == expected diff --git a/html5lib/tests/test_sanitizer.py b/html5lib/tests/test_sanitizer.py index f3faeb80..e5cdc2af 100644 --- a/html5lib/tests/test_sanitizer.py +++ b/html5lib/tests/test_sanitizer.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - import pytest from html5lib import constants, parseFragment, serialize @@ -60,19 +58,19 @@ def param_sanitizer(): if tag_name == 'image': yield ("test_should_allow_%s_tag" % tag_name, "foo <bad>bar</bad> baz", - "<%s title='1'>foo bar baz" % (tag_name, tag_name)) + "<{} title='1'>foo bar baz".format(tag_name, tag_name)) elif tag_name == 'br': yield ("test_should_allow_%s_tag" % tag_name, "
foo <bad>bar</bad> baz
", - "<%s title='1'>foo bar baz" % (tag_name, tag_name)) + "<{} title='1'>foo bar baz".format(tag_name, tag_name)) elif tag_name in constants.voidElements: yield ("test_should_allow_%s_tag" % tag_name, "<%s title=\"1\"/>foo <bad>bar</bad> baz" % tag_name, - "<%s title='1'>foo bar baz" % (tag_name, tag_name)) + "<{} title='1'>foo bar baz".format(tag_name, tag_name)) else: yield ("test_should_allow_%s_tag" % tag_name, - "<%s title=\"1\">foo <bad>bar</bad> baz" % (tag_name, tag_name), - "<%s title='1'>foo bar baz" % (tag_name, tag_name)) + "<{} title=\"1\">foo <bad>bar</bad> baz".format(tag_name, tag_name), + "<{} title='1'>foo bar baz".format(tag_name, tag_name)) for ns, attribute_name in sanitizer.allowed_attributes: if ns is not None: @@ -85,16 +83,16 @@ def param_sanitizer(): if attribute_name in sanitizer.attr_val_is_uri: attribute_value = '%s://sub.domain.tld/path/object.ext' % sanitizer.allowed_protocols[0] yield ("test_should_allow_%s_attribute" % attribute_name, - "

foo <bad>bar</bad> baz

" % (attribute_name, attribute_value), - "

foo bar baz

" % (attribute_name, attribute_value)) + "

foo <bad>bar</bad> baz

".format(attribute_name, attribute_value), + "

foo bar baz

".format(attribute_name, attribute_value)) for protocol in sanitizer.allowed_protocols: rest_of_uri = '//sub.domain.tld/path/object.ext' if protocol == 'data': rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ=' yield ("test_should_allow_uppercase_%s_uris" % protocol, - "foo" % (protocol, rest_of_uri), - """foo""" % (protocol, rest_of_uri)) + "foo".format(protocol, rest_of_uri), + """foo""".format(protocol, rest_of_uri)) for protocol in sanitizer.allowed_protocols: rest_of_uri = '//sub.domain.tld/path/object.ext' @@ -102,8 +100,8 @@ def param_sanitizer(): rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ=' protocol = protocol.upper() yield ("test_should_allow_uppercase_%s_uris" % protocol, - "foo" % (protocol, rest_of_uri), - """foo""" % (protocol, rest_of_uri)) + "foo".format(protocol, rest_of_uri), + """foo""".format(protocol, rest_of_uri)) @pytest.mark.parametrize("expected, input", diff --git a/html5lib/tests/test_serializer.py b/html5lib/tests/test_serializer.py index bce62459..e8371247 100644 --- a/html5lib/tests/test_serializer.py +++ b/html5lib/tests/test_serializer.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - import os import json @@ -221,6 +219,6 @@ def test_serializer(input, expected, options): result = serialize_html(input, options) if len(expected) == 1: - assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions:\n%s" % (expected[0], result, str(options)) + assert expected[0] == result, "Expected:\n{}\nActual:\n{}\nOptions:\n{}".format(expected[0], result, str(options)) elif result not in expected: - assert False, "Expected: %s, Received: %s" % (expected, result) + assert False, "Expected: {}, Received: {}".format(expected, result) diff --git a/html5lib/tests/test_stream.py b/html5lib/tests/test_stream.py index efe9b472..413b43de 100644 --- a/html5lib/tests/test_stream.py +++ b/html5lib/tests/test_stream.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from . 
import support # noqa import codecs @@ -105,7 +103,7 @@ def test_char_ascii(): def test_char_utf8(): - stream = HTMLInputStream('\u2018'.encode('utf-8'), override_encoding='utf-8') + stream = HTMLInputStream('\u2018'.encode(), override_encoding='utf-8') assert stream.charEncoding[0].name == 'utf-8' assert stream.char() == '\u2018' @@ -186,7 +184,7 @@ def test_python_issue_20007(): Make sure we have a work-around for Python bug #20007 http://bugs.python.org/issue20007 """ - class FakeSocket(object): + class FakeSocket: def makefile(self, _mode, _bufsize=None): # pylint:disable=unused-argument return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText") @@ -205,7 +203,7 @@ def test_python_issue_20007_b(): if six.PY2: return - class FakeSocket(object): + class FakeSocket: def makefile(self, _mode, _bufsize=None): # pylint:disable=unused-argument return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText") diff --git a/html5lib/tests/test_tokenizer2.py b/html5lib/tests/test_tokenizer2.py index 158d847a..93a43f4e 100644 --- a/html5lib/tests/test_tokenizer2.py +++ b/html5lib/tests/test_tokenizer2.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - import io from six import unichr, text_type @@ -16,8 +14,8 @@ def ignore_parse_errors(toks): def test_maintain_attribute_order(): # generate loads to maximize the chance a hash-based mutation will occur - attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))] - stream = io.StringIO("") + attrs = [(chr(x), str(i)) for i, x in enumerate(range(ord('a'), ord('z')))] + stream = io.StringIO("") toks = HTMLTokenizer(stream) out = list(ignore_parse_errors(toks)) @@ -49,8 +47,8 @@ def test_duplicate_attribute(): def test_maintain_duplicate_attribute_order(): # generate loads to maximize the chance a hash-based mutation will occur - attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))] - stream = io.StringIO("") + attrs = [(chr(x), str(i)) for i, x in enumerate(range(ord('a'), ord('z')))] + stream = io.StringIO("") toks = HTMLTokenizer(stream) out = list(ignore_parse_errors(toks)) diff --git a/html5lib/tests/test_treeadapters.py b/html5lib/tests/test_treeadapters.py index 95e56c00..1e396ed9 100644 --- a/html5lib/tests/test_treeadapters.py +++ b/html5lib/tests/test_treeadapters.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from . 
import support # noqa import html5lib diff --git a/html5lib/tests/test_treewalkers.py b/html5lib/tests/test_treewalkers.py index 780ca964..3266361a 100644 --- a/html5lib/tests/test_treewalkers.py +++ b/html5lib/tests/test_treewalkers.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - import itertools import sys @@ -74,11 +72,11 @@ def param_treewalker_six_mix(): # fragment but not using the u'' syntax nor importing unicode_literals sm_tests = [ ('Example', - [(str('class'), str('test123'))], + [('class', 'test123')], '\n class="test123"\n href="http://example.com"\n "Example"'), ('', - [(str('rel'), str('alternate'))], + [('rel', 'alternate')], '\n href="http://example.com/cow"\n rel="alternate"\n "Example"') ] @@ -102,7 +100,7 @@ def test_treewalker_six_mix(intext, expected, attrs_to_add, tree): output = treewalkers.pprint(treeClass["walker"](document)) output = attrlist.sub(sortattrs, output) if output not in expected: - raise AssertionError("TreewalkerEditTest: %s\nExpected:\n%s\nReceived:\n%s" % (treeName, expected, output)) + raise AssertionError("TreewalkerEditTest: {}\nExpected:\n{}\nReceived:\n{}".format(treeName, expected, output)) @pytest.mark.parametrize("tree,char", itertools.product(sorted(treeTypes.items()), ["x", "\u1234"])) @@ -151,8 +149,8 @@ def test_maintain_attribute_order(treeName): pytest.skip("Treebuilder not loaded") # generate loads to maximize the chance a hash-based mutation will occur - attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))] - data = "" + attrs = [(chr(x), str(i)) for i, x in enumerate(range(ord('a'), ord('z')))] + data = "" parser = html5parser.HTMLParser(tree=treeAPIs["builder"]) document = parser.parseFragment(data) diff --git a/html5lib/tests/test_whitespace_filter.py b/html5lib/tests/test_whitespace_filter.py index e9da6140..0daf1c52 100644 --- a/html5lib/tests/test_whitespace_filter.py +++ b/html5lib/tests/test_whitespace_filter.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from html5lib.filters.whitespace import Filter from html5lib.constants import spaceCharacters spaceCharacters = "".join(spaceCharacters) diff --git a/html5lib/tests/tokenizer.py b/html5lib/tests/tokenizer.py index cc9897a4..8cf1f1dd 100644 --- a/html5lib/tests/tokenizer.py +++ b/html5lib/tests/tokenizer.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - import codecs import json import warnings @@ -12,7 +10,7 @@ from html5lib import constants, _utils -class TokenizerTestParser(object): +class TokenizerTestParser: def __init__(self, initialState, lastStartTag=None): self.tokenizer = HTMLTokenizer self._state = initialState @@ -146,11 +144,11 @@ def repl(m): low = int(m.group(2), 16) if 0xD800 <= high <= 0xDBFF and 0xDC00 <= low <= 0xDFFF: cp = ((high - 0xD800) << 10) + (low - 0xDC00) + 0x10000 - return unichr(cp) + return chr(cp) else: - return unichr(high) + unichr(low) + return chr(high) + chr(low) else: - return unichr(int(m.group(1), 16)) + return chr(int(m.group(1), 16)) try: return _surrogateRe.sub(repl, inp) except ValueError: @@ -197,7 +195,7 @@ def collect(self): class TokenizerTestCollector(pytest.Collector): def __init__(self, name, parent=None, config=None, session=None, testdata=None): - super(TokenizerTestCollector, self).__init__(name, parent, config, session) + super().__init__(name, parent, config, session) if 'initialStates' not in testdata: testdata["initialStates"] = ["Data state"] if 'doubleEscaped' in 
testdata: @@ -218,7 +216,7 @@ def collect(self): class TokenizerTest(pytest.Item): def __init__(self, name, parent, test, initialState): - super(TokenizerTest, self).__init__(name, parent) + super().__init__(name, parent) self.obj = lambda: 1 # this is to hack around skipif needing a function! self.test = test self.initialState = initialState diff --git a/html5lib/tests/tokenizertotree.py b/html5lib/tests/tokenizertotree.py index 8528e876..2ba74cad 100644 --- a/html5lib/tests/tokenizertotree.py +++ b/html5lib/tests/tokenizertotree.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - import sys import os import json @@ -25,7 +23,7 @@ def main(out_path): def run_file(filename, out_path): try: - tests_data = json.load(open(filename, "r")) + tests_data = json.load(open(filename)) except ValueError: sys.stderr.write("Failed to load %s\n" % filename) return diff --git a/html5lib/tests/tree_construction.py b/html5lib/tests/tree_construction.py index fb0657bf..e1b0c180 100644 --- a/html5lib/tests/tree_construction.py +++ b/html5lib/tests/tree_construction.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - import itertools import re import warnings @@ -31,14 +29,13 @@ def collect(self): class TreeConstructionTest(pytest.Collector): def __init__(self, name, parent=None, config=None, session=None, testdata=None): - super(TreeConstructionTest, self).__init__(name, parent, config, session) + super().__init__(name, parent, config, session) self.testdata = testdata def collect(self): for treeName, treeAPIs in sorted(treeTypes.items()): - for x in itertools.chain(self._getParserTests(treeName, treeAPIs), - self._getTreeWalkerTests(treeName, treeAPIs)): - yield x + yield from itertools.chain(self._getParserTests(treeName, treeAPIs), + self._getTreeWalkerTests(treeName, treeAPIs)) def _getParserTests(self, treeName, treeAPIs): if treeAPIs is not None and "adapter" in treeAPIs: @@ -79,7 +76,7 @@ def convertTreeDump(data): class ParserTest(pytest.Item): def __init__(self, name, parent, test, treeClass, namespaceHTMLElements): - super(ParserTest, self).__init__(name, parent) + super().__init__(name, parent) self.test = test self.treeClass = treeClass self.namespaceHTMLElements = namespaceHTMLElements @@ -122,7 +119,7 @@ def runtest(self): errStr = [] for (line, col), errorcode, datavars in p.errors: - assert isinstance(datavars, dict), "%s, %s" % (errorcode, repr(datavars)) + assert isinstance(datavars, dict), "{}, {}".format(errorcode, repr(datavars)) errStr.append("Line: %i Col: %i %s" % (line, col, constants.E[errorcode] % datavars)) @@ -144,7 +141,7 @@ def repr_failure(self, excinfo): class TreeWalkerTest(pytest.Item): def __init__(self, name, parent, test, treeAPIs): - super(TreeWalkerTest, self).__init__(name, parent) + super().__init__(name, parent) self.test = test self.treeAPIs = treeAPIs diff --git a/html5lib/treeadapters/__init__.py b/html5lib/treeadapters/__init__.py index dfeb0ba5..1444fc9a 100644 --- a/html5lib/treeadapters/__init__.py +++ b/html5lib/treeadapters/__init__.py @@ -16,7 +16,6 @@ genshi_tree = genshi.to_genshi(TreeWalker(tree)) """ -from __future__ import absolute_import, division, unicode_literals from . 
import sax diff --git a/html5lib/treeadapters/genshi.py b/html5lib/treeadapters/genshi.py index 61d5fb6a..804a980e 100644 --- a/html5lib/treeadapters/genshi.py +++ b/html5lib/treeadapters/genshi.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from genshi.core import QName, Attrs from genshi.core import START, END, TEXT, COMMENT, DOCTYPE @@ -23,7 +21,7 @@ def to_genshi(walker): if type in ("StartTag", "EmptyTag"): if token["namespace"]: - name = "{%s}%s" % (token["namespace"], token["name"]) + name = "{{{}}}{}".format(token["namespace"], token["name"]) else: name = token["name"] attrs = Attrs([(QName("{%s}%s" % attr if attr[0] is not None else attr[1]), value) @@ -34,7 +32,7 @@ def to_genshi(walker): if type == "EndTag": if token["namespace"]: - name = "{%s}%s" % (token["namespace"], token["name"]) + name = "{{{}}}{}".format(token["namespace"], token["name"]) else: name = token["name"] diff --git a/html5lib/treeadapters/sax.py b/html5lib/treeadapters/sax.py index f4ccea5a..04ec1ef0 100644 --- a/html5lib/treeadapters/sax.py +++ b/html5lib/treeadapters/sax.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from xml.sax.xmlreader import AttributesNSImpl from ..constants import adjustForeignAttributes, unadjustForeignAttributes diff --git a/html5lib/treebuilders/__init__.py b/html5lib/treebuilders/__init__.py index d44447ea..90aad5fb 100644 --- a/html5lib/treebuilders/__init__.py +++ b/html5lib/treebuilders/__init__.py @@ -29,7 +29,6 @@ """ -from __future__ import absolute_import, division, unicode_literals from .._utils import default_etree diff --git a/html5lib/treebuilders/base.py b/html5lib/treebuilders/base.py index e4a3d710..b48ddf84 100644 --- a/html5lib/treebuilders/base.py +++ b/html5lib/treebuilders/base.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from six import text_type from ..constants import scopingElements, tableInsertModeElements, namespaces @@ -20,7 +19,7 @@ } -class Node(object): +class Node: """Represents an item in the tree""" def __init__(self, name): """Creates a Node @@ -43,11 +42,11 @@ def __init__(self, name): self._flags = [] def __str__(self): - attributesStr = " ".join(["%s=\"%s\"" % (name, value) - for name, value in - self.attributes.items()]) + attributesStr = " ".join("{}=\"{}\"".format(name, value) + for name, value in + self.attributes.items()) if attributesStr: - return "<%s %s>" % (self.name, attributesStr) + return "<{} {}>".format(self.name, attributesStr) else: return "<%s>" % (self.name) @@ -143,7 +142,7 @@ def nodesEqual(self, node1, node2): return True -class TreeBuilder(object): +class TreeBuilder: """Base treebuilder implementation * documentClass - the class to use for the bottommost node of a document @@ -199,7 +198,7 @@ def elementInScope(self, target, variant=None): # match any node with that name exactNode = hasattr(target, "nameTuple") if not exactNode: - if isinstance(target, text_type): + if isinstance(target, str): target = (namespaces["html"], target) assert isinstance(target, tuple) @@ -322,7 +321,7 @@ def _setInsertFromTable(self, value): def insertElementNormal(self, token): name = token["name"] - assert isinstance(name, text_type), "Element %s not unicode" % name + assert isinstance(name, str), "Element %s not unicode" % name namespace = token.get("namespace", self.defaultNamespace) element = self.elementClass(name, namespace) element.attributes = token["data"] diff --git a/html5lib/treebuilders/dom.py 
b/html5lib/treebuilders/dom.py index 818a3343..51219093 100644 --- a/html5lib/treebuilders/dom.py +++ b/html5lib/treebuilders/dom.py @@ -1,6 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - - from collections.abc import MutableMapping from xml.dom import minidom, Node import weakref @@ -188,25 +185,25 @@ def serializeElement(element, indent=0): rv.append("""|%s""" % (' ' * indent, element.name, publicId, systemId)) else: - rv.append("|%s" % (' ' * indent, element.name)) + rv.append("|{}".format(' ' * indent, element.name)) else: - rv.append("|%s" % (' ' * indent,)) + rv.append("|{}".format(' ' * indent)) elif element.nodeType == Node.DOCUMENT_NODE: rv.append("#document") elif element.nodeType == Node.DOCUMENT_FRAGMENT_NODE: rv.append("#document-fragment") elif element.nodeType == Node.COMMENT_NODE: - rv.append("|%s" % (' ' * indent, element.nodeValue)) + rv.append("|{}".format(' ' * indent, element.nodeValue)) elif element.nodeType == Node.TEXT_NODE: - rv.append("|%s\"%s\"" % (' ' * indent, element.nodeValue)) + rv.append("|{}\"{}\"".format(' ' * indent, element.nodeValue)) else: if (hasattr(element, "namespaceURI") and element.namespaceURI is not None): - name = "%s %s" % (constants.prefixes[element.namespaceURI], - element.nodeName) + name = "{} {}".format(constants.prefixes[element.namespaceURI], + element.nodeName) else: name = element.nodeName - rv.append("|%s<%s>" % (' ' * indent, name)) + rv.append("|{}<{}>".format(' ' * indent, name)) if element.hasAttributes(): attributes = [] for i in range(len(element.attributes)): @@ -215,13 +212,13 @@ def serializeElement(element, indent=0): value = attr.value ns = attr.namespaceURI if ns: - name = "%s %s" % (constants.prefixes[ns], attr.localName) + name = "{} {}".format(constants.prefixes[ns], attr.localName) else: name = attr.nodeName attributes.append((name, value)) for name, value in sorted(attributes): - rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value)) + rv.append('|{}{}="{}"'.format(' ' * (indent + 2), name, value)) indent += 2 for child in element.childNodes: serializeElement(child, indent) diff --git a/html5lib/treebuilders/etree.py b/html5lib/treebuilders/etree.py index 086bed4e..2a7c80db 100644 --- a/html5lib/treebuilders/etree.py +++ b/html5lib/treebuilders/etree.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals # pylint:disable=protected-access from six import text_type @@ -38,7 +37,7 @@ def _getETreeTag(self, name, namespace): if namespace is None: etree_tag = name else: - etree_tag = "{%s}%s" % (namespace, name) + etree_tag = "{{{}}}{}".format(namespace, name) return etree_tag def _setName(self, name): @@ -70,7 +69,7 @@ def _setAttributes(self, attributes): # allocation on average for key, value in attributes.items(): if isinstance(key, tuple): - name = "{%s}%s" % (key[2], key[1]) + name = "{{{}}}{}".format(key[2], key[1]) else: name = key el_attrib[name] = value @@ -210,20 +209,20 @@ def serializeElement(element, indent=0): rv.append("""""" % (element.text, publicId, systemId)) else: - rv.append("" % (element.text,)) + rv.append("".format(element.text)) elif element.tag == "DOCUMENT_ROOT": rv.append("#document") if element.text is not None: - rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text)) + rv.append("|{}\"{}\"".format(' ' * (indent + 2), element.text)) if element.tail is not None: raise TypeError("Document node cannot have tail") if hasattr(element, "attrib") and len(element.attrib): raise TypeError("Document node cannot have attributes") 
elif element.tag == ElementTreeCommentType: - rv.append("|%s" % (' ' * indent, element.text)) + rv.append("|{}".format(' ' * indent, element.text)) else: - assert isinstance(element.tag, text_type), \ - "Expected unicode, got %s, %s" % (type(element.tag), element.tag) + assert isinstance(element.tag, str), \ + "Expected unicode, got {}, {}".format(type(element.tag), element.tag) nsmatch = tag_regexp.match(element.tag) if nsmatch is None: @@ -231,8 +230,8 @@ def serializeElement(element, indent=0): else: ns, name = nsmatch.groups() prefix = constants.prefixes[ns] - name = "%s %s" % (prefix, name) - rv.append("|%s<%s>" % (' ' * indent, name)) + name = "{} {}".format(prefix, name) + rv.append("|{}<{}>".format(' ' * indent, name)) if hasattr(element, "attrib"): attributes = [] @@ -241,20 +240,20 @@ def serializeElement(element, indent=0): if nsmatch is not None: ns, name = nsmatch.groups() prefix = constants.prefixes[ns] - attr_string = "%s %s" % (prefix, name) + attr_string = "{} {}".format(prefix, name) else: attr_string = name attributes.append((attr_string, value)) for name, value in sorted(attributes): - rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value)) + rv.append('|{}{}="{}"'.format(' ' * (indent + 2), name, value)) if element.text: - rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text)) + rv.append("|{}\"{}\"".format(' ' * (indent + 2), element.text)) indent += 2 for child in element: serializeElement(child, indent) if element.tail: - rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail)) + rv.append("|{}\"{}\"".format(' ' * (indent - 2), element.tail)) serializeElement(element, 0) return "\n".join(rv) @@ -275,7 +274,7 @@ def serializeElement(element): rv.append("""""" % (element.text, publicId, systemId)) else: - rv.append("" % (element.text,)) + rv.append("".format(element.text)) elif element.tag == "DOCUMENT_ROOT": if element.text is not None: rv.append(element.text) @@ -288,23 +287,23 @@ def serializeElement(element): serializeElement(child) elif element.tag == ElementTreeCommentType: - rv.append("" % (element.text,)) + rv.append("".format(element.text)) else: # This is assumed to be an ordinary element if not element.attrib: - rv.append("<%s>" % (filter.fromXmlName(element.tag),)) + rv.append("<{}>".format(filter.fromXmlName(element.tag))) else: - attr = " ".join(["%s=\"%s\"" % ( + attr = " ".join("{}=\"{}\"".format( filter.fromXmlName(name), value) - for name, value in element.attrib.items()]) - rv.append("<%s %s>" % (element.tag, attr)) + for name, value in element.attrib.items()) + rv.append("<{} {}>".format(element.tag, attr)) if element.text: rv.append(element.text) for child in element: serializeElement(child) - rv.append("" % (element.tag,)) + rv.append("".format(element.tag)) if element.tail: rv.append(element.tail) diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py index d99a51a9..3ec133bd 100644 --- a/html5lib/treebuilders/etree_lxml.py +++ b/html5lib/treebuilders/etree_lxml.py @@ -9,7 +9,6 @@ When any of these things occur, we emit a DataLossWarning """ -from __future__ import absolute_import, division, unicode_literals # pylint:disable=protected-access import warnings @@ -33,14 +32,14 @@ comment_type = etree.Comment("asd").tag -class DocumentType(object): +class DocumentType: def __init__(self, name, publicId, systemId): self.name = name self.publicId = publicId self.systemId = systemId -class Document(object): +class Document: def __init__(self): self._elementTree = None self._childNodes = [] @@ -72,11 +71,11 @@ 
def serializeElement(element, indent=0):
                         element.docinfo.system_url):
                     dtd_str = "<!DOCTYPE %s>" % element.docinfo.root_name
                 else:
-                    dtd_str = """<!DOCTYPE %s "%s" "%s">""" % (
+                    dtd_str = """<!DOCTYPE {} "{}" "{}">""".format(
                         element.docinfo.root_name,
                         element.docinfo.public_id,
                         element.docinfo.system_url)
-                rv.append("|%s%s" % (' ' * (indent + 2), dtd_str))
+                rv.append("|{}{}".format(' ' * (indent + 2), dtd_str))
             next_element = element.getroot()
             while next_element.getprevious() is not None:
                 next_element = next_element.getprevious()
@@ -86,16 +85,16 @@ def serializeElement(element, indent=0):
         elif isinstance(element, str) or isinstance(element, bytes):
             # Text in a fragment
             assert isinstance(element, str)
-            rv.append("|%s\"%s\"" % (' ' * indent, element))
+            rv.append("|{}\"{}\"".format(' ' * indent, element))
         else:
             # Fragment case
             rv.append("#document-fragment")
             for next_element in element:
                 serializeElement(next_element, indent + 2)
     elif element.tag == comment_type:
-        rv.append("|%s<!-- %s -->" % (' ' * indent, element.text))
+        rv.append("|{}<!-- {} -->".format(' ' * indent, element.text))
         if hasattr(element, "tail") and element.tail:
-            rv.append("|%s\"%s\"" % (' ' * indent, element.tail))
+            rv.append("|{}\"{}\"".format(' ' * indent, element.tail))
     else:
         assert isinstance(element, etree._Element)
         nsmatch = etree_builders.tag_regexp.match(element.tag)
@@ -103,11 +102,11 @@ def serializeElement(element, indent=0):
             ns = nsmatch.group(1)
             tag = nsmatch.group(2)
             prefix = constants.prefixes[ns]
-            rv.append("|%s<%s %s>" % (' ' * indent, prefix,
-                                      infosetFilter.fromXmlName(tag)))
+            rv.append("|{}<{} {}>".format(' ' * indent, prefix,
+                                          infosetFilter.fromXmlName(tag)))
         else:
-            rv.append("|%s<%s>" % (' ' * indent,
-                                   infosetFilter.fromXmlName(element.tag)))
+            rv.append("|{}<{}>".format(' ' * indent,
+                                       infosetFilter.fromXmlName(element.tag)))
         if hasattr(element, "attrib"):
             attributes = []
@@ -117,21 +116,21 @@ def serializeElement(element, indent=0):
                     ns, name = nsmatch.groups()
                     name = infosetFilter.fromXmlName(name)
                     prefix = constants.prefixes[ns]
-                    attr_string = "%s %s" % (prefix, name)
+                    attr_string = "{} {}".format(prefix, name)
                 else:
                     attr_string = infosetFilter.fromXmlName(name)
                 attributes.append((attr_string, value))
             for name, value in sorted(attributes):
-                rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
+                rv.append('|{}{}="{}"'.format(' ' * (indent + 2), name, value))
         if element.text:
-            rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
+            rv.append("|{}\"{}\"".format(' ' * (indent + 2), element.text))
         indent += 2
         for child in element:
             serializeElement(child, indent)
     if hasattr(element, "tail") and element.tail:
-        rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail))
+        rv.append("|{}\"{}\"".format(' ' * (indent - 2), element.tail))
     serializeElement(element, 0)
     return "\n".join(rv)
@@ -152,23 +151,23 @@ def serializeElement(element):
             serializeElement(element.getroot())
     elif element.tag == comment_type:
-        rv.append("<!--%s-->" % (element.text,))
+        rv.append("<!--{}-->".format(element.text))
     else:
         # This is assumed to be an ordinary element
         if not element.attrib:
-            rv.append("<%s>" % (element.tag,))
+            rv.append("<{}>".format(element.tag))
         else:
-            attr = " ".join(["%s=\"%s\"" % (name, value)
-                             for name, value in element.attrib.items()])
-            rv.append("<%s %s>" % (element.tag, attr))
+            attr = " ".join("{}=\"{}\"".format(name, value)
+                            for name, value in element.attrib.items())
+            rv.append("<{} {}>".format(element.tag, attr))
         if element.text:
             rv.append(element.text)
         for child in element:
             serializeElement(child)
-        rv.append("</%s>" % (element.tag,))
+        rv.append("</{}>".format(element.tag))
     if hasattr(element, "tail") and element.tail:
         rv.append(element.tail)
@@ -197,14 +196,14 @@ def __init__(self, element):
     def _coerceKey(self, key):
         if isinstance(key, tuple):
-            name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1]))
+            name = "{{{}}}{}".format(key[2], infosetFilter.coerceAttribute(key[1]))
         else:
             name = infosetFilter.coerceAttribute(key)
         return name
     def __getitem__(self, key):
         value = self._element._element.attrib[self._coerceKey(key)]
-        if not PY3 and isinstance(value, binary_type):
+        if not PY3 and isinstance(value, bytes):
             value = value.decode("ascii")
         return value
@@ -328,7 +327,7 @@ def insertCommentMain(self, data, parent=None):
         if (parent == self.document and
                 self.document._elementTree.getroot()[-1].tag == comment_type):
             warnings.warn("lxml cannot represent adjacent comments beyond the root elements", DataLossWarning)
-        super(TreeBuilder, self).insertComment(data, parent)
+        super().insertComment(data, parent)
     def insertRoot(self, token):
         # Because of the way libxml2 works, it doesn't seem to be possible to
@@ -375,7 +374,7 @@ def insertRoot(self, token):
         if namespace is None:
             etree_tag = name
         else:
-            etree_tag = "{%s}%s" % (namespace, name)
+            etree_tag = "{{{}}}{}".format(namespace, name)
         root.tag = etree_tag
         # Add the root element to the internal child/open data structures
diff --git a/html5lib/treewalkers/__init__.py b/html5lib/treewalkers/__init__.py
index b2d3aac3..c8ecc081 100644
--- a/html5lib/treewalkers/__init__.py
+++ b/html5lib/treewalkers/__init__.py
@@ -8,7 +8,6 @@
 returns an iterator which generates tokens.
 """
-from __future__ import absolute_import, division, unicode_literals
 from .. import constants
 from .._utils import default_etree
@@ -96,10 +95,10 @@ def pprint(walker):
                     ns = constants.prefixes[token["namespace"]]
                 else:
                     ns = token["namespace"]
-                name = "%s %s" % (ns, token["name"])
+                name = "{} {}".format(ns, token["name"])
             else:
                 name = token["name"]
-            output.append("%s<%s>" % (" " * indent, name))
+            output.append("{}<{}>".format(" " * indent, name))
             indent += 2
             # attributes (sorted for consistent ordering)
             attrs = token["data"]
@@ -109,10 +108,10 @@
                         ns = constants.prefixes[namespace]
                     else:
                         ns = namespace
-                    name = "%s %s" % (ns, localname)
+                    name = "{} {}".format(ns, localname)
                 else:
                     name = localname
-                output.append("%s%s=\"%s\"" % (" " * indent, name, value))
+                output.append("{}{}=\"{}\"".format(" " * indent, name, value))
             # self-closing
             if type == "EmptyTag":
                 indent -= 2
@@ -121,7 +120,7 @@
             indent -= 2
         elif type == "Comment":
-            output.append("%s<!-- %s -->" % (" " * indent, token["data"]))
+            output.append("{}<!-- {} -->".format(" " * indent, token["data"]))
         elif type == "Doctype":
             if token["name"]:
@@ -137,13 +136,13 @@
                                   token["name"],
                                   token["systemId"]))
                 else:
-                    output.append("%s<!DOCTYPE %s>" % (" " * indent,
-                                                       token["name"]))
+                    output.append("{}<!DOCTYPE {}>".format(" " * indent,
+                                                           token["name"]))
             else:
-                output.append("%s<!DOCTYPE >" % (" " * indent,))
+                output.append("{}<!DOCTYPE >".format(" " * indent))
         elif type == "Characters":
-            output.append("%s\"%s\"" % (" " * indent, token["data"]))
+            output.append("{}\"{}\"".format(" " * indent, token["data"]))
         elif type == "SpaceCharacters":
             assert False, "concatenateCharacterTokens should have got rid of all Space tokens"
diff --git a/html5lib/treewalkers/base.py b/html5lib/treewalkers/base.py
index 80c474c4..a4a9c71a 100644
--- a/html5lib/treewalkers/base.py
+++ b/html5lib/treewalkers/base.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, division,
unicode_literals - from xml.dom import Node from ..constants import namespaces, voidElements, spaceCharacters @@ -17,7 +15,7 @@ spaceCharacters = "".join(spaceCharacters) -class TreeWalker(object): +class TreeWalker: """Walks a tree yielding tokens Tokens are dicts that all have a ``type`` field specifying the type of the @@ -201,15 +199,13 @@ def __iter__(self): yield self.doctype(*details) elif type == TEXT: - for token in self.text(*details): - yield token + yield from self.text(*details) elif type == ELEMENT: namespace, name, attributes, hasChildren = details if (not namespace or namespace == namespaces["html"]) and name in voidElements: - for token in self.emptyTag(namespace, name, attributes, - hasChildren): - yield token + yield from self.emptyTag(namespace, name, attributes, + hasChildren) hasChildren = False else: yield self.startTag(namespace, name, attributes) diff --git a/html5lib/treewalkers/dom.py b/html5lib/treewalkers/dom.py index b0c89b00..ac88cd9d 100644 --- a/html5lib/treewalkers/dom.py +++ b/html5lib/treewalkers/dom.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from xml.dom import Node from . import base diff --git a/html5lib/treewalkers/etree.py b/html5lib/treewalkers/etree.py index 44653372..8e68f56f 100644 --- a/html5lib/treewalkers/etree.py +++ b/html5lib/treewalkers/etree.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from collections import OrderedDict import re @@ -51,7 +49,7 @@ def getNodeDetails(self, node): return base.COMMENT, node.text else: - assert isinstance(node.tag, string_types), type(node.tag) + assert isinstance(node.tag, str), type(node.tag) # This is assumed to be an ordinary element match = tag_regexp.match(node.tag) if match: diff --git a/html5lib/treewalkers/etree_lxml.py b/html5lib/treewalkers/etree_lxml.py index a614ac5b..744b94c1 100644 --- a/html5lib/treewalkers/etree_lxml.py +++ b/html5lib/treewalkers/etree_lxml.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from six import text_type from collections import OrderedDict @@ -14,13 +13,13 @@ def ensure_str(s): if s is None: return None - elif isinstance(s, text_type): + elif isinstance(s, str): return s else: return s.decode("ascii", "strict") -class Root(object): +class Root: def __init__(self, et): self.elementtree = et self.children = [] @@ -58,7 +57,7 @@ def __len__(self): return 1 -class Doctype(object): +class Doctype: def __init__(self, root_node, name, public_id, system_id): self.root_node = root_node self.name = name @@ -81,7 +80,7 @@ def getnext(self): return None -class FragmentWrapper(object): +class FragmentWrapper: def __init__(self, fragment_root, obj): self.root_node = fragment_root self.obj = obj diff --git a/html5lib/treewalkers/genshi.py b/html5lib/treewalkers/genshi.py index 7483be27..d4757af2 100644 --- a/html5lib/treewalkers/genshi.py +++ b/html5lib/treewalkers/genshi.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from genshi.core import QName from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT from genshi.core import START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT @@ -15,14 +13,12 @@ def __iter__(self): previous = None for event in self.tree: if previous is not None: - for token in self.tokens(previous, event): - yield token + yield from self.tokens(previous, event) previous = event # Don't forget the final event! 
if previous is not None: - for token in self.tokens(previous, None): - yield token + yield from self.tokens(previous, None) def tokens(self, event, next): kind, data, _ = event @@ -38,10 +34,9 @@ def tokens(self, event, next): converted_attribs[(None, k)] = v if namespace == namespaces["html"] and name in voidElements: - for token in self.emptyTag(namespace, name, converted_attribs, - not next or next[0] != END or - next[1] != tag): - yield token + yield from self.emptyTag(namespace, name, converted_attribs, + not next or next[0] != END or + next[1] != tag) else: yield self.startTag(namespace, name, converted_attribs) @@ -55,8 +50,7 @@ def tokens(self, event, next): yield self.comment(data) elif kind == TEXT: - for token in self.text(data): - yield token + yield from self.text(data) elif kind == DOCTYPE: yield self.doctype(*data) diff --git a/parse.py b/parse.py index f973fbb4..b72d2ef7 100755 --- a/parse.py +++ b/parse.py @@ -41,7 +41,7 @@ def parse(): try: # Try opening from file system f = open(f, "rb") - except IOError as e: + except OSError as e: sys.stderr.write("Unable to open file: %s\n" % e) sys.exit(1) except IndexError: @@ -80,7 +80,7 @@ def parse(): if document: printOutput(p, document, opts) t2 = time.time() - sys.stderr.write("\n\nRun took: %fs (plus %fs to print the output)" % (t1 - t0, t2 - t1)) + sys.stderr.write("\n\nRun took: {:f}s (plus {:f}s to print the output)".format(t1 - t0, t2 - t1)) else: sys.stderr.write("\n\nRun took: %fs" % (t1 - t0)) else: diff --git a/setup.py b/setup.py index 33ab359d..50054020 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,3 @@ -from __future__ import print_function - import ast import codecs diff --git a/utils/entities.py b/utils/entities.py index 6e8ca458..c70504c8 100644 --- a/utils/entities.py +++ b/utils/entities.py @@ -49,9 +49,9 @@ def test_description(name, good): semicolon_text = {True: "with a semi-colon", False: "without a semi-colon"}[with_semicolon] if good: - text = "Named entity: %s %s" % (name, semicolon_text) + text = "Named entity: {} {}".format(name, semicolon_text) else: - text = "Bad named entity: %s %s" % (name, semicolon_text) + text = "Bad named entity: {} {}".format(name, semicolon_text) return text @@ -80,7 +80,7 @@ def subentity_exists(entity_name, entities): def make_entities_code(entities): - entities_text = "\n".join(" \"%s\": u\"%s\"," % ( + entities_text = "\n".join(" \"{}\": u\"{}\",".format( name, entities[name].encode( "unicode-escape").replace("\"", "\\\"")) for name in sorted(entities.keys())) From f375ba88b92d92084ba0562f9972df6d5e8aea54 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Mon, 24 May 2021 21:03:52 +0300 Subject: [PATCH 03/10] Remove six --- .travis.yml | 2 +- debug-info.py | 2 +- html5lib/_inputstream.py | 9 ++++----- html5lib/_tokenizer.py | 2 -- html5lib/_trie/py.py | 1 - html5lib/_utils.py | 1 - html5lib/filters/lint.py | 1 - html5lib/filters/sanitizer.py | 5 ++--- html5lib/html5parser.py | 1 - html5lib/serializer.py | 1 - html5lib/tests/test_meta.py | 2 -- html5lib/tests/test_parser2.py | 7 ------- html5lib/tests/test_stream.py | 12 ++++-------- html5lib/tests/test_tokenizer2.py | 1 - html5lib/tests/test_treewalkers.py | 1 - html5lib/tests/tokenizer.py | 1 - html5lib/treebuilders/base.py | 1 - html5lib/treebuilders/etree.py | 1 - html5lib/treebuilders/etree_lxml.py | 3 --- html5lib/treewalkers/etree.py | 1 - html5lib/treewalkers/etree_lxml.py | 1 - requirements-install.sh | 4 ---- requirements.txt | 1 - setup.py | 1 - tox.ini | 3 +-- 25 files changed, 13 insertions(+), 52 
deletions(-) diff --git a/.travis.yml b/.travis.yml index a530faeb..8b964f4c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,7 +10,7 @@ cache: pip env: global: - - TOXENV=base,optional,six19-optional + - TOXENV=base,optional install: - pip install tox diff --git a/debug-info.py b/debug-info.py index 6e2a19bf..eb5a73f5 100644 --- a/debug-info.py +++ b/debug-info.py @@ -10,7 +10,7 @@ "maxsize": sys.maxsize } -search_modules = ["chardet", "genshi", "html5lib", "lxml", "six"] +search_modules = ["chardet", "genshi", "html5lib", "lxml"] found_modules = [] for m in search_modules: diff --git a/html5lib/_inputstream.py b/html5lib/_inputstream.py index b3a75452..23128ec8 100644 --- a/html5lib/_inputstream.py +++ b/html5lib/_inputstream.py @@ -1,8 +1,7 @@ -from six import text_type -from six.moves import http_client, urllib - import codecs +import http.client import re +import urllib from io import BytesIO, StringIO import webencodings @@ -123,10 +122,10 @@ def _readFromBuffer(self, bytes): def HTMLInputStream(source, **kwargs): # Work around Python bug #20007: read(0) closes the connection. # http://bugs.python.org/issue20007 - if (isinstance(source, http_client.HTTPResponse) or + if (isinstance(source, http.client.HTTPResponse) or # Also check for addinfourl wrapping HTTPResponse (isinstance(source, urllib.response.addbase) and - isinstance(source.fp, http_client.HTTPResponse))): + isinstance(source.fp, http.client.HTTPResponse))): isUnicode = False elif hasattr(source, "read"): isUnicode = isinstance(source.read(0), str) diff --git a/html5lib/_tokenizer.py b/html5lib/_tokenizer.py index e3750c5c..b5219836 100644 --- a/html5lib/_tokenizer.py +++ b/html5lib/_tokenizer.py @@ -1,5 +1,3 @@ -from six import unichr as chr - from collections import deque, OrderedDict from sys import version_info diff --git a/html5lib/_trie/py.py b/html5lib/_trie/py.py index 2e1aa188..05084863 100644 --- a/html5lib/_trie/py.py +++ b/html5lib/_trie/py.py @@ -1,4 +1,3 @@ -from six import text_type from bisect import bisect_left diff --git a/html5lib/_utils.py b/html5lib/_utils.py index f59eec1e..dc51de4a 100644 --- a/html5lib/_utils.py +++ b/html5lib/_utils.py @@ -2,7 +2,6 @@ from collections.abc import Mapping -from six import text_type, PY3 import xml.etree.ElementTree as default_etree diff --git a/html5lib/filters/lint.py b/html5lib/filters/lint.py index ff6c5bd7..f091adb1 100644 --- a/html5lib/filters/lint.py +++ b/html5lib/filters/lint.py @@ -1,4 +1,3 @@ -from six import text_type from . import base from ..constants import namespaces, voidElements diff --git a/html5lib/filters/sanitizer.py b/html5lib/filters/sanitizer.py index 4b753d30..6f1220bf 100644 --- a/html5lib/filters/sanitizer.py +++ b/html5lib/filters/sanitizer.py @@ -10,8 +10,7 @@ import re import warnings from xml.sax.saxutils import escape, unescape - -from six.moves import urllib_parse as urlparse +import urllib.parse from . 
import base from ..constants import namespaces, prefixes @@ -837,7 +836,7 @@ def allowed_token(self, token): # remove replacement characters from unescaped characters val_unescaped = val_unescaped.replace("\ufffd", "") try: - uri = urlparse.urlparse(val_unescaped) + uri = urllib.parse.urlparse(val_unescaped) except ValueError: uri = None del attrs[attr] diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py index b4c51821..1f29d728 100644 --- a/html5lib/html5parser.py +++ b/html5lib/html5parser.py @@ -1,4 +1,3 @@ -from six import with_metaclass, viewkeys import types diff --git a/html5lib/serializer.py b/html5lib/serializer.py index bbcbf9d6..16e94c27 100644 --- a/html5lib/serializer.py +++ b/html5lib/serializer.py @@ -1,4 +1,3 @@ -from six import text_type import re diff --git a/html5lib/tests/test_meta.py b/html5lib/tests/test_meta.py index 8f49bb57..2fc6140d 100644 --- a/html5lib/tests/test_meta.py +++ b/html5lib/tests/test_meta.py @@ -1,4 +1,3 @@ -import six from unittest.mock import Mock from . import support @@ -25,7 +24,6 @@ def test_errorMessage(): r = support.errorMessage(input, expected, actual) # Assertions! - assert six.PY3 assert "Input:\n1\nExpected:\n2\nReceived\n3\n" == r assert input.__repr__.call_count == 1 diff --git a/html5lib/tests/test_parser2.py b/html5lib/tests/test_parser2.py index 8f40bc60..89c9cef1 100644 --- a/html5lib/tests/test_parser2.py +++ b/html5lib/tests/test_parser2.py @@ -1,5 +1,3 @@ -from six import PY2, text_type - import io from . import support # noqa @@ -73,11 +71,6 @@ def test_debug_log(): ('dataState', 'InBodyPhase', 'InBodyPhase', 'processEndTag', {'name': 'p', 'type': 'EndTag'}), ('dataState', 'InBodyPhase', 'InBodyPhase', 'processCharacters', {'type': 'Characters'})] - if PY2: - for i, log in enumerate(expected): - log = [x.encode("ascii") if isinstance(x, str) else x for x in log] - expected[i] = tuple(log) - assert parser.log == expected diff --git a/html5lib/tests/test_stream.py b/html5lib/tests/test_stream.py index 413b43de..b94c7a5c 100644 --- a/html5lib/tests/test_stream.py +++ b/html5lib/tests/test_stream.py @@ -1,14 +1,13 @@ from . 
import support # noqa import codecs +import http.client import sys +import urllib from io import BytesIO, StringIO import pytest -import six -from six.moves import http_client, urllib - from html5lib._inputstream import (BufferedStream, HTMLInputStream, HTMLUnicodeInputStream, HTMLBinaryInputStream) from html5lib._utils import supports_lone_surrogates @@ -189,7 +188,7 @@ def makefile(self, _mode, _bufsize=None): # pylint:disable=unused-argument return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText") - source = http_client.HTTPResponse(FakeSocket()) + source = http.client.HTTPResponse(FakeSocket()) source.begin() stream = HTMLInputStream(source) assert stream.charsUntil(" ") == "Text" @@ -200,15 +199,12 @@ def test_python_issue_20007_b(): Make sure we have a work-around for Python bug #20007 http://bugs.python.org/issue20007 """ - if six.PY2: - return - class FakeSocket: def makefile(self, _mode, _bufsize=None): # pylint:disable=unused-argument return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText") - source = http_client.HTTPResponse(FakeSocket()) + source = http.client.HTTPResponse(FakeSocket()) source.begin() wrapped = urllib.response.addinfourl(source, source.msg, "http://example.com") stream = HTMLInputStream(wrapped) diff --git a/html5lib/tests/test_tokenizer2.py b/html5lib/tests/test_tokenizer2.py index 93a43f4e..bc97943f 100644 --- a/html5lib/tests/test_tokenizer2.py +++ b/html5lib/tests/test_tokenizer2.py @@ -1,6 +1,5 @@ import io -from six import unichr, text_type from html5lib._tokenizer import HTMLTokenizer from html5lib.constants import tokenTypes diff --git a/html5lib/tests/test_treewalkers.py b/html5lib/tests/test_treewalkers.py index 3266361a..108963f0 100644 --- a/html5lib/tests/test_treewalkers.py +++ b/html5lib/tests/test_treewalkers.py @@ -1,7 +1,6 @@ import itertools import sys -from six import unichr, text_type import pytest try: diff --git a/html5lib/tests/tokenizer.py b/html5lib/tests/tokenizer.py index 8cf1f1dd..273955c3 100644 --- a/html5lib/tests/tokenizer.py +++ b/html5lib/tests/tokenizer.py @@ -4,7 +4,6 @@ import re import pytest -from six import unichr from html5lib._tokenizer import HTMLTokenizer from html5lib import constants, _utils diff --git a/html5lib/treebuilders/base.py b/html5lib/treebuilders/base.py index b48ddf84..ec32501c 100644 --- a/html5lib/treebuilders/base.py +++ b/html5lib/treebuilders/base.py @@ -1,4 +1,3 @@ -from six import text_type from ..constants import scopingElements, tableInsertModeElements, namespaces diff --git a/html5lib/treebuilders/etree.py b/html5lib/treebuilders/etree.py index 2a7c80db..11582ee5 100644 --- a/html5lib/treebuilders/etree.py +++ b/html5lib/treebuilders/etree.py @@ -1,6 +1,5 @@ # pylint:disable=protected-access -from six import text_type import re diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py index 3ec133bd..29d34e27 100644 --- a/html5lib/treebuilders/etree_lxml.py +++ b/html5lib/treebuilders/etree_lxml.py @@ -23,7 +23,6 @@ from .. 
import _ihatexml import lxml.etree as etree -from six import PY3, binary_type fullTree = True @@ -203,8 +202,6 @@ def _coerceKey(self, key): def __getitem__(self, key): value = self._element._element.attrib[self._coerceKey(key)] - if not PY3 and isinstance(value, bytes): - value = value.decode("ascii") return value def __setitem__(self, key, value): diff --git a/html5lib/treewalkers/etree.py b/html5lib/treewalkers/etree.py index 8e68f56f..4ad3d58d 100644 --- a/html5lib/treewalkers/etree.py +++ b/html5lib/treewalkers/etree.py @@ -1,7 +1,6 @@ from collections import OrderedDict import re -from six import string_types from . import base from .._utils import moduleFactoryFactory diff --git a/html5lib/treewalkers/etree_lxml.py b/html5lib/treewalkers/etree_lxml.py index 744b94c1..d8c4daa5 100644 --- a/html5lib/treewalkers/etree_lxml.py +++ b/html5lib/treewalkers/etree_lxml.py @@ -1,4 +1,3 @@ -from six import text_type from collections import OrderedDict diff --git a/requirements-install.sh b/requirements-install.sh index b7a8d96d..41d9bc42 100755 --- a/requirements-install.sh +++ b/requirements-install.sh @@ -1,9 +1,5 @@ #!/bin/bash -ex -if [[ $SIX_VERSION ]]; then - pip install six==$SIX_VERSION -fi - pip install -r requirements-test.txt if [[ $USE_OPTIONAL == "true" ]]; then diff --git a/requirements.txt b/requirements.txt index ae7ec3d0..be8fcb77 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1 @@ -six>=1.9 webencodings diff --git a/setup.py b/setup.py index 50054020..d0f5814e 100644 --- a/setup.py +++ b/setup.py @@ -98,7 +98,6 @@ def default_environment(): maintainer_email='james@hoppipolla.co.uk', packages=find_packages(exclude=["*.tests", "*.tests.*", "tests.*", "tests"]), install_requires=[ - 'six>=1.9', 'webencodings', ], python_requires=">=3.6", diff --git a/tox.ini b/tox.ini index 05ca59aa..3ae8afe0 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py{36,37,38,py3}-{base,six19,optional} +envlist = py{36,37,38,py3}-{base,optional} [testenv] deps = @@ -11,7 +11,6 @@ passenv = PYTEST_COMMAND # this is maintained so one can, e.g., PYTEST_COMMAND="coverage run -m pytest" COVERAGE_RUN_OPTIONS commands = - six19: pip install six==1.9 {env:PYTEST_COMMAND:{envbindir}/py.test} {posargs} flake8 {toxinidir} From c289f9d1a7909ed0bda926fdf1e5557b392dcda7 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Mon, 24 May 2021 21:24:53 +0300 Subject: [PATCH 04/10] Upgrade Python syntax with pyupgrade --py36-plus --- html5lib/_inputstream.py | 2 +- html5lib/_trie/py.py | 1 - html5lib/_utils.py | 2 +- html5lib/filters/lint.py | 5 ++--- html5lib/filters/sanitizer.py | 2 +- html5lib/html5parser.py | 1 - html5lib/serializer.py | 1 - html5lib/tests/conftest.py | 4 ++-- html5lib/tests/support.py | 2 +- html5lib/tests/test_sanitizer.py | 22 +++++++++++----------- html5lib/tests/test_serializer.py | 4 ++-- html5lib/tests/test_tokenizer2.py | 4 ++-- html5lib/tests/test_treewalkers.py | 4 ++-- html5lib/tests/tree_construction.py | 2 +- html5lib/treebuilders/base.py | 5 ++--- html5lib/treebuilders/dom.py | 2 +- html5lib/treebuilders/etree.py | 22 +++++++++++----------- html5lib/treebuilders/etree_lxml.py | 16 ++++++++-------- html5lib/treewalkers/__init__.py | 2 +- html5lib/treewalkers/etree_lxml.py | 1 - parse.py | 2 +- utils/entities.py | 4 ++-- 22 files changed, 52 insertions(+), 58 deletions(-) diff --git a/html5lib/_inputstream.py b/html5lib/_inputstream.py index 23128ec8..078026b7 100644 --- a/html5lib/_inputstream.py +++ b/html5lib/_inputstream.py @@ -521,7 +521,7 @@ def 
changeEncoding(self, newEncoding): self.rawStream.seek(0) self.charEncoding = (newEncoding, "certain") self.reset() - raise _ReparseException("Encoding changed from {} to {}".format(self.charEncoding[0], newEncoding)) + raise _ReparseException(f"Encoding changed from {self.charEncoding[0]} to {newEncoding}") def detectBOM(self): """Attempts to detect at BOM at the start of the stream. If diff --git a/html5lib/_trie/py.py b/html5lib/_trie/py.py index 05084863..bc6363c4 100644 --- a/html5lib/_trie/py.py +++ b/html5lib/_trie/py.py @@ -1,4 +1,3 @@ - from bisect import bisect_left from ._base import Trie as ABCTrie diff --git a/html5lib/_utils.py b/html5lib/_utils.py index dc51de4a..95a5569b 100644 --- a/html5lib/_utils.py +++ b/html5lib/_utils.py @@ -110,7 +110,7 @@ def moduleFactoryFactory(factory): moduleCache = {} def moduleFactory(baseModule, *args, **kwargs): - if isinstance(ModuleType.__name__, type("")): + if isinstance(ModuleType.__name__, str): name = "_%s_factory" % baseModule.__name__ else: name = b"_%s_factory" % baseModule.__name__ diff --git a/html5lib/filters/lint.py b/html5lib/filters/lint.py index f091adb1..f0ffce61 100644 --- a/html5lib/filters/lint.py +++ b/html5lib/filters/lint.py @@ -1,4 +1,3 @@ - from . import base from ..constants import namespaces, voidElements @@ -56,7 +55,7 @@ def __iter__(self): assert isinstance(name, str) assert name != "" if (not namespace or namespace == namespaces["html"]) and name in voidElements: - assert False, "Void element reported as EndTag token: {tag}".format(tag=name) + assert False, f"Void element reported as EndTag token: {name}" elif self.require_matching_tags: start = open_elements.pop() assert start == (namespace, name) @@ -85,6 +84,6 @@ def __iter__(self): assert isinstance(token["data"], str) else: - assert False, "Unknown token type: {type}".format(type=type) + assert False, f"Unknown token type: {type}" yield token diff --git a/html5lib/filters/sanitizer.py b/html5lib/filters/sanitizer.py index 6f1220bf..a1b61099 100644 --- a/html5lib/filters/sanitizer.py +++ b/html5lib/filters/sanitizer.py @@ -872,7 +872,7 @@ def disallowed_token(self, token): assert token_type in ("StartTag", "EmptyTag") attrs = [] for (ns, name), v in token["data"].items(): - attrs.append(' {}="{}"'.format(name if ns is None else "{}:{}".format(prefixes[ns], name), escape(v))) + attrs.append(' {}="{}"'.format(name if ns is None else f"{prefixes[ns]}:{name}", escape(v))) token["data"] = "<{}{}>".format(token["name"], ''.join(attrs)) else: token["data"] = "<%s>" % token["name"] diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py index 1f29d728..3996c9b8 100644 --- a/html5lib/html5parser.py +++ b/html5lib/html5parser.py @@ -1,4 +1,3 @@ - import types from . 
import _inputstream diff --git a/html5lib/serializer.py b/html5lib/serializer.py index 16e94c27..cd4631f6 100644 --- a/html5lib/serializer.py +++ b/html5lib/serializer.py @@ -1,4 +1,3 @@ - import re from codecs import register_error, xmlcharrefreplace_errors diff --git a/html5lib/tests/conftest.py b/html5lib/tests/conftest.py index 62a7991b..e5f7a2ac 100644 --- a/html5lib/tests/conftest.py +++ b/html5lib/tests/conftest.py @@ -69,7 +69,7 @@ def pytest_configure(config): try: installed = pkg_resources.working_set.find(req) except pkg_resources.VersionConflict: - msgs.append("Outdated version of {} installed, need {}".format(req.name, spec)) + msgs.append(f"Outdated version of {req.name} installed, need {spec}") else: if not installed: msgs.append("Need %s" % spec) @@ -78,7 +78,7 @@ def pytest_configure(config): import xml.etree.ElementTree as ElementTree try: - import xml.etree.cElementTree as cElementTree + import xml.etree.ElementTree as cElementTree except ImportError: msgs.append("cElementTree unable to be imported") else: diff --git a/html5lib/tests/support.py b/html5lib/tests/support.py index d51cae12..f311fb92 100644 --- a/html5lib/tests/support.py +++ b/html5lib/tests/support.py @@ -33,7 +33,7 @@ } try: - import xml.etree.cElementTree as cElementTree # noqa + import xml.etree.ElementTree as cElementTree # noqa except ImportError: treeTypes['cElementTree'] = None else: diff --git a/html5lib/tests/test_sanitizer.py b/html5lib/tests/test_sanitizer.py index e5cdc2af..f8b4b4b6 100644 --- a/html5lib/tests/test_sanitizer.py +++ b/html5lib/tests/test_sanitizer.py @@ -58,19 +58,19 @@ def param_sanitizer(): if tag_name == 'image': yield ("test_should_allow_%s_tag" % tag_name, "foo <bad>bar</bad> baz", - "<{} title='1'>foo bar baz".format(tag_name, tag_name)) + f"<{tag_name} title='1'>foo bar baz") elif tag_name == 'br': yield ("test_should_allow_%s_tag" % tag_name, "
foo <bad>bar</bad> baz
", - "<{} title='1'>foo bar baz".format(tag_name, tag_name)) + f"<{tag_name} title='1'>foo bar baz") elif tag_name in constants.voidElements: yield ("test_should_allow_%s_tag" % tag_name, "<%s title=\"1\"/>foo <bad>bar</bad> baz" % tag_name, - "<{} title='1'>foo bar baz".format(tag_name, tag_name)) + f"<{tag_name} title='1'>foo bar baz") else: yield ("test_should_allow_%s_tag" % tag_name, - "<{} title=\"1\">foo <bad>bar</bad> baz".format(tag_name, tag_name), - "<{} title='1'>foo bar baz".format(tag_name, tag_name)) + f"<{tag_name} title=\"1\">foo <bad>bar</bad> baz", + f"<{tag_name} title='1'>foo bar baz") for ns, attribute_name in sanitizer.allowed_attributes: if ns is not None: @@ -83,16 +83,16 @@ def param_sanitizer(): if attribute_name in sanitizer.attr_val_is_uri: attribute_value = '%s://sub.domain.tld/path/object.ext' % sanitizer.allowed_protocols[0] yield ("test_should_allow_%s_attribute" % attribute_name, - "

foo <bad>bar</bad> baz

".format(attribute_name, attribute_value), - "

foo bar baz

".format(attribute_name, attribute_value)) + f"

foo <bad>bar</bad> baz

", + f"

foo bar baz

") for protocol in sanitizer.allowed_protocols: rest_of_uri = '//sub.domain.tld/path/object.ext' if protocol == 'data': rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ=' yield ("test_should_allow_uppercase_%s_uris" % protocol, - "foo
".format(protocol, rest_of_uri), - """foo""".format(protocol, rest_of_uri)) + f"foo", + f"""foo""") for protocol in sanitizer.allowed_protocols: rest_of_uri = '//sub.domain.tld/path/object.ext' @@ -100,8 +100,8 @@ def param_sanitizer(): rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ=' protocol = protocol.upper() yield ("test_should_allow_uppercase_%s_uris" % protocol, - "foo".format(protocol, rest_of_uri), - """foo""".format(protocol, rest_of_uri)) + f"foo", + f"""foo""") @pytest.mark.parametrize("expected, input", diff --git a/html5lib/tests/test_serializer.py b/html5lib/tests/test_serializer.py index e8371247..2ed71de6 100644 --- a/html5lib/tests/test_serializer.py +++ b/html5lib/tests/test_serializer.py @@ -219,6 +219,6 @@ def test_serializer(input, expected, options): result = serialize_html(input, options) if len(expected) == 1: - assert expected[0] == result, "Expected:\n{}\nActual:\n{}\nOptions:\n{}".format(expected[0], result, str(options)) + assert expected[0] == result, f"Expected:\n{expected[0]}\nActual:\n{result}\nOptions:\n{str(options)}" elif result not in expected: - assert False, "Expected: {}, Received: {}".format(expected, result) + assert False, f"Expected: {expected}, Received: {result}" diff --git a/html5lib/tests/test_tokenizer2.py b/html5lib/tests/test_tokenizer2.py index bc97943f..a616288f 100644 --- a/html5lib/tests/test_tokenizer2.py +++ b/html5lib/tests/test_tokenizer2.py @@ -14,7 +14,7 @@ def ignore_parse_errors(toks): def test_maintain_attribute_order(): # generate loads to maximize the chance a hash-based mutation will occur attrs = [(chr(x), str(i)) for i, x in enumerate(range(ord('a'), ord('z')))] - stream = io.StringIO("") + stream = io.StringIO("") toks = HTMLTokenizer(stream) out = list(ignore_parse_errors(toks)) @@ -47,7 +47,7 @@ def test_duplicate_attribute(): def test_maintain_duplicate_attribute_order(): # generate loads to maximize the chance a hash-based mutation will occur attrs = [(chr(x), str(i)) for i, x in enumerate(range(ord('a'), ord('z')))] - stream = io.StringIO("") + stream = io.StringIO("") toks = HTMLTokenizer(stream) out = list(ignore_parse_errors(toks)) diff --git a/html5lib/tests/test_treewalkers.py b/html5lib/tests/test_treewalkers.py index 108963f0..a15a54e3 100644 --- a/html5lib/tests/test_treewalkers.py +++ b/html5lib/tests/test_treewalkers.py @@ -99,7 +99,7 @@ def test_treewalker_six_mix(intext, expected, attrs_to_add, tree): output = treewalkers.pprint(treeClass["walker"](document)) output = attrlist.sub(sortattrs, output) if output not in expected: - raise AssertionError("TreewalkerEditTest: {}\nExpected:\n{}\nReceived:\n{}".format(treeName, expected, output)) + raise AssertionError(f"TreewalkerEditTest: {treeName}\nExpected:\n{expected}\nReceived:\n{output}") @pytest.mark.parametrize("tree,char", itertools.product(sorted(treeTypes.items()), ["x", "\u1234"])) @@ -149,7 +149,7 @@ def test_maintain_attribute_order(treeName): # generate loads to maximize the chance a hash-based mutation will occur attrs = [(chr(x), str(i)) for i, x in enumerate(range(ord('a'), ord('z')))] - data = "" + data = "" parser = html5parser.HTMLParser(tree=treeAPIs["builder"]) document = parser.parseFragment(data) diff --git a/html5lib/tests/tree_construction.py b/html5lib/tests/tree_construction.py index e1b0c180..204865ba 100644 --- a/html5lib/tests/tree_construction.py +++ b/html5lib/tests/tree_construction.py @@ -119,7 +119,7 @@ def runtest(self): errStr = [] for (line, col), errorcode, datavars in p.errors: - assert isinstance(datavars, dict), "{}, 
{}".format(errorcode, repr(datavars))
+        assert isinstance(datavars, dict), f"{errorcode}, {repr(datavars)}"
         errStr.append("Line: %i Col: %i %s" % (line, col,
                                                constants.E[errorcode] % datavars))
diff --git a/html5lib/treebuilders/base.py b/html5lib/treebuilders/base.py
index ec32501c..4afd3c56 100644
--- a/html5lib/treebuilders/base.py
+++ b/html5lib/treebuilders/base.py
@@ -1,4 +1,3 @@
-
 from ..constants import scopingElements, tableInsertModeElements, namespaces
 # The scope markers are inserted when entering object elements,
@@ -41,11 +40,11 @@ def __init__(self, name):
         self._flags = []
     def __str__(self):
-        attributesStr = " ".join("{}=\"{}\"".format(name, value)
+        attributesStr = " ".join(f"{name}=\"{value}\""
                                  for name, value in self.attributes.items())
         if attributesStr:
-            return "<{} {}>".format(self.name, attributesStr)
+            return f"<{self.name} {attributesStr}>"
         else:
             return "<%s>" % (self.name)
diff --git a/html5lib/treebuilders/dom.py b/html5lib/treebuilders/dom.py
index 51219093..2d632d6e 100644
--- a/html5lib/treebuilders/dom.py
+++ b/html5lib/treebuilders/dom.py
@@ -212,7 +212,7 @@ def serializeElement(element, indent=0):
                 value = attr.value
                 ns = attr.namespaceURI
                 if ns:
-                    name = "{} {}".format(constants.prefixes[ns], attr.localName)
+                    name = f"{constants.prefixes[ns]} {attr.localName}"
                 else:
                     name = attr.nodeName
                 attributes.append((name, value))
diff --git a/html5lib/treebuilders/etree.py b/html5lib/treebuilders/etree.py
index 11582ee5..f3fea0f9 100644
--- a/html5lib/treebuilders/etree.py
+++ b/html5lib/treebuilders/etree.py
@@ -36,7 +36,7 @@ def _getETreeTag(self, name, namespace):
         if namespace is None:
             etree_tag = name
         else:
-            etree_tag = "{{{}}}{}".format(namespace, name)
+            etree_tag = f"{{{namespace}}}{name}"
         return etree_tag
     def _setName(self, name):
@@ -68,7 +68,7 @@ def _setAttributes(self, attributes):
             # allocation on average
             for key, value in attributes.items():
                 if isinstance(key, tuple):
-                    name = "{{{}}}{}".format(key[2], key[1])
+                    name = f"{{{key[2]}}}{key[1]}"
                 else:
                     name = key
                 el_attrib[name] = value
@@ -208,7 +208,7 @@ def serializeElement(element, indent=0):
                 rv.append("""<!DOCTYPE %s "%s" "%s">""" % (element.text, publicId, systemId))
             else:
-                rv.append("<!DOCTYPE {}>".format(element.text))
+                rv.append(f"<!DOCTYPE {element.text}>")
         elif element.tag == "DOCUMENT_ROOT":
             rv.append("#document")
             if element.text is not None:
@@ -221,7 +221,7 @@ def serializeElement(element, indent=0):
             rv.append("|{}<!-- {} -->".format(' ' * indent, element.text))
         else:
             assert isinstance(element.tag, str), \
-                "Expected unicode, got {}, {}".format(type(element.tag), element.tag)
+                f"Expected unicode, got {type(element.tag)}, {element.tag}"
             nsmatch = tag_regexp.match(element.tag)
             if nsmatch is None:
@@ -229,7 +229,7 @@ def serializeElement(element, indent=0):
             else:
                 ns, name = nsmatch.groups()
                 prefix = constants.prefixes[ns]
-                name = "{} {}".format(prefix, name)
+                name = f"{prefix} {name}"
             rv.append("|{}<{}>".format(' ' * indent, name))
             if hasattr(element, "attrib"):
@@ -239,7 +239,7 @@ def serializeElement(element, indent=0):
                     if nsmatch is not None:
                         ns, name = nsmatch.groups()
                         prefix = constants.prefixes[ns]
-                        attr_string = "{} {}".format(prefix, name)
+                        attr_string = f"{prefix} {name}"
                     else:
                         attr_string = name
                     attributes.append((attr_string, value))
@@ -273,7 +273,7 @@ def serializeElement(element):
                 rv.append("""<!DOCTYPE %s PUBLIC "%s" "%s">""" % (element.text, publicId, systemId))
             else:
-                rv.append("<!DOCTYPE {}>".format(element.text))
+                rv.append(f"<!DOCTYPE {element.text}>")
         elif element.tag == "DOCUMENT_ROOT":
             if element.text is not None:
                 rv.append(element.text)
@@ -286,23 +286,23 @@
                serializeElement(child)
         elif element.tag == ElementTreeCommentType:
-            rv.append("<!--{}-->".format(element.text))
+            rv.append(f"<!--{element.text}-->")
         else:
             # This is assumed to be an ordinary element
             if not element.attrib:
-                rv.append("<{}>".format(filter.fromXmlName(element.tag)))
+                rv.append(f"<{filter.fromXmlName(element.tag)}>")
             else:
                 attr = " ".join("{}=\"{}\"".format(
                     filter.fromXmlName(name), value)
                     for name, value in element.attrib.items())
-                rv.append("<{} {}>".format(element.tag, attr))
+                rv.append(f"<{element.tag} {attr}>")
             if element.text:
                 rv.append(element.text)
         for child in element:
             serializeElement(child)
-        rv.append("</{}>".format(element.tag))
+        rv.append(f"</{element.tag}>")
         if element.tail:
             rv.append(element.tail)
diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py
index 29d34e27..3bcf8c96 100644
--- a/html5lib/treebuilders/etree_lxml.py
+++ b/html5lib/treebuilders/etree_lxml.py
@@ -115,7 +115,7 @@ def serializeElement(element, indent=0):
                     ns, name = nsmatch.groups()
                     name = infosetFilter.fromXmlName(name)
                     prefix = constants.prefixes[ns]
-                    attr_string = "{} {}".format(prefix, name)
+                    attr_string = f"{prefix} {name}"
                 else:
                     attr_string = infosetFilter.fromXmlName(name)
                 attributes.append((attr_string, value))
@@ -150,23 +150,23 @@ def serializeElement(element):
             serializeElement(element.getroot())
     elif element.tag == comment_type:
-        rv.append("<!--{}-->".format(element.text))
+        rv.append(f"<!--{element.text}-->")
     else:
         # This is assumed to be an ordinary element
         if not element.attrib:
-            rv.append("<{}>".format(element.tag))
+            rv.append(f"<{element.tag}>")
         else:
-            attr = " ".join("{}=\"{}\"".format(name, value)
+            attr = " ".join(f"{name}=\"{value}\""
                             for name, value in element.attrib.items())
-            rv.append("<{} {}>".format(element.tag, attr))
+            rv.append(f"<{element.tag} {attr}>")
         if element.text:
             rv.append(element.text)
         for child in element:
             serializeElement(child)
-        rv.append("</{}>".format(element.tag))
+        rv.append(f"</{element.tag}>")
     if hasattr(element, "tail") and element.tail:
         rv.append(element.tail)
@@ -195,7 +195,7 @@ def __init__(self, element):
     def _coerceKey(self, key):
         if isinstance(key, tuple):
-            name = "{{{}}}{}".format(key[2], infosetFilter.coerceAttribute(key[1]))
+            name = f"{{{key[2]}}}{infosetFilter.coerceAttribute(key[1])}"
         else:
             name = infosetFilter.coerceAttribute(key)
         return name
@@ -371,7 +371,7 @@ def insertRoot(self, token):
         if namespace is None:
             etree_tag = name
         else:
-            etree_tag = "{{{}}}{}".format(namespace, name)
+            etree_tag = f"{{{namespace}}}{name}"
         root.tag = etree_tag
         # Add the root element to the internal child/open data structures
diff --git a/html5lib/treewalkers/__init__.py b/html5lib/treewalkers/__init__.py
index c8ecc081..70e0fff6 100644
--- a/html5lib/treewalkers/__init__.py
+++ b/html5lib/treewalkers/__init__.py
@@ -108,7 +108,7 @@ def pprint(walker):
                         ns = constants.prefixes[namespace]
                     else:
                         ns = namespace
-                    name = "{} {}".format(ns, localname)
+                    name = f"{ns} {localname}"
                 else:
                     name = localname
                 output.append("{}{}=\"{}\"".format(" " * indent, name, value))
diff --git a/html5lib/treewalkers/etree_lxml.py b/html5lib/treewalkers/etree_lxml.py
index d8c4daa5..0ec633ac 100644
--- a/html5lib/treewalkers/etree_lxml.py
+++ b/html5lib/treewalkers/etree_lxml.py
@@ -1,4 +1,3 @@
-
 from collections import OrderedDict
 from lxml import etree
diff --git a/parse.py b/parse.py
index b72d2ef7..dd919364 100755
--- a/parse.py
+++ b/parse.py
@@ -80,7 +80,7 @@ def parse():
         if document:
             printOutput(p, document, opts)
             t2 = time.time()
-            sys.stderr.write("\n\nRun took: {:f}s (plus {:f}s to print the output)".format(t1 - t0, t2
- t1)) + sys.stderr.write(f"\n\nRun took: {t1 - t0:f}s (plus {t2 - t1:f}s to print the output)") else: sys.stderr.write("\n\nRun took: %fs" % (t1 - t0)) else: diff --git a/utils/entities.py b/utils/entities.py index c70504c8..faeb4b45 100644 --- a/utils/entities.py +++ b/utils/entities.py @@ -49,9 +49,9 @@ def test_description(name, good): semicolon_text = {True: "with a semi-colon", False: "without a semi-colon"}[with_semicolon] if good: - text = "Named entity: {} {}".format(name, semicolon_text) + text = f"Named entity: {name} {semicolon_text}" else: - text = "Bad named entity: {} {}".format(name, semicolon_text) + text = f"Bad named entity: {name} {semicolon_text}" return text From 03f46e23663aa6cc3f5d873a4db65d9d020fb5ca Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Mon, 24 May 2021 23:53:39 +0300 Subject: [PATCH 05/10] AppVeyor: Use Python 3.6 as base version --- .appveyor.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index 0b3c4e8a..e022e685 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -1,7 +1,7 @@ # To activate, change the Appveyor settings to use `.appveyor.yml`. environment: global: - PATH: "C:\\Python27\\Scripts\\;%PATH%" + PATH: "C:\\Python36\\Scripts\\;%PATH%" matrix: - TOXENV: py36-base - TOXENV: py36-optional @@ -12,7 +12,7 @@ environment: install: - git submodule update --init --recursive - - python -m pip install tox + - C:\\Python36\\python.exe -m pip install tox build: off @@ -20,4 +20,4 @@ test_script: - tox after_test: - - python debug-info.py + - C:\\Python36\\python.exe debug-info.py From a0903811fe76e475b4f6d37b7279508cb9a8ff70 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Sun, 3 Apr 2022 20:52:09 +0300 Subject: [PATCH 06/10] Universal wheels for supporting Python 2 and 3 --- setup.cfg | 3 --- 1 file changed, 3 deletions(-) diff --git a/setup.cfg b/setup.cfg index 0b2bb9c7..2a44c0f2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,3 @@ -[bdist_wheel] -universal = 1 - [pep8] ignore = N max-line-length = 139 From 043c019f816aa7261b567cd5619f50178d701831 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Sun, 3 Apr 2022 22:22:46 +0300 Subject: [PATCH 07/10] Remove unused mock dependency --- README.rst | 2 +- requirements-test.txt | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/README.rst b/README.rst index fef6d315..f4943e52 100644 --- a/README.rst +++ b/README.rst @@ -127,7 +127,7 @@ Please report any bugs on the `issue tracker Tests ----- -Unit tests require the ``pytest`` and ``mock`` libraries and can be +Unit tests require the ``pytest`` library and can be run using the ``py.test`` command in the root directory. Test data are contained in a separate `html5lib-tests diff --git a/requirements-test.txt b/requirements-test.txt index ce882670..fa6f2c62 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -5,4 +5,3 @@ flake8>=3.8.1,<3.9 pytest>=5.4.2,<7 coverage>=5.1,<6 pytest-expect>=1.1.0,<2 -mock>=4.0.2,<5 From 589b01c61f706c4e9c69337692b8d1d4679288b5 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Wed, 20 Apr 2022 18:47:31 +0300 Subject: [PATCH 08/10] Fix PytestRemovedIn8Warning: The (fspath: py.path.local) argument to SanitizerFile is deprecated. 
--- html5lib/tests/conftest.py | 8 ++++---- requirements-test.txt | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/html5lib/tests/conftest.py b/html5lib/tests/conftest.py index e5f7a2ac..0b3fc4df 100644 --- a/html5lib/tests/conftest.py +++ b/html5lib/tests/conftest.py @@ -89,7 +89,7 @@ def pytest_configure(config): pytest.exit("\n".join(msgs)) -def pytest_collect_file(path, parent): +def pytest_collect_file(file_path, path, parent): dir = os.path.abspath(path.dirname) dir_and_parents = set() while dir not in dir_and_parents: @@ -98,13 +98,13 @@ def pytest_collect_file(path, parent): if _tree_construction in dir_and_parents: if path.ext == ".dat": - return TreeConstructionFile.from_parent(parent, fspath=path) + return TreeConstructionFile.from_parent(parent, path=file_path) elif _tokenizer in dir_and_parents: if path.ext == ".test": - return TokenizerFile.from_parent(parent, fspath=path) + return TokenizerFile.from_parent(parent, path=file_path) elif _sanitizer_testdata in dir_and_parents: if path.ext == ".dat": - return SanitizerFile.from_parent(parent, fspath=path) + return SanitizerFile.from_parent(parent, path=file_path) # Tiny wrapper to allow .from_parent constructors on older pytest for PY27 diff --git a/requirements-test.txt b/requirements-test.txt index fa6f2c62..06e0d48e 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -2,6 +2,6 @@ tox>=3.15.1,<4 flake8>=3.8.1,<3.9 -pytest>=5.4.2,<7 +pytest>=7,<8 coverage>=5.1,<6 pytest-expect>=1.1.0,<2 From 4d975c6d5ff04f49d9a66826ed6be5a1c754c7fa Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Wed, 6 Jul 2022 10:42:30 +0300 Subject: [PATCH 09/10] Drop support for EOL Python 3.6 --- .appveyor.yml | 8 +++----- .github/workflows/python-tox.yml | 7 ++++--- .travis.yml | 1 - setup.py | 3 +-- tox.ini | 4 ++-- 5 files changed, 10 insertions(+), 13 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index e022e685..3088260f 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -1,10 +1,8 @@ # To activate, change the Appveyor settings to use `.appveyor.yml`. 
environment: global: - PATH: "C:\\Python36\\Scripts\\;%PATH%" + PATH: "C:\\Python38\\Scripts\\;%PATH%" matrix: - - TOXENV: py36-base - - TOXENV: py36-optional - TOXENV: py37-base - TOXENV: py37-optional - TOXENV: py38-base @@ -12,7 +10,7 @@ environment: install: - git submodule update --init --recursive - - C:\\Python36\\python.exe -m pip install tox + - C:\\Python38\\python.exe -m pip install tox build: off @@ -20,4 +18,4 @@ test_script: - tox after_test: - - C:\\Python36\\python.exe debug-info.py + - C:\\Python38\\python.exe debug-info.py diff --git a/.github/workflows/python-tox.yml b/.github/workflows/python-tox.yml index 9673f503..99f9dbf9 100644 --- a/.github/workflows/python-tox.yml +++ b/.github/workflows/python-tox.yml @@ -6,13 +6,14 @@ jobs: if: github.event.push || github.event.pull_request.head.repo.full_name != github.repository runs-on: ubuntu-latest strategy: + fail-fast: false matrix: - python: [3.6, 3.7, 3.8, pypy3] + python: [3.7, 3.8, pypy3.8] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: submodules: true - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v4 with: python-version: ${{ matrix.python }} - run: pip install tox diff --git a/.travis.yml b/.travis.yml index 8b964f4c..a477f122 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,7 +4,6 @@ python: - "3.9" - "3.8" - "3.7" - - "3.6" cache: pip diff --git a/setup.py b/setup.py index d0f5814e..e24296a4 100644 --- a/setup.py +++ b/setup.py @@ -60,7 +60,6 @@ def default_environment(): 'Operating System :: OS Independent', 'Programming Language :: Python', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3 :: Only', @@ -100,7 +99,7 @@ def default_environment(): install_requires=[ 'webencodings', ], - python_requires=">=3.6", + python_requires=">=3.7", extras_require={ # A conditional extra will only install these items when the extra is # requested and the condition matches. diff --git a/tox.ini b/tox.ini index 3ae8afe0..027278f2 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py{36,37,38,py3}-{base,optional} +envlist = py{37,38,py3}-{base,optional} [testenv] deps = @@ -11,7 +11,7 @@ passenv = PYTEST_COMMAND # this is maintained so one can, e.g., PYTEST_COMMAND="coverage run -m pytest" COVERAGE_RUN_OPTIONS commands = - {env:PYTEST_COMMAND:{envbindir}/py.test} {posargs} + {env:PYTEST_COMMAND:{envbindir}/pytest} {posargs} flake8 {toxinidir} [testenv:doc] From 67b42b705cdb129e5833f113914764aaecc9e34b Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Wed, 6 Jul 2022 10:54:56 +0300 Subject: [PATCH 10/10] Upgrade Python syntax for Python 3.7+ --- html5lib/_tokenizer.py | 10 ++-------- html5lib/tests/test_treewalkers.py | 9 ++++----- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/html5lib/_tokenizer.py b/html5lib/_tokenizer.py index b5219836..91699519 100644 --- a/html5lib/_tokenizer.py +++ b/html5lib/_tokenizer.py @@ -1,5 +1,4 @@ -from collections import deque, OrderedDict -from sys import version_info +from collections import deque from .constants import spaceCharacters from .constants import entities @@ -14,11 +13,6 @@ entitiesTrie = Trie(entities) -if version_info >= (3, 7): - attributeMap = dict -else: - attributeMap = OrderedDict - class HTMLTokenizer: """ This class takes care of tokenizing HTML. 
@@ -232,7 +226,7 @@ def emitCurrentToken(self): token["name"] = token["name"].translate(asciiUpper2Lower) if token["type"] == tokenTypes["StartTag"]: raw = token["data"] - data = attributeMap(raw) + data = dict(raw) if len(raw) > len(data): # we had some duplicated attribute, fix so first wins data.update(raw[::-1]) diff --git a/html5lib/tests/test_treewalkers.py b/html5lib/tests/test_treewalkers.py index a15a54e3..18d77128 100644 --- a/html5lib/tests/test_treewalkers.py +++ b/html5lib/tests/test_treewalkers.py @@ -1,5 +1,4 @@ import itertools -import sys import pytest @@ -139,8 +138,8 @@ def test_lxml_xml(): @pytest.mark.parametrize("treeName", [pytest.param(treeName, marks=[getattr(pytest.mark, treeName), pytest.mark.skipif( - treeName != "lxml" or - sys.version_info < (3, 7), reason="dict order undef")]) + treeName != "lxml", + reason="dict order undef")]) for treeName in sorted(treeTypes.keys())]) def test_maintain_attribute_order(treeName): treeAPIs = treeTypes[treeName] @@ -172,8 +171,8 @@ def test_maintain_attribute_order(treeName): @pytest.mark.parametrize("treeName", [pytest.param(treeName, marks=[getattr(pytest.mark, treeName), pytest.mark.skipif( - treeName != "lxml" or - sys.version_info < (3, 7), reason="dict order undef")]) + treeName != "lxml", + reason="dict order undef")]) for treeName in sorted(treeTypes.keys())]) def test_maintain_attribute_order_adjusted(treeName): treeAPIs = treeTypes[treeName]