diff --git a/.appveyor.yml b/.appveyor.yml
index a1a3e347..3088260f 100644
--- a/.appveyor.yml
+++ b/.appveyor.yml
@@ -1,14 +1,8 @@
# To activate, change the Appveyor settings to use `.appveyor.yml`.
environment:
global:
- PATH: "C:\\Python27\\Scripts\\;%PATH%"
+ PATH: "C:\\Python38\\Scripts\\;%PATH%"
matrix:
- - TOXENV: py27-base
- - TOXENV: py27-optional
- - TOXENV: py35-base
- - TOXENV: py35-optional
- - TOXENV: py36-base
- - TOXENV: py36-optional
- TOXENV: py37-base
- TOXENV: py37-optional
- TOXENV: py38-base
@@ -16,7 +10,7 @@ environment:
install:
- git submodule update --init --recursive
- - python -m pip install tox
+ - C:\Python38\python.exe -m pip install tox
build: off
@@ -24,4 +18,4 @@ test_script:
- tox
after_test:
- - python debug-info.py
+ - C:\Python38\python.exe debug-info.py
diff --git a/.github/workflows/python-tox.yml b/.github/workflows/python-tox.yml
index ec5cf636..99f9dbf9 100644
--- a/.github/workflows/python-tox.yml
+++ b/.github/workflows/python-tox.yml
@@ -6,13 +6,14 @@ jobs:
if: github.event.push || github.event.pull_request.head.repo.full_name != github.repository
runs-on: ubuntu-latest
strategy:
+ fail-fast: false
matrix:
- python: [2.7, 3.5, 3.6, 3.7, 3.8, pypy-2.7, pypy3]
+ python: ["3.7", "3.8", "pypy3.8"]
steps:
- - uses: actions/checkout@v2
+ - uses: actions/checkout@v3
with:
submodules: true
- - uses: actions/setup-python@v2
+ - uses: actions/setup-python@v4
with:
python-version: ${{ matrix.python }}
- run: pip install tox
diff --git a/.travis.yml b/.travis.yml
index d2d9e30e..a477f122 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -1,19 +1,15 @@
language: python
python:
- "pypy3"
- - "pypy"
+ - "3.9"
- "3.8"
- "3.7"
- - "3.6"
- - "3.5"
- - "2.7"
- - "3.9-dev"
cache: pip
env:
global:
- - TOXENV=base,optional,six19-optional
+ - TOXENV=base,optional
install:
- pip install tox
diff --git a/README.rst b/README.rst
index d367905d..f4943e52 100644
--- a/README.rst
+++ b/README.rst
@@ -91,7 +91,7 @@ More documentation is available at https://html5lib.readthedocs.io/.
Installation
------------
-html5lib works on CPython 2.7+, CPython 3.5+ and PyPy. To install:
+html5lib works on CPython 3.6+ and PyPy3. To install:
.. code-block:: bash
@@ -127,7 +127,7 @@ Please report any bugs on the `issue tracker
Tests
-----
-Unit tests require the ``pytest`` and ``mock`` libraries and can be
+Unit tests require the ``pytest`` library and can be
run using the ``py.test`` command in the root directory.
Test data are contained in a separate `html5lib-tests
diff --git a/debug-info.py b/debug-info.py
index b47b8ebf..eb5a73f5 100644
--- a/debug-info.py
+++ b/debug-info.py
@@ -1,5 +1,3 @@
-from __future__ import print_function, unicode_literals
-
import platform
import sys
@@ -12,7 +10,7 @@
"maxsize": sys.maxsize
}
-search_modules = ["chardet", "genshi", "html5lib", "lxml", "six"]
+search_modules = ["chardet", "genshi", "html5lib", "lxml"]
found_modules = []
for m in search_modules:
diff --git a/doc/conf.py b/doc/conf.py
index 22ebab4f..d28655ac 100644
--- a/doc/conf.py
+++ b/doc/conf.py
@@ -1,5 +1,4 @@
#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
#
# html5lib documentation build configuration file, created by
# sphinx-quickstart on Wed May 8 00:04:49 2013.
@@ -92,7 +91,7 @@
]
-class CExtMock(object):
+class CExtMock:
"""Required for autodoc on readthedocs.org where you cannot build C extensions."""
def __init__(self, *args, **kwargs):
pass
diff --git a/html5lib/__init__.py b/html5lib/__init__.py
index 7b854f99..d2c68855 100644
--- a/html5lib/__init__.py
+++ b/html5lib/__init__.py
@@ -20,7 +20,6 @@
* :func:`~.serializer.serialize`
"""
-from __future__ import absolute_import, division, unicode_literals
from .html5parser import HTMLParser, parse, parseFragment
from .treebuilders import getTreeBuilder
diff --git a/html5lib/_ihatexml.py b/html5lib/_ihatexml.py
index 3ff803c1..90757cbf 100644
--- a/html5lib/_ihatexml.py
+++ b/html5lib/_ihatexml.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
-
import re
import warnings
@@ -184,7 +182,7 @@ def escapeRegexp(string):
nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\\-'()+,./:=?;!*#@$_%]")
-class InfosetFilter(object):
+class InfosetFilter:
replacementRegexp = re.compile(r"U[\dA-F]{5,5}")
def __init__(self,
diff --git a/html5lib/_inputstream.py b/html5lib/_inputstream.py
index 0207dd21..078026b7 100644
--- a/html5lib/_inputstream.py
+++ b/html5lib/_inputstream.py
@@ -1,10 +1,7 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from six import text_type
-from six.moves import http_client, urllib
-
import codecs
+import http.client
import re
+import urllib
from io import BytesIO, StringIO
import webencodings
@@ -14,9 +11,9 @@
from . import _utils
# Non-unicode versions of constants for use in the pre-parser
-spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters])
-asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters])
-asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase])
+spaceCharactersBytes = frozenset(item.encode("ascii") for item in spaceCharacters)
+asciiLettersBytes = frozenset(item.encode("ascii") for item in asciiLetters)
+asciiUppercaseBytes = frozenset(item.encode("ascii") for item in asciiUppercase)
spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"])
@@ -48,7 +45,7 @@
charsUntilRegEx = {}
-class BufferedStream(object):
+class BufferedStream:
"""Buffering for streams that do not have buffering of their own
The buffer is implemented as a list of chunks on the assumption that
@@ -86,7 +83,7 @@ def read(self, bytes):
return self._readFromBuffer(bytes)
def _bufferedBytes(self):
- return sum([len(item) for item in self.buffer])
+ return sum(len(item) for item in self.buffer)
def _readStream(self, bytes):
data = self.stream.read(bytes)
@@ -125,15 +122,15 @@ def _readFromBuffer(self, bytes):
def HTMLInputStream(source, **kwargs):
# Work around Python bug #20007: read(0) closes the connection.
# http://bugs.python.org/issue20007
- if (isinstance(source, http_client.HTTPResponse) or
+ if (isinstance(source, http.client.HTTPResponse) or
# Also check for addinfourl wrapping HTTPResponse
(isinstance(source, urllib.response.addbase) and
- isinstance(source.fp, http_client.HTTPResponse))):
+ isinstance(source.fp, http.client.HTTPResponse))):
isUnicode = False
elif hasattr(source, "read"):
- isUnicode = isinstance(source.read(0), text_type)
+ isUnicode = isinstance(source.read(0), str)
else:
- isUnicode = isinstance(source, text_type)
+ isUnicode = isinstance(source, str)
if isUnicode:
encodings = [x for x in kwargs if x.endswith("_encoding")]
@@ -145,7 +142,7 @@ def HTMLInputStream(source, **kwargs):
return HTMLBinaryInputStream(source, **kwargs)
-class HTMLUnicodeInputStream(object):
+class HTMLUnicodeInputStream:
"""Provides a unicode stream of characters to the HTMLTokenizer.
This class takes care of character encoding and removing or replacing
@@ -325,7 +322,7 @@ def charsUntil(self, characters, opposite=False):
if __debug__:
for c in characters:
assert(ord(c) < 128)
- regex = "".join(["\\x%02x" % ord(c) for c in characters])
+ regex = "".join("\\x%02x" % ord(c) for c in characters)
if not opposite:
regex = "^%s" % regex
chars = charsUntilRegEx[(characters, opposite)] = re.compile("[%s]+" % regex)
@@ -524,7 +521,7 @@ def changeEncoding(self, newEncoding):
self.rawStream.seek(0)
self.charEncoding = (newEncoding, "certain")
self.reset()
- raise _ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding))
+ raise _ReparseException(f"Encoding changed from {self.charEncoding[0]} to {newEncoding}")
def detectBOM(self):
"""Attempts to detect at BOM at the start of the stream. If
@@ -673,7 +670,7 @@ def jumpTo(self, bytes):
return True
-class EncodingParser(object):
+class EncodingParser:
"""Mini parser for detecting character encoding from meta elements"""
def __init__(self, data):
@@ -861,7 +858,7 @@ def getAttribute(self):
attrValue.append(c)
-class ContentAttrParser(object):
+class ContentAttrParser:
def __init__(self, data):
assert isinstance(data, bytes)
self.data = data
diff --git a/html5lib/_tokenizer.py b/html5lib/_tokenizer.py
index 4748a197..91699519 100644
--- a/html5lib/_tokenizer.py
+++ b/html5lib/_tokenizer.py
@@ -1,9 +1,4 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from six import unichr as chr
-
-from collections import deque, OrderedDict
-from sys import version_info
+from collections import deque
from .constants import spaceCharacters
from .constants import entities
@@ -18,13 +13,8 @@
entitiesTrie = Trie(entities)
-if version_info >= (3, 7):
- attributeMap = dict
-else:
- attributeMap = OrderedDict
-
-class HTMLTokenizer(object):
+class HTMLTokenizer:
""" This class takes care of tokenizing HTML.
* self.currentToken
@@ -50,7 +40,7 @@ def __init__(self, stream, parser=None, **kwargs):
# The current token being created
self.currentToken = None
- super(HTMLTokenizer, self).__init__()
+ super().__init__()
def __iter__(self):
""" This is where the magic happens.
@@ -236,7 +226,7 @@ def emitCurrentToken(self):
token["name"] = token["name"].translate(asciiUpper2Lower)
if token["type"] == tokenTypes["StartTag"]:
raw = token["data"]
- data = attributeMap(raw)
+ data = dict(raw)
if len(raw) > len(data):
# we had some duplicated attribute, fix so first wins
data.update(raw[::-1])
diff --git a/html5lib/_trie/__init__.py b/html5lib/_trie/__init__.py
index 07bad5d3..98a6841a 100644
--- a/html5lib/_trie/__init__.py
+++ b/html5lib/_trie/__init__.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
-
from .py import Trie
__all__ = ["Trie"]
diff --git a/html5lib/_trie/_base.py b/html5lib/_trie/_base.py
index 6b71975f..6b2977b2 100644
--- a/html5lib/_trie/_base.py
+++ b/html5lib/_trie/_base.py
@@ -1,9 +1,4 @@
-from __future__ import absolute_import, division, unicode_literals
-
-try:
- from collections.abc import Mapping
-except ImportError: # Python 2.7
- from collections import Mapping
+from collections.abc import Mapping
class Trie(Mapping):
@@ -11,7 +6,7 @@ class Trie(Mapping):
def keys(self, prefix=None):
# pylint:disable=arguments-differ
- keys = super(Trie, self).keys()
+ keys = super().keys()
if prefix is None:
return set(keys)
diff --git a/html5lib/_trie/py.py b/html5lib/_trie/py.py
index c2ba3da7..bc6363c4 100644
--- a/html5lib/_trie/py.py
+++ b/html5lib/_trie/py.py
@@ -1,6 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
-from six import text_type
-
from bisect import bisect_left
from ._base import Trie as ABCTrie
@@ -8,7 +5,7 @@
class Trie(ABCTrie):
def __init__(self, data):
- if not all(isinstance(x, text_type) for x in data.keys()):
+ if not all(isinstance(x, str) for x in data.keys()):
raise TypeError("All keys must be strings")
self._data = data
diff --git a/html5lib/_utils.py b/html5lib/_utils.py
index 9ea57942..95a5569b 100644
--- a/html5lib/_utils.py
+++ b/html5lib/_utils.py
@@ -1,21 +1,9 @@
-from __future__ import absolute_import, division, unicode_literals
-
from types import ModuleType
-try:
- from collections.abc import Mapping
-except ImportError:
- from collections import Mapping
+from collections.abc import Mapping
-from six import text_type, PY3
-if PY3:
- import xml.etree.ElementTree as default_etree
-else:
- try:
- import xml.etree.cElementTree as default_etree
- except ImportError:
- import xml.etree.ElementTree as default_etree
+import xml.etree.ElementTree as default_etree
__all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair",
@@ -31,10 +19,10 @@
# escapes.
try:
_x = eval('"\\uD800"') # pylint:disable=eval-used
- if not isinstance(_x, text_type):
+ if not isinstance(_x, str):
# We need this with u"" because of http://bugs.jython.org/issue2039
_x = eval('u"\\uD800"') # pylint:disable=eval-used
- assert isinstance(_x, text_type)
+ assert isinstance(_x, str)
except Exception:
supports_lone_surrogates = False
else:
@@ -122,7 +110,7 @@ def moduleFactoryFactory(factory):
moduleCache = {}
def moduleFactory(baseModule, *args, **kwargs):
- if isinstance(ModuleType.__name__, type("")):
+ if isinstance(ModuleType.__name__, str):
name = "_%s_factory" % baseModule.__name__
else:
name = b"_%s_factory" % baseModule.__name__
diff --git a/html5lib/constants.py b/html5lib/constants.py
index fe3e237c..3596ea21 100644
--- a/html5lib/constants.py
+++ b/html5lib/constants.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
-
import string
EOF = None
diff --git a/html5lib/filters/alphabeticalattributes.py b/html5lib/filters/alphabeticalattributes.py
index 5ba926e3..d96ad62a 100644
--- a/html5lib/filters/alphabeticalattributes.py
+++ b/html5lib/filters/alphabeticalattributes.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
-
from . import base
from collections import OrderedDict
diff --git a/html5lib/filters/base.py b/html5lib/filters/base.py
index c7dbaed0..6937911d 100644
--- a/html5lib/filters/base.py
+++ b/html5lib/filters/base.py
@@ -1,7 +1,4 @@
-from __future__ import absolute_import, division, unicode_literals
-
-
-class Filter(object):
+class Filter:
def __init__(self, source):
self.source = source
diff --git a/html5lib/filters/inject_meta_charset.py b/html5lib/filters/inject_meta_charset.py
index aefb5c84..cfa469c3 100644
--- a/html5lib/filters/inject_meta_charset.py
+++ b/html5lib/filters/inject_meta_charset.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
-
from . import base
diff --git a/html5lib/filters/lint.py b/html5lib/filters/lint.py
index acd4d7a2..f0ffce61 100644
--- a/html5lib/filters/lint.py
+++ b/html5lib/filters/lint.py
@@ -1,7 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from six import text_type
-
from . import base
from ..constants import namespaces, voidElements
@@ -23,7 +19,7 @@ def __init__(self, source, require_matching_tags=True):
:arg require_matching_tags: whether or not to require matching tags
"""
- super(Filter, self).__init__(source)
+ super().__init__(source)
self.require_matching_tags = require_matching_tags
def __iter__(self):
@@ -33,9 +29,9 @@ def __iter__(self):
if type in ("StartTag", "EmptyTag"):
namespace = token["namespace"]
name = token["name"]
- assert namespace is None or isinstance(namespace, text_type)
+ assert namespace is None or isinstance(namespace, str)
assert namespace != ""
- assert isinstance(name, text_type)
+ assert isinstance(name, str)
assert name != ""
assert isinstance(token["data"], dict)
if (not namespace or namespace == namespaces["html"]) and name in voidElements:
@@ -45,49 +41,49 @@ def __iter__(self):
if type == "StartTag" and self.require_matching_tags:
open_elements.append((namespace, name))
for (namespace, name), value in token["data"].items():
- assert namespace is None or isinstance(namespace, text_type)
+ assert namespace is None or isinstance(namespace, str)
assert namespace != ""
- assert isinstance(name, text_type)
+ assert isinstance(name, str)
assert name != ""
- assert isinstance(value, text_type)
+ assert isinstance(value, str)
elif type == "EndTag":
namespace = token["namespace"]
name = token["name"]
- assert namespace is None or isinstance(namespace, text_type)
+ assert namespace is None or isinstance(namespace, str)
assert namespace != ""
- assert isinstance(name, text_type)
+ assert isinstance(name, str)
assert name != ""
if (not namespace or namespace == namespaces["html"]) and name in voidElements:
- assert False, "Void element reported as EndTag token: %(tag)s" % {"tag": name}
+ assert False, f"Void element reported as EndTag token: {name}"
elif self.require_matching_tags:
start = open_elements.pop()
assert start == (namespace, name)
elif type == "Comment":
data = token["data"]
- assert isinstance(data, text_type)
+ assert isinstance(data, str)
elif type in ("Characters", "SpaceCharacters"):
data = token["data"]
- assert isinstance(data, text_type)
+ assert isinstance(data, str)
assert data != ""
if type == "SpaceCharacters":
assert data.strip(spaceCharacters) == ""
elif type == "Doctype":
name = token["name"]
- assert name is None or isinstance(name, text_type)
- assert token["publicId"] is None or isinstance(name, text_type)
- assert token["systemId"] is None or isinstance(name, text_type)
+ assert name is None or isinstance(name, str)
+ assert token["publicId"] is None or isinstance(token["publicId"], str)
+ assert token["systemId"] is None or isinstance(token["systemId"], str)
elif type == "Entity":
- assert isinstance(token["name"], text_type)
+ assert isinstance(token["name"], str)
elif type == "SerializerError":
- assert isinstance(token["data"], text_type)
+ assert isinstance(token["data"], str)
else:
- assert False, "Unknown token type: %(type)s" % {"type": type}
+ assert False, f"Unknown token type: {type}"
yield token
diff --git a/html5lib/filters/optionaltags.py b/html5lib/filters/optionaltags.py
index 4a865012..f1c21118 100644
--- a/html5lib/filters/optionaltags.py
+++ b/html5lib/filters/optionaltags.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
-
from . import base
diff --git a/html5lib/filters/sanitizer.py b/html5lib/filters/sanitizer.py
index 70ef9066..a1b61099 100644
--- a/html5lib/filters/sanitizer.py
+++ b/html5lib/filters/sanitizer.py
@@ -6,13 +6,11 @@
if Bleach is unsuitable for your needs.
"""
-from __future__ import absolute_import, division, unicode_literals
import re
import warnings
from xml.sax.saxutils import escape, unescape
-
-from six.moves import urllib_parse as urlparse
+import urllib.parse
from . import base
from ..constants import namespaces, prefixes
@@ -766,7 +764,7 @@ def __init__(self,
hrefs--these are removed
"""
- super(Filter, self).__init__(source)
+ super().__init__(source)
warnings.warn(_deprecation_msg, DeprecationWarning)
@@ -838,7 +836,7 @@ def allowed_token(self, token):
# remove replacement characters from unescaped characters
val_unescaped = val_unescaped.replace("\ufffd", "")
try:
- uri = urlparse.urlparse(val_unescaped)
+ uri = urllib.parse.urlparse(val_unescaped)
except ValueError:
uri = None
del attrs[attr]
@@ -874,8 +872,8 @@ def disallowed_token(self, token):
assert token_type in ("StartTag", "EmptyTag")
attrs = []
for (ns, name), v in token["data"].items():
- attrs.append(' %s="%s"' % (name if ns is None else "%s:%s" % (prefixes[ns], name), escape(v)))
- token["data"] = "<%s%s>" % (token["name"], ''.join(attrs))
+ attrs.append(' {}="{}"'.format(name if ns is None else f"{prefixes[ns]}:{name}", escape(v)))
+ token["data"] = "<{}{}>".format(token["name"], ''.join(attrs))
else:
token["data"] = "<%s>" % token["name"]
if token.get("selfClosing"):
diff --git a/html5lib/filters/whitespace.py b/html5lib/filters/whitespace.py
index 0d12584b..2f35f4a0 100644
--- a/html5lib/filters/whitespace.py
+++ b/html5lib/filters/whitespace.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
-
import re
from . import base
diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py
index 74d829d9..3996c9b8 100644
--- a/html5lib/html5parser.py
+++ b/html5lib/html5parser.py
@@ -1,6 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
-from six import with_metaclass, viewkeys
-
import types
from . import _inputstream
@@ -83,7 +80,7 @@ def __new__(meta, classname, bases, classDict):
return Decorated
-class HTMLParser(object):
+class HTMLParser:
"""HTML parser
Generates a tree structure from a stream of (possibly malformed) HTML.
@@ -423,7 +420,7 @@ def getMetaclass(use_metaclass, metaclass_func):
return type
# pylint:disable=unused-argument
- class Phase(with_metaclass(getMetaclass(debug, log))):
+ class Phase(metaclass=getMetaclass(debug, log)):
"""Base class for helper object that implements each phase of processing
"""
__slots__ = ("parser", "tree", "__startTagCache", "__endTagCache")
@@ -944,7 +941,7 @@ class InBodyPhase(Phase):
__slots__ = ("processSpaceCharacters",)
def __init__(self, *args, **kwargs):
- super(InBodyPhase, self).__init__(*args, **kwargs)
+ super().__init__(*args, **kwargs)
# Set this to the default handler
self.processSpaceCharacters = self.processSpaceCharactersNonPre
@@ -1002,8 +999,8 @@ def processCharacters(self, token):
self.tree.insertText(token["data"])
# This must be bad for performance
if (self.parser.framesetOK and
- any([char not in spaceCharacters
- for char in token["data"]])):
+ any(char not in spaceCharacters
+ for char in token["data"])):
self.parser.framesetOK = False
def processSpaceCharactersNonPre(self, token):
@@ -1844,13 +1841,13 @@ class InTableTextPhase(Phase):
__slots__ = ("originalPhase", "characterTokens")
def __init__(self, *args, **kwargs):
- super(InTableTextPhase, self).__init__(*args, **kwargs)
+ super().__init__(*args, **kwargs)
self.originalPhase = None
self.characterTokens = []
def flushCharacters(self):
- data = "".join([item["data"] for item in self.characterTokens])
- if any([item not in spaceCharacters for item in data]):
+ data = "".join(item["data"] for item in self.characterTokens)
+ if any(item not in spaceCharacters for item in data):
token = {"type": tokenTypes["Characters"], "data": data}
self.parser.phases["inTable"].insertText(token)
elif data:
@@ -2776,7 +2773,7 @@ def processEndTag(self, token):
def adjust_attributes(token, replacements):
- needs_adjustment = viewkeys(token['data']) & viewkeys(replacements)
+ needs_adjustment = token['data'].keys() & replacements.keys()
if needs_adjustment:
token['data'] = type(token['data'])((replacements.get(k, k), v)
for k, v in token['data'].items())
diff --git a/html5lib/serializer.py b/html5lib/serializer.py
index c66df683..cd4631f6 100644
--- a/html5lib/serializer.py
+++ b/html5lib/serializer.py
@@ -1,6 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
-from six import text_type
-
import re
from codecs import register_error, xmlcharrefreplace_errors
@@ -101,7 +98,7 @@ def serialize(input, tree="etree", encoding=None, **serializer_opts):
return s.render(walker(input), encoding)
-class HTMLSerializer(object):
+class HTMLSerializer:
# attribute quoting options
quote_attr_values = "legacy" # be secure by default
@@ -222,14 +219,14 @@ def __init__(self, **kwargs):
self.strict = False
def encode(self, string):
- assert(isinstance(string, text_type))
+ assert(isinstance(string, str))
if self.encoding:
return string.encode(self.encoding, "htmlentityreplace")
else:
return string
def encodeStrict(self, string):
- assert(isinstance(string, text_type))
+ assert(isinstance(string, str))
if self.encoding:
return string.encode(self.encoding, "strict")
else:
@@ -278,7 +275,7 @@ def serialize(self, treewalker, encoding=None):
quote_char = "'"
else:
quote_char = '"'
- doctype += " %s%s%s" % (quote_char, token["systemId"], quote_char)
+ doctype += " {}{}{}".format(quote_char, token["systemId"], quote_char)
doctype += ">"
yield self.encodeStrict(doctype)
diff --git a/html5lib/tests/__init__.py b/html5lib/tests/__init__.py
index b8ce2de3..e69de29b 100644
--- a/html5lib/tests/__init__.py
+++ b/html5lib/tests/__init__.py
@@ -1 +0,0 @@
-from __future__ import absolute_import, division, unicode_literals
diff --git a/html5lib/tests/conftest.py b/html5lib/tests/conftest.py
index fffeb50c..0b3fc4df 100644
--- a/html5lib/tests/conftest.py
+++ b/html5lib/tests/conftest.py
@@ -1,4 +1,3 @@
-from __future__ import print_function
import os.path
import sys
@@ -54,7 +53,7 @@ def pytest_configure(config):
# Check for optional requirements
req_file = os.path.join(_root, "requirements-optional.txt")
if os.path.exists(req_file):
- with open(req_file, "r") as fp:
+ with open(req_file) as fp:
for line in fp:
if (line.strip() and
not (line.startswith("-r") or
@@ -70,7 +69,7 @@ def pytest_configure(config):
try:
installed = pkg_resources.working_set.find(req)
except pkg_resources.VersionConflict:
- msgs.append("Outdated version of %s installed, need %s" % (req.name, spec))
+ msgs.append(f"Outdated version of {req.name} installed, need {spec}")
else:
if not installed:
msgs.append("Need %s" % spec)
@@ -79,7 +78,7 @@ def pytest_configure(config):
import xml.etree.ElementTree as ElementTree
try:
- import xml.etree.cElementTree as cElementTree
+ import xml.etree.ElementTree as cElementTree
except ImportError:
msgs.append("cElementTree unable to be imported")
else:
@@ -90,7 +89,7 @@ def pytest_configure(config):
pytest.exit("\n".join(msgs))
-def pytest_collect_file(path, parent):
+def pytest_collect_file(file_path, path, parent):
dir = os.path.abspath(path.dirname)
dir_and_parents = set()
while dir not in dir_and_parents:
@@ -99,13 +98,13 @@ def pytest_collect_file(path, parent):
if _tree_construction in dir_and_parents:
if path.ext == ".dat":
- return TreeConstructionFile.from_parent(parent, fspath=path)
+ return TreeConstructionFile.from_parent(parent, path=file_path)
elif _tokenizer in dir_and_parents:
if path.ext == ".test":
- return TokenizerFile.from_parent(parent, fspath=path)
+ return TokenizerFile.from_parent(parent, path=file_path)
elif _sanitizer_testdata in dir_and_parents:
if path.ext == ".dat":
- return SanitizerFile.from_parent(parent, fspath=path)
+ return SanitizerFile.from_parent(parent, path=file_path)
# Tiny wrapper to allow .from_parent constructors on older pytest for PY27
diff --git a/html5lib/tests/sanitizer.py b/html5lib/tests/sanitizer.py
index 16e53868..fb7fadf9 100644
--- a/html5lib/tests/sanitizer.py
+++ b/html5lib/tests/sanitizer.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
-
import codecs
import json
@@ -18,7 +16,7 @@ def collect(self):
class SanitizerTest(pytest.Item):
def __init__(self, name, parent, test):
- super(SanitizerTest, self).__init__(name, parent)
+ super().__init__(name, parent)
self.obj = lambda: 1 # this is to hack around skipif needing a function!
self.test = test
diff --git a/html5lib/tests/support.py b/html5lib/tests/support.py
index 9cd5afbe..f311fb92 100644
--- a/html5lib/tests/support.py
+++ b/html5lib/tests/support.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
-
# pylint:disable=wrong-import-position
import os
@@ -35,7 +33,7 @@
}
try:
- import xml.etree.cElementTree as cElementTree # noqa
+ import xml.etree.ElementTree as cElementTree # noqa
except ImportError:
treeTypes['cElementTree'] = None
else:
@@ -86,7 +84,7 @@ def __getitem__(self, key):
return dict.get(self, key, self.default)
-class TestData(object):
+class TestData:
def __init__(self, filename, newTestHeading="data", encoding="utf8"):
if encoding is None:
self.f = open(filename, mode="rb")
@@ -150,8 +148,6 @@ def convertData(data):
def errorMessage(input, expected, actual):
msg = ("Input:\n%s\nExpected:\n%s\nReceived\n%s\n" %
(repr(input), repr(expected), repr(actual)))
- if sys.version_info[0] == 2:
- msg = msg.encode("ascii", "backslashreplace")
return msg
diff --git a/html5lib/tests/test_alphabeticalattributes.py b/html5lib/tests/test_alphabeticalattributes.py
index 7d5b8e0f..3b82c2b0 100644
--- a/html5lib/tests/test_alphabeticalattributes.py
+++ b/html5lib/tests/test_alphabeticalattributes.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
-
from collections import OrderedDict
import pytest
diff --git a/html5lib/tests/test_encoding.py b/html5lib/tests/test_encoding.py
index 47c4814a..ddad9100 100644
--- a/html5lib/tests/test_encoding.py
+++ b/html5lib/tests/test_encoding.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
-
import os
import pytest
@@ -9,7 +7,7 @@
def test_basic_prescan_length():
- data = "Caf\u00E9".encode('utf-8')
+ data = "Caf\u00E9".encode()
pad = 1024 - len(data) + 1
data = data.replace(b"-a-", b"-" + (b"a" * pad) + b"-")
assert len(data) == 1024 # Sanity
@@ -18,7 +16,7 @@ def test_basic_prescan_length():
def test_parser_reparse():
- data = "Caf\u00E9".encode('utf-8')
+ data = "Caf\u00E9".encode()
pad = 10240 - len(data) + 1
data = data.replace(b"-a-", b"-" + (b"a" * pad) + b"-")
assert len(data) == 10240 # Sanity
diff --git a/html5lib/tests/test_meta.py b/html5lib/tests/test_meta.py
index dd02dd7f..2fc6140d 100644
--- a/html5lib/tests/test_meta.py
+++ b/html5lib/tests/test_meta.py
@@ -1,7 +1,4 @@
-from __future__ import absolute_import, division, unicode_literals
-
-import six
-from mock import Mock
+from unittest.mock import Mock
from . import support
@@ -27,11 +24,7 @@ def test_errorMessage():
r = support.errorMessage(input, expected, actual)
# Assertions!
- if six.PY2:
- assert b"Input:\n1\nExpected:\n2\nReceived\n3\n" == r
- else:
- assert six.PY3
- assert "Input:\n1\nExpected:\n2\nReceived\n3\n" == r
+ assert "Input:\n1\nExpected:\n2\nReceived\n3\n" == r
assert input.__repr__.call_count == 1
assert expected.__repr__.call_count == 1
diff --git a/html5lib/tests/test_optionaltags_filter.py b/html5lib/tests/test_optionaltags_filter.py
index cd282149..1b054f40 100644
--- a/html5lib/tests/test_optionaltags_filter.py
+++ b/html5lib/tests/test_optionaltags_filter.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
-
from html5lib.filters.optionaltags import Filter
diff --git a/html5lib/tests/test_parser2.py b/html5lib/tests/test_parser2.py
index 879d2447..89c9cef1 100644
--- a/html5lib/tests/test_parser2.py
+++ b/html5lib/tests/test_parser2.py
@@ -1,7 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
-
-from six import PY2, text_type
-
import io
from . import support # noqa
@@ -39,7 +35,7 @@ def test_namespace_html_elements_0_etree():
doc = parse("",
treebuilder="etree",
namespaceHTMLElements=True)
- assert doc.tag == "{%s}html" % (namespaces["html"],)
+ assert doc.tag == "{{{}}}html".format(namespaces["html"])
def test_namespace_html_elements_1_etree():
@@ -75,11 +71,6 @@ def test_debug_log():
('dataState', 'InBodyPhase', 'InBodyPhase', 'processEndTag', {'name': 'p', 'type': 'EndTag'}),
('dataState', 'InBodyPhase', 'InBodyPhase', 'processCharacters', {'type': 'Characters'})]
- if PY2:
- for i, log in enumerate(expected):
- log = [x.encode("ascii") if isinstance(x, text_type) else x for x in log]
- expected[i] = tuple(log)
-
assert parser.log == expected
diff --git a/html5lib/tests/test_sanitizer.py b/html5lib/tests/test_sanitizer.py
index f3faeb80..f8b4b4b6 100644
--- a/html5lib/tests/test_sanitizer.py
+++ b/html5lib/tests/test_sanitizer.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
-
import pytest
from html5lib import constants, parseFragment, serialize
@@ -60,19 +58,19 @@ def param_sanitizer():
if tag_name == 'image':
yield ("test_should_allow_%s_tag" % tag_name,
"<img title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz",
- "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
+ f"<{tag_name} title='1'>foo <bad>bar</bad> baz</{tag_name}>")
elif tag_name == 'br':
yield ("test_should_allow_%s_tag" % tag_name,
"<br title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz<br/>",
- "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
+ f"<{tag_name} title='1'>foo <bad>bar</bad> baz</{tag_name}>")
elif tag_name in constants.voidElements:
yield ("test_should_allow_%s_tag" % tag_name,
"<%s title=\"1\"/>foo &lt;bad&gt;bar&lt;/bad&gt; baz" % tag_name,
- "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
+ f"<{tag_name} title='1'>foo <bad>bar</bad> baz</{tag_name}>")
else:
yield ("test_should_allow_%s_tag" % tag_name,
- "<%s title=\"1\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</%s>" % (tag_name, tag_name),
- "<%s title='1'>foo <bad>bar</bad> baz</%s>" % (tag_name, tag_name))
+ f"<{tag_name} title=\"1\">foo &lt;bad&gt;bar&lt;/bad&gt; baz</{tag_name}>",
+ f"<{tag_name} title='1'>foo <bad>bar</bad> baz</{tag_name}>")
for ns, attribute_name in sanitizer.allowed_attributes:
if ns is not None:
@@ -85,16 +83,16 @@ def param_sanitizer():
if attribute_name in sanitizer.attr_val_is_uri:
attribute_value = '%s://sub.domain.tld/path/object.ext' % sanitizer.allowed_protocols[0]
yield ("test_should_allow_%s_attribute" % attribute_name,
- "foo <bad>bar</bad> baz
" % (attribute_name, attribute_value),
- "foo bar baz
" % (attribute_name, attribute_value))
+ f"foo <bad>bar</bad> baz
",
+ f"foo bar baz
")
for protocol in sanitizer.allowed_protocols:
rest_of_uri = '//sub.domain.tld/path/object.ext'
if protocol == 'data':
rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ='
yield ("test_should_allow_uppercase_%s_uris" % protocol,
- "
foo" % (protocol, rest_of_uri),
- """
foo""" % (protocol, rest_of_uri))
+ f"
foo",
+ f"""
foo""")
for protocol in sanitizer.allowed_protocols:
rest_of_uri = '//sub.domain.tld/path/object.ext'
@@ -102,8 +100,8 @@ def param_sanitizer():
rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ='
protocol = protocol.upper()
yield ("test_should_allow_uppercase_%s_uris" % protocol,
- "
foo" % (protocol, rest_of_uri),
- """
foo""" % (protocol, rest_of_uri))
+ f"
foo",
+ f"""
foo""")
@pytest.mark.parametrize("expected, input",
diff --git a/html5lib/tests/test_serializer.py b/html5lib/tests/test_serializer.py
index bce62459..2ed71de6 100644
--- a/html5lib/tests/test_serializer.py
+++ b/html5lib/tests/test_serializer.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
-
import os
import json
@@ -221,6 +219,6 @@ def test_serializer(input, expected, options):
result = serialize_html(input, options)
if len(expected) == 1:
- assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions:\n%s" % (expected[0], result, str(options))
+ assert expected[0] == result, f"Expected:\n{expected[0]}\nActual:\n{result}\nOptions:\n{str(options)}"
elif result not in expected:
- assert False, "Expected: %s, Received: %s" % (expected, result)
+ assert False, f"Expected: {expected}, Received: {result}"
diff --git a/html5lib/tests/test_stream.py b/html5lib/tests/test_stream.py
index efe9b472..b94c7a5c 100644
--- a/html5lib/tests/test_stream.py
+++ b/html5lib/tests/test_stream.py
@@ -1,16 +1,13 @@
-from __future__ import absolute_import, division, unicode_literals
-
from . import support # noqa
import codecs
+import http.client
import sys
+import urllib
from io import BytesIO, StringIO
import pytest
-import six
-from six.moves import http_client, urllib
-
from html5lib._inputstream import (BufferedStream, HTMLInputStream,
HTMLUnicodeInputStream, HTMLBinaryInputStream)
from html5lib._utils import supports_lone_surrogates
@@ -105,7 +102,7 @@ def test_char_ascii():
def test_char_utf8():
- stream = HTMLInputStream('\u2018'.encode('utf-8'), override_encoding='utf-8')
+ stream = HTMLInputStream('\u2018'.encode(), override_encoding='utf-8')
assert stream.charEncoding[0].name == 'utf-8'
assert stream.char() == '\u2018'
@@ -186,12 +183,12 @@ def test_python_issue_20007():
Make sure we have a work-around for Python bug #20007
http://bugs.python.org/issue20007
"""
- class FakeSocket(object):
+ class FakeSocket:
def makefile(self, _mode, _bufsize=None):
# pylint:disable=unused-argument
return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText")
- source = http_client.HTTPResponse(FakeSocket())
+ source = http.client.HTTPResponse(FakeSocket())
source.begin()
stream = HTMLInputStream(source)
assert stream.charsUntil(" ") == "Text"
@@ -202,15 +199,12 @@ def test_python_issue_20007_b():
Make sure we have a work-around for Python bug #20007
http://bugs.python.org/issue20007
"""
- if six.PY2:
- return
-
- class FakeSocket(object):
+ class FakeSocket:
def makefile(self, _mode, _bufsize=None):
# pylint:disable=unused-argument
return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText")
- source = http_client.HTTPResponse(FakeSocket())
+ source = http.client.HTTPResponse(FakeSocket())
source.begin()
wrapped = urllib.response.addinfourl(source, source.msg, "http://example.com")
stream = HTMLInputStream(wrapped)
diff --git a/html5lib/tests/test_tokenizer2.py b/html5lib/tests/test_tokenizer2.py
index 158d847a..a616288f 100644
--- a/html5lib/tests/test_tokenizer2.py
+++ b/html5lib/tests/test_tokenizer2.py
@@ -1,8 +1,5 @@
-from __future__ import absolute_import, division, unicode_literals
-
import io
-from six import unichr, text_type
from html5lib._tokenizer import HTMLTokenizer
from html5lib.constants import tokenTypes
@@ -16,8 +13,8 @@ def ignore_parse_errors(toks):
def test_maintain_attribute_order():
# generate loads to maximize the chance a hash-based mutation will occur
- attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
- stream = io.StringIO("")
+ attrs = [(chr(x), str(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
+ stream = io.StringIO("")
toks = HTMLTokenizer(stream)
out = list(ignore_parse_errors(toks))
@@ -49,8 +46,8 @@ def test_duplicate_attribute():
def test_maintain_duplicate_attribute_order():
# generate loads to maximize the chance a hash-based mutation will occur
- attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
- stream = io.StringIO("")
+ attrs = [(chr(x), str(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
+ stream = io.StringIO("")
toks = HTMLTokenizer(stream)
out = list(ignore_parse_errors(toks))
diff --git a/html5lib/tests/test_treeadapters.py b/html5lib/tests/test_treeadapters.py
index 95e56c00..1e396ed9 100644
--- a/html5lib/tests/test_treeadapters.py
+++ b/html5lib/tests/test_treeadapters.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
-
from . import support # noqa
import html5lib
diff --git a/html5lib/tests/test_treewalkers.py b/html5lib/tests/test_treewalkers.py
index 780ca964..18d77128 100644
--- a/html5lib/tests/test_treewalkers.py
+++ b/html5lib/tests/test_treewalkers.py
@@ -1,9 +1,5 @@
-from __future__ import absolute_import, division, unicode_literals
-
import itertools
-import sys
-from six import unichr, text_type
import pytest
try:
@@ -74,11 +70,11 @@ def param_treewalker_six_mix():
# fragment but not using the u'' syntax nor importing unicode_literals
sm_tests = [
('Example',
- [(str('class'), str('test123'))],
+ [('class', 'test123')],
'\n class="test123"\n href="http://example.com"\n "Example"'),
('',
- [(str('rel'), str('alternate'))],
+ [('rel', 'alternate')],
'\n href="http://example.com/cow"\n rel="alternate"\n "Example"')
]
@@ -102,7 +98,7 @@ def test_treewalker_six_mix(intext, expected, attrs_to_add, tree):
output = treewalkers.pprint(treeClass["walker"](document))
output = attrlist.sub(sortattrs, output)
if output not in expected:
- raise AssertionError("TreewalkerEditTest: %s\nExpected:\n%s\nReceived:\n%s" % (treeName, expected, output))
+ raise AssertionError(f"TreewalkerEditTest: {treeName}\nExpected:\n{expected}\nReceived:\n{output}")
@pytest.mark.parametrize("tree,char", itertools.product(sorted(treeTypes.items()), ["x", "\u1234"]))
@@ -142,8 +138,8 @@ def test_lxml_xml():
@pytest.mark.parametrize("treeName",
[pytest.param(treeName, marks=[getattr(pytest.mark, treeName),
pytest.mark.skipif(
- treeName != "lxml" or
- sys.version_info < (3, 7), reason="dict order undef")])
+ treeName != "lxml",
+ reason="dict order undef")])
for treeName in sorted(treeTypes.keys())])
def test_maintain_attribute_order(treeName):
treeAPIs = treeTypes[treeName]
@@ -151,8 +147,8 @@ def test_maintain_attribute_order(treeName):
pytest.skip("Treebuilder not loaded")
# generate loads to maximize the chance a hash-based mutation will occur
- attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
- data = ""
+ attrs = [(chr(x), str(i)) for i, x in enumerate(range(ord('a'), ord('z')))]
+ data = ""
parser = html5parser.HTMLParser(tree=treeAPIs["builder"])
document = parser.parseFragment(data)
@@ -175,8 +171,8 @@ def test_maintain_attribute_order(treeName):
@pytest.mark.parametrize("treeName",
[pytest.param(treeName, marks=[getattr(pytest.mark, treeName),
pytest.mark.skipif(
- treeName != "lxml" or
- sys.version_info < (3, 7), reason="dict order undef")])
+ treeName != "lxml",
+ reason="dict order undef")])
for treeName in sorted(treeTypes.keys())])
def test_maintain_attribute_order_adjusted(treeName):
treeAPIs = treeTypes[treeName]
diff --git a/html5lib/tests/test_whitespace_filter.py b/html5lib/tests/test_whitespace_filter.py
index e9da6140..0daf1c52 100644
--- a/html5lib/tests/test_whitespace_filter.py
+++ b/html5lib/tests/test_whitespace_filter.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
-
from html5lib.filters.whitespace import Filter
from html5lib.constants import spaceCharacters
spaceCharacters = "".join(spaceCharacters)
diff --git a/html5lib/tests/tokenizer.py b/html5lib/tests/tokenizer.py
index cc9897a4..273955c3 100644
--- a/html5lib/tests/tokenizer.py
+++ b/html5lib/tests/tokenizer.py
@@ -1,18 +1,15 @@
-from __future__ import absolute_import, division, unicode_literals
-
import codecs
import json
import warnings
import re
import pytest
-from six import unichr
from html5lib._tokenizer import HTMLTokenizer
from html5lib import constants, _utils
-class TokenizerTestParser(object):
+class TokenizerTestParser:
def __init__(self, initialState, lastStartTag=None):
self.tokenizer = HTMLTokenizer
self._state = initialState
@@ -146,11 +143,11 @@ def repl(m):
low = int(m.group(2), 16)
if 0xD800 <= high <= 0xDBFF and 0xDC00 <= low <= 0xDFFF:
cp = ((high - 0xD800) << 10) + (low - 0xDC00) + 0x10000
- return unichr(cp)
+ return chr(cp)
else:
- return unichr(high) + unichr(low)
+ return chr(high) + chr(low)
else:
- return unichr(int(m.group(1), 16))
+ return chr(int(m.group(1), 16))
try:
return _surrogateRe.sub(repl, inp)
except ValueError:
@@ -197,7 +194,7 @@ def collect(self):
class TokenizerTestCollector(pytest.Collector):
def __init__(self, name, parent=None, config=None, session=None, testdata=None):
- super(TokenizerTestCollector, self).__init__(name, parent, config, session)
+ super().__init__(name, parent, config, session)
if 'initialStates' not in testdata:
testdata["initialStates"] = ["Data state"]
if 'doubleEscaped' in testdata:
@@ -218,7 +215,7 @@ def collect(self):
class TokenizerTest(pytest.Item):
def __init__(self, name, parent, test, initialState):
- super(TokenizerTest, self).__init__(name, parent)
+ super().__init__(name, parent)
self.obj = lambda: 1 # this is to hack around skipif needing a function!
self.test = test
self.initialState = initialState
diff --git a/html5lib/tests/tokenizertotree.py b/html5lib/tests/tokenizertotree.py
index 8528e876..2ba74cad 100644
--- a/html5lib/tests/tokenizertotree.py
+++ b/html5lib/tests/tokenizertotree.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
-
import sys
import os
import json
@@ -25,7 +23,7 @@ def main(out_path):
def run_file(filename, out_path):
try:
- tests_data = json.load(open(filename, "r"))
+ tests_data = json.load(open(filename))
except ValueError:
sys.stderr.write("Failed to load %s\n" % filename)
return
diff --git a/html5lib/tests/tree_construction.py b/html5lib/tests/tree_construction.py
index fb0657bf..204865ba 100644
--- a/html5lib/tests/tree_construction.py
+++ b/html5lib/tests/tree_construction.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
-
import itertools
import re
import warnings
@@ -31,14 +29,13 @@ def collect(self):
class TreeConstructionTest(pytest.Collector):
def __init__(self, name, parent=None, config=None, session=None, testdata=None):
- super(TreeConstructionTest, self).__init__(name, parent, config, session)
+ super().__init__(name, parent, config, session)
self.testdata = testdata
def collect(self):
for treeName, treeAPIs in sorted(treeTypes.items()):
- for x in itertools.chain(self._getParserTests(treeName, treeAPIs),
- self._getTreeWalkerTests(treeName, treeAPIs)):
- yield x
+ yield from itertools.chain(self._getParserTests(treeName, treeAPIs),
+ self._getTreeWalkerTests(treeName, treeAPIs))
def _getParserTests(self, treeName, treeAPIs):
if treeAPIs is not None and "adapter" in treeAPIs:
@@ -79,7 +76,7 @@ def convertTreeDump(data):
class ParserTest(pytest.Item):
def __init__(self, name, parent, test, treeClass, namespaceHTMLElements):
- super(ParserTest, self).__init__(name, parent)
+ super().__init__(name, parent)
self.test = test
self.treeClass = treeClass
self.namespaceHTMLElements = namespaceHTMLElements
@@ -122,7 +119,7 @@ def runtest(self):
errStr = []
for (line, col), errorcode, datavars in p.errors:
- assert isinstance(datavars, dict), "%s, %s" % (errorcode, repr(datavars))
+ assert isinstance(datavars, dict), f"{errorcode}, {repr(datavars)}"
errStr.append("Line: %i Col: %i %s" % (line, col,
constants.E[errorcode] % datavars))
@@ -144,7 +141,7 @@ def repr_failure(self, excinfo):
class TreeWalkerTest(pytest.Item):
def __init__(self, name, parent, test, treeAPIs):
- super(TreeWalkerTest, self).__init__(name, parent)
+ super().__init__(name, parent)
self.test = test
self.treeAPIs = treeAPIs
diff --git a/html5lib/treeadapters/__init__.py b/html5lib/treeadapters/__init__.py
index dfeb0ba5..1444fc9a 100644
--- a/html5lib/treeadapters/__init__.py
+++ b/html5lib/treeadapters/__init__.py
@@ -16,7 +16,6 @@
genshi_tree = genshi.to_genshi(TreeWalker(tree))
"""
-from __future__ import absolute_import, division, unicode_literals
from . import sax
diff --git a/html5lib/treeadapters/genshi.py b/html5lib/treeadapters/genshi.py
index 61d5fb6a..804a980e 100644
--- a/html5lib/treeadapters/genshi.py
+++ b/html5lib/treeadapters/genshi.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
-
from genshi.core import QName, Attrs
from genshi.core import START, END, TEXT, COMMENT, DOCTYPE
@@ -23,7 +21,7 @@ def to_genshi(walker):
if type in ("StartTag", "EmptyTag"):
if token["namespace"]:
- name = "{%s}%s" % (token["namespace"], token["name"])
+ name = "{{{}}}{}".format(token["namespace"], token["name"])
else:
name = token["name"]
attrs = Attrs([(QName("{%s}%s" % attr if attr[0] is not None else attr[1]), value)
@@ -34,7 +32,7 @@ def to_genshi(walker):
if type == "EndTag":
if token["namespace"]:
- name = "{%s}%s" % (token["namespace"], token["name"])
+ name = "{{{}}}{}".format(token["namespace"], token["name"])
else:
name = token["name"]
diff --git a/html5lib/treeadapters/sax.py b/html5lib/treeadapters/sax.py
index f4ccea5a..04ec1ef0 100644
--- a/html5lib/treeadapters/sax.py
+++ b/html5lib/treeadapters/sax.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
-
from xml.sax.xmlreader import AttributesNSImpl
from ..constants import adjustForeignAttributes, unadjustForeignAttributes
diff --git a/html5lib/treebuilders/__init__.py b/html5lib/treebuilders/__init__.py
index d44447ea..90aad5fb 100644
--- a/html5lib/treebuilders/__init__.py
+++ b/html5lib/treebuilders/__init__.py
@@ -29,7 +29,6 @@
"""
-from __future__ import absolute_import, division, unicode_literals
from .._utils import default_etree
diff --git a/html5lib/treebuilders/base.py b/html5lib/treebuilders/base.py
index e4a3d710..4afd3c56 100644
--- a/html5lib/treebuilders/base.py
+++ b/html5lib/treebuilders/base.py
@@ -1,6 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
-from six import text_type
-
from ..constants import scopingElements, tableInsertModeElements, namespaces
# The scope markers are inserted when entering object elements,
@@ -20,7 +17,7 @@
}
-class Node(object):
+class Node:
"""Represents an item in the tree"""
def __init__(self, name):
"""Creates a Node
@@ -43,11 +40,11 @@ def __init__(self, name):
self._flags = []
def __str__(self):
- attributesStr = " ".join(["%s=\"%s\"" % (name, value)
- for name, value in
- self.attributes.items()])
+ attributesStr = " ".join(f"{name}=\"{value}\""
+ for name, value in
+ self.attributes.items())
if attributesStr:
- return "<%s %s>" % (self.name, attributesStr)
+ return f"<{self.name} {attributesStr}>"
else:
return "<%s>" % (self.name)
@@ -143,7 +140,7 @@ def nodesEqual(self, node1, node2):
return True
-class TreeBuilder(object):
+class TreeBuilder:
"""Base treebuilder implementation
* documentClass - the class to use for the bottommost node of a document
@@ -199,7 +196,7 @@ def elementInScope(self, target, variant=None):
# match any node with that name
exactNode = hasattr(target, "nameTuple")
if not exactNode:
- if isinstance(target, text_type):
+ if isinstance(target, str):
target = (namespaces["html"], target)
assert isinstance(target, tuple)
@@ -322,7 +319,7 @@ def _setInsertFromTable(self, value):
def insertElementNormal(self, token):
name = token["name"]
- assert isinstance(name, text_type), "Element %s not unicode" % name
+ assert isinstance(name, str), "Element %s not unicode" % name
namespace = token.get("namespace", self.defaultNamespace)
element = self.elementClass(name, namespace)
element.attributes = token["data"]
diff --git a/html5lib/treebuilders/dom.py b/html5lib/treebuilders/dom.py
index d8b53004..2d632d6e 100644
--- a/html5lib/treebuilders/dom.py
+++ b/html5lib/treebuilders/dom.py
@@ -1,10 +1,4 @@
-from __future__ import absolute_import, division, unicode_literals
-
-
-try:
- from collections.abc import MutableMapping
-except ImportError: # Python 2.7
- from collections import MutableMapping
+from collections.abc import MutableMapping
from xml.dom import minidom, Node
import weakref
@@ -191,25 +185,25 @@ def serializeElement(element, indent=0):
rv.append("""|%s""" %
(' ' * indent, element.name, publicId, systemId))
else:
- rv.append("|%s" % (' ' * indent, element.name))
+ rv.append("|{}".format(' ' * indent, element.name))
else:
- rv.append("|%s" % (' ' * indent,))
+ rv.append("|{}".format(' ' * indent))
elif element.nodeType == Node.DOCUMENT_NODE:
rv.append("#document")
elif element.nodeType == Node.DOCUMENT_FRAGMENT_NODE:
rv.append("#document-fragment")
elif element.nodeType == Node.COMMENT_NODE:
- rv.append("|%s" % (' ' * indent, element.nodeValue))
+ rv.append("|{}".format(' ' * indent, element.nodeValue))
elif element.nodeType == Node.TEXT_NODE:
- rv.append("|%s\"%s\"" % (' ' * indent, element.nodeValue))
+ rv.append("|{}\"{}\"".format(' ' * indent, element.nodeValue))
else:
if (hasattr(element, "namespaceURI") and
element.namespaceURI is not None):
- name = "%s %s" % (constants.prefixes[element.namespaceURI],
- element.nodeName)
+ name = "{} {}".format(constants.prefixes[element.namespaceURI],
+ element.nodeName)
else:
name = element.nodeName
- rv.append("|%s<%s>" % (' ' * indent, name))
+ rv.append("|{}<{}>".format(' ' * indent, name))
if element.hasAttributes():
attributes = []
for i in range(len(element.attributes)):
@@ -218,13 +212,13 @@ def serializeElement(element, indent=0):
value = attr.value
ns = attr.namespaceURI
if ns:
- name = "%s %s" % (constants.prefixes[ns], attr.localName)
+ name = f"{constants.prefixes[ns]} {attr.localName}"
else:
name = attr.nodeName
attributes.append((name, value))
for name, value in sorted(attributes):
- rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
+ rv.append('|{}{}="{}"'.format(' ' * (indent + 2), name, value))
indent += 2
for child in element.childNodes:
serializeElement(child, indent)
diff --git a/html5lib/treebuilders/etree.py b/html5lib/treebuilders/etree.py
index 086bed4e..f3fea0f9 100644
--- a/html5lib/treebuilders/etree.py
+++ b/html5lib/treebuilders/etree.py
@@ -1,7 +1,5 @@
-from __future__ import absolute_import, division, unicode_literals
# pylint:disable=protected-access
-from six import text_type
import re
@@ -38,7 +36,7 @@ def _getETreeTag(self, name, namespace):
if namespace is None:
etree_tag = name
else:
- etree_tag = "{%s}%s" % (namespace, name)
+ etree_tag = f"{{{namespace}}}{name}"
return etree_tag
def _setName(self, name):
@@ -70,7 +68,7 @@ def _setAttributes(self, attributes):
# allocation on average
for key, value in attributes.items():
if isinstance(key, tuple):
- name = "{%s}%s" % (key[2], key[1])
+ name = f"{{{key[2]}}}{key[1]}"
else:
name = key
el_attrib[name] = value
@@ -210,20 +208,20 @@ def serializeElement(element, indent=0):
rv.append("""""" %
(element.text, publicId, systemId))
else:
- rv.append("" % (element.text,))
+ rv.append(f"")
elif element.tag == "DOCUMENT_ROOT":
rv.append("#document")
if element.text is not None:
- rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
+ rv.append("|{}\"{}\"".format(' ' * (indent + 2), element.text))
if element.tail is not None:
raise TypeError("Document node cannot have tail")
if hasattr(element, "attrib") and len(element.attrib):
raise TypeError("Document node cannot have attributes")
elif element.tag == ElementTreeCommentType:
- rv.append("|%s" % (' ' * indent, element.text))
+ rv.append("|{}".format(' ' * indent, element.text))
else:
- assert isinstance(element.tag, text_type), \
- "Expected unicode, got %s, %s" % (type(element.tag), element.tag)
+ assert isinstance(element.tag, str), \
+ f"Expected unicode, got {type(element.tag)}, {element.tag}"
nsmatch = tag_regexp.match(element.tag)
if nsmatch is None:
@@ -231,8 +229,8 @@ def serializeElement(element, indent=0):
else:
ns, name = nsmatch.groups()
prefix = constants.prefixes[ns]
- name = "%s %s" % (prefix, name)
- rv.append("|%s<%s>" % (' ' * indent, name))
+ name = f"{prefix} {name}"
+ rv.append("|{}<{}>".format(' ' * indent, name))
if hasattr(element, "attrib"):
attributes = []
@@ -241,20 +239,20 @@ def serializeElement(element, indent=0):
if nsmatch is not None:
ns, name = nsmatch.groups()
prefix = constants.prefixes[ns]
- attr_string = "%s %s" % (prefix, name)
+ attr_string = f"{prefix} {name}"
else:
attr_string = name
attributes.append((attr_string, value))
for name, value in sorted(attributes):
- rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
+ rv.append('|{}{}="{}"'.format(' ' * (indent + 2), name, value))
if element.text:
- rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
+ rv.append("|{}\"{}\"".format(' ' * (indent + 2), element.text))
indent += 2
for child in element:
serializeElement(child, indent)
if element.tail:
- rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail))
+ rv.append("|{}\"{}\"".format(' ' * (indent - 2), element.tail))
serializeElement(element, 0)
return "\n".join(rv)
@@ -275,7 +273,7 @@ def serializeElement(element):
rv.append("""""" %
(element.text, publicId, systemId))
else:
- rv.append("" % (element.text,))
+ rv.append(f"")
elif element.tag == "DOCUMENT_ROOT":
if element.text is not None:
rv.append(element.text)
@@ -288,23 +286,23 @@ def serializeElement(element):
serializeElement(child)
elif element.tag == ElementTreeCommentType:
- rv.append("" % (element.text,))
+ rv.append(f"")
else:
# This is assumed to be an ordinary element
if not element.attrib:
- rv.append("<%s>" % (filter.fromXmlName(element.tag),))
+ rv.append(f"<{filter.fromXmlName(element.tag)}>")
else:
- attr = " ".join(["%s=\"%s\"" % (
+ attr = " ".join("{}=\"{}\"".format(
filter.fromXmlName(name), value)
- for name, value in element.attrib.items()])
- rv.append("<%s %s>" % (element.tag, attr))
+ for name, value in element.attrib.items())
+ rv.append(f"<{element.tag} {attr}>")
if element.text:
rv.append(element.text)
for child in element:
serializeElement(child)
- rv.append("%s>" % (element.tag,))
+ rv.append(f"{element.tag}>")
if element.tail:
rv.append(element.tail)
diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py
index e73de61a..3bcf8c96 100644
--- a/html5lib/treebuilders/etree_lxml.py
+++ b/html5lib/treebuilders/etree_lxml.py
@@ -9,17 +9,12 @@
When any of these things occur, we emit a DataLossWarning
"""
-from __future__ import absolute_import, division, unicode_literals
# pylint:disable=protected-access
import warnings
import re
-import sys
-try:
- from collections.abc import MutableMapping
-except ImportError:
- from collections import MutableMapping
+from collections.abc import MutableMapping
from . import base
from ..constants import DataLossWarning
@@ -28,7 +23,6 @@
from .. import _ihatexml
import lxml.etree as etree
-from six import PY3, binary_type
fullTree = True
@@ -37,14 +31,14 @@
comment_type = etree.Comment("asd").tag
-class DocumentType(object):
+class DocumentType:
def __init__(self, name, publicId, systemId):
self.name = name
self.publicId = publicId
self.systemId = systemId
-class Document(object):
+class Document:
def __init__(self):
self._elementTree = None
self._childNodes = []
@@ -76,11 +70,11 @@ def serializeElement(element, indent=0):
element.docinfo.system_url):
dtd_str = "" % element.docinfo.root_name
else:
- dtd_str = """""" % (
+ dtd_str = """""".format(
element.docinfo.root_name,
element.docinfo.public_id,
element.docinfo.system_url)
- rv.append("|%s%s" % (' ' * (indent + 2), dtd_str))
+ rv.append("|{}{}".format(' ' * (indent + 2), dtd_str))
next_element = element.getroot()
while next_element.getprevious() is not None:
next_element = next_element.getprevious()
@@ -89,17 +83,17 @@ def serializeElement(element, indent=0):
next_element = next_element.getnext()
elif isinstance(element, str) or isinstance(element, bytes):
# Text in a fragment
- assert isinstance(element, str) or sys.version_info[0] == 2
- rv.append("|%s\"%s\"" % (' ' * indent, element))
+ assert isinstance(element, str)
+ rv.append("|{}\"{}\"".format(' ' * indent, element))
else:
# Fragment case
rv.append("#document-fragment")
for next_element in element:
serializeElement(next_element, indent + 2)
elif element.tag == comment_type:
- rv.append("|%s" % (' ' * indent, element.text))
+ rv.append("|{}".format(' ' * indent, element.text))
if hasattr(element, "tail") and element.tail:
- rv.append("|%s\"%s\"" % (' ' * indent, element.tail))
+ rv.append("|{}\"{}\"".format(' ' * indent, element.tail))
else:
assert isinstance(element, etree._Element)
nsmatch = etree_builders.tag_regexp.match(element.tag)
@@ -107,11 +101,11 @@ def serializeElement(element, indent=0):
ns = nsmatch.group(1)
tag = nsmatch.group(2)
prefix = constants.prefixes[ns]
- rv.append("|%s<%s %s>" % (' ' * indent, prefix,
- infosetFilter.fromXmlName(tag)))
+ rv.append("|{}<{} {}>".format(' ' * indent, prefix,
+ infosetFilter.fromXmlName(tag)))
else:
- rv.append("|%s<%s>" % (' ' * indent,
- infosetFilter.fromXmlName(element.tag)))
+ rv.append("|{}<{}>".format(' ' * indent,
+ infosetFilter.fromXmlName(element.tag)))
if hasattr(element, "attrib"):
attributes = []
@@ -121,21 +115,21 @@ def serializeElement(element, indent=0):
ns, name = nsmatch.groups()
name = infosetFilter.fromXmlName(name)
prefix = constants.prefixes[ns]
- attr_string = "%s %s" % (prefix, name)
+ attr_string = f"{prefix} {name}"
else:
attr_string = infosetFilter.fromXmlName(name)
attributes.append((attr_string, value))
for name, value in sorted(attributes):
- rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
+ rv.append('|{}{}="{}"'.format(' ' * (indent + 2), name, value))
if element.text:
- rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
+ rv.append("|{}\"{}\"".format(' ' * (indent + 2), element.text))
indent += 2
for child in element:
serializeElement(child, indent)
if hasattr(element, "tail") and element.tail:
- rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail))
+ rv.append("|{}\"{}\"".format(' ' * (indent - 2), element.tail))
serializeElement(element, 0)
return "\n".join(rv)
@@ -156,23 +150,23 @@ def serializeElement(element):
serializeElement(element.getroot())
elif element.tag == comment_type:
- rv.append("" % (element.text,))
+ rv.append(f"")
else:
# This is assumed to be an ordinary element
if not element.attrib:
- rv.append("<%s>" % (element.tag,))
+ rv.append(f"<{element.tag}>")
else:
- attr = " ".join(["%s=\"%s\"" % (name, value)
- for name, value in element.attrib.items()])
- rv.append("<%s %s>" % (element.tag, attr))
+ attr = " ".join(f"{name}=\"{value}\""
+ for name, value in element.attrib.items())
+ rv.append(f"<{element.tag} {attr}>")
if element.text:
rv.append(element.text)
for child in element:
serializeElement(child)
- rv.append("%s>" % (element.tag,))
+ rv.append(f"{element.tag}>")
if hasattr(element, "tail") and element.tail:
rv.append(element.tail)
@@ -201,15 +195,13 @@ def __init__(self, element):
def _coerceKey(self, key):
if isinstance(key, tuple):
- name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1]))
+ name = f"{{{key[2]}}}{infosetFilter.coerceAttribute(key[1])}"
else:
name = infosetFilter.coerceAttribute(key)
return name
def __getitem__(self, key):
value = self._element._element.attrib[self._coerceKey(key)]
- if not PY3 and isinstance(value, binary_type):
- value = value.decode("ascii")
return value
def __setitem__(self, key, value):
@@ -332,7 +324,7 @@ def insertCommentMain(self, data, parent=None):
if (parent == self.document and
self.document._elementTree.getroot()[-1].tag == comment_type):
warnings.warn("lxml cannot represent adjacent comments beyond the root elements", DataLossWarning)
- super(TreeBuilder, self).insertComment(data, parent)
+ super().insertComment(data, parent)
def insertRoot(self, token):
# Because of the way libxml2 works, it doesn't seem to be possible to
@@ -379,7 +371,7 @@ def insertRoot(self, token):
if namespace is None:
etree_tag = name
else:
- etree_tag = "{%s}%s" % (namespace, name)
+ etree_tag = f"{{{namespace}}}{name}"
root.tag = etree_tag
# Add the root element to the internal child/open data structures
diff --git a/html5lib/treewalkers/__init__.py b/html5lib/treewalkers/__init__.py
index b2d3aac3..70e0fff6 100644
--- a/html5lib/treewalkers/__init__.py
+++ b/html5lib/treewalkers/__init__.py
@@ -8,7 +8,6 @@
returns an iterator which generates tokens.
"""
-from __future__ import absolute_import, division, unicode_literals
from .. import constants
from .._utils import default_etree
@@ -96,10 +95,10 @@ def pprint(walker):
ns = constants.prefixes[token["namespace"]]
else:
ns = token["namespace"]
- name = "%s %s" % (ns, token["name"])
+ name = "{} {}".format(ns, token["name"])
else:
name = token["name"]
- output.append("%s<%s>" % (" " * indent, name))
+ output.append("{}<{}>".format(" " * indent, name))
indent += 2
# attributes (sorted for consistent ordering)
attrs = token["data"]
@@ -109,10 +108,10 @@ def pprint(walker):
ns = constants.prefixes[namespace]
else:
ns = namespace
- name = "%s %s" % (ns, localname)
+ name = f"{ns} {localname}"
else:
name = localname
- output.append("%s%s=\"%s\"" % (" " * indent, name, value))
+ output.append("{}{}=\"{}\"".format(" " * indent, name, value))
# self-closing
if type == "EmptyTag":
indent -= 2
@@ -121,7 +120,7 @@ def pprint(walker):
indent -= 2
elif type == "Comment":
- output.append("%s" % (" " * indent, token["data"]))
+ output.append("{}".format(" " * indent, token["data"]))
elif type == "Doctype":
if token["name"]:
@@ -137,13 +136,13 @@ def pprint(walker):
token["name"],
token["systemId"]))
else:
- output.append("%s" % (" " * indent,
- token["name"]))
+ output.append("{}".format(" " * indent,
+ token["name"]))
else:
- output.append("%s" % (" " * indent,))
+ output.append("{}".format(" " * indent))
elif type == "Characters":
- output.append("%s\"%s\"" % (" " * indent, token["data"]))
+ output.append("{}\"{}\"".format(" " * indent, token["data"]))
elif type == "SpaceCharacters":
assert False, "concatenateCharacterTokens should have got rid of all Space tokens"
diff --git a/html5lib/treewalkers/base.py b/html5lib/treewalkers/base.py
index 80c474c4..a4a9c71a 100644
--- a/html5lib/treewalkers/base.py
+++ b/html5lib/treewalkers/base.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
-
from xml.dom import Node
from ..constants import namespaces, voidElements, spaceCharacters
@@ -17,7 +15,7 @@
spaceCharacters = "".join(spaceCharacters)
-class TreeWalker(object):
+class TreeWalker:
"""Walks a tree yielding tokens
Tokens are dicts that all have a ``type`` field specifying the type of the
@@ -201,15 +199,13 @@ def __iter__(self):
yield self.doctype(*details)
elif type == TEXT:
- for token in self.text(*details):
- yield token
+ yield from self.text(*details)
elif type == ELEMENT:
namespace, name, attributes, hasChildren = details
if (not namespace or namespace == namespaces["html"]) and name in voidElements:
- for token in self.emptyTag(namespace, name, attributes,
- hasChildren):
- yield token
+ yield from self.emptyTag(namespace, name, attributes,
+ hasChildren)
hasChildren = False
else:
yield self.startTag(namespace, name, attributes)
diff --git a/html5lib/treewalkers/dom.py b/html5lib/treewalkers/dom.py
index b0c89b00..ac88cd9d 100644
--- a/html5lib/treewalkers/dom.py
+++ b/html5lib/treewalkers/dom.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
-
from xml.dom import Node
from . import base
diff --git a/html5lib/treewalkers/etree.py b/html5lib/treewalkers/etree.py
index 44653372..4ad3d58d 100644
--- a/html5lib/treewalkers/etree.py
+++ b/html5lib/treewalkers/etree.py
@@ -1,9 +1,6 @@
-from __future__ import absolute_import, division, unicode_literals
-
from collections import OrderedDict
import re
-from six import string_types
from . import base
from .._utils import moduleFactoryFactory
@@ -51,7 +48,7 @@ def getNodeDetails(self, node):
return base.COMMENT, node.text
else:
- assert isinstance(node.tag, string_types), type(node.tag)
+ assert isinstance(node.tag, str), type(node.tag)
# This is assumed to be an ordinary element
match = tag_regexp.match(node.tag)
if match:
diff --git a/html5lib/treewalkers/etree_lxml.py b/html5lib/treewalkers/etree_lxml.py
index a614ac5b..0ec633ac 100644
--- a/html5lib/treewalkers/etree_lxml.py
+++ b/html5lib/treewalkers/etree_lxml.py
@@ -1,6 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
-from six import text_type
-
from collections import OrderedDict
from lxml import etree
@@ -14,13 +11,13 @@
def ensure_str(s):
if s is None:
return None
- elif isinstance(s, text_type):
+ elif isinstance(s, str):
return s
else:
return s.decode("ascii", "strict")
-class Root(object):
+class Root:
def __init__(self, et):
self.elementtree = et
self.children = []
@@ -58,7 +55,7 @@ def __len__(self):
return 1
-class Doctype(object):
+class Doctype:
def __init__(self, root_node, name, public_id, system_id):
self.root_node = root_node
self.name = name
@@ -81,7 +78,7 @@ def getnext(self):
return None
-class FragmentWrapper(object):
+class FragmentWrapper:
def __init__(self, fragment_root, obj):
self.root_node = fragment_root
self.obj = obj
diff --git a/html5lib/treewalkers/genshi.py b/html5lib/treewalkers/genshi.py
index 7483be27..d4757af2 100644
--- a/html5lib/treewalkers/genshi.py
+++ b/html5lib/treewalkers/genshi.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, division, unicode_literals
-
from genshi.core import QName
from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT
from genshi.core import START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT
@@ -15,14 +13,12 @@ def __iter__(self):
previous = None
for event in self.tree:
if previous is not None:
- for token in self.tokens(previous, event):
- yield token
+ yield from self.tokens(previous, event)
previous = event
# Don't forget the final event!
if previous is not None:
- for token in self.tokens(previous, None):
- yield token
+ yield from self.tokens(previous, None)
def tokens(self, event, next):
kind, data, _ = event
@@ -38,10 +34,9 @@ def tokens(self, event, next):
converted_attribs[(None, k)] = v
if namespace == namespaces["html"] and name in voidElements:
- for token in self.emptyTag(namespace, name, converted_attribs,
- not next or next[0] != END or
- next[1] != tag):
- yield token
+ yield from self.emptyTag(namespace, name, converted_attribs,
+ not next or next[0] != END or
+ next[1] != tag)
else:
yield self.startTag(namespace, name, converted_attribs)
@@ -55,8 +50,7 @@ def tokens(self, event, next):
yield self.comment(data)
elif kind == TEXT:
- for token in self.text(data):
- yield token
+ yield from self.text(data)
elif kind == DOCTYPE:
yield self.doctype(*data)
diff --git a/parse.py b/parse.py
index e6806b46..dd919364 100755
--- a/parse.py
+++ b/parse.py
@@ -36,13 +36,12 @@ def parse():
pass
elif f == '-':
f = sys.stdin
- if sys.version_info[0] >= 3:
- encoding = None
+ encoding = None
else:
try:
# Try opening from file system
f = open(f, "rb")
- except IOError as e:
+ except OSError as e:
sys.stderr.write("Unable to open file: %s\n" % e)
sys.exit(1)
except IndexError:
@@ -81,7 +80,7 @@ def parse():
if document:
printOutput(p, document, opts)
t2 = time.time()
- sys.stderr.write("\n\nRun took: %fs (plus %fs to print the output)" % (t1 - t0, t2 - t1))
+ sys.stderr.write(f"\n\nRun took: {t1 - t0:f}s (plus {t2 - t1:f}s to print the output)")
else:
sys.stderr.write("\n\nRun took: %fs" % (t1 - t0))
else:
@@ -135,11 +134,7 @@ def printOutput(parser, document, opts):
kwargs["sanitize"] = True
tokens = treewalkers.getTreeWalker(opts.treebuilder)(document)
- if sys.version_info[0] >= 3:
- encoding = None
- else:
- encoding = "utf-8"
- for text in serializer.HTMLSerializer(**kwargs).serialize(tokens, encoding=encoding):
+ for text in serializer.HTMLSerializer(**kwargs).serialize(tokens, encoding=None):
sys.stdout.write(text)
if not text.endswith('\n'):
sys.stdout.write('\n')
diff --git a/requirements-install.sh b/requirements-install.sh
index b7a8d96d..41d9bc42 100755
--- a/requirements-install.sh
+++ b/requirements-install.sh
@@ -1,9 +1,5 @@
#!/bin/bash -ex
-if [[ $SIX_VERSION ]]; then
- pip install six==$SIX_VERSION
-fi
-
pip install -r requirements-test.txt
if [[ $USE_OPTIONAL == "true" ]]; then
diff --git a/requirements-test.txt b/requirements-test.txt
index 57f8f617..06e0d48e 100644
--- a/requirements-test.txt
+++ b/requirements-test.txt
@@ -2,9 +2,6 @@
tox>=3.15.1,<4
flake8>=3.8.1,<3.9
-pytest>=4.6.10,<5 ; python_version < '3'
-pytest>=5.4.2,<7 ; python_version >= '3'
+pytest>=7,<8
coverage>=5.1,<6
pytest-expect>=1.1.0,<2
-mock>=3.0.5,<4 ; python_version < '3.6'
-mock>=4.0.2,<5 ; python_version >= '3.6'
diff --git a/requirements.txt b/requirements.txt
index ae7ec3d0..be8fcb77 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -1,2 +1 @@
-six>=1.9
webencodings
diff --git a/setup.cfg b/setup.cfg
index 0b2bb9c7..2a44c0f2 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -1,6 +1,3 @@
-[bdist_wheel]
-universal = 1
-
[pep8]
ignore = N
max-line-length = 139
diff --git a/setup.py b/setup.py
index f84c1284..e24296a4 100644
--- a/setup.py
+++ b/setup.py
@@ -1,8 +1,5 @@
-from __future__ import print_function
-
import ast
import codecs
-import sys
from os.path import join, dirname
from setuptools import setup, find_packages, __version__ as setuptools_version
@@ -18,8 +15,7 @@
# _markerlib.default_environment() obtains its data from _VARS
# and wraps it in another dict, but _markerlib_evaluate writes
-# to the dict while it is iterating the keys, causing an error
-# on Python 3 only.
+# to the dict while it is iterating the keys, causing an error.
# Replace _markerlib.default_environment to return a custom dict
# that has all the necessary markers, and ignores any writes.
@@ -32,7 +28,7 @@ def pop(self, i=-1):
return self[i]
-if _markerlib and sys.version_info[0] == 3:
+if _markerlib:
env = _markerlib.markers._VARS
for key in list(env.keys()):
new_key = key.replace('.', '_')
@@ -63,13 +59,10 @@ def default_environment():
'License :: OSI Approved :: MIT License',
'Operating System :: OS Independent',
'Programming Language :: Python',
- 'Programming Language :: Python :: 2',
- 'Programming Language :: Python :: 2.7',
'Programming Language :: Python :: 3',
- 'Programming Language :: Python :: 3.5',
- 'Programming Language :: Python :: 3.6',
'Programming Language :: Python :: 3.7',
'Programming Language :: Python :: 3.8',
+ 'Programming Language :: Python :: 3 :: Only',
'Programming Language :: Python :: Implementation :: CPython',
'Programming Language :: Python :: Implementation :: PyPy',
'Topic :: Software Development :: Libraries :: Python Modules',
@@ -104,10 +97,9 @@ def default_environment():
maintainer_email='james@hoppipolla.co.uk',
packages=find_packages(exclude=["*.tests", "*.tests.*", "tests.*", "tests"]),
install_requires=[
- 'six>=1.9',
'webencodings',
],
- python_requires=">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*",
+ python_requires=">=3.7",
extras_require={
# A conditional extra will only install these items when the extra is
# requested and the condition matches.
diff --git a/tox.ini b/tox.ini
index 16b8cf41..027278f2 100644
--- a/tox.ini
+++ b/tox.ini
@@ -1,5 +1,5 @@
[tox]
-envlist = py{27,35,36,37,38,py,py3}-{base,six19,optional}
+envlist = py{37,38,py3}-{base,optional}
[testenv]
deps =
@@ -11,8 +11,7 @@ passenv =
PYTEST_COMMAND # this is maintained so one can, e.g., PYTEST_COMMAND="coverage run -m pytest"
COVERAGE_RUN_OPTIONS
commands =
- six19: pip install six==1.9
- {env:PYTEST_COMMAND:{envbindir}/py.test} {posargs}
+ {env:PYTEST_COMMAND:{envbindir}/pytest} {posargs}
flake8 {toxinidir}
[testenv:doc]
diff --git a/utils/entities.py b/utils/entities.py
index 6e8ca458..faeb4b45 100644
--- a/utils/entities.py
+++ b/utils/entities.py
@@ -49,9 +49,9 @@ def test_description(name, good):
semicolon_text = {True: "with a semi-colon",
False: "without a semi-colon"}[with_semicolon]
if good:
- text = "Named entity: %s %s" % (name, semicolon_text)
+ text = f"Named entity: {name} {semicolon_text}"
else:
- text = "Bad named entity: %s %s" % (name, semicolon_text)
+ text = f"Bad named entity: {name} {semicolon_text}"
return text
@@ -80,7 +80,7 @@ def subentity_exists(entity_name, entities):
def make_entities_code(entities):
- entities_text = "\n".join(" \"%s\": u\"%s\"," % (
+ entities_text = "\n".join(" \"{}\": u\"{}\",".format(
name, entities[name].encode(
"unicode-escape").replace("\"", "\\\""))
for name in sorted(entities.keys()))