From 637e90b430f2dedf5810509e84c10141250ce46c Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Mon, 11 Jan 2021 17:35:44 +0200 Subject: [PATCH 01/10] Drop support for EOL Python 2.7 and 3.5 --- .appveyor.yml | 4 ---- .github/workflows/python-tox.yml | 2 +- .travis.yml | 5 +---- README.rst | 2 +- html5lib/_trie/_base.py | 5 +---- html5lib/_utils.py | 5 +---- html5lib/tests/support.py | 2 -- html5lib/treebuilders/dom.py | 5 +---- html5lib/treebuilders/etree_lxml.py | 8 ++------ parse.py | 9 ++------- requirements-test.txt | 6 ++---- setup.py | 12 ++++-------- tox.ini | 2 +- 13 files changed, 17 insertions(+), 50 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index a1a3e347..0b3c4e8a 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -3,10 +3,6 @@ environment: global: PATH: "C:\\Python27\\Scripts\\;%PATH%" matrix: - - TOXENV: py27-base - - TOXENV: py27-optional - - TOXENV: py35-base - - TOXENV: py35-optional - TOXENV: py36-base - TOXENV: py36-optional - TOXENV: py37-base diff --git a/.github/workflows/python-tox.yml b/.github/workflows/python-tox.yml index ec5cf636..9673f503 100644 --- a/.github/workflows/python-tox.yml +++ b/.github/workflows/python-tox.yml @@ -7,7 +7,7 @@ jobs: runs-on: ubuntu-latest strategy: matrix: - python: [2.7, 3.5, 3.6, 3.7, 3.8, pypy-2.7, pypy3] + python: [3.6, 3.7, 3.8, pypy3] steps: - uses: actions/checkout@v2 with: diff --git a/.travis.yml b/.travis.yml index d2d9e30e..a530faeb 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,13 +1,10 @@ language: python python: - "pypy3" - - "pypy" + - "3.9" - "3.8" - "3.7" - "3.6" - - "3.5" - - "2.7" - - "3.9-dev" cache: pip diff --git a/README.rst b/README.rst index d367905d..fef6d315 100644 --- a/README.rst +++ b/README.rst @@ -91,7 +91,7 @@ More documentation is available at https://html5lib.readthedocs.io/. Installation ------------ -html5lib works on CPython 2.7+, CPython 3.5+ and PyPy. To install: +html5lib works on CPython 3.6+ and PyPy3. To install: .. 
code-block:: bash diff --git a/html5lib/_trie/_base.py b/html5lib/_trie/_base.py index 6b71975f..a9295a2b 100644 --- a/html5lib/_trie/_base.py +++ b/html5lib/_trie/_base.py @@ -1,9 +1,6 @@ from __future__ import absolute_import, division, unicode_literals -try: - from collections.abc import Mapping -except ImportError: # Python 2.7 - from collections import Mapping +from collections.abc import Mapping class Trie(Mapping): diff --git a/html5lib/_utils.py b/html5lib/_utils.py index 9ea57942..13d4c656 100644 --- a/html5lib/_utils.py +++ b/html5lib/_utils.py @@ -2,10 +2,7 @@ from types import ModuleType -try: - from collections.abc import Mapping -except ImportError: - from collections import Mapping +from collections.abc import Mapping from six import text_type, PY3 diff --git a/html5lib/tests/support.py b/html5lib/tests/support.py index 9cd5afbe..4a53dc18 100644 --- a/html5lib/tests/support.py +++ b/html5lib/tests/support.py @@ -150,8 +150,6 @@ def convertData(data): def errorMessage(input, expected, actual): msg = ("Input:\n%s\nExpected:\n%s\nReceived\n%s\n" % (repr(input), repr(expected), repr(actual))) - if sys.version_info[0] == 2: - msg = msg.encode("ascii", "backslashreplace") return msg diff --git a/html5lib/treebuilders/dom.py b/html5lib/treebuilders/dom.py index d8b53004..818a3343 100644 --- a/html5lib/treebuilders/dom.py +++ b/html5lib/treebuilders/dom.py @@ -1,10 +1,7 @@ from __future__ import absolute_import, division, unicode_literals -try: - from collections.abc import MutableMapping -except ImportError: # Python 2.7 - from collections import MutableMapping +from collections.abc import MutableMapping from xml.dom import minidom, Node import weakref diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py index e73de61a..d99a51a9 100644 --- a/html5lib/treebuilders/etree_lxml.py +++ b/html5lib/treebuilders/etree_lxml.py @@ -14,12 +14,8 @@ import warnings import re -import sys -try: - from collections.abc import MutableMapping -except ImportError: - from collections import MutableMapping +from collections.abc import MutableMapping from . 
import base from ..constants import DataLossWarning @@ -89,7 +85,7 @@ def serializeElement(element, indent=0): next_element = next_element.getnext() elif isinstance(element, str) or isinstance(element, bytes): # Text in a fragment - assert isinstance(element, str) or sys.version_info[0] == 2 + assert isinstance(element, str) rv.append("|%s\"%s\"" % (' ' * indent, element)) else: # Fragment case diff --git a/parse.py b/parse.py index e6806b46..f973fbb4 100755 --- a/parse.py +++ b/parse.py @@ -36,8 +36,7 @@ def parse(): pass elif f == '-': f = sys.stdin - if sys.version_info[0] >= 3: - encoding = None + encoding = None else: try: # Try opening from file system @@ -135,11 +134,7 @@ def printOutput(parser, document, opts): kwargs["sanitize"] = True tokens = treewalkers.getTreeWalker(opts.treebuilder)(document) - if sys.version_info[0] >= 3: - encoding = None - else: - encoding = "utf-8" - for text in serializer.HTMLSerializer(**kwargs).serialize(tokens, encoding=encoding): + for text in serializer.HTMLSerializer(**kwargs).serialize(tokens, encoding=None): sys.stdout.write(text) if not text.endswith('\n'): sys.stdout.write('\n') diff --git a/requirements-test.txt b/requirements-test.txt index 57f8f617..ce882670 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -2,9 +2,7 @@ tox>=3.15.1,<4 flake8>=3.8.1,<3.9 -pytest>=4.6.10,<5 ; python_version < '3' -pytest>=5.4.2,<7 ; python_version >= '3' +pytest>=5.4.2,<7 coverage>=5.1,<6 pytest-expect>=1.1.0,<2 -mock>=3.0.5,<4 ; python_version < '3.6' -mock>=4.0.2,<5 ; python_version >= '3.6' +mock>=4.0.2,<5 diff --git a/setup.py b/setup.py index f84c1284..33ab359d 100644 --- a/setup.py +++ b/setup.py @@ -2,7 +2,6 @@ import ast import codecs -import sys from os.path import join, dirname from setuptools import setup, find_packages, __version__ as setuptools_version @@ -18,8 +17,7 @@ # _markerlib.default_environment() obtains its data from _VARS # and wraps it in another dict, but _markerlib_evaluate writes -# to the dict while it is iterating the keys, causing an error -# on Python 3 only. +# to the dict while it is iterating the keys, causing an error. # Replace _markerlib.default_environment to return a custom dict # that has all the necessary markers, and ignores any writes. @@ -32,7 +30,7 @@ def pop(self, i=-1): return self[i] -if _markerlib and sys.version_info[0] == 3: +if _markerlib: env = _markerlib.markers._VARS for key in list(env.keys()): new_key = key.replace('.', '_') @@ -63,13 +61,11 @@ def default_environment(): 'License :: OSI Approved :: MIT License', 'Operating System :: OS Independent', 'Programming Language :: Python', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3 :: Only', 'Programming Language :: Python :: Implementation :: CPython', 'Programming Language :: Python :: Implementation :: PyPy', 'Topic :: Software Development :: Libraries :: Python Modules', @@ -107,7 +103,7 @@ def default_environment(): 'six>=1.9', 'webencodings', ], - python_requires=">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*", + python_requires=">=3.6", extras_require={ # A conditional extra will only install these items when the extra is # requested and the condition matches. 
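Aside on the pattern this patch removes: the html5lib/_trie/_base.py, html5lib/_utils.py, html5lib/treebuilders/dom.py and html5lib/treebuilders/etree_lxml.py hunks above all delete the same Python 2 import shim for the collections ABCs (Mapping, MutableMapping). Below is a minimal sketch of that shim and the Python 3.6+-only import it collapses to; FrozenDict is a hypothetical toy class used only to exercise the import and is not html5lib code.

    # Pre-patch pattern: tolerate Python 2.7, where the ABCs still lived in
    # the top-level collections module.
    try:
        from collections.abc import Mapping  # Python 3.3+
    except ImportError:  # Python 2.7
        from collections import Mapping

    # Post-patch form, as now used in html5lib/_trie/_base.py:
    from collections.abc import Mapping


    class FrozenDict(Mapping):
        """Hypothetical read-only mapping; exists only to exercise the ABC."""

        def __init__(self, data):
            self._data = dict(data)

        def __getitem__(self, key):
            return self._data[key]

        def __iter__(self):
            return iter(self._data)

        def __len__(self):
            return len(self._data)


    assert isinstance(FrozenDict({"a": 1}), Mapping)

Dropping the fallback also future-proofs the code: importing the ABCs from the plain collections module has been deprecated since Python 3.3 and was removed in Python 3.10.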
diff --git a/tox.ini b/tox.ini index 16b8cf41..05ca59aa 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py{27,35,36,37,38,py,py3}-{base,six19,optional} +envlist = py{36,37,38,py3}-{base,six19,optional} [testenv] deps = From 2f1d6e09265e5fb05f137fa4cb7eb2264552a73a Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Mon, 24 May 2021 20:41:49 +0300 Subject: [PATCH 02/10] Upgrade Python syntax with pyupgrade --py3-plus --- debug-info.py | 2 - doc/conf.py | 3 +- html5lib/__init__.py | 1 - html5lib/_ihatexml.py | 4 +- html5lib/_inputstream.py | 26 +++++----- html5lib/_tokenizer.py | 6 +-- html5lib/_trie/__init__.py | 2 - html5lib/_trie/_base.py | 4 +- html5lib/_trie/py.py | 3 +- html5lib/_utils.py | 14 ++--- html5lib/constants.py | 2 - html5lib/filters/alphabeticalattributes.py | 2 - html5lib/filters/base.py | 5 +- html5lib/filters/inject_meta_charset.py | 2 - html5lib/filters/lint.py | 36 +++++++------ html5lib/filters/optionaltags.py | 2 - html5lib/filters/sanitizer.py | 7 ++- html5lib/filters/whitespace.py | 2 - html5lib/html5parser.py | 19 ++++--- html5lib/serializer.py | 9 ++-- html5lib/tests/__init__.py | 1 - html5lib/tests/conftest.py | 5 +- html5lib/tests/sanitizer.py | 4 +- html5lib/tests/support.py | 4 +- html5lib/tests/test_alphabeticalattributes.py | 2 - html5lib/tests/test_encoding.py | 6 +-- html5lib/tests/test_meta.py | 11 ++-- html5lib/tests/test_optionaltags_filter.py | 2 - html5lib/tests/test_parser2.py | 6 +-- html5lib/tests/test_sanitizer.py | 24 ++++----- html5lib/tests/test_serializer.py | 6 +-- html5lib/tests/test_stream.py | 8 ++- html5lib/tests/test_tokenizer2.py | 10 ++-- html5lib/tests/test_treeadapters.py | 2 - html5lib/tests/test_treewalkers.py | 12 ++--- html5lib/tests/test_whitespace_filter.py | 2 - html5lib/tests/tokenizer.py | 14 +++-- html5lib/tests/tokenizertotree.py | 4 +- html5lib/tests/tree_construction.py | 15 +++--- html5lib/treeadapters/__init__.py | 1 - html5lib/treeadapters/genshi.py | 6 +-- html5lib/treeadapters/sax.py | 2 - html5lib/treebuilders/__init__.py | 1 - html5lib/treebuilders/base.py | 17 +++---- html5lib/treebuilders/dom.py | 21 ++++---- html5lib/treebuilders/etree.py | 41 ++++++++------- html5lib/treebuilders/etree_lxml.py | 51 +++++++++---------- html5lib/treewalkers/__init__.py | 19 ++++--- html5lib/treewalkers/base.py | 12 ++--- html5lib/treewalkers/dom.py | 2 - html5lib/treewalkers/etree.py | 4 +- html5lib/treewalkers/etree_lxml.py | 9 ++-- html5lib/treewalkers/genshi.py | 18 +++---- parse.py | 4 +- setup.py | 2 - utils/entities.py | 6 +-- 56 files changed, 197 insertions(+), 308 deletions(-) diff --git a/debug-info.py b/debug-info.py index b47b8ebf..6e2a19bf 100644 --- a/debug-info.py +++ b/debug-info.py @@ -1,5 +1,3 @@ -from __future__ import print_function, unicode_literals - import platform import sys diff --git a/doc/conf.py b/doc/conf.py index 22ebab4f..d28655ac 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # # html5lib documentation build configuration file, created by # sphinx-quickstart on Wed May 8 00:04:49 2013. 
@@ -92,7 +91,7 @@ ] -class CExtMock(object): +class CExtMock: """Required for autodoc on readthedocs.org where you cannot build C extensions.""" def __init__(self, *args, **kwargs): pass diff --git a/html5lib/__init__.py b/html5lib/__init__.py index 7b854f99..d2c68855 100644 --- a/html5lib/__init__.py +++ b/html5lib/__init__.py @@ -20,7 +20,6 @@ * :func:`~.serializer.serialize` """ -from __future__ import absolute_import, division, unicode_literals from .html5parser import HTMLParser, parse, parseFragment from .treebuilders import getTreeBuilder diff --git a/html5lib/_ihatexml.py b/html5lib/_ihatexml.py index 3ff803c1..90757cbf 100644 --- a/html5lib/_ihatexml.py +++ b/html5lib/_ihatexml.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - import re import warnings @@ -184,7 +182,7 @@ def escapeRegexp(string): nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\\-'()+,./:=?;!*#@$_%]") -class InfosetFilter(object): +class InfosetFilter: replacementRegexp = re.compile(r"U[\dA-F]{5,5}") def __init__(self, diff --git a/html5lib/_inputstream.py b/html5lib/_inputstream.py index 0207dd21..b3a75452 100644 --- a/html5lib/_inputstream.py +++ b/html5lib/_inputstream.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from six import text_type from six.moves import http_client, urllib @@ -14,9 +12,9 @@ from . import _utils # Non-unicode versions of constants for use in the pre-parser -spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters]) -asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters]) -asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase]) +spaceCharactersBytes = frozenset(item.encode("ascii") for item in spaceCharacters) +asciiLettersBytes = frozenset(item.encode("ascii") for item in asciiLetters) +asciiUppercaseBytes = frozenset(item.encode("ascii") for item in asciiUppercase) spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"]) @@ -48,7 +46,7 @@ charsUntilRegEx = {} -class BufferedStream(object): +class BufferedStream: """Buffering for streams that do not have buffering of their own The buffer is implemented as a list of chunks on the assumption that @@ -86,7 +84,7 @@ def read(self, bytes): return self._readFromBuffer(bytes) def _bufferedBytes(self): - return sum([len(item) for item in self.buffer]) + return sum(len(item) for item in self.buffer) def _readStream(self, bytes): data = self.stream.read(bytes) @@ -131,9 +129,9 @@ def HTMLInputStream(source, **kwargs): isinstance(source.fp, http_client.HTTPResponse))): isUnicode = False elif hasattr(source, "read"): - isUnicode = isinstance(source.read(0), text_type) + isUnicode = isinstance(source.read(0), str) else: - isUnicode = isinstance(source, text_type) + isUnicode = isinstance(source, str) if isUnicode: encodings = [x for x in kwargs if x.endswith("_encoding")] @@ -145,7 +143,7 @@ def HTMLInputStream(source, **kwargs): return HTMLBinaryInputStream(source, **kwargs) -class HTMLUnicodeInputStream(object): +class HTMLUnicodeInputStream: """Provides a unicode stream of characters to the HTMLTokenizer. 
This class takes care of character encoding and removing or replacing @@ -325,7 +323,7 @@ def charsUntil(self, characters, opposite=False): if __debug__: for c in characters: assert(ord(c) < 128) - regex = "".join(["\\x%02x" % ord(c) for c in characters]) + regex = "".join("\\x%02x" % ord(c) for c in characters) if not opposite: regex = "^%s" % regex chars = charsUntilRegEx[(characters, opposite)] = re.compile("[%s]+" % regex) @@ -524,7 +522,7 @@ def changeEncoding(self, newEncoding): self.rawStream.seek(0) self.charEncoding = (newEncoding, "certain") self.reset() - raise _ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding)) + raise _ReparseException("Encoding changed from {} to {}".format(self.charEncoding[0], newEncoding)) def detectBOM(self): """Attempts to detect at BOM at the start of the stream. If @@ -673,7 +671,7 @@ def jumpTo(self, bytes): return True -class EncodingParser(object): +class EncodingParser: """Mini parser for detecting character encoding from meta elements""" def __init__(self, data): @@ -861,7 +859,7 @@ def getAttribute(self): attrValue.append(c) -class ContentAttrParser(object): +class ContentAttrParser: def __init__(self, data): assert isinstance(data, bytes) self.data = data diff --git a/html5lib/_tokenizer.py b/html5lib/_tokenizer.py index 4748a197..e3750c5c 100644 --- a/html5lib/_tokenizer.py +++ b/html5lib/_tokenizer.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from six import unichr as chr from collections import deque, OrderedDict @@ -24,7 +22,7 @@ attributeMap = OrderedDict -class HTMLTokenizer(object): +class HTMLTokenizer: """ This class takes care of tokenizing HTML. * self.currentToken @@ -50,7 +48,7 @@ def __init__(self, stream, parser=None, **kwargs): # The current token being created self.currentToken = None - super(HTMLTokenizer, self).__init__() + super().__init__() def __iter__(self): """ This is where the magic happens. 
diff --git a/html5lib/_trie/__init__.py b/html5lib/_trie/__init__.py index 07bad5d3..98a6841a 100644 --- a/html5lib/_trie/__init__.py +++ b/html5lib/_trie/__init__.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from .py import Trie __all__ = ["Trie"] diff --git a/html5lib/_trie/_base.py b/html5lib/_trie/_base.py index a9295a2b..6b2977b2 100644 --- a/html5lib/_trie/_base.py +++ b/html5lib/_trie/_base.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from collections.abc import Mapping @@ -8,7 +6,7 @@ class Trie(Mapping): def keys(self, prefix=None): # pylint:disable=arguments-differ - keys = super(Trie, self).keys() + keys = super().keys() if prefix is None: return set(keys) diff --git a/html5lib/_trie/py.py b/html5lib/_trie/py.py index c2ba3da7..2e1aa188 100644 --- a/html5lib/_trie/py.py +++ b/html5lib/_trie/py.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from six import text_type from bisect import bisect_left @@ -8,7 +7,7 @@ class Trie(ABCTrie): def __init__(self, data): - if not all(isinstance(x, text_type) for x in data.keys()): + if not all(isinstance(x, str) for x in data.keys()): raise TypeError("All keys must be strings") self._data = data diff --git a/html5lib/_utils.py b/html5lib/_utils.py index 13d4c656..f59eec1e 100644 --- a/html5lib/_utils.py +++ b/html5lib/_utils.py @@ -1,18 +1,10 @@ -from __future__ import absolute_import, division, unicode_literals - from types import ModuleType from collections.abc import Mapping from six import text_type, PY3 -if PY3: - import xml.etree.ElementTree as default_etree -else: - try: - import xml.etree.cElementTree as default_etree - except ImportError: - import xml.etree.ElementTree as default_etree +import xml.etree.ElementTree as default_etree __all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair", @@ -28,10 +20,10 @@ # escapes. try: _x = eval('"\\uD800"') # pylint:disable=eval-used - if not isinstance(_x, text_type): + if not isinstance(_x, str): # We need this with u"" because of http://bugs.jython.org/issue2039 _x = eval('u"\\uD800"') # pylint:disable=eval-used - assert isinstance(_x, text_type) + assert isinstance(_x, str) except Exception: supports_lone_surrogates = False else: diff --git a/html5lib/constants.py b/html5lib/constants.py index fe3e237c..3596ea21 100644 --- a/html5lib/constants.py +++ b/html5lib/constants.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - import string EOF = None diff --git a/html5lib/filters/alphabeticalattributes.py b/html5lib/filters/alphabeticalattributes.py index 5ba926e3..d96ad62a 100644 --- a/html5lib/filters/alphabeticalattributes.py +++ b/html5lib/filters/alphabeticalattributes.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from . 
import base from collections import OrderedDict diff --git a/html5lib/filters/base.py b/html5lib/filters/base.py index c7dbaed0..6937911d 100644 --- a/html5lib/filters/base.py +++ b/html5lib/filters/base.py @@ -1,7 +1,4 @@ -from __future__ import absolute_import, division, unicode_literals - - -class Filter(object): +class Filter: def __init__(self, source): self.source = source diff --git a/html5lib/filters/inject_meta_charset.py b/html5lib/filters/inject_meta_charset.py index aefb5c84..cfa469c3 100644 --- a/html5lib/filters/inject_meta_charset.py +++ b/html5lib/filters/inject_meta_charset.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from . import base diff --git a/html5lib/filters/lint.py b/html5lib/filters/lint.py index acd4d7a2..ff6c5bd7 100644 --- a/html5lib/filters/lint.py +++ b/html5lib/filters/lint.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from six import text_type from . import base @@ -23,7 +21,7 @@ def __init__(self, source, require_matching_tags=True): :arg require_matching_tags: whether or not to require matching tags """ - super(Filter, self).__init__(source) + super().__init__(source) self.require_matching_tags = require_matching_tags def __iter__(self): @@ -33,9 +31,9 @@ def __iter__(self): if type in ("StartTag", "EmptyTag"): namespace = token["namespace"] name = token["name"] - assert namespace is None or isinstance(namespace, text_type) + assert namespace is None or isinstance(namespace, str) assert namespace != "" - assert isinstance(name, text_type) + assert isinstance(name, str) assert name != "" assert isinstance(token["data"], dict) if (not namespace or namespace == namespaces["html"]) and name in voidElements: @@ -45,49 +43,49 @@ def __iter__(self): if type == "StartTag" and self.require_matching_tags: open_elements.append((namespace, name)) for (namespace, name), value in token["data"].items(): - assert namespace is None or isinstance(namespace, text_type) + assert namespace is None or isinstance(namespace, str) assert namespace != "" - assert isinstance(name, text_type) + assert isinstance(name, str) assert name != "" - assert isinstance(value, text_type) + assert isinstance(value, str) elif type == "EndTag": namespace = token["namespace"] name = token["name"] - assert namespace is None or isinstance(namespace, text_type) + assert namespace is None or isinstance(namespace, str) assert namespace != "" - assert isinstance(name, text_type) + assert isinstance(name, str) assert name != "" if (not namespace or namespace == namespaces["html"]) and name in voidElements: - assert False, "Void element reported as EndTag token: %(tag)s" % {"tag": name} + assert False, "Void element reported as EndTag token: {tag}".format(tag=name) elif self.require_matching_tags: start = open_elements.pop() assert start == (namespace, name) elif type == "Comment": data = token["data"] - assert isinstance(data, text_type) + assert isinstance(data, str) elif type in ("Characters", "SpaceCharacters"): data = token["data"] - assert isinstance(data, text_type) + assert isinstance(data, str) assert data != "" if type == "SpaceCharacters": assert data.strip(spaceCharacters) == "" elif type == "Doctype": name = token["name"] - assert name is None or isinstance(name, text_type) - assert token["publicId"] is None or isinstance(name, text_type) - assert token["systemId"] is None or isinstance(name, text_type) + assert name is None or isinstance(name, str) + assert token["publicId"] is None or isinstance(name, 
str) + assert token["systemId"] is None or isinstance(name, str) elif type == "Entity": - assert isinstance(token["name"], text_type) + assert isinstance(token["name"], str) elif type == "SerializerError": - assert isinstance(token["data"], text_type) + assert isinstance(token["data"], str) else: - assert False, "Unknown token type: %(type)s" % {"type": type} + assert False, "Unknown token type: {type}".format(type=type) yield token diff --git a/html5lib/filters/optionaltags.py b/html5lib/filters/optionaltags.py index 4a865012..f1c21118 100644 --- a/html5lib/filters/optionaltags.py +++ b/html5lib/filters/optionaltags.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from . import base diff --git a/html5lib/filters/sanitizer.py b/html5lib/filters/sanitizer.py index 70ef9066..4b753d30 100644 --- a/html5lib/filters/sanitizer.py +++ b/html5lib/filters/sanitizer.py @@ -6,7 +6,6 @@ if Bleach is unsuitable for your needs. """ -from __future__ import absolute_import, division, unicode_literals import re import warnings @@ -766,7 +765,7 @@ def __init__(self, hrefs--these are removed """ - super(Filter, self).__init__(source) + super().__init__(source) warnings.warn(_deprecation_msg, DeprecationWarning) @@ -874,8 +873,8 @@ def disallowed_token(self, token): assert token_type in ("StartTag", "EmptyTag") attrs = [] for (ns, name), v in token["data"].items(): - attrs.append(' %s="%s"' % (name if ns is None else "%s:%s" % (prefixes[ns], name), escape(v))) - token["data"] = "<%s%s>" % (token["name"], ''.join(attrs)) + attrs.append(' {}="{}"'.format(name if ns is None else "{}:{}".format(prefixes[ns], name), escape(v))) + token["data"] = "<{}{}>".format(token["name"], ''.join(attrs)) else: token["data"] = "<%s>" % token["name"] if token.get("selfClosing"): diff --git a/html5lib/filters/whitespace.py b/html5lib/filters/whitespace.py index 0d12584b..2f35f4a0 100644 --- a/html5lib/filters/whitespace.py +++ b/html5lib/filters/whitespace.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - import re from . import base diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py index 74d829d9..b4c51821 100644 --- a/html5lib/html5parser.py +++ b/html5lib/html5parser.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from six import with_metaclass, viewkeys import types @@ -83,7 +82,7 @@ def __new__(meta, classname, bases, classDict): return Decorated -class HTMLParser(object): +class HTMLParser: """HTML parser Generates a tree structure from a stream of (possibly malformed) HTML. 
@@ -423,7 +422,7 @@ def getMetaclass(use_metaclass, metaclass_func): return type # pylint:disable=unused-argument - class Phase(with_metaclass(getMetaclass(debug, log))): + class Phase(metaclass=getMetaclass(debug, log)): """Base class for helper object that implements each phase of processing """ __slots__ = ("parser", "tree", "__startTagCache", "__endTagCache") @@ -944,7 +943,7 @@ class InBodyPhase(Phase): __slots__ = ("processSpaceCharacters",) def __init__(self, *args, **kwargs): - super(InBodyPhase, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) # Set this to the default handler self.processSpaceCharacters = self.processSpaceCharactersNonPre @@ -1002,8 +1001,8 @@ def processCharacters(self, token): self.tree.insertText(token["data"]) # This must be bad for performance if (self.parser.framesetOK and - any([char not in spaceCharacters - for char in token["data"]])): + any(char not in spaceCharacters + for char in token["data"])): self.parser.framesetOK = False def processSpaceCharactersNonPre(self, token): @@ -1844,13 +1843,13 @@ class InTableTextPhase(Phase): __slots__ = ("originalPhase", "characterTokens") def __init__(self, *args, **kwargs): - super(InTableTextPhase, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) self.originalPhase = None self.characterTokens = [] def flushCharacters(self): - data = "".join([item["data"] for item in self.characterTokens]) - if any([item not in spaceCharacters for item in data]): + data = "".join(item["data"] for item in self.characterTokens) + if any(item not in spaceCharacters for item in data): token = {"type": tokenTypes["Characters"], "data": data} self.parser.phases["inTable"].insertText(token) elif data: @@ -2776,7 +2775,7 @@ def processEndTag(self, token): def adjust_attributes(token, replacements): - needs_adjustment = viewkeys(token['data']) & viewkeys(replacements) + needs_adjustment = token['data'].keys() & replacements.keys() if needs_adjustment: token['data'] = type(token['data'])((replacements.get(k, k), v) for k, v in token['data'].items()) diff --git a/html5lib/serializer.py b/html5lib/serializer.py index c66df683..bbcbf9d6 100644 --- a/html5lib/serializer.py +++ b/html5lib/serializer.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from six import text_type import re @@ -101,7 +100,7 @@ def serialize(input, tree="etree", encoding=None, **serializer_opts): return s.render(walker(input), encoding) -class HTMLSerializer(object): +class HTMLSerializer: # attribute quoting options quote_attr_values = "legacy" # be secure by default @@ -222,14 +221,14 @@ def __init__(self, **kwargs): self.strict = False def encode(self, string): - assert(isinstance(string, text_type)) + assert(isinstance(string, str)) if self.encoding: return string.encode(self.encoding, "htmlentityreplace") else: return string def encodeStrict(self, string): - assert(isinstance(string, text_type)) + assert(isinstance(string, str)) if self.encoding: return string.encode(self.encoding, "strict") else: @@ -278,7 +277,7 @@ def serialize(self, treewalker, encoding=None): quote_char = "'" else: quote_char = '"' - doctype += " %s%s%s" % (quote_char, token["systemId"], quote_char) + doctype += " {}{}{}".format(quote_char, token["systemId"], quote_char) doctype += ">" yield self.encodeStrict(doctype) diff --git a/html5lib/tests/__init__.py b/html5lib/tests/__init__.py index b8ce2de3..e69de29b 100644 --- a/html5lib/tests/__init__.py +++ b/html5lib/tests/__init__.py @@ -1 +0,0 @@ -from __future__ import 
absolute_import, division, unicode_literals diff --git a/html5lib/tests/conftest.py b/html5lib/tests/conftest.py index fffeb50c..62a7991b 100644 --- a/html5lib/tests/conftest.py +++ b/html5lib/tests/conftest.py @@ -1,4 +1,3 @@ -from __future__ import print_function import os.path import sys @@ -54,7 +53,7 @@ def pytest_configure(config): # Check for optional requirements req_file = os.path.join(_root, "requirements-optional.txt") if os.path.exists(req_file): - with open(req_file, "r") as fp: + with open(req_file) as fp: for line in fp: if (line.strip() and not (line.startswith("-r") or @@ -70,7 +69,7 @@ def pytest_configure(config): try: installed = pkg_resources.working_set.find(req) except pkg_resources.VersionConflict: - msgs.append("Outdated version of %s installed, need %s" % (req.name, spec)) + msgs.append("Outdated version of {} installed, need {}".format(req.name, spec)) else: if not installed: msgs.append("Need %s" % spec) diff --git a/html5lib/tests/sanitizer.py b/html5lib/tests/sanitizer.py index 16e53868..fb7fadf9 100644 --- a/html5lib/tests/sanitizer.py +++ b/html5lib/tests/sanitizer.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - import codecs import json @@ -18,7 +16,7 @@ def collect(self): class SanitizerTest(pytest.Item): def __init__(self, name, parent, test): - super(SanitizerTest, self).__init__(name, parent) + super().__init__(name, parent) self.obj = lambda: 1 # this is to hack around skipif needing a function! self.test = test diff --git a/html5lib/tests/support.py b/html5lib/tests/support.py index 4a53dc18..d51cae12 100644 --- a/html5lib/tests/support.py +++ b/html5lib/tests/support.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - # pylint:disable=wrong-import-position import os @@ -86,7 +84,7 @@ def __getitem__(self, key): return dict.get(self, key, self.default) -class TestData(object): +class TestData: def __init__(self, filename, newTestHeading="data", encoding="utf8"): if encoding is None: self.f = open(filename, mode="rb") diff --git a/html5lib/tests/test_alphabeticalattributes.py b/html5lib/tests/test_alphabeticalattributes.py index 7d5b8e0f..3b82c2b0 100644 --- a/html5lib/tests/test_alphabeticalattributes.py +++ b/html5lib/tests/test_alphabeticalattributes.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from collections import OrderedDict import pytest diff --git a/html5lib/tests/test_encoding.py b/html5lib/tests/test_encoding.py index 47c4814a..ddad9100 100644 --- a/html5lib/tests/test_encoding.py +++ b/html5lib/tests/test_encoding.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - import os import pytest @@ -9,7 +7,7 @@ def test_basic_prescan_length(): - data = "Caf\u00E9".encode('utf-8') + data = "Caf\u00E9".encode() pad = 1024 - len(data) + 1 data = data.replace(b"-a-", b"-" + (b"a" * pad) + b"-") assert len(data) == 1024 # Sanity @@ -18,7 +16,7 @@ def test_basic_prescan_length(): def test_parser_reparse(): - data = "Caf\u00E9".encode('utf-8') + data = "Caf\u00E9".encode() pad = 10240 - len(data) + 1 data = data.replace(b"-a-", b"-" + (b"a" * pad) + b"-") assert len(data) == 10240 # Sanity diff --git a/html5lib/tests/test_meta.py b/html5lib/tests/test_meta.py index dd02dd7f..8f49bb57 100644 --- a/html5lib/tests/test_meta.py +++ b/html5lib/tests/test_meta.py @@ -1,7 +1,5 @@ -from __future__ import absolute_import, division, unicode_literals - import six -from mock import Mock +from unittest.mock 
import Mock from . import support @@ -27,11 +25,8 @@ def test_errorMessage(): r = support.errorMessage(input, expected, actual) # Assertions! - if six.PY2: - assert b"Input:\n1\nExpected:\n2\nReceived\n3\n" == r - else: - assert six.PY3 - assert "Input:\n1\nExpected:\n2\nReceived\n3\n" == r + assert six.PY3 + assert "Input:\n1\nExpected:\n2\nReceived\n3\n" == r assert input.__repr__.call_count == 1 assert expected.__repr__.call_count == 1 diff --git a/html5lib/tests/test_optionaltags_filter.py b/html5lib/tests/test_optionaltags_filter.py index cd282149..1b054f40 100644 --- a/html5lib/tests/test_optionaltags_filter.py +++ b/html5lib/tests/test_optionaltags_filter.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from html5lib.filters.optionaltags import Filter diff --git a/html5lib/tests/test_parser2.py b/html5lib/tests/test_parser2.py index 879d2447..8f40bc60 100644 --- a/html5lib/tests/test_parser2.py +++ b/html5lib/tests/test_parser2.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from six import PY2, text_type import io @@ -39,7 +37,7 @@ def test_namespace_html_elements_0_etree(): doc = parse("", treebuilder="etree", namespaceHTMLElements=True) - assert doc.tag == "{%s}html" % (namespaces["html"],) + assert doc.tag == "{{{}}}html".format(namespaces["html"]) def test_namespace_html_elements_1_etree(): @@ -77,7 +75,7 @@ def test_debug_log(): if PY2: for i, log in enumerate(expected): - log = [x.encode("ascii") if isinstance(x, text_type) else x for x in log] + log = [x.encode("ascii") if isinstance(x, str) else x for x in log] expected[i] = tuple(log) assert parser.log == expected diff --git a/html5lib/tests/test_sanitizer.py b/html5lib/tests/test_sanitizer.py index f3faeb80..e5cdc2af 100644 --- a/html5lib/tests/test_sanitizer.py +++ b/html5lib/tests/test_sanitizer.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - import pytest from html5lib import constants, parseFragment, serialize @@ -60,19 +58,19 @@ def param_sanitizer(): if tag_name == 'image': yield ("test_should_allow_%s_tag" % tag_name, "foo <bad>bar</bad> baz", - "<%s title='1'>foo bar baz" % (tag_name, tag_name)) + "<{} title='1'>foo bar baz".format(tag_name, tag_name)) elif tag_name == 'br': yield ("test_should_allow_%s_tag" % tag_name, "
foo <bad>bar</bad> baz
", - "<%s title='1'>foo bar baz" % (tag_name, tag_name)) + "<{} title='1'>foo bar baz".format(tag_name, tag_name)) elif tag_name in constants.voidElements: yield ("test_should_allow_%s_tag" % tag_name, "<%s title=\"1\"/>foo <bad>bar</bad> baz" % tag_name, - "<%s title='1'>foo bar baz" % (tag_name, tag_name)) + "<{} title='1'>foo bar baz".format(tag_name, tag_name)) else: yield ("test_should_allow_%s_tag" % tag_name, - "<%s title=\"1\">foo <bad>bar</bad> baz" % (tag_name, tag_name), - "<%s title='1'>foo bar baz" % (tag_name, tag_name)) + "<{} title=\"1\">foo <bad>bar</bad> baz".format(tag_name, tag_name), + "<{} title='1'>foo bar baz".format(tag_name, tag_name)) for ns, attribute_name in sanitizer.allowed_attributes: if ns is not None: @@ -85,16 +83,16 @@ def param_sanitizer(): if attribute_name in sanitizer.attr_val_is_uri: attribute_value = '%s://sub.domain.tld/path/object.ext' % sanitizer.allowed_protocols[0] yield ("test_should_allow_%s_attribute" % attribute_name, - "

foo <bad>bar</bad> baz

" % (attribute_name, attribute_value), - "

foo bar baz

" % (attribute_name, attribute_value)) + "

foo <bad>bar</bad> baz

".format(attribute_name, attribute_value), + "

foo bar baz

".format(attribute_name, attribute_value)) for protocol in sanitizer.allowed_protocols: rest_of_uri = '//sub.domain.tld/path/object.ext' if protocol == 'data': rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ=' yield ("test_should_allow_uppercase_%s_uris" % protocol, - "foo" % (protocol, rest_of_uri), - """foo""" % (protocol, rest_of_uri)) + "foo".format(protocol, rest_of_uri), + """foo""".format(protocol, rest_of_uri)) for protocol in sanitizer.allowed_protocols: rest_of_uri = '//sub.domain.tld/path/object.ext' @@ -102,8 +100,8 @@ def param_sanitizer(): rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ=' protocol = protocol.upper() yield ("test_should_allow_uppercase_%s_uris" % protocol, - "foo" % (protocol, rest_of_uri), - """foo""" % (protocol, rest_of_uri)) + "foo".format(protocol, rest_of_uri), + """foo""".format(protocol, rest_of_uri)) @pytest.mark.parametrize("expected, input", diff --git a/html5lib/tests/test_serializer.py b/html5lib/tests/test_serializer.py index bce62459..e8371247 100644 --- a/html5lib/tests/test_serializer.py +++ b/html5lib/tests/test_serializer.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - import os import json @@ -221,6 +219,6 @@ def test_serializer(input, expected, options): result = serialize_html(input, options) if len(expected) == 1: - assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions:\n%s" % (expected[0], result, str(options)) + assert expected[0] == result, "Expected:\n{}\nActual:\n{}\nOptions:\n{}".format(expected[0], result, str(options)) elif result not in expected: - assert False, "Expected: %s, Received: %s" % (expected, result) + assert False, "Expected: {}, Received: {}".format(expected, result) diff --git a/html5lib/tests/test_stream.py b/html5lib/tests/test_stream.py index efe9b472..413b43de 100644 --- a/html5lib/tests/test_stream.py +++ b/html5lib/tests/test_stream.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from . 
import support # noqa import codecs @@ -105,7 +103,7 @@ def test_char_ascii(): def test_char_utf8(): - stream = HTMLInputStream('\u2018'.encode('utf-8'), override_encoding='utf-8') + stream = HTMLInputStream('\u2018'.encode(), override_encoding='utf-8') assert stream.charEncoding[0].name == 'utf-8' assert stream.char() == '\u2018' @@ -186,7 +184,7 @@ def test_python_issue_20007(): Make sure we have a work-around for Python bug #20007 http://bugs.python.org/issue20007 """ - class FakeSocket(object): + class FakeSocket: def makefile(self, _mode, _bufsize=None): # pylint:disable=unused-argument return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText") @@ -205,7 +203,7 @@ def test_python_issue_20007_b(): if six.PY2: return - class FakeSocket(object): + class FakeSocket: def makefile(self, _mode, _bufsize=None): # pylint:disable=unused-argument return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText") diff --git a/html5lib/tests/test_tokenizer2.py b/html5lib/tests/test_tokenizer2.py index 158d847a..93a43f4e 100644 --- a/html5lib/tests/test_tokenizer2.py +++ b/html5lib/tests/test_tokenizer2.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - import io from six import unichr, text_type @@ -16,8 +14,8 @@ def ignore_parse_errors(toks): def test_maintain_attribute_order(): # generate loads to maximize the chance a hash-based mutation will occur - attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))] - stream = io.StringIO("") + attrs = [(chr(x), str(i)) for i, x in enumerate(range(ord('a'), ord('z')))] + stream = io.StringIO("") toks = HTMLTokenizer(stream) out = list(ignore_parse_errors(toks)) @@ -49,8 +47,8 @@ def test_duplicate_attribute(): def test_maintain_duplicate_attribute_order(): # generate loads to maximize the chance a hash-based mutation will occur - attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))] - stream = io.StringIO("") + attrs = [(chr(x), str(i)) for i, x in enumerate(range(ord('a'), ord('z')))] + stream = io.StringIO("") toks = HTMLTokenizer(stream) out = list(ignore_parse_errors(toks)) diff --git a/html5lib/tests/test_treeadapters.py b/html5lib/tests/test_treeadapters.py index 95e56c00..1e396ed9 100644 --- a/html5lib/tests/test_treeadapters.py +++ b/html5lib/tests/test_treeadapters.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from . 
import support # noqa import html5lib diff --git a/html5lib/tests/test_treewalkers.py b/html5lib/tests/test_treewalkers.py index 780ca964..3266361a 100644 --- a/html5lib/tests/test_treewalkers.py +++ b/html5lib/tests/test_treewalkers.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - import itertools import sys @@ -74,11 +72,11 @@ def param_treewalker_six_mix(): # fragment but not using the u'' syntax nor importing unicode_literals sm_tests = [ ('Example', - [(str('class'), str('test123'))], + [('class', 'test123')], '\n class="test123"\n href="http://example.com"\n "Example"'), ('', - [(str('rel'), str('alternate'))], + [('rel', 'alternate')], '\n href="http://example.com/cow"\n rel="alternate"\n "Example"') ] @@ -102,7 +100,7 @@ def test_treewalker_six_mix(intext, expected, attrs_to_add, tree): output = treewalkers.pprint(treeClass["walker"](document)) output = attrlist.sub(sortattrs, output) if output not in expected: - raise AssertionError("TreewalkerEditTest: %s\nExpected:\n%s\nReceived:\n%s" % (treeName, expected, output)) + raise AssertionError("TreewalkerEditTest: {}\nExpected:\n{}\nReceived:\n{}".format(treeName, expected, output)) @pytest.mark.parametrize("tree,char", itertools.product(sorted(treeTypes.items()), ["x", "\u1234"])) @@ -151,8 +149,8 @@ def test_maintain_attribute_order(treeName): pytest.skip("Treebuilder not loaded") # generate loads to maximize the chance a hash-based mutation will occur - attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))] - data = "" + attrs = [(chr(x), str(i)) for i, x in enumerate(range(ord('a'), ord('z')))] + data = "" parser = html5parser.HTMLParser(tree=treeAPIs["builder"]) document = parser.parseFragment(data) diff --git a/html5lib/tests/test_whitespace_filter.py b/html5lib/tests/test_whitespace_filter.py index e9da6140..0daf1c52 100644 --- a/html5lib/tests/test_whitespace_filter.py +++ b/html5lib/tests/test_whitespace_filter.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from html5lib.filters.whitespace import Filter from html5lib.constants import spaceCharacters spaceCharacters = "".join(spaceCharacters) diff --git a/html5lib/tests/tokenizer.py b/html5lib/tests/tokenizer.py index cc9897a4..8cf1f1dd 100644 --- a/html5lib/tests/tokenizer.py +++ b/html5lib/tests/tokenizer.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - import codecs import json import warnings @@ -12,7 +10,7 @@ from html5lib import constants, _utils -class TokenizerTestParser(object): +class TokenizerTestParser: def __init__(self, initialState, lastStartTag=None): self.tokenizer = HTMLTokenizer self._state = initialState @@ -146,11 +144,11 @@ def repl(m): low = int(m.group(2), 16) if 0xD800 <= high <= 0xDBFF and 0xDC00 <= low <= 0xDFFF: cp = ((high - 0xD800) << 10) + (low - 0xDC00) + 0x10000 - return unichr(cp) + return chr(cp) else: - return unichr(high) + unichr(low) + return chr(high) + chr(low) else: - return unichr(int(m.group(1), 16)) + return chr(int(m.group(1), 16)) try: return _surrogateRe.sub(repl, inp) except ValueError: @@ -197,7 +195,7 @@ def collect(self): class TokenizerTestCollector(pytest.Collector): def __init__(self, name, parent=None, config=None, session=None, testdata=None): - super(TokenizerTestCollector, self).__init__(name, parent, config, session) + super().__init__(name, parent, config, session) if 'initialStates' not in testdata: testdata["initialStates"] = ["Data state"] if 'doubleEscaped' in 
testdata: @@ -218,7 +216,7 @@ def collect(self): class TokenizerTest(pytest.Item): def __init__(self, name, parent, test, initialState): - super(TokenizerTest, self).__init__(name, parent) + super().__init__(name, parent) self.obj = lambda: 1 # this is to hack around skipif needing a function! self.test = test self.initialState = initialState diff --git a/html5lib/tests/tokenizertotree.py b/html5lib/tests/tokenizertotree.py index 8528e876..2ba74cad 100644 --- a/html5lib/tests/tokenizertotree.py +++ b/html5lib/tests/tokenizertotree.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - import sys import os import json @@ -25,7 +23,7 @@ def main(out_path): def run_file(filename, out_path): try: - tests_data = json.load(open(filename, "r")) + tests_data = json.load(open(filename)) except ValueError: sys.stderr.write("Failed to load %s\n" % filename) return diff --git a/html5lib/tests/tree_construction.py b/html5lib/tests/tree_construction.py index fb0657bf..e1b0c180 100644 --- a/html5lib/tests/tree_construction.py +++ b/html5lib/tests/tree_construction.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - import itertools import re import warnings @@ -31,14 +29,13 @@ def collect(self): class TreeConstructionTest(pytest.Collector): def __init__(self, name, parent=None, config=None, session=None, testdata=None): - super(TreeConstructionTest, self).__init__(name, parent, config, session) + super().__init__(name, parent, config, session) self.testdata = testdata def collect(self): for treeName, treeAPIs in sorted(treeTypes.items()): - for x in itertools.chain(self._getParserTests(treeName, treeAPIs), - self._getTreeWalkerTests(treeName, treeAPIs)): - yield x + yield from itertools.chain(self._getParserTests(treeName, treeAPIs), + self._getTreeWalkerTests(treeName, treeAPIs)) def _getParserTests(self, treeName, treeAPIs): if treeAPIs is not None and "adapter" in treeAPIs: @@ -79,7 +76,7 @@ def convertTreeDump(data): class ParserTest(pytest.Item): def __init__(self, name, parent, test, treeClass, namespaceHTMLElements): - super(ParserTest, self).__init__(name, parent) + super().__init__(name, parent) self.test = test self.treeClass = treeClass self.namespaceHTMLElements = namespaceHTMLElements @@ -122,7 +119,7 @@ def runtest(self): errStr = [] for (line, col), errorcode, datavars in p.errors: - assert isinstance(datavars, dict), "%s, %s" % (errorcode, repr(datavars)) + assert isinstance(datavars, dict), "{}, {}".format(errorcode, repr(datavars)) errStr.append("Line: %i Col: %i %s" % (line, col, constants.E[errorcode] % datavars)) @@ -144,7 +141,7 @@ def repr_failure(self, excinfo): class TreeWalkerTest(pytest.Item): def __init__(self, name, parent, test, treeAPIs): - super(TreeWalkerTest, self).__init__(name, parent) + super().__init__(name, parent) self.test = test self.treeAPIs = treeAPIs diff --git a/html5lib/treeadapters/__init__.py b/html5lib/treeadapters/__init__.py index dfeb0ba5..1444fc9a 100644 --- a/html5lib/treeadapters/__init__.py +++ b/html5lib/treeadapters/__init__.py @@ -16,7 +16,6 @@ genshi_tree = genshi.to_genshi(TreeWalker(tree)) """ -from __future__ import absolute_import, division, unicode_literals from . 
import sax diff --git a/html5lib/treeadapters/genshi.py b/html5lib/treeadapters/genshi.py index 61d5fb6a..804a980e 100644 --- a/html5lib/treeadapters/genshi.py +++ b/html5lib/treeadapters/genshi.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from genshi.core import QName, Attrs from genshi.core import START, END, TEXT, COMMENT, DOCTYPE @@ -23,7 +21,7 @@ def to_genshi(walker): if type in ("StartTag", "EmptyTag"): if token["namespace"]: - name = "{%s}%s" % (token["namespace"], token["name"]) + name = "{{{}}}{}".format(token["namespace"], token["name"]) else: name = token["name"] attrs = Attrs([(QName("{%s}%s" % attr if attr[0] is not None else attr[1]), value) @@ -34,7 +32,7 @@ def to_genshi(walker): if type == "EndTag": if token["namespace"]: - name = "{%s}%s" % (token["namespace"], token["name"]) + name = "{{{}}}{}".format(token["namespace"], token["name"]) else: name = token["name"] diff --git a/html5lib/treeadapters/sax.py b/html5lib/treeadapters/sax.py index f4ccea5a..04ec1ef0 100644 --- a/html5lib/treeadapters/sax.py +++ b/html5lib/treeadapters/sax.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from xml.sax.xmlreader import AttributesNSImpl from ..constants import adjustForeignAttributes, unadjustForeignAttributes diff --git a/html5lib/treebuilders/__init__.py b/html5lib/treebuilders/__init__.py index d44447ea..90aad5fb 100644 --- a/html5lib/treebuilders/__init__.py +++ b/html5lib/treebuilders/__init__.py @@ -29,7 +29,6 @@ """ -from __future__ import absolute_import, division, unicode_literals from .._utils import default_etree diff --git a/html5lib/treebuilders/base.py b/html5lib/treebuilders/base.py index e4a3d710..b48ddf84 100644 --- a/html5lib/treebuilders/base.py +++ b/html5lib/treebuilders/base.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from six import text_type from ..constants import scopingElements, tableInsertModeElements, namespaces @@ -20,7 +19,7 @@ } -class Node(object): +class Node: """Represents an item in the tree""" def __init__(self, name): """Creates a Node @@ -43,11 +42,11 @@ def __init__(self, name): self._flags = [] def __str__(self): - attributesStr = " ".join(["%s=\"%s\"" % (name, value) - for name, value in - self.attributes.items()]) + attributesStr = " ".join("{}=\"{}\"".format(name, value) + for name, value in + self.attributes.items()) if attributesStr: - return "<%s %s>" % (self.name, attributesStr) + return "<{} {}>".format(self.name, attributesStr) else: return "<%s>" % (self.name) @@ -143,7 +142,7 @@ def nodesEqual(self, node1, node2): return True -class TreeBuilder(object): +class TreeBuilder: """Base treebuilder implementation * documentClass - the class to use for the bottommost node of a document @@ -199,7 +198,7 @@ def elementInScope(self, target, variant=None): # match any node with that name exactNode = hasattr(target, "nameTuple") if not exactNode: - if isinstance(target, text_type): + if isinstance(target, str): target = (namespaces["html"], target) assert isinstance(target, tuple) @@ -322,7 +321,7 @@ def _setInsertFromTable(self, value): def insertElementNormal(self, token): name = token["name"] - assert isinstance(name, text_type), "Element %s not unicode" % name + assert isinstance(name, str), "Element %s not unicode" % name namespace = token.get("namespace", self.defaultNamespace) element = self.elementClass(name, namespace) element.attributes = token["data"] diff --git a/html5lib/treebuilders/dom.py 
b/html5lib/treebuilders/dom.py index 818a3343..51219093 100644 --- a/html5lib/treebuilders/dom.py +++ b/html5lib/treebuilders/dom.py @@ -1,6 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - - from collections.abc import MutableMapping from xml.dom import minidom, Node import weakref @@ -188,25 +185,25 @@ def serializeElement(element, indent=0): rv.append("""|%s""" % (' ' * indent, element.name, publicId, systemId)) else: - rv.append("|%s" % (' ' * indent, element.name)) + rv.append("|{}".format(' ' * indent, element.name)) else: - rv.append("|%s" % (' ' * indent,)) + rv.append("|{}".format(' ' * indent)) elif element.nodeType == Node.DOCUMENT_NODE: rv.append("#document") elif element.nodeType == Node.DOCUMENT_FRAGMENT_NODE: rv.append("#document-fragment") elif element.nodeType == Node.COMMENT_NODE: - rv.append("|%s" % (' ' * indent, element.nodeValue)) + rv.append("|{}".format(' ' * indent, element.nodeValue)) elif element.nodeType == Node.TEXT_NODE: - rv.append("|%s\"%s\"" % (' ' * indent, element.nodeValue)) + rv.append("|{}\"{}\"".format(' ' * indent, element.nodeValue)) else: if (hasattr(element, "namespaceURI") and element.namespaceURI is not None): - name = "%s %s" % (constants.prefixes[element.namespaceURI], - element.nodeName) + name = "{} {}".format(constants.prefixes[element.namespaceURI], + element.nodeName) else: name = element.nodeName - rv.append("|%s<%s>" % (' ' * indent, name)) + rv.append("|{}<{}>".format(' ' * indent, name)) if element.hasAttributes(): attributes = [] for i in range(len(element.attributes)): @@ -215,13 +212,13 @@ def serializeElement(element, indent=0): value = attr.value ns = attr.namespaceURI if ns: - name = "%s %s" % (constants.prefixes[ns], attr.localName) + name = "{} {}".format(constants.prefixes[ns], attr.localName) else: name = attr.nodeName attributes.append((name, value)) for name, value in sorted(attributes): - rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value)) + rv.append('|{}{}="{}"'.format(' ' * (indent + 2), name, value)) indent += 2 for child in element.childNodes: serializeElement(child, indent) diff --git a/html5lib/treebuilders/etree.py b/html5lib/treebuilders/etree.py index 086bed4e..2a7c80db 100644 --- a/html5lib/treebuilders/etree.py +++ b/html5lib/treebuilders/etree.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals # pylint:disable=protected-access from six import text_type @@ -38,7 +37,7 @@ def _getETreeTag(self, name, namespace): if namespace is None: etree_tag = name else: - etree_tag = "{%s}%s" % (namespace, name) + etree_tag = "{{{}}}{}".format(namespace, name) return etree_tag def _setName(self, name): @@ -70,7 +69,7 @@ def _setAttributes(self, attributes): # allocation on average for key, value in attributes.items(): if isinstance(key, tuple): - name = "{%s}%s" % (key[2], key[1]) + name = "{{{}}}{}".format(key[2], key[1]) else: name = key el_attrib[name] = value @@ -210,20 +209,20 @@ def serializeElement(element, indent=0): rv.append("""""" % (element.text, publicId, systemId)) else: - rv.append("" % (element.text,)) + rv.append("".format(element.text)) elif element.tag == "DOCUMENT_ROOT": rv.append("#document") if element.text is not None: - rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text)) + rv.append("|{}\"{}\"".format(' ' * (indent + 2), element.text)) if element.tail is not None: raise TypeError("Document node cannot have tail") if hasattr(element, "attrib") and len(element.attrib): raise TypeError("Document node cannot have attributes") 
elif element.tag == ElementTreeCommentType: - rv.append("|%s" % (' ' * indent, element.text)) + rv.append("|{}".format(' ' * indent, element.text)) else: - assert isinstance(element.tag, text_type), \ - "Expected unicode, got %s, %s" % (type(element.tag), element.tag) + assert isinstance(element.tag, str), \ + "Expected unicode, got {}, {}".format(type(element.tag), element.tag) nsmatch = tag_regexp.match(element.tag) if nsmatch is None: @@ -231,8 +230,8 @@ def serializeElement(element, indent=0): else: ns, name = nsmatch.groups() prefix = constants.prefixes[ns] - name = "%s %s" % (prefix, name) - rv.append("|%s<%s>" % (' ' * indent, name)) + name = "{} {}".format(prefix, name) + rv.append("|{}<{}>".format(' ' * indent, name)) if hasattr(element, "attrib"): attributes = [] @@ -241,20 +240,20 @@ def serializeElement(element, indent=0): if nsmatch is not None: ns, name = nsmatch.groups() prefix = constants.prefixes[ns] - attr_string = "%s %s" % (prefix, name) + attr_string = "{} {}".format(prefix, name) else: attr_string = name attributes.append((attr_string, value)) for name, value in sorted(attributes): - rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value)) + rv.append('|{}{}="{}"'.format(' ' * (indent + 2), name, value)) if element.text: - rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text)) + rv.append("|{}\"{}\"".format(' ' * (indent + 2), element.text)) indent += 2 for child in element: serializeElement(child, indent) if element.tail: - rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail)) + rv.append("|{}\"{}\"".format(' ' * (indent - 2), element.tail)) serializeElement(element, 0) return "\n".join(rv) @@ -275,7 +274,7 @@ def serializeElement(element): rv.append("""""" % (element.text, publicId, systemId)) else: - rv.append("" % (element.text,)) + rv.append("".format(element.text)) elif element.tag == "DOCUMENT_ROOT": if element.text is not None: rv.append(element.text) @@ -288,23 +287,23 @@ def serializeElement(element): serializeElement(child) elif element.tag == ElementTreeCommentType: - rv.append("" % (element.text,)) + rv.append("".format(element.text)) else: # This is assumed to be an ordinary element if not element.attrib: - rv.append("<%s>" % (filter.fromXmlName(element.tag),)) + rv.append("<{}>".format(filter.fromXmlName(element.tag))) else: - attr = " ".join(["%s=\"%s\"" % ( + attr = " ".join("{}=\"{}\"".format( filter.fromXmlName(name), value) - for name, value in element.attrib.items()]) - rv.append("<%s %s>" % (element.tag, attr)) + for name, value in element.attrib.items()) + rv.append("<{} {}>".format(element.tag, attr)) if element.text: rv.append(element.text) for child in element: serializeElement(child) - rv.append("" % (element.tag,)) + rv.append("".format(element.tag)) if element.tail: rv.append(element.tail) diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py index d99a51a9..3ec133bd 100644 --- a/html5lib/treebuilders/etree_lxml.py +++ b/html5lib/treebuilders/etree_lxml.py @@ -9,7 +9,6 @@ When any of these things occur, we emit a DataLossWarning """ -from __future__ import absolute_import, division, unicode_literals # pylint:disable=protected-access import warnings @@ -33,14 +32,14 @@ comment_type = etree.Comment("asd").tag -class DocumentType(object): +class DocumentType: def __init__(self, name, publicId, systemId): self.name = name self.publicId = publicId self.systemId = systemId -class Document(object): +class Document: def __init__(self): self._elementTree = None self._childNodes = [] @@ -72,11 +71,11 @@ 
def serializeElement(element, indent=0):
                         element.docinfo.system_url):
                     dtd_str = "<!DOCTYPE %s>" % element.docinfo.root_name
                 else:
-                    dtd_str = """<!DOCTYPE %s "%s" "%s">""" % (
+                    dtd_str = """<!DOCTYPE {} "{}" "{}">""".format(
                         element.docinfo.root_name,
                         element.docinfo.public_id,
                         element.docinfo.system_url)
-                rv.append("|%s%s" % (' ' * (indent + 2), dtd_str))
+                rv.append("|{}{}".format(' ' * (indent + 2), dtd_str))
             next_element = element.getroot()
             while next_element.getprevious() is not None:
                 next_element = next_element.getprevious()
@@ -86,16 +85,16 @@ def serializeElement(element, indent=0):
         elif isinstance(element, str) or isinstance(element, bytes):
             # Text in a fragment
             assert isinstance(element, str)
-            rv.append("|%s\"%s\"" % (' ' * indent, element))
+            rv.append("|{}\"{}\"".format(' ' * indent, element))
         else:
             # Fragment case
             rv.append("#document-fragment")
             for next_element in element:
                 serializeElement(next_element, indent + 2)
     elif element.tag == comment_type:
-        rv.append("|%s<!-- %s -->" % (' ' * indent, element.text))
+        rv.append("|{}<!-- {} -->".format(' ' * indent, element.text))
         if hasattr(element, "tail") and element.tail:
-            rv.append("|%s\"%s\"" % (' ' * indent, element.tail))
+            rv.append("|{}\"{}\"".format(' ' * indent, element.tail))
     else:
         assert isinstance(element, etree._Element)
         nsmatch = etree_builders.tag_regexp.match(element.tag)
@@ -103,11 +102,11 @@ def serializeElement(element, indent=0):
             ns = nsmatch.group(1)
             tag = nsmatch.group(2)
             prefix = constants.prefixes[ns]
-            rv.append("|%s<%s %s>" % (' ' * indent, prefix,
-                                      infosetFilter.fromXmlName(tag)))
+            rv.append("|{}<{} {}>".format(' ' * indent, prefix,
+                                          infosetFilter.fromXmlName(tag)))
         else:
-            rv.append("|%s<%s>" % (' ' * indent,
-                                   infosetFilter.fromXmlName(element.tag)))
+            rv.append("|{}<{}>".format(' ' * indent,
+                                       infosetFilter.fromXmlName(element.tag)))
         if hasattr(element, "attrib"):
             attributes = []
@@ -117,21 +116,21 @@ def serializeElement(element, indent=0):
                     ns, name = nsmatch.groups()
                     name = infosetFilter.fromXmlName(name)
                     prefix = constants.prefixes[ns]
-                    attr_string = "%s %s" % (prefix, name)
+                    attr_string = "{} {}".format(prefix, name)
                 else:
                     attr_string = infosetFilter.fromXmlName(name)
                 attributes.append((attr_string, value))
             for name, value in sorted(attributes):
-                rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value))
+                rv.append('|{}{}="{}"'.format(' ' * (indent + 2), name, value))
         if element.text:
-            rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text))
+            rv.append("|{}\"{}\"".format(' ' * (indent + 2), element.text))
         indent += 2
         for child in element:
             serializeElement(child, indent)
     if hasattr(element, "tail") and element.tail:
-        rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail))
+        rv.append("|{}\"{}\"".format(' ' * (indent - 2), element.tail))
     serializeElement(element, 0)
     return "\n".join(rv)
@@ -152,23 +151,23 @@ def serializeElement(element):
             serializeElement(element.getroot())
     elif element.tag == comment_type:
-        rv.append("<!--%s-->" % (element.text,))
+        rv.append("<!--{}-->".format(element.text))
     else:
         # This is assumed to be an ordinary element
         if not element.attrib:
-            rv.append("<%s>" % (element.tag,))
+            rv.append("<{}>".format(element.tag))
         else:
-            attr = " ".join(["%s=\"%s\"" % (name, value)
-                             for name, value in element.attrib.items()])
-            rv.append("<%s %s>" % (element.tag, attr))
+            attr = " ".join("{}=\"{}\"".format(name, value)
+                            for name, value in element.attrib.items())
+            rv.append("<{} {}>".format(element.tag, attr))
         if element.text:
             rv.append(element.text)
         for child in element:
             serializeElement(child)
-        rv.append("</%s>" % (element.tag,))
+        rv.append("</{}>".format(element.tag))
     if hasattr(element, "tail") and element.tail:
         rv.append(element.tail)
@@ -197,14 +196,14 @@ def __init__(self, element):
     def _coerceKey(self, key):
         if isinstance(key, tuple):
-            name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1]))
+            name = "{{{}}}{}".format(key[2], infosetFilter.coerceAttribute(key[1]))
         else:
             name = infosetFilter.coerceAttribute(key)
         return name
     def __getitem__(self, key):
         value = self._element._element.attrib[self._coerceKey(key)]
-        if not PY3 and isinstance(value, binary_type):
+        if not PY3 and isinstance(value, bytes):
             value = value.decode("ascii")
         return value
@@ -328,7 +327,7 @@ def insertCommentMain(self, data, parent=None):
         if (parent == self.document and
                 self.document._elementTree.getroot()[-1].tag == comment_type):
             warnings.warn("lxml cannot represent adjacent comments beyond the root elements", DataLossWarning)
-        super(TreeBuilder, self).insertComment(data, parent)
+        super().insertComment(data, parent)
     def insertRoot(self, token):
         # Because of the way libxml2 works, it doesn't seem to be possible to
@@ -375,7 +374,7 @@ def insertRoot(self, token):
         if namespace is None:
             etree_tag = name
         else:
-            etree_tag = "{%s}%s" % (namespace, name)
+            etree_tag = "{{{}}}{}".format(namespace, name)
         root.tag = etree_tag
         # Add the root element to the internal child/open data structures
diff --git a/html5lib/treewalkers/__init__.py b/html5lib/treewalkers/__init__.py
index b2d3aac3..c8ecc081 100644
--- a/html5lib/treewalkers/__init__.py
+++ b/html5lib/treewalkers/__init__.py
@@ -8,7 +8,6 @@
 returns an iterator which generates tokens.
 """
-from __future__ import absolute_import, division, unicode_literals
 from .. import constants
 from .._utils import default_etree
@@ -96,10 +95,10 @@ def pprint(walker):
                     ns = constants.prefixes[token["namespace"]]
                 else:
                     ns = token["namespace"]
-                name = "%s %s" % (ns, token["name"])
+                name = "{} {}".format(ns, token["name"])
             else:
                 name = token["name"]
-            output.append("%s<%s>" % (" " * indent, name))
+            output.append("{}<{}>".format(" " * indent, name))
             indent += 2
             # attributes (sorted for consistent ordering)
             attrs = token["data"]
@@ -109,10 +108,10 @@
                         ns = constants.prefixes[namespace]
                     else:
                         ns = namespace
-                    name = "%s %s" % (ns, localname)
+                    name = "{} {}".format(ns, localname)
                 else:
                     name = localname
-                output.append("%s%s=\"%s\"" % (" " * indent, name, value))
+                output.append("{}{}=\"{}\"".format(" " * indent, name, value))
             # self-closing
             if type == "EmptyTag":
                 indent -= 2
@@ -121,7 +120,7 @@
             indent -= 2
         elif type == "Comment":
-            output.append("%s<!-- %s -->" % (" " * indent, token["data"]))
+            output.append("{}<!-- {} -->".format(" " * indent, token["data"]))
         elif type == "Doctype":
             if token["name"]:
@@ -137,13 +136,13 @@
                                   token["name"],
                                   token["systemId"]))
                 else:
-                    output.append("%s<!DOCTYPE %s>" % (" " * indent,
-                                                       token["name"]))
+                    output.append("{}<!DOCTYPE {}>".format(" " * indent,
+                                                           token["name"]))
             else:
-                output.append("%s<!DOCTYPE >" % (" " * indent,))
+                output.append("{}<!DOCTYPE >".format(" " * indent))
         elif type == "Characters":
-            output.append("%s\"%s\"" % (" " * indent, token["data"]))
+            output.append("{}\"{}\"".format(" " * indent, token["data"]))
         elif type == "SpaceCharacters":
             assert False, "concatenateCharacterTokens should have got rid of all Space tokens"
diff --git a/html5lib/treewalkers/base.py b/html5lib/treewalkers/base.py
index 80c474c4..a4a9c71a 100644
--- a/html5lib/treewalkers/base.py
+++ b/html5lib/treewalkers/base.py
@@ -1,5 +1,3 @@
-from __future__ import absolute_import, division,
unicode_literals - from xml.dom import Node from ..constants import namespaces, voidElements, spaceCharacters @@ -17,7 +15,7 @@ spaceCharacters = "".join(spaceCharacters) -class TreeWalker(object): +class TreeWalker: """Walks a tree yielding tokens Tokens are dicts that all have a ``type`` field specifying the type of the @@ -201,15 +199,13 @@ def __iter__(self): yield self.doctype(*details) elif type == TEXT: - for token in self.text(*details): - yield token + yield from self.text(*details) elif type == ELEMENT: namespace, name, attributes, hasChildren = details if (not namespace or namespace == namespaces["html"]) and name in voidElements: - for token in self.emptyTag(namespace, name, attributes, - hasChildren): - yield token + yield from self.emptyTag(namespace, name, attributes, + hasChildren) hasChildren = False else: yield self.startTag(namespace, name, attributes) diff --git a/html5lib/treewalkers/dom.py b/html5lib/treewalkers/dom.py index b0c89b00..ac88cd9d 100644 --- a/html5lib/treewalkers/dom.py +++ b/html5lib/treewalkers/dom.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from xml.dom import Node from . import base diff --git a/html5lib/treewalkers/etree.py b/html5lib/treewalkers/etree.py index 44653372..8e68f56f 100644 --- a/html5lib/treewalkers/etree.py +++ b/html5lib/treewalkers/etree.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from collections import OrderedDict import re @@ -51,7 +49,7 @@ def getNodeDetails(self, node): return base.COMMENT, node.text else: - assert isinstance(node.tag, string_types), type(node.tag) + assert isinstance(node.tag, str), type(node.tag) # This is assumed to be an ordinary element match = tag_regexp.match(node.tag) if match: diff --git a/html5lib/treewalkers/etree_lxml.py b/html5lib/treewalkers/etree_lxml.py index a614ac5b..744b94c1 100644 --- a/html5lib/treewalkers/etree_lxml.py +++ b/html5lib/treewalkers/etree_lxml.py @@ -1,4 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals from six import text_type from collections import OrderedDict @@ -14,13 +13,13 @@ def ensure_str(s): if s is None: return None - elif isinstance(s, text_type): + elif isinstance(s, str): return s else: return s.decode("ascii", "strict") -class Root(object): +class Root: def __init__(self, et): self.elementtree = et self.children = [] @@ -58,7 +57,7 @@ def __len__(self): return 1 -class Doctype(object): +class Doctype: def __init__(self, root_node, name, public_id, system_id): self.root_node = root_node self.name = name @@ -81,7 +80,7 @@ def getnext(self): return None -class FragmentWrapper(object): +class FragmentWrapper: def __init__(self, fragment_root, obj): self.root_node = fragment_root self.obj = obj diff --git a/html5lib/treewalkers/genshi.py b/html5lib/treewalkers/genshi.py index 7483be27..d4757af2 100644 --- a/html5lib/treewalkers/genshi.py +++ b/html5lib/treewalkers/genshi.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from genshi.core import QName from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT from genshi.core import START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT @@ -15,14 +13,12 @@ def __iter__(self): previous = None for event in self.tree: if previous is not None: - for token in self.tokens(previous, event): - yield token + yield from self.tokens(previous, event) previous = event # Don't forget the final event! 
if previous is not None: - for token in self.tokens(previous, None): - yield token + yield from self.tokens(previous, None) def tokens(self, event, next): kind, data, _ = event @@ -38,10 +34,9 @@ def tokens(self, event, next): converted_attribs[(None, k)] = v if namespace == namespaces["html"] and name in voidElements: - for token in self.emptyTag(namespace, name, converted_attribs, - not next or next[0] != END or - next[1] != tag): - yield token + yield from self.emptyTag(namespace, name, converted_attribs, + not next or next[0] != END or + next[1] != tag) else: yield self.startTag(namespace, name, converted_attribs) @@ -55,8 +50,7 @@ def tokens(self, event, next): yield self.comment(data) elif kind == TEXT: - for token in self.text(data): - yield token + yield from self.text(data) elif kind == DOCTYPE: yield self.doctype(*data) diff --git a/parse.py b/parse.py index f973fbb4..b72d2ef7 100755 --- a/parse.py +++ b/parse.py @@ -41,7 +41,7 @@ def parse(): try: # Try opening from file system f = open(f, "rb") - except IOError as e: + except OSError as e: sys.stderr.write("Unable to open file: %s\n" % e) sys.exit(1) except IndexError: @@ -80,7 +80,7 @@ def parse(): if document: printOutput(p, document, opts) t2 = time.time() - sys.stderr.write("\n\nRun took: %fs (plus %fs to print the output)" % (t1 - t0, t2 - t1)) + sys.stderr.write("\n\nRun took: {:f}s (plus {:f}s to print the output)".format(t1 - t0, t2 - t1)) else: sys.stderr.write("\n\nRun took: %fs" % (t1 - t0)) else: diff --git a/setup.py b/setup.py index 33ab359d..50054020 100644 --- a/setup.py +++ b/setup.py @@ -1,5 +1,3 @@ -from __future__ import print_function - import ast import codecs diff --git a/utils/entities.py b/utils/entities.py index 6e8ca458..c70504c8 100644 --- a/utils/entities.py +++ b/utils/entities.py @@ -49,9 +49,9 @@ def test_description(name, good): semicolon_text = {True: "with a semi-colon", False: "without a semi-colon"}[with_semicolon] if good: - text = "Named entity: %s %s" % (name, semicolon_text) + text = "Named entity: {} {}".format(name, semicolon_text) else: - text = "Bad named entity: %s %s" % (name, semicolon_text) + text = "Bad named entity: {} {}".format(name, semicolon_text) return text @@ -80,7 +80,7 @@ def subentity_exists(entity_name, entities): def make_entities_code(entities): - entities_text = "\n".join(" \"%s\": u\"%s\"," % ( + entities_text = "\n".join(" \"{}\": u\"{}\",".format( name, entities[name].encode( "unicode-escape").replace("\"", "\\\"")) for name in sorted(entities.keys())) From f375ba88b92d92084ba0562f9972df6d5e8aea54 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Mon, 24 May 2021 21:03:52 +0300 Subject: [PATCH 03/10] Remove six --- .travis.yml | 2 +- debug-info.py | 2 +- html5lib/_inputstream.py | 9 ++++----- html5lib/_tokenizer.py | 2 -- html5lib/_trie/py.py | 1 - html5lib/_utils.py | 1 - html5lib/filters/lint.py | 1 - html5lib/filters/sanitizer.py | 5 ++--- html5lib/html5parser.py | 1 - html5lib/serializer.py | 1 - html5lib/tests/test_meta.py | 2 -- html5lib/tests/test_parser2.py | 7 ------- html5lib/tests/test_stream.py | 12 ++++-------- html5lib/tests/test_tokenizer2.py | 1 - html5lib/tests/test_treewalkers.py | 1 - html5lib/tests/tokenizer.py | 1 - html5lib/treebuilders/base.py | 1 - html5lib/treebuilders/etree.py | 1 - html5lib/treebuilders/etree_lxml.py | 3 --- html5lib/treewalkers/etree.py | 1 - html5lib/treewalkers/etree_lxml.py | 1 - requirements-install.sh | 4 ---- requirements.txt | 1 - setup.py | 1 - tox.ini | 3 +-- 25 files changed, 13 insertions(+), 52 
deletions(-) diff --git a/.travis.yml b/.travis.yml index a530faeb..8b964f4c 100644 --- a/.travis.yml +++ b/.travis.yml @@ -10,7 +10,7 @@ cache: pip env: global: - - TOXENV=base,optional,six19-optional + - TOXENV=base,optional install: - pip install tox diff --git a/debug-info.py b/debug-info.py index 6e2a19bf..eb5a73f5 100644 --- a/debug-info.py +++ b/debug-info.py @@ -10,7 +10,7 @@ "maxsize": sys.maxsize } -search_modules = ["chardet", "genshi", "html5lib", "lxml", "six"] +search_modules = ["chardet", "genshi", "html5lib", "lxml"] found_modules = [] for m in search_modules: diff --git a/html5lib/_inputstream.py b/html5lib/_inputstream.py index b3a75452..23128ec8 100644 --- a/html5lib/_inputstream.py +++ b/html5lib/_inputstream.py @@ -1,8 +1,7 @@ -from six import text_type -from six.moves import http_client, urllib - import codecs +import http.client import re +import urllib from io import BytesIO, StringIO import webencodings @@ -123,10 +122,10 @@ def _readFromBuffer(self, bytes): def HTMLInputStream(source, **kwargs): # Work around Python bug #20007: read(0) closes the connection. # http://bugs.python.org/issue20007 - if (isinstance(source, http_client.HTTPResponse) or + if (isinstance(source, http.client.HTTPResponse) or # Also check for addinfourl wrapping HTTPResponse (isinstance(source, urllib.response.addbase) and - isinstance(source.fp, http_client.HTTPResponse))): + isinstance(source.fp, http.client.HTTPResponse))): isUnicode = False elif hasattr(source, "read"): isUnicode = isinstance(source.read(0), str) diff --git a/html5lib/_tokenizer.py b/html5lib/_tokenizer.py index e3750c5c..b5219836 100644 --- a/html5lib/_tokenizer.py +++ b/html5lib/_tokenizer.py @@ -1,5 +1,3 @@ -from six import unichr as chr - from collections import deque, OrderedDict from sys import version_info diff --git a/html5lib/_trie/py.py b/html5lib/_trie/py.py index 2e1aa188..05084863 100644 --- a/html5lib/_trie/py.py +++ b/html5lib/_trie/py.py @@ -1,4 +1,3 @@ -from six import text_type from bisect import bisect_left diff --git a/html5lib/_utils.py b/html5lib/_utils.py index f59eec1e..dc51de4a 100644 --- a/html5lib/_utils.py +++ b/html5lib/_utils.py @@ -2,7 +2,6 @@ from collections.abc import Mapping -from six import text_type, PY3 import xml.etree.ElementTree as default_etree diff --git a/html5lib/filters/lint.py b/html5lib/filters/lint.py index ff6c5bd7..f091adb1 100644 --- a/html5lib/filters/lint.py +++ b/html5lib/filters/lint.py @@ -1,4 +1,3 @@ -from six import text_type from . import base from ..constants import namespaces, voidElements diff --git a/html5lib/filters/sanitizer.py b/html5lib/filters/sanitizer.py index 4b753d30..6f1220bf 100644 --- a/html5lib/filters/sanitizer.py +++ b/html5lib/filters/sanitizer.py @@ -10,8 +10,7 @@ import re import warnings from xml.sax.saxutils import escape, unescape - -from six.moves import urllib_parse as urlparse +import urllib.parse from . 
import base from ..constants import namespaces, prefixes @@ -837,7 +836,7 @@ def allowed_token(self, token): # remove replacement characters from unescaped characters val_unescaped = val_unescaped.replace("\ufffd", "") try: - uri = urlparse.urlparse(val_unescaped) + uri = urllib.parse.urlparse(val_unescaped) except ValueError: uri = None del attrs[attr] diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py index b4c51821..1f29d728 100644 --- a/html5lib/html5parser.py +++ b/html5lib/html5parser.py @@ -1,4 +1,3 @@ -from six import with_metaclass, viewkeys import types diff --git a/html5lib/serializer.py b/html5lib/serializer.py index bbcbf9d6..16e94c27 100644 --- a/html5lib/serializer.py +++ b/html5lib/serializer.py @@ -1,4 +1,3 @@ -from six import text_type import re diff --git a/html5lib/tests/test_meta.py b/html5lib/tests/test_meta.py index 8f49bb57..2fc6140d 100644 --- a/html5lib/tests/test_meta.py +++ b/html5lib/tests/test_meta.py @@ -1,4 +1,3 @@ -import six from unittest.mock import Mock from . import support @@ -25,7 +24,6 @@ def test_errorMessage(): r = support.errorMessage(input, expected, actual) # Assertions! - assert six.PY3 assert "Input:\n1\nExpected:\n2\nReceived\n3\n" == r assert input.__repr__.call_count == 1 diff --git a/html5lib/tests/test_parser2.py b/html5lib/tests/test_parser2.py index 8f40bc60..89c9cef1 100644 --- a/html5lib/tests/test_parser2.py +++ b/html5lib/tests/test_parser2.py @@ -1,5 +1,3 @@ -from six import PY2, text_type - import io from . import support # noqa @@ -73,11 +71,6 @@ def test_debug_log(): ('dataState', 'InBodyPhase', 'InBodyPhase', 'processEndTag', {'name': 'p', 'type': 'EndTag'}), ('dataState', 'InBodyPhase', 'InBodyPhase', 'processCharacters', {'type': 'Characters'})] - if PY2: - for i, log in enumerate(expected): - log = [x.encode("ascii") if isinstance(x, str) else x for x in log] - expected[i] = tuple(log) - assert parser.log == expected diff --git a/html5lib/tests/test_stream.py b/html5lib/tests/test_stream.py index 413b43de..b94c7a5c 100644 --- a/html5lib/tests/test_stream.py +++ b/html5lib/tests/test_stream.py @@ -1,14 +1,13 @@ from . 
import support # noqa import codecs +import http.client import sys +import urllib from io import BytesIO, StringIO import pytest -import six -from six.moves import http_client, urllib - from html5lib._inputstream import (BufferedStream, HTMLInputStream, HTMLUnicodeInputStream, HTMLBinaryInputStream) from html5lib._utils import supports_lone_surrogates @@ -189,7 +188,7 @@ def makefile(self, _mode, _bufsize=None): # pylint:disable=unused-argument return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText") - source = http_client.HTTPResponse(FakeSocket()) + source = http.client.HTTPResponse(FakeSocket()) source.begin() stream = HTMLInputStream(source) assert stream.charsUntil(" ") == "Text" @@ -200,15 +199,12 @@ def test_python_issue_20007_b(): Make sure we have a work-around for Python bug #20007 http://bugs.python.org/issue20007 """ - if six.PY2: - return - class FakeSocket: def makefile(self, _mode, _bufsize=None): # pylint:disable=unused-argument return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText") - source = http_client.HTTPResponse(FakeSocket()) + source = http.client.HTTPResponse(FakeSocket()) source.begin() wrapped = urllib.response.addinfourl(source, source.msg, "http://example.com") stream = HTMLInputStream(wrapped) diff --git a/html5lib/tests/test_tokenizer2.py b/html5lib/tests/test_tokenizer2.py index 93a43f4e..bc97943f 100644 --- a/html5lib/tests/test_tokenizer2.py +++ b/html5lib/tests/test_tokenizer2.py @@ -1,6 +1,5 @@ import io -from six import unichr, text_type from html5lib._tokenizer import HTMLTokenizer from html5lib.constants import tokenTypes diff --git a/html5lib/tests/test_treewalkers.py b/html5lib/tests/test_treewalkers.py index 3266361a..108963f0 100644 --- a/html5lib/tests/test_treewalkers.py +++ b/html5lib/tests/test_treewalkers.py @@ -1,7 +1,6 @@ import itertools import sys -from six import unichr, text_type import pytest try: diff --git a/html5lib/tests/tokenizer.py b/html5lib/tests/tokenizer.py index 8cf1f1dd..273955c3 100644 --- a/html5lib/tests/tokenizer.py +++ b/html5lib/tests/tokenizer.py @@ -4,7 +4,6 @@ import re import pytest -from six import unichr from html5lib._tokenizer import HTMLTokenizer from html5lib import constants, _utils diff --git a/html5lib/treebuilders/base.py b/html5lib/treebuilders/base.py index b48ddf84..ec32501c 100644 --- a/html5lib/treebuilders/base.py +++ b/html5lib/treebuilders/base.py @@ -1,4 +1,3 @@ -from six import text_type from ..constants import scopingElements, tableInsertModeElements, namespaces diff --git a/html5lib/treebuilders/etree.py b/html5lib/treebuilders/etree.py index 2a7c80db..11582ee5 100644 --- a/html5lib/treebuilders/etree.py +++ b/html5lib/treebuilders/etree.py @@ -1,6 +1,5 @@ # pylint:disable=protected-access -from six import text_type import re diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py index 3ec133bd..29d34e27 100644 --- a/html5lib/treebuilders/etree_lxml.py +++ b/html5lib/treebuilders/etree_lxml.py @@ -23,7 +23,6 @@ from .. 
import _ihatexml import lxml.etree as etree -from six import PY3, binary_type fullTree = True @@ -203,8 +202,6 @@ def _coerceKey(self, key): def __getitem__(self, key): value = self._element._element.attrib[self._coerceKey(key)] - if not PY3 and isinstance(value, bytes): - value = value.decode("ascii") return value def __setitem__(self, key, value): diff --git a/html5lib/treewalkers/etree.py b/html5lib/treewalkers/etree.py index 8e68f56f..4ad3d58d 100644 --- a/html5lib/treewalkers/etree.py +++ b/html5lib/treewalkers/etree.py @@ -1,7 +1,6 @@ from collections import OrderedDict import re -from six import string_types from . import base from .._utils import moduleFactoryFactory diff --git a/html5lib/treewalkers/etree_lxml.py b/html5lib/treewalkers/etree_lxml.py index 744b94c1..d8c4daa5 100644 --- a/html5lib/treewalkers/etree_lxml.py +++ b/html5lib/treewalkers/etree_lxml.py @@ -1,4 +1,3 @@ -from six import text_type from collections import OrderedDict diff --git a/requirements-install.sh b/requirements-install.sh index b7a8d96d..41d9bc42 100755 --- a/requirements-install.sh +++ b/requirements-install.sh @@ -1,9 +1,5 @@ #!/bin/bash -ex -if [[ $SIX_VERSION ]]; then - pip install six==$SIX_VERSION -fi - pip install -r requirements-test.txt if [[ $USE_OPTIONAL == "true" ]]; then diff --git a/requirements.txt b/requirements.txt index ae7ec3d0..be8fcb77 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1 @@ -six>=1.9 webencodings diff --git a/setup.py b/setup.py index 50054020..d0f5814e 100644 --- a/setup.py +++ b/setup.py @@ -98,7 +98,6 @@ def default_environment(): maintainer_email='james@hoppipolla.co.uk', packages=find_packages(exclude=["*.tests", "*.tests.*", "tests.*", "tests"]), install_requires=[ - 'six>=1.9', 'webencodings', ], python_requires=">=3.6", diff --git a/tox.ini b/tox.ini index 05ca59aa..3ae8afe0 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py{36,37,38,py3}-{base,six19,optional} +envlist = py{36,37,38,py3}-{base,optional} [testenv] deps = @@ -11,7 +11,6 @@ passenv = PYTEST_COMMAND # this is maintained so one can, e.g., PYTEST_COMMAND="coverage run -m pytest" COVERAGE_RUN_OPTIONS commands = - six19: pip install six==1.9 {env:PYTEST_COMMAND:{envbindir}/py.test} {posargs} flake8 {toxinidir} From c289f9d1a7909ed0bda926fdf1e5557b392dcda7 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Mon, 24 May 2021 21:24:53 +0300 Subject: [PATCH 04/10] Upgrade Python syntax with pyupgrade --py36-plus --- html5lib/_inputstream.py | 2 +- html5lib/_trie/py.py | 1 - html5lib/_utils.py | 2 +- html5lib/filters/lint.py | 5 ++--- html5lib/filters/sanitizer.py | 2 +- html5lib/html5parser.py | 1 - html5lib/serializer.py | 1 - html5lib/tests/conftest.py | 4 ++-- html5lib/tests/support.py | 2 +- html5lib/tests/test_sanitizer.py | 22 +++++++++++----------- html5lib/tests/test_serializer.py | 4 ++-- html5lib/tests/test_tokenizer2.py | 4 ++-- html5lib/tests/test_treewalkers.py | 4 ++-- html5lib/tests/tree_construction.py | 2 +- html5lib/treebuilders/base.py | 5 ++--- html5lib/treebuilders/dom.py | 2 +- html5lib/treebuilders/etree.py | 22 +++++++++++----------- html5lib/treebuilders/etree_lxml.py | 16 ++++++++-------- html5lib/treewalkers/__init__.py | 2 +- html5lib/treewalkers/etree_lxml.py | 1 - parse.py | 2 +- utils/entities.py | 4 ++-- 22 files changed, 52 insertions(+), 58 deletions(-) diff --git a/html5lib/_inputstream.py b/html5lib/_inputstream.py index 23128ec8..078026b7 100644 --- a/html5lib/_inputstream.py +++ b/html5lib/_inputstream.py @@ -521,7 +521,7 @@ def 
changeEncoding(self, newEncoding): self.rawStream.seek(0) self.charEncoding = (newEncoding, "certain") self.reset() - raise _ReparseException("Encoding changed from {} to {}".format(self.charEncoding[0], newEncoding)) + raise _ReparseException(f"Encoding changed from {self.charEncoding[0]} to {newEncoding}") def detectBOM(self): """Attempts to detect at BOM at the start of the stream. If diff --git a/html5lib/_trie/py.py b/html5lib/_trie/py.py index 05084863..bc6363c4 100644 --- a/html5lib/_trie/py.py +++ b/html5lib/_trie/py.py @@ -1,4 +1,3 @@ - from bisect import bisect_left from ._base import Trie as ABCTrie diff --git a/html5lib/_utils.py b/html5lib/_utils.py index dc51de4a..95a5569b 100644 --- a/html5lib/_utils.py +++ b/html5lib/_utils.py @@ -110,7 +110,7 @@ def moduleFactoryFactory(factory): moduleCache = {} def moduleFactory(baseModule, *args, **kwargs): - if isinstance(ModuleType.__name__, type("")): + if isinstance(ModuleType.__name__, str): name = "_%s_factory" % baseModule.__name__ else: name = b"_%s_factory" % baseModule.__name__ diff --git a/html5lib/filters/lint.py b/html5lib/filters/lint.py index f091adb1..f0ffce61 100644 --- a/html5lib/filters/lint.py +++ b/html5lib/filters/lint.py @@ -1,4 +1,3 @@ - from . import base from ..constants import namespaces, voidElements @@ -56,7 +55,7 @@ def __iter__(self): assert isinstance(name, str) assert name != "" if (not namespace or namespace == namespaces["html"]) and name in voidElements: - assert False, "Void element reported as EndTag token: {tag}".format(tag=name) + assert False, f"Void element reported as EndTag token: {name}" elif self.require_matching_tags: start = open_elements.pop() assert start == (namespace, name) @@ -85,6 +84,6 @@ def __iter__(self): assert isinstance(token["data"], str) else: - assert False, "Unknown token type: {type}".format(type=type) + assert False, f"Unknown token type: {type}" yield token diff --git a/html5lib/filters/sanitizer.py b/html5lib/filters/sanitizer.py index 6f1220bf..a1b61099 100644 --- a/html5lib/filters/sanitizer.py +++ b/html5lib/filters/sanitizer.py @@ -872,7 +872,7 @@ def disallowed_token(self, token): assert token_type in ("StartTag", "EmptyTag") attrs = [] for (ns, name), v in token["data"].items(): - attrs.append(' {}="{}"'.format(name if ns is None else "{}:{}".format(prefixes[ns], name), escape(v))) + attrs.append(' {}="{}"'.format(name if ns is None else f"{prefixes[ns]}:{name}", escape(v))) token["data"] = "<{}{}>".format(token["name"], ''.join(attrs)) else: token["data"] = "<%s>" % token["name"] diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py index 1f29d728..3996c9b8 100644 --- a/html5lib/html5parser.py +++ b/html5lib/html5parser.py @@ -1,4 +1,3 @@ - import types from . 
import _inputstream diff --git a/html5lib/serializer.py b/html5lib/serializer.py index 16e94c27..cd4631f6 100644 --- a/html5lib/serializer.py +++ b/html5lib/serializer.py @@ -1,4 +1,3 @@ - import re from codecs import register_error, xmlcharrefreplace_errors diff --git a/html5lib/tests/conftest.py b/html5lib/tests/conftest.py index 62a7991b..e5f7a2ac 100644 --- a/html5lib/tests/conftest.py +++ b/html5lib/tests/conftest.py @@ -69,7 +69,7 @@ def pytest_configure(config): try: installed = pkg_resources.working_set.find(req) except pkg_resources.VersionConflict: - msgs.append("Outdated version of {} installed, need {}".format(req.name, spec)) + msgs.append(f"Outdated version of {req.name} installed, need {spec}") else: if not installed: msgs.append("Need %s" % spec) @@ -78,7 +78,7 @@ def pytest_configure(config): import xml.etree.ElementTree as ElementTree try: - import xml.etree.cElementTree as cElementTree + import xml.etree.ElementTree as cElementTree except ImportError: msgs.append("cElementTree unable to be imported") else: diff --git a/html5lib/tests/support.py b/html5lib/tests/support.py index d51cae12..f311fb92 100644 --- a/html5lib/tests/support.py +++ b/html5lib/tests/support.py @@ -33,7 +33,7 @@ } try: - import xml.etree.cElementTree as cElementTree # noqa + import xml.etree.ElementTree as cElementTree # noqa except ImportError: treeTypes['cElementTree'] = None else: diff --git a/html5lib/tests/test_sanitizer.py b/html5lib/tests/test_sanitizer.py index e5cdc2af..f8b4b4b6 100644 --- a/html5lib/tests/test_sanitizer.py +++ b/html5lib/tests/test_sanitizer.py @@ -58,19 +58,19 @@ def param_sanitizer(): if tag_name == 'image': yield ("test_should_allow_%s_tag" % tag_name, "foo <bad>bar</bad> baz", - "<{} title='1'>foo bar baz".format(tag_name, tag_name)) + f"<{tag_name} title='1'>foo bar baz") elif tag_name == 'br': yield ("test_should_allow_%s_tag" % tag_name, "
foo <bad>bar</bad> baz
", - "<{} title='1'>foo bar baz".format(tag_name, tag_name)) + f"<{tag_name} title='1'>foo bar baz") elif tag_name in constants.voidElements: yield ("test_should_allow_%s_tag" % tag_name, "<%s title=\"1\"/>foo <bad>bar</bad> baz" % tag_name, - "<{} title='1'>foo bar baz".format(tag_name, tag_name)) + f"<{tag_name} title='1'>foo bar baz") else: yield ("test_should_allow_%s_tag" % tag_name, - "<{} title=\"1\">foo <bad>bar</bad> baz".format(tag_name, tag_name), - "<{} title='1'>foo bar baz".format(tag_name, tag_name)) + f"<{tag_name} title=\"1\">foo <bad>bar</bad> baz", + f"<{tag_name} title='1'>foo bar baz") for ns, attribute_name in sanitizer.allowed_attributes: if ns is not None: @@ -83,16 +83,16 @@ def param_sanitizer(): if attribute_name in sanitizer.attr_val_is_uri: attribute_value = '%s://sub.domain.tld/path/object.ext' % sanitizer.allowed_protocols[0] yield ("test_should_allow_%s_attribute" % attribute_name, - "

foo <bad>bar</bad> baz

".format(attribute_name, attribute_value), - "

foo bar baz

".format(attribute_name, attribute_value)) + f"

foo <bad>bar</bad> baz

", + f"

foo bar baz

") for protocol in sanitizer.allowed_protocols: rest_of_uri = '//sub.domain.tld/path/object.ext' if protocol == 'data': rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ=' yield ("test_should_allow_uppercase_%s_uris" % protocol, - "foo
".format(protocol, rest_of_uri), - """foo""".format(protocol, rest_of_uri)) + f"foo", + f"""foo""") for protocol in sanitizer.allowed_protocols: rest_of_uri = '//sub.domain.tld/path/object.ext' @@ -100,8 +100,8 @@ def param_sanitizer(): rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ=' protocol = protocol.upper() yield ("test_should_allow_uppercase_%s_uris" % protocol, - "foo".format(protocol, rest_of_uri), - """foo""".format(protocol, rest_of_uri)) + f"foo", + f"""foo""") @pytest.mark.parametrize("expected, input", diff --git a/html5lib/tests/test_serializer.py b/html5lib/tests/test_serializer.py index e8371247..2ed71de6 100644 --- a/html5lib/tests/test_serializer.py +++ b/html5lib/tests/test_serializer.py @@ -219,6 +219,6 @@ def test_serializer(input, expected, options): result = serialize_html(input, options) if len(expected) == 1: - assert expected[0] == result, "Expected:\n{}\nActual:\n{}\nOptions:\n{}".format(expected[0], result, str(options)) + assert expected[0] == result, f"Expected:\n{expected[0]}\nActual:\n{result}\nOptions:\n{str(options)}" elif result not in expected: - assert False, "Expected: {}, Received: {}".format(expected, result) + assert False, f"Expected: {expected}, Received: {result}" diff --git a/html5lib/tests/test_tokenizer2.py b/html5lib/tests/test_tokenizer2.py index bc97943f..a616288f 100644 --- a/html5lib/tests/test_tokenizer2.py +++ b/html5lib/tests/test_tokenizer2.py @@ -14,7 +14,7 @@ def ignore_parse_errors(toks): def test_maintain_attribute_order(): # generate loads to maximize the chance a hash-based mutation will occur attrs = [(chr(x), str(i)) for i, x in enumerate(range(ord('a'), ord('z')))] - stream = io.StringIO("") + stream = io.StringIO("") toks = HTMLTokenizer(stream) out = list(ignore_parse_errors(toks)) @@ -47,7 +47,7 @@ def test_duplicate_attribute(): def test_maintain_duplicate_attribute_order(): # generate loads to maximize the chance a hash-based mutation will occur attrs = [(chr(x), str(i)) for i, x in enumerate(range(ord('a'), ord('z')))] - stream = io.StringIO("") + stream = io.StringIO("") toks = HTMLTokenizer(stream) out = list(ignore_parse_errors(toks)) diff --git a/html5lib/tests/test_treewalkers.py b/html5lib/tests/test_treewalkers.py index 108963f0..a15a54e3 100644 --- a/html5lib/tests/test_treewalkers.py +++ b/html5lib/tests/test_treewalkers.py @@ -99,7 +99,7 @@ def test_treewalker_six_mix(intext, expected, attrs_to_add, tree): output = treewalkers.pprint(treeClass["walker"](document)) output = attrlist.sub(sortattrs, output) if output not in expected: - raise AssertionError("TreewalkerEditTest: {}\nExpected:\n{}\nReceived:\n{}".format(treeName, expected, output)) + raise AssertionError(f"TreewalkerEditTest: {treeName}\nExpected:\n{expected}\nReceived:\n{output}") @pytest.mark.parametrize("tree,char", itertools.product(sorted(treeTypes.items()), ["x", "\u1234"])) @@ -149,7 +149,7 @@ def test_maintain_attribute_order(treeName): # generate loads to maximize the chance a hash-based mutation will occur attrs = [(chr(x), str(i)) for i, x in enumerate(range(ord('a'), ord('z')))] - data = "" + data = "" parser = html5parser.HTMLParser(tree=treeAPIs["builder"]) document = parser.parseFragment(data) diff --git a/html5lib/tests/tree_construction.py b/html5lib/tests/tree_construction.py index e1b0c180..204865ba 100644 --- a/html5lib/tests/tree_construction.py +++ b/html5lib/tests/tree_construction.py @@ -119,7 +119,7 @@ def runtest(self): errStr = [] for (line, col), errorcode, datavars in p.errors: - assert isinstance(datavars, dict), "{}, 
{}".format(errorcode, repr(datavars))
+        assert isinstance(datavars, dict), f"{errorcode}, {repr(datavars)}"
         errStr.append("Line: %i Col: %i %s" % (line, col,
                                                constants.E[errorcode] % datavars))
diff --git a/html5lib/treebuilders/base.py b/html5lib/treebuilders/base.py
index ec32501c..4afd3c56 100644
--- a/html5lib/treebuilders/base.py
+++ b/html5lib/treebuilders/base.py
@@ -1,4 +1,3 @@
-
 from ..constants import scopingElements, tableInsertModeElements, namespaces
 # The scope markers are inserted when entering object elements,
@@ -41,11 +40,11 @@ def __init__(self, name):
         self._flags = []
     def __str__(self):
-        attributesStr = " ".join("{}=\"{}\"".format(name, value)
+        attributesStr = " ".join(f"{name}=\"{value}\""
                                  for name, value in self.attributes.items())
         if attributesStr:
-            return "<{} {}>".format(self.name, attributesStr)
+            return f"<{self.name} {attributesStr}>"
         else:
             return "<%s>" % (self.name)
diff --git a/html5lib/treebuilders/dom.py b/html5lib/treebuilders/dom.py
index 51219093..2d632d6e 100644
--- a/html5lib/treebuilders/dom.py
+++ b/html5lib/treebuilders/dom.py
@@ -212,7 +212,7 @@ def serializeElement(element, indent=0):
                 value = attr.value
                 ns = attr.namespaceURI
                 if ns:
-                    name = "{} {}".format(constants.prefixes[ns], attr.localName)
+                    name = f"{constants.prefixes[ns]} {attr.localName}"
                 else:
                     name = attr.nodeName
                 attributes.append((name, value))
diff --git a/html5lib/treebuilders/etree.py b/html5lib/treebuilders/etree.py
index 11582ee5..f3fea0f9 100644
--- a/html5lib/treebuilders/etree.py
+++ b/html5lib/treebuilders/etree.py
@@ -36,7 +36,7 @@ def _getETreeTag(self, name, namespace):
         if namespace is None:
             etree_tag = name
         else:
-            etree_tag = "{{{}}}{}".format(namespace, name)
+            etree_tag = f"{{{namespace}}}{name}"
         return etree_tag
     def _setName(self, name):
@@ -68,7 +68,7 @@ def _setAttributes(self, attributes):
             # allocation on average
             for key, value in attributes.items():
                 if isinstance(key, tuple):
-                    name = "{{{}}}{}".format(key[2], key[1])
+                    name = f"{{{key[2]}}}{key[1]}"
                 else:
                     name = key
                 el_attrib[name] = value
@@ -208,7 +208,7 @@ def serializeElement(element, indent=0):
                 rv.append("""<!DOCTYPE %s "%s" "%s">""" % (element.text, publicId, systemId))
             else:
-                rv.append("<!DOCTYPE {}>".format(element.text))
+                rv.append(f"<!DOCTYPE {element.text}>")
         elif element.tag == "DOCUMENT_ROOT":
             rv.append("#document")
             if element.text is not None:
@@ -221,7 +221,7 @@ def serializeElement(element, indent=0):
             rv.append("|{}<!-- {} -->".format(' ' * indent, element.text))
         else:
             assert isinstance(element.tag, str), \
-                "Expected unicode, got {}, {}".format(type(element.tag), element.tag)
+                f"Expected unicode, got {type(element.tag)}, {element.tag}"
             nsmatch = tag_regexp.match(element.tag)
             if nsmatch is None:
@@ -229,7 +229,7 @@ def serializeElement(element, indent=0):
             else:
                 ns, name = nsmatch.groups()
                 prefix = constants.prefixes[ns]
-                name = "{} {}".format(prefix, name)
+                name = f"{prefix} {name}"
             rv.append("|{}<{}>".format(' ' * indent, name))
             if hasattr(element, "attrib"):
@@ -239,7 +239,7 @@ def serializeElement(element, indent=0):
                     if nsmatch is not None:
                         ns, name = nsmatch.groups()
                         prefix = constants.prefixes[ns]
-                        attr_string = "{} {}".format(prefix, name)
+                        attr_string = f"{prefix} {name}"
                     else:
                         attr_string = name
                     attributes.append((attr_string, value))
@@ -273,7 +273,7 @@ def serializeElement(element):
                 rv.append("""<!DOCTYPE %s PUBLIC "%s" "%s">""" % (element.text, publicId, systemId))
             else:
-                rv.append("<!DOCTYPE {}>".format(element.text))
+                rv.append(f"<!DOCTYPE {element.text}>")
         elif element.tag == "DOCUMENT_ROOT":
             if element.text is not None:
                 rv.append(element.text)
@@ -286,23 +286,23 @@
                serializeElement(child)
         elif element.tag == ElementTreeCommentType:
-            rv.append("<!--{}-->".format(element.text))
+            rv.append(f"<!--{element.text}-->")
         else:
             # This is assumed to be an ordinary element
             if not element.attrib:
-                rv.append("<{}>".format(filter.fromXmlName(element.tag)))
+                rv.append(f"<{filter.fromXmlName(element.tag)}>")
             else:
                 attr = " ".join("{}=\"{}\"".format(
                     filter.fromXmlName(name), value)
                     for name, value in element.attrib.items())
-                rv.append("<{} {}>".format(element.tag, attr))
+                rv.append(f"<{element.tag} {attr}>")
             if element.text:
                 rv.append(element.text)
         for child in element:
             serializeElement(child)
-        rv.append("</{}>".format(element.tag))
+        rv.append(f"</{element.tag}>")
         if element.tail:
             rv.append(element.tail)
diff --git a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py
index 29d34e27..3bcf8c96 100644
--- a/html5lib/treebuilders/etree_lxml.py
+++ b/html5lib/treebuilders/etree_lxml.py
@@ -115,7 +115,7 @@ def serializeElement(element, indent=0):
                     ns, name = nsmatch.groups()
                     name = infosetFilter.fromXmlName(name)
                     prefix = constants.prefixes[ns]
-                    attr_string = "{} {}".format(prefix, name)
+                    attr_string = f"{prefix} {name}"
                 else:
                     attr_string = infosetFilter.fromXmlName(name)
                 attributes.append((attr_string, value))
@@ -150,23 +150,23 @@ def serializeElement(element):
             serializeElement(element.getroot())
     elif element.tag == comment_type:
-        rv.append("<!--{}-->".format(element.text))
+        rv.append(f"<!--{element.text}-->")
     else:
         # This is assumed to be an ordinary element
         if not element.attrib:
-            rv.append("<{}>".format(element.tag))
+            rv.append(f"<{element.tag}>")
         else:
-            attr = " ".join("{}=\"{}\"".format(name, value)
+            attr = " ".join(f"{name}=\"{value}\""
                             for name, value in element.attrib.items())
-            rv.append("<{} {}>".format(element.tag, attr))
+            rv.append(f"<{element.tag} {attr}>")
         if element.text:
             rv.append(element.text)
         for child in element:
             serializeElement(child)
-        rv.append("</{}>".format(element.tag))
+        rv.append(f"</{element.tag}>")
     if hasattr(element, "tail") and element.tail:
         rv.append(element.tail)
@@ -195,7 +195,7 @@ def __init__(self, element):
     def _coerceKey(self, key):
         if isinstance(key, tuple):
-            name = "{{{}}}{}".format(key[2], infosetFilter.coerceAttribute(key[1]))
+            name = f"{{{key[2]}}}{infosetFilter.coerceAttribute(key[1])}"
         else:
             name = infosetFilter.coerceAttribute(key)
         return name
@@ -371,7 +371,7 @@ def insertRoot(self, token):
         if namespace is None:
             etree_tag = name
         else:
-            etree_tag = "{{{}}}{}".format(namespace, name)
+            etree_tag = f"{{{namespace}}}{name}"
         root.tag = etree_tag
         # Add the root element to the internal child/open data structures
diff --git a/html5lib/treewalkers/__init__.py b/html5lib/treewalkers/__init__.py
index c8ecc081..70e0fff6 100644
--- a/html5lib/treewalkers/__init__.py
+++ b/html5lib/treewalkers/__init__.py
@@ -108,7 +108,7 @@ def pprint(walker):
                         ns = constants.prefixes[namespace]
                     else:
                         ns = namespace
-                    name = "{} {}".format(ns, localname)
+                    name = f"{ns} {localname}"
                 else:
                     name = localname
                 output.append("{}{}=\"{}\"".format(" " * indent, name, value))
diff --git a/html5lib/treewalkers/etree_lxml.py b/html5lib/treewalkers/etree_lxml.py
index d8c4daa5..0ec633ac 100644
--- a/html5lib/treewalkers/etree_lxml.py
+++ b/html5lib/treewalkers/etree_lxml.py
@@ -1,4 +1,3 @@
-
 from collections import OrderedDict
 from lxml import etree
diff --git a/parse.py b/parse.py
index b72d2ef7..dd919364 100755
--- a/parse.py
+++ b/parse.py
@@ -80,7 +80,7 @@ def parse():
         if document:
             printOutput(p, document, opts)
             t2 = time.time()
-            sys.stderr.write("\n\nRun took: {:f}s (plus {:f}s to print the output)".format(t1 - t0, t2
- t1)) + sys.stderr.write(f"\n\nRun took: {t1 - t0:f}s (plus {t2 - t1:f}s to print the output)") else: sys.stderr.write("\n\nRun took: %fs" % (t1 - t0)) else: diff --git a/utils/entities.py b/utils/entities.py index c70504c8..faeb4b45 100644 --- a/utils/entities.py +++ b/utils/entities.py @@ -49,9 +49,9 @@ def test_description(name, good): semicolon_text = {True: "with a semi-colon", False: "without a semi-colon"}[with_semicolon] if good: - text = "Named entity: {} {}".format(name, semicolon_text) + text = f"Named entity: {name} {semicolon_text}" else: - text = "Bad named entity: {} {}".format(name, semicolon_text) + text = f"Bad named entity: {name} {semicolon_text}" return text From 03f46e23663aa6cc3f5d873a4db65d9d020fb5ca Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Mon, 24 May 2021 23:53:39 +0300 Subject: [PATCH 05/10] AppVeyor: Use Python 3.6 as base version --- .appveyor.yml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index 0b3c4e8a..e022e685 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -1,7 +1,7 @@ # To activate, change the Appveyor settings to use `.appveyor.yml`. environment: global: - PATH: "C:\\Python27\\Scripts\\;%PATH%" + PATH: "C:\\Python36\\Scripts\\;%PATH%" matrix: - TOXENV: py36-base - TOXENV: py36-optional @@ -12,7 +12,7 @@ environment: install: - git submodule update --init --recursive - - python -m pip install tox + - C:\\Python36\\python.exe -m pip install tox build: off @@ -20,4 +20,4 @@ test_script: - tox after_test: - - python debug-info.py + - C:\\Python36\\python.exe debug-info.py From a0903811fe76e475b4f6d37b7279508cb9a8ff70 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Sun, 3 Apr 2022 20:52:09 +0300 Subject: [PATCH 06/10] Universal wheels for supporting Python 2 and 3 --- setup.cfg | 3 --- 1 file changed, 3 deletions(-) diff --git a/setup.cfg b/setup.cfg index 0b2bb9c7..2a44c0f2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,3 @@ -[bdist_wheel] -universal = 1 - [pep8] ignore = N max-line-length = 139 From 043c019f816aa7261b567cd5619f50178d701831 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Sun, 3 Apr 2022 22:22:46 +0300 Subject: [PATCH 07/10] Remove unused mock dependency --- README.rst | 2 +- requirements-test.txt | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/README.rst b/README.rst index fef6d315..f4943e52 100644 --- a/README.rst +++ b/README.rst @@ -127,7 +127,7 @@ Please report any bugs on the `issue tracker Tests ----- -Unit tests require the ``pytest`` and ``mock`` libraries and can be +Unit tests require the ``pytest`` library and can be run using the ``py.test`` command in the root directory. Test data are contained in a separate `html5lib-tests diff --git a/requirements-test.txt b/requirements-test.txt index ce882670..fa6f2c62 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -5,4 +5,3 @@ flake8>=3.8.1,<3.9 pytest>=5.4.2,<7 coverage>=5.1,<6 pytest-expect>=1.1.0,<2 -mock>=4.0.2,<5 From 589b01c61f706c4e9c69337692b8d1d4679288b5 Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Wed, 20 Apr 2022 18:47:31 +0300 Subject: [PATCH 08/10] Fix PytestRemovedIn8Warning: The (fspath: py.path.local) argument to SanitizerFile is deprecated. 
--- html5lib/tests/conftest.py | 8 ++++---- requirements-test.txt | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/html5lib/tests/conftest.py b/html5lib/tests/conftest.py index e5f7a2ac..0b3fc4df 100644 --- a/html5lib/tests/conftest.py +++ b/html5lib/tests/conftest.py @@ -89,7 +89,7 @@ def pytest_configure(config): pytest.exit("\n".join(msgs)) -def pytest_collect_file(path, parent): +def pytest_collect_file(file_path, path, parent): dir = os.path.abspath(path.dirname) dir_and_parents = set() while dir not in dir_and_parents: @@ -98,13 +98,13 @@ def pytest_collect_file(path, parent): if _tree_construction in dir_and_parents: if path.ext == ".dat": - return TreeConstructionFile.from_parent(parent, fspath=path) + return TreeConstructionFile.from_parent(parent, path=file_path) elif _tokenizer in dir_and_parents: if path.ext == ".test": - return TokenizerFile.from_parent(parent, fspath=path) + return TokenizerFile.from_parent(parent, path=file_path) elif _sanitizer_testdata in dir_and_parents: if path.ext == ".dat": - return SanitizerFile.from_parent(parent, fspath=path) + return SanitizerFile.from_parent(parent, path=file_path) # Tiny wrapper to allow .from_parent constructors on older pytest for PY27 diff --git a/requirements-test.txt b/requirements-test.txt index fa6f2c62..06e0d48e 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -2,6 +2,6 @@ tox>=3.15.1,<4 flake8>=3.8.1,<3.9 -pytest>=5.4.2,<7 +pytest>=7,<8 coverage>=5.1,<6 pytest-expect>=1.1.0,<2 From 4d975c6d5ff04f49d9a66826ed6be5a1c754c7fa Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Wed, 6 Jul 2022 10:42:30 +0300 Subject: [PATCH 09/10] Drop support for EOL Python 3.6 --- .appveyor.yml | 8 +++----- .github/workflows/python-tox.yml | 7 ++++--- .travis.yml | 1 - setup.py | 3 +-- tox.ini | 4 ++-- 5 files changed, 10 insertions(+), 13 deletions(-) diff --git a/.appveyor.yml b/.appveyor.yml index e022e685..3088260f 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -1,10 +1,8 @@ # To activate, change the Appveyor settings to use `.appveyor.yml`. 
environment: global: - PATH: "C:\\Python36\\Scripts\\;%PATH%" + PATH: "C:\\Python38\\Scripts\\;%PATH%" matrix: - - TOXENV: py36-base - - TOXENV: py36-optional - TOXENV: py37-base - TOXENV: py37-optional - TOXENV: py38-base @@ -12,7 +10,7 @@ environment: install: - git submodule update --init --recursive - - C:\\Python36\\python.exe -m pip install tox + - C:\\Python38\\python.exe -m pip install tox build: off @@ -20,4 +18,4 @@ test_script: - tox after_test: - - C:\\Python36\\python.exe debug-info.py + - C:\\Python38\\python.exe debug-info.py diff --git a/.github/workflows/python-tox.yml b/.github/workflows/python-tox.yml index 9673f503..99f9dbf9 100644 --- a/.github/workflows/python-tox.yml +++ b/.github/workflows/python-tox.yml @@ -6,13 +6,14 @@ jobs: if: github.event.push || github.event.pull_request.head.repo.full_name != github.repository runs-on: ubuntu-latest strategy: + fail-fast: false matrix: - python: [3.6, 3.7, 3.8, pypy3] + python: [3.7, 3.8, pypy3.8] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: submodules: true - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v4 with: python-version: ${{ matrix.python }} - run: pip install tox diff --git a/.travis.yml b/.travis.yml index 8b964f4c..a477f122 100644 --- a/.travis.yml +++ b/.travis.yml @@ -4,7 +4,6 @@ python: - "3.9" - "3.8" - "3.7" - - "3.6" cache: pip diff --git a/setup.py b/setup.py index d0f5814e..e24296a4 100644 --- a/setup.py +++ b/setup.py @@ -60,7 +60,6 @@ def default_environment(): 'Operating System :: OS Independent', 'Programming Language :: Python', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', 'Programming Language :: Python :: 3 :: Only', @@ -100,7 +99,7 @@ def default_environment(): install_requires=[ 'webencodings', ], - python_requires=">=3.6", + python_requires=">=3.7", extras_require={ # A conditional extra will only install these items when the extra is # requested and the condition matches. diff --git a/tox.ini b/tox.ini index 3ae8afe0..027278f2 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py{36,37,38,py3}-{base,optional} +envlist = py{37,38,py3}-{base,optional} [testenv] deps = @@ -11,7 +11,7 @@ passenv = PYTEST_COMMAND # this is maintained so one can, e.g., PYTEST_COMMAND="coverage run -m pytest" COVERAGE_RUN_OPTIONS commands = - {env:PYTEST_COMMAND:{envbindir}/py.test} {posargs} + {env:PYTEST_COMMAND:{envbindir}/pytest} {posargs} flake8 {toxinidir} [testenv:doc] From 67b42b705cdb129e5833f113914764aaecc9e34b Mon Sep 17 00:00:00 2001 From: Hugo van Kemenade Date: Wed, 6 Jul 2022 10:54:56 +0300 Subject: [PATCH 10/10] Upgrade Python syntax for Python 3.7+ --- html5lib/_tokenizer.py | 10 ++-------- html5lib/tests/test_treewalkers.py | 9 ++++----- 2 files changed, 6 insertions(+), 13 deletions(-) diff --git a/html5lib/_tokenizer.py b/html5lib/_tokenizer.py index b5219836..91699519 100644 --- a/html5lib/_tokenizer.py +++ b/html5lib/_tokenizer.py @@ -1,5 +1,4 @@ -from collections import deque, OrderedDict -from sys import version_info +from collections import deque from .constants import spaceCharacters from .constants import entities @@ -14,11 +13,6 @@ entitiesTrie = Trie(entities) -if version_info >= (3, 7): - attributeMap = dict -else: - attributeMap = OrderedDict - class HTMLTokenizer: """ This class takes care of tokenizing HTML. 
@@ -232,7 +226,7 @@ def emitCurrentToken(self): token["name"] = token["name"].translate(asciiUpper2Lower) if token["type"] == tokenTypes["StartTag"]: raw = token["data"] - data = attributeMap(raw) + data = dict(raw) if len(raw) > len(data): # we had some duplicated attribute, fix so first wins data.update(raw[::-1]) diff --git a/html5lib/tests/test_treewalkers.py b/html5lib/tests/test_treewalkers.py index a15a54e3..18d77128 100644 --- a/html5lib/tests/test_treewalkers.py +++ b/html5lib/tests/test_treewalkers.py @@ -1,5 +1,4 @@ import itertools -import sys import pytest @@ -139,8 +138,8 @@ def test_lxml_xml(): @pytest.mark.parametrize("treeName", [pytest.param(treeName, marks=[getattr(pytest.mark, treeName), pytest.mark.skipif( - treeName != "lxml" or - sys.version_info < (3, 7), reason="dict order undef")]) + treeName != "lxml", + reason="dict order undef")]) for treeName in sorted(treeTypes.keys())]) def test_maintain_attribute_order(treeName): treeAPIs = treeTypes[treeName] @@ -172,8 +171,8 @@ def test_maintain_attribute_order(treeName): @pytest.mark.parametrize("treeName", [pytest.param(treeName, marks=[getattr(pytest.mark, treeName), pytest.mark.skipif( - treeName != "lxml" or - sys.version_info < (3, 7), reason="dict order undef")]) + treeName != "lxml", + reason="dict order undef")]) for treeName in sorted(treeTypes.keys())]) def test_maintain_attribute_order_adjusted(treeName): treeAPIs = treeTypes[treeName]