diff --git a/.appveyor.yml b/.appveyor.yml index a1a3e347..3088260f 100644 --- a/.appveyor.yml +++ b/.appveyor.yml @@ -1,14 +1,8 @@ # To activate, change the Appveyor settings to use `.appveyor.yml`. environment: global: - PATH: "C:\\Python27\\Scripts\\;%PATH%" + PATH: "C:\\Python38\\Scripts\\;%PATH%" matrix: - - TOXENV: py27-base - - TOXENV: py27-optional - - TOXENV: py35-base - - TOXENV: py35-optional - - TOXENV: py36-base - - TOXENV: py36-optional - TOXENV: py37-base - TOXENV: py37-optional - TOXENV: py38-base @@ -16,7 +10,7 @@ environment: install: - git submodule update --init --recursive - - python -m pip install tox + - C:\\Python38\\python.exe -m pip install tox build: off @@ -24,4 +18,4 @@ test_script: - tox after_test: - - python debug-info.py + - C:\\Python38\\python.exe debug-info.py diff --git a/.github/workflows/python-tox.yml b/.github/workflows/python-tox.yml index ec5cf636..99f9dbf9 100644 --- a/.github/workflows/python-tox.yml +++ b/.github/workflows/python-tox.yml @@ -6,13 +6,14 @@ jobs: if: github.event.push || github.event.pull_request.head.repo.full_name != github.repository runs-on: ubuntu-latest strategy: + fail-fast: false matrix: - python: [2.7, 3.5, 3.6, 3.7, 3.8, pypy-2.7, pypy3] + python: [3.7, 3.8, pypy3.8] steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 with: submodules: true - - uses: actions/setup-python@v2 + - uses: actions/setup-python@v4 with: python-version: ${{ matrix.python }} - run: pip install tox diff --git a/.travis.yml b/.travis.yml index d2d9e30e..a477f122 100644 --- a/.travis.yml +++ b/.travis.yml @@ -1,19 +1,15 @@ language: python python: - "pypy3" - - "pypy" + - "3.9" - "3.8" - "3.7" - - "3.6" - - "3.5" - - "2.7" - - "3.9-dev" cache: pip env: global: - - TOXENV=base,optional,six19-optional + - TOXENV=base,optional install: - pip install tox diff --git a/README.rst b/README.rst index d367905d..f4943e52 100644 --- a/README.rst +++ b/README.rst @@ -91,7 +91,7 @@ More documentation is available at 
https://html5lib.readthedocs.io/. Installation ------------ -html5lib works on CPython 2.7+, CPython 3.5+ and PyPy. To install: +html5lib works on CPython 3.6+ and PyPy3. To install: .. code-block:: bash @@ -127,7 +127,7 @@ Please report any bugs on the `issue tracker Tests ----- -Unit tests require the ``pytest`` and ``mock`` libraries and can be +Unit tests require the ``pytest`` library and can be run using the ``py.test`` command in the root directory. Test data are contained in a separate `html5lib-tests diff --git a/debug-info.py b/debug-info.py index b47b8ebf..eb5a73f5 100644 --- a/debug-info.py +++ b/debug-info.py @@ -1,5 +1,3 @@ -from __future__ import print_function, unicode_literals - import platform import sys @@ -12,7 +10,7 @@ "maxsize": sys.maxsize } -search_modules = ["chardet", "genshi", "html5lib", "lxml", "six"] +search_modules = ["chardet", "genshi", "html5lib", "lxml"] found_modules = [] for m in search_modules: diff --git a/doc/conf.py b/doc/conf.py index 22ebab4f..d28655ac 100644 --- a/doc/conf.py +++ b/doc/conf.py @@ -1,5 +1,4 @@ #!/usr/bin/env python3 -# -*- coding: utf-8 -*- # # html5lib documentation build configuration file, created by # sphinx-quickstart on Wed May 8 00:04:49 2013. 
@@ -92,7 +91,7 @@ ] -class CExtMock(object): +class CExtMock: """Required for autodoc on readthedocs.org where you cannot build C extensions.""" def __init__(self, *args, **kwargs): pass diff --git a/html5lib/__init__.py b/html5lib/__init__.py index 7b854f99..d2c68855 100644 --- a/html5lib/__init__.py +++ b/html5lib/__init__.py @@ -20,7 +20,6 @@ * :func:`~.serializer.serialize` """ -from __future__ import absolute_import, division, unicode_literals from .html5parser import HTMLParser, parse, parseFragment from .treebuilders import getTreeBuilder diff --git a/html5lib/_ihatexml.py b/html5lib/_ihatexml.py index 3ff803c1..90757cbf 100644 --- a/html5lib/_ihatexml.py +++ b/html5lib/_ihatexml.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - import re import warnings @@ -184,7 +182,7 @@ def escapeRegexp(string): nonPubidCharRegexp = re.compile("[^\x20\x0D\x0Aa-zA-Z0-9\\-'()+,./:=?;!*#@$_%]") -class InfosetFilter(object): +class InfosetFilter: replacementRegexp = re.compile(r"U[\dA-F]{5,5}") def __init__(self, diff --git a/html5lib/_inputstream.py b/html5lib/_inputstream.py index 0207dd21..078026b7 100644 --- a/html5lib/_inputstream.py +++ b/html5lib/_inputstream.py @@ -1,10 +1,7 @@ -from __future__ import absolute_import, division, unicode_literals - -from six import text_type -from six.moves import http_client, urllib - import codecs +import http.client import re +import urllib from io import BytesIO, StringIO import webencodings @@ -14,9 +11,9 @@ from . 
import _utils # Non-unicode versions of constants for use in the pre-parser -spaceCharactersBytes = frozenset([item.encode("ascii") for item in spaceCharacters]) -asciiLettersBytes = frozenset([item.encode("ascii") for item in asciiLetters]) -asciiUppercaseBytes = frozenset([item.encode("ascii") for item in asciiUppercase]) +spaceCharactersBytes = frozenset(item.encode("ascii") for item in spaceCharacters) +asciiLettersBytes = frozenset(item.encode("ascii") for item in asciiLetters) +asciiUppercaseBytes = frozenset(item.encode("ascii") for item in asciiUppercase) spacesAngleBrackets = spaceCharactersBytes | frozenset([b">", b"<"]) @@ -48,7 +45,7 @@ charsUntilRegEx = {} -class BufferedStream(object): +class BufferedStream: """Buffering for streams that do not have buffering of their own The buffer is implemented as a list of chunks on the assumption that @@ -86,7 +83,7 @@ def read(self, bytes): return self._readFromBuffer(bytes) def _bufferedBytes(self): - return sum([len(item) for item in self.buffer]) + return sum(len(item) for item in self.buffer) def _readStream(self, bytes): data = self.stream.read(bytes) @@ -125,15 +122,15 @@ def _readFromBuffer(self, bytes): def HTMLInputStream(source, **kwargs): # Work around Python bug #20007: read(0) closes the connection. 
# http://bugs.python.org/issue20007 - if (isinstance(source, http_client.HTTPResponse) or + if (isinstance(source, http.client.HTTPResponse) or # Also check for addinfourl wrapping HTTPResponse (isinstance(source, urllib.response.addbase) and - isinstance(source.fp, http_client.HTTPResponse))): + isinstance(source.fp, http.client.HTTPResponse))): isUnicode = False elif hasattr(source, "read"): - isUnicode = isinstance(source.read(0), text_type) + isUnicode = isinstance(source.read(0), str) else: - isUnicode = isinstance(source, text_type) + isUnicode = isinstance(source, str) if isUnicode: encodings = [x for x in kwargs if x.endswith("_encoding")] @@ -145,7 +142,7 @@ def HTMLInputStream(source, **kwargs): return HTMLBinaryInputStream(source, **kwargs) -class HTMLUnicodeInputStream(object): +class HTMLUnicodeInputStream: """Provides a unicode stream of characters to the HTMLTokenizer. This class takes care of character encoding and removing or replacing @@ -325,7 +322,7 @@ def charsUntil(self, characters, opposite=False): if __debug__: for c in characters: assert(ord(c) < 128) - regex = "".join(["\\x%02x" % ord(c) for c in characters]) + regex = "".join("\\x%02x" % ord(c) for c in characters) if not opposite: regex = "^%s" % regex chars = charsUntilRegEx[(characters, opposite)] = re.compile("[%s]+" % regex) @@ -524,7 +521,7 @@ def changeEncoding(self, newEncoding): self.rawStream.seek(0) self.charEncoding = (newEncoding, "certain") self.reset() - raise _ReparseException("Encoding changed from %s to %s" % (self.charEncoding[0], newEncoding)) + raise _ReparseException(f"Encoding changed from {self.charEncoding[0]} to {newEncoding}") def detectBOM(self): """Attempts to detect at BOM at the start of the stream. 
If @@ -673,7 +670,7 @@ def jumpTo(self, bytes): return True -class EncodingParser(object): +class EncodingParser: """Mini parser for detecting character encoding from meta elements""" def __init__(self, data): @@ -861,7 +858,7 @@ def getAttribute(self): attrValue.append(c) -class ContentAttrParser(object): +class ContentAttrParser: def __init__(self, data): assert isinstance(data, bytes) self.data = data diff --git a/html5lib/_tokenizer.py b/html5lib/_tokenizer.py index 4748a197..91699519 100644 --- a/html5lib/_tokenizer.py +++ b/html5lib/_tokenizer.py @@ -1,9 +1,4 @@ -from __future__ import absolute_import, division, unicode_literals - -from six import unichr as chr - -from collections import deque, OrderedDict -from sys import version_info +from collections import deque from .constants import spaceCharacters from .constants import entities @@ -18,13 +13,8 @@ entitiesTrie = Trie(entities) -if version_info >= (3, 7): - attributeMap = dict -else: - attributeMap = OrderedDict - -class HTMLTokenizer(object): +class HTMLTokenizer: """ This class takes care of tokenizing HTML. * self.currentToken @@ -50,7 +40,7 @@ def __init__(self, stream, parser=None, **kwargs): # The current token being created self.currentToken = None - super(HTMLTokenizer, self).__init__() + super().__init__() def __iter__(self): """ This is where the magic happens. 
@@ -236,7 +226,7 @@ def emitCurrentToken(self): token["name"] = token["name"].translate(asciiUpper2Lower) if token["type"] == tokenTypes["StartTag"]: raw = token["data"] - data = attributeMap(raw) + data = dict(raw) if len(raw) > len(data): # we had some duplicated attribute, fix so first wins data.update(raw[::-1]) diff --git a/html5lib/_trie/__init__.py b/html5lib/_trie/__init__.py index 07bad5d3..98a6841a 100644 --- a/html5lib/_trie/__init__.py +++ b/html5lib/_trie/__init__.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from .py import Trie __all__ = ["Trie"] diff --git a/html5lib/_trie/_base.py b/html5lib/_trie/_base.py index 6b71975f..6b2977b2 100644 --- a/html5lib/_trie/_base.py +++ b/html5lib/_trie/_base.py @@ -1,9 +1,4 @@ -from __future__ import absolute_import, division, unicode_literals - -try: - from collections.abc import Mapping -except ImportError: # Python 2.7 - from collections import Mapping +from collections.abc import Mapping class Trie(Mapping): @@ -11,7 +6,7 @@ class Trie(Mapping): def keys(self, prefix=None): # pylint:disable=arguments-differ - keys = super(Trie, self).keys() + keys = super().keys() if prefix is None: return set(keys) diff --git a/html5lib/_trie/py.py b/html5lib/_trie/py.py index c2ba3da7..bc6363c4 100644 --- a/html5lib/_trie/py.py +++ b/html5lib/_trie/py.py @@ -1,6 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals -from six import text_type - from bisect import bisect_left from ._base import Trie as ABCTrie @@ -8,7 +5,7 @@ class Trie(ABCTrie): def __init__(self, data): - if not all(isinstance(x, text_type) for x in data.keys()): + if not all(isinstance(x, str) for x in data.keys()): raise TypeError("All keys must be strings") self._data = data diff --git a/html5lib/_utils.py b/html5lib/_utils.py index 9ea57942..95a5569b 100644 --- a/html5lib/_utils.py +++ b/html5lib/_utils.py @@ -1,21 +1,9 @@ -from __future__ import absolute_import, division, unicode_literals 
- from types import ModuleType -try: - from collections.abc import Mapping -except ImportError: - from collections import Mapping +from collections.abc import Mapping -from six import text_type, PY3 -if PY3: - import xml.etree.ElementTree as default_etree -else: - try: - import xml.etree.cElementTree as default_etree - except ImportError: - import xml.etree.ElementTree as default_etree +import xml.etree.ElementTree as default_etree __all__ = ["default_etree", "MethodDispatcher", "isSurrogatePair", @@ -31,10 +19,10 @@ # escapes. try: _x = eval('"\\uD800"') # pylint:disable=eval-used - if not isinstance(_x, text_type): + if not isinstance(_x, str): # We need this with u"" because of http://bugs.jython.org/issue2039 _x = eval('u"\\uD800"') # pylint:disable=eval-used - assert isinstance(_x, text_type) + assert isinstance(_x, str) except Exception: supports_lone_surrogates = False else: @@ -122,7 +110,7 @@ def moduleFactoryFactory(factory): moduleCache = {} def moduleFactory(baseModule, *args, **kwargs): - if isinstance(ModuleType.__name__, type("")): + if isinstance(ModuleType.__name__, str): name = "_%s_factory" % baseModule.__name__ else: name = b"_%s_factory" % baseModule.__name__ diff --git a/html5lib/constants.py b/html5lib/constants.py index fe3e237c..3596ea21 100644 --- a/html5lib/constants.py +++ b/html5lib/constants.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - import string EOF = None diff --git a/html5lib/filters/alphabeticalattributes.py b/html5lib/filters/alphabeticalattributes.py index 5ba926e3..d96ad62a 100644 --- a/html5lib/filters/alphabeticalattributes.py +++ b/html5lib/filters/alphabeticalattributes.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from . 
import base from collections import OrderedDict diff --git a/html5lib/filters/base.py b/html5lib/filters/base.py index c7dbaed0..6937911d 100644 --- a/html5lib/filters/base.py +++ b/html5lib/filters/base.py @@ -1,7 +1,4 @@ -from __future__ import absolute_import, division, unicode_literals - - -class Filter(object): +class Filter: def __init__(self, source): self.source = source diff --git a/html5lib/filters/inject_meta_charset.py b/html5lib/filters/inject_meta_charset.py index aefb5c84..cfa469c3 100644 --- a/html5lib/filters/inject_meta_charset.py +++ b/html5lib/filters/inject_meta_charset.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from . import base diff --git a/html5lib/filters/lint.py b/html5lib/filters/lint.py index acd4d7a2..f0ffce61 100644 --- a/html5lib/filters/lint.py +++ b/html5lib/filters/lint.py @@ -1,7 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - -from six import text_type - from . import base from ..constants import namespaces, voidElements @@ -23,7 +19,7 @@ def __init__(self, source, require_matching_tags=True): :arg require_matching_tags: whether or not to require matching tags """ - super(Filter, self).__init__(source) + super().__init__(source) self.require_matching_tags = require_matching_tags def __iter__(self): @@ -33,9 +29,9 @@ def __iter__(self): if type in ("StartTag", "EmptyTag"): namespace = token["namespace"] name = token["name"] - assert namespace is None or isinstance(namespace, text_type) + assert namespace is None or isinstance(namespace, str) assert namespace != "" - assert isinstance(name, text_type) + assert isinstance(name, str) assert name != "" assert isinstance(token["data"], dict) if (not namespace or namespace == namespaces["html"]) and name in voidElements: @@ -45,49 +41,49 @@ def __iter__(self): if type == "StartTag" and self.require_matching_tags: open_elements.append((namespace, name)) for (namespace, name), value in token["data"].items(): - 
assert namespace is None or isinstance(namespace, text_type) + assert namespace is None or isinstance(namespace, str) assert namespace != "" - assert isinstance(name, text_type) + assert isinstance(name, str) assert name != "" - assert isinstance(value, text_type) + assert isinstance(value, str) elif type == "EndTag": namespace = token["namespace"] name = token["name"] - assert namespace is None or isinstance(namespace, text_type) + assert namespace is None or isinstance(namespace, str) assert namespace != "" - assert isinstance(name, text_type) + assert isinstance(name, str) assert name != "" if (not namespace or namespace == namespaces["html"]) and name in voidElements: - assert False, "Void element reported as EndTag token: %(tag)s" % {"tag": name} + assert False, f"Void element reported as EndTag token: {name}" elif self.require_matching_tags: start = open_elements.pop() assert start == (namespace, name) elif type == "Comment": data = token["data"] - assert isinstance(data, text_type) + assert isinstance(data, str) elif type in ("Characters", "SpaceCharacters"): data = token["data"] - assert isinstance(data, text_type) + assert isinstance(data, str) assert data != "" if type == "SpaceCharacters": assert data.strip(spaceCharacters) == "" elif type == "Doctype": name = token["name"] - assert name is None or isinstance(name, text_type) - assert token["publicId"] is None or isinstance(name, text_type) - assert token["systemId"] is None or isinstance(name, text_type) + assert name is None or isinstance(name, str) + assert token["publicId"] is None or isinstance(name, str) + assert token["systemId"] is None or isinstance(name, str) elif type == "Entity": - assert isinstance(token["name"], text_type) + assert isinstance(token["name"], str) elif type == "SerializerError": - assert isinstance(token["data"], text_type) + assert isinstance(token["data"], str) else: - assert False, "Unknown token type: %(type)s" % {"type": type} + assert False, f"Unknown token type: 
{type}" yield token diff --git a/html5lib/filters/optionaltags.py b/html5lib/filters/optionaltags.py index 4a865012..f1c21118 100644 --- a/html5lib/filters/optionaltags.py +++ b/html5lib/filters/optionaltags.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from . import base diff --git a/html5lib/filters/sanitizer.py b/html5lib/filters/sanitizer.py index 70ef9066..a1b61099 100644 --- a/html5lib/filters/sanitizer.py +++ b/html5lib/filters/sanitizer.py @@ -6,13 +6,11 @@ if Bleach is unsuitable for your needs. """ -from __future__ import absolute_import, division, unicode_literals import re import warnings from xml.sax.saxutils import escape, unescape - -from six.moves import urllib_parse as urlparse +import urllib.parse from . import base from ..constants import namespaces, prefixes @@ -766,7 +764,7 @@ def __init__(self, hrefs--these are removed """ - super(Filter, self).__init__(source) + super().__init__(source) warnings.warn(_deprecation_msg, DeprecationWarning) @@ -838,7 +836,7 @@ def allowed_token(self, token): # remove replacement characters from unescaped characters val_unescaped = val_unescaped.replace("\ufffd", "") try: - uri = urlparse.urlparse(val_unescaped) + uri = urllib.parse.urlparse(val_unescaped) except ValueError: uri = None del attrs[attr] @@ -874,8 +872,8 @@ def disallowed_token(self, token): assert token_type in ("StartTag", "EmptyTag") attrs = [] for (ns, name), v in token["data"].items(): - attrs.append(' %s="%s"' % (name if ns is None else "%s:%s" % (prefixes[ns], name), escape(v))) - token["data"] = "<%s%s>" % (token["name"], ''.join(attrs)) + attrs.append(' {}="{}"'.format(name if ns is None else f"{prefixes[ns]}:{name}", escape(v))) + token["data"] = "<{}{}>".format(token["name"], ''.join(attrs)) else: token["data"] = "<%s>" % token["name"] if token.get("selfClosing"): diff --git a/html5lib/filters/whitespace.py b/html5lib/filters/whitespace.py index 0d12584b..2f35f4a0 100644 --- 
a/html5lib/filters/whitespace.py +++ b/html5lib/filters/whitespace.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - import re from . import base diff --git a/html5lib/html5parser.py b/html5lib/html5parser.py index 74d829d9..3996c9b8 100644 --- a/html5lib/html5parser.py +++ b/html5lib/html5parser.py @@ -1,6 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals -from six import with_metaclass, viewkeys - import types from . import _inputstream @@ -83,7 +80,7 @@ def __new__(meta, classname, bases, classDict): return Decorated -class HTMLParser(object): +class HTMLParser: """HTML parser Generates a tree structure from a stream of (possibly malformed) HTML. @@ -423,7 +420,7 @@ def getMetaclass(use_metaclass, metaclass_func): return type # pylint:disable=unused-argument - class Phase(with_metaclass(getMetaclass(debug, log))): + class Phase(metaclass=getMetaclass(debug, log)): """Base class for helper object that implements each phase of processing """ __slots__ = ("parser", "tree", "__startTagCache", "__endTagCache") @@ -944,7 +941,7 @@ class InBodyPhase(Phase): __slots__ = ("processSpaceCharacters",) def __init__(self, *args, **kwargs): - super(InBodyPhase, self).__init__(*args, **kwargs) + super().__init__(*args, **kwargs) # Set this to the default handler self.processSpaceCharacters = self.processSpaceCharactersNonPre @@ -1002,8 +999,8 @@ def processCharacters(self, token): self.tree.insertText(token["data"]) # This must be bad for performance if (self.parser.framesetOK and - any([char not in spaceCharacters - for char in token["data"]])): + any(char not in spaceCharacters + for char in token["data"])): self.parser.framesetOK = False def processSpaceCharactersNonPre(self, token): @@ -1844,13 +1841,13 @@ class InTableTextPhase(Phase): __slots__ = ("originalPhase", "characterTokens") def __init__(self, *args, **kwargs): - super(InTableTextPhase, self).__init__(*args, **kwargs) + super().__init__(*args, 
**kwargs) self.originalPhase = None self.characterTokens = [] def flushCharacters(self): - data = "".join([item["data"] for item in self.characterTokens]) - if any([item not in spaceCharacters for item in data]): + data = "".join(item["data"] for item in self.characterTokens) + if any(item not in spaceCharacters for item in data): token = {"type": tokenTypes["Characters"], "data": data} self.parser.phases["inTable"].insertText(token) elif data: @@ -2776,7 +2773,7 @@ def processEndTag(self, token): def adjust_attributes(token, replacements): - needs_adjustment = viewkeys(token['data']) & viewkeys(replacements) + needs_adjustment = token['data'].keys() & replacements.keys() if needs_adjustment: token['data'] = type(token['data'])((replacements.get(k, k), v) for k, v in token['data'].items()) diff --git a/html5lib/serializer.py b/html5lib/serializer.py index c66df683..cd4631f6 100644 --- a/html5lib/serializer.py +++ b/html5lib/serializer.py @@ -1,6 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals -from six import text_type - import re from codecs import register_error, xmlcharrefreplace_errors @@ -101,7 +98,7 @@ def serialize(input, tree="etree", encoding=None, **serializer_opts): return s.render(walker(input), encoding) -class HTMLSerializer(object): +class HTMLSerializer: # attribute quoting options quote_attr_values = "legacy" # be secure by default @@ -222,14 +219,14 @@ def __init__(self, **kwargs): self.strict = False def encode(self, string): - assert(isinstance(string, text_type)) + assert(isinstance(string, str)) if self.encoding: return string.encode(self.encoding, "htmlentityreplace") else: return string def encodeStrict(self, string): - assert(isinstance(string, text_type)) + assert(isinstance(string, str)) if self.encoding: return string.encode(self.encoding, "strict") else: @@ -278,7 +275,7 @@ def serialize(self, treewalker, encoding=None): quote_char = "'" else: quote_char = '"' - doctype += " %s%s%s" % (quote_char, 
token["systemId"], quote_char) + doctype += " {}{}{}".format(quote_char, token["systemId"], quote_char) doctype += ">" yield self.encodeStrict(doctype) diff --git a/html5lib/tests/__init__.py b/html5lib/tests/__init__.py index b8ce2de3..e69de29b 100644 --- a/html5lib/tests/__init__.py +++ b/html5lib/tests/__init__.py @@ -1 +0,0 @@ -from __future__ import absolute_import, division, unicode_literals diff --git a/html5lib/tests/conftest.py b/html5lib/tests/conftest.py index fffeb50c..0b3fc4df 100644 --- a/html5lib/tests/conftest.py +++ b/html5lib/tests/conftest.py @@ -1,4 +1,3 @@ -from __future__ import print_function import os.path import sys @@ -54,7 +53,7 @@ def pytest_configure(config): # Check for optional requirements req_file = os.path.join(_root, "requirements-optional.txt") if os.path.exists(req_file): - with open(req_file, "r") as fp: + with open(req_file) as fp: for line in fp: if (line.strip() and not (line.startswith("-r") or @@ -70,7 +69,7 @@ def pytest_configure(config): try: installed = pkg_resources.working_set.find(req) except pkg_resources.VersionConflict: - msgs.append("Outdated version of %s installed, need %s" % (req.name, spec)) + msgs.append(f"Outdated version of {req.name} installed, need {spec}") else: if not installed: msgs.append("Need %s" % spec) @@ -79,7 +78,7 @@ def pytest_configure(config): import xml.etree.ElementTree as ElementTree try: - import xml.etree.cElementTree as cElementTree + import xml.etree.ElementTree as cElementTree except ImportError: msgs.append("cElementTree unable to be imported") else: @@ -90,7 +89,7 @@ def pytest_configure(config): pytest.exit("\n".join(msgs)) -def pytest_collect_file(path, parent): +def pytest_collect_file(file_path, path, parent): dir = os.path.abspath(path.dirname) dir_and_parents = set() while dir not in dir_and_parents: @@ -99,13 +98,13 @@ def pytest_collect_file(path, parent): if _tree_construction in dir_and_parents: if path.ext == ".dat": - return TreeConstructionFile.from_parent(parent, 
fspath=path) + return TreeConstructionFile.from_parent(parent, path=file_path) elif _tokenizer in dir_and_parents: if path.ext == ".test": - return TokenizerFile.from_parent(parent, fspath=path) + return TokenizerFile.from_parent(parent, path=file_path) elif _sanitizer_testdata in dir_and_parents: if path.ext == ".dat": - return SanitizerFile.from_parent(parent, fspath=path) + return SanitizerFile.from_parent(parent, path=file_path) # Tiny wrapper to allow .from_parent constructors on older pytest for PY27 diff --git a/html5lib/tests/sanitizer.py b/html5lib/tests/sanitizer.py index 16e53868..fb7fadf9 100644 --- a/html5lib/tests/sanitizer.py +++ b/html5lib/tests/sanitizer.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - import codecs import json @@ -18,7 +16,7 @@ def collect(self): class SanitizerTest(pytest.Item): def __init__(self, name, parent, test): - super(SanitizerTest, self).__init__(name, parent) + super().__init__(name, parent) self.obj = lambda: 1 # this is to hack around skipif needing a function! 
self.test = test diff --git a/html5lib/tests/support.py b/html5lib/tests/support.py index 9cd5afbe..f311fb92 100644 --- a/html5lib/tests/support.py +++ b/html5lib/tests/support.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - # pylint:disable=wrong-import-position import os @@ -35,7 +33,7 @@ } try: - import xml.etree.cElementTree as cElementTree # noqa + import xml.etree.ElementTree as cElementTree # noqa except ImportError: treeTypes['cElementTree'] = None else: @@ -86,7 +84,7 @@ def __getitem__(self, key): return dict.get(self, key, self.default) -class TestData(object): +class TestData: def __init__(self, filename, newTestHeading="data", encoding="utf8"): if encoding is None: self.f = open(filename, mode="rb") @@ -150,8 +148,6 @@ def convertData(data): def errorMessage(input, expected, actual): msg = ("Input:\n%s\nExpected:\n%s\nReceived\n%s\n" % (repr(input), repr(expected), repr(actual))) - if sys.version_info[0] == 2: - msg = msg.encode("ascii", "backslashreplace") return msg diff --git a/html5lib/tests/test_alphabeticalattributes.py b/html5lib/tests/test_alphabeticalattributes.py index 7d5b8e0f..3b82c2b0 100644 --- a/html5lib/tests/test_alphabeticalattributes.py +++ b/html5lib/tests/test_alphabeticalattributes.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from collections import OrderedDict import pytest diff --git a/html5lib/tests/test_encoding.py b/html5lib/tests/test_encoding.py index 47c4814a..ddad9100 100644 --- a/html5lib/tests/test_encoding.py +++ b/html5lib/tests/test_encoding.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - import os import pytest @@ -9,7 +7,7 @@ def test_basic_prescan_length(): - data = "Caf\u00E9".encode('utf-8') + data = "Caf\u00E9".encode() pad = 1024 - len(data) + 1 data = data.replace(b"-a-", b"-" + (b"a" * pad) + b"-") assert len(data) == 1024 # Sanity @@ -18,7 +16,7 @@ def test_basic_prescan_length(): 
def test_parser_reparse(): - data = "Caf\u00E9".encode('utf-8') + data = "Caf\u00E9".encode() pad = 10240 - len(data) + 1 data = data.replace(b"-a-", b"-" + (b"a" * pad) + b"-") assert len(data) == 10240 # Sanity diff --git a/html5lib/tests/test_meta.py b/html5lib/tests/test_meta.py index dd02dd7f..2fc6140d 100644 --- a/html5lib/tests/test_meta.py +++ b/html5lib/tests/test_meta.py @@ -1,7 +1,4 @@ -from __future__ import absolute_import, division, unicode_literals - -import six -from mock import Mock +from unittest.mock import Mock from . import support @@ -27,11 +24,7 @@ def test_errorMessage(): r = support.errorMessage(input, expected, actual) # Assertions! - if six.PY2: - assert b"Input:\n1\nExpected:\n2\nReceived\n3\n" == r - else: - assert six.PY3 - assert "Input:\n1\nExpected:\n2\nReceived\n3\n" == r + assert "Input:\n1\nExpected:\n2\nReceived\n3\n" == r assert input.__repr__.call_count == 1 assert expected.__repr__.call_count == 1 diff --git a/html5lib/tests/test_optionaltags_filter.py b/html5lib/tests/test_optionaltags_filter.py index cd282149..1b054f40 100644 --- a/html5lib/tests/test_optionaltags_filter.py +++ b/html5lib/tests/test_optionaltags_filter.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from html5lib.filters.optionaltags import Filter diff --git a/html5lib/tests/test_parser2.py b/html5lib/tests/test_parser2.py index 879d2447..89c9cef1 100644 --- a/html5lib/tests/test_parser2.py +++ b/html5lib/tests/test_parser2.py @@ -1,7 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - -from six import PY2, text_type - import io from . 
import support # noqa @@ -39,7 +35,7 @@ def test_namespace_html_elements_0_etree(): doc = parse("", treebuilder="etree", namespaceHTMLElements=True) - assert doc.tag == "{%s}html" % (namespaces["html"],) + assert doc.tag == "{{{}}}html".format(namespaces["html"]) def test_namespace_html_elements_1_etree(): @@ -75,11 +71,6 @@ def test_debug_log(): ('dataState', 'InBodyPhase', 'InBodyPhase', 'processEndTag', {'name': 'p', 'type': 'EndTag'}), ('dataState', 'InBodyPhase', 'InBodyPhase', 'processCharacters', {'type': 'Characters'})] - if PY2: - for i, log in enumerate(expected): - log = [x.encode("ascii") if isinstance(x, text_type) else x for x in log] - expected[i] = tuple(log) - assert parser.log == expected diff --git a/html5lib/tests/test_sanitizer.py b/html5lib/tests/test_sanitizer.py index f3faeb80..f8b4b4b6 100644 --- a/html5lib/tests/test_sanitizer.py +++ b/html5lib/tests/test_sanitizer.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - import pytest from html5lib import constants, parseFragment, serialize @@ -60,19 +58,19 @@ def param_sanitizer(): if tag_name == 'image': yield ("test_should_allow_%s_tag" % tag_name, "foo <bad>bar</bad> baz", - "<%s title='1'>foo bar baz" % (tag_name, tag_name)) + f"<{tag_name} title='1'>foo bar baz") elif tag_name == 'br': yield ("test_should_allow_%s_tag" % tag_name, "
foo <bad>bar</bad> baz
", - "<%s title='1'>foo bar baz" % (tag_name, tag_name)) + f"<{tag_name} title='1'>foo bar baz") elif tag_name in constants.voidElements: yield ("test_should_allow_%s_tag" % tag_name, "<%s title=\"1\"/>foo <bad>bar</bad> baz" % tag_name, - "<%s title='1'>foo bar baz" % (tag_name, tag_name)) + f"<{tag_name} title='1'>foo bar baz") else: yield ("test_should_allow_%s_tag" % tag_name, - "<%s title=\"1\">foo <bad>bar</bad> baz" % (tag_name, tag_name), - "<%s title='1'>foo bar baz" % (tag_name, tag_name)) + f"<{tag_name} title=\"1\">foo <bad>bar</bad> baz", + f"<{tag_name} title='1'>foo bar baz") for ns, attribute_name in sanitizer.allowed_attributes: if ns is not None: @@ -85,16 +83,16 @@ def param_sanitizer(): if attribute_name in sanitizer.attr_val_is_uri: attribute_value = '%s://sub.domain.tld/path/object.ext' % sanitizer.allowed_protocols[0] yield ("test_should_allow_%s_attribute" % attribute_name, - "

foo <bad>bar</bad> baz

" % (attribute_name, attribute_value), - "

foo bar baz

" % (attribute_name, attribute_value)) + f"

foo <bad>bar</bad> baz

", + f"

foo bar baz

") for protocol in sanitizer.allowed_protocols: rest_of_uri = '//sub.domain.tld/path/object.ext' if protocol == 'data': rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ=' yield ("test_should_allow_uppercase_%s_uris" % protocol, - "foo" % (protocol, rest_of_uri), - """foo""" % (protocol, rest_of_uri)) + f"foo", + f"""foo""") for protocol in sanitizer.allowed_protocols: rest_of_uri = '//sub.domain.tld/path/object.ext' @@ -102,8 +100,8 @@ def param_sanitizer(): rest_of_uri = 'image/png;base64,aGVsbG8gd29ybGQ=' protocol = protocol.upper() yield ("test_should_allow_uppercase_%s_uris" % protocol, - "foo" % (protocol, rest_of_uri), - """foo""" % (protocol, rest_of_uri)) + f"foo", + f"""foo""") @pytest.mark.parametrize("expected, input", diff --git a/html5lib/tests/test_serializer.py b/html5lib/tests/test_serializer.py index bce62459..2ed71de6 100644 --- a/html5lib/tests/test_serializer.py +++ b/html5lib/tests/test_serializer.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - import os import json @@ -221,6 +219,6 @@ def test_serializer(input, expected, options): result = serialize_html(input, options) if len(expected) == 1: - assert expected[0] == result, "Expected:\n%s\nActual:\n%s\nOptions:\n%s" % (expected[0], result, str(options)) + assert expected[0] == result, f"Expected:\n{expected[0]}\nActual:\n{result}\nOptions:\n{str(options)}" elif result not in expected: - assert False, "Expected: %s, Received: %s" % (expected, result) + assert False, f"Expected: {expected}, Received: {result}" diff --git a/html5lib/tests/test_stream.py b/html5lib/tests/test_stream.py index efe9b472..b94c7a5c 100644 --- a/html5lib/tests/test_stream.py +++ b/html5lib/tests/test_stream.py @@ -1,16 +1,13 @@ -from __future__ import absolute_import, division, unicode_literals - from . 
import support # noqa import codecs +import http.client import sys +import urllib from io import BytesIO, StringIO import pytest -import six -from six.moves import http_client, urllib - from html5lib._inputstream import (BufferedStream, HTMLInputStream, HTMLUnicodeInputStream, HTMLBinaryInputStream) from html5lib._utils import supports_lone_surrogates @@ -105,7 +102,7 @@ def test_char_ascii(): def test_char_utf8(): - stream = HTMLInputStream('\u2018'.encode('utf-8'), override_encoding='utf-8') + stream = HTMLInputStream('\u2018'.encode(), override_encoding='utf-8') assert stream.charEncoding[0].name == 'utf-8' assert stream.char() == '\u2018' @@ -186,12 +183,12 @@ def test_python_issue_20007(): Make sure we have a work-around for Python bug #20007 http://bugs.python.org/issue20007 """ - class FakeSocket(object): + class FakeSocket: def makefile(self, _mode, _bufsize=None): # pylint:disable=unused-argument return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText") - source = http_client.HTTPResponse(FakeSocket()) + source = http.client.HTTPResponse(FakeSocket()) source.begin() stream = HTMLInputStream(source) assert stream.charsUntil(" ") == "Text" @@ -202,15 +199,12 @@ def test_python_issue_20007_b(): Make sure we have a work-around for Python bug #20007 http://bugs.python.org/issue20007 """ - if six.PY2: - return - - class FakeSocket(object): + class FakeSocket: def makefile(self, _mode, _bufsize=None): # pylint:disable=unused-argument return BytesIO(b"HTTP/1.1 200 Ok\r\n\r\nText") - source = http_client.HTTPResponse(FakeSocket()) + source = http.client.HTTPResponse(FakeSocket()) source.begin() wrapped = urllib.response.addinfourl(source, source.msg, "http://example.com") stream = HTMLInputStream(wrapped) diff --git a/html5lib/tests/test_tokenizer2.py b/html5lib/tests/test_tokenizer2.py index 158d847a..a616288f 100644 --- a/html5lib/tests/test_tokenizer2.py +++ b/html5lib/tests/test_tokenizer2.py @@ -1,8 +1,5 @@ -from __future__ import absolute_import, division, 
unicode_literals - import io -from six import unichr, text_type from html5lib._tokenizer import HTMLTokenizer from html5lib.constants import tokenTypes @@ -16,8 +13,8 @@ def ignore_parse_errors(toks): def test_maintain_attribute_order(): # generate loads to maximize the chance a hash-based mutation will occur - attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))] - stream = io.StringIO("") + attrs = [(chr(x), str(i)) for i, x in enumerate(range(ord('a'), ord('z')))] + stream = io.StringIO("") toks = HTMLTokenizer(stream) out = list(ignore_parse_errors(toks)) @@ -49,8 +46,8 @@ def test_duplicate_attribute(): def test_maintain_duplicate_attribute_order(): # generate loads to maximize the chance a hash-based mutation will occur - attrs = [(unichr(x), text_type(i)) for i, x in enumerate(range(ord('a'), ord('z')))] - stream = io.StringIO("") + attrs = [(chr(x), str(i)) for i, x in enumerate(range(ord('a'), ord('z')))] + stream = io.StringIO("") toks = HTMLTokenizer(stream) out = list(ignore_parse_errors(toks)) diff --git a/html5lib/tests/test_treeadapters.py b/html5lib/tests/test_treeadapters.py index 95e56c00..1e396ed9 100644 --- a/html5lib/tests/test_treeadapters.py +++ b/html5lib/tests/test_treeadapters.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from . 
import support # noqa import html5lib diff --git a/html5lib/tests/test_treewalkers.py b/html5lib/tests/test_treewalkers.py index 780ca964..18d77128 100644 --- a/html5lib/tests/test_treewalkers.py +++ b/html5lib/tests/test_treewalkers.py @@ -1,9 +1,5 @@ -from __future__ import absolute_import, division, unicode_literals - import itertools -import sys -from six import unichr, text_type import pytest try: @@ -74,11 +70,11 @@ def param_treewalker_six_mix(): # fragment but not using the u'' syntax nor importing unicode_literals sm_tests = [ ('Example', - [(str('class'), str('test123'))], + [('class', 'test123')], '\n class="test123"\n href="http://example.com"\n "Example"'), ('', - [(str('rel'), str('alternate'))], + [('rel', 'alternate')], '\n href="http://example.com/cow"\n rel="alternate"\n "Example"') ] @@ -102,7 +98,7 @@ def test_treewalker_six_mix(intext, expected, attrs_to_add, tree): output = treewalkers.pprint(treeClass["walker"](document)) output = attrlist.sub(sortattrs, output) if output not in expected: - raise AssertionError("TreewalkerEditTest: %s\nExpected:\n%s\nReceived:\n%s" % (treeName, expected, output)) + raise AssertionError(f"TreewalkerEditTest: {treeName}\nExpected:\n{expected}\nReceived:\n{output}") @pytest.mark.parametrize("tree,char", itertools.product(sorted(treeTypes.items()), ["x", "\u1234"])) @@ -142,8 +138,8 @@ def test_lxml_xml(): @pytest.mark.parametrize("treeName", [pytest.param(treeName, marks=[getattr(pytest.mark, treeName), pytest.mark.skipif( - treeName != "lxml" or - sys.version_info < (3, 7), reason="dict order undef")]) + treeName != "lxml", + reason="dict order undef")]) for treeName in sorted(treeTypes.keys())]) def test_maintain_attribute_order(treeName): treeAPIs = treeTypes[treeName] @@ -151,8 +147,8 @@ def test_maintain_attribute_order(treeName): pytest.skip("Treebuilder not loaded") # generate loads to maximize the chance a hash-based mutation will occur - attrs = [(unichr(x), text_type(i)) for i, x in 
enumerate(range(ord('a'), ord('z')))] - data = "" + attrs = [(chr(x), str(i)) for i, x in enumerate(range(ord('a'), ord('z')))] + data = "" parser = html5parser.HTMLParser(tree=treeAPIs["builder"]) document = parser.parseFragment(data) @@ -175,8 +171,8 @@ def test_maintain_attribute_order(treeName): @pytest.mark.parametrize("treeName", [pytest.param(treeName, marks=[getattr(pytest.mark, treeName), pytest.mark.skipif( - treeName != "lxml" or - sys.version_info < (3, 7), reason="dict order undef")]) + treeName != "lxml", + reason="dict order undef")]) for treeName in sorted(treeTypes.keys())]) def test_maintain_attribute_order_adjusted(treeName): treeAPIs = treeTypes[treeName] diff --git a/html5lib/tests/test_whitespace_filter.py b/html5lib/tests/test_whitespace_filter.py index e9da6140..0daf1c52 100644 --- a/html5lib/tests/test_whitespace_filter.py +++ b/html5lib/tests/test_whitespace_filter.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from html5lib.filters.whitespace import Filter from html5lib.constants import spaceCharacters spaceCharacters = "".join(spaceCharacters) diff --git a/html5lib/tests/tokenizer.py b/html5lib/tests/tokenizer.py index cc9897a4..273955c3 100644 --- a/html5lib/tests/tokenizer.py +++ b/html5lib/tests/tokenizer.py @@ -1,18 +1,15 @@ -from __future__ import absolute_import, division, unicode_literals - import codecs import json import warnings import re import pytest -from six import unichr from html5lib._tokenizer import HTMLTokenizer from html5lib import constants, _utils -class TokenizerTestParser(object): +class TokenizerTestParser: def __init__(self, initialState, lastStartTag=None): self.tokenizer = HTMLTokenizer self._state = initialState @@ -146,11 +143,11 @@ def repl(m): low = int(m.group(2), 16) if 0xD800 <= high <= 0xDBFF and 0xDC00 <= low <= 0xDFFF: cp = ((high - 0xD800) << 10) + (low - 0xDC00) + 0x10000 - return unichr(cp) + return chr(cp) else: - return unichr(high) + unichr(low) + 
return chr(high) + chr(low) else: - return unichr(int(m.group(1), 16)) + return chr(int(m.group(1), 16)) try: return _surrogateRe.sub(repl, inp) except ValueError: @@ -197,7 +194,7 @@ def collect(self): class TokenizerTestCollector(pytest.Collector): def __init__(self, name, parent=None, config=None, session=None, testdata=None): - super(TokenizerTestCollector, self).__init__(name, parent, config, session) + super().__init__(name, parent, config, session) if 'initialStates' not in testdata: testdata["initialStates"] = ["Data state"] if 'doubleEscaped' in testdata: @@ -218,7 +215,7 @@ def collect(self): class TokenizerTest(pytest.Item): def __init__(self, name, parent, test, initialState): - super(TokenizerTest, self).__init__(name, parent) + super().__init__(name, parent) self.obj = lambda: 1 # this is to hack around skipif needing a function! self.test = test self.initialState = initialState diff --git a/html5lib/tests/tokenizertotree.py b/html5lib/tests/tokenizertotree.py index 8528e876..2ba74cad 100644 --- a/html5lib/tests/tokenizertotree.py +++ b/html5lib/tests/tokenizertotree.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - import sys import os import json @@ -25,7 +23,7 @@ def main(out_path): def run_file(filename, out_path): try: - tests_data = json.load(open(filename, "r")) + tests_data = json.load(open(filename)) except ValueError: sys.stderr.write("Failed to load %s\n" % filename) return diff --git a/html5lib/tests/tree_construction.py b/html5lib/tests/tree_construction.py index fb0657bf..204865ba 100644 --- a/html5lib/tests/tree_construction.py +++ b/html5lib/tests/tree_construction.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - import itertools import re import warnings @@ -31,14 +29,13 @@ def collect(self): class TreeConstructionTest(pytest.Collector): def __init__(self, name, parent=None, config=None, session=None, testdata=None): - super(TreeConstructionTest, 
self).__init__(name, parent, config, session) + super().__init__(name, parent, config, session) self.testdata = testdata def collect(self): for treeName, treeAPIs in sorted(treeTypes.items()): - for x in itertools.chain(self._getParserTests(treeName, treeAPIs), - self._getTreeWalkerTests(treeName, treeAPIs)): - yield x + yield from itertools.chain(self._getParserTests(treeName, treeAPIs), + self._getTreeWalkerTests(treeName, treeAPIs)) def _getParserTests(self, treeName, treeAPIs): if treeAPIs is not None and "adapter" in treeAPIs: @@ -79,7 +76,7 @@ def convertTreeDump(data): class ParserTest(pytest.Item): def __init__(self, name, parent, test, treeClass, namespaceHTMLElements): - super(ParserTest, self).__init__(name, parent) + super().__init__(name, parent) self.test = test self.treeClass = treeClass self.namespaceHTMLElements = namespaceHTMLElements @@ -122,7 +119,7 @@ def runtest(self): errStr = [] for (line, col), errorcode, datavars in p.errors: - assert isinstance(datavars, dict), "%s, %s" % (errorcode, repr(datavars)) + assert isinstance(datavars, dict), f"{errorcode}, {repr(datavars)}" errStr.append("Line: %i Col: %i %s" % (line, col, constants.E[errorcode] % datavars)) @@ -144,7 +141,7 @@ def repr_failure(self, excinfo): class TreeWalkerTest(pytest.Item): def __init__(self, name, parent, test, treeAPIs): - super(TreeWalkerTest, self).__init__(name, parent) + super().__init__(name, parent) self.test = test self.treeAPIs = treeAPIs diff --git a/html5lib/treeadapters/__init__.py b/html5lib/treeadapters/__init__.py index dfeb0ba5..1444fc9a 100644 --- a/html5lib/treeadapters/__init__.py +++ b/html5lib/treeadapters/__init__.py @@ -16,7 +16,6 @@ genshi_tree = genshi.to_genshi(TreeWalker(tree)) """ -from __future__ import absolute_import, division, unicode_literals from . 
import sax diff --git a/html5lib/treeadapters/genshi.py b/html5lib/treeadapters/genshi.py index 61d5fb6a..804a980e 100644 --- a/html5lib/treeadapters/genshi.py +++ b/html5lib/treeadapters/genshi.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from genshi.core import QName, Attrs from genshi.core import START, END, TEXT, COMMENT, DOCTYPE @@ -23,7 +21,7 @@ def to_genshi(walker): if type in ("StartTag", "EmptyTag"): if token["namespace"]: - name = "{%s}%s" % (token["namespace"], token["name"]) + name = "{{{}}}{}".format(token["namespace"], token["name"]) else: name = token["name"] attrs = Attrs([(QName("{%s}%s" % attr if attr[0] is not None else attr[1]), value) @@ -34,7 +32,7 @@ def to_genshi(walker): if type == "EndTag": if token["namespace"]: - name = "{%s}%s" % (token["namespace"], token["name"]) + name = "{{{}}}{}".format(token["namespace"], token["name"]) else: name = token["name"] diff --git a/html5lib/treeadapters/sax.py b/html5lib/treeadapters/sax.py index f4ccea5a..04ec1ef0 100644 --- a/html5lib/treeadapters/sax.py +++ b/html5lib/treeadapters/sax.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from xml.sax.xmlreader import AttributesNSImpl from ..constants import adjustForeignAttributes, unadjustForeignAttributes diff --git a/html5lib/treebuilders/__init__.py b/html5lib/treebuilders/__init__.py index d44447ea..90aad5fb 100644 --- a/html5lib/treebuilders/__init__.py +++ b/html5lib/treebuilders/__init__.py @@ -29,7 +29,6 @@ """ -from __future__ import absolute_import, division, unicode_literals from .._utils import default_etree diff --git a/html5lib/treebuilders/base.py b/html5lib/treebuilders/base.py index e4a3d710..4afd3c56 100644 --- a/html5lib/treebuilders/base.py +++ b/html5lib/treebuilders/base.py @@ -1,6 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals -from six import text_type - from ..constants import scopingElements, tableInsertModeElements, 
namespaces # The scope markers are inserted when entering object elements, @@ -20,7 +17,7 @@ } -class Node(object): +class Node: """Represents an item in the tree""" def __init__(self, name): """Creates a Node @@ -43,11 +40,11 @@ def __init__(self, name): self._flags = [] def __str__(self): - attributesStr = " ".join(["%s=\"%s\"" % (name, value) - for name, value in - self.attributes.items()]) + attributesStr = " ".join(f"{name}=\"{value}\"" + for name, value in + self.attributes.items()) if attributesStr: - return "<%s %s>" % (self.name, attributesStr) + return f"<{self.name} {attributesStr}>" else: return "<%s>" % (self.name) @@ -143,7 +140,7 @@ def nodesEqual(self, node1, node2): return True -class TreeBuilder(object): +class TreeBuilder: """Base treebuilder implementation * documentClass - the class to use for the bottommost node of a document @@ -199,7 +196,7 @@ def elementInScope(self, target, variant=None): # match any node with that name exactNode = hasattr(target, "nameTuple") if not exactNode: - if isinstance(target, text_type): + if isinstance(target, str): target = (namespaces["html"], target) assert isinstance(target, tuple) @@ -322,7 +319,7 @@ def _setInsertFromTable(self, value): def insertElementNormal(self, token): name = token["name"] - assert isinstance(name, text_type), "Element %s not unicode" % name + assert isinstance(name, str), "Element %s not unicode" % name namespace = token.get("namespace", self.defaultNamespace) element = self.elementClass(name, namespace) element.attributes = token["data"] diff --git a/html5lib/treebuilders/dom.py b/html5lib/treebuilders/dom.py index d8b53004..2d632d6e 100644 --- a/html5lib/treebuilders/dom.py +++ b/html5lib/treebuilders/dom.py @@ -1,10 +1,4 @@ -from __future__ import absolute_import, division, unicode_literals - - -try: - from collections.abc import MutableMapping -except ImportError: # Python 2.7 - from collections import MutableMapping +from collections.abc import MutableMapping from xml.dom import 
minidom, Node import weakref @@ -191,25 +185,25 @@ def serializeElement(element, indent=0): rv.append("""|%s""" % (' ' * indent, element.name, publicId, systemId)) else: - rv.append("|%s" % (' ' * indent, element.name)) + rv.append("|{}".format(' ' * indent, element.name)) else: - rv.append("|%s" % (' ' * indent,)) + rv.append("|{}".format(' ' * indent)) elif element.nodeType == Node.DOCUMENT_NODE: rv.append("#document") elif element.nodeType == Node.DOCUMENT_FRAGMENT_NODE: rv.append("#document-fragment") elif element.nodeType == Node.COMMENT_NODE: - rv.append("|%s" % (' ' * indent, element.nodeValue)) + rv.append("|{}".format(' ' * indent, element.nodeValue)) elif element.nodeType == Node.TEXT_NODE: - rv.append("|%s\"%s\"" % (' ' * indent, element.nodeValue)) + rv.append("|{}\"{}\"".format(' ' * indent, element.nodeValue)) else: if (hasattr(element, "namespaceURI") and element.namespaceURI is not None): - name = "%s %s" % (constants.prefixes[element.namespaceURI], - element.nodeName) + name = "{} {}".format(constants.prefixes[element.namespaceURI], + element.nodeName) else: name = element.nodeName - rv.append("|%s<%s>" % (' ' * indent, name)) + rv.append("|{}<{}>".format(' ' * indent, name)) if element.hasAttributes(): attributes = [] for i in range(len(element.attributes)): @@ -218,13 +212,13 @@ def serializeElement(element, indent=0): value = attr.value ns = attr.namespaceURI if ns: - name = "%s %s" % (constants.prefixes[ns], attr.localName) + name = f"{constants.prefixes[ns]} {attr.localName}" else: name = attr.nodeName attributes.append((name, value)) for name, value in sorted(attributes): - rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value)) + rv.append('|{}{}="{}"'.format(' ' * (indent + 2), name, value)) indent += 2 for child in element.childNodes: serializeElement(child, indent) diff --git a/html5lib/treebuilders/etree.py b/html5lib/treebuilders/etree.py index 086bed4e..f3fea0f9 100644 --- a/html5lib/treebuilders/etree.py +++ 
b/html5lib/treebuilders/etree.py @@ -1,7 +1,5 @@ -from __future__ import absolute_import, division, unicode_literals # pylint:disable=protected-access -from six import text_type import re @@ -38,7 +36,7 @@ def _getETreeTag(self, name, namespace): if namespace is None: etree_tag = name else: - etree_tag = "{%s}%s" % (namespace, name) + etree_tag = f"{{{namespace}}}{name}" return etree_tag def _setName(self, name): @@ -70,7 +68,7 @@ def _setAttributes(self, attributes): # allocation on average for key, value in attributes.items(): if isinstance(key, tuple): - name = "{%s}%s" % (key[2], key[1]) + name = f"{{{key[2]}}}{key[1]}" else: name = key el_attrib[name] = value @@ -210,20 +208,20 @@ def serializeElement(element, indent=0): rv.append("""""" % (element.text, publicId, systemId)) else: - rv.append("" % (element.text,)) + rv.append(f"") elif element.tag == "DOCUMENT_ROOT": rv.append("#document") if element.text is not None: - rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text)) + rv.append("|{}\"{}\"".format(' ' * (indent + 2), element.text)) if element.tail is not None: raise TypeError("Document node cannot have tail") if hasattr(element, "attrib") and len(element.attrib): raise TypeError("Document node cannot have attributes") elif element.tag == ElementTreeCommentType: - rv.append("|%s" % (' ' * indent, element.text)) + rv.append("|{}".format(' ' * indent, element.text)) else: - assert isinstance(element.tag, text_type), \ - "Expected unicode, got %s, %s" % (type(element.tag), element.tag) + assert isinstance(element.tag, str), \ + f"Expected unicode, got {type(element.tag)}, {element.tag}" nsmatch = tag_regexp.match(element.tag) if nsmatch is None: @@ -231,8 +229,8 @@ def serializeElement(element, indent=0): else: ns, name = nsmatch.groups() prefix = constants.prefixes[ns] - name = "%s %s" % (prefix, name) - rv.append("|%s<%s>" % (' ' * indent, name)) + name = f"{prefix} {name}" + rv.append("|{}<{}>".format(' ' * indent, name)) if hasattr(element, 
"attrib"): attributes = [] @@ -241,20 +239,20 @@ def serializeElement(element, indent=0): if nsmatch is not None: ns, name = nsmatch.groups() prefix = constants.prefixes[ns] - attr_string = "%s %s" % (prefix, name) + attr_string = f"{prefix} {name}" else: attr_string = name attributes.append((attr_string, value)) for name, value in sorted(attributes): - rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value)) + rv.append('|{}{}="{}"'.format(' ' * (indent + 2), name, value)) if element.text: - rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text)) + rv.append("|{}\"{}\"".format(' ' * (indent + 2), element.text)) indent += 2 for child in element: serializeElement(child, indent) if element.tail: - rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail)) + rv.append("|{}\"{}\"".format(' ' * (indent - 2), element.tail)) serializeElement(element, 0) return "\n".join(rv) @@ -275,7 +273,7 @@ def serializeElement(element): rv.append("""""" % (element.text, publicId, systemId)) else: - rv.append("" % (element.text,)) + rv.append(f"") elif element.tag == "DOCUMENT_ROOT": if element.text is not None: rv.append(element.text) @@ -288,23 +286,23 @@ def serializeElement(element): serializeElement(child) elif element.tag == ElementTreeCommentType: - rv.append("" % (element.text,)) + rv.append(f"") else: # This is assumed to be an ordinary element if not element.attrib: - rv.append("<%s>" % (filter.fromXmlName(element.tag),)) + rv.append(f"<{filter.fromXmlName(element.tag)}>") else: - attr = " ".join(["%s=\"%s\"" % ( + attr = " ".join("{}=\"{}\"".format( filter.fromXmlName(name), value) - for name, value in element.attrib.items()]) - rv.append("<%s %s>" % (element.tag, attr)) + for name, value in element.attrib.items()) + rv.append(f"<{element.tag} {attr}>") if element.text: rv.append(element.text) for child in element: serializeElement(child) - rv.append("" % (element.tag,)) + rv.append(f"") if element.tail: rv.append(element.tail) diff --git 
a/html5lib/treebuilders/etree_lxml.py b/html5lib/treebuilders/etree_lxml.py index e73de61a..3bcf8c96 100644 --- a/html5lib/treebuilders/etree_lxml.py +++ b/html5lib/treebuilders/etree_lxml.py @@ -9,17 +9,12 @@ When any of these things occur, we emit a DataLossWarning """ -from __future__ import absolute_import, division, unicode_literals # pylint:disable=protected-access import warnings import re -import sys -try: - from collections.abc import MutableMapping -except ImportError: - from collections import MutableMapping +from collections.abc import MutableMapping from . import base from ..constants import DataLossWarning @@ -28,7 +23,6 @@ from .. import _ihatexml import lxml.etree as etree -from six import PY3, binary_type fullTree = True @@ -37,14 +31,14 @@ comment_type = etree.Comment("asd").tag -class DocumentType(object): +class DocumentType: def __init__(self, name, publicId, systemId): self.name = name self.publicId = publicId self.systemId = systemId -class Document(object): +class Document: def __init__(self): self._elementTree = None self._childNodes = [] @@ -76,11 +70,11 @@ def serializeElement(element, indent=0): element.docinfo.system_url): dtd_str = "" % element.docinfo.root_name else: - dtd_str = """""" % ( + dtd_str = """""".format( element.docinfo.root_name, element.docinfo.public_id, element.docinfo.system_url) - rv.append("|%s%s" % (' ' * (indent + 2), dtd_str)) + rv.append("|{}{}".format(' ' * (indent + 2), dtd_str)) next_element = element.getroot() while next_element.getprevious() is not None: next_element = next_element.getprevious() @@ -89,17 +83,17 @@ def serializeElement(element, indent=0): next_element = next_element.getnext() elif isinstance(element, str) or isinstance(element, bytes): # Text in a fragment - assert isinstance(element, str) or sys.version_info[0] == 2 - rv.append("|%s\"%s\"" % (' ' * indent, element)) + assert isinstance(element, str) + rv.append("|{}\"{}\"".format(' ' * indent, element)) else: # Fragment case 
rv.append("#document-fragment") for next_element in element: serializeElement(next_element, indent + 2) elif element.tag == comment_type: - rv.append("|%s" % (' ' * indent, element.text)) + rv.append("|{}".format(' ' * indent, element.text)) if hasattr(element, "tail") and element.tail: - rv.append("|%s\"%s\"" % (' ' * indent, element.tail)) + rv.append("|{}\"{}\"".format(' ' * indent, element.tail)) else: assert isinstance(element, etree._Element) nsmatch = etree_builders.tag_regexp.match(element.tag) @@ -107,11 +101,11 @@ def serializeElement(element, indent=0): ns = nsmatch.group(1) tag = nsmatch.group(2) prefix = constants.prefixes[ns] - rv.append("|%s<%s %s>" % (' ' * indent, prefix, - infosetFilter.fromXmlName(tag))) + rv.append("|{}<{} {}>".format(' ' * indent, prefix, + infosetFilter.fromXmlName(tag))) else: - rv.append("|%s<%s>" % (' ' * indent, - infosetFilter.fromXmlName(element.tag))) + rv.append("|{}<{}>".format(' ' * indent, + infosetFilter.fromXmlName(element.tag))) if hasattr(element, "attrib"): attributes = [] @@ -121,21 +115,21 @@ def serializeElement(element, indent=0): ns, name = nsmatch.groups() name = infosetFilter.fromXmlName(name) prefix = constants.prefixes[ns] - attr_string = "%s %s" % (prefix, name) + attr_string = f"{prefix} {name}" else: attr_string = infosetFilter.fromXmlName(name) attributes.append((attr_string, value)) for name, value in sorted(attributes): - rv.append('|%s%s="%s"' % (' ' * (indent + 2), name, value)) + rv.append('|{}{}="{}"'.format(' ' * (indent + 2), name, value)) if element.text: - rv.append("|%s\"%s\"" % (' ' * (indent + 2), element.text)) + rv.append("|{}\"{}\"".format(' ' * (indent + 2), element.text)) indent += 2 for child in element: serializeElement(child, indent) if hasattr(element, "tail") and element.tail: - rv.append("|%s\"%s\"" % (' ' * (indent - 2), element.tail)) + rv.append("|{}\"{}\"".format(' ' * (indent - 2), element.tail)) serializeElement(element, 0) return "\n".join(rv) @@ -156,23 +150,23 @@ 
def serializeElement(element): serializeElement(element.getroot()) elif element.tag == comment_type: - rv.append("" % (element.text,)) + rv.append(f"") else: # This is assumed to be an ordinary element if not element.attrib: - rv.append("<%s>" % (element.tag,)) + rv.append(f"<{element.tag}>") else: - attr = " ".join(["%s=\"%s\"" % (name, value) - for name, value in element.attrib.items()]) - rv.append("<%s %s>" % (element.tag, attr)) + attr = " ".join(f"{name}=\"{value}\"" + for name, value in element.attrib.items()) + rv.append(f"<{element.tag} {attr}>") if element.text: rv.append(element.text) for child in element: serializeElement(child) - rv.append("" % (element.tag,)) + rv.append(f"") if hasattr(element, "tail") and element.tail: rv.append(element.tail) @@ -201,15 +195,13 @@ def __init__(self, element): def _coerceKey(self, key): if isinstance(key, tuple): - name = "{%s}%s" % (key[2], infosetFilter.coerceAttribute(key[1])) + name = f"{{{key[2]}}}{infosetFilter.coerceAttribute(key[1])}" else: name = infosetFilter.coerceAttribute(key) return name def __getitem__(self, key): value = self._element._element.attrib[self._coerceKey(key)] - if not PY3 and isinstance(value, binary_type): - value = value.decode("ascii") return value def __setitem__(self, key, value): @@ -332,7 +324,7 @@ def insertCommentMain(self, data, parent=None): if (parent == self.document and self.document._elementTree.getroot()[-1].tag == comment_type): warnings.warn("lxml cannot represent adjacent comments beyond the root elements", DataLossWarning) - super(TreeBuilder, self).insertComment(data, parent) + super().insertComment(data, parent) def insertRoot(self, token): # Because of the way libxml2 works, it doesn't seem to be possible to @@ -379,7 +371,7 @@ def insertRoot(self, token): if namespace is None: etree_tag = name else: - etree_tag = "{%s}%s" % (namespace, name) + etree_tag = f"{{{namespace}}}{name}" root.tag = etree_tag # Add the root element to the internal child/open data structures 
diff --git a/html5lib/treewalkers/__init__.py b/html5lib/treewalkers/__init__.py index b2d3aac3..70e0fff6 100644 --- a/html5lib/treewalkers/__init__.py +++ b/html5lib/treewalkers/__init__.py @@ -8,7 +8,6 @@ returns an iterator which generates tokens. """ -from __future__ import absolute_import, division, unicode_literals from .. import constants from .._utils import default_etree @@ -96,10 +95,10 @@ def pprint(walker): ns = constants.prefixes[token["namespace"]] else: ns = token["namespace"] - name = "%s %s" % (ns, token["name"]) + name = "{} {}".format(ns, token["name"]) else: name = token["name"] - output.append("%s<%s>" % (" " * indent, name)) + output.append("{}<{}>".format(" " * indent, name)) indent += 2 # attributes (sorted for consistent ordering) attrs = token["data"] @@ -109,10 +108,10 @@ def pprint(walker): ns = constants.prefixes[namespace] else: ns = namespace - name = "%s %s" % (ns, localname) + name = f"{ns} {localname}" else: name = localname - output.append("%s%s=\"%s\"" % (" " * indent, name, value)) + output.append("{}{}=\"{}\"".format(" " * indent, name, value)) # self-closing if type == "EmptyTag": indent -= 2 @@ -121,7 +120,7 @@ def pprint(walker): indent -= 2 elif type == "Comment": - output.append("%s" % (" " * indent, token["data"])) + output.append("{}".format(" " * indent, token["data"])) elif type == "Doctype": if token["name"]: @@ -137,13 +136,13 @@ def pprint(walker): token["name"], token["systemId"])) else: - output.append("%s" % (" " * indent, - token["name"])) + output.append("{}".format(" " * indent, + token["name"])) else: - output.append("%s" % (" " * indent,)) + output.append("{}".format(" " * indent)) elif type == "Characters": - output.append("%s\"%s\"" % (" " * indent, token["data"])) + output.append("{}\"{}\"".format(" " * indent, token["data"])) elif type == "SpaceCharacters": assert False, "concatenateCharacterTokens should have got rid of all Space tokens" diff --git a/html5lib/treewalkers/base.py 
b/html5lib/treewalkers/base.py index 80c474c4..a4a9c71a 100644 --- a/html5lib/treewalkers/base.py +++ b/html5lib/treewalkers/base.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from xml.dom import Node from ..constants import namespaces, voidElements, spaceCharacters @@ -17,7 +15,7 @@ spaceCharacters = "".join(spaceCharacters) -class TreeWalker(object): +class TreeWalker: """Walks a tree yielding tokens Tokens are dicts that all have a ``type`` field specifying the type of the @@ -201,15 +199,13 @@ def __iter__(self): yield self.doctype(*details) elif type == TEXT: - for token in self.text(*details): - yield token + yield from self.text(*details) elif type == ELEMENT: namespace, name, attributes, hasChildren = details if (not namespace or namespace == namespaces["html"]) and name in voidElements: - for token in self.emptyTag(namespace, name, attributes, - hasChildren): - yield token + yield from self.emptyTag(namespace, name, attributes, + hasChildren) hasChildren = False else: yield self.startTag(namespace, name, attributes) diff --git a/html5lib/treewalkers/dom.py b/html5lib/treewalkers/dom.py index b0c89b00..ac88cd9d 100644 --- a/html5lib/treewalkers/dom.py +++ b/html5lib/treewalkers/dom.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from xml.dom import Node from . import base diff --git a/html5lib/treewalkers/etree.py b/html5lib/treewalkers/etree.py index 44653372..4ad3d58d 100644 --- a/html5lib/treewalkers/etree.py +++ b/html5lib/treewalkers/etree.py @@ -1,9 +1,6 @@ -from __future__ import absolute_import, division, unicode_literals - from collections import OrderedDict import re -from six import string_types from . 
import base from .._utils import moduleFactoryFactory @@ -51,7 +48,7 @@ def getNodeDetails(self, node): return base.COMMENT, node.text else: - assert isinstance(node.tag, string_types), type(node.tag) + assert isinstance(node.tag, str), type(node.tag) # This is assumed to be an ordinary element match = tag_regexp.match(node.tag) if match: diff --git a/html5lib/treewalkers/etree_lxml.py b/html5lib/treewalkers/etree_lxml.py index a614ac5b..0ec633ac 100644 --- a/html5lib/treewalkers/etree_lxml.py +++ b/html5lib/treewalkers/etree_lxml.py @@ -1,6 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals -from six import text_type - from collections import OrderedDict from lxml import etree @@ -14,13 +11,13 @@ def ensure_str(s): if s is None: return None - elif isinstance(s, text_type): + elif isinstance(s, str): return s else: return s.decode("ascii", "strict") -class Root(object): +class Root: def __init__(self, et): self.elementtree = et self.children = [] @@ -58,7 +55,7 @@ def __len__(self): return 1 -class Doctype(object): +class Doctype: def __init__(self, root_node, name, public_id, system_id): self.root_node = root_node self.name = name @@ -81,7 +78,7 @@ def getnext(self): return None -class FragmentWrapper(object): +class FragmentWrapper: def __init__(self, fragment_root, obj): self.root_node = fragment_root self.obj = obj diff --git a/html5lib/treewalkers/genshi.py b/html5lib/treewalkers/genshi.py index 7483be27..d4757af2 100644 --- a/html5lib/treewalkers/genshi.py +++ b/html5lib/treewalkers/genshi.py @@ -1,5 +1,3 @@ -from __future__ import absolute_import, division, unicode_literals - from genshi.core import QName from genshi.core import START, END, XML_NAMESPACE, DOCTYPE, TEXT from genshi.core import START_NS, END_NS, START_CDATA, END_CDATA, PI, COMMENT @@ -15,14 +13,12 @@ def __iter__(self): previous = None for event in self.tree: if previous is not None: - for token in self.tokens(previous, event): - yield token + yield from 
self.tokens(previous, event) previous = event # Don't forget the final event! if previous is not None: - for token in self.tokens(previous, None): - yield token + yield from self.tokens(previous, None) def tokens(self, event, next): kind, data, _ = event @@ -38,10 +34,9 @@ def tokens(self, event, next): converted_attribs[(None, k)] = v if namespace == namespaces["html"] and name in voidElements: - for token in self.emptyTag(namespace, name, converted_attribs, - not next or next[0] != END or - next[1] != tag): - yield token + yield from self.emptyTag(namespace, name, converted_attribs, + not next or next[0] != END or + next[1] != tag) else: yield self.startTag(namespace, name, converted_attribs) @@ -55,8 +50,7 @@ def tokens(self, event, next): yield self.comment(data) elif kind == TEXT: - for token in self.text(data): - yield token + yield from self.text(data) elif kind == DOCTYPE: yield self.doctype(*data) diff --git a/parse.py b/parse.py index e6806b46..dd919364 100755 --- a/parse.py +++ b/parse.py @@ -36,13 +36,12 @@ def parse(): pass elif f == '-': f = sys.stdin - if sys.version_info[0] >= 3: - encoding = None + encoding = None else: try: # Try opening from file system f = open(f, "rb") - except IOError as e: + except OSError as e: sys.stderr.write("Unable to open file: %s\n" % e) sys.exit(1) except IndexError: @@ -81,7 +80,7 @@ def parse(): if document: printOutput(p, document, opts) t2 = time.time() - sys.stderr.write("\n\nRun took: %fs (plus %fs to print the output)" % (t1 - t0, t2 - t1)) + sys.stderr.write(f"\n\nRun took: {t1 - t0:f}s (plus {t2 - t1:f}s to print the output)") else: sys.stderr.write("\n\nRun took: %fs" % (t1 - t0)) else: @@ -135,11 +134,7 @@ def printOutput(parser, document, opts): kwargs["sanitize"] = True tokens = treewalkers.getTreeWalker(opts.treebuilder)(document) - if sys.version_info[0] >= 3: - encoding = None - else: - encoding = "utf-8" - for text in serializer.HTMLSerializer(**kwargs).serialize(tokens, encoding=encoding): + for text 
in serializer.HTMLSerializer(**kwargs).serialize(tokens, encoding=None): sys.stdout.write(text) if not text.endswith('\n'): sys.stdout.write('\n') diff --git a/requirements-install.sh b/requirements-install.sh index b7a8d96d..41d9bc42 100755 --- a/requirements-install.sh +++ b/requirements-install.sh @@ -1,9 +1,5 @@ #!/bin/bash -ex -if [[ $SIX_VERSION ]]; then - pip install six==$SIX_VERSION -fi - pip install -r requirements-test.txt if [[ $USE_OPTIONAL == "true" ]]; then diff --git a/requirements-test.txt b/requirements-test.txt index 57f8f617..06e0d48e 100644 --- a/requirements-test.txt +++ b/requirements-test.txt @@ -2,9 +2,6 @@ tox>=3.15.1,<4 flake8>=3.8.1,<3.9 -pytest>=4.6.10,<5 ; python_version < '3' -pytest>=5.4.2,<7 ; python_version >= '3' +pytest>=7,<8 coverage>=5.1,<6 pytest-expect>=1.1.0,<2 -mock>=3.0.5,<4 ; python_version < '3.6' -mock>=4.0.2,<5 ; python_version >= '3.6' diff --git a/requirements.txt b/requirements.txt index ae7ec3d0..be8fcb77 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,2 +1 @@ -six>=1.9 webencodings diff --git a/setup.cfg b/setup.cfg index 0b2bb9c7..2a44c0f2 100644 --- a/setup.cfg +++ b/setup.cfg @@ -1,6 +1,3 @@ -[bdist_wheel] -universal = 1 - [pep8] ignore = N max-line-length = 139 diff --git a/setup.py b/setup.py index f84c1284..e24296a4 100644 --- a/setup.py +++ b/setup.py @@ -1,8 +1,5 @@ -from __future__ import print_function - import ast import codecs -import sys from os.path import join, dirname from setuptools import setup, find_packages, __version__ as setuptools_version @@ -18,8 +15,7 @@ # _markerlib.default_environment() obtains its data from _VARS # and wraps it in another dict, but _markerlib_evaluate writes -# to the dict while it is iterating the keys, causing an error -# on Python 3 only. +# to the dict while it is iterating the keys, causing an error. # Replace _markerlib.default_environment to return a custom dict # that has all the necessary markers, and ignores any writes. 
@@ -32,7 +28,7 @@ def pop(self, i=-1): return self[i] -if _markerlib and sys.version_info[0] == 3: +if _markerlib: env = _markerlib.markers._VARS for key in list(env.keys()): new_key = key.replace('.', '_') @@ -63,13 +59,10 @@ def default_environment(): 'License :: OSI Approved :: MIT License', 'Operating System :: OS Independent', 'Programming Language :: Python', - 'Programming Language :: Python :: 2', - 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.5', - 'Programming Language :: Python :: 3.6', 'Programming Language :: Python :: 3.7', 'Programming Language :: Python :: 3.8', + 'Programming Language :: Python :: 3 :: Only', 'Programming Language :: Python :: Implementation :: CPython', 'Programming Language :: Python :: Implementation :: PyPy', 'Topic :: Software Development :: Libraries :: Python Modules', @@ -104,10 +97,9 @@ def default_environment(): maintainer_email='james@hoppipolla.co.uk', packages=find_packages(exclude=["*.tests", "*.tests.*", "tests.*", "tests"]), install_requires=[ - 'six>=1.9', 'webencodings', ], - python_requires=">=2.7, !=3.0.*, !=3.1.*, !=3.2.*, !=3.3.*, !=3.4.*", + python_requires=">=3.7", extras_require={ # A conditional extra will only install these items when the extra is # requested and the condition matches. 
diff --git a/tox.ini b/tox.ini index 16b8cf41..027278f2 100644 --- a/tox.ini +++ b/tox.ini @@ -1,5 +1,5 @@ [tox] -envlist = py{27,35,36,37,38,py,py3}-{base,six19,optional} +envlist = py{37,38,py3}-{base,optional} [testenv] deps = @@ -11,8 +11,7 @@ passenv = PYTEST_COMMAND # this is maintained so one can, e.g., PYTEST_COMMAND="coverage run -m pytest" COVERAGE_RUN_OPTIONS commands = - six19: pip install six==1.9 - {env:PYTEST_COMMAND:{envbindir}/py.test} {posargs} + {env:PYTEST_COMMAND:{envbindir}/pytest} {posargs} flake8 {toxinidir} [testenv:doc] diff --git a/utils/entities.py b/utils/entities.py index 6e8ca458..faeb4b45 100644 --- a/utils/entities.py +++ b/utils/entities.py @@ -49,9 +49,9 @@ def test_description(name, good): semicolon_text = {True: "with a semi-colon", False: "without a semi-colon"}[with_semicolon] if good: - text = "Named entity: %s %s" % (name, semicolon_text) + text = f"Named entity: {name} {semicolon_text}" else: - text = "Bad named entity: %s %s" % (name, semicolon_text) + text = f"Bad named entity: {name} {semicolon_text}" return text @@ -80,7 +80,7 @@ def subentity_exists(entity_name, entities): def make_entities_code(entities): - entities_text = "\n".join(" \"%s\": u\"%s\"," % ( + entities_text = "\n".join(" \"{}\": u\"{}\",".format( name, entities[name].encode( - "unicode-escape").replace("\"", "\\\"")) + "unicode-escape").decode("ascii").replace("\"", "\\\"")) for name in sorted(entities.keys()))