Skip to content

Commit 0c99d3a

Browse files
committed
Fix #37: Preserve order of attributes on serialization.
This doesn't do anything about the fact that none of our treebuilders preserve attribute order: it merely avoids the serializer reordering them from the order it receives them in. This also provides a filter to obtain the previous behaviour of lexicographical ordering, used by the serializer tests to ensure determinism of the output.
1 parent 72e5d8d commit 0c99d3a

8 files changed

+52
-5
lines changed

.travis.yml

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ before_install:
2828
install:
2929
- pip install -r requirements.txt -r requirements-test.txt --use-mirrors
3030
- if [[ $USE_OPTIONAL == "true" ]]; then pip install -r requirements-optional.txt --use-mirrors; fi
31+
- if [[ $TRAVIS_PYTHON_VERSION == "2.6" && $USE_OPTIONAL == "true" ]]; then pip install -r requirements-optional-2.6.txt --use-mirrors; fi
3132
- if [[ $TRAVIS_PYTHON_VERSION != "pypy" && $USE_OPTIONAL == "true" ]]; then pip install -r requirements-optional-cpython.txt --use-mirrors; fi
3233
- if [[ $FLAKE == "true" ]]; then pip install --use-mirrors flake8; fi
3334

README.rst

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -63,6 +63,10 @@ format:
6363
6464
More documentation is available at http://html5lib.readthedocs.org/.
6565

66+
- ``ordereddict`` can be used under Python 2.6
67+
(``collections.OrderedDict`` is used instead on later versions) to
68+
serialize attributes in alphabetical order.
69+
6670

6771
Installation
6872
------------
@@ -106,7 +110,8 @@ Tests
106110
-----
107111

108112
Unit tests require the ``nose`` library and can be run using the
109-
``nosetests`` command in the root directory. All should pass.
113+
``nosetests`` command in the root directory; ``ordereddict`` is
114+
required under Python 2.6. All should pass.
110115

111116
Test data are contained in a separate `html5lib-tests
112117
<https://github.com/html5lib/html5lib-tests>`_ repository and included
@@ -115,8 +120,6 @@ as a submodule, thus for git checkouts they must be initialized::
115120
$ git submodule init
116121
$ git submodule update
117122

118-
This is unneeded for release tarballs.
119-
120123
If you have all compatible Python implementations available on your
121124
system, you can run tests on all of them using the ``tox`` utility,
122125
which can be found on PyPI.
Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
from __future__ import absolute_import, division, unicode_literals
2+
3+
from . import _base
4+
5+
try:
6+
from collections import OrderedDict
7+
except ImportError:
8+
from ordereddict import OrderedDict
9+
10+
11+
class Filter(_base.Filter):
12+
def __iter__(self):
13+
for token in _base.Filter.__iter__(self):
14+
if token["type"] in ("StartTag", "EmptyTag"):
15+
attrs = OrderedDict()
16+
for name, value in sorted(token["data"].items(),
17+
key=lambda x: x[0]):
18+
attrs[name] = value
19+
token["data"] = attrs
20+
yield token

html5lib/serializer/htmlserializer.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -221,7 +221,7 @@ def serialize(self, treewalker, encoding=None):
221221
in_cdata = True
222222
elif in_cdata:
223223
self.serializeError(_("Unexpected child element of a CDATA element"))
224-
for (attr_namespace, attr_name), attr_value in sorted(token["data"].items()):
224+
for (attr_namespace, attr_name), attr_value in token["data"].items():
225225
# TODO: Add namespace support here
226226
k = attr_name
227227
v = attr_value

html5lib/tests/test_serializer.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -5,13 +5,20 @@
55

66
from .support import get_data_files
77

8+
try:
9+
from collections import OrderedDict
10+
except ImportError:
11+
# Python 2.6 support
12+
from ordereddict import OrderedDict
13+
814
try:
915
unittest.TestCase.assertEqual
1016
except AttributeError:
1117
unittest.TestCase.assertEqual = unittest.TestCase.assertEquals
1218

1319
import html5lib
1420
from html5lib import serializer, constants
21+
from html5lib.filters.alphabeticalattributes import Filter as AlphabeticalAttributesFilter
1522
from html5lib.treewalkers._base import TreeWalker
1623

1724
optionals_loaded = []
@@ -81,7 +88,8 @@ def _convertAttrib(self, attribs):
8188

8289
def serialize_html(input, options):
8390
options = dict([(str(k), v) for k, v in options.items()])
84-
return serializer.HTMLSerializer(**options).render(JsonWalker(input), options.get("encoding", None))
91+
stream = AlphabeticalAttributesFilter(JsonWalker(input))
92+
return serializer.HTMLSerializer(**options).render(stream, options.get("encoding", None))
8593

8694

8795
def runSerializerTest(input, expected, options):

requirements-optional-2.6.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# Can be used to force attributes to be serialized in alphabetical
2+
# order.
3+
ordereddict

requirements-test.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1 +1,2 @@
11
nose
2+
ordereddict # Python 2.6

tox.ini

Lines changed: 11 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -20,3 +20,14 @@ deps =
2020
Genshi
2121
nose
2222
six
23+
24+
[testenv:py26]
25+
basepython = python2.6
26+
deps =
27+
charade
28+
datrie
29+
Genshi
30+
lxml
31+
nose
32+
six
33+
ordereddict

0 commit comments

Comments
 (0)