Commit 542dc08

Authored and committed by James Graham

Move tests that are problematic in json or java to a single file. Add double escaping flag to sneak unpaired surrogates past over-zealous json decoders
1 parent 51460c0 commit 542dc08

1 file changed: +21 -2 lines changed
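
The "double escaping flag" from the commit message keeps the test file pure ASCII: an unpaired surrogate such as U+D800 is stored as the literal characters \uD800 behind an extra backslash and the test is marked "doubleEscaped": true, so strict JSON decoders (for example on the Java side) never have to produce a lone surrogate; the test runner then applies a unicode-escape decode to recover the real code point. A minimal sketch of the two layers, with a made-up value and the decode step re-spelled for Python 3 (the diff itself targets Python 2 and writes it as inp.decode("unicode-escape")):

    import json

    # In the .test file the surrogate is double escaped: the JSON source
    # holds the escaped-backslash sequence \\uD800, which is plain ASCII.
    raw = '{"doubleEscaped": true, "input": "\\\\uD800"}'

    test = json.loads(raw)
    # First layer removed by the JSON decoder: the value is now the six
    # ASCII characters backslash, u, D, 8, 0, 0 -- still no lone surrogate.
    assert test["input"] == "\\uD800"

    # Second layer removed by a unicode-escape decode.
    decoded = test["input"].encode("ascii").decode("unicode-escape")
    assert decoded == "\ud800"   # the actual unpaired surrogate U+D800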

html5lib/tests/test_tokenizer.py

Lines changed: 21 additions & 2 deletions
@@ -86,7 +86,7 @@ def normalizeTokens(tokens):
     for i, token in enumerate(tokens):
         if token[0] == u'ParseError':
             tokens[i] = token[0]
-    return json.loads(json.dumps(tokens))
+    return tokens
 
 def tokensMatch(expectedTokens, receivedTokens, ignoreErrorOrder):
     """Test whether the test has passed or failed
@@ -121,12 +121,31 @@ def tokensMatch(expectedTokens, receivedTokens, ignoreErrorOrder):
 
     return tokens["expected"] == tokens["received"]
 
+def unescape_test(test):
+    def decode(inp):
+        return inp.decode("unicode-escape")
+
+    test["input"] = decode(test["input"])
+    for token in test["output"]:
+        if token == "ParseError":
+            continue
+        else:
+            token[1] = decode(token[1])
+            if len(token) > 2:
+                for key, value in token[2]:
+                    del token[2][key]
+                    token[2][decode(key)] = decode(value)
+    sys.stderr.write(str(test))
+    return test
 
 class TestCase(unittest.TestCase):
     def runTokenizerTest(self, test):
         #XXX - move this out into the setup function
         #concatenate all consecutive character tokens into a single token
-        expected = concatenateCharacterTokens(test['output'])
+        if 'doubleEscaped' in test:
+            test = unescape_test(test)
+
+        expected = concatenateCharacterTokens(test['output'])
         if 'lastStartTag' not in test:
             test['lastStartTag'] = None
         outBuffer = cStringIO.StringIO()
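
As a usage illustration, this is how a double-escaped record flows through the new guard in runTokenizerTest (if 'doubleEscaped' in test: test = unescape_test(test)). The record below is invented, and the helper is a Python 3 re-spelling of the diff's decode step covering only input and Character tokens (the real code targets Python 2 and also rewrites attribute dicts):

    import json

    # Invented double-escaped test record; "input", "output" and
    # "doubleEscaped" are the keys used by the diff above.
    record = json.loads(r'{"doubleEscaped": true, '
                        r'"input": "a\\uD800b", '
                        r'"output": [["Character", "a\\uD800b"]]}')

    def unescape_record(test):
        # Mirrors unescape_test for the simple Character-token case.
        def decode(inp):
            return inp.encode("ascii").decode("unicode-escape")

        test["input"] = decode(test["input"])
        for token in test["output"]:
            if token == "ParseError":   # expected parse errors stay as-is
                continue
            token[1] = decode(token[1])
        return test

    if "doubleEscaped" in record:       # same guard as in runTokenizerTest
        record = unescape_record(record)

    assert record["input"] == "a\ud800b"         # real lone surrogate now
    assert record["output"][0][1] == "a\ud800b"  # ready to compare against
                                                 # the tokenizer's output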

0 commit comments
