Skip to content

Commit 9b1096e

Browse files
committed
Avoid O(n^2) string concatenation in concatenateCharacterTokens()
1 parent 8f7f9f0 commit 9b1096e

File tree

1 file changed

+7
-10
lines changed

1 file changed

+7
-10
lines changed

html5lib/treewalkers/__init__.py

Lines changed: 7 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -62,21 +62,18 @@ def getTreeWalker(treeType, implementation=None, **kwargs):
6262

6363

6464
def concatenateCharacterTokens(tokens):
65-
charactersToken = None
65+
pendingCharacters = []
6666
for token in tokens:
6767
type = token["type"]
6868
if type in ("Characters", "SpaceCharacters"):
69-
if charactersToken is None:
70-
charactersToken = {"type": "Characters", "data": token["data"]}
71-
else:
72-
charactersToken["data"] += token["data"]
69+
pendingCharacters.append(token["data"])
7370
else:
74-
if charactersToken is not None:
75-
yield charactersToken
76-
charactersToken = None
71+
if pendingCharacters:
72+
yield {"type": "Characters", "data": "".join(pendingCharacters)}
73+
pendingCharacters = []
7774
yield token
78-
if charactersToken is not None:
79-
yield charactersToken
75+
if pendingCharacters:
76+
yield {"type": "Characters", "data": "".join(pendingCharacters)}
8077

8178

8279
def pprint(tokens):

0 commit comments

Comments
 (0)