|
5 | 5 | from constants import EOF, spaceCharacters, asciiLetters, asciiUppercase
|
6 | 6 | from constants import encodings
|
7 | 7 | from utils import MethodDispatcher
|
| 8 | +from collections import deque |
8 | 9 |
|
| 10 | +try: |
| 11 | + from collections import deque |
| 12 | +except ImportError: |
| 13 | + from utils import deque |
| 14 | + |
9 | 15 | class HTMLInputStream(object):
|
10 | 16 | """Provides a unicode stream of characters to the HTMLTokenizer.
|
11 | 17 |
|
@@ -55,7 +61,7 @@ def __init__(self, source, encoding=None, parseMeta=True, chardet=True):
|
55 | 61 | self.dataStream = codecs.getreader(self.charEncoding)(self.rawStream,
|
56 | 62 | 'replace')
|
57 | 63 |
|
58 |
| - self.queue = [] |
| 64 | + self.queue = deque([]) |
59 | 65 | self.errors = []
|
60 | 66 |
|
61 | 67 | self.line = self.col = 0
|
@@ -212,7 +218,7 @@ def char(self):
|
212 | 218 | if not self.queue:
|
213 | 219 | return EOF
|
214 | 220 |
|
215 |
| - char = self.queue.pop(0) |
| 221 | + char = self.queue.popleft() |
216 | 222 |
|
217 | 223 | # update position in stream
|
218 | 224 | if char == '\n':
|
@@ -277,8 +283,7 @@ def charsUntil(self, characters, opposite = False):
|
277 | 283 | else:
|
278 | 284 | self.col += 1
|
279 | 285 |
|
280 |
| - rv = u"".join(self.queue[:i]) |
281 |
| - self.queue = self.queue[i:] |
| 286 | + rv = u"".join([ self.queue.popleft() for c in range(i) ]) |
282 | 287 |
|
283 | 288 | #Calculate where we now are in the stream
|
284 | 289 | #One possible optimisation would be to store all read characters and
|
@@ -313,7 +318,9 @@ def charsUntil(self, characters, opposite = False):
|
313 | 318 |
|
314 | 319 | def unget(self, chars):
|
315 | 320 | if chars:
|
316 |
| - self.queue = list(chars) + self.queue |
| 321 | + l = list(chars) |
| 322 | + l.reverse() |
| 323 | + self.queue.extendleft(l) |
317 | 324 | #Alter the current line, col position
|
318 | 325 | for c in chars[::-1]:
|
319 | 326 | if c == '\n':
|
|
0 commit comments