Skip to content

Commit e11ed5a

Browse files
author
James Graham
committed
Add support for logging to parse.py
1 parent d34b631 commit e11ed5a

File tree

1 file changed

+39
-33
lines changed

1 file changed

+39
-33
lines changed

parse.py

Lines changed: 39 additions & 33 deletions
Original file line number | Diff line number | Diff line change
@@ -6,6 +6,7 @@
66

77
import sys
88
import os
9+
import traceback
910
from optparse import OptionParser
1011

1112
from html5lib import html5parser, sanitizer
@@ -48,10 +49,7 @@ def parse():
4849
else:
4950
tokenizer = HTMLTokenizer
5051

51-
if opts.log:
52-
html5parser.debug_log = True
53-
54-
p = html5parser.HTMLParser(tree=treebuilder, tokenizer=tokenizer)
52+
p = html5parser.HTMLParser(tree=treebuilder, tokenizer=tokenizer, debug=opts.log)
5553

5654
if opts.fragment:
5755
parseMethod = p.parseFragment
@@ -73,46 +71,54 @@ def parse():
7371
elif opts.time:
7472
import time
7573
t0 = time.time()
76-
document = parseMethod(f, encoding=encoding)
74+
document = run(parseMethod, f, encoding)
7775
t1 = time.time()
7876
printOutput(p, document, opts)
7977
t2 = time.time()
8078
sys.stderr.write("\n\nRun took: %fs (plus %fs to print the output)"%(t1-t0, t2-t1))
8179
else:
82-
document = parseMethod(f, encoding=encoding)
80+
document = run(parseMethod, f, encoding)
8381
printOutput(p, document, opts)
8482

83+
def run(parseMethod, f, encoding):
    """Parse *f* with *parseMethod*, reporting failures instead of raising.

    parseMethod is invoked as ``parseMethod(f, encoding=encoding)``.

    Returns whatever parseMethod returns, or None if it raised an ordinary
    exception. In the failure case the traceback is printed (to stderr, the
    default for ``traceback.print_exc``) so the caller can carry on and
    still emit whatever diagnostics it has.
    """
    try:
        return parseMethod(f, encoding=encoding)
    except Exception:
        # Deliberately not a bare ``except:``: KeyboardInterrupt and
        # SystemExit must propagate so the user can abort a long parse
        # rather than having the interruption reported as a parse error.
        traceback.print_exc()
        return None
90+
8591
def printOutput(parser, document, opts):
8692
if opts.encoding:
8793
print "Encoding:", parser.tokenizer.stream.charEncoding
8894

89-
if opts.log:
90-
for item in parser.log:
91-
print item
92-
93-
if opts.xml:
94-
sys.stdout.write(document.toxml("utf-8"))
95-
elif opts.tree:
96-
if not hasattr(document,'__getitem__'):
97-
document = [document]
98-
for fragment in document:
99-
print parser.tree.testSerializer(fragment).encode("utf-8")
100-
elif opts.hilite:
101-
sys.stdout.write(document.hilite("utf-8"))
102-
elif opts.html:
103-
kwargs = {}
104-
for opt in serializer.HTMLSerializer.options:
105-
try:
106-
kwargs[opt] = getattr(opts,opt)
107-
except:
108-
pass
109-
if not kwargs['quote_char']:
110-
del kwargs['quote_char']
111-
112-
tokens = treewalkers.getTreeWalker(opts.treebuilder)(document)
113-
for text in serializer.HTMLSerializer(**kwargs).serialize(tokens, encoding='utf-8'):
114-
sys.stdout.write(text)
115-
if not text.endswith('\n'): sys.stdout.write('\n')
95+
for item in parser.log:
96+
print item
97+
98+
if document is not None:
99+
if opts.xml:
100+
sys.stdout.write(document.toxml("utf-8"))
101+
elif opts.tree:
102+
if not hasattr(document,'__getitem__'):
103+
document = [document]
104+
for fragment in document:
105+
print parser.tree.testSerializer(fragment).encode("utf-8")
106+
elif opts.hilite:
107+
sys.stdout.write(document.hilite("utf-8"))
108+
elif opts.html:
109+
kwargs = {}
110+
for opt in serializer.HTMLSerializer.options:
111+
try:
112+
kwargs[opt] = getattr(opts,opt)
113+
except:
114+
pass
115+
if not kwargs['quote_char']:
116+
del kwargs['quote_char']
117+
118+
tokens = treewalkers.getTreeWalker(opts.treebuilder)(document)
119+
for text in serializer.HTMLSerializer(**kwargs).serialize(tokens, encoding='utf-8'):
120+
sys.stdout.write(text)
121+
if not text.endswith('\n'): sys.stdout.write('\n')
116122
if opts.error:
117123
errList=[]
118124
for pos, errorcode, datavars in parser.errors:

0 commit comments

Comments
 (0)