6
6
7
7
import sys
8
8
import os
9
+ import traceback
9
10
from optparse import OptionParser
10
11
11
12
from html5lib import html5parser , sanitizer
@@ -48,10 +49,7 @@ def parse():
48
49
else :
49
50
tokenizer = HTMLTokenizer
50
51
51
- if opts .log :
52
- html5parser .debug_log = True
53
-
54
- p = html5parser .HTMLParser (tree = treebuilder , tokenizer = tokenizer )
52
+ p = html5parser .HTMLParser (tree = treebuilder , tokenizer = tokenizer , debug = opts .log )
55
53
56
54
if opts .fragment :
57
55
parseMethod = p .parseFragment
@@ -73,46 +71,54 @@ def parse():
73
71
elif opts .time :
74
72
import time
75
73
t0 = time .time ()
76
- document = parseMethod ( f , encoding = encoding )
74
+ document = run ( parseMethod , f , encoding )
77
75
t1 = time .time ()
78
76
printOutput (p , document , opts )
79
77
t2 = time .time ()
80
78
sys .stderr .write ("\n \n Run took: %fs (plus %fs to print the output)" % (t1 - t0 , t2 - t1 ))
81
79
else :
82
- document = parseMethod ( f , encoding = encoding )
80
+ document = run ( parseMethod , f , encoding )
83
81
printOutput (p , document , opts )
84
82
83
def run(parseMethod, f, encoding):
    """Call ``parseMethod`` on ``f`` and return the parsed document.

    ``parseMethod`` is invoked as ``parseMethod(f, encoding=encoding)``.
    If it raises an ordinary exception, the traceback is printed to
    stderr and ``None`` is returned instead of propagating the error, so
    the caller can still report whatever the parser collected before the
    failure.

    ``KeyboardInterrupt`` and ``SystemExit`` are deliberately *not*
    swallowed: interrupting or exiting the process while a parse is in
    progress must still terminate the script.
    """
    try:
        return parseMethod(f, encoding=encoding)
    except Exception:
        # Best-effort: report the failure and signal "no document" to the
        # caller rather than aborting the whole run.
        traceback.print_exc()
        return None
90
+
85
91
def printOutput (parser , document , opts ):
86
92
if opts .encoding :
87
93
print "Encoding:" , parser .tokenizer .stream .charEncoding
88
94
89
- if opts .log :
90
- for item in parser . log :
91
- print item
92
-
93
- if opts .xml :
94
- sys .stdout .write (document .toxml ("utf-8" ))
95
- elif opts .tree :
96
- if not hasattr (document ,'__getitem__' ):
97
- document = [document ]
98
- for fragment in document :
99
- print parser .tree .testSerializer (fragment ).encode ("utf-8" )
100
- elif opts .hilite :
101
- sys .stdout .write (document .hilite ("utf-8" ))
102
- elif opts .html :
103
- kwargs = {}
104
- for opt in serializer .HTMLSerializer .options :
105
- try :
106
- kwargs [opt ] = getattr (opts ,opt )
107
- except :
108
- pass
109
- if not kwargs ['quote_char' ]:
110
- del kwargs ['quote_char' ]
111
-
112
- tokens = treewalkers .getTreeWalker (opts .treebuilder )(document )
113
- for text in serializer .HTMLSerializer (** kwargs ).serialize (tokens , encoding = 'utf-8' ):
114
- sys .stdout .write (text )
115
- if not text .endswith ('\n ' ): sys .stdout .write ('\n ' )
95
+ for item in parser .log :
96
+ print item
97
+
98
+ if document is not None :
99
+ if opts .xml :
100
+ sys .stdout .write (document .toxml ("utf-8" ))
101
+ elif opts .tree :
102
+ if not hasattr (document ,'__getitem__' ):
103
+ document = [document ]
104
+ for fragment in document :
105
+ print parser .tree .testSerializer (fragment ).encode ("utf-8" )
106
+ elif opts .hilite :
107
+ sys .stdout .write (document .hilite ("utf-8" ))
108
+ elif opts .html :
109
+ kwargs = {}
110
+ for opt in serializer .HTMLSerializer .options :
111
+ try :
112
+ kwargs [opt ] = getattr (opts ,opt )
113
+ except :
114
+ pass
115
+ if not kwargs ['quote_char' ]:
116
+ del kwargs ['quote_char' ]
117
+
118
+ tokens = treewalkers .getTreeWalker (opts .treebuilder )(document )
119
+ for text in serializer .HTMLSerializer (** kwargs ).serialize (tokens , encoding = 'utf-8' ):
120
+ sys .stdout .write (text )
121
+ if not text .endswith ('\n ' ): sys .stdout .write ('\n ' )
116
122
if opts .error :
117
123
errList = []
118
124
for pos , errorcode , datavars in parser .errors :
0 commit comments