@@ -104,77 +104,71 @@ def main():
104
104
print ("The test was successful." )
105
105
return
106
106
107
- fn = fi .name + '.html'
108
- print ("Outputing HTML page:" , fn )
109
- with open (fn , 'w' , encoding = 'utf-8' ) as fh :
110
- fh .write (fi .html_out )
111
-
112
- fn = fi .name
113
- print ("Outputing man page:" , fn )
114
- with open (fn , 'w' , encoding = 'utf-8' ) as fh :
115
- fh .write (fi .man_out )
107
+ for fn , txt in ((fi .name + '.html' , fi .html_out ), (fi .name , fi .man_out )):
108
+ print ("Wrote:" , fn )
109
+ with open (fn , 'w' , encoding = 'utf-8' ) as fh :
110
+ fh .write (txt )
116
111
117
112
118
113
class HtmlToManPage (HTMLParser ):
119
114
def __init__(self, fi):
    """Parse ``fi.html_in`` and store the generated outputs back on *fi*.

    All parser state, including the accumulating HTML and man-page output
    fragments, lives in the ``self.state`` namespace so the tag handlers
    can reach it through a single object.

    Reads ``fi.title``, ``fi.prog``, ``fi.sect``, ``fi.date`` and
    ``fi.html_in``.  On return ``fi.html_out`` and ``fi.man_out`` hold the
    finished documents as strings and ``fi.html_in`` has been set to None.
    """
    super().__init__(convert_charrefs=True)

    st = self.state = argparse.Namespace(
        list_state=[],
        # The paragraph macro is prepended to other requests (.SH, .SS,
        # .nf), so it must end exactly at the newline: anything after it
        # would push the following request away from the start of its line.
        p_macro=".P\n",
        at_first_tag_in_li=False,
        at_first_tag_in_dd=False,
        dt_from=None,
        in_pre=False,
        html_out=[HTML_START % fi.title],
        man_out=[MAN_START % (fi.prog, fi.sect, fi.date)],
        txt='',
    )

    # Feeding the input drives the handle_* callbacks, which append to
    # st.html_out / st.man_out as tags and text are encountered.
    self.feed(fi.html_in)
    fi.html_in = None

    st.html_out.append(HTML_END % fi.date)
    st.man_out.append(MAN_END)

    # Join each fragment list once, then drop the list reference.
    fi.html_out = ''.join(st.html_out)
    st.html_out = None

    fi.man_out = ''.join(st.man_out)
    st.man_out = None
146
140
147
141
148
142
def handle_starttag (self , tag , attrs_list ):
149
143
st = self .state
150
144
if args .debug :
151
- print ('START' , tag , attrs_list , st )
145
+ self . output_debug ('START' , ( tag , attrs_list ) )
152
146
if st .at_first_tag_in_li :
153
147
if st .list_state [- 1 ] == 'dl' :
154
148
st .dt_from = tag
155
149
if tag == 'p' :
156
150
tag = 'dt'
157
151
else :
158
- self .html_out .append ('<dt>' )
152
+ st .html_out .append ('<dt>' )
159
153
st .at_first_tag_in_li = False
160
154
if tag == 'p' :
161
155
if not st .at_first_tag_in_dd :
162
- self .man_out .append (st .p_macro )
156
+ st .man_out .append (st .p_macro )
163
157
elif tag == 'li' :
164
158
st .at_first_tag_in_li = True
165
159
lstate = st .list_state [- 1 ]
166
160
if lstate == 'dl' :
167
161
return
168
162
if lstate == 'o' :
169
- self .man_out .append (".IP o\n " )
163
+ st .man_out .append (".IP o\n " )
170
164
else :
171
- self .man_out .append (".IP " + str (lstate ) + ".\n " )
165
+ st .man_out .append (".IP " + str (lstate ) + ".\n " )
172
166
st .list_state [- 1 ] += 1
173
167
elif tag == 'blockquote' :
174
- self .man_out .append (".RS 4\n " )
168
+ st .man_out .append (".RS 4\n " )
175
169
elif tag == 'pre' :
176
170
st .in_pre = True
177
- self .man_out .append (st .p_macro + ".nf\n " )
171
+ st .man_out .append (st .p_macro + ".nf\n " )
178
172
elif tag == 'code' and not st .in_pre :
179
173
st .txt += BOLD_FONT [0 ]
180
174
elif tag == 'strong' or tag == 'bold' :
@@ -188,57 +182,59 @@ class HtmlToManPage(HTMLParser):
188
182
start = int (val ) # We only support integers.
189
183
break
190
184
if st .list_state :
191
- self .man_out .append (".RS\n " )
185
+ st .man_out .append (".RS\n " )
192
186
if start == 0 :
193
187
tag = 'dl'
194
188
attrs_list = [ ]
195
189
st .list_state .append ('dl' )
196
190
else :
197
191
st .list_state .append (start )
198
- self .man_out .append (st .p_macro )
192
+ st .man_out .append (st .p_macro )
199
193
st .p_macro = ".IP\n "
200
194
elif tag == 'ul' :
201
- self .man_out .append (st .p_macro )
195
+ st .man_out .append (st .p_macro )
202
196
if st .list_state :
203
- self .man_out .append (".RS\n " )
197
+ st .man_out .append (".RS\n " )
204
198
st .p_macro = ".IP\n "
205
199
st .list_state .append ('o' )
206
- self .html_out .append ('<' + tag + ' ' .join ( ' ' + var + '="' + safeText (val ) + '"' for var , val in attrs_list ) + '>' )
200
+ st .html_out .append ('<' + tag + ' ' .join ( ' ' + var + '="' + safeText (val ) + '"' for var , val in attrs_list ) + '>' )
207
201
st .at_first_tag_in_dd = False
208
202
209
203
210
204
def handle_endtag (self , tag ):
211
205
st = self .state
212
206
if args .debug :
213
- print ( ' END' , tag , st )
207
+ self . output_debug ( ' END' , ( tag ,) )
214
208
if tag in CONSUMES_TXT or st .dt_from == tag :
215
209
txt = st .txt .strip ()
216
210
st .txt = ''
217
211
else :
218
212
txt = None
219
213
add_to_txt = None
220
214
if tag == 'h1' :
221
- self .man_out .append (st .p_macro + '.SH "' + manify (txt ) + '"\n ' )
215
+ st .man_out .append (st .p_macro + '.SH "' + manify (txt ) + '"\n ' )
216
+ elif tag == 'h2' :
217
+ st .man_out .append (st .p_macro + '.SS "' + manify (txt ) + '"\n ' )
222
218
elif tag == 'p' :
223
219
if st .dt_from == 'p' :
224
220
tag = 'dt'
225
- self .man_out .append ('.IP "' + manify (txt ) + '"\n ' )
221
+ st .man_out .append ('.IP "' + manify (txt ) + '"\n ' )
226
222
st .dt_from = None
227
- else :
228
- self .man_out .append (manify (txt ) + "\n " )
223
+ elif txt != '' :
224
+ st .man_out .append (manify (txt ) + "\n " )
229
225
elif tag == 'li' :
230
226
if st .list_state [- 1 ] == 'dl' :
231
227
if st .at_first_tag_in_li :
232
228
die ("Invalid 0. -> td translation" )
233
229
tag = 'dd'
234
230
if txt != '' :
235
- self .man_out .append (manify (txt ) + "\n " )
231
+ st .man_out .append (manify (txt ) + "\n " )
236
232
st .at_first_tag_in_li = False
237
233
elif tag == 'blockquote' :
238
- self .man_out .append (".RE\n " )
234
+ st .man_out .append (".RE\n " )
239
235
elif tag == 'pre' :
240
236
st .in_pre = False
241
- self .man_out .append (manify (txt ) + "\n .fi\n " )
237
+ st .man_out .append (manify (txt ) + "\n .fi\n " )
242
238
elif tag == 'code' and not st .in_pre :
243
239
add_to_txt = NORM_FONT [0 ]
244
240
elif tag == 'strong' or tag == 'bold' :
@@ -249,34 +245,46 @@ class HtmlToManPage(HTMLParser):
249
245
if st .list_state .pop () == 'dl' :
250
246
tag = 'dl'
251
247
if st .list_state :
252
- self .man_out .append (".RE\n " )
248
+ st .man_out .append (".RE\n " )
253
249
else :
254
250
st .p_macro = ".P\n "
255
251
st .at_first_tag_in_dd = False
256
- self .html_out .append ('</' + tag + '>' )
252
+ st .html_out .append ('</' + tag + '>' )
257
253
if add_to_txt :
258
254
if txt is None :
259
255
st .txt += add_to_txt
260
256
else :
261
257
txt += add_to_txt
262
258
if st .dt_from == tag :
263
- self .man_out .append ('.IP "' + manify (txt ) + '"\n ' )
264
- self .html_out .append ('</dt><dd>' )
259
+ st .man_out .append ('.IP "' + manify (txt ) + '"\n ' )
260
+ st .html_out .append ('</dt><dd>' )
265
261
st .at_first_tag_in_dd = True
266
262
st .dt_from = None
267
263
elif tag == 'dt' :
268
- self .html_out .append ('<dd>' )
264
+ st .html_out .append ('<dd>' )
269
265
st .at_first_tag_in_dd = True
270
266
271
267
272
268
def handle_data(self, data):
    """Handle a run of character data reported by the parser.

    The data is passed through safeText() and appended to the HTML output,
    and the unmodified data is added to the pending ``txt`` buffer that the
    end-tag handler later consumes.  When debugging is enabled the event is
    reported before any state is changed.
    """
    state = self.state
    if args.debug:
        self.output_debug(' DATA', (data,))
    state.html_out.append(safeText(data))
    state.txt += data
278
274
279
275
276
def output_debug(self, event, extra):
    """Print a parser event followed by a dump of the parser state.

    event -- short label for the callback that fired
    extra -- tuple of event-specific details, printed after the label

    When ``args.debug`` is below 2 the ``html_out`` and ``man_out`` lists
    are abbreviated to their last two items (prefixed with '...') in the
    dump.  The abbreviation is applied to a shallow copy of the state, so
    the real output lists in ``self.state`` are never modified.
    """
    import pprint
    st = self.state
    if args.debug < 2:
        # Copy the namespace before abbreviating: rebinding html_out/man_out
        # on the live state would throw away the accumulated output.
        st = argparse.Namespace(**vars(st))
        if len(st.html_out) > 2:
            st.html_out = ['...'] + st.html_out[-2:]
        if len(st.man_out) > 2:
            st.man_out = ['...'] + st.man_out[-2:]
    print(event, extra)
    pprint.PrettyPrinter(indent=2).pprint(vars(st))
286
+
287
+
280
288
def manify (txt ):
281
289
return re .sub (r"^(['.])" , r'\&\1' , txt .replace ('\\ ' , '\\ \\ ' )
282
290
.replace (NORM_FONT [0 ], NORM_FONT [1 ])
0 commit comments