1
1
#!/usr/bin/env python3
2
2
3
- # This script takes a manpage written in markdown and turns it into an html web
4
- # page and a nroff man page. The input file must have the name of the program
5
- # and the section in this format: NAME.NUM.md. The output files are written
6
- # into the current directory named NAME.NUM.html and NAME.NUM. The input
7
- # format has one extra extension: if a numbered list starts at 0, it is turned
8
- # into a description list. The dl's dt tag is taken from the contents of the
9
- # first tag inside the li, which is usually a p, code, or strong tag. The
10
- # cmarkgfm or commonmark lib is used to transforms the input file into html.
11
- # The html.parser is used as a state machine that both tweaks the html and
12
- # outputs the nroff data based on the html tags.
3
+ # This script transforms markdown files into html and (optionally) nroff. The
4
+ # output files are written into the current directory named for the input file
5
+ # without the .md suffix and either the .html suffix or no suffix.
13
6
#
14
- # We normally grab the prefix from the generated Makefile, which is then used
15
- # in the various other grabbed values (see the Makefile for its ${prefix}
16
- # paths). However, the maintainer can choose to override this prefix by
17
- # exporting RSYNC_OVERRIDE_PREFIX=/usr. This allows the man pages to refer to
18
- # /usr paths (and are thus compatible with the release-rsync script) while
19
- # still having the built rsync get installed into /usr/local for local testing.
7
+ # If the input .md file has a section number at the end of the name (e.g.,
8
+ # rsync.1.md) a nroff file is also output (PROJ.NUM.md -> PROJ.NUM).
20
9
#
21
- # Copyright (C) 2020 Wayne Davison
10
+ # The markdown input format has one extra extension: if a numbered list starts
11
+ # at 0, it is turned into a description list. The dl's dt tag is taken from the
12
+ # contents of the first tag inside the li, which is usually a p, code, or
13
+ # strong tag.
14
+ #
15
+ # The cmarkgfm or commonmark lib is used to transform the input file into
16
+ # html. Then, the html.parser is used as a state machine that lets us tweak
17
+ # the html and (optionally) output nroff data based on the html tags.
18
+ #
19
+ # If the string @USE_GFM_PARSER@ exists in the file, the string is removed and
20
+ # a github-flavored-markdown parser is used to parse the file.
21
+ #
22
+ # The man-page .md files also get the vars @VERSION@, @BINDIR@, and @LIBDIR@
23
+ # substituted. Some of these values depend on the Makefile $(prefix) (see the
24
+ # generated Makefile). If the maintainer wants to build files for /usr/local
25
+ # while creating release-ready man-page files for /usr, use the environment to
26
+ # set RSYNC_OVERRIDE_PREFIX=/usr.
27
+
28
+ # Copyright (C) 2020 - 2021 Wayne Davison
22
29
#
23
30
# This program is freely redistributable.
24
31
25
- import sys , os , re , argparse , subprocess , time
32
+ import os , sys , re , argparse , subprocess , time
26
33
from html .parser import HTMLParser
27
34
28
35
CONSUMES_TXT = set ('h1 h2 p li pre' .split ())
@@ -58,8 +65,30 @@ dd p:first-of-type {
58
65
</head><body>
59
66
"""
60
67
61
- HTML_END = """\
68
+ TABLE_STYLE = """\
69
+ table {
70
+ border-color: grey;
71
+ border-spacing: 0;
72
+ }
73
+ tr {
74
+ border-top: 1px solid grey;
75
+ }
76
+ tr:nth-child(2n) {
77
+ background-color: #f6f8fa;
78
+ }
79
+ th, td {
80
+ border: 1px solid #dfe2e5;
81
+ text-align: center;
82
+ padding-left: 1em;
83
+ padding-right: 1em;
84
+ }
85
+ """
86
+
87
+ MAN_HTML_END = """\
62
88
<div style="float: right"><p><i>%s</i></p></div>
89
+ """
90
+
91
+ HTML_END = """\
63
92
</body></html>
64
93
"""
65
94
@@ -78,41 +107,96 @@ NBR_DASH = ('\4', r"\-")
78
107
NBR_SPACE = ('\xa0 ' , r"\ " )
79
108
80
109
md_parser = None
110
+ env_subs = { }
81
111
82
112
def main():
    """Convert each markdown file named on the command line.

    Reads the module-level ``args`` namespace: every entry of
    ``args.mdfiles`` is handed to parse_md_file(), and when ``args.test``
    is set a success message is printed once all files have been parsed.
    """
    for md_path in args.mdfiles:
        parse_md_file(md_path)

    if not args.test:
        return
    print("The test was successful.")
118
+
119
+
120
def parse_md_file(mdfn):
    """Convert one markdown file into html and, for man pages, nroff.

    mdfn is the path of a ``NAME.md`` or ``NAME.NUM.md`` file. A numeric
    section suffix (``NAME.NUM.md``) marks the file as a man page: the
    @VERSION@, @BINDIR@, and @LIBDIR@ variables are substituted and a
    ``NAME.NUM`` nroff file is produced in addition to the html file.

    The output names are built from the file's base name only, so the
    results land in the current directory. When ``args.test`` is set the
    input is parsed and transformed but nothing is written.

    A name that does not match the expected pattern, or a file that asks
    for the gfm parser when it is unavailable, is reported via die()
    (defined elsewhere in this file; presumably it does not return).
    """
    m = re.match(r'^(?P<fn>(?P<srcdir>.+/)?(?P<name>(?P<prog>[^/]+?)(\.(?P<sect>\d+))?)\.md)$', mdfn)
    if not m:
        die('Failed to parse a md input file name:', mdfn)
    fi = argparse.Namespace(**m.groupdict())
    fi.want_manpage = bool(fi.sect)

    if fi.want_manpage:
        fi.title = fi.prog + '(' + fi.sect + ') man page'
        # The substitution values are shared by all man pages, so they are
        # only looked up for the first one.
        if not env_subs:
            find_man_substitutions()
        prog_ver = 'rsync ' + env_subs['VERSION']
        if fi.prog != 'rsync':
            prog_ver = fi.prog + ' from ' + prog_ver
        fi.man_headings = (fi.prog, fi.sect, env_subs['date'], prog_ver, env_subs['prefix'])
    else:
        fi.title = fi.prog

    with open(mdfn, 'r', encoding='utf-8') as fh:
        txt = fh.read()

    # The marker both selects the parser and is stripped from the text.
    use_gfm_parser = '@USE_GFM_PARSER@' in txt
    if use_gfm_parser:
        txt = txt.replace('@USE_GFM_PARSER@', '')

    if fi.want_manpage:
        txt = (txt.replace('@VERSION@', env_subs['VERSION'])
                  .replace('@BINDIR@', env_subs['bindir'])
                  .replace('@LIBDIR@', env_subs['libdir']))

    if use_gfm_parser:
        if not gfm_parser:
            die('Input file requires cmarkgfm parser:', mdfn)
        fi.html_in = gfm_parser(txt)
    else:
        fi.html_in = md_parser(txt)
    txt = None  # Drop the reference to the markdown text; it is no longer needed.

    # TransformHtml stores its results on fi (html_out and man_out).
    TransformHtml(fi)

    if args.test:
        return

    output_list = [(fi.name + '.html', fi.html_out)]
    if fi.want_manpage:
        output_list.append((fi.name, fi.man_out))

    for fn, out_txt in output_list:
        # Remove any existing entry (including a symlink) first so that the
        # open() below creates a new file instead of writing through it.
        if os.path.lexists(fn):
            os.unlink(fn)
        with open(fn, 'w', encoding='utf-8') as fh:
            fh.write(out_txt)
        # Only report the file once it has actually been written.
        print("Wrote:", fn)
87
173
88
- if args .srcdir :
89
- fi .srcdir = args .srcdir + '/'
90
- elif not fi .srcdir :
91
- fi .srcdir = './'
92
174
93
- fi .title = fi .prog + '(' + fi .sect + ') man page'
94
- fi .mtime = 0
175
+ def find_man_substitutions ():
176
+ srcdir = os .path .dirname (sys .argv [0 ]) + '/'
177
+ mtime = 0
95
178
96
- git_dir = fi . srcdir + '.git'
179
+ git_dir = srcdir + '.git'
97
180
if os .path .lexists (git_dir ):
98
- fi . mtime = int (subprocess .check_output (['git' , '--git-dir' , git_dir , 'log' , '-1' , '--format=%at' ]))
181
+ mtime = int (subprocess .check_output (['git' , '--git-dir' , git_dir , 'log' , '-1' , '--format=%at' ]))
99
182
100
- env_subs = { 'prefix' : os .environ .get ('RSYNC_OVERRIDE_PREFIX' , None ) }
183
+ # Allow "prefix" to be overridden via the environment:
184
+ env_subs ['prefix' ] = os .environ .get ('RSYNC_OVERRIDE_PREFIX' , None )
101
185
102
186
if args .test :
103
187
env_subs ['VERSION' ] = '1.0.0'
104
188
env_subs ['bindir' ] = '/usr/bin'
105
189
env_subs ['libdir' ] = '/usr/lib/rsync'
106
190
else :
107
- for fn in (fi . srcdir + 'version.h' , 'Makefile' ):
191
+ for fn in (srcdir + 'version.h' , 'Makefile' ):
108
192
try :
109
193
st = os .lstat (fn )
110
194
except OSError :
111
- die ('Failed to find' , fi . srcdir + fn )
112
- if not fi . mtime :
113
- fi . mtime = st .st_mtime
195
+ die ('Failed to find' , srcdir + fn )
196
+ if not mtime :
197
+ mtime = st .st_mtime
114
198
115
- with open (fi . srcdir + 'version.h' , 'r' , encoding = 'utf-8' ) as fh :
199
+ with open (srcdir + 'version.h' , 'r' , encoding = 'utf-8' ) as fh :
116
200
txt = fh .read ()
117
201
m = re .search (r'"(.+?)"' , txt )
118
202
env_subs ['VERSION' ] = m .group (1 )
@@ -131,40 +215,14 @@ def main():
131
215
if var == 'srcdir' :
132
216
break
133
217
134
- fi .prog_ver = 'rsync ' + env_subs ['VERSION' ]
135
- if fi .prog != 'rsync' :
136
- fi .prog_ver = fi .prog + ' from ' + fi .prog_ver
137
-
138
- with open (fi .fn , 'r' , encoding = 'utf-8' ) as fh :
139
- txt = fh .read ()
140
-
141
- txt = re .sub (r'@VERSION@' , env_subs ['VERSION' ], txt )
142
- txt = re .sub (r'@BINDIR@' , env_subs ['bindir' ], txt )
143
- txt = re .sub (r'@LIBDIR@' , env_subs ['libdir' ], txt )
144
-
145
- fi .html_in = md_parser (txt )
146
- txt = None
147
-
148
- fi .date = time .strftime ('%d %b %Y' , time .localtime (fi .mtime ))
149
- fi .man_headings = (fi .prog , fi .sect , fi .date , fi .prog_ver , env_subs ['prefix' ])
150
-
151
- HtmlToManPage (fi )
152
-
153
- if args .test :
154
- print ("The test was successful." )
155
- return
156
-
157
- for fn , txt in ((fi .name + '.html' , fi .html_out ), (fi .name , fi .man_out )):
158
- print ("Wrote:" , fn )
159
- with open (fn , 'w' , encoding = 'utf-8' ) as fh :
160
- fh .write (txt )
218
+ env_subs ['date' ] = time .strftime ('%d %b %Y' , time .localtime (mtime ))
161
219
162
220
163
221
def html_via_commonmark(txt):
    """Return the html produced by the commonmark library for markdown txt."""
    renderer = commonmark.HtmlRenderer()
    document = commonmark.Parser().parse(txt)
    return renderer.render(document)
165
223
166
224
167
- class HtmlToManPage (HTMLParser ):
225
+ class TransformHtml (HTMLParser ):
168
226
def __init__ (self , fi ):
169
227
HTMLParser .__init__ (self , convert_charrefs = True )
170
228
@@ -177,14 +235,23 @@ class HtmlToManPage(HTMLParser):
177
235
in_pre = False ,
178
236
in_code = False ,
179
237
html_out = [ HTML_START % fi .title ],
180
- man_out = [ MAN_START % fi . man_headings ],
238
+ man_out = [ ],
181
239
txt = '' ,
240
+ want_manpage = fi .want_manpage ,
182
241
)
183
242
243
+ if st .want_manpage :
244
+ st .man_out .append (MAN_START % fi .man_headings )
245
+
246
+ if '</table>' in fi .html_in :
247
+ st .html_out [0 ] = st .html_out [0 ].replace ('</style>' , TABLE_STYLE + '</style>' )
248
+
184
249
self .feed (fi .html_in )
185
250
fi .html_in = None
186
251
187
- st .html_out .append (HTML_END % fi .date )
252
+ if st .want_manpage :
253
+ st .html_out .append (MAN_HTML_END % env_subs ['date' ])
254
+ st .html_out .append (HTML_END )
188
255
st .man_out .append (MAN_END )
189
256
190
257
fi .html_out = '' .join (st .html_out )
@@ -232,8 +299,9 @@ class HtmlToManPage(HTMLParser):
232
299
elif tag == 'strong' or tag == 'b' :
233
300
st .txt += BOLD_FONT [0 ]
234
301
elif tag == 'em' or tag == 'i' :
235
- tag = 'u' # Change it into underline to be more like the man page
236
- st .txt += UNDR_FONT [0 ]
302
+ if st .want_manpage :
303
+ tag = 'u' # Change it into underline to be more like the man page
304
+ st .txt += UNDR_FONT [0 ]
237
305
elif tag == 'ol' :
238
306
start = 1
239
307
for var , val in attrs_list :
@@ -256,6 +324,10 @@ class HtmlToManPage(HTMLParser):
256
324
st .man_out .append (".RS\n " )
257
325
st .p_macro = ".IP\n "
258
326
st .list_state .append ('o' )
327
+ elif tag == 'hr' :
328
+ st .man_out .append (".l\n " )
329
+ st .html_out .append ("<hr />" )
330
+ return
259
331
st .html_out .append ('<' + tag + '' .join (' ' + var + '="' + htmlify (val ) + '"' for var , val in attrs_list ) + '>' )
260
332
st .at_first_tag_in_dd = False
261
333
@@ -300,8 +372,9 @@ class HtmlToManPage(HTMLParser):
300
372
elif tag == 'strong' or tag == 'b' :
301
373
add_to_txt = NORM_FONT [0 ]
302
374
elif tag == 'em' or tag == 'i' :
303
- tag = 'u' # Change it into underline to be more like the man page
304
- add_to_txt = NORM_FONT [0 ]
375
+ if st .want_manpage :
376
+ tag = 'u' # Change it into underline to be more like the man page
377
+ add_to_txt = NORM_FONT [0 ]
305
378
elif tag == 'ol' or tag == 'ul' :
306
379
if st .list_state .pop () == 'dl' :
307
380
tag = 'dl'
@@ -310,6 +383,8 @@ class HtmlToManPage(HTMLParser):
310
383
else :
311
384
st .p_macro = ".P\n "
312
385
st .at_first_tag_in_dd = False
386
+ elif tag == 'hr' :
387
+ return
313
388
st .html_out .append ('</' + tag + '>' )
314
389
if add_to_txt :
315
390
if txt is None :
@@ -379,22 +454,23 @@ def die(*msg):
379
454
380
455
381
456
if __name__ == '__main__':
    # Script entry point: parse the command line, pick a markdown parser,
    # and run main(). The names args, md_parser, and gfm_parser are set at
    # module level here because the functions above read them as globals.
    parser = argparse.ArgumentParser(description="Output html and (optionally) nroff for markdown pages.", add_help=False)
    parser.add_argument('--test', action='store_true', help="Just test the parsing without outputting any files.")
    parser.add_argument('--debug', '-D', action='count', default=0, help='Output copious info on the html parsing. Repeat for even more.')
    parser.add_argument("--help", "-h", action="help", help="Output this help message and exit.")
    parser.add_argument("mdfiles", nargs='+', help="The source .md files to convert.")
    args = parser.parse_args()

    # Prefer cmarkgfm (which also supplies the github-flavored parser) and
    # fall back to commonmark. Any ordinary failure while loading cmarkgfm
    # triggers the fallback, but KeyboardInterrupt and SystemExit are no
    # longer swallowed the way a bare "except:" would.
    try:
        import cmarkgfm
        md_parser = cmarkgfm.markdown_to_html
        gfm_parser = cmarkgfm.github_flavored_markdown_to_html
    except Exception:
        try:
            import commonmark
            md_parser = html_via_commonmark
        except Exception:
            die("Failed to find cmarkgfm or commonmark for python3.")
        # commonmark has no github-flavored parser; parse_md_file() checks
        # for this when a file asks for one.
        gfm_parser = None

    main()
0 commit comments