Skip to content

Commit a2b630c

Browse files
committed
Unify md parsing scripts & improve non-man html conversions.
1 parent 5b1baa7 commit a2b630c

File tree

7 files changed

+159
-184
lines changed

7 files changed

+159
-184
lines changed

Makefile.in

+4-4
Original file line numberDiff line numberDiff line change
@@ -257,16 +257,16 @@ proto.h-tstamp: $(srcdir)/*.c $(srcdir)/lib/compat.c daemon-parm.h
257257
.PHONY: man
258258
man: rsync.1 rsync-ssl.1 rsyncd.conf.5 rrsync.1
259259

260-
rsync.1: rsync.1.md md2man version.h Makefile
260+
rsync.1: rsync.1.md md-convert version.h Makefile
261261
@$(srcdir)/maybe-make-man $(srcdir) rsync.1.md
262262

263-
rsync-ssl.1: rsync-ssl.1.md md2man version.h Makefile
263+
rsync-ssl.1: rsync-ssl.1.md md-convert version.h Makefile
264264
@$(srcdir)/maybe-make-man $(srcdir) rsync-ssl.1.md
265265

266-
rsyncd.conf.5: rsyncd.conf.5.md md2man version.h Makefile
266+
rsyncd.conf.5: rsyncd.conf.5.md md-convert version.h Makefile
267267
@$(srcdir)/maybe-make-man $(srcdir) rsyncd.conf.5.md
268268

269-
rrsync.1: support/rrsync.1.md md2man Makefile
269+
rrsync.1: support/rrsync.1.md md-convert Makefile
270270
@$(srcdir)/maybe-make-man $(srcdir) support/rrsync.1.md
271271

272272
.PHONY: clean

NEWS.md

+2
Original file line numberDiff line numberDiff line change
@@ -4472,3 +4472,5 @@
44724472

44734473
\* DATE OF COMMIT is the date the protocol change was committed to version
44744474
control.
4475+
4476+
@USE_GFM_PARSER@

maybe-make-man

+2-2
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ fi
1616

1717
if [ ! -f "$flagfile" ]; then
1818
# We test our smallest manpage just to see if the python setup works.
19-
if "$srcdir/md2man" --test "$srcdir/rsync-ssl.1.md" >/dev/null 2>&1; then
19+
if "$srcdir/md-convert" --test "$srcdir/rsync-ssl.1.md" >/dev/null 2>&1; then
2020
touch $flagfile
2121
else
2222
outname=`echo "$inname" | sed 's/\.md$//'`
@@ -37,4 +37,4 @@ if [ ! -f "$flagfile" ]; then
3737
fi
3838
fi
3939

40-
"$srcdir/md2man" -s "$srcdir" "$srcdir/$inname"
40+
"$srcdir/md-convert" "$srcdir/$inname"

md-convert

+149-73
Original file line numberDiff line numberDiff line change
@@ -1,28 +1,35 @@
11
#!/usr/bin/env python3
22

3-
# This script takes a manpage written in markdown and turns it into an html web
4-
# page and a nroff man page. The input file must have the name of the program
5-
# and the section in this format: NAME.NUM.md. The output files are written
6-
# into the current directory named NAME.NUM.html and NAME.NUM. The input
7-
# format has one extra extension: if a numbered list starts at 0, it is turned
8-
# into a description list. The dl's dt tag is taken from the contents of the
9-
# first tag inside the li, which is usually a p, code, or strong tag. The
10-
# cmarkgfm or commonmark lib is used to transforms the input file into html.
11-
# The html.parser is used as a state machine that both tweaks the html and
12-
# outputs the nroff data based on the html tags.
3+
# This script transforms markdown files into html and (optionally) nroff. The
4+
# output files are written into the current directory named for the input file
5+
# without the .md suffix and either the .html suffix or no suffix.
136
#
14-
# We normally grab the prefix from the generated Makefile, which is then used
15-
# in the various other grabbed values (see the Makefile for its ${prefix}
16-
# paths). However, the maintainer can choose to override this prefix by
17-
# exporting RSYNC_OVERRIDE_PREFIX=/usr. This allows the man pages to refer to
18-
# /usr paths (and are thus compatible with the release-rsync script) while
19-
# still having the built rsync get installed into /usr/local for local testing.
7+
# If the input .md file has a section number at the end of the name (e.g.,
8+
# rsync.1.md) an nroff file is also output (PROJ.NUM.md -> PROJ.NUM).
209
#
21-
# Copyright (C) 2020 Wayne Davison
10+
# The markdown input format has one extra extension: if a numbered list starts
11+
# at 0, it is turned into a description list. The dl's dt tag is taken from the
12+
# contents of the first tag inside the li, which is usually a p, code, or
13+
# strong tag.
14+
#
15+
# The cmarkgfm or commonmark lib is used to transform the input file into
16+
# html. Then, the html.parser is used as a state machine that lets us tweak
17+
# the html and (optionally) output nroff data based on the html tags.
18+
#
19+
# If the string @USE_GFM_PARSER@ exists in the file, the string is removed and
20+
# a GitHub-flavored markdown parser is used to parse the file.
21+
#
22+
# The man-page .md files also get the vars @VERSION@, @BINDIR@, and @LIBDIR@
23+
# substituted. Some of these values depend on the Makefile $(prefix) (see the
24+
# generated Makefile). If the maintainer wants to build files for /usr/local
25+
# while creating release-ready man-page files for /usr, use the environment to
26+
# set RSYNC_OVERRIDE_PREFIX=/usr.
27+
28+
# Copyright (C) 2020 - 2021 Wayne Davison
2229
#
2330
# This program is freely redistributable.
2431

25-
import sys, os, re, argparse, subprocess, time
32+
import os, sys, re, argparse, subprocess, time
2633
from html.parser import HTMLParser
2734

2835
CONSUMES_TXT = set('h1 h2 p li pre'.split())
@@ -58,8 +65,30 @@ dd p:first-of-type {
5865
</head><body>
5966
"""
6067

61-
HTML_END = """\
68+
TABLE_STYLE = """\
69+
table {
70+
border-color: grey;
71+
border-spacing: 0;
72+
}
73+
tr {
74+
border-top: 1px solid grey;
75+
}
76+
tr:nth-child(2n) {
77+
background-color: #f6f8fa;
78+
}
79+
th, td {
80+
border: 1px solid #dfe2e5;
81+
text-align: center;
82+
padding-left: 1em;
83+
padding-right: 1em;
84+
}
85+
"""
86+
87+
MAN_HTML_END = """\
6288
<div style="float: right"><p><i>%s</i></p></div>
89+
"""
90+
91+
HTML_END = """\
6392
</body></html>
6493
"""
6594

@@ -78,41 +107,96 @@ NBR_DASH = ('\4', r"\-")
78107
NBR_SPACE = ('\xa0', r"\ ")
79108

80109
md_parser = None
110+
env_subs = { }
81111

82112
def main():
83-
fi = re.match(r'^(?P<fn>(?P<srcdir>.+/)?(?P<name>(?P<prog>[^/]+)\.(?P<sect>\d+))\.md)$', args.mdfile)
113+
for mdfn in args.mdfiles:
114+
parse_md_file(mdfn)
115+
116+
if args.test:
117+
print("The test was successful.")
118+
119+
120+
def parse_md_file(mdfn):
121+
fi = re.match(r'^(?P<fn>(?P<srcdir>.+/)?(?P<name>(?P<prog>[^/]+?)(\.(?P<sect>\d+))?)\.md)$', mdfn)
84122
if not fi:
85-
die('Failed to parse NAME.NUM.md out of input file:', args.mdfile)
123+
die('Failed to parse a md input file name:', mdfn)
86124
fi = argparse.Namespace(**fi.groupdict())
125+
fi.want_manpage = not not fi.sect
126+
if fi.want_manpage:
127+
fi.title = fi.prog + '(' + fi.sect + ') man page'
128+
else:
129+
fi.title = fi.prog
130+
131+
if fi.want_manpage:
132+
if not env_subs:
133+
find_man_substitutions()
134+
prog_ver = 'rsync ' + env_subs['VERSION']
135+
if fi.prog != 'rsync':
136+
prog_ver = fi.prog + ' from ' + prog_ver
137+
fi.man_headings = (fi.prog, fi.sect, env_subs['date'], prog_ver, env_subs['prefix'])
138+
139+
with open(mdfn, 'r', encoding='utf-8') as fh:
140+
txt = fh.read()
141+
142+
use_gfm_parser = '@USE_GFM_PARSER@' in txt
143+
if use_gfm_parser:
144+
txt = txt.replace('@USE_GFM_PARSER@', '')
145+
146+
if fi.want_manpage:
147+
txt = (txt.replace('@VERSION@', env_subs['VERSION'])
148+
.replace('@BINDIR@', env_subs['bindir'])
149+
.replace('@LIBDIR@', env_subs['libdir']))
150+
151+
if use_gfm_parser:
152+
if not gfm_parser:
153+
die('Input file requires cmarkgfm parser:', mdfn)
154+
fi.html_in = gfm_parser(txt)
155+
else:
156+
fi.html_in = md_parser(txt)
157+
txt = None
158+
159+
TransformHtml(fi)
160+
161+
if args.test:
162+
return
163+
164+
output_list = [ (fi.name + '.html', fi.html_out) ]
165+
if fi.want_manpage:
166+
output_list += [ (fi.name, fi.man_out) ]
167+
for fn, txt in output_list:
168+
if os.path.lexists(fn):
169+
os.unlink(fn)
170+
print("Wrote:", fn)
171+
with open(fn, 'w', encoding='utf-8') as fh:
172+
fh.write(txt)
87173

88-
if args.srcdir:
89-
fi.srcdir = args.srcdir + '/'
90-
elif not fi.srcdir:
91-
fi.srcdir = './'
92174

93-
fi.title = fi.prog + '(' + fi.sect + ') man page'
94-
fi.mtime = 0
175+
def find_man_substitutions():
176+
srcdir = os.path.dirname(sys.argv[0]) + '/'
177+
mtime = 0
95178

96-
git_dir = fi.srcdir + '.git'
179+
git_dir = srcdir + '.git'
97180
if os.path.lexists(git_dir):
98-
fi.mtime = int(subprocess.check_output(['git', '--git-dir', git_dir, 'log', '-1', '--format=%at']))
181+
mtime = int(subprocess.check_output(['git', '--git-dir', git_dir, 'log', '-1', '--format=%at']))
99182

100-
env_subs = { 'prefix': os.environ.get('RSYNC_OVERRIDE_PREFIX', None) }
183+
# Allow "prefix" to be overridden via the environment:
184+
env_subs['prefix'] = os.environ.get('RSYNC_OVERRIDE_PREFIX', None)
101185

102186
if args.test:
103187
env_subs['VERSION'] = '1.0.0'
104188
env_subs['bindir'] = '/usr/bin'
105189
env_subs['libdir'] = '/usr/lib/rsync'
106190
else:
107-
for fn in (fi.srcdir + 'version.h', 'Makefile'):
191+
for fn in (srcdir + 'version.h', 'Makefile'):
108192
try:
109193
st = os.lstat(fn)
110194
except OSError:
111-
die('Failed to find', fi.srcdir + fn)
112-
if not fi.mtime:
113-
fi.mtime = st.st_mtime
195+
die('Failed to find', srcdir + fn)
196+
if not mtime:
197+
mtime = st.st_mtime
114198

115-
with open(fi.srcdir + 'version.h', 'r', encoding='utf-8') as fh:
199+
with open(srcdir + 'version.h', 'r', encoding='utf-8') as fh:
116200
txt = fh.read()
117201
m = re.search(r'"(.+?)"', txt)
118202
env_subs['VERSION'] = m.group(1)
@@ -131,40 +215,14 @@ def main():
131215
if var == 'srcdir':
132216
break
133217

134-
fi.prog_ver = 'rsync ' + env_subs['VERSION']
135-
if fi.prog != 'rsync':
136-
fi.prog_ver = fi.prog + ' from ' + fi.prog_ver
137-
138-
with open(fi.fn, 'r', encoding='utf-8') as fh:
139-
txt = fh.read()
140-
141-
txt = re.sub(r'@VERSION@', env_subs['VERSION'], txt)
142-
txt = re.sub(r'@BINDIR@', env_subs['bindir'], txt)
143-
txt = re.sub(r'@LIBDIR@', env_subs['libdir'], txt)
144-
145-
fi.html_in = md_parser(txt)
146-
txt = None
147-
148-
fi.date = time.strftime('%d %b %Y', time.localtime(fi.mtime))
149-
fi.man_headings = (fi.prog, fi.sect, fi.date, fi.prog_ver, env_subs['prefix'])
150-
151-
HtmlToManPage(fi)
152-
153-
if args.test:
154-
print("The test was successful.")
155-
return
156-
157-
for fn, txt in ((fi.name + '.html', fi.html_out), (fi.name, fi.man_out)):
158-
print("Wrote:", fn)
159-
with open(fn, 'w', encoding='utf-8') as fh:
160-
fh.write(txt)
218+
env_subs['date'] = time.strftime('%d %b %Y', time.localtime(mtime))
161219

162220

163221
def html_via_commonmark(txt):
164222
return commonmark.HtmlRenderer().render(commonmark.Parser().parse(txt))
165223

166224

167-
class HtmlToManPage(HTMLParser):
225+
class TransformHtml(HTMLParser):
168226
def __init__(self, fi):
169227
HTMLParser.__init__(self, convert_charrefs=True)
170228

@@ -177,14 +235,23 @@ class HtmlToManPage(HTMLParser):
177235
in_pre = False,
178236
in_code = False,
179237
html_out = [ HTML_START % fi.title ],
180-
man_out = [ MAN_START % fi.man_headings ],
238+
man_out = [ ],
181239
txt = '',
240+
want_manpage = fi.want_manpage,
182241
)
183242

243+
if st.want_manpage:
244+
st.man_out.append(MAN_START % fi.man_headings)
245+
246+
if '</table>' in fi.html_in:
247+
st.html_out[0] = st.html_out[0].replace('</style>', TABLE_STYLE + '</style>')
248+
184249
self.feed(fi.html_in)
185250
fi.html_in = None
186251

187-
st.html_out.append(HTML_END % fi.date)
252+
if st.want_manpage:
253+
st.html_out.append(MAN_HTML_END % env_subs['date'])
254+
st.html_out.append(HTML_END)
188255
st.man_out.append(MAN_END)
189256

190257
fi.html_out = ''.join(st.html_out)
@@ -232,8 +299,9 @@ class HtmlToManPage(HTMLParser):
232299
elif tag == 'strong' or tag == 'b':
233300
st.txt += BOLD_FONT[0]
234301
elif tag == 'em' or tag == 'i':
235-
tag = 'u' # Change it into underline to be more like the man page
236-
st.txt += UNDR_FONT[0]
302+
if st.want_manpage:
303+
tag = 'u' # Change it into underline to be more like the man page
304+
st.txt += UNDR_FONT[0]
237305
elif tag == 'ol':
238306
start = 1
239307
for var, val in attrs_list:
@@ -256,6 +324,10 @@ class HtmlToManPage(HTMLParser):
256324
st.man_out.append(".RS\n")
257325
st.p_macro = ".IP\n"
258326
st.list_state.append('o')
327+
elif tag == 'hr':
328+
st.man_out.append(".l\n")
329+
st.html_out.append("<hr />")
330+
return
259331
st.html_out.append('<' + tag + ''.join(' ' + var + '="' + htmlify(val) + '"' for var, val in attrs_list) + '>')
260332
st.at_first_tag_in_dd = False
261333

@@ -300,8 +372,9 @@ class HtmlToManPage(HTMLParser):
300372
elif tag == 'strong' or tag == 'b':
301373
add_to_txt = NORM_FONT[0]
302374
elif tag == 'em' or tag == 'i':
303-
tag = 'u' # Change it into underline to be more like the man page
304-
add_to_txt = NORM_FONT[0]
375+
if st.want_manpage:
376+
tag = 'u' # Change it into underline to be more like the man page
377+
add_to_txt = NORM_FONT[0]
305378
elif tag == 'ol' or tag == 'ul':
306379
if st.list_state.pop() == 'dl':
307380
tag = 'dl'
@@ -310,6 +383,8 @@ class HtmlToManPage(HTMLParser):
310383
else:
311384
st.p_macro = ".P\n"
312385
st.at_first_tag_in_dd = False
386+
elif tag == 'hr':
387+
return
313388
st.html_out.append('</' + tag + '>')
314389
if add_to_txt:
315390
if txt is None:
@@ -379,22 +454,23 @@ def die(*msg):
379454

380455

381456
if __name__ == '__main__':
382-
parser = argparse.ArgumentParser(description='Transform a NAME.NUM.md markdown file into a NAME.NUM.html web page & a NAME.NUM man page.', add_help=False)
383-
parser.add_argument('--srcdir', '-s', help='Specify the source dir if the input file is not in it.')
384-
parser.add_argument('--test', action='store_true', help='Test if we can parse the input w/o updating any files.')
457+
parser = argparse.ArgumentParser(description="Output html and (optionally) nroff for markdown pages.", add_help=False)
458+
parser.add_argument('--test', action='store_true', help="Just test the parsing without outputting any files.")
385459
parser.add_argument('--debug', '-D', action='count', default=0, help='Output copious info on the html parsing. Repeat for even more.')
386460
parser.add_argument("--help", "-h", action="help", help="Output this help message and exit.")
387-
parser.add_argument('mdfile', help="The NAME.NUM.md file to parse.")
461+
parser.add_argument("mdfiles", nargs='+', help="The source .md files to convert.")
388462
args = parser.parse_args()
389463

390464
try:
391465
import cmarkgfm
392466
md_parser = cmarkgfm.markdown_to_html
467+
gfm_parser = cmarkgfm.github_flavored_markdown_to_html
393468
except:
394469
try:
395470
import commonmark
396471
md_parser = html_via_commonmark
397472
except:
398473
die("Failed to find cmarkgfm or commonmark for python3.")
474+
gfm_parser = None
399475

400476
main()

md2man

+1
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
md-convert

0 commit comments

Comments
 (0)