Skip to content

Commit c70b2c4

Browse files
facelessuser authored and waylan committed
Tables: Improvements (#530)
Tables now handle escaped pipes when testing, in table borders, and in the inline content. To achieve this properly, a bug related to appending escaped chars to the Markdown class had to be fixed. Now appended chars only appear in the current instance. Lastly, the first backtick in a table can be escaped, rounding out the last corner case.
1 parent b52293b commit c70b2c4

File tree

5 files changed

+172
-30
lines changed

5 files changed

+172
-30
lines changed

markdown/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -75,9 +75,6 @@ class Markdown(object):
7575
'xhtml5': to_xhtml_string,
7676
}
7777

78-
ESCAPED_CHARS = ['\\', '`', '*', '_', '{', '}', '[', ']',
79-
'(', ')', '>', '#', '+', '-', '.', '!']
80-
8178
def __init__(self, *args, **kwargs):
8279
"""
8380
Creates a new Markdown instance.
@@ -147,6 +144,9 @@ def __init__(self, *args, **kwargs):
147144
'deprecated along with "safe_mode".',
148145
DeprecationWarning)
149146

147+
self.ESCAPED_CHARS = ['\\', '`', '*', '_', '{', '}', '[', ']',
148+
'(', ')', '>', '#', '+', '-', '.', '!']
149+
150150
self.registeredExtensions = []
151151
self.docType = ""
152152
self.stripTopLevelTags = True

markdown/extensions/tables.py

Lines changed: 53 additions & 26 deletions
Original file line number | Diff line number | Diff line change
@@ -26,28 +26,43 @@
2626
class TableProcessor(BlockProcessor):
2727
""" Process Tables. """
2828

29-
RE_CODE_PIPES = re.compile(r'(?:(\\\\)|(`+)|(\\\|)|(\|))')
29+
RE_CODE_PIPES = re.compile(r'(?:(\\\\)|(\\`+)|(`+)|(\\\|)|(\|))')
30+
RE_END_BORDER = re.compile(r'(?<!\\)(?:\\\\)*\|$')
31+
32+
def __init__(self, parser):
33+
self.border = False
34+
self.separator = ''
35+
super(TableProcessor, self).__init__(parser)
3036

3137
def test(self, parent, block):
32-
rows = block.split('\n')
33-
return (len(rows) > 1 and '|' in rows[0] and
34-
'|' in rows[1] and '-' in rows[1] and
35-
rows[1].strip()[0] in ['|', ':', '-'] and
36-
set(rows[1]) <= set('|:- '))
38+
"""
39+
Ensure first two rows (column header and separator row) are valid table rows.
40+
41+
Keep border check and separator row to avoid repeating the work.
42+
"""
43+
is_table = False
44+
header = [row.strip() for row in block.split('\n')[0:2]]
45+
if len(header) == 2:
46+
self.border = header[0].startswith('|')
47+
row = self._split_row(header[0])
48+
is_table = len(row) > 1
49+
50+
if is_table:
51+
row = self._split_row(header[1])
52+
is_table = len(row) > 1 and set(''.join(row)) <= set('|:- ')
53+
if is_table:
54+
self.separator = row
55+
return is_table
3756

3857
def run(self, parent, blocks):
3958
""" Parse a table block and build table. """
4059
block = blocks.pop(0).split('\n')
4160
header = block[0].strip()
42-
seperator = block[1].strip()
4361
rows = [] if len(block) < 3 else block[2:]
44-
# Get format type (bordered by pipes or not)
45-
border = False
46-
if header.startswith('|'):
47-
border = True
62+
4863
# Get alignment of columns
4964
align = []
50-
for c in self._split_row(seperator, border):
65+
for c in self.separator:
5166
c = c.strip()
5267
if c.startswith(':') and c.endswith(':'):
5368
align.append('center')
@@ -57,21 +72,22 @@ def run(self, parent, blocks):
5772
align.append('right')
5873
else:
5974
align.append(None)
75+
6076
# Build table
6177
table = etree.SubElement(parent, 'table')
6278
thead = etree.SubElement(table, 'thead')
63-
self._build_row(header, thead, align, border)
79+
self._build_row(header, thead, align)
6480
tbody = etree.SubElement(table, 'tbody')
6581
for row in rows:
66-
self._build_row(row.strip(), tbody, align, border)
82+
self._build_row(row.strip(), tbody, align)
6783

68-
def _build_row(self, row, parent, align, border):
84+
def _build_row(self, row, parent, align):
6985
""" Given a row of text, build table cells. """
7086
tr = etree.SubElement(parent, 'tr')
7187
tag = 'td'
7288
if parent.tag == 'thead':
7389
tag = 'th'
74-
cells = self._split_row(row, border)
90+
cells = self._split_row(row)
7591
# We use align here rather than cells to ensure every row
7692
# contains the same number of columns.
7793
for i, a in enumerate(align):
@@ -83,13 +99,12 @@ def _build_row(self, row, parent, align, border):
8399
if a:
84100
c.set('align', a)
85101

86-
def _split_row(self, row, border):
102+
def _split_row(self, row):
87103
""" split a row of text into list of cells. """
88-
if border:
104+
if self.border:
89105
if row.startswith('|'):
90106
row = row[1:]
91-
if row.endswith('|'):
92-
row = row[:-1]
107+
row = self.RE_END_BORDER.sub('', row)
93108
return self._split(row)
94109

95110
def _split(self, row):
@@ -106,23 +121,33 @@ def _split(self, row):
106121
for m in self.RE_CODE_PIPES.finditer(row):
107122
# Store ` data (len, start_pos, end_pos)
108123
if m.group(2):
124+
# \`+
125+
# Store length of each tic group: subtract \
126+
tics.append(len(m.group(2)) - 1)
127+
# Store start of group, end of group, and escape length
128+
tic_points.append((m.start(2), m.end(2) - 1, 1))
129+
elif m.group(3):
109130
# `+
110131
# Store length of each tic group
111-
tics.append(len(m.group(2)))
112-
# Store start and end of tic group
113-
tic_points.append((m.start(2), m.end(2) - 1))
132+
tics.append(len(m.group(3)))
133+
# Store start of group, end of group, and escape length
134+
tic_points.append((m.start(3), m.end(3) - 1, 0))
114135
# Store pipe location
115-
elif m.group(4):
116-
pipes.append(m.start(4))
136+
elif m.group(5):
137+
pipes.append(m.start(5))
117138

118139
# Pair up tics according to size if possible
140+
# Subtract the escape length *only* from the opening.
119141
# Walk through tic list and see if tic has a close.
120142
# Store the tic region (start of region, end of region).
121143
pos = 0
122144
tic_len = len(tics)
123145
while pos < tic_len:
124146
try:
125-
index = tics[pos + 1:].index(tics[pos]) + 1
147+
tic_size = tics[pos] - tic_points[pos][2]
148+
if tic_size == 0:
149+
raise ValueError
150+
index = tics[pos + 1:].index(tic_size) + 1
126151
tic_region.append((tic_points[pos][0], tic_points[pos + index][1]))
127152
pos += index + 1
128153
except ValueError:
@@ -160,6 +185,8 @@ class TableExtension(Extension):
160185

161186
def extendMarkdown(self, md, md_globals):
162187
""" Add an instance of TableProcessor to BlockParser. """
188+
if '|' not in md.ESCAPED_CHARS:
189+
md.ESCAPED_CHARS.append('|')
163190
md.parser.blockprocessors.add('table',
164191
TableProcessor(md.parser),
165192
'<hashheader')

tests/extensions/extra/tables.html

Lines changed: 73 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -284,4 +284,76 @@ <h2>Table Tests</h2>
284284
<td><code>\</code></td>
285285
</tr>
286286
</tbody>
287-
</table>
287+
</table>
288+
<p>Only the first backtick can be escaped</p>
289+
<table>
290+
<thead>
291+
<tr>
292+
<th>Escaped</th>
293+
<th>Bacticks</th>
294+
</tr>
295+
</thead>
296+
<tbody>
297+
<tr>
298+
<td>`<code>\</code></td>
299+
<td>``</td>
300+
</tr>
301+
</tbody>
302+
</table>
303+
<p>Test escaped pipes</p>
304+
<table>
305+
<thead>
306+
<tr>
307+
<th>Column 1</th>
308+
<th>Column 2</th>
309+
</tr>
310+
</thead>
311+
<tbody>
312+
<tr>
313+
<td><code>|</code> |</td>
314+
<td>Pipes are okay in code and escaped. |</td>
315+
</tr>
316+
</tbody>
317+
</table>
318+
<table>
319+
<thead>
320+
<tr>
321+
<th>Column 1</th>
322+
<th>Column 2</th>
323+
</tr>
324+
</thead>
325+
<tbody>
326+
<tr>
327+
<td>row1</td>
328+
<td>row1 |</td>
329+
</tr>
330+
<tr>
331+
<td>row2</td>
332+
<td>row2</td>
333+
</tr>
334+
</tbody>
335+
</table>
336+
<p>Test header escapes</p>
337+
<table>
338+
<thead>
339+
<tr>
340+
<th><code>`\</code> |</th>
341+
<th><code>\</code> |</th>
342+
</tr>
343+
</thead>
344+
<tbody>
345+
<tr>
346+
<td>row1</td>
347+
<td>row1</td>
348+
</tr>
349+
<tr>
350+
<td>row2</td>
351+
<td>row2</td>
352+
</tr>
353+
</tbody>
354+
</table>
355+
<p>Escaped pipes in format row should not be a table</p>
356+
<p>| Column1 | Column2 |
357+
| ------- || ------- |
358+
| row1 | row1 |
359+
| row2 | row2 |</p>

tests/extensions/extra/tables.txt

Lines changed: 31 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -90,3 +90,34 @@ Odd backticks | Even backticks
9090
Escapes | More Escapes
9191
------- | ------
9292
`` `\`` | `\`
93+
94+
Only the first backtick can be escaped
95+
96+
Escaped | Bacticks
97+
------- | ------
98+
\`` \` | \`\`
99+
100+
Test escaped pipes
101+
102+
Column 1 | Column 2
103+
-------- | --------
104+
`|` \| | Pipes are okay in code and escaped. \|
105+
106+
| Column 1 | Column 2 |
107+
| -------- | -------- |
108+
| row1 | row1 \|
109+
| row2 | row2 |
110+
111+
Test header escapes
112+
113+
| `` `\`` \| | `\` \|
114+
| ---------- | ---- |
115+
| row1 | row1 |
116+
| row2 | row2 |
117+
118+
Escaped pipes in format row should not be a table
119+
120+
| Column1 | Column2 |
121+
| ------- \|| ------- |
122+
| row1 | row1 |
123+
| row2 | row2 |

tests/test_apis.py

Lines changed: 12 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -758,3 +758,15 @@ def testExtensonConfigOptionBadFormat(self):
758758
"""
759759
self.create_config_file(config)
760760
self.assertRaises(yaml.YAMLError, parse_options, ['-c', self.tempfile])
761+
762+
763+
class TestEscapeAppend(unittest.TestCase):
764+
""" Tests escape character append. """
765+
766+
def testAppend(self):
767+
""" Test that appended escapes are only in the current instance. """
768+
md = markdown.Markdown()
769+
md.ESCAPED_CHARS.append('|')
770+
self.assertEqual('|' in md.ESCAPED_CHARS, True)
771+
md2 = markdown.Markdown()
772+
self.assertEqual('|' not in md2.ESCAPED_CHARS, True)

0 commit comments

Comments
 (0)