|
19 | 19 | from __future__ import unicode_literals |
20 | 20 | from . import Extension |
21 | 21 | from ..blockprocessors import BlockProcessor |
22 | | -from ..inlinepatterns import BacktickPattern, BACKTICK_RE |
23 | 22 | from ..util import etree |
| 23 | +import re |
24 | 24 |
|
25 | 25 |
|
26 | 26 | class TableProcessor(BlockProcessor): |
27 | 27 | """ Process Tables. """ |
28 | 28 |
|
| 29 | + RE_CODE_PIPES = re.compile(r'(?:(\\\\)|(`+)|(\\\|)|(\|))') |
| 30 | + |
29 | 31 | def test(self, parent, block): |
30 | 32 | rows = block.split('\n') |
31 | 33 | return (len(rows) > 1 and '|' in rows[0] and |
@@ -88,50 +90,70 @@ def _split_row(self, row, border): |
88 | 90 | row = row[1:] |
89 | 91 | if row.endswith('|'): |
90 | 92 | row = row[:-1] |
91 | | - return self._split(row, '|') |
| 93 | + return self._split(row) |
92 | 94 |
|
93 | | - def _split(self, row, marker): |
| 95 | + def _split(self, row): |
94 | 96 | """ split a row of text with some code into a list of cells. """ |
95 | | - if self._row_has_unpaired_backticks(row): |
96 | | - # fallback on old behaviour |
97 | | - return row.split(marker) |
98 | | - # modify the backtick pattern to only match at the beginning of the search string |
99 | | - backtick_pattern = BacktickPattern('^' + BACKTICK_RE) |
100 | 97 | elements = [] |
101 | | - current = '' |
102 | | - i = 0 |
103 | | - while i < len(row): |
104 | | - letter = row[i] |
105 | | - if letter == marker: |
106 | | - if current != '' or len(elements) == 0: |
107 | | - # Don't append empty string unless it is the first element |
108 | | - # The border is already removed when we get the row, then the line is strip()'d |
109 | | - # If the first element is a marker, then we have an empty first cell |
110 | | - elements.append(current) |
111 | | - current = '' |
112 | | - else: |
113 | | - match = backtick_pattern.getCompiledRegExp().match(row[i:]) |
114 | | - if not match: |
115 | | - current += letter |
116 | | - else: |
117 | | - groups = match.groups() |
118 | | - delim = groups[1] # the code block delimeter (ie 1 or more backticks) |
119 | | - row_contents = groups[2] # the text contained inside the code block |
120 | | - i += match.start(4) - 1 # jump pointer to the beginning of the rest of the text (group #4) |
121 | | - element = delim + row_contents + delim # reinstert backticks |
122 | | - current += element |
123 | | - i += 1 |
124 | | - elements.append(current) |
| 98 | + pipes = [] |
| 99 | + tics = [] |
| 100 | + tic_points = [] |
| 101 | + tic_region = [] |
| 102 | + good_pipes = [] |
| 103 | + |
| 104 | + # Parse row |
| 105 | + # Throw out \\, and \| |
| 106 | + for m in self.RE_CODE_PIPES.finditer(row): |
| 107 | + # Store ` data (len, start_pos, end_pos) |
| 108 | + if m.group(2): |
| 109 | + # `+ |
| 110 | + # Store length of each tic group |
| 111 | + tics.append(len(m.group(2))) |
| 112 | + # Store start and end of tic group |
| 113 | + tic_points.append((m.start(2), m.end(2) - 1)) |
| 114 | + # Store pipe location |
| 115 | + elif m.group(4): |
| 116 | + pipes.append(m.start(4)) |
| 117 | + |
| 118 | + # Pair up tics according to size if possible |
| 119 | + # Walk through tic list and see if tic has a close. |
| 120 | + # Store the tic region (start of region, end of region). |
| 121 | + pos = 0 |
| 122 | + tic_len = len(tics) |
| 123 | + while pos < tic_len: |
| 124 | + try: |
| 125 | + index = tics[pos + 1:].index(tics[pos]) + 1 |
| 126 | + tic_region.append((tic_points[pos][0], tic_points[pos + index][1])) |
| 127 | + pos += index + 1 |
| 128 | + except ValueError: |
| 129 | + pos += 1 |
| 130 | + |
| 131 | + # Resolve pipes. Check if they are within a tic pair region. |
| 132 | + # Walk through pipes comparing them to each region. |
| 133 | + # - If pipe position is less than a region's start, it isn't in a region |
| 134 | + # - If it is within a region, we don't want it, so throw it out |
| 135 | + # - If we didn't throw it out, it must be a table pipe |
| 136 | + for pipe in pipes: |
| 137 | + throw_out = False |
| 138 | + for region in tic_region: |
| 139 | + if pipe < region[0]: |
| 140 | + # Pipe is not in a region |
| 141 | + break |
| 142 | + elif region[0] <= pipe <= region[1]: |
| 143 | + # Pipe is within a code region. Throw it out. |
| 144 | + throw_out = True |
| 145 | + break |
| 146 | + if not throw_out: |
| 147 | + good_pipes.append(pipe) |
| 148 | + |
| 149 | + # Split row according to table delimiters. |
| 150 | + pos = 0 |
| 151 | + for pipe in good_pipes: |
| 152 | + elements.append(row[pos:pipe]) |
| 153 | + pos = pipe + 1 |
| 154 | + elements.append(row[pos:]) |
125 | 155 | return elements |
126 | 156 |
|
127 | | - def _row_has_unpaired_backticks(self, row): |
128 | | - count_total_backtick = row.count('`') |
129 | | - count_escaped_backtick = row.count('\`') |
130 | | - count_backtick = count_total_backtick - count_escaped_backtick |
131 | | - # odd number of backticks, |
132 | | - # we won't be able to build correct code blocks |
133 | | - return count_backtick & 1 |
134 | | - |
135 | 157 |
|
136 | 158 | class TableExtension(Extension): |
137 | 159 | """ Add tables to Markdown. """ |
|
0 commit comments