Skip to content

Commit 7ab8185

Browse files
authored
fix: blockquote code continuation (#3264)
BREAKING CHANGE: add space token after blockquote and hr if there are multiple newlines
1 parent 0e40783 commit 7ab8185

File tree

5 files changed

+86
-24
lines changed

5 files changed

+86
-24
lines changed

src/Lexer.ts

+4-5
Original file line number | Diff line number | Diff line change
@@ -101,9 +101,9 @@ export class _Lexer {
101101
/**
102102
* Lexing
103103
*/
104-
blockTokens(src: string, tokens?: Token[]): Token[];
105-
blockTokens(src: string, tokens?: TokensList): TokensList;
106-
blockTokens(src: string, tokens: Token[] = []) {
104+
blockTokens(src: string, tokens?: Token[], lastParagraphClipped?: boolean): Token[];
105+
blockTokens(src: string, tokens?: TokensList, lastParagraphClipped?: boolean): TokensList;
106+
blockTokens(src: string, tokens: Token[] = [], lastParagraphClipped = false) {
107107
if (this.options.pedantic) {
108108
src = src.replace(/\t/g, ' ').replace(/^ +$/gm, '');
109109
} else {
@@ -115,7 +115,6 @@ export class _Lexer {
115115
let token: Tokens.Generic | undefined;
116116
let lastToken;
117117
let cutSrc;
118-
let lastParagraphClipped;
119118

120119
while (src) {
121120
if (this.options.extensions
@@ -249,7 +248,7 @@ export class _Lexer {
249248
}
250249
if (this.state.top && (token = this.tokenizer.paragraph(cutSrc))) {
251250
lastToken = tokens[tokens.length - 1];
252-
if (lastParagraphClipped && lastToken.type === 'paragraph') {
251+
if (lastParagraphClipped && lastToken?.type === 'paragraph') {
253252
lastToken.raw += '\n' + token.raw;
254253
lastToken.text += '\n' + token.text;
255254
this.inlineQueue.pop();

src/Tokenizer.ts

+75-10
Original file line number | Diff line number | Diff line change
@@ -7,7 +7,7 @@ import {
77
} from './helpers.ts';
88
import type { Rules } from './rules.ts';
99
import type { _Lexer } from './Lexer.ts';
10-
import type { Links, Tokens } from './Tokens.ts';
10+
import type { Links, Tokens, Token } from './Tokens.ts';
1111
import type { MarkedOptions } from './MarkedOptions.ts';
1212

1313
function outputLink(cap: string[], link: Pick<Tokens.Link, 'href' | 'title'>, raw: string, lexer: _Lexer): Tokens.Link | Tokens.Image {
@@ -148,24 +148,89 @@ export class _Tokenizer {
148148
if (cap) {
149149
return {
150150
type: 'hr',
151-
raw: cap[0]
151+
raw: rtrim(cap[0], '\n')
152152
};
153153
}
154154
}
155155

156156
blockquote(src: string): Tokens.Blockquote | undefined {
157157
const cap = this.rules.block.blockquote.exec(src);
158158
if (cap) {
159-
// precede setext continuation with 4 spaces so it isn't a setext
160-
let text = cap[0].replace(/\n {0,3}((?:=+|-+) *)(?=\n|$)/g, '\n $1');
161-
text = rtrim(text.replace(/^ *>[ \t]?/gm, ''), '\n');
162-
const top = this.lexer.state.top;
163-
this.lexer.state.top = true;
164-
const tokens = this.lexer.blockTokens(text);
165-
this.lexer.state.top = top;
159+
let lines = rtrim(cap[0], '\n').split('\n');
160+
let raw = '';
161+
let text = '';
162+
const tokens: Token[] = [];
163+
164+
while (lines.length > 0) {
165+
let inBlockquote = false;
166+
const currentLines = [];
167+
168+
let i;
169+
for (i = 0; i < lines.length; i++) {
170+
// get lines up to a continuation
171+
if (/^ {0,3}>/.test(lines[i])) {
172+
currentLines.push(lines[i]);
173+
inBlockquote = true;
174+
} else if (!inBlockquote) {
175+
currentLines.push(lines[i]);
176+
} else {
177+
break;
178+
}
179+
}
180+
lines = lines.slice(i);
181+
182+
const currentRaw = currentLines.join('\n');
183+
const currentText = currentRaw
184+
// precede setext continuation with 4 spaces so it isn't a setext
185+
.replace(/\n {0,3}((?:=+|-+) *)(?=\n|$)/g, '\n $1')
186+
.replace(/^ {0,3}>[ \t]?/gm, '');
187+
raw = raw ? `${raw}\n${currentRaw}` : currentRaw;
188+
text = text ? `${text}\n${currentText}` : currentText;
189+
190+
// parse blockquote lines as top level tokens
191+
// merge paragraphs if this is a continuation
192+
const top = this.lexer.state.top;
193+
this.lexer.state.top = true;
194+
this.lexer.blockTokens(currentText, tokens, true);
195+
this.lexer.state.top = top;
196+
197+
// if there is no continuation then we are done
198+
if (lines.length === 0) {
199+
break;
200+
}
201+
202+
const lastToken = tokens[tokens.length - 1];
203+
204+
if (lastToken?.type === 'code') {
205+
// blockquote continuation cannot be preceded by a code block
206+
break;
207+
} else if (lastToken?.type === 'blockquote') {
208+
// include continuation in nested blockquote
209+
const oldToken = lastToken as Tokens.Blockquote;
210+
const newText = oldToken.raw + '\n' + lines.join('\n');
211+
const newToken = this.blockquote(newText)!;
212+
tokens[tokens.length - 1] = newToken;
213+
214+
raw = raw.substring(0, raw.length - oldToken.raw.length) + newToken.raw;
215+
text = text.substring(0, text.length - oldToken.text.length) + newToken.text;
216+
break;
217+
} else if (lastToken?.type === 'list') {
218+
// include continuation in nested list
219+
const oldToken = lastToken as Tokens.List;
220+
const newText = oldToken.raw + '\n' + lines.join('\n');
221+
const newToken = this.list(newText)!;
222+
tokens[tokens.length - 1] = newToken;
223+
224+
raw = raw.substring(0, raw.length - lastToken.raw.length) + newToken.raw;
225+
text = text.substring(0, text.length - oldToken.raw.length) + newToken.raw;
226+
lines = newText.substring(tokens[tokens.length - 1].raw.length).split('\n');
227+
continue;
228+
}
229+
}
230+
166231
return {
167232
type: 'blockquote',
168-
raw: cap[0],
233+
raw,
169234
tokens,
170235
text
171236
};

test/specs/commonmark/commonmark.0.31.json

+2-4
Original file line number | Diff line number | Diff line change
@@ -1887,17 +1887,15 @@
18871887
"example": 236,
18881888
"start_line": 3838,
18891889
"end_line": 3848,
1890-
"section": "Block quotes",
1891-
"shouldFail": true
1890+
"section": "Block quotes"
18921891
},
18931892
{
18941893
"markdown": "> ```\nfoo\n```\n",
18951894
"html": "<blockquote>\n<pre><code></code></pre>\n</blockquote>\n<p>foo</p>\n<pre><code></code></pre>\n",
18961895
"example": 237,
18971896
"start_line": 3851,
18981897
"end_line": 3861,
1899-
"section": "Block quotes",
1900-
"shouldFail": true
1898+
"section": "Block quotes"
19011899
},
19021900
{
19031901
"markdown": "> foo\n - bar\n",

test/specs/gfm/commonmark.0.31.json

+2-4
Original file line number | Diff line number | Diff line change
@@ -1887,17 +1887,15 @@
18871887
"example": 236,
18881888
"start_line": 3838,
18891889
"end_line": 3848,
1890-
"section": "Block quotes",
1891-
"shouldFail": true
1890+
"section": "Block quotes"
18921891
},
18931892
{
18941893
"markdown": "> ```\nfoo\n```\n",
18951894
"html": "<blockquote>\n<pre><code></code></pre>\n</blockquote>\n<p>foo</p>\n<pre><code></code></pre>\n",
18961895
"example": 237,
18971896
"start_line": 3851,
18981897
"end_line": 3861,
1899-
"section": "Block quotes",
1900-
"shouldFail": true
1898+
"section": "Block quotes"
19011899
},
19021900
{
19031901
"markdown": "> foo\n - bar\n",

test/unit/marked.test.js

+3-1
Original file line number | Diff line number | Diff line change
@@ -18,7 +18,7 @@ describe('marked unit', () => {
1818

1919
assert.strictEqual(tokens[0].type, 'paragraph');
2020
assert.strictEqual(tokens[2].tokens[0].type, 'paragraph');
21-
assert.strictEqual(tokens[3].items[0].tokens[0].type, 'text');
21+
assert.strictEqual(tokens[4].items[0].tokens[0].type, 'text');
2222
});
2323
});
2424

@@ -910,6 +910,7 @@ br
910910
['text', 'paragraph'],
911911
['space', ''],
912912
['hr', '---'],
913+
['space', ''],
913914
['heading', '# heading'],
914915
['text', 'heading'],
915916
['code', '```code```'],
@@ -924,6 +925,7 @@ br
924925
['blockquote', '> blockquote'],
925926
['paragraph', 'blockquote'],
926927
['text', 'blockquote'],
928+
['space', ''],
927929
['list', '- list'],
928930
['list_item', '- list'],
929931
['text', 'list'],

0 commit comments

Comments
 (0)