
Commit 175fc0c

fix: clean up rules so they can be typed (#3087)
BREAKING CHANGE: The Lexer.rules object has been changed so it can be properly typed. Some intermediate rules have been removed.
1 parent edae309 commit 175fc0c
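
For extension authors, the visible breaking change is that the nested intermediate rule objects are flattened into individually named rules. A minimal migration sketch (assuming a tokenizer override registered through marked.use; only renames that appear in this diff are listed):

import { marked } from 'marked';

marked.use({
  tokenizer: {
    url(src) {
      // `this.rules` is now typed as `Rules` instead of `any`, so the
      // flattened names are checked at compile time, e.g.:
      //   this.rules.inline.emStrong.lDelim    -> this.rules.inline.emStrongLDelim
      //   this.rules.inline.emStrong.rDelimAst -> this.rules.inline.emStrongRDelimAst
      //   this.rules.inline._escapes           -> this.rules.inline.anyPunctuation
      return false; // fall back to the built-in url tokenizer
    }
  }
});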

7 files changed: +301 -346 lines changed

src/Instance.ts (+23 -13)
@@ -142,11 +142,14 @@ export class Marked {
     if (pack.renderer) {
       const renderer = this.defaults.renderer || new _Renderer(this.defaults);
       for (const prop in pack.renderer) {
-        const rendererFunc = pack.renderer[prop as keyof MarkedExtension['renderer']] as GenericRendererFunction;
-        const rendererKey = prop as keyof _Renderer;
-        const prevRenderer = renderer[rendererKey] as GenericRendererFunction;
+        if (!(prop in renderer) || prop === 'options') {
+          throw new Error(`renderer '${prop}' does not exist`);
+        }
+        const rendererProp = prop as Exclude<keyof _Renderer, 'options'>;
+        const rendererFunc = pack.renderer[rendererProp] as GenericRendererFunction;
+        const prevRenderer = renderer[rendererProp] as GenericRendererFunction;
         // Replace renderer with func to run extension, but fall back if false
-        renderer[rendererKey] = (...args: unknown[]) => {
+        renderer[rendererProp] = (...args: unknown[]) => {
           let ret = rendererFunc.apply(renderer, args);
           if (ret === false) {
             ret = prevRenderer.apply(renderer, args);
@@ -159,11 +162,15 @@ export class Marked {
     if (pack.tokenizer) {
       const tokenizer = this.defaults.tokenizer || new _Tokenizer(this.defaults);
       for (const prop in pack.tokenizer) {
-        const tokenizerFunc = pack.tokenizer[prop as keyof MarkedExtension['tokenizer']] as UnknownFunction;
-        const tokenizerKey = prop as keyof _Tokenizer;
-        const prevTokenizer = tokenizer[tokenizerKey] as UnknownFunction;
+        if (!(prop in tokenizer) || ['options', 'rules', 'lexer'].includes(prop)) {
+          throw new Error(`tokenizer '${prop}' does not exist`);
+        }
+        const tokenizerProp = prop as Exclude<keyof _Tokenizer, 'options' | 'rules' | 'lexer'>;
+        const tokenizerFunc = pack.tokenizer[tokenizerProp] as UnknownFunction;
+        const prevTokenizer = tokenizer[tokenizerProp] as UnknownFunction;
         // Replace tokenizer with func to run extension, but fall back if false
-        tokenizer[tokenizerKey] = (...args: unknown[]) => {
+        // @ts-expect-error cannot type tokenizer function dynamically
+        tokenizer[tokenizerProp] = (...args: unknown[]) => {
           let ret = tokenizerFunc.apply(tokenizer, args);
           if (ret === false) {
             ret = prevTokenizer.apply(tokenizer, args);
@@ -178,11 +185,14 @@ export class Marked {
     if (pack.hooks) {
       const hooks = this.defaults.hooks || new _Hooks();
       for (const prop in pack.hooks) {
-        const hooksFunc = pack.hooks[prop as keyof MarkedExtension['hooks']] as UnknownFunction;
-        const hooksKey = prop as keyof _Hooks;
-        const prevHook = hooks[hooksKey] as UnknownFunction;
+        if (!(prop in hooks) || prop === 'options') {
+          throw new Error(`hook '${prop}' does not exist`);
+        }
+        const hooksProp = prop as Exclude<keyof _Hooks, 'options'>;
+        const hooksFunc = pack.hooks[hooksProp] as UnknownFunction;
+        const prevHook = hooks[hooksProp] as UnknownFunction;
         if (_Hooks.passThroughHooks.has(prop)) {
-          hooks[hooksKey as 'preprocess' | 'postprocess'] = (arg: string | undefined) => {
+          hooks[hooksProp] = (arg: string | undefined) => {
            if (this.defaults.async) {
              return Promise.resolve(hooksFunc.call(hooks, arg)).then(ret => {
                return prevHook.call(hooks, ret) as string;
@@ -193,7 +203,7 @@ export class Marked {
            return prevHook.call(hooks, ret) as string;
          };
        } else {
-          hooks[hooksKey] = (...args: unknown[]) => {
+          hooks[hooksProp] = (...args: unknown[]) => {
            let ret = hooksFunc.apply(hooks, args);
            if (ret === false) {
              ret = prevHook.apply(hooks, args);
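
In practice these guards make marked.use fail fast on unknown overrides instead of silently installing them. A small sketch of the new behavior (the misspelled name is deliberate and hypothetical):

import { marked } from 'marked';

// Overriding a real renderer method still works; returning false falls
// back to the previous renderer:
marked.use({
  renderer: {
    heading() {
      return false;
    }
  }
});

// A misspelled override is now rejected when use() runs:
const badExtension = { renderer: { headding: () => false } };
try {
  marked.use(badExtension as never); // cast only to bypass compile-time checking
} catch (err) {
  console.log((err as Error).message); // renderer 'headding' does not exist
}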

src/Lexer.ts (+2 -4)
@@ -3,7 +3,6 @@ import { _defaults } from './defaults.ts';
 import { block, inline } from './rules.ts';
 import type { Token, TokensList, Tokens } from './Tokens.ts';
 import type { MarkedOptions, TokenizerExtension } from './MarkedOptions.ts';
-import type { Rules } from './rules.ts';
 
 /**
  * Block Lexer
@@ -22,8 +21,7 @@ export class _Lexer {
 
   constructor(options?: MarkedOptions) {
     // TokenList cannot be created in one go
-    // @ts-expect-error
-    this.tokens = [];
+    this.tokens = [] as unknown as TokensList;
     this.tokens.links = Object.create(null);
     this.options = options || _defaults;
     this.options.tokenizer = this.options.tokenizer || new _Tokenizer();
@@ -59,7 +57,7 @@
   /**
    * Expose Rules
    */
-  static get rules(): Rules {
+  static get rules() {
     return {
       block,
       inline
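
With the Rules annotation gone, the static getter's return type is inferred directly from the block and inline objects in rules.ts, which is what lets them be "properly typed". A tiny sketch, assuming the package-level Lexer export:

import { Lexer } from 'marked';

// Inferred as { block: typeof block; inline: typeof inline } rather than
// the removed hand-written Rules annotation.
const { block, inline } = Lexer.rules;
console.log(Object.keys(block), Object.keys(inline));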

src/Tokenizer.ts (+64 -70)
@@ -5,6 +5,7 @@ import {
   escape,
   findClosingBracket
 } from './helpers.ts';
+import type { Rules } from './rules.ts';
 import type { _Lexer } from './Lexer.ts';
 import type { Links, Tokens } from './Tokens.ts';
 import type { MarkedOptions } from './MarkedOptions.ts';
@@ -69,9 +70,8 @@ function indentCodeCompensation(raw: string, text: string) {
  */
 export class _Tokenizer {
   options: MarkedOptions;
-  // TODO: Fix this rules type
-  rules: any;
-  lexer!: _Lexer;
+  rules!: Rules; // set by the lexer
+  lexer!: _Lexer; // set by the lexer
 
   constructor(options?: MarkedOptions) {
     this.options = options || _defaults;
@@ -111,7 +111,7 @@ export class _Tokenizer {
       return {
         type: 'code',
         raw,
-        lang: cap[2] ? cap[2].trim().replace(this.rules.inline._escapes, '$1') : cap[2],
+        lang: cap[2] ? cap[2].trim().replace(this.rules.inline.anyPunctuation, '$1') : cap[2],
         text
       };
     }
@@ -182,7 +182,7 @@
         ordered: isordered,
         start: isordered ? +bull.slice(0, -1) : '',
         loose: false,
-        items: [] as Tokens.ListItem[]
+        items: []
       };
 
       bull = isordered ? `\\d{1,9}\\${bull.slice(-1)}` : `\\${bull}`;
@@ -207,10 +207,10 @@
           break;
         }
 
-        raw = cap[0] as string;
+        raw = cap[0];
         src = src.substring(raw.length);
 
-        let line = cap[2].split('\n', 1)[0].replace(/^\t+/, (t: string) => ' '.repeat(3 * t.length)) as string;
+        let line = cap[2].split('\n', 1)[0].replace(/^\t+/, (t: string) => ' '.repeat(3 * t.length));
        let nextLine = src.split('\n', 1)[0];

        let indent = 0;
@@ -338,7 +338,7 @@

      // Do not consume newlines at end of final item. Alternatively, make itemRegex *start* with any newlines to simplify/speed up endsWithBlankLine logic
      list.items[list.items.length - 1].raw = raw.trimEnd();
-      (list.items[list.items.length - 1] as Tokens.ListItem).text = itemContents.trimEnd();
+      (list.items[list.items.length - 1]).text = itemContents.trimEnd();
      list.raw = list.raw.trimEnd();

      // Item child tokens handled here at end because we needed to have the final item to trim it first
@@ -384,8 +384,8 @@
     const cap = this.rules.block.def.exec(src);
     if (cap) {
       const tag = cap[1].toLowerCase().replace(/\s+/g, ' ');
-      const href = cap[2] ? cap[2].replace(/^<(.*)>$/, '$1').replace(this.rules.inline._escapes, '$1') : '';
-      const title = cap[3] ? cap[3].substring(1, cap[3].length - 1).replace(this.rules.inline._escapes, '$1') : cap[3];
+      const href = cap[2] ? cap[2].replace(/^<(.*)>$/, '$1').replace(this.rules.inline.anyPunctuation, '$1') : '';
+      const title = cap[3] ? cap[3].substring(1, cap[3].length - 1).replace(this.rules.inline.anyPunctuation, '$1') : cap[3];
       return {
         type: 'def',
         tag,
@@ -398,67 +398,61 @@

   table(src: string): Tokens.Table | undefined {
     const cap = this.rules.block.table.exec(src);
-    if (cap) {
-      if (!/[:|]/.test(cap[2])) {
-        // delimiter row must have a pipe (|) or colon (:) otherwise it is a setext heading
-        return;
-      }
+    if (!cap) {
+      return;
+    }

-      const item: Tokens.Table = {
-        type: 'table',
-        raw: cap[0],
-        header: splitCells(cap[1]).map(c => {
-          return { text: c, tokens: [] };
-        }),
-        align: cap[2].replace(/^\||\| *$/g, '').split('|'),
-        rows: cap[3] && cap[3].trim() ? cap[3].replace(/\n[ \t]*$/, '').split('\n') : []
-      };
+    if (!/[:|]/.test(cap[2])) {
+      // delimiter row must have a pipe (|) or colon (:) otherwise it is a setext heading
+      return;
+    }

-      if (item.header.length === item.align.length) {
-        let l = item.align.length;
-        let i, j, k, row;
-        for (i = 0; i < l; i++) {
-          const align = item.align[i];
-          if (align) {
-            if (/^ *-+: *$/.test(align)) {
-              item.align[i] = 'right';
-            } else if (/^ *:-+: *$/.test(align)) {
-              item.align[i] = 'center';
-            } else if (/^ *:-+ *$/.test(align)) {
-              item.align[i] = 'left';
-            } else {
-              item.align[i] = null;
-            }
-          }
-        }
+    const headers = splitCells(cap[1]);
+    const aligns = cap[2].replace(/^\||\| *$/g, '').split('|');
+    const rows = cap[3] && cap[3].trim() ? cap[3].replace(/\n[ \t]*$/, '').split('\n') : [];

-        l = item.rows.length;
-        for (i = 0; i < l; i++) {
-          item.rows[i] = splitCells(item.rows[i] as unknown as string, item.header.length).map(c => {
-            return { text: c, tokens: [] };
-          });
-        }
+    const item: Tokens.Table = {
+      type: 'table',
+      raw: cap[0],
+      header: [],
+      align: [],
+      rows: []
+    };

-        // parse child tokens inside headers and cells
+    if (headers.length !== aligns.length) {
+      // header and align columns must be equal, rows can be different.
+      return;
+    }

-        // header child tokens
-        l = item.header.length;
-        for (j = 0; j < l; j++) {
-          item.header[j].tokens = this.lexer.inline(item.header[j].text);
-        }
+    for (const align of aligns) {
+      if (/^ *-+: *$/.test(align)) {
+        item.align.push('right');
+      } else if (/^ *:-+: *$/.test(align)) {
+        item.align.push('center');
+      } else if (/^ *:-+ *$/.test(align)) {
+        item.align.push('left');
+      } else {
+        item.align.push(null);
+      }
+    }

-        // cell child tokens
-        l = item.rows.length;
-        for (j = 0; j < l; j++) {
-          row = item.rows[j];
-          for (k = 0; k < row.length; k++) {
-            row[k].tokens = this.lexer.inline(row[k].text);
-          }
-        }
+    for (const header of headers) {
+      item.header.push({
+        text: header,
+        tokens: this.lexer.inline(header)
+      });
+    }

-        return item;
-      }
+    for (const row of rows) {
+      item.rows.push(splitCells(row, item.header.length).map(cell => {
+        return {
+          text: cell,
+          tokens: this.lexer.inline(cell)
+        };
+      }));
     }
+
+    return item;
   }

   lheading(src: string): Tokens.Heading | undefined {
@@ -587,8 +581,8 @@
         }
       }
       return outputLink(cap, {
-        href: href ? href.replace(this.rules.inline._escapes, '$1') : href,
-        title: title ? title.replace(this.rules.inline._escapes, '$1') : title
+        href: href ? href.replace(this.rules.inline.anyPunctuation, '$1') : href,
+        title: title ? title.replace(this.rules.inline.anyPunctuation, '$1') : title
       }, cap[0], this.lexer);
     }
   }
@@ -597,8 +591,8 @@
     let cap;
     if ((cap = this.rules.inline.reflink.exec(src))
       || (cap = this.rules.inline.nolink.exec(src))) {
-      let link = (cap[2] || cap[1]).replace(/\s+/g, ' ');
-      link = links[link.toLowerCase()];
+      const linkString = (cap[2] || cap[1]).replace(/\s+/g, ' ');
+      const link = links[linkString.toLowerCase()];
       if (!link) {
         const text = cap[0].charAt(0);
         return {
@@ -612,7 +606,7 @@
   }

   emStrong(src: string, maskedSrc: string, prevChar = ''): Tokens.Em | Tokens.Strong | undefined {
-    let match = this.rules.inline.emStrong.lDelim.exec(src);
+    let match = this.rules.inline.emStrongLDelim.exec(src);
     if (!match) return;

     // _ can't be between two alphanumerics. \p{L}\p{N} includes non-english alphabet/numbers as well
@@ -625,7 +619,7 @@
     const lLength = [...match[0]].length - 1;
     let rDelim, rLength, delimTotal = lLength, midDelimTotal = 0;

-    const endReg = match[0][0] === '*' ? this.rules.inline.emStrong.rDelimAst : this.rules.inline.emStrong.rDelimUnd;
+    const endReg = match[0][0] === '*' ? this.rules.inline.emStrongRDelimAst : this.rules.inline.emStrongRDelimUnd;
     endReg.lastIndex = 0;

     // Clip maskedSrc to same section of string as src (move to lexer?)
@@ -761,7 +755,7 @@
      let prevCapZero;
      do {
        prevCapZero = cap[0];
-        cap[0] = this.rules.inline._backpedal.exec(cap[0])[0];
+        cap[0] = this.rules.inline._backpedal.exec(cap[0])?.[0] ?? '';
      } while (prevCapZero !== cap[0]);
      text = escape(cap[0]);
      if (cap[1] === 'www.') {
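
The table() rewrite replaces the index-based loops with early returns and for...of passes, but the observable behavior is unchanged: a delimiter row whose column count differs from the header still yields no table token. A quick sketch:

import { marked } from 'marked';

// Header and delimiter row both have two columns: parsed as a table.
console.log(marked.parse('| a | b |\n| --- | :-: |\n| 1 | 2 |'));

// Two header columns vs. three delimiter columns: headers.length !==
// aligns.length, so table() returns undefined and the text falls through
// to paragraph parsing.
console.log(marked.parse('| a | b |\n| --- | --- | --- |\n| 1 | 2 |'));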

src/helpers.ts (+7 -9)
@@ -1,5 +1,3 @@
-import type { Rule } from './rules.ts';
-
 /**
  * Helpers
  */
@@ -48,18 +46,18 @@ export function unescape(html: string) {
 
 const caret = /(^|[^\[])\^/g;
 
-export function edit(regex: Rule, opt?: string) {
-  regex = typeof regex === 'string' ? regex : regex.source;
+export function edit(regex: string | RegExp, opt?: string) {
+  let source = typeof regex === 'string' ? regex : regex.source;
   opt = opt || '';
   const obj = {
     replace: (name: string | RegExp, val: string | RegExp) => {
-      val = typeof val === 'object' && 'source' in val ? val.source : val;
-      val = val.replace(caret, '$1');
-      regex = (regex as string).replace(name, val);
+      let valSource = typeof val === 'string' ? val : val.source;
+      valSource = valSource.replace(caret, '$1');
+      source = source.replace(name, valSource);
       return obj;
     },
     getRegex: () => {
-      return new RegExp(regex, opt);
+      return new RegExp(source, opt);
     }
   };
   return obj;
@@ -74,7 +72,7 @@ export function cleanUrl(href: string) {
   return href;
 }
 
-export const noopTest = { exec: () => null };
+export const noopTest = { exec: () => null } as unknown as RegExp;
 
 export function splitCells(tableRow: string, count?: number) {
   // ensure that every cell-delimiting pipe has a space
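
edit() now accumulates the pattern in a local source string instead of reassigning its own parameter, which lets the parameter keep the narrow string | RegExp type. It is an internal helper rather than public API, but a short sketch of the composition style the rules files use (the label sub-pattern here is hypothetical):

import { edit } from './helpers.ts';

// Splice a sub-pattern into a placeholder; the string is only compiled
// into a RegExp when getRegex() is called.
const label = /(?:\\.|[^\[\]\\])*?/;
const linkStart = edit(/^!?\[(label)\]/)
  .replace('label', label)
  .getRegex();

console.log(linkStart.test('[hello]')); // true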
