@@ -5,6 +5,7 @@
   escape,
   findClosingBracket
 } from './helpers.ts';
+import type { Rules } from './rules.ts';
 import type { _Lexer } from './Lexer.ts';
 import type { Links, Tokens } from './Tokens.ts';
 import type { MarkedOptions } from './MarkedOptions.ts';
@@ -69,9 +70,8 @@ function indentCodeCompensation(raw: string, text: string) {
  */
 export class _Tokenizer {
   options: MarkedOptions;
-  // TODO: Fix this rules type
-  rules: any;
-  lexer!: _Lexer;
+  rules!: Rules; // set by the lexer
+  lexer!: _Lexer; // set by the lexer
 
   constructor(options?: MarkedOptions) {
     this.options = options || _defaults;
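The `!` definite-assignment assertions are what let the class drop the `any` escape hatch: `rules` and `lexer` are wired up by the lexer after construction, not in the constructor. A minimal sketch of the pattern, with a stand-in shape rather than the real `Rules` from rules.ts:

```ts
// Stand-in rule shape for illustration only; the real Rules type
// is the one imported from './rules.ts' above.
interface MiniRules {
  inline: { anyPunctuation: RegExp };
}

class MiniTokenizer {
  // `!` promises the compiler this field is assigned before use,
  // just not inside this class.
  rules!: MiniRules;
}

class MiniLexer {
  tokenizer = new MiniTokenizer();
  constructor(rules: MiniRules) {
    this.tokenizer.rules = rules; // the external assignment `!` relies on
  }
}
```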
@@ -111,7 +111,7 @@ export class _Tokenizer {
     return {
       type: 'code',
       raw,
-      lang: cap[2] ? cap[2].trim().replace(this.rules.inline._escapes, '$1') : cap[2],
+      lang: cap[2] ? cap[2].trim().replace(this.rules.inline.anyPunctuation, '$1') : cap[2],
       text
     };
   }
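This hunk (and the `def` and `link` hunks below) swaps `_escapes` for `anyPunctuation`. Assuming the renamed rule, like the old one, matches a backslash followed by a captured punctuation character, `replace(..., '$1')` simply strips the backslashes from the fence's info string:

```ts
// Illustrative regex in the spirit of the rule; the real
// anyPunctuation is defined in rules.ts.
const anyPunctuation = /\\([!-/:-@\[-`{-~])/g;

// An info string written as `c\+\+` unescapes to `c++`.
console.log('c\\+\\+'.replace(anyPunctuation, '$1')); // c++
```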
@@ -182,7 +182,7 @@ export class _Tokenizer {
       ordered: isordered,
       start: isordered ? +bull.slice(0, -1) : '',
       loose: false,
-      items: [] as Tokens.ListItem[]
+      items: []
     };
 
     bull = isordered ? `\\d{1,9}\\${bull.slice(-1)}` : `\\${bull}`;
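`items: []` no longer needs the `as Tokens.ListItem[]` cast, presumably because the enclosing literal is already annotated as `Tokens.List`. The `start` line just above is easy to misread; it peels the delimiter off an ordered bullet and coerces the rest with a unary plus:

```ts
// An ordered bullet like '12.' or '7)' yields its number; unordered
// bullets yield the empty string.
const bull = '12.';
const isordered = bull.length > 1;
const start = isordered ? +bull.slice(0, -1) : ''; // 12 (a number, not '12')
```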
@@ -207,10 +207,10 @@ export class _Tokenizer {
         break;
       }
 
-      raw = cap[0] as string;
+      raw = cap[0];
       src = src.substring(raw.length);
 
-      let line = cap[2].split('\n', 1)[0].replace(/^\t+/, (t: string) => ' '.repeat(3 * t.length)) as string;
+      let line = cap[2].split('\n', 1)[0].replace(/^\t+/, (t: string) => ' '.repeat(3 * t.length));
       let nextLine = src.split('\n', 1)[0];
 
       let indent = 0;
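Both removed `as string` casts were redundant: `split(...)[0]` and `replace(...)` already produce `string` under the project's compiler settings. The replace callback itself expands leading tabs at three columns apiece:

```ts
// Leading tabs count as three spaces for the list-indent math below.
const line = '\t\tfoo'.replace(/^\t+/, (t: string) => ' '.repeat(3 * t.length));
console.log(JSON.stringify(line)); // "      foo" (two tabs, six spaces)
```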
@@ -338,7 +338,7 @@ export class _Tokenizer {
 
     // Do not consume newlines at end of final item. Alternatively, make itemRegex *start* with any newlines to simplify/speed up endsWithBlankLine logic
     list.items[list.items.length - 1].raw = raw.trimEnd();
-    (list.items[list.items.length - 1] as Tokens.ListItem).text = itemContents.trimEnd();
+    (list.items[list.items.length - 1]).text = itemContents.trimEnd();
     list.raw = list.raw.trimEnd();
 
     // Item child tokens handled here at end because we needed to have the final item to trim it first
@@ -384,8 +384,8 @@ export class _Tokenizer {
     const cap = this.rules.block.def.exec(src);
     if (cap) {
       const tag = cap[1].toLowerCase().replace(/\s+/g, ' ');
-      const href = cap[2] ? cap[2].replace(/^<(.*)>$/, '$1').replace(this.rules.inline._escapes, '$1') : '';
-      const title = cap[3] ? cap[3].substring(1, cap[3].length - 1).replace(this.rules.inline._escapes, '$1') : cap[3];
+      const href = cap[2] ? cap[2].replace(/^<(.*)>$/, '$1').replace(this.rules.inline.anyPunctuation, '$1') : '';
+      const title = cap[3] ? cap[3].substring(1, cap[3].length - 1).replace(this.rules.inline.anyPunctuation, '$1') : cap[3];
       return {
         type: 'def',
         tag,
@@ -398,67 +398,61 @@ export class _Tokenizer {
 
   table(src: string): Tokens.Table | undefined {
     const cap = this.rules.block.table.exec(src);
-    if (cap) {
-      if (!/[:|]/.test(cap[2])) {
-        // delimiter row must have a pipe (|) or colon (:) otherwise it is a setext heading
-        return;
-      }
+    if (!cap) {
+      return;
+    }
 
-      const item: Tokens.Table = {
-        type: 'table',
-        raw: cap[0],
-        header: splitCells(cap[1]).map(c => {
-          return { text: c, tokens: [] };
-        }),
-        align: cap[2].replace(/^\||\| *$/g, '').split('|'),
-        rows: cap[3] && cap[3].trim() ? cap[3].replace(/\n[ \t]*$/, '').split('\n') : []
-      };
+    if (!/[:|]/.test(cap[2])) {
+      // delimiter row must have a pipe (|) or colon (:) otherwise it is a setext heading
+      return;
+    }
 
-      if (item.header.length === item.align.length) {
-        let l = item.align.length;
-        let i, j, k, row;
-        for (i = 0; i < l; i++) {
-          const align = item.align[i];
-          if (align) {
-            if (/^ *-+: *$/.test(align)) {
-              item.align[i] = 'right';
-            } else if (/^ *:-+: *$/.test(align)) {
-              item.align[i] = 'center';
-            } else if (/^ *:-+ *$/.test(align)) {
-              item.align[i] = 'left';
-            } else {
-              item.align[i] = null;
-            }
-          }
-        }
+    const headers = splitCells(cap[1]);
+    const aligns = cap[2].replace(/^\||\| *$/g, '').split('|');
+    const rows = cap[3] && cap[3].trim() ? cap[3].replace(/\n[ \t]*$/, '').split('\n') : [];
 
-        l = item.rows.length;
-        for (i = 0; i < l; i++) {
-          item.rows[i] = splitCells(item.rows[i] as unknown as string, item.header.length).map(c => {
-            return { text: c, tokens: [] };
-          });
-        }
+    const item: Tokens.Table = {
+      type: 'table',
+      raw: cap[0],
+      header: [],
+      align: [],
+      rows: []
+    };
 
-        // parse child tokens inside headers and cells
+    if (headers.length !== aligns.length) {
+      // header and align columns must be equal, rows can be different.
+      return;
+    }
 
-        // header child tokens
-        l = item.header.length;
-        for (j = 0; j < l; j++) {
-          item.header[j].tokens = this.lexer.inline(item.header[j].text);
-        }
+    for (const align of aligns) {
+      if (/^ *-+: *$/.test(align)) {
+        item.align.push('right');
+      } else if (/^ *:-+: *$/.test(align)) {
+        item.align.push('center');
+      } else if (/^ *:-+ *$/.test(align)) {
+        item.align.push('left');
+      } else {
+        item.align.push(null);
+      }
+    }
 
-        // cell child tokens
-        l = item.rows.length;
-        for (j = 0; j < l; j++) {
-          row = item.rows[j];
-          for (k = 0; k < row.length; k++) {
-            row[k].tokens = this.lexer.inline(row[k].text);
-          }
-        }
+    for (const header of headers) {
+      item.header.push({
+        text: header,
+        tokens: this.lexer.inline(header)
+      });
+    }
 
-        return item;
-      }
+    for (const row of rows) {
+      item.rows.push(splitCells(row, item.header.length).map(cell => {
+        return {
+          text: cell,
+          tokens: this.lexer.inline(cell)
+        };
+      }));
     }
+
+    return item;
   }
 
   lheading(src: string): Tokens.Heading | undefined {
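The rewritten `table()` replaces the nested `if (cap)` pyramid with guard clauses, extracts `headers`/`aligns`/`rows` up front, and lexes child tokens in the same loops that build the token rather than in a second pass. The alignment classification is untouched; here is a standalone check of the three regexes against a sample delimiter row (illustrative input only):

```ts
// '| :-- | :-: | --: | --- |' classifies as left / center / right / null.
const delimiterRow = '| :-- | :-: | --: | --- |';
const aligns = delimiterRow.replace(/^\||\| *$/g, '').split('|');
const align = aligns.map(a =>
  /^ *-+: *$/.test(a) ? 'right'
    : /^ *:-+: *$/.test(a) ? 'center'
      : /^ *:-+ *$/.test(a) ? 'left'
        : null);
console.log(align); // [ 'left', 'center', 'right', null ]
```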
@@ -587,8 +581,8 @@ export class _Tokenizer {
         }
       }
       return outputLink(cap, {
-        href: href ? href.replace(this.rules.inline._escapes, '$1') : href,
-        title: title ? title.replace(this.rules.inline._escapes, '$1') : title
+        href: href ? href.replace(this.rules.inline.anyPunctuation, '$1') : href,
+        title: title ? title.replace(this.rules.inline.anyPunctuation, '$1') : title
       }, cap[0], this.lexer);
     }
   }
@@ -597,8 +591,8 @@ export class _Tokenizer {
     let cap;
     if ((cap = this.rules.inline.reflink.exec(src))
       || (cap = this.rules.inline.nolink.exec(src))) {
-      let link = (cap[2] || cap[1]).replace(/\s+/g, ' ');
-      link = links[link.toLowerCase()];
+      const linkString = (cap[2] || cap[1]).replace(/\s+/g, ' ');
+      const link = links[linkString.toLowerCase()];
       if (!link) {
         const text = cap[0].charAt(0);
         return {
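Splitting the reassigned `let link` into two `const`s removes a variable whose type silently changed from `string` to a `Links` entry between statements. A sketch of the difference, with a hypothetical `Links` shape:

```ts
// Hypothetical Links shape for illustration.
const links: Record<string, { href: string } | undefined> = {
  foo: { href: '/foo' }
};

const linkString = 'Foo'.replace(/\s+/g, ' '); // stays a string
const link = links[linkString.toLowerCase()];  // a Links entry, or undefined
console.log(link?.href); // /foo
```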
@@ -612,7 +606,7 @@ export class _Tokenizer {
   }
 
   emStrong(src: string, maskedSrc: string, prevChar = ''): Tokens.Em | Tokens.Strong | undefined {
-    let match = this.rules.inline.emStrong.lDelim.exec(src);
+    let match = this.rules.inline.emStrongLDelim.exec(src);
     if (!match) return;
 
     // _ can't be between two alphanumerics. \p{L}\p{N} includes non-english alphabet/numbers as well
@@ -625,7 +619,7 @@ export class _Tokenizer {
     const lLength = [...match[0]].length - 1;
     let rDelim, rLength, delimTotal = lLength, midDelimTotal = 0;
 
-    const endReg = match[0][0] === '*' ? this.rules.inline.emStrong.rDelimAst : this.rules.inline.emStrong.rDelimUnd;
+    const endReg = match[0][0] === '*' ? this.rules.inline.emStrongRDelimAst : this.rules.inline.emStrongRDelimUnd;
     endReg.lastIndex = 0;
 
     // Clip maskedSrc to same section of string as src (move to lexer?)
@@ -761,7 +755,7 @@ export class _Tokenizer {
         let prevCapZero;
         do {
           prevCapZero = cap[0];
-          cap[0] = this.rules.inline._backpedal.exec(cap[0])[0];
+          cap[0] = this.rules.inline._backpedal.exec(cap[0])?.[0] ?? '';
         } while (prevCapZero !== cap[0]);
         text = escape(cap[0]);
         if (cap[1] === 'www.') {
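`RegExp#exec` returns `null` on a failed match, so the old unchecked `[0]` could throw mid-backpedal; `?.[0] ?? ''` degrades to an empty string instead. A self-contained illustration with a stand-in pattern:

```ts
// Stand-in pattern; the real _backpedal regex lives in rules.ts.
const backpedal = /[a-z]+/;

function backpedalOnce(input: string): string {
  return backpedal.exec(input)?.[0] ?? ''; // '' rather than a TypeError on null
}

console.log(backpedalOnce('abc.')); // abc
console.log(backpedalOnce('123'));  // '' (no match, exec returned null)
```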