@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
77
88 Written by Philip Hazel
99 Original API code Copyright (c) 1997-2012 University of Cambridge
10- New API code Copyright (c) 2016-2020 University of Cambridge
10+ New API code Copyright (c) 2016-2021 University of Cambridge
1111
1212-----------------------------------------------------------------------------
1313Redistribution and use in source and binary forms, with or without
@@ -137,7 +137,7 @@ static BOOL
137137
138138static int
139139 check_lookbehinds (uint32_t * , uint32_t * * , parsed_recurse_check * ,
140- compile_block * );
140+ compile_block * , int * );
141141
142142
143143/*************************************************
@@ -782,12 +782,15 @@ are allowed. */
782782#define PUBLIC_COMPILE_EXTRA_OPTIONS \
783783 (PUBLIC_LITERAL_COMPILE_EXTRA_OPTIONS| \
784784 PCRE2_EXTRA_ALLOW_SURROGATE_ESCAPES|PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL| \
785- PCRE2_EXTRA_ESCAPED_CR_IS_LF|PCRE2_EXTRA_ALT_BSUX)
785+ PCRE2_EXTRA_ESCAPED_CR_IS_LF|PCRE2_EXTRA_ALT_BSUX| \
786+ PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK)
786787
787788/* Compile time error code numbers. They are given names so that they can more
788789easily be tracked. When a new number is added, the tables called eint1 and
789790eint2 in pcre2posix.c may need to be updated, and a new error text must be
790- added to compile_error_texts in pcre2_error.c. */
791+ added to compile_error_texts in pcre2_error.c. Also, the error codes in
792+ pcre2.h.in must be updated - their values are exactly 100 greater than these
793+ values. */
791794
792795enum { ERR0 = COMPILE_ERROR_BASE ,
793796 ERR1 , ERR2 , ERR3 , ERR4 , ERR5 , ERR6 , ERR7 , ERR8 , ERR9 , ERR10 ,
@@ -799,7 +802,7 @@ enum { ERR0 = COMPILE_ERROR_BASE,
799802 ERR61 , ERR62 , ERR63 , ERR64 , ERR65 , ERR66 , ERR67 , ERR68 , ERR69 , ERR70 ,
800803 ERR71 , ERR72 , ERR73 , ERR74 , ERR75 , ERR76 , ERR77 , ERR78 , ERR79 , ERR80 ,
801804 ERR81 , ERR82 , ERR83 , ERR84 , ERR85 , ERR86 , ERR87 , ERR88 , ERR89 , ERR90 ,
802- ERR91 , ERR92 , ERR93 , ERR94 , ERR95 , ERR96 , ERR97 , ERR98 };
805+ ERR91 , ERR92 , ERR93 , ERR94 , ERR95 , ERR96 , ERR97 , ERR98 , ERR99 };
803806
804807/* This is a table of start-of-pattern options such as (*UTF) and settings such
805808as (*LIMIT_MATCH=nnnn) and (*CRLF). For completeness and backward
@@ -7799,6 +7802,16 @@ for (;; pptr++)
77997802 }
78007803#endif
78017804
7805+ /* \K is forbidden in lookarounds since 10.38 because that's what Perl has
7806+ done. However, there's an option, in case anyone was relying on it. */
7807+
7808+ if (cb -> assert_depth > 0 && meta_arg == ESC_K &&
7809+ (cb -> cx -> extra_options & PCRE2_EXTRA_ALLOW_LOOKAROUND_BSK ) == 0 )
7810+ {
7811+ * errorcodeptr = ERR99 ;
7812+ return 0 ;
7813+ }
7814+
78027815 /* For the rest (including \X when Unicode is supported - if not it's
78037816 faulted at parse time), the OP value is the escape value when PCRE2_UCP is
78047817 not set; if it is set, these escapes do not show up here because they are
@@ -9148,7 +9161,7 @@ for (;; pptr++)
91489161 case META_LOOKAHEAD :
91499162 case META_LOOKAHEADNOT :
91509163 case META_LOOKAHEAD_NA :
9151- * errcodeptr = check_lookbehinds (pptr + 1 , & pptr , recurses , cb );
9164+ * errcodeptr = check_lookbehinds (pptr + 1 , & pptr , recurses , cb , lcptr );
91529165 if (* errcodeptr != 0 ) return -1 ;
91539166
91549167 /* Ignore any qualifiers that follow a lookahead assertion. */
@@ -9488,16 +9501,16 @@ Arguments
94889501 retptr if not NULL, return the ket pointer here
94899502 recurses chain of recurse_check to catch mutual recursion
94909503 cb points to the compile block
9504+ lcptr points to the caller's loop counter, passed on to set_lookbehind_lengths()
94919505
94929506Returns: 0 on success, or an errorcode (cb->erroroffset will be set)
94939507*/
94949508
94959509static int
94969510check_lookbehinds (uint32_t * pptr , uint32_t * * retptr ,
9497- parsed_recurse_check * recurses , compile_block * cb )
9511+ parsed_recurse_check * recurses , compile_block * cb , int * lcptr )
94989512{
94999513int errorcode = 0 ;
9500- int loopcount = 0 ;
95019514int nestlevel = 0 ;
95029515
95039516cb -> erroroffset = PCRE2_UNSET ;
@@ -9623,7 +9636,7 @@ for (; *pptr != META_END; pptr++)
96239636 case META_LOOKBEHIND :
96249637 case META_LOOKBEHINDNOT :
96259638 case META_LOOKBEHIND_NA :
9626- if (!set_lookbehind_lengths (& pptr , & errorcode , & loopcount , recurses , cb ))
9639+ if (!set_lookbehind_lengths (& pptr , & errorcode , lcptr , recurses , cb ))
96279640 return errorcode ;
96289641 break ;
96299642 }
@@ -10078,7 +10091,8 @@ lengths. */
1007810091
1007910092if (has_lookbehind )
1008010093 {
10081- errorcode = check_lookbehinds (cb .parsed_pattern , NULL , NULL , & cb );
10094+ int loopcount = 0 ;
10095+ errorcode = check_lookbehinds (cb .parsed_pattern , NULL , NULL , & cb , & loopcount );
1008210096 if (errorcode != 0 ) goto HAD_CB_ERROR ;
1008310097 }
1008410098
0 commit comments