Skip to content

Commit aa9433e

Browse files
committed
Upgrade bundled PCRE2 to 10.33
1 parent ef34e00 commit aa9433e

34 files changed

+7264
-4511
lines changed

ext/pcre/config.w32

+1-1
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22

33
EXTENSION("pcre", "php_pcre.c", false /* never shared */,
44
"-Iext/pcre/pcre2lib -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1");
5-
ADD_SOURCES("ext/pcre/pcre2lib", "pcre2_auto_possess.c pcre2_chartables.c pcre2_compile.c pcre2_config.c pcre2_context.c pcre2_dfa_match.c pcre2_error.c pcre2_jit_compile.c pcre2_maketables.c pcre2_match.c pcre2_match_data.c pcre2_newline.c pcre2_ord2utf.c pcre2_pattern_info.c pcre2_serialize.c pcre2_string_utils.c pcre2_study.c pcre2_substitute.c pcre2_substring.c pcre2_tables.c pcre2_ucd.c pcre2_valid_utf.c pcre2_xclass.c pcre2_find_bracket.c pcre2_convert.c pcre2_extuni.c", "pcre");
5+
ADD_SOURCES("ext/pcre/pcre2lib", "pcre2_auto_possess.c pcre2_chartables.c pcre2_compile.c pcre2_config.c pcre2_context.c pcre2_dfa_match.c pcre2_error.c pcre2_jit_compile.c pcre2_maketables.c pcre2_match.c pcre2_match_data.c pcre2_newline.c pcre2_ord2utf.c pcre2_pattern_info.c pcre2_serialize.c pcre2_string_utils.c pcre2_study.c pcre2_substitute.c pcre2_substring.c pcre2_tables.c pcre2_ucd.c pcre2_valid_utf.c pcre2_xclass.c pcre2_find_bracket.c pcre2_convert.c pcre2_extuni.c pcre2_script_run.c", "pcre");
66
ADD_DEF_FILE("ext\\pcre\\php_pcre.def");
77

88
AC_DEFINE('HAVE_BUNDLED_PCRE', 1, 'Using bundled PCRE library');

ext/pcre/config0.m4

+1-1
Original file line numberDiff line numberDiff line change
@@ -66,7 +66,7 @@ else
6666
pcre2lib/pcre2_newline.c pcre2lib/pcre2_ord2utf.c pcre2lib/pcre2_pattern_info.c pcre2lib/pcre2_serialize.c \
6767
pcre2lib/pcre2_string_utils.c pcre2lib/pcre2_study.c pcre2lib/pcre2_substitute.c pcre2lib/pcre2_substring.c \
6868
pcre2lib/pcre2_tables.c pcre2lib/pcre2_ucd.c pcre2lib/pcre2_valid_utf.c pcre2lib/pcre2_xclass.c \
69-
pcre2lib/pcre2_find_bracket.c pcre2lib/pcre2_convert.c pcre2lib/pcre2_extuni.c"
69+
pcre2lib/pcre2_find_bracket.c pcre2lib/pcre2_convert.c pcre2lib/pcre2_extuni.c pcre2lib/pcre2_script_run.c"
7070
PHP_PCRE_CFLAGS="-DHAVE_CONFIG_H -I@ext_srcdir@/pcre2lib -DZEND_ENABLE_STATIC_TSRMLS_CACHE=1"
7171
PHP_NEW_EXTENSION(pcre, $pcrelib_sources php_pcre.c, no,,$PHP_PCRE_CFLAGS)
7272
PHP_ADD_BUILD_DIR($ext_builddir/pcre2lib)

ext/pcre/pcre2lib/pcre2.h

+56-52
Original file line numberDiff line numberDiff line change
@@ -42,15 +42,9 @@ POSSIBILITY OF SUCH DAMAGE.
4242
/* The current PCRE version information. */
4343

4444
#define PCRE2_MAJOR 10
45-
#define PCRE2_MINOR 32
45+
#define PCRE2_MINOR 33
4646
#define PCRE2_PRERELEASE
47-
#define PCRE2_DATE 2018-09-10
48-
49-
/* For the benefit of systems without stdint.h, an alternative is to use
50-
inttypes.h. The existence of these headers is checked by configure or CMake. */
51-
52-
#define PCRE2_HAVE_STDINT_H 1
53-
#define PCRE2_HAVE_INTTYPES_H 1
47+
#define PCRE2_DATE 2019-04-16
5448

5549
/* When an application links to a PCRE DLL in Windows, the symbols that are
5650
imported have to be identified as such. When building PCRE2, the appropriate
@@ -87,18 +81,15 @@ set, we ensure here that it has no effect. */
8781
#define PCRE2_CALL_CONVENTION
8882
#endif
8983

90-
/* Have to include limits.h, stdlib.h and stdint.h (or inttypes.h) to ensure
91-
that size_t and uint8_t, UCHAR_MAX, etc are defined. If the system has neither
92-
header, the relevant values must be provided by some other means. */
84+
/* Have to include limits.h, stdlib.h, and inttypes.h to ensure that size_t and
85+
uint8_t, UCHAR_MAX, etc are defined. Some systems that do have inttypes.h do
86+
not have stdint.h, which is why we use inttypes.h, which according to the C
87+
standard is a superset of stdint.h. If none of these headers are available,
88+
the relevant values must be provided by some other means. */
9389

9490
#include <limits.h>
9591
#include <stdlib.h>
96-
97-
#if PCRE2_HAVE_STDINT_H
98-
#include <stdint.h>
99-
#elif PCRE2_HAVE_INTTYPES_H
10092
#include <inttypes.h>
101-
#endif
10293

10394
/* Allow for C++ users compiling this directly. */
10495

@@ -158,43 +149,37 @@ D is inspected during pcre2_dfa_match() execution
158149
#define PCRE2_EXTRA_BAD_ESCAPE_IS_LITERAL 0x00000002u /* C */
159150
#define PCRE2_EXTRA_MATCH_WORD 0x00000004u /* C */
160151
#define PCRE2_EXTRA_MATCH_LINE 0x00000008u /* C */
152+
#define PCRE2_EXTRA_ESCAPED_CR_IS_LF 0x00000010u /* C */
153+
#define PCRE2_EXTRA_ALT_BSUX 0x00000020u /* C */
161154

162155
/* These are for pcre2_jit_compile(). */
163156

164157
#define PCRE2_JIT_COMPLETE 0x00000001u /* For full matching */
165158
#define PCRE2_JIT_PARTIAL_SOFT 0x00000002u
166159
#define PCRE2_JIT_PARTIAL_HARD 0x00000004u
167-
168-
/* These are for pcre2_match(), pcre2_dfa_match(), and pcre2_jit_match(). Note
169-
that PCRE2_ANCHORED and PCRE2_NO_UTF_CHECK can also be passed to these
170-
functions (though pcre2_jit_match() ignores the latter since it bypasses all
171-
sanity checks). */
172-
173-
#define PCRE2_NOTBOL 0x00000001u
174-
#define PCRE2_NOTEOL 0x00000002u
175-
#define PCRE2_NOTEMPTY 0x00000004u /* ) These two must be kept */
176-
#define PCRE2_NOTEMPTY_ATSTART 0x00000008u /* ) adjacent to each other. */
177-
#define PCRE2_PARTIAL_SOFT 0x00000010u
178-
#define PCRE2_PARTIAL_HARD 0x00000020u
179-
180-
/* These are additional options for pcre2_dfa_match(). */
181-
182-
#define PCRE2_DFA_RESTART 0x00000040u
183-
#define PCRE2_DFA_SHORTEST 0x00000080u
184-
185-
/* These are additional options for pcre2_substitute(), which passes any others
186-
through to pcre2_match(). */
187-
188-
#define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u
189-
#define PCRE2_SUBSTITUTE_EXTENDED 0x00000200u
190-
#define PCRE2_SUBSTITUTE_UNSET_EMPTY 0x00000400u
191-
#define PCRE2_SUBSTITUTE_UNKNOWN_UNSET 0x00000800u
192-
#define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH 0x00001000u
193-
194-
/* A further option for pcre2_match(), not allowed for pcre2_dfa_match(),
195-
ignored for pcre2_jit_match(). */
196-
197-
#define PCRE2_NO_JIT 0x00002000u
160+
#define PCRE2_JIT_INVALID_UTF 0x00000100u
161+
162+
/* These are for pcre2_match(), pcre2_dfa_match(), pcre2_jit_match(), and
163+
pcre2_substitute(). Some are allowed only for one of the functions, and in
164+
these cases it is noted below. Note that PCRE2_ANCHORED, PCRE2_ENDANCHORED and
165+
PCRE2_NO_UTF_CHECK can also be passed to these functions (though
166+
pcre2_jit_match() ignores the last of these since it bypasses all sanity checks). */
167+
168+
#define PCRE2_NOTBOL 0x00000001u
169+
#define PCRE2_NOTEOL 0x00000002u
170+
#define PCRE2_NOTEMPTY 0x00000004u /* ) These two must be kept */
171+
#define PCRE2_NOTEMPTY_ATSTART 0x00000008u /* ) adjacent to each other. */
172+
#define PCRE2_PARTIAL_SOFT 0x00000010u
173+
#define PCRE2_PARTIAL_HARD 0x00000020u
174+
#define PCRE2_DFA_RESTART 0x00000040u /* pcre2_dfa_match() only */
175+
#define PCRE2_DFA_SHORTEST 0x00000080u /* pcre2_dfa_match() only */
176+
#define PCRE2_SUBSTITUTE_GLOBAL 0x00000100u /* pcre2_substitute() only */
177+
#define PCRE2_SUBSTITUTE_EXTENDED 0x00000200u /* pcre2_substitute() only */
178+
#define PCRE2_SUBSTITUTE_UNSET_EMPTY 0x00000400u /* pcre2_substitute() only */
179+
#define PCRE2_SUBSTITUTE_UNKNOWN_UNSET 0x00000800u /* pcre2_substitute() only */
180+
#define PCRE2_SUBSTITUTE_OVERFLOW_LENGTH 0x00001000u /* pcre2_substitute() only */
181+
#define PCRE2_NO_JIT 0x00002000u /* Not for pcre2_dfa_match() */
182+
#define PCRE2_COPY_MATCHED_SUBJECT 0x00004000u
198183

199184
/* Options for pcre2_pattern_convert(). */
200185

@@ -318,6 +303,8 @@ pcre2_pattern_convert(). */
318303
#define PCRE2_ERROR_BAD_LITERAL_OPTIONS 192
319304
#define PCRE2_ERROR_SUPPORTED_ONLY_IN_UNICODE 193
320305
#define PCRE2_ERROR_INVALID_HYPHEN_IN_OPTIONS 194
306+
#define PCRE2_ERROR_ALPHA_ASSERTION_UNKNOWN 195
307+
#define PCRE2_ERROR_SCRIPT_RUN_NOT_AVAILABLE 196
321308

322309

323310
/* "Expected" matching error codes: no match and partial match. */
@@ -504,10 +491,10 @@ typedef struct pcre2_real_jit_stack pcre2_jit_stack; \
504491
typedef pcre2_jit_stack *(*pcre2_jit_callback)(void *);
505492

506493

507-
/* The structure for passing out data via the pcre_callout_function. We use a
508-
structure so that new fields can be added on the end in future versions,
509-
without changing the API of the function, thereby allowing old clients to work
510-
without modification. Define the generic version in a macro; the width-specific
494+
/* The structures for passing out data via callout functions. We use structures
495+
so that new fields can be added on the end in future versions, without changing
496+
the API of the function, thereby allowing old clients to work without
497+
modification. Define the generic versions in a macro; the width-specific
511498
versions are generated from this macro below. */
512499

513500
/* Flags for the callout_flags field. These are cleared after a callout. */
@@ -549,7 +536,19 @@ typedef struct pcre2_callout_enumerate_block { \
549536
PCRE2_SIZE callout_string_length; /* Length of string compiled into pattern */ \
550537
PCRE2_SPTR callout_string; /* String compiled into pattern */ \
551538
/* ------------------------------------------------------------------ */ \
552-
} pcre2_callout_enumerate_block;
539+
} pcre2_callout_enumerate_block; \
540+
\
541+
typedef struct pcre2_substitute_callout_block { \
542+
uint32_t version; /* Identifies version of block */ \
543+
/* ------------------------ Version 0 ------------------------------- */ \
544+
PCRE2_SPTR input; /* Pointer to input subject string */ \
545+
PCRE2_SPTR output; /* Pointer to output buffer */ \
546+
PCRE2_SIZE output_offsets[2]; /* Changed portion of the output */ \
547+
PCRE2_SIZE *ovector; /* Pointer to current ovector */ \
548+
uint32_t oveccount; /* Count of pairs set in ovector */ \
549+
uint32_t subscount; /* Substitution number */ \
550+
/* ------------------------------------------------------------------ */ \
551+
} pcre2_substitute_callout_block;
553552

554553

555554
/* List the generic forms of all other functions in macros, which will be
@@ -604,6 +603,9 @@ PCRE2_EXP_DECL void PCRE2_CALL_CONVENTION \
604603
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
605604
pcre2_set_callout(pcre2_match_context *, \
606605
int (*)(pcre2_callout_block *, void *), void *); \
606+
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
607+
pcre2_set_substitute_callout(pcre2_match_context *, \
608+
int (*)(pcre2_substitute_callout_block *, void *), void *); \
607609
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
608610
pcre2_set_depth_limit(pcre2_match_context *, uint32_t); \
609611
PCRE2_EXP_DECL int PCRE2_CALL_CONVENTION \
@@ -807,6 +809,7 @@ pcre2_compile are called by application code. */
807809

808810
#define pcre2_callout_block PCRE2_SUFFIX(pcre2_callout_block_)
809811
#define pcre2_callout_enumerate_block PCRE2_SUFFIX(pcre2_callout_enumerate_block_)
812+
#define pcre2_substitute_callout_block PCRE2_SUFFIX(pcre2_substitute_callout_block_)
810813
#define pcre2_general_context PCRE2_SUFFIX(pcre2_general_context_)
811814
#define pcre2_compile_context PCRE2_SUFFIX(pcre2_compile_context_)
812815
#define pcre2_convert_context PCRE2_SUFFIX(pcre2_convert_context_)
@@ -872,6 +875,7 @@ pcre2_compile are called by application code. */
872875
#define pcre2_set_newline PCRE2_SUFFIX(pcre2_set_newline_)
873876
#define pcre2_set_parens_nest_limit PCRE2_SUFFIX(pcre2_set_parens_nest_limit_)
874877
#define pcre2_set_offset_limit PCRE2_SUFFIX(pcre2_set_offset_limit_)
878+
#define pcre2_set_substitute_callout PCRE2_SUFFIX(pcre2_set_substitute_callout_)
875879
#define pcre2_substitute PCRE2_SUFFIX(pcre2_substitute_)
876880
#define pcre2_substring_copy_byname PCRE2_SUFFIX(pcre2_substring_copy_byname_)
877881
#define pcre2_substring_copy_bynumber PCRE2_SUFFIX(pcre2_substring_copy_bynumber_)

ext/pcre/pcre2lib/pcre2_auto_possess.c

+11-3
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,7 @@ and semantics are as close as possible to those of the Perl 5 language.
77
88
Written by Philip Hazel
99
Original API code Copyright (c) 1997-2012 University of Cambridge
10-
New API code Copyright (c) 2016-2018 University of Cambridge
10+
New API code Copyright (c) 2016-2019 University of Cambridge
1111
1212
-----------------------------------------------------------------------------
1313
Redistribution and use in source and binary forms, with or without
@@ -605,6 +605,15 @@ for(;;)
605605
if (cb->had_recurse) return FALSE;
606606
break;
607607

608+
/* A script run might have to backtrack if the iterated item can match
609+
characters from more than one script. So give up unless repeating an
610+
explicit character. */
611+
612+
case OP_SCRIPT_RUN:
613+
if (base_list[0] != OP_CHAR && base_list[0] != OP_CHARI)
614+
return FALSE;
615+
break;
616+
608617
/* Atomic sub-patterns and assertions can always auto-possessify their
609618
last iterator. However, if the group was entered as a result of checking
610619
a previous iterator, this is not possible. */
@@ -614,7 +623,6 @@ for(;;)
614623
case OP_ASSERTBACK:
615624
case OP_ASSERTBACK_NOT:
616625
case OP_ONCE:
617-
618626
return !entered_a_group;
619627
}
620628

@@ -1043,7 +1051,7 @@ for(;;)
10431051
if (chr > 255) break;
10441052
class_bitset = (uint8_t *)
10451053
((list_ptr == list ? code : base_end) - list_ptr[2]);
1046-
if ((class_bitset[chr >> 3] & (1 << (chr & 7))) != 0) return FALSE;
1054+
if ((class_bitset[chr >> 3] & (1u << (chr & 7))) != 0) return FALSE;
10471055
break;
10481056

10491057
#ifdef SUPPORT_WIDE_CHARS

ext/pcre/pcre2lib/pcre2_chartables.c

+25-20
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,14 @@ program. It contains character tables that are used when no external
77
tables are passed to PCRE2 by the application that calls it. The tables
88
are used only for characters whose code values are less than 256. */
99

10-
/* The following #includes are present because without them gcc 4.x may remove
10+
/* The dftables program (which is distributed with PCRE2) can be used to
11+
build alternative versions of this file. This is necessary if you are
12+
running in an EBCDIC environment, or if you want to default to a different
13+
encoding, for example ISO-8859-1. When dftables is run, it creates these
14+
tables in the current locale. This happens automatically if PCRE2 is
15+
configured with --enable-rebuild-chartables. */
16+
17+
/* The following #include is present because without it gcc 4.x may remove
1118
the array definition from the final binary if PCRE2 is built into a static
1219
library and dead code stripping is activated. This leads to link errors.
1320
Pulling in the header ensures that the array gets flagged as "someone
@@ -92,11 +99,10 @@ const uint8_t PRIV(default_tables)[] = {
9299
240,241,242,243,244,245,246,247,
93100
248,249,250,251,252,253,254,255,
94101

95-
/* This table contains bit maps for various character classes.
96-
Each map is 32 bytes long and the bits run from the least
97-
significant end of each byte. The classes that have their own
98-
maps are: space, xdigit, digit, upper, lower, word, graph
99-
print, punct, and cntrl. Other classes are built from combinations. */
102+
/* This table contains bit maps for various character classes. Each map is 32
103+
bytes long and the bits run from the least significant end of each byte. The
104+
classes that have their own maps are: space, xdigit, digit, upper, lower, word,
105+
graph, print, punct, and cntrl. Other classes are built from combinations. */
100106

101107
0x00,0x3e,0x00,0x00,0x01,0x00,0x00,0x00,
102108
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,
@@ -151,28 +157,27 @@ print, punct, and cntrl. Other classes are built from combinations. */
151157
/* This table identifies various classes of character by individual bits:
152158
0x01 white space character
153159
0x02 letter
154-
0x04 decimal digit
155-
0x08 hexadecimal digit
160+
0x04 lower case letter
161+
0x08 decimal digit
156162
0x10 alphanumeric or '_'
157-
0x80 regular expression metacharacter or binary zero
158163
*/
159164

160-
0x80,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
165+
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 0- 7 */
161166
0x00,0x01,0x01,0x01,0x01,0x01,0x00,0x00, /* 8- 15 */
162167
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 16- 23 */
163168
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 24- 31 */
164-
0x01,0x00,0x00,0x00,0x80,0x00,0x00,0x00, /* - ' */
165-
0x80,0x80,0x80,0x80,0x00,0x00,0x80,0x00, /* ( - / */
166-
0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c,0x1c, /* 0 - 7 */
167-
0x1c,0x1c,0x00,0x00,0x00,0x00,0x00,0x80, /* 8 - ? */
168-
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* @ - G */
169+
0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* - ' */
170+
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* ( - / */
171+
0x18,0x18,0x18,0x18,0x18,0x18,0x18,0x18, /* 0 - 7 */
172+
0x18,0x18,0x00,0x00,0x00,0x00,0x00,0x00, /* 8 - ? */
173+
0x00,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* @ - G */
169174
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* H - O */
170175
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* P - W */
171-
0x12,0x12,0x12,0x80,0x80,0x00,0x80,0x10, /* X - _ */
172-
0x00,0x1a,0x1a,0x1a,0x1a,0x1a,0x1a,0x12, /* ` - g */
173-
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* h - o */
174-
0x12,0x12,0x12,0x12,0x12,0x12,0x12,0x12, /* p - w */
175-
0x12,0x12,0x12,0x80,0x80,0x00,0x00,0x00, /* x -127 */
176+
0x12,0x12,0x12,0x00,0x00,0x00,0x00,0x10, /* X - _ */
177+
0x00,0x16,0x16,0x16,0x16,0x16,0x16,0x16, /* ` - g */
178+
0x16,0x16,0x16,0x16,0x16,0x16,0x16,0x16, /* h - o */
179+
0x16,0x16,0x16,0x16,0x16,0x16,0x16,0x16, /* p - w */
180+
0x16,0x16,0x16,0x00,0x00,0x00,0x00,0x00, /* x -127 */
176181
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 128-135 */
177182
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 136-143 */
178183
0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 144-151 */

0 commit comments

Comments
 (0)