Skip to content

Commit cc3e03c

Browse files
committed
Fix parsing of semi-reserved tokens at offset > 4 GB
To avoid increasing the size of parser stack elements by storing a size_t offset and length, this change instead stores only a pointer to the start of the identifier (rather than an offset) and determines the identifier's length in zend_lex_tstring.
1 parent b67409e commit cc3e03c

File tree

3 files changed

+14
-15
lines changed

3 files changed

+14
-15
lines changed

Zend/zend_compile.h

+1-6
Original file line numberDiff line numberDiff line change
@@ -117,17 +117,12 @@ typedef struct _zend_file_context {
117117
HashTable seen_symbols;
118118
} zend_file_context;
119119

120-
typedef struct {
121-
uint32_t offset;
122-
uint32_t len;
123-
} zend_lexer_ident_ref;
124-
125120
typedef union _zend_parser_stack_elem {
126121
zend_ast *ast;
127122
zend_string *str;
128123
zend_ulong num;
129124
unsigned char *ptr;
130-
zend_lexer_ident_ref ident;
125+
unsigned char *ident;
131126
} zend_parser_stack_elem;
132127

133128
void zend_compile_top_stmt(zend_ast *ast);

Zend/zend_language_scanner.h

+1-1
Original file line numberDiff line numberDiff line change
@@ -78,7 +78,7 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state);
7878
ZEND_API void zend_prepare_string_for_scanning(zval *str, zend_string *filename);
7979
ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding);
8080
ZEND_API zend_result zend_multibyte_set_filter(const zend_encoding *onetime_encoding);
81-
ZEND_API zend_result zend_lex_tstring(zval *zv, zend_lexer_ident_ref ident_ref);
81+
ZEND_API zend_result zend_lex_tstring(zval *zv, unsigned char *ident);
8282

8383
END_EXTERN_C()
8484

Zend/zend_language_scanner.l

+12-8
Original file line numberDiff line numberDiff line change
@@ -307,20 +307,25 @@ ZEND_API void zend_destroy_file_handle(zend_file_handle *file_handle)
307307
}
308308
}
309309

310-
ZEND_API zend_result zend_lex_tstring(zval *zv, zend_lexer_ident_ref ident_ref)
310+
ZEND_API zend_result zend_lex_tstring(zval *zv, unsigned char *ident)
311311
{
312-
char *ident = (char *) SCNG(yy_start) + ident_ref.offset;
313-
size_t length = ident_ref.len;
314-
if (length == sizeof("<?=")-1 && memcmp(ident, "<?=", sizeof("<?=")-1) == 0) {
312+
unsigned char *end = ident;
313+
while ((*end >= 'a' && *end <= 'z') || (*end >= 'A' && *end <= 'Z') || *end == '_') {
314+
end++;
315+
}
316+
317+
size_t length = end - ident;
318+
if (length == 0) {
319+
ZEND_ASSERT(ident[0] == '<' && ident[1] == '?' && ident[2] == '=');
315320
zend_throw_exception(zend_ce_parse_error, "Cannot use \"<?=\" as an identifier", 0);
316321
return FAILURE;
317322
}
318323

319324
if (SCNG(on_event)) {
320-
SCNG(on_event)(ON_FEEDBACK, T_STRING, 0, ident, length, SCNG(on_event_context));
325+
SCNG(on_event)(ON_FEEDBACK, T_STRING, 0, (char *) ident, length, SCNG(on_event_context));
321326
}
322327

323-
ZVAL_STRINGL(zv, ident, length);
328+
ZVAL_STRINGL(zv, (char *) ident, length);
324329
return SUCCESS;
325330
}
326331

@@ -3096,8 +3101,7 @@ emit_token:
30963101
30973102
emit_token_with_ident:
30983103
if (PARSER_MODE()) {
3099-
elem->ident.offset = SCNG(yy_text) - SCNG(yy_start);
3100-
elem->ident.len = SCNG(yy_leng);
3104+
elem->ident = SCNG(yy_text);
31013105
}
31023106
if (SCNG(on_event)) {
31033107
SCNG(on_event)(ON_TOKEN, token, start_line, yytext, yyleng, SCNG(on_event_context));

0 commit comments

Comments
 (0)