Skip to content

Commit 1107593

Browse files
committed
ext tokenizer port + cleanup unused lexer states
We basically added a mechanism to store the token stream during parsing and exposed the entire parser stack to the tokenizer extension through an opt-in flag: token_get_all($src, TOKEN_PARSE). This change allows easy future language enhancements regarding context-aware parsing & scanning without further maintenance on the tokenizer extension, while also solving known inconsistencies that the "parseless" tokenizer extension has when handling the presence of `__halt_compiler()`.
1 parent 02a9eb4 commit 1107593

10 files changed

+510
-220
lines changed

Zend/zend_compile.c

+4-2
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,6 @@
3030
#include "zend_interfaces.h"
3131
#include "zend_virtual_cwd.h"
3232
#include "zend_multibyte.h"
33-
#include "zend_language_scanner.h"
3433
#include "zend_inheritance.h"
3534

3635
#define SET_NODE(target, src) do { \
@@ -568,7 +567,10 @@ static int zend_add_const_name_literal(zend_op_array *op_array, zend_string *nam
568567
op.constant = zend_add_literal(CG(active_op_array), &_c); \
569568
} while (0)
570569

571-
void zend_stop_lexing(void) {
570+
void zend_stop_lexing(void)
571+
{
572+
if(LANG_SCNG(on_event)) LANG_SCNG(on_event)(ON_STOP, END, 0);
573+
572574
LANG_SCNG(yy_cursor) = LANG_SCNG(yy_limit);
573575
}
574576

Zend/zend_globals.h

+9
Original file line numberDiff line numberDiff line change
@@ -249,6 +249,12 @@ struct _zend_ini_scanner_globals {
249249
int scanner_mode;
250250
};
251251

252+
typedef enum {
253+
ON_TOKEN,
254+
ON_FEEDBACK,
255+
ON_STOP
256+
} zend_php_scanner_event;
257+
252258
struct _zend_php_scanner_globals {
253259
zend_file_handle *yy_in;
254260
zend_file_handle *yy_out;
@@ -278,6 +284,9 @@ struct _zend_php_scanner_globals {
278284

279285
/* initial string length after scanning to first variable */
280286
int scanned_string_len;
287+
288+
/* hooks */
289+
void (* on_event)(zend_php_scanner_event event, int token, int line);
281290
};
282291

283292
#endif /* ZEND_GLOBALS_H */

Zend/zend_language_parser.y

+6-8
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@
3535
#include "zend_globals.h"
3636
#include "zend_API.h"
3737
#include "zend_constants.h"
38-
#include "zend_language_scanner_defs.h"
38+
#include "zend_language_scanner.h"
3939

4040
#define YYSIZE_T size_t
4141
#define yytnamerr zend_yytnamerr
@@ -49,12 +49,6 @@ static YYSIZE_T zend_yytnamerr(char*, const char*);
4949
#define YYFREE free
5050
#endif
5151

52-
#define REWIND { \
53-
zend_stack_push(&LANG_SCNG(state_stack), (void *) &LANG_SCNG(yy_state)); \
54-
LANG_SCNG(yy_state) = yycST_LOOKING_FOR_SEMI_RESERVED_NAME; \
55-
LANG_SCNG(yy_cursor) = (unsigned char*)LANG_SCNG(yy_text); \
56-
LANG_SCNG(yy_leng) = 0; }
57-
5852
%}
5953

6054
%pure_parser
@@ -290,7 +284,11 @@ semi_reserved:
290284

291285
identifier:
292286
T_STRING { $$ = $1; }
293-
| /* if */ semi_reserved { REWIND } /* and rematch as */ T_STRING { $$ = $3; }
287+
| semi_reserved {
288+
zval zv;
289+
zend_lex_tstring(&zv);
290+
$$ = zend_ast_create_zval(&zv);
291+
}
294292
;
295293

296294
top_statement_list:

Zend/zend_language_scanner.h

+4
Original file line numberDiff line numberDiff line change
@@ -50,6 +50,9 @@ typedef struct _zend_lex_state {
5050
zend_encoding_filter output_filter;
5151
const zend_encoding *script_encoding;
5252

53+
/* hooks */
54+
void (* on_event)(zend_php_scanner_event event, int token, int line);
55+
5356
zend_ast *ast;
5457
zend_arena *ast_arena;
5558
} zend_lex_state;
@@ -66,6 +69,7 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state);
6669
ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename);
6770
ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, const zend_encoding *old_encoding);
6871
ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding);
72+
ZEND_API void zend_lex_tstring(zval *zv);
6973

7074
END_EXTERN_C()
7175

0 commit comments

Comments
 (0)