Skip to content

Proposal: Support overriding short_open_tag in token_get_all() #9612

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 3 additions & 0 deletions Zend/zend_globals.h
Original file line number Diff line number Diff line change
@@ -323,6 +323,9 @@ struct _zend_php_scanner_globals {
int heredoc_indentation;
bool heredoc_indentation_uses_spaces;

/* Whether the scanner recognizes short open tags (<?): initialized from CG(short_tags), may be overridden per call by the tokenizer extension */
bool short_tags;

/* original (unfiltered) script */
unsigned char *script_org;
size_t script_org_size;
2 changes: 2 additions & 0 deletions Zend/zend_language_scanner.h
Original file line number Diff line number Diff line change
@@ -57,6 +57,8 @@ typedef struct _zend_lex_state {

zend_ast *ast;
zend_arena *ast_arena;

bool short_tags;
} zend_lex_state;

typedef struct _zend_heredoc_label {
10 changes: 7 additions & 3 deletions Zend/zend_language_scanner.l
Original file line number Diff line number Diff line change
@@ -246,6 +246,7 @@ ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state)

lex_state->on_event = SCNG(on_event);
lex_state->on_event_context = SCNG(on_event_context);
lex_state->short_tags = SCNG(short_tags);

lex_state->ast = CG(ast);
lex_state->ast_arena = CG(ast_arena);
@@ -289,6 +290,7 @@ ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state)

SCNG(on_event) = lex_state->on_event;
SCNG(on_event_context) = lex_state->on_event_context;
SCNG(short_tags) = lex_state->short_tags;

CG(ast) = lex_state->ast;
CG(ast_arena) = lex_state->ast_arena;
@@ -542,6 +544,7 @@ ZEND_API zend_result open_file_for_scanning(zend_file_handle *file_handle)
/* Reset the scanner for scanning the new file */
SCNG(yy_in) = file_handle;
SCNG(yy_start) = NULL;
SCNG(short_tags) = CG(short_tags);

if (size != (size_t)-1) {
if (CG(multibyte)) {
@@ -731,6 +734,7 @@ ZEND_API void zend_prepare_string_for_scanning(zval *str, zend_string *filename)

SCNG(yy_in) = NULL;
SCNG(yy_start) = NULL;
SCNG(short_tags) = CG(short_tags);

buf = Z_STRVAL_P(str);
size = old_len;
@@ -2239,7 +2243,7 @@ string:
RETURN_OR_SKIP_TOKEN(T_OPEN_TAG);
}
/* Degenerate case: <?phpX is interpreted as <? phpX with short tags. */
if (CG(short_tags)) {
if (SCNG(short_tags)) {
yyless(2);
BEGIN(ST_IN_SCRIPTING);
RETURN_OR_SKIP_TOKEN(T_OPEN_TAG);
@@ -2248,7 +2252,7 @@ string:
}

<INITIAL>"<?" {
if (CG(short_tags)) {
if (SCNG(short_tags)) {
BEGIN(ST_IN_SCRIPTING);
RETURN_OR_SKIP_TOKEN(T_OPEN_TAG);
} else {
@@ -2273,7 +2277,7 @@ inline_char_handler:
}

if (*YYCURSOR == '?') {
if (CG(short_tags) /* <? */
if (SCNG(short_tags) /* <? */
|| (*(YYCURSOR + 1) == '=') /* <?= */
|| (!strncasecmp((char*)YYCURSOR + 1, "php", 3) && /* <?php[ \t\r\n] */
(YYCURSOR + 4 == YYLIMIT ||
2 changes: 2 additions & 0 deletions ext/tokenizer/php_tokenizer.h
Original file line number Diff line number Diff line change
@@ -24,6 +24,8 @@ extern zend_module_entry tokenizer_module_entry;
#define PHP_TOKENIZER_VERSION PHP_VERSION

#define TOKEN_PARSE (1 << 0)
#define TOKEN_ENABLE_SHORT_OPEN_TAG (1 << 1)
#define TOKEN_DISABLE_SHORT_OPEN_TAG (1 << 2)

#ifdef ZTS
#include "TSRM.h"
2 changes: 2 additions & 0 deletions ext/tokenizer/tests/bug81342.phpt
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
--TEST--
Bug #81342: New ampersand token parsing depends on new line after it
--EXTENSIONS--
tokenizer
--FILE--
<?php

20 changes: 20 additions & 0 deletions ext/tokenizer/tests/short_open_tag.phpt
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
--TEST--
token_get_all() TOKEN_*_SHORT_OPEN_TAG flags
--EXTENSIONS--
tokenizer
--FILE--
<?php
// The short_open_tag setting can be overridden for individual calls to token_get_all()
echo json_encode(token_get_all('<?xml', TOKEN_DISABLE_SHORT_OPEN_TAG)), "\n";
echo json_encode(token_get_all('<?xml', TOKEN_ENABLE_SHORT_OPEN_TAG)), "\n";
// When setting both flags, TOKEN_DISABLE_SHORT_OPEN_TAG takes precedence
echo json_encode(token_get_all('<?xml', TOKEN_DISABLE_SHORT_OPEN_TAG|TOKEN_ENABLE_SHORT_OPEN_TAG)), "\n";
echo json_encode(PhpToken::tokenize('<?xml', TOKEN_DISABLE_SHORT_OPEN_TAG)), "\n";
echo json_encode(PhpToken::tokenize('<?xml', TOKEN_ENABLE_SHORT_OPEN_TAG)), "\n";
?>
--EXPECTF--
[[%d,"<?xml",1]]
[[%d,"<?",1],[%d,"xml",1]]
[[%d,"<?xml",1]]
[{"id":%d,"text":"<?xml","line":1,"pos":0}]
[{"id":%d,"text":"<?","line":1,"pos":0},{"id":%d,"text":"xml","line":1,"pos":2}]
21 changes: 16 additions & 5 deletions ext/tokenizer/tokenizer.c
Original file line number Diff line number Diff line change
@@ -319,7 +319,7 @@ static void add_token(
zend_hash_next_index_insert_new(Z_ARRVAL_P(return_value), &token);
}

static bool tokenize(zval *return_value, zend_string *source, zend_class_entry *token_class)
static bool tokenize(zval *return_value, zend_string *source, zend_class_entry *token_class, bool short_tags)
{
zval source_zval;
zend_lex_state original_lex_state;
@@ -335,6 +335,7 @@ static bool tokenize(zval *return_value, zend_string *source, zend_class_entry *
zend_prepare_string_for_scanning(&source_zval, ZSTR_EMPTY_ALLOC());

LANG_SCNG(yy_state) = yycINITIAL;
LANG_SCNG(short_tags) = short_tags;
zend_hash_init(&interned_strings, 0, NULL, NULL, 0);
array_init(return_value);

@@ -452,7 +453,7 @@ void on_event(
}

static bool tokenize_parse(
zval *return_value, zend_string *source, zend_class_entry *token_class)
zval *return_value, zend_string *source, zend_class_entry *token_class, bool short_tags)
{
zval source_zval;
struct event_context ctx;
@@ -478,6 +479,7 @@ static bool tokenize_parse(
LANG_SCNG(yy_state) = yycINITIAL;
LANG_SCNG(on_event) = on_event;
LANG_SCNG(on_event_context) = &ctx;
LANG_SCNG(short_tags) = short_tags;

if((success = (zendparse() == SUCCESS))) {
ZVAL_COPY_VALUE(return_value, &token_stream);
@@ -500,14 +502,23 @@ static bool tokenize_parse(
static bool tokenize_common(
zval *return_value, zend_string *source, zend_long flags, zend_class_entry *token_class)
{
bool result;
bool short_tags;
if (flags & (TOKEN_ENABLE_SHORT_OPEN_TAG|TOKEN_DISABLE_SHORT_OPEN_TAG)) {
/* TOKEN_DISABLE_SHORT_OPEN_TAG takes precedence over TOKEN_ENABLE_SHORT_OPEN_TAG */
short_tags = (flags & TOKEN_DISABLE_SHORT_OPEN_TAG) == 0;
} else {
short_tags = CG(short_tags);
}
if (flags & TOKEN_PARSE) {
return tokenize_parse(return_value, source, token_class);
result = tokenize_parse(return_value, source, token_class, short_tags);
} else {
int success = tokenize(return_value, source, token_class);
int success = tokenize(return_value, source, token_class, short_tags);
/* Normal token_get_all() should not throw. */
zend_clear_exception();
return success;
result = success;
}
return result;
}

/* }}} */
10 changes: 10 additions & 0 deletions ext/tokenizer/tokenizer.stub.php
Original file line number Diff line number Diff line change
@@ -7,6 +7,16 @@
* @cvalue TOKEN_PARSE
*/
const TOKEN_PARSE = UNKNOWN;
/**
* @var int
* @cvalue TOKEN_ENABLE_SHORT_OPEN_TAG
*/
const TOKEN_ENABLE_SHORT_OPEN_TAG = UNKNOWN;
/**
* @var int
* @cvalue TOKEN_DISABLE_SHORT_OPEN_TAG
*/
const TOKEN_DISABLE_SHORT_OPEN_TAG = UNKNOWN;

function token_get_all(string $code, int $flags = 0): array {}

4 changes: 3 additions & 1 deletion ext/tokenizer/tokenizer_arginfo.h

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.