From 25c42440e67f15740268779623aca51080b76a00 Mon Sep 17 00:00:00 2001 From: NathanFreeman <1056159381@qq.com> Date: Sat, 17 Sep 2022 01:51:45 +0800 Subject: [PATCH 1/2] fix #9535 find the position of illegal subcharacters and modify the value of device.pos add ((encoder->status && ((encoder->status & 0xF) || (encoder->status == 0x11))) || encoder->cache) condition. fix test fix test delay initialization parameter illegal_substchar optimize code --- ext/mbstring/libmbfl/mbfl/mbfilter.c | 5 + ext/mbstring/tests/gh9535.phpt | 208 +++++++++++++++++++++++++++ 2 files changed, 213 insertions(+) create mode 100644 ext/mbstring/tests/gh9535.phpt diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter.c b/ext/mbstring/libmbfl/mbfl/mbfilter.c index d25abb63369e4..f36af6d488db8 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfilter.c +++ b/ext/mbstring/libmbfl/mbfl/mbfilter.c @@ -1096,6 +1096,7 @@ mbfl_strcut( } else { mbfl_convert_filter *encoder = NULL; mbfl_convert_filter *decoder = NULL; + int mode_backup; const unsigned char *p, *q, *r; struct { mbfl_convert_filter encoder; @@ -1112,6 +1113,8 @@ mbfl_strcut( return NULL; } + mode_backup = decoder->illegal_mode; + /* wchar filter */ if (!(encoder = mbfl_convert_filter_new( string->encoding, @@ -1276,7 +1279,9 @@ mbfl_strcut( bk = _bk; } + decoder->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE; (*encoder->filter_flush)(encoder); + decoder->illegal_mode = mode_backup; if (bk.decoder.filter_dtor) bk.decoder.filter_dtor(&bk.decoder); diff --git a/ext/mbstring/tests/gh9535.phpt b/ext/mbstring/tests/gh9535.phpt new file mode 100644 index 0000000000000..32f0b46a80d8f --- /dev/null +++ b/ext/mbstring/tests/gh9535.phpt @@ -0,0 +1,208 @@ +--TEST-- +GH-9535 (mb_strcut(): The behavior of mb_strcut in mbstring has been changed in PHP8.1) +--EXTENSIONS-- +mbstring +--FILE-- + +--EXPECTF-- +BASE64: 宛如繁 +HTML-ENTITIES: 宛如 +Quoted-Printable: %s +UTF-16: 宛如繁星般宛如 +UTF-16BE: 宛如繁星般宛如 +UTF-16LE: 宛如繁星般宛如 +UTF-7: 宛如繁星 +UTF7-IMAP: 宛如繁星 +JIS: 宛如繁星般 
+ISO-2022-JP: 宛如繁星般 +ISO-2022-JP-MS: 宛如繁星 +GB18030: 宛如繁星般宛如 +HZ: 宛如繁星般 +ISO-2022-KR: 宛如繁星 +ISO-2022-JP-2004: 宛如繁星 +ISO-2022-JP-MOBILE#KDDI: 宛如繁星 +CP50220: 宛如繁星 +CP50221: 宛如繁星 +CP50222: 宛如繁星 + +BASE64: 星のように +HTML-ENTITIES: 星の +Quoted-Printable: 星の +UTF-16: 星のように月のように +UTF-16BE: 星のように月のように +UTF-16LE: 星のように月のように +UTF-7: 星のように月 +UTF7-IMAP: 星のように月 +JIS: 星のように月の +ISO-2022-JP: 星のように月の +ISO-2022-JP-MS: 星のように月の +GB18030: 星のように月のように +HZ: 星のように月のよ +ISO-2022-KR: 星のように月の +ISO-2022-JP-2004: 星のように月の +ISO-2022-JP-MOBILE#KDDI: 星のように月の +CP50220: 星のように月の +CP50221: 星のように月の +CP50222: 星のように月の + +BASE64: %s +HTML-ENTITIES: あa& +Quoted-Printable: あa +UTF-16: あaいb +UTF-16BE: あaいb +UTF-16LE: あaいb +UTF-7: あa +UTF7-IMAP: あa +JIS: あa +ISO-2022-JP: あa +ISO-2022-JP-MS: あa +GB18030: あaいb +HZ: あa +ISO-2022-KR: あa +ISO-2022-JP-2004: あa +ISO-2022-JP-MOBILE#KDDI: あa +CP50220: あa +CP50221: あa +CP50222: あa + +BASE64: AAAAAA +HTML-ENTITIES: AAAAAAAAAA +Quoted-Printable: AAAAAAAAAA +UTF-16: AAAAA +UTF-16BE: AAAAA +UTF-16LE: AAAAA +UTF-7: AAAAAAAAAA +UTF7-IMAP: AAAAAAAAAA +JIS: AAAAAAAAAA +ISO-2022-JP: AAAAAAAAAA +ISO-2022-JP-MS: AAAAAAAAAA +GB18030: AAAAAAAAAA +HZ: AAAAAAAAAA +ISO-2022-KR: AAAAAAAAAA +ISO-2022-JP-2004: AAAAAAAAAA +ISO-2022-JP-MOBILE#KDDI: AAAAAAAAAA +CP50220: AAAAAAAAAA +CP50221: AAAAAAAAAA +CP50222: AAAAAAAAAA + +BASE64:%s +HTML-ENTITIES: ?? +Quoted-Printable: ?? +UTF-16: ? +UTF-16BE: ? +UTF-16LE: ? +UTF-7: ?? +UTF7-IMAP: ?? +JIS: ?? +ISO-2022-JP: ?? +ISO-2022-JP-MS: ?? +GB18030: ?? +HZ: ?? +ISO-2022-KR: ?? +ISO-2022-JP-2004: ?? +ISO-2022-JP-MOBILE#KDDI: ?? +CP50220: ?? +CP50221: ?? +CP50222: ?? + +string(0) "" +string(2) "??" +string(2) "??" +string(2) "??" +string(2) "??" +string(2) "??" +string(2) "??" +string(2) "??" +string(2) "??" +string(2) "??" +string(2) "??" +string(2) "??" +string(2) "??" +string(2) "??" +string(2) "??" +string(2) "??" +string(2) "??" +string(2) "??" +string(2) "??" 
From ad4023af9bf815d2e66a1faa27caaa93ad4dfd49 Mon Sep 17 00:00:00 2001 From: MARiA so cute <33935209+NathanFreeman@users.noreply.github.com> Date: Tue, 11 Oct 2022 11:32:13 +0800 Subject: [PATCH 2/2] remove unnecessary code and add XFAIL to test. --- ext/mbstring/libmbfl/mbfl/mbfilter.c | 4 ---- ext/mbstring/tests/gh9535.phpt | 2 ++ 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/ext/mbstring/libmbfl/mbfl/mbfilter.c b/ext/mbstring/libmbfl/mbfl/mbfilter.c index f36af6d488db8..4820f15e214ce 100644 --- a/ext/mbstring/libmbfl/mbfl/mbfilter.c +++ b/ext/mbstring/libmbfl/mbfl/mbfilter.c @@ -1096,7 +1096,6 @@ mbfl_strcut( } else { mbfl_convert_filter *encoder = NULL; mbfl_convert_filter *decoder = NULL; - int mode_backup; const unsigned char *p, *q, *r; struct { mbfl_convert_filter encoder; @@ -1113,8 +1112,6 @@ mbfl_strcut( return NULL; } - mode_backup = decoder->illegal_mode; - /* wchar filter */ if (!(encoder = mbfl_convert_filter_new( string->encoding, @@ -1281,7 +1278,6 @@ mbfl_strcut( decoder->illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE; (*encoder->filter_flush)(encoder); - decoder->illegal_mode = mode_backup; if (bk.decoder.filter_dtor) bk.decoder.filter_dtor(&bk.decoder); diff --git a/ext/mbstring/tests/gh9535.phpt b/ext/mbstring/tests/gh9535.phpt index 32f0b46a80d8f..d67b6a0863063 100644 --- a/ext/mbstring/tests/gh9535.phpt +++ b/ext/mbstring/tests/gh9535.phpt @@ -86,6 +86,8 @@ foreach($encodings as $encoding) { } ?> +--XFAIL-- +Discussion: https://github.com/php/php-src/pull/9562 --EXPECTF-- BASE64: 宛如繁 HTML-ENTITIES: 宛如