Skip to content

Commit f813708

Browse files
authored
Fix GH-13815: mb_trim() inaccurate $characters default value (#13820)
Because the default characters are defined in the stub file, and the stub file is (typically) UTF-8, the characters are encoded in the default string as UTF-8. When a different character encoding is in use, there is a mismatch between what mb_trim() expects and the UTF-8 encoded string it receives. One way to solve this is to make the characters argument nullable, so that the default always takes the internal code path in which the default Unicode code points are stored as code point numbers rather than in an encoded string. Co-authored-by: @ranvis
1 parent 13a5a81 commit f813708

File tree

4 files changed

+26
-6
lines changed

4 files changed

+26
-6
lines changed

ext/mbstring/mbstring.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3129,7 +3129,7 @@ static void php_do_mb_trim(INTERNAL_FUNCTION_PARAMETERS, mb_trim_mode mode)
31293129
ZEND_PARSE_PARAMETERS_START(1, 3)
31303130
Z_PARAM_STR(str)
31313131
Z_PARAM_OPTIONAL
3132-
Z_PARAM_STR(what)
3132+
Z_PARAM_STR_OR_NULL(what)
31333133
Z_PARAM_STR_OR_NULL(encoding)
31343134
ZEND_PARSE_PARAMETERS_END();
31353135

ext/mbstring/mbstring.stub.php

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -139,11 +139,11 @@ function mb_ucfirst(string $string, ?string $encoding = null): string {}
139139

140140
function mb_lcfirst(string $string, ?string $encoding = null): string {}
141141

142-
function mb_trim(string $string, string $characters = " \f\n\r\t\v\x00\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}", ?string $encoding = null): string {}
142+
function mb_trim(string $string, ?string $characters = null, ?string $encoding = null): string {}
143143

144-
function mb_ltrim(string $string, string $characters = " \f\n\r\t\v\x00\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}", ?string $encoding = null): string {}
144+
function mb_ltrim(string $string, ?string $characters = null, ?string $encoding = null): string {}
145145

146-
function mb_rtrim(string $string, string $characters = " \f\n\r\t\v\x00\u{00A0}\u{1680}\u{2000}\u{2001}\u{2002}\u{2003}\u{2004}\u{2005}\u{2006}\u{2007}\u{2008}\u{2009}\u{200A}\u{2028}\u{2029}\u{202F}\u{205F}\u{3000}\u{0085}\u{180E}", ?string $encoding = null): string {}
146+
function mb_rtrim(string $string, ?string $characters = null, ?string $encoding = null): string {}
147147

148148
/** @refcount 1 */
149149
function mb_detect_encoding(string $string, array|string|null $encodings = null, bool $strict = false): string|false {}

ext/mbstring/mbstring_arginfo.h

Lines changed: 2 additions & 2 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

ext/mbstring/tests/gh13815.phpt

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,20 @@
1+
--TEST--
2+
GH-13815 (mb_trim() inaccurate $characters default value)
3+
--EXTENSIONS--
4+
mbstring
5+
--FILE--
6+
<?php
7+
$strUtf8 = "\u{3042}\u{3000}"; // U+3000: fullwidth space
8+
var_dump(mb_strlen(mb_trim($strUtf8)));
9+
var_dump(mb_strlen(mb_trim($strUtf8, encoding: 'UTF-8')));
10+
11+
mb_internal_encoding('Shift_JIS');
12+
$strSjis = mb_convert_encoding($strUtf8, 'Shift_JIS', 'UTF-8');
13+
var_dump(mb_strlen(mb_trim($strSjis)));
14+
var_dump(mb_strlen(mb_trim($strSjis, encoding: 'Shift_JIS')));
15+
?>
16+
--EXPECT--
17+
int(1)
18+
int(1)
19+
int(1)
20+
int(1)

0 commit comments

Comments
 (0)