Skip to content

Commit dc5f3b9

Browse files
Fix GH-15824 mb_detect_encoding() invalid "UTF8" (#15829)
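Replaced strcasecmp with strncasecmp, because the name being looked up is passed together with an explicit length (name_len). Since strncasecmp compares at most the number of bytes given by its third parameter, it would also accept a name that is only a prefix of a MIME name or alias; therefore an additional check was added that the MIME name or alias ends exactly at name_len. Co-authored-by: Niels Dossche <7771979+nielsdos@users.noreply.github.com>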
1 parent db54576 commit dc5f3b9

File tree

2 files changed

+39
-2
lines changed

2 files changed

+39
-2
lines changed

ext/mbstring/libmbfl/mbfl/mbfl_encoding.c

+2-2
Original file line numberDiff line numberDiff line change
@@ -349,7 +349,7 @@ const mbfl_encoding *mbfl_name2encoding_ex(const char *name, size_t name_len)
349349
/* search MIME charset name */
350350
for (encoding = mbfl_encoding_ptr_list; *encoding; encoding++) {
351351
if ((*encoding)->mime_name) {
352-
if (strcasecmp((*encoding)->mime_name, name) == 0) {
352+
if (strncasecmp((*encoding)->mime_name, name, name_len) == 0 && (*encoding)->mime_name[name_len] == '\0') {
353353
return *encoding;
354354
}
355355
}
@@ -359,7 +359,7 @@ const mbfl_encoding *mbfl_name2encoding_ex(const char *name, size_t name_len)
359359
for (encoding = mbfl_encoding_ptr_list; *encoding; encoding++) {
360360
if ((*encoding)->aliases) {
361361
for (const char **alias = (*encoding)->aliases; *alias; alias++) {
362-
if (strcasecmp(*alias, name) == 0) {
362+
if (strncasecmp(name, *alias, name_len) == 0 && (*alias)[name_len] == '\0') {
363363
return *encoding;
364364
}
365365
}

ext/mbstring/tests/gh15824.phpt

+37
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
--TEST--
2+
GH-15824 (ValueError: mb_detect_encoding(): Argument #2 ($encodings) contains invalid encoding "UTF8")
3+
--EXTENSIONS--
4+
mbstring
5+
--FILE--
6+
<?php
7+
echo "== alias name ==\n";
8+
var_dump(mb_detect_encoding('abc', 'UTF8, ASCII'));
9+
var_dump(mb_detect_encoding('こんにちは', 'UTF8, ASCII'));
10+
var_dump(mb_detect_encoding('こんにちは', 'ASCII, UTF8'));
11+
var_dump(mb_detect_encoding("\xC5", 'US-ASCII, LATIN4'));
12+
var_dump(mb_detect_encoding("\xC5", 'US-ASCII, cyrillic'));
13+
/* 0x9D is not defined in CP1254 */
14+
var_dump(mb_detect_encoding("\x9D", 'US-ASCII, CP1254, cyrillic'));
15+
var_dump(mb_detect_encoding("\x9D", 'US-ASCII, CP1254, cyrillic', false));
16+
17+
echo "== mime name ==\n";
18+
var_dump(mb_detect_encoding('abc', 'ANSI_X3.4-1968, ISO-8859-1'));
19+
var_dump(mb_detect_encoding('abc', 'CP50220, ANSI_X3.4-1968'));
20+
/* the trailing comma is intentional, not a mistake */
21+
var_dump(mb_detect_encoding(bin2hex('1b24422422242424262428242a1b2842'), 'CP50220, ANSI_X3.4-1968,', false));
22+
var_dump(mb_detect_encoding('😄', 'US-ASCII, UTF-8-Mobile#KDDI-B, UTF-8'));
23+
?>
24+
--EXPECT--
25+
== alias name ==
26+
string(5) "UTF-8"
27+
string(5) "UTF-8"
28+
string(5) "UTF-8"
29+
string(10) "ISO-8859-4"
30+
string(10) "ISO-8859-5"
31+
string(10) "ISO-8859-5"
32+
string(10) "ISO-8859-5"
33+
== mime name ==
34+
string(5) "ASCII"
35+
string(7) "CP50220"
36+
string(7) "CP50220"
37+
string(19) "UTF-8-Mobile#KDDI-B"

0 commit comments

Comments
 (0)