Skip to content

Commit 9d63f4d

Browse files
committed
Fixed bug #76319
While at it, also make sure that mbstring case conversion takes into account the specified substitution character and substitution mode.
1 parent 3382424 commit 9d63f4d

File tree

5 files changed, with 40 additions and 8 deletions.

NEWS

Lines changed: 2 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -105,6 +105,8 @@ PHP NEWS
105105
. Fixed bug #73528 (Crash in zif_mb_send_mail). (Nikita)
106106
. Fixed bug #74929 (mbstring functions version 7.1.1 are slow compared to 5.3
107107
on Windows). (Nikita)
108+
. Fixed bug #76319 (mb_strtolower with invalid UTF-8 causes segmentation
109+
fault). (Nikita)
108110
. Update to Oniguruma 6.8.1. (cmb)
109111

110112
- ODBC:

ext/mbstring/mbstring.c

Lines changed: 13 additions & 5 deletions
Original file line number | Diff line number | Diff line change
@@ -3252,6 +3252,14 @@ PHP_FUNCTION(mb_convert_encoding)
32523252
}
32533253
/* }}} */
32543254

3255+
static char *mbstring_convert_case(
3256+
int case_mode, const char *str, size_t str_len, size_t *ret_len,
3257+
const mbfl_encoding *enc) {
3258+
return php_unicode_convert_case(
3259+
case_mode, str, str_len, ret_len, enc,
3260+
MBSTRG(current_filter_illegal_mode), MBSTRG(current_filter_illegal_substchar));
3261+
}
3262+
32553263
/* {{{ proto string mb_convert_case(string sourcestring, int mode [, string encoding])
32563264
Returns a case-folded version of sourcestring */
32573265
PHP_FUNCTION(mb_convert_case)
@@ -3280,7 +3288,7 @@ PHP_FUNCTION(mb_convert_case)
32803288
return;
32813289
}
32823290

3283-
newstr = php_unicode_convert_case(case_mode, str, str_len, &ret_len, enc);
3291+
newstr = mbstring_convert_case(case_mode, str, str_len, &ret_len, enc);
32843292

32853293
if (newstr) {
32863294
// TODO: avoid reallocation ???
@@ -3312,7 +3320,7 @@ PHP_FUNCTION(mb_strtoupper)
33123320
RETURN_FALSE;
33133321
}
33143322

3315-
newstr = php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, str, str_len, &ret_len, enc);
3323+
newstr = mbstring_convert_case(PHP_UNICODE_CASE_UPPER, str, str_len, &ret_len, enc);
33163324

33173325
if (newstr) {
33183326
// TODO: avoid reallocation ???
@@ -3346,7 +3354,7 @@ PHP_FUNCTION(mb_strtolower)
33463354
RETURN_FALSE;
33473355
}
33483356

3349-
newstr = php_unicode_convert_case(PHP_UNICODE_CASE_LOWER, str, str_len, &ret_len, enc);
3357+
newstr = mbstring_convert_case(PHP_UNICODE_CASE_LOWER, str, str_len, &ret_len, enc);
33503358

33513359
if (newstr) {
33523360
// TODO: avoid reallocation ???
@@ -5172,7 +5180,7 @@ MBSTRING_API size_t php_mb_stripos(int mode, const char *old_haystack, size_t ol
51725180
* offsets otherwise. */
51735181

51745182
size_t len = 0;
5175-
haystack.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_FOLD_SIMPLE, (char *)old_haystack, old_haystack_len, &len, enc);
5183+
haystack.val = (unsigned char *)mbstring_convert_case(PHP_UNICODE_CASE_FOLD_SIMPLE, (char *)old_haystack, old_haystack_len, &len, enc);
51765184
haystack.len = len;
51775185

51785186
if (!haystack.val) {
@@ -5183,7 +5191,7 @@ MBSTRING_API size_t php_mb_stripos(int mode, const char *old_haystack, size_t ol
51835191
break;
51845192
}
51855193

5186-
needle.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_FOLD_SIMPLE, (char *)old_needle, old_needle_len, &len, enc);
5194+
needle.val = (unsigned char *)mbstring_convert_case(PHP_UNICODE_CASE_FOLD_SIMPLE, (char *)old_needle, old_needle_len, &len, enc);
51875195
needle.len = len;
51885196

51895197
if (!needle.val) {

ext/mbstring/php_unicode.c

Lines changed: 14 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -312,6 +312,14 @@ static int convert_case_filter(int c, void *void_data)
312312
struct convert_case_data *data = (struct convert_case_data *) void_data;
313313
unsigned out[3];
314314
unsigned len, i;
315+
316+
/* Handle invalid characters early, as we assign special meaning to
317+
* codepoints above 0xffffff. */
318+
if (UNEXPECTED(c > 0xffffff)) {
319+
(*data->next_filter->filter_function)(c, data->next_filter);
320+
return 0;
321+
}
322+
315323
switch (data->case_mode) {
316324
case PHP_UNICODE_CASE_UPPER_SIMPLE:
317325
out[0] = php_unicode_toupper_simple(c, data->no_encoding);
@@ -376,7 +384,7 @@ static int convert_case_filter(int c, void *void_data)
376384

377385
MBSTRING_API char *php_unicode_convert_case(
378386
int case_mode, const char *srcstr, size_t srclen, size_t *ret_len,
379-
const mbfl_encoding *src_encoding)
387+
const mbfl_encoding *src_encoding, int illegal_mode, int illegal_substchar)
380388
{
381389
struct convert_case_data data;
382390
mbfl_convert_filter *from_wchar, *to_wchar;
@@ -403,6 +411,11 @@ MBSTRING_API char *php_unicode_convert_case(
403411
return NULL;
404412
}
405413

414+
to_wchar->illegal_mode = illegal_mode;
415+
to_wchar->illegal_substchar = illegal_substchar;
416+
from_wchar->illegal_mode = illegal_mode;
417+
from_wchar->illegal_substchar = illegal_substchar;
418+
406419
data.next_filter = from_wchar;
407420
data.no_encoding = src_encoding->no_encoding;
408421
data.case_mode = case_mode;

ext/mbstring/php_unicode.h

Lines changed: 2 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -87,8 +87,8 @@ MBSTRING_API int php_unicode_is_prop(unsigned long code, ...);
8787
MBSTRING_API int php_unicode_is_prop1(unsigned long code, int prop);
8888

8989
MBSTRING_API char *php_unicode_convert_case(
90-
int case_mode, const char *srcstr, size_t srclen, size_t *retlen,
91-
const mbfl_encoding *src_encoding);
90+
int case_mode, const char *srcstr, size_t srclen, size_t *ret_len,
91+
const mbfl_encoding *src_encoding, int illegal_mode, int illegal_substchar);
9292

9393
#define PHP_UNICODE_CASE_UPPER 0
9494
#define PHP_UNICODE_CASE_LOWER 1

ext/mbstring/tests/bug76319.phpt

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -0,0 +1,9 @@
1+
--TEST--
2+
Bug #76319: mb_strtolower with invalid UTF-8 causes segmentation fault
3+
--FILE--
4+
<?php
5+
mb_substitute_character(0xFFFD);
6+
var_dump(mb_strtolower("a\xA1", 'UTF-8'));
7+
?>
8+
--EXPECT--
9+
string(4) "a�"

0 commit comments

Comments
 (0)