Skip to content

Commit 17da862

Browse files
committed
Optimize php_unicode_tolower/upper for ASCII
1 parent ba383b8 commit 17da862

File tree

1 file changed

+22
-39
lines changed

1 file changed

+22
-39
lines changed

ext/mbstring/php_unicode.c

Lines changed: 22 additions & 39 deletions
Original file line number | Diff line number | Diff line change
@@ -47,19 +47,6 @@
4747

4848
ZEND_EXTERN_MODULE_GLOBALS(mbstring)
4949

50-
/*
51-
* A simple array of 32-bit masks for lookup.
52-
*/
53-
static unsigned long masks32[32] = {
54-
0x00000001, 0x00000002, 0x00000004, 0x00000008, 0x00000010, 0x00000020,
55-
0x00000040, 0x00000080, 0x00000100, 0x00000200, 0x00000400, 0x00000800,
56-
0x00001000, 0x00002000, 0x00004000, 0x00008000, 0x00010000, 0x00020000,
57-
0x00040000, 0x00080000, 0x00100000, 0x00200000, 0x00400000, 0x00800000,
58-
0x01000000, 0x02000000, 0x04000000, 0x08000000, 0x10000000, 0x20000000,
59-
0x40000000, 0x80000000
60-
};
61-
62-
6350
static int prop_lookup(unsigned long code, unsigned long n)
6451
{
6552
long l, r, m;
@@ -151,27 +138,22 @@ static unsigned long case_lookup(unsigned long code, long l, long r, int field)
151138
return code;
152139
}
153140

154-
MBSTRING_API unsigned long php_turkish_toupper(unsigned long code, long l, long r, int field)
155-
{
156-
if (code == 0x0069L) {
157-
return 0x0130L;
158-
}
159-
return case_lookup(code, l, r, field);
160-
}
161-
162-
MBSTRING_API unsigned long php_turkish_tolower(unsigned long code, long l, long r, int field)
163-
{
164-
if (code == 0x0049L) {
165-
return 0x0131L;
166-
}
167-
return case_lookup(code, l, r, field);
168-
}
169-
170141
MBSTRING_API unsigned long php_unicode_toupper(unsigned long code, enum mbfl_no_encoding enc)
171142
{
172143
int field;
173144
long l, r;
174145

146+
if (code < 0x80) {
147+
/* Fast path for ASCII */
148+
if (code >= 0x61 && code <= 0x7A) {
149+
if (enc == mbfl_no_encoding_8859_9 && code == 0x0069L) {
150+
return 0x0130L;
151+
}
152+
return code - 0x20;
153+
}
154+
return code;
155+
}
156+
175157
if (php_unicode_is_upper(code))
176158
return code;
177159

@@ -182,11 +164,6 @@ MBSTRING_API unsigned long php_unicode_toupper(unsigned long code, enum mbfl_no_
182164
field = 2;
183165
l = _uccase_len[0];
184166
r = (l + _uccase_len[1]) - 3;
185-
186-
if (enc == mbfl_no_encoding_8859_9) {
187-
return php_turkish_toupper(code, l, r, field);
188-
}
189-
190167
} else {
191168
/*
192169
* The character is title case.
@@ -203,6 +180,17 @@ MBSTRING_API unsigned long php_unicode_tolower(unsigned long code, enum mbfl_no_
203180
int field;
204181
long l, r;
205182

183+
if (code < 0x80) {
184+
/* Fast path for ASCII */
185+
if (code >= 0x41 && code <= 0x5A) {
186+
if (enc == mbfl_no_encoding_8859_9 && code == 0x0049L) {
187+
return 0x0131L;
188+
}
189+
return code + 0x20;
190+
}
191+
return code;
192+
}
193+
206194
if (php_unicode_is_lower(code))
207195
return code;
208196

@@ -213,11 +201,6 @@ MBSTRING_API unsigned long php_unicode_tolower(unsigned long code, enum mbfl_no_
213201
field = 1;
214202
l = 0;
215203
r = _uccase_len[0] - 3;
216-
217-
if (enc == mbfl_no_encoding_8859_9) {
218-
return php_turkish_tolower(code, l, r, field);
219-
}
220-
221204
} else {
222205
/*
223206
* The character is title case.

0 commit comments

Comments
 (0)