Skip to content

Commit fa4fe63

Browse files
committedNov 24, 2011
- Support for UTS #46.
1 parent d0a898d commit fa4fe63

File tree

5 files changed

+363
-21
lines changed

5 files changed

+363
-21
lines changed
 

‎ext/intl/common/common_error.c

+1-2
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,6 @@ void intl_expose_icu_error_codes( INIT_FUNC_ARGS )
232232
INTL_EXPOSE_CONST( U_REGEX_ERROR_LIMIT );
233233

234234
/* The error code in the range 0x10400-0x104ff are reserved for IDNA related error codes */
235-
#if defined(U_IDNA_PROHIBITED_ERROR)
236235
INTL_EXPOSE_CONST( U_IDNA_PROHIBITED_ERROR );
237236
INTL_EXPOSE_CONST( U_IDNA_ERROR_START );
238237
INTL_EXPOSE_CONST( U_IDNA_UNASSIGNED_ERROR );
@@ -242,8 +241,8 @@ void intl_expose_icu_error_codes( INIT_FUNC_ARGS )
242241
INTL_EXPOSE_CONST( U_IDNA_VERIFICATION_ERROR );
243242
INTL_EXPOSE_CONST( U_IDNA_LABEL_TOO_LONG_ERROR );
244243
INTL_EXPOSE_CONST( U_IDNA_ZERO_LENGTH_LABEL_ERROR );
244+
INTL_EXPOSE_CONST( U_IDNA_DOMAIN_NAME_TOO_LONG_ERROR );
245245
INTL_EXPOSE_CONST( U_IDNA_ERROR_LIMIT );
246-
#endif
247246

248247
/* Aliases for StringPrep */
249248
INTL_EXPOSE_CONST( U_STRINGPREP_PROHIBITED_ERROR );

‎ext/intl/idn/idn.c

+216-17
Original file line numberDiff line numberDiff line change
@@ -29,14 +29,25 @@
2929
#include "ext/standard/php_string.h"
3030

3131
#include "intl_error.h"
32-
#include "intl_convert.h"
32+
#include "intl_convert.h"
3333
/* }}} */
3434

35+
#ifdef UIDNA_INFO_INITIALIZER
36+
#define HAVE_46_API 1 /* has UTS#46 API (introduced in ICU 4.6) */
37+
#endif
38+
39+
/* IDNA implementation variants a caller can select. The values are what
 * idn_register_constants() registers as INTL_IDNA_VARIANT_2003 and
 * INTL_IDNA_VARIANT_UTS46. */
enum {
	INTL_IDN_VARIANT_2003 = 0,  /* handled by php_intl_idn_to() */
	INTL_IDN_VARIANT_UTS46 = 1  /* handled by php_intl_idn_to_46() */
};
43+
3544
/* {{{ idn_register_constants
3645
* Register API constants
3746
*/
3847
void idn_register_constants( INIT_FUNC_ARGS )
3948
{
49+
/* OPTIONS */
50+
4051
/* Option to prohibit processing of unassigned codepoints in the input and
4152
do not check if the input conforms to STD-3 ASCII rules. */
4253
REGISTER_LONG_CONSTANT("IDNA_DEFAULT", UIDNA_DEFAULT, CONST_CS | CONST_PERSISTENT);
@@ -46,6 +57,50 @@ void idn_register_constants( INIT_FUNC_ARGS )
4657

4758
/* Option to check if input conforms to STD-3 ASCII rules */
4859
REGISTER_LONG_CONSTANT("IDNA_USE_STD3_RULES", UIDNA_USE_STD3_RULES, CONST_CS | CONST_PERSISTENT);
60+
61+
#ifdef HAVE_46_API
62+
63+
/* Option to check for whether the input conforms to the BiDi rules.
64+
* Ignored by the IDNA2003 implementation. (IDNA2003 always performs a BiDi check.) */
65+
REGISTER_LONG_CONSTANT("IDNA_CHECK_BIDI", UIDNA_CHECK_BIDI, CONST_CS | CONST_PERSISTENT);
66+
67+
/* Option to check for whether the input conforms to the CONTEXTJ rules.
68+
* Ignored by the IDNA2003 implementation. (The CONTEXTJ check is new in IDNA2008.) */
69+
REGISTER_LONG_CONSTANT("IDNA_CHECK_CONTEXTJ", UIDNA_CHECK_CONTEXTJ, CONST_CS | CONST_PERSISTENT);
70+
71+
/* Option for nontransitional processing in ToASCII().
72+
* By default, ToASCII() uses transitional processing.
73+
* Ignored by the IDNA2003 implementation. */
74+
REGISTER_LONG_CONSTANT("IDNA_NONTRANSITIONAL_TO_ASCII", UIDNA_NONTRANSITIONAL_TO_ASCII, CONST_CS | CONST_PERSISTENT);
75+
76+
/* Option for nontransitional processing in ToUnicode().
77+
* By default, ToUnicode() uses transitional processing.
78+
* Ignored by the IDNA2003 implementation. */
79+
REGISTER_LONG_CONSTANT("IDNA_NONTRANSITIONAL_TO_UNICODE", UIDNA_NONTRANSITIONAL_TO_UNICODE, CONST_CS | CONST_PERSISTENT);
80+
#endif
81+
82+
/* VARIANTS */
83+
REGISTER_LONG_CONSTANT("INTL_IDNA_VARIANT_2003", INTL_IDN_VARIANT_2003, CONST_CS | CONST_PERSISTENT);
84+
#ifdef HAVE_46_API
85+
REGISTER_LONG_CONSTANT("INTL_IDNA_VARIANT_UTS46", INTL_IDN_VARIANT_UTS46, CONST_CS | CONST_PERSISTENT);
86+
#endif
87+
88+
#ifdef HAVE_46_API
89+
/* PINFO ERROR CODES */
90+
REGISTER_LONG_CONSTANT("IDNA_ERROR_EMPTY_LABEL", UIDNA_ERROR_EMPTY_LABEL, CONST_CS | CONST_PERSISTENT);
91+
REGISTER_LONG_CONSTANT("IDNA_ERROR_LABEL_TOO_LONG", UIDNA_ERROR_LABEL_TOO_LONG, CONST_CS | CONST_PERSISTENT);
92+
REGISTER_LONG_CONSTANT("IDNA_ERROR_DOMAIN_NAME_TOO_LONG", UIDNA_ERROR_DOMAIN_NAME_TOO_LONG, CONST_CS | CONST_PERSISTENT);
93+
REGISTER_LONG_CONSTANT("IDNA_ERROR_LEADING_HYPHEN", UIDNA_ERROR_LEADING_HYPHEN, CONST_CS | CONST_PERSISTENT);
94+
REGISTER_LONG_CONSTANT("IDNA_ERROR_TRAILING_HYPHEN", UIDNA_ERROR_TRAILING_HYPHEN, CONST_CS | CONST_PERSISTENT);
95+
REGISTER_LONG_CONSTANT("IDNA_ERROR_HYPHEN_3_4", UIDNA_ERROR_HYPHEN_3_4, CONST_CS | CONST_PERSISTENT);
96+
REGISTER_LONG_CONSTANT("IDNA_ERROR_LEADING_COMBINING_MARK", UIDNA_ERROR_LEADING_COMBINING_MARK, CONST_CS | CONST_PERSISTENT);
97+
REGISTER_LONG_CONSTANT("IDNA_ERROR_DISALLOWED", UIDNA_ERROR_DISALLOWED, CONST_CS | CONST_PERSISTENT);
98+
REGISTER_LONG_CONSTANT("IDNA_ERROR_PUNYCODE", UIDNA_ERROR_PUNYCODE, CONST_CS | CONST_PERSISTENT);
99+
REGISTER_LONG_CONSTANT("IDNA_ERROR_LABEL_HAS_DOT", UIDNA_ERROR_LABEL_HAS_DOT, CONST_CS | CONST_PERSISTENT);
100+
REGISTER_LONG_CONSTANT("IDNA_ERROR_INVALID_ACE_LABEL", UIDNA_ERROR_INVALID_ACE_LABEL, CONST_CS | CONST_PERSISTENT);
101+
REGISTER_LONG_CONSTANT("IDNA_ERROR_BIDI", UIDNA_ERROR_BIDI, CONST_CS | CONST_PERSISTENT);
102+
REGISTER_LONG_CONSTANT("IDNA_ERROR_CONTEXTJ", UIDNA_ERROR_CONTEXTJ, CONST_CS | CONST_PERSISTENT);
103+
#endif
49104
}
50105
/* }}} */
51106

@@ -54,11 +109,100 @@ enum {
54109
INTL_IDN_TO_UTF8
55110
};
56111

57-
static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS, int mode)
112+
/* like INTL_CHECK_STATUS, but as a function and varying the name of the func */
113+
static int php_intl_idn_check_status(UErrorCode err, const char *msg, int mode TSRMLS_DC)
114+
{
115+
intl_error_set_code(NULL, err TSRMLS_CC);
116+
if (U_FAILURE(err)) {
117+
char *buff;
118+
spprintf(&buff, 0, "%s: %s",
119+
mode == INTL_IDN_TO_ASCII ? "idn_to_ascii" : "idn_to_utf8",
120+
msg);
121+
intl_error_set_custom_msg(NULL, buff, 1 TSRMLS_CC);
122+
efree(buff);
123+
return FAILURE;
124+
}
125+
126+
return SUCCESS;
127+
}
128+
129+
/* Reports an illegal-argument error on behalf of the PHP function selected
 * by mode: forwards U_ILLEGAL_ARGUMENT_ERROR and msg to
 * php_intl_idn_check_status() and discards its result. */
static inline void php_intl_bad_args(const char *msg, int mode TSRMLS_DC)
{
	const UErrorCode bad_arg = U_ILLEGAL_ARGUMENT_ERROR;

	(void) php_intl_idn_check_status(bad_arg, msg, mode TSRMLS_CC);
}
133+
134+
#ifdef HAVE_46_API
135+
static void php_intl_idn_to_46(INTERNAL_FUNCTION_PARAMETERS,
136+
const char *domain, int domain_len, uint32_t option, int mode, zval *idna_info)
137+
{
138+
UErrorCode status = U_ZERO_ERROR;
139+
UIDNA *uts46;
140+
int32_t len;
141+
int32_t buffer_capac = 255; /* no domain name may exceed this */
142+
char *buffer = emalloc(buffer_capac);
143+
UIDNAInfo info = UIDNA_INFO_INITIALIZER;
144+
int buffer_used = 0;
145+
146+
uts46 = uidna_openUTS46(option, &status);
147+
if (php_intl_idn_check_status(status, "failed to open UIDNA instance",
148+
mode TSRMLS_CC) == FAILURE) {
149+
efree(buffer);
150+
RETURN_FALSE;
151+
}
152+
153+
if (mode == INTL_IDN_TO_ASCII) {
154+
len = uidna_nameToASCII_UTF8(uts46, domain, (int32_t)domain_len,
155+
buffer, buffer_capac, &info, &status);
156+
} else {
157+
len = uidna_nameToUnicodeUTF8(uts46, domain, (int32_t)domain_len,
158+
buffer, buffer_capac, &info, &status);
159+
}
160+
if (php_intl_idn_check_status(status, "failed to convert name",
161+
mode TSRMLS_CC) == FAILURE) {
162+
uidna_close(uts46);
163+
efree(buffer);
164+
RETURN_FALSE;
165+
}
166+
if (len >= 255) {
167+
php_error_docref(NULL TSRMLS_CC, E_ERROR, "ICU returned an unexpected length");
168+
}
169+
170+
buffer[len] = '\0';
171+
172+
if (info.errors == 0) {
173+
RETVAL_STRINGL(buffer, len, 0);
174+
buffer_used = 1;
175+
} else {
176+
RETVAL_FALSE;
177+
}
178+
179+
if (idna_info) {
180+
if (buffer_used) { /* used in return_value then */
181+
zval_addref_p(return_value);
182+
add_assoc_zval_ex(idna_info, "result", sizeof("result"), return_value);
183+
} else {
184+
zval *zv;
185+
ALLOC_INIT_ZVAL(zv);
186+
ZVAL_STRINGL(zv, buffer, len, 0);
187+
buffer_used = 1;
188+
add_assoc_zval_ex(idna_info, "result", sizeof("result"), zv);
189+
}
190+
add_assoc_bool_ex(idna_info, "isTransitionalDifferent",
191+
sizeof("isTransitionalDifferent"), info.isTransitionalDifferent);
192+
add_assoc_long_ex(idna_info, "errors", sizeof("errors"), (long)info.errors);
193+
}
194+
195+
if (!buffer_used) {
196+
efree(buffer);
197+
}
198+
199+
uidna_close(uts46);
200+
}
201+
#endif
202+
203+
static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS,
204+
const char *domain, int domain_len, uint32_t option, int mode)
58205
{
59-
unsigned char* domain;
60-
int domain_len;
61-
long option = 0;
62206
UChar* ustring = NULL;
63207
int ustring_len = 0;
64208
UErrorCode status;
@@ -67,18 +211,9 @@ static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS, int mode)
67211
UChar converted[MAXPATHLEN];
68212
int32_t converted_ret_len;
69213

70-
if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|l", (char **)&domain, &domain_len, &option) == FAILURE) {
71-
return;
72-
}
73-
74-
if (domain_len < 1) {
75-
intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "idn_to_ascii: empty domain name", 0 TSRMLS_CC );
76-
RETURN_FALSE;
77-
}
78-
79214
/* convert the string to UTF-16. */
80215
status = U_ZERO_ERROR;
81-
intl_convert_utf8_to_utf16(&ustring, &ustring_len, (char*) domain, domain_len, &status );
216+
intl_convert_utf8_to_utf16(&ustring, &ustring_len, domain, domain_len, &status);
82217

83218
if (U_FAILURE(status)) {
84219
intl_error_set_code(NULL, status TSRMLS_CC);
@@ -123,11 +258,75 @@ static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS, int mode)
123258
RETURN_STRINGL(((char *)converted_utf8), converted_utf8_len, 0);
124259
}
125260

261+
/* Shared entry point behind idn_to_ascii() and idn_to_utf8().
 *
 * Parses (domain[, option[, variant[, idna_info]]]), validates the arguments
 * and dispatches to php_intl_idn_to() for INTL_IDN_VARIANT_2003 or, when the
 * UTS #46 API is available, to php_intl_idn_to_46() for
 * INTL_IDN_VARIANT_UTS46.
 *
 * mode is INTL_IDN_TO_ASCII or INTL_IDN_TO_UTF8; it selects the conversion
 * direction and the function name used in error messages.
 *
 * return_value is NULL when argument parsing fails and FALSE when the
 * variant is unsupported, the domain is empty or the domain is too long.
 * Each of these cases also records U_ILLEGAL_ARGUMENT_ERROR through
 * php_intl_bad_args(). */
static void php_intl_idn_handoff(INTERNAL_FUNCTION_PARAMETERS, int mode)
{
	char *domain;
	int domain_len;
	long option = 0,
		 variant = INTL_IDN_VARIANT_2003;
	zval *idna_info = NULL;

	intl_error_reset(NULL TSRMLS_CC);

	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|llz",
			&domain, &domain_len, &option, &variant, &idna_info) == FAILURE) {
		php_intl_bad_args("bad arguments", mode TSRMLS_CC);
		RETURN_NULL(); /* don't set FALSE because that's not the way it was before... */
	}

#ifdef HAVE_46_API
	if (variant != INTL_IDN_VARIANT_2003 && variant != INTL_IDN_VARIANT_UTS46) {
		php_intl_bad_args("invalid variant, must be one of {"
			"INTL_IDNA_VARIANT_2003, INTL_IDNA_VARIANT_UTS46}", mode TSRMLS_CC);
		RETURN_FALSE;
	}
#else
	if (variant != INTL_IDN_VARIANT_2003) {
		/* name the constant as it is registered for PHP code
		 * (INTL_IDNA_VARIANT_2003), not the internal C enumerator */
		php_intl_bad_args("invalid variant, PHP was compiled against "
			"an old version of ICU and only supports INTL_IDNA_VARIANT_2003",
			mode TSRMLS_CC);
		RETURN_FALSE;
	}
#endif

	if (domain_len < 1) {
		php_intl_bad_args("empty domain name", mode TSRMLS_CC);
		RETURN_FALSE;
	}
	/* the converters receive the length as int32_t */
	if (domain_len > INT32_MAX - 1) {
		php_intl_bad_args("domain name too large", mode TSRMLS_CC);
		RETURN_FALSE;
	}
	/* don't check options; it wasn't checked before */

	if (idna_info != NULL) {
		if (variant == INTL_IDN_VARIANT_2003) {
			php_error_docref0(NULL TSRMLS_CC, E_NOTICE,
				"4 arguments were provided, but INTL_IDNA_VARIANT_2003 only "
				"takes 3 - extra argument ignored");
		} else {
			/* discard whatever the caller's variable held and start from an
			 * empty array for the UTS #46 converter to fill */
			zval_dtor(idna_info);
			array_init(idna_info);
		}
	}

	if (variant == INTL_IDN_VARIANT_2003) {
		php_intl_idn_to(INTERNAL_FUNCTION_PARAM_PASSTHRU,
				domain, domain_len, (uint32_t)option, mode);
	}
#ifdef HAVE_46_API
	else {
		php_intl_idn_to_46(INTERNAL_FUNCTION_PARAM_PASSTHRU, domain, domain_len,
				(uint32_t)option, mode, idna_info);
	}
#endif
}
324+
126325
/* {{{ proto string idn_to_ascii(string domain[, int options[, int variant[, array &idna_info]]])
127326
Converts a Unicode domain to ASCII representation, as defined in the IDNA RFC */
128327
PHP_FUNCTION(idn_to_ascii)
129328
{
130-
php_intl_idn_to(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_ASCII);
329+
php_intl_idn_handoff(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_ASCII);
131330
}
132331
/* }}} */
133332

@@ -136,7 +335,7 @@ PHP_FUNCTION(idn_to_ascii)
136335
Converts an ASCII representation of the domain to Unicode (UTF-8), as defined in the IDNA RFC */
137336
PHP_FUNCTION(idn_to_utf8)
138337
{
139-
php_intl_idn_to(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_UTF8);
338+
php_intl_idn_handoff(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_UTF8);
140339
}
141340
/* }}} */
142341

‎ext/intl/php_intl.c

+4-2
Original file line numberDiff line numberDiff line change
@@ -335,13 +335,15 @@ ZEND_END_ARG_INFO()
335335
ZEND_BEGIN_ARG_INFO_EX(arginfo_idn_to_ascii, 0, 0, 1)
336336
ZEND_ARG_INFO(0, domain)
337337
ZEND_ARG_INFO(0, option)
338-
ZEND_ARG_INFO(0, status)
338+
ZEND_ARG_INFO(0, variant)
339+
ZEND_ARG_INFO(1, idn_info)
339340
ZEND_END_ARG_INFO()
340341

341342
ZEND_BEGIN_ARG_INFO_EX(arginfo_idn_to_utf8, 0, 0, 1)
342343
ZEND_ARG_INFO(0, domain)
343344
ZEND_ARG_INFO(0, option)
344-
ZEND_ARG_INFO(0, status)
345+
ZEND_ARG_INFO(0, variant)
346+
ZEND_ARG_INFO(1, idn_info)
345347
ZEND_END_ARG_INFO()
346348

347349
ZEND_BEGIN_ARG_INFO_EX( arginfo_resourcebundle_create_proc, 0, 0, 2 )

‎ext/intl/tests/idn_uts46_basic.phpt

+53
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,53 @@
1+
--TEST--
2+
IDN UTS #46 API basic tests
3+
--SKIPIF--
4+
<?php
5+
if (!extension_loaded('intl'))
6+
die('skip');
7+
if (!defined('INTL_IDNA_VARIANT_UTS46'))
8+
die('skip no UTS #46 API');
9+
--FILE--
10+
<?php
11+
$utf8dn = "www.fußball.com";
12+
$asciiNonTrans = "www.xn--fuball-cta.com";
13+
14+
echo "all ok, no details:", "\n";
15+
var_dump(idn_to_ascii($utf8dn,
16+
IDNA_NONTRANSITIONAL_TO_ASCII, INTL_IDNA_VARIANT_UTS46));
17+
18+
echo "all ok, no details, transitional:", "\n";
19+
var_dump(idn_to_ascii($utf8dn, 0, INTL_IDNA_VARIANT_UTS46));
20+
21+
echo "all ok, with details:", "\n";
22+
var_dump(idn_to_ascii($utf8dn, IDNA_NONTRANSITIONAL_TO_ASCII,
23+
INTL_IDNA_VARIANT_UTS46, $info));
24+
var_dump($info);
25+
26+
echo "reverse, ok, with details:", "\n";
27+
var_dump(idn_to_utf8($asciiNonTrans, 0, INTL_IDNA_VARIANT_UTS46, $info));
28+
var_dump($info);
29+
--EXPECT--
30+
all ok, no details:
31+
string(22) "www.xn--fuball-cta.com"
32+
all ok, no details, transitional:
33+
string(16) "www.fussball.com"
34+
all ok, with details:
35+
string(22) "www.xn--fuball-cta.com"
36+
array(3) {
37+
["result"]=>
38+
string(22) "www.xn--fuball-cta.com"
39+
["isTransitionalDifferent"]=>
40+
bool(true)
41+
["errors"]=>
42+
int(0)
43+
}
44+
reverse, ok, with details:
45+
string(16) "www.fußball.com"
46+
array(3) {
47+
["result"]=>
48+
string(16) "www.fußball.com"
49+
["isTransitionalDifferent"]=>
50+
bool(false)
51+
["errors"]=>
52+
int(0)
53+
}

0 commit comments

Comments
 (0)