29
29
#include "ext/standard/php_string.h"
30
30
31
31
#include "intl_error.h"
32
- #include "intl_convert.h"
32
+ #include "intl_convert.h"
33
33
/* }}} */
34
34
35
/* UIDNA_INFO_INITIALIZER is used as the feature test for the UTS #46 API
 * (introduced in ICU 4.6). */
#if defined(UIDNA_INFO_INITIALIZER)
#define HAVE_46_API 1
#endif

/* IDNA processing variants selectable by the caller; the numeric values are
 * what gets registered as the INTL_IDNA_VARIANT_* constants. */
enum {
	INTL_IDN_VARIANT_2003  = 0,
	INTL_IDN_VARIANT_UTS46 = 1
};
43
+
35
44
/* {{{ idn_register_constants
36
45
* Register API constants
37
46
*/
38
47
void idn_register_constants ( INIT_FUNC_ARGS )
39
48
{
49
+ /* OPTIONS */
50
+
40
51
/* Option to prohibit processing of unassigned codepoints in the input and
41
52
do not check if the input conforms to STD-3 ASCII rules. */
42
53
REGISTER_LONG_CONSTANT ("IDNA_DEFAULT" , UIDNA_DEFAULT , CONST_CS | CONST_PERSISTENT );
@@ -46,6 +57,50 @@ void idn_register_constants( INIT_FUNC_ARGS )
46
57
47
58
/* Option to check if input conforms to STD-3 ASCII rules */
48
59
REGISTER_LONG_CONSTANT ("IDNA_USE_STD3_RULES" , UIDNA_USE_STD3_RULES , CONST_CS | CONST_PERSISTENT );
60
+
61
+ #ifdef HAVE_46_API
62
+
63
+ /* Option to check for whether the input conforms to the BiDi rules.
64
+ * Ignored by the IDNA2003 implementation. (IDNA2003 always performs a BiDi check.) */
65
+ REGISTER_LONG_CONSTANT ("IDNA_CHECK_BIDI" , UIDNA_CHECK_BIDI , CONST_CS | CONST_PERSISTENT );
66
+
67
+ /* Option to check for whether the input conforms to the CONTEXTJ rules.
68
+ * Ignored by the IDNA2003 implementation. (The CONTEXTJ check is new in IDNA2008.) */
69
+ REGISTER_LONG_CONSTANT ("IDNA_CHECK_CONTEXTJ" , UIDNA_CHECK_CONTEXTJ , CONST_CS | CONST_PERSISTENT );
70
+
71
+ /* Option for nontransitional processing in ToASCII().
72
+ * By default, ToASCII() uses transitional processing.
73
+ * Ignored by the IDNA2003 implementation. */
74
+ REGISTER_LONG_CONSTANT ("IDNA_NONTRANSITIONAL_TO_ASCII" , UIDNA_NONTRANSITIONAL_TO_ASCII , CONST_CS | CONST_PERSISTENT );
75
+
76
+ /* Option for nontransitional processing in ToUnicode().
77
+ * By default, ToUnicode() uses transitional processing.
78
+ * Ignored by the IDNA2003 implementation. */
79
+ REGISTER_LONG_CONSTANT ("IDNA_NONTRANSITIONAL_TO_UNICODE" , UIDNA_NONTRANSITIONAL_TO_UNICODE , CONST_CS | CONST_PERSISTENT );
80
+ #endif
81
+
82
+ /* VARIANTS */
83
+ REGISTER_LONG_CONSTANT ("INTL_IDNA_VARIANT_2003" , INTL_IDN_VARIANT_2003 , CONST_CS | CONST_PERSISTENT );
84
+ #ifdef HAVE_46_API
85
+ REGISTER_LONG_CONSTANT ("INTL_IDNA_VARIANT_UTS46" , INTL_IDN_VARIANT_UTS46 , CONST_CS | CONST_PERSISTENT );
86
+ #endif
87
+
88
+ #ifdef HAVE_46_API
89
+ /* PINFO ERROR CODES */
90
+ REGISTER_LONG_CONSTANT ("IDNA_ERROR_EMPTY_LABEL" , UIDNA_ERROR_EMPTY_LABEL , CONST_CS | CONST_PERSISTENT );
91
+ REGISTER_LONG_CONSTANT ("IDNA_ERROR_LABEL_TOO_LONG" , UIDNA_ERROR_LABEL_TOO_LONG , CONST_CS | CONST_PERSISTENT );
92
+ REGISTER_LONG_CONSTANT ("IDNA_ERROR_DOMAIN_NAME_TOO_LONG" , UIDNA_ERROR_DOMAIN_NAME_TOO_LONG , CONST_CS | CONST_PERSISTENT );
93
+ REGISTER_LONG_CONSTANT ("IDNA_ERROR_LEADING_HYPHEN" , UIDNA_ERROR_LEADING_HYPHEN , CONST_CS | CONST_PERSISTENT );
94
+ REGISTER_LONG_CONSTANT ("IDNA_ERROR_TRAILING_HYPHEN" , UIDNA_ERROR_TRAILING_HYPHEN , CONST_CS | CONST_PERSISTENT );
95
+ REGISTER_LONG_CONSTANT ("IDNA_ERROR_HYPHEN_3_4" , UIDNA_ERROR_HYPHEN_3_4 , CONST_CS | CONST_PERSISTENT );
96
+ REGISTER_LONG_CONSTANT ("IDNA_ERROR_LEADING_COMBINING_MARK" , UIDNA_ERROR_LEADING_COMBINING_MARK , CONST_CS | CONST_PERSISTENT );
97
+ REGISTER_LONG_CONSTANT ("IDNA_ERROR_DISALLOWED" , UIDNA_ERROR_DISALLOWED , CONST_CS | CONST_PERSISTENT );
98
+ REGISTER_LONG_CONSTANT ("IDNA_ERROR_PUNYCODE" , UIDNA_ERROR_PUNYCODE , CONST_CS | CONST_PERSISTENT );
99
+ REGISTER_LONG_CONSTANT ("IDNA_ERROR_LABEL_HAS_DOT" , UIDNA_ERROR_LABEL_HAS_DOT , CONST_CS | CONST_PERSISTENT );
100
+ REGISTER_LONG_CONSTANT ("IDNA_ERROR_INVALID_ACE_LABEL" , UIDNA_ERROR_INVALID_ACE_LABEL , CONST_CS | CONST_PERSISTENT );
101
+ REGISTER_LONG_CONSTANT ("IDNA_ERROR_BIDI" , UIDNA_ERROR_BIDI , CONST_CS | CONST_PERSISTENT );
102
+ REGISTER_LONG_CONSTANT ("IDNA_ERROR_CONTEXTJ" , UIDNA_ERROR_CONTEXTJ , CONST_CS | CONST_PERSISTENT );
103
+ #endif
49
104
}
50
105
/* }}} */
51
106
@@ -54,11 +109,100 @@ enum {
54
109
INTL_IDN_TO_UTF8
55
110
};
56
111
57
- static void php_intl_idn_to (INTERNAL_FUNCTION_PARAMETERS , int mode )
112
+ /* like INTL_CHECK_STATUS, but as a function and varying the name of the func */
113
+ static int php_intl_idn_check_status (UErrorCode err , const char * msg , int mode TSRMLS_DC )
114
+ {
115
+ intl_error_set_code (NULL , err TSRMLS_CC );
116
+ if (U_FAILURE (err )) {
117
+ char * buff ;
118
+ spprintf (& buff , 0 , "%s: %s" ,
119
+ mode == INTL_IDN_TO_ASCII ? "idn_to_ascii" : "idn_to_utf8" ,
120
+ msg );
121
+ intl_error_set_custom_msg (NULL , buff , 1 TSRMLS_CC );
122
+ efree (buff );
123
+ return FAILURE ;
124
+ }
125
+
126
+ return SUCCESS ;
127
+ }
128
+
129
+ static inline void php_intl_bad_args (const char * msg , int mode TSRMLS_DC )
130
+ {
131
+ php_intl_idn_check_status (U_ILLEGAL_ARGUMENT_ERROR , msg , mode TSRMLS_CC );
132
+ }
133
+
134
+ #ifdef HAVE_46_API
135
+ static void php_intl_idn_to_46 (INTERNAL_FUNCTION_PARAMETERS ,
136
+ const char * domain , int domain_len , uint32_t option , int mode , zval * idna_info )
137
+ {
138
+ UErrorCode status = U_ZERO_ERROR ;
139
+ UIDNA * uts46 ;
140
+ int32_t len ;
141
+ int32_t buffer_capac = 255 ; /* no domain name may exceed this */
142
+ char * buffer = emalloc (buffer_capac );
143
+ UIDNAInfo info = UIDNA_INFO_INITIALIZER ;
144
+ int buffer_used = 0 ;
145
+
146
+ uts46 = uidna_openUTS46 (option , & status );
147
+ if (php_intl_idn_check_status (status , "failed to open UIDNA instance" ,
148
+ mode TSRMLS_CC ) == FAILURE ) {
149
+ efree (buffer );
150
+ RETURN_FALSE ;
151
+ }
152
+
153
+ if (mode == INTL_IDN_TO_ASCII ) {
154
+ len = uidna_nameToASCII_UTF8 (uts46 , domain , (int32_t )domain_len ,
155
+ buffer , buffer_capac , & info , & status );
156
+ } else {
157
+ len = uidna_nameToUnicodeUTF8 (uts46 , domain , (int32_t )domain_len ,
158
+ buffer , buffer_capac , & info , & status );
159
+ }
160
+ if (php_intl_idn_check_status (status , "failed to convert name" ,
161
+ mode TSRMLS_CC ) == FAILURE ) {
162
+ uidna_close (uts46 );
163
+ efree (buffer );
164
+ RETURN_FALSE ;
165
+ }
166
+ if (len >= 255 ) {
167
+ php_error_docref (NULL TSRMLS_CC , E_ERROR , "ICU returned an unexpected length" );
168
+ }
169
+
170
+ buffer [len ] = '\0' ;
171
+
172
+ if (info .errors == 0 ) {
173
+ RETVAL_STRINGL (buffer , len , 0 );
174
+ buffer_used = 1 ;
175
+ } else {
176
+ RETVAL_FALSE ;
177
+ }
178
+
179
+ if (idna_info ) {
180
+ if (buffer_used ) { /* used in return_value then */
181
+ zval_addref_p (return_value );
182
+ add_assoc_zval_ex (idna_info , "result" , sizeof ("result" ), return_value );
183
+ } else {
184
+ zval * zv ;
185
+ ALLOC_INIT_ZVAL (zv );
186
+ ZVAL_STRINGL (zv , buffer , len , 0 );
187
+ buffer_used = 1 ;
188
+ add_assoc_zval_ex (idna_info , "result" , sizeof ("result" ), zv );
189
+ }
190
+ add_assoc_bool_ex (idna_info , "isTransitionalDifferent" ,
191
+ sizeof ("isTransitionalDifferent" ), info .isTransitionalDifferent );
192
+ add_assoc_long_ex (idna_info , "errors" , sizeof ("errors" ), (long )info .errors );
193
+ }
194
+
195
+ if (!buffer_used ) {
196
+ efree (buffer );
197
+ }
198
+
199
+ uidna_close (uts46 );
200
+ }
201
+ #endif
202
+
203
+ static void php_intl_idn_to (INTERNAL_FUNCTION_PARAMETERS ,
204
+ const char * domain , int domain_len , uint32_t option , int mode )
58
205
{
59
- unsigned char * domain ;
60
- int domain_len ;
61
- long option = 0 ;
62
206
UChar * ustring = NULL ;
63
207
int ustring_len = 0 ;
64
208
UErrorCode status ;
@@ -67,18 +211,9 @@ static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS, int mode)
67
211
UChar converted [MAXPATHLEN ];
68
212
int32_t converted_ret_len ;
69
213
70
- if (zend_parse_parameters (ZEND_NUM_ARGS () TSRMLS_CC , "s|l" , (char * * )& domain , & domain_len , & option ) == FAILURE ) {
71
- return ;
72
- }
73
-
74
- if (domain_len < 1 ) {
75
- intl_error_set ( NULL , U_ILLEGAL_ARGUMENT_ERROR , "idn_to_ascii: empty domain name" , 0 TSRMLS_CC );
76
- RETURN_FALSE ;
77
- }
78
-
79
214
/* convert the string to UTF-16. */
80
215
status = U_ZERO_ERROR ;
81
- intl_convert_utf8_to_utf16 (& ustring , & ustring_len , ( char * ) domain , domain_len , & status );
216
+ intl_convert_utf8_to_utf16 (& ustring , & ustring_len , domain , domain_len , & status );
82
217
83
218
if (U_FAILURE (status )) {
84
219
intl_error_set_code (NULL , status TSRMLS_CC );
@@ -123,11 +258,75 @@ static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS, int mode)
123
258
RETURN_STRINGL (((char * )converted_utf8 ), converted_utf8_len , 0 );
124
259
}
125
260
261
+ static void php_intl_idn_handoff (INTERNAL_FUNCTION_PARAMETERS , int mode )
262
+ {
263
+ char * domain ;
264
+ int domain_len ;
265
+ long option = 0 ,
266
+ variant = INTL_IDN_VARIANT_2003 ;
267
+ zval * idna_info = NULL ;
268
+
269
+ intl_error_reset (NULL TSRMLS_CC );
270
+
271
+ if (zend_parse_parameters (ZEND_NUM_ARGS () TSRMLS_CC , "s|llz" ,
272
+ & domain , & domain_len , & option , & variant , & idna_info ) == FAILURE ) {
273
+ php_intl_bad_args ("bad arguments" , mode TSRMLS_CC );
274
+ RETURN_NULL (); /* don't set FALSE because that's not the way it was before... */
275
+ }
276
+
277
+ #ifdef HAVE_46_API
278
+ if (variant != INTL_IDN_VARIANT_2003 && variant != INTL_IDN_VARIANT_UTS46 ) {
279
+ php_intl_bad_args ("invalid variant, must be one of {"
280
+ "INTL_IDNA_VARIANT_2003, INTL_IDNA_VARIANT_UTS46}" , mode TSRMLS_CC );
281
+ RETURN_FALSE ;
282
+ }
283
+ #else
284
+ if (variant != INTL_IDN_VARIANT_2003 ) {
285
+ php_intl_bad_args ("invalid variant, PHP was compiled against "
286
+ "an old version of ICU and only supports INTL_IDN_VARIANT_2003" ,
287
+ mode TSRMLS_CC );
288
+ RETURN_FALSE ;
289
+ }
290
+ #endif
291
+
292
+ if (domain_len < 1 ) {
293
+ php_intl_bad_args ("empty domain name" , mode TSRMLS_CC );
294
+ RETURN_FALSE ;
295
+ }
296
+ if (domain_len > INT32_MAX - 1 ) {
297
+ php_intl_bad_args ("domain name too large" , mode TSRMLS_CC );
298
+ RETURN_FALSE ;
299
+ }
300
+ /* don't check options; it wasn't checked before */
301
+
302
+ if (idna_info != NULL ) {
303
+ if (variant == INTL_IDN_VARIANT_2003 ) {
304
+ php_error_docref0 (NULL TSRMLS_CC , E_NOTICE ,
305
+ "4 arguments were provided, but INTL_IDNA_VARIANT_2003 only "
306
+ "takes 3 - extra argument ignored" );
307
+ } else {
308
+ zval_dtor (idna_info );
309
+ array_init (idna_info );
310
+ }
311
+ }
312
+
313
+ if (variant == INTL_IDN_VARIANT_2003 ) {
314
+ php_intl_idn_to (INTERNAL_FUNCTION_PARAM_PASSTHRU ,
315
+ domain , domain_len , (uint32_t )option , mode );
316
+ }
317
+ #ifdef HAVE_46_API
318
+ else {
319
+ php_intl_idn_to_46 (INTERNAL_FUNCTION_PARAM_PASSTHRU , domain , domain_len ,
320
+ (uint32_t )option , mode , idna_info );
321
+ }
322
+ #endif
323
+ }
324
+
126
325
/* {{{ proto string idn_to_ascii(string domain[, int options[, int variant[, array &idna_info]]])
127
326
Converts a Unicode domain to ASCII representation, as defined in the IDNA RFC */
128
327
PHP_FUNCTION (idn_to_ascii )
129
328
{
130
- php_intl_idn_to (INTERNAL_FUNCTION_PARAM_PASSTHRU , INTL_IDN_TO_ASCII );
329
+ php_intl_idn_handoff (INTERNAL_FUNCTION_PARAM_PASSTHRU , INTL_IDN_TO_ASCII );
131
330
}
132
331
/* }}} */
133
332
@@ -136,7 +335,7 @@ PHP_FUNCTION(idn_to_ascii)
136
335
Converts an ASCII representation of the domain to Unicode (UTF-8), as defined in the IDNA RFC */
137
336
PHP_FUNCTION (idn_to_utf8 )
138
337
{
139
- php_intl_idn_to (INTERNAL_FUNCTION_PARAM_PASSTHRU , INTL_IDN_TO_UTF8 );
338
+ php_intl_idn_handoff (INTERNAL_FUNCTION_PARAM_PASSTHRU , INTL_IDN_TO_UTF8 );
140
339
}
141
340
/* }}} */
142
341
0 commit comments