root/ext/intl/idn/idn.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. idn_register_constants
  2. php_intl_idn_check_status
  3. php_intl_bad_args
  4. php_intl_idn_to_46
  5. php_intl_idn_to
  6. php_intl_idn_handoff
  7. PHP_FUNCTION
  8. PHP_FUNCTION

   1 /*
   2    +----------------------------------------------------------------------+
   3    | PHP Version 5                                                        |
   4    +----------------------------------------------------------------------+
   5    | Copyright (c) 2009 The PHP Group                                     |
   6    +----------------------------------------------------------------------+
   7    | This source file is subject to version 3.01 of the PHP license,      |
   8    | that is bundled with this package in the file LICENSE, and is        |
   9    | available through the world-wide-web at the following url:           |
  10    | http://www.php.net/license/3_01.txt                                  |
  11    | If you did not receive a copy of the PHP license and are unable to   |
  12    | obtain it through the world-wide-web, please send a note to          |
  13    | license@php.net so we can mail you a copy immediately.               |
  14    +----------------------------------------------------------------------+
  15    | Author: Pierre A. Joye <pierre@php.net>                              |
  16    |         Gustavo Lopes  <cataphract@php.net>                          |
  17    +----------------------------------------------------------------------+
  18  */
  19 /* $Id$ */
  20 
  21 /* {{{ includes */
  22 #ifdef HAVE_CONFIG_H
  23 #include "config.h"
  24 #endif
  25 
  26 #include <php.h>
  27 
  28 #include <unicode/uidna.h>
  29 #include <unicode/ustring.h>
  30 #include "ext/standard/php_string.h"
  31 
  32 #include "intl_error.h"
  33 #include "intl_convert.h"
  34 /* }}} */
  35 
  36 #ifdef UIDNA_INFO_INITIALIZER
  37 #define HAVE_46_API 1 /* has UTS#46 API (introduced in ICU 4.6) */
  38 #endif
  39 
  40 enum {
  41         INTL_IDN_VARIANT_2003 = 0,
  42         INTL_IDN_VARIANT_UTS46
  43 };
  44 
  45 /* {{{ grapheme_register_constants
  46  * Register API constants
  47  */
  48 void idn_register_constants( INIT_FUNC_ARGS )
  49 {
  50         /* OPTIONS */
  51 
  52         /* Option to prohibit processing of unassigned codepoints in the input and
  53            do not check if the input conforms to STD-3 ASCII rules. */
  54         REGISTER_LONG_CONSTANT("IDNA_DEFAULT", UIDNA_DEFAULT, CONST_CS | CONST_PERSISTENT);
  55 
  56         /* Option to allow processing of unassigned codepoints in the input */
  57         REGISTER_LONG_CONSTANT("IDNA_ALLOW_UNASSIGNED", UIDNA_ALLOW_UNASSIGNED, CONST_CS | CONST_PERSISTENT);
  58 
  59         /* Option to check if input conforms to STD-3 ASCII rules */
  60         REGISTER_LONG_CONSTANT("IDNA_USE_STD3_RULES", UIDNA_USE_STD3_RULES, CONST_CS | CONST_PERSISTENT);
  61 
  62 #ifdef HAVE_46_API
  63 
  64         /* Option to check for whether the input conforms to the BiDi rules.
  65          * Ignored by the IDNA2003 implementation. (IDNA2003 always performs a BiDi check.) */
  66         REGISTER_LONG_CONSTANT("IDNA_CHECK_BIDI", UIDNA_CHECK_BIDI, CONST_CS | CONST_PERSISTENT);
  67 
  68         /* Option to check for whether the input conforms to the CONTEXTJ rules.
  69          * Ignored by the IDNA2003 implementation. (The CONTEXTJ check is new in IDNA2008.) */
  70         REGISTER_LONG_CONSTANT("IDNA_CHECK_CONTEXTJ", UIDNA_CHECK_CONTEXTJ, CONST_CS | CONST_PERSISTENT);
  71 
  72         /* Option for nontransitional processing in ToASCII().
  73          * By default, ToASCII() uses transitional processing.
  74          * Ignored by the IDNA2003 implementation. */
  75         REGISTER_LONG_CONSTANT("IDNA_NONTRANSITIONAL_TO_ASCII", UIDNA_NONTRANSITIONAL_TO_ASCII, CONST_CS | CONST_PERSISTENT);
  76 
  77         /* Option for nontransitional processing in ToUnicode().
  78          * By default, ToUnicode() uses transitional processing.
  79          * Ignored by the IDNA2003 implementation. */
  80         REGISTER_LONG_CONSTANT("IDNA_NONTRANSITIONAL_TO_UNICODE", UIDNA_NONTRANSITIONAL_TO_UNICODE, CONST_CS | CONST_PERSISTENT);
  81 #endif
  82 
  83         /* VARIANTS */
  84         REGISTER_LONG_CONSTANT("INTL_IDNA_VARIANT_2003", INTL_IDN_VARIANT_2003, CONST_CS | CONST_PERSISTENT);
  85 #ifdef HAVE_46_API
  86         REGISTER_LONG_CONSTANT("INTL_IDNA_VARIANT_UTS46", INTL_IDN_VARIANT_UTS46, CONST_CS | CONST_PERSISTENT);
  87 #endif
  88 
  89 #ifdef HAVE_46_API
  90         /* PINFO ERROR CODES */
  91         REGISTER_LONG_CONSTANT("IDNA_ERROR_EMPTY_LABEL", UIDNA_ERROR_EMPTY_LABEL, CONST_CS | CONST_PERSISTENT);
  92         REGISTER_LONG_CONSTANT("IDNA_ERROR_LABEL_TOO_LONG", UIDNA_ERROR_LABEL_TOO_LONG, CONST_CS | CONST_PERSISTENT);
  93         REGISTER_LONG_CONSTANT("IDNA_ERROR_DOMAIN_NAME_TOO_LONG", UIDNA_ERROR_DOMAIN_NAME_TOO_LONG, CONST_CS | CONST_PERSISTENT);
  94         REGISTER_LONG_CONSTANT("IDNA_ERROR_LEADING_HYPHEN", UIDNA_ERROR_LEADING_HYPHEN, CONST_CS | CONST_PERSISTENT);
  95         REGISTER_LONG_CONSTANT("IDNA_ERROR_TRAILING_HYPHEN", UIDNA_ERROR_TRAILING_HYPHEN, CONST_CS | CONST_PERSISTENT);
  96         REGISTER_LONG_CONSTANT("IDNA_ERROR_HYPHEN_3_4", UIDNA_ERROR_HYPHEN_3_4, CONST_CS | CONST_PERSISTENT);
  97         REGISTER_LONG_CONSTANT("IDNA_ERROR_LEADING_COMBINING_MARK", UIDNA_ERROR_LEADING_COMBINING_MARK, CONST_CS | CONST_PERSISTENT);
  98         REGISTER_LONG_CONSTANT("IDNA_ERROR_DISALLOWED", UIDNA_ERROR_DISALLOWED, CONST_CS | CONST_PERSISTENT);
  99         REGISTER_LONG_CONSTANT("IDNA_ERROR_PUNYCODE", UIDNA_ERROR_PUNYCODE, CONST_CS | CONST_PERSISTENT);
 100         REGISTER_LONG_CONSTANT("IDNA_ERROR_LABEL_HAS_DOT", UIDNA_ERROR_LABEL_HAS_DOT, CONST_CS | CONST_PERSISTENT);
 101         REGISTER_LONG_CONSTANT("IDNA_ERROR_INVALID_ACE_LABEL", UIDNA_ERROR_INVALID_ACE_LABEL, CONST_CS | CONST_PERSISTENT);
 102         REGISTER_LONG_CONSTANT("IDNA_ERROR_BIDI", UIDNA_ERROR_BIDI, CONST_CS | CONST_PERSISTENT);
 103         REGISTER_LONG_CONSTANT("IDNA_ERROR_CONTEXTJ", UIDNA_ERROR_CONTEXTJ, CONST_CS | CONST_PERSISTENT);
 104 #endif
 105 }
 106 /* }}} */
 107 
 108 enum {
 109         INTL_IDN_TO_ASCII = 0,
 110         INTL_IDN_TO_UTF8
 111 };
 112 
 113 /* like INTL_CHECK_STATUS, but as a function and varying the name of the func */
 114 static int php_intl_idn_check_status(UErrorCode err, const char *msg, int mode TSRMLS_DC)
 115 {
 116         intl_error_set_code(NULL, err TSRMLS_CC);
 117         if (U_FAILURE(err)) {
 118                 char *buff;
 119                 spprintf(&buff, 0, "%s: %s",
 120                         mode == INTL_IDN_TO_ASCII ? "idn_to_ascii" : "idn_to_utf8",
 121                         msg);
 122                 intl_error_set_custom_msg(NULL, buff, 1 TSRMLS_CC);
 123                 efree(buff);
 124                 return FAILURE;
 125         }
 126 
 127         return SUCCESS;
 128 }
 129 
 130 static inline void php_intl_bad_args(const char *msg, int mode TSRMLS_DC)
 131 {
 132         php_intl_idn_check_status(U_ILLEGAL_ARGUMENT_ERROR, msg, mode TSRMLS_CC);
 133 }
 134 
 135 #ifdef HAVE_46_API
 136 static void php_intl_idn_to_46(INTERNAL_FUNCTION_PARAMETERS,
 137                 const char *domain, int domain_len, uint32_t option, int mode, zval *idna_info)
 138 {
 139         UErrorCode        status = U_ZERO_ERROR;
 140         UIDNA             *uts46;
 141         int32_t           len;
 142         int32_t           buffer_capac = 255; /* no domain name may exceed this */
 143         char              *buffer = emalloc(buffer_capac);
 144         UIDNAInfo         info = UIDNA_INFO_INITIALIZER;
 145         int                       buffer_used = 0;
 146         
 147         uts46 = uidna_openUTS46(option, &status);
 148         if (php_intl_idn_check_status(status, "failed to open UIDNA instance",
 149                         mode TSRMLS_CC) == FAILURE) {
 150                 efree(buffer);
 151                 RETURN_FALSE;
 152         }
 153 
 154         if (mode == INTL_IDN_TO_ASCII) {
 155                 len = uidna_nameToASCII_UTF8(uts46, domain, (int32_t)domain_len,
 156                                 buffer, buffer_capac, &info, &status);
 157         } else {
 158                 len = uidna_nameToUnicodeUTF8(uts46, domain, (int32_t)domain_len,
 159                                 buffer, buffer_capac, &info, &status);
 160         }
 161         if (php_intl_idn_check_status(status, "failed to convert name",
 162                         mode TSRMLS_CC) == FAILURE) {
 163                 uidna_close(uts46);
 164                 efree(buffer);
 165                 RETURN_FALSE;
 166         }
 167         if (len >= 255) {
 168                 php_error_docref(NULL TSRMLS_CC, E_ERROR, "ICU returned an unexpected length");
 169         }
 170 
 171         buffer[len] = '\0';
 172 
 173         if (info.errors == 0) {
 174                 RETVAL_STRINGL(buffer, len, 0);
 175                 buffer_used = 1;
 176         } else {
 177                 RETVAL_FALSE;
 178         }
 179 
 180         if (idna_info) {
 181                 if (buffer_used) { /* used in return_value then */
 182                         zval_addref_p(return_value);
 183                         add_assoc_zval_ex(idna_info, "result", sizeof("result"), return_value);
 184                 } else {
 185                         zval *zv;
 186                         ALLOC_INIT_ZVAL(zv);
 187                         ZVAL_STRINGL(zv, buffer, len, 0);
 188                         buffer_used = 1;
 189                         add_assoc_zval_ex(idna_info, "result", sizeof("result"), zv);
 190                 }
 191                 add_assoc_bool_ex(idna_info, "isTransitionalDifferent",
 192                                 sizeof("isTransitionalDifferent"), info.isTransitionalDifferent);
 193                 add_assoc_long_ex(idna_info, "errors", sizeof("errors"), (long)info.errors);
 194         }
 195 
 196         if (!buffer_used) {
 197                 efree(buffer);
 198         }
 199 
 200         uidna_close(uts46);
 201 }
 202 #endif
 203 
 204 static void php_intl_idn_to(INTERNAL_FUNCTION_PARAMETERS,
 205                 const char *domain, int domain_len, uint32_t option, int mode)
 206 {
 207         UChar* ustring = NULL;
 208         int ustring_len = 0;
 209         UErrorCode status;
 210         char     *converted_utf8;
 211         int32_t   converted_utf8_len;
 212         UChar     converted[MAXPATHLEN];
 213         int32_t   converted_ret_len;
 214 
 215         /* convert the string to UTF-16. */
 216         status = U_ZERO_ERROR;
 217         intl_convert_utf8_to_utf16(&ustring, &ustring_len, domain, domain_len, &status);
 218 
 219         if (U_FAILURE(status)) {
 220                 intl_error_set_code(NULL, status TSRMLS_CC);
 221 
 222                 /* Set error messages. */
 223                 intl_error_set_custom_msg( NULL, "Error converting input string to UTF-16", 0 TSRMLS_CC );
 224                 if (ustring) {
 225                         efree(ustring);
 226                 }
 227                 RETURN_FALSE;
 228         } else {
 229                 UParseError parse_error;
 230 
 231                 status = U_ZERO_ERROR;
 232                 if (mode == INTL_IDN_TO_ASCII) {
 233                         converted_ret_len = uidna_IDNToASCII(ustring, ustring_len, converted, MAXPATHLEN, (int32_t)option, &parse_error, &status);
 234                 } else {
 235                         converted_ret_len = uidna_IDNToUnicode(ustring, ustring_len, converted, MAXPATHLEN, (int32_t)option, &parse_error, &status);
 236                 }
 237                 efree(ustring);
 238 
 239                 if (U_FAILURE(status)) {
 240                         intl_error_set( NULL, status, "idn_to_ascii: cannot convert to ASCII", 0 TSRMLS_CC );
 241                         RETURN_FALSE;
 242                 }
 243 
 244                 status = U_ZERO_ERROR;
 245                 intl_convert_utf16_to_utf8(&converted_utf8, &converted_utf8_len, converted, converted_ret_len, &status);
 246 
 247                 if (U_FAILURE(status)) {
 248                         /* Set global error code. */
 249                         intl_error_set_code(NULL, status TSRMLS_CC);
 250 
 251                         /* Set error messages. */
 252                         intl_error_set_custom_msg( NULL, "Error converting output string to UTF-8", 0 TSRMLS_CC );
 253                         efree(converted_utf8);
 254                         RETURN_FALSE;
 255                 }
 256         }
 257 
 258         /* return the allocated string, not a duplicate */
 259         RETURN_STRINGL(((char *)converted_utf8), converted_utf8_len, 0);
 260 }
 261 
 262 static void php_intl_idn_handoff(INTERNAL_FUNCTION_PARAMETERS, int mode)
 263 {
 264         char *domain;
 265         int domain_len;
 266         long option = 0,
 267                  variant = INTL_IDN_VARIANT_2003;
 268         zval *idna_info = NULL;
 269 
 270         intl_error_reset(NULL TSRMLS_CC);
 271 
 272         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|llz",
 273                         &domain, &domain_len, &option, &variant, &idna_info) == FAILURE) {
 274                 php_intl_bad_args("bad arguments", mode TSRMLS_CC);
 275                 RETURN_NULL(); /* don't set FALSE because that's not the way it was before... */
 276         }
 277 
 278 #ifdef HAVE_46_API
 279         if (variant != INTL_IDN_VARIANT_2003 && variant != INTL_IDN_VARIANT_UTS46) {
 280                 php_intl_bad_args("invalid variant, must be one of {"
 281                         "INTL_IDNA_VARIANT_2003, INTL_IDNA_VARIANT_UTS46}", mode TSRMLS_CC);
 282                 RETURN_FALSE;
 283         }
 284 #else
 285         if (variant != INTL_IDN_VARIANT_2003) {
 286                 php_intl_bad_args("invalid variant, PHP was compiled against "
 287                         "an old version of ICU and only supports INTL_IDN_VARIANT_2003",
 288                         mode TSRMLS_CC);
 289                 RETURN_FALSE;
 290         }
 291 #endif
 292 
 293         if (domain_len < 1) {
 294                 php_intl_bad_args("empty domain name", mode TSRMLS_CC);
 295                 RETURN_FALSE;
 296         }
 297         if (domain_len > INT32_MAX - 1) {
 298                 php_intl_bad_args("domain name too large", mode TSRMLS_CC);
 299                 RETURN_FALSE;
 300         }
 301         /* don't check options; it wasn't checked before */
 302 
 303         if (idna_info != NULL) {
 304                 if (variant == INTL_IDN_VARIANT_2003) {
 305                         php_error_docref0(NULL TSRMLS_CC, E_NOTICE,
 306                                 "4 arguments were provided, but INTL_IDNA_VARIANT_2003 only "
 307                                 "takes 3 - extra argument ignored");
 308                 } else {
 309                         zval_dtor(idna_info);
 310                         array_init(idna_info);
 311                 }
 312         }
 313         
 314         if (variant == INTL_IDN_VARIANT_2003) {
 315                 php_intl_idn_to(INTERNAL_FUNCTION_PARAM_PASSTHRU,
 316                                 domain, domain_len, (uint32_t)option, mode);
 317         }
 318 #ifdef HAVE_46_API
 319         else {
 320                 php_intl_idn_to_46(INTERNAL_FUNCTION_PARAM_PASSTHRU, domain, domain_len,
 321                                 (uint32_t)option, mode, idna_info);
 322         }
 323 #endif
 324 }
 325 
 326 /* {{{ proto int idn_to_ascii(string domain[, int options[, int variant[, array &idna_info]]])
 327    Converts an Unicode domain to ASCII representation, as defined in the IDNA RFC */
 328 PHP_FUNCTION(idn_to_ascii)
 329 {
 330         php_intl_idn_handoff(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_ASCII);
 331 }
 332 /* }}} */
 333 
 334 
 335 /* {{{ proto int idn_to_utf8(string domain[, int options[, int variant[, array &idna_info]]])
 336    Converts an ASCII representation of the domain to Unicode (UTF-8), as defined in the IDNA RFC */
 337 PHP_FUNCTION(idn_to_utf8)
 338 {
 339         php_intl_idn_handoff(INTERNAL_FUNCTION_PARAM_PASSTHRU, INTL_IDN_TO_UTF8);
 340 }
 341 /* }}} */
 342 
 343 
 344 /*
 345  * Local variables:
 346  * tab-width: 4
 347  * c-basic-offset: 4
 348  * End:
 349  * vim600: fdm=marker
 350  * vim: noet sw=4 ts=4
 351  */

/* [<][>][^][v][top][bottom][index][help] */