root/ext/intl/locale/locale_methods.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. findOffset
  2. getPreferredTag
  3. getStrrtokenPos
  4. getSingletonPos
  5. PHP_NAMED_FUNCTION
  6. PHP_NAMED_FUNCTION
  7. get_icu_value_internal
  8. get_icu_value_src_php
  9. PHP_FUNCTION
  10. PHP_FUNCTION
  11. PHP_FUNCTION
  12. get_icu_disp_value_src_php
  13. PHP_FUNCTION
  14. PHP_FUNCTION
  15. PHP_FUNCTION
  16. PHP_FUNCTION
  17. PHP_FUNCTION
  18. PHP_FUNCTION
  19. PHP_FUNCTION
  20. append_key_value
  21. add_prefix
  22. append_multiple_key_values
  23. handleAppendResult
  24. PHP_FUNCTION
  25. get_private_subtags
  26. add_array_entry
  27. PHP_FUNCTION
  28. PHP_FUNCTION
  29. strToMatch
  30. PHP_FUNCTION
  31. array_cleanup
  32. lookup_loc_range
  33. PHP_FUNCTION
  34. PHP_FUNCTION

   1 /*
   2    +----------------------------------------------------------------------+
   3    | PHP Version 5                                                        |
   4    +----------------------------------------------------------------------+
   5    | This source file is subject to version 3.01 of the PHP license,      |
   6    | that is bundled with this package in the file LICENSE, and is        |
   7    | available through the world-wide-web at the following url:           |
   8    | http://www.php.net/license/3_01.txt                                  |
   9    | If you did not receive a copy of the PHP license and are unable to   |
  10    | obtain it through the world-wide-web, please send a note to          |
  11    | license@php.net so we can mail you a copy immediately.               |
  12    +----------------------------------------------------------------------+
  13    | Authors: Kirti Velankar <kirtig@yahoo-inc.com>                       |
  14    +----------------------------------------------------------------------+
  15 */
  16 
  17 /* $Id$ */
  18 
  19 #ifdef HAVE_CONFIG_H
  20 #include "config.h"
  21 #endif
  22 
  23 #include <unicode/ustring.h>
  24 #include <unicode/udata.h>
  25 #include <unicode/putil.h>
  26 #include <unicode/ures.h>
  27 
  28 #include "php_intl.h"
  29 #include "locale.h"
  30 #include "locale_class.h"
  31 #include "locale_methods.h"
  32 #include "intl_convert.h"
  33 #include "intl_data.h"
  34 
  35 #include <zend_API.h>
  36 #include <zend.h>
  37 #include <php.h>
  38 #include "main/php_ini.h"
  39 #include "ext/standard/php_smart_str.h"
  40 
  41 ZEND_EXTERN_MODULE_GLOBALS( intl )
  42 
/* String pieces used when composing locale identifiers.
 * SEPARATOR is what append_key_value() and add_prefix() put between subtags;
 * PRIVATE_PREFIX is what add_prefix() emits for private-use subtags;
 * DISP_NAME is the pseudo tag name get_icu_disp_value_src_php() uses for
 * "the full display name". */
#define SEPARATOR "_"
#define SEPARATOR1 "-"
#define DELIMITER "-_"
#define EXTLANG_PREFIX "a"
#define PRIVATE_PREFIX "x"
#define DISP_NAME "name"

/* Upper bounds; presumably the maximum number of variant / extlang / private
 * subtags and of lookup language tags handled -- the users are outside this
 * part of the file, TODO confirm. */
#define MAX_NO_VARIANT  15
#define MAX_NO_EXTLANG  3
#define MAX_NO_PRIVATE  15
#define MAX_NO_LOOKUP_LANG_TAG  100

/* Returned by append_key_value() when the requested key is absent */
#define LOC_NOT_FOUND 1

/* Sizes required for the strings "variant15" , "extlang3", "private12" etc. */
#define VARIANT_KEYNAME_LEN  11
#define EXTLANG_KEYNAME_LEN  10
#define PRIVATE_KEYNAME_LEN  11
/* Grandfathered language tags, based on the IANA registry at the time of
 * writing this code.
 *
 * The list is NULL-terminated so it can be scanned with findOffset().
 * ORDER MATTERS: the first six entries (first row) are the tags that have a
 * preferred replacement, and each one sits at the same index as its
 * replacement in LOC_PREFERRED_GRANDFATHERED. getPreferredTag() relies on
 * that alignment.
 */
static const char * const LOC_GRANDFATHERED[] = {
        "art-lojban",           "i-klingon",            "i-lux",                        "i-navajo",             "no-bok",               "no-nyn",
        "cel-gaulish",          "en-GB-oed",            "i-ami",                
        "i-bnn",                "i-default",            "i-enochian",   
        "i-mingo",              "i-pwn",                "i-tao", 
        "i-tay",                "i-tsu",                "sgn-BE-fr",
        "sgn-BE-nl",            "sgn-CH-de",            "zh-cmn",
        "zh-cmn-Hans",          "zh-cmn-Hant",          "zh-gan" ,
        "zh-guoyu",             "zh-hakka",             "zh-min",
        "zh-min-nan",           "zh-wuu",               "zh-xiang",     
        "zh-yue",               NULL
};
  78 
  79 /* Based on IANA registry at the time of writing this code
  80 *  This array lists the preferred values for the grandfathered tags if applicable
  81 *  This is in sync with the array LOC_GRANDFATHERED      
  82 *  e.g. the offsets of the grandfathered tags match the offset of the preferred  value
  83 */
  84 static const int                LOC_PREFERRED_GRANDFATHERED_LEN = 6;
  85 static const char * const       LOC_PREFERRED_GRANDFATHERED[]  = {
  86         "jbo",                  "tlh",                  "lb",
  87         "nv",                   "nb",                   "nn",                   
  88         NULL
  89 };
  90 
  91 /*returns TRUE if a is an ID separator FALSE otherwise*/
  92 #define isIDSeparator(a) (a == '_' || a == '-')
  93 #define isKeywordSeparator(a) (a == '@' )
  94 #define isEndOfTag(a) (a == '\0' )
  95 
  96 #define isPrefixLetter(a) ((a=='x')||(a=='X')||(a=='i')||(a=='I'))
  97 
  98 /*returns TRUE if one of the special prefixes is here (s=string)
  99   'x-' or 'i-' */
 100 #define isIDPrefix(s) (isPrefixLetter(s[0])&&isIDSeparator(s[1]))
 101 #define isKeywordPrefix(s) ( isKeywordSeparator(s[0]) )
 102 
 103 /* Dot terminates it because of POSIX form  where dot precedes the codepage
 104  * except for variant */
 105 #define isTerminator(a)  ((a==0)||(a=='.')||(a=='@'))
 106 
 107 /* {{{ return the offset of 'key' in the array 'list'.
 108  * returns -1 if not present */
 109 static int16_t findOffset(const char* const* list, const char* key)
 110 {
 111         const char* const* anchor = list;
 112         while (*list != NULL) {
 113                 if (strcmp(key, *list) == 0) {
 114                         return (int16_t)(list - anchor);
 115                 }
 116                 list++;
 117         }
 118 
 119         return -1;
 120 
 121 }
 122 /*}}}*/
 123 
 124 static char* getPreferredTag(const char* gf_tag)
 125 { 
 126         char* result = NULL;
 127         int grOffset = 0;
 128 
 129         grOffset = findOffset( LOC_GRANDFATHERED ,gf_tag);
 130         if(grOffset < 0) {
 131                 return NULL;
 132         }
 133         if( grOffset < LOC_PREFERRED_GRANDFATHERED_LEN ){
 134                 /* return preferred tag */
 135                 result = estrdup( LOC_PREFERRED_GRANDFATHERED[grOffset] );
 136         } else {
 137                 /* Return correct grandfathered language tag */
 138                 result = estrdup( LOC_GRANDFATHERED[grOffset] );
 139         }
 140         return result;
 141 }
 142 
 143 /* {{{
 144 * returns the position of next token for lookup 
 145 * or -1 if no token
 146 * strtokr equivalent search for token in reverse direction 
 147 */
 148 static int getStrrtokenPos(char* str, int savedPos)
 149 {
 150         int result =-1;
 151         int i;
 152         
 153         for(i=savedPos-1; i>=0; i--) {
 154                 if(isIDSeparator(*(str+i)) ){
 155                         /* delimiter found; check for singleton */
 156                         if(i>=2 && isIDSeparator(*(str+i-2)) ){
 157                                 /* a singleton; so send the position of token before the singleton */
 158                                 result = i-2;
 159                         } else {
 160                                 result = i;
 161                         }
 162                         break;
 163                 }
 164         }
 165         if(result < 1){
 166                 /* Just in case inavlid locale e.g. '-x-xyz' or '-sl_Latn' */
 167                 result =-1;
 168         }
 169         return result;
 170 }
 171 /* }}} */
 172 
 173 /* {{{
 174 * returns the position of a singleton if present 
 175 * returns -1 if no singleton
 176 * strtok equivalent search for singleton
 177 */
 178 static int getSingletonPos(const char* str)
 179 {
 180         int result =-1;
 181         int i=0;
 182         int len = 0;
 183         
 184         if( str && ((len=strlen(str))>0) ){
 185                 for( i=0; i<len ; i++){
 186                         if( isIDSeparator(*(str+i)) ){
 187                                 if( i==1){
 188                                         /* string is of the form x-avy or a-prv1 */
 189                                         result =0;
 190                                         break;
 191                                 } else {
 192                                         /* delimiter found; check for singleton */
 193                                         if( isIDSeparator(*(str+i+2)) ){
 194                                                 /* a singleton; so send the position of separator before singleton */
 195                                                 result = i+1;
 196                                                 break;
 197                                         }
 198                                 }
 199                         }
 200                 }/* end of for */
 201                 
 202         }
 203         return result;
 204 }
 205 /* }}} */
 206 
/* {{{ proto static string Locale::getDefault(  )
   Get default locale */
/* }}} */
/* {{{ proto static string locale_get_default( )
   Get default locale
   Takes no arguments and returns whatever intl_locale_get_default() reports. */
PHP_NAMED_FUNCTION(zif_locale_get_default)
{
        /* TRUE = duplicate: the return value gets its own copy of the string */
        RETURN_STRING( intl_locale_get_default( TSRMLS_C ), TRUE );
}

/* }}} */
 218 
 219 /* {{{ proto static string Locale::setDefault( string $locale )
 220    Set default locale */
 221 /* }}} */
 222 /* {{{ proto static string locale_set_default( string $locale )
 223    Set default locale */
 224 PHP_NAMED_FUNCTION(zif_locale_set_default)
 225 {
 226         char* locale_name = NULL;
 227         int   len=0;    
 228 
 229         if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC,  "s",
 230                 &locale_name ,&len ) == FAILURE)
 231         {
 232                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
 233                                 "locale_set_default: unable to parse input params", 0 TSRMLS_CC );
 234 
 235                 RETURN_FALSE;
 236         }
 237 
 238         if(len == 0) {
 239                 locale_name =  (char *)uloc_getDefault() ;
 240                 len = strlen(locale_name);
 241         }
 242 
 243         zend_alter_ini_entry(LOCALE_INI_NAME, sizeof(LOCALE_INI_NAME), locale_name, len, PHP_INI_USER, PHP_INI_STAGE_RUNTIME);  
 244 
 245         RETURN_TRUE;
 246 }
 247 /* }}} */
 248 
 249 /* {{{
 250 * Gets the value from ICU 
 251 * common code shared by get_primary_language,get_script or get_region or get_variant
 252 * result = 0 if error, 1 if successful , -1 if no value
 253 */
 254 static char* get_icu_value_internal( const char* loc_name , char* tag_name, int* result , int fromParseLocale)
 255 {
 256         char*           tag_value       = NULL;
 257         int32_t         tag_value_len   = 512;
 258 
 259         int             singletonPos    = 0;
 260         char*           mod_loc_name    = NULL;
 261         int             grOffset        = 0;
 262 
 263         int32_t         buflen          = 512;
 264         UErrorCode      status          = U_ZERO_ERROR;
 265 
 266 
 267         if( strcmp(tag_name, LOC_CANONICALIZE_TAG) != 0 ){
 268                 /* Handle  grandfathered languages */
 269                 grOffset =  findOffset( LOC_GRANDFATHERED , loc_name );
 270                 if( grOffset >= 0 ){
 271                         if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
 272                                 return estrdup(loc_name);
 273                         } else {
 274                                 /* Since Grandfathered , no value , do nothing , retutn NULL */
 275                                 return NULL;
 276                         }
 277                 }
 278 
 279         if( fromParseLocale==1 ){
 280                 /* Handle singletons */
 281                 if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
 282                         if( strlen(loc_name)>1 && (isIDPrefix(loc_name) == 1) ){
 283                                 return estrdup(loc_name);
 284                         }
 285                 }
 286 
 287                 singletonPos = getSingletonPos( loc_name );     
 288                 if( singletonPos == 0){
 289                         /* singleton at start of script, region , variant etc.
 290                          * or invalid singleton at start of language */
 291                         return NULL;
 292                 } else if( singletonPos > 0 ){
 293                         /* singleton at some position except at start
 294                          * strip off the singleton and rest of the loc_name */
 295                         mod_loc_name = estrndup ( loc_name , singletonPos-1);
 296                 }
 297         } /* end of if fromParse */
 298 
 299         } /* end of if != LOC_CANONICAL_TAG */
 300 
 301         if( mod_loc_name == NULL){
 302                 mod_loc_name = estrdup(loc_name );      
 303         }
 304 
 305         /* Proceed to ICU */
 306     do{
 307                 tag_value = erealloc( tag_value , buflen  );
 308                 tag_value_len = buflen;
 309 
 310                 if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
 311                         buflen = uloc_getScript ( mod_loc_name ,tag_value , tag_value_len , &status);
 312                 }
 313                 if( strcmp(tag_name , LOC_LANG_TAG )==0 ){
 314                         buflen = uloc_getLanguage ( mod_loc_name ,tag_value , tag_value_len , &status);
 315                 }
 316                 if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
 317                         buflen = uloc_getCountry ( mod_loc_name ,tag_value , tag_value_len , &status);
 318                 }
 319                 if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
 320                         buflen = uloc_getVariant ( mod_loc_name ,tag_value , tag_value_len , &status);
 321                 }
 322                 if( strcmp(tag_name , LOC_CANONICALIZE_TAG)==0 ){
 323                         buflen = uloc_canonicalize ( mod_loc_name ,tag_value , tag_value_len , &status);
 324                 }
 325 
 326                 if( U_FAILURE( status ) ) {
 327                         if( status == U_BUFFER_OVERFLOW_ERROR ) {
 328                                 status = U_ZERO_ERROR;
 329                                 continue;
 330                         }
 331 
 332                         /* Error in retriving data */
 333                         *result = 0;
 334                         if( tag_value ){
 335                                 efree( tag_value );
 336                         }
 337                         if( mod_loc_name ){
 338                                 efree( mod_loc_name);
 339                         }
 340                         return NULL;
 341                 }
 342         } while( buflen > tag_value_len );
 343 
 344         if(  buflen ==0 ){
 345                 /* No value found */
 346                 *result = -1;
 347                 if( tag_value ){
 348                         efree( tag_value );
 349                 }
 350                 if( mod_loc_name ){
 351                         efree( mod_loc_name);
 352                 }
 353                 return NULL;
 354         } else {
 355                 *result = 1;
 356         }
 357 
 358         if( mod_loc_name ){
 359                 efree( mod_loc_name);
 360         }
 361         return tag_value;
 362 }
 363 /* }}} */
 364 
 365 /* {{{
 366 * Gets the value from ICU , called when PHP userspace function is called
 367 * common code shared by get_primary_language,get_script or get_region or get_variant
 368 */
 369 static void get_icu_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS) 
 370 {
 371 
 372         const char* loc_name            = NULL;
 373         int         loc_name_len        = 0;
 374 
 375         char*       tag_value           = NULL;
 376         char*       empty_result        = "";
 377 
 378         int         result              = 0;
 379         char*       msg                 = NULL;
 380 
 381         UErrorCode  status              = U_ZERO_ERROR;
 382 
 383         intl_error_reset( NULL TSRMLS_CC );
 384 
 385         if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
 386         &loc_name ,&loc_name_len ) == FAILURE) {
 387                 spprintf(&msg , 0, "locale_get_%s : unable to parse input params", tag_name );
 388                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 TSRMLS_CC );
 389                 efree(msg);
 390 
 391                 RETURN_FALSE;
 392     }
 393 
 394         if(loc_name_len == 0) {
 395                 loc_name = intl_locale_get_default(TSRMLS_C);
 396         }
 397 
 398         /* Call ICU get */
 399         tag_value = get_icu_value_internal( loc_name , tag_name , &result ,0);
 400 
 401         /* No value found */
 402         if( result == -1 ) {
 403                 if( tag_value){
 404                         efree( tag_value);
 405                 }
 406                 RETURN_STRING( empty_result , TRUE);
 407         }
 408 
 409         /* value found */
 410         if( tag_value){
 411                 RETURN_STRING( tag_value , FALSE);
 412         }
 413 
 414         /* Error encountered while fetching the value */
 415         if( result ==0) {
 416                 spprintf(&msg , 0, "locale_get_%s : unable to get locale %s", tag_name , tag_name );
 417                 intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
 418                 efree(msg);
 419                 RETURN_NULL();
 420         }
 421 
 422 }
 423 /* }}} */
 424 
/* {{{ proto static string Locale::getScript($locale) 
 * gets the script for the $locale 
 }}} */
/* {{{ proto static string locale_get_script($locale) 
 * gets the script for the $locale 
 * Thin wrapper: all argument parsing and result handling is done by
 * get_icu_value_src_php(), called here with LOC_SCRIPT_TAG.
 */
PHP_FUNCTION( locale_get_script ) 
{
        get_icu_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
 436 
/* {{{ proto static string Locale::getRegion($locale) 
 * gets the region for the $locale 
 }}} */
/* {{{ proto static string locale_get_region($locale) 
 * gets the region for the $locale 
 * Thin wrapper: all argument parsing and result handling is done by
 * get_icu_value_src_php(), called here with LOC_REGION_TAG.
 */
PHP_FUNCTION( locale_get_region ) 
{
        get_icu_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
 448 
/* {{{ proto static string Locale::getPrimaryLanguage($locale) 
 * gets the primary language for the $locale 
 }}} */
/* {{{ proto static string locale_get_primary_language($locale) 
 * gets the primary language for the $locale 
 * Thin wrapper: all argument parsing and result handling is done by
 * get_icu_value_src_php(), called here with LOC_LANG_TAG.
 */
PHP_FUNCTION(locale_get_primary_language ) 
{
        get_icu_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
 460 
 461 
/* {{{
 * Common code shared by the locale_get_display_xyz functions.
 *
 * Parses ($locale [, $in_locale]) from the PHP call, asks ICU for the display
 * string of the component selected by tag_name (LOC_LANG_TAG, LOC_SCRIPT_TAG,
 * LOC_REGION_TAG, LOC_VARIANT_TAG, or DISP_NAME for the whole locale),
 * converts the UTF-16 answer to UTF-8 and stores it in return_value.
 *
 * Sets return_value to FALSE when: the arguments cannot be parsed, $locale is
 * longer than ULOC_FULLNAME_CAPACITY, $locale is a grandfathered tag and a
 * component other than the language (or the full name) was requested, ICU
 * fails, or the UTF-8 conversion fails.
 }}} */
static void get_icu_disp_value_src_php( char* tag_name, INTERNAL_FUNCTION_PARAMETERS) 
{
        const char* loc_name            = NULL;
        int         loc_name_len        = 0;

        const char* disp_loc_name       = NULL;
        int         disp_loc_name_len   = 0;
        /* set when disp_loc_name is our own estrdup'ed copy and must be freed */
        int         free_loc_name       = 0;

        UChar*      disp_name           = NULL;
        int32_t     disp_name_len       = 0;

        char*       mod_loc_name        = NULL;

        int32_t     buflen              = 512;
        UErrorCode  status              = U_ZERO_ERROR;

        char*       utf8value           = NULL;
        int         utf8value_len       = 0;

        char*       msg                 = NULL;
        int         grOffset            = 0;

        intl_error_reset( NULL TSRMLS_CC );

        if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s|s",
                &loc_name, &loc_name_len , 
                &disp_loc_name ,&disp_loc_name_len ) == FAILURE)
        {
                spprintf(&msg , 0, "locale_get_display_%s : unable to parse input params", tag_name );
                intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 TSRMLS_CC );
                efree(msg);
                RETURN_FALSE;
        }

    if(loc_name_len > ULOC_FULLNAME_CAPACITY) {
        /* See bug 67397: overlong locale names cause trouble in uloc_getDisplayName */
                spprintf(&msg , 0, "locale_get_display_%s : name too long", tag_name );
                intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,  msg , 1 TSRMLS_CC );
                efree(msg);
                RETURN_FALSE;
    }

        /* An empty locale means "use the default locale" */
        if(loc_name_len == 0) {
                loc_name = intl_locale_get_default(TSRMLS_C);
        }

        if( strcmp(tag_name, DISP_NAME) != 0 ){
                /* Handle grandfathered languages */
                grOffset = findOffset( LOC_GRANDFATHERED , loc_name );
                if( grOffset >= 0 ){
                        if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
                                /* ask ICU about the preferred replacement tag instead */
                                mod_loc_name = getPreferredTag( loc_name );
                        } else {
                                /* Grandfathered tags have no script/region/variant; report FALSE.
                                 * Nothing has been allocated yet, so there is nothing to free. */
                                RETURN_FALSE;
                        }
                }
        } /* end of if tag_name != DISP_NAME */

        if( mod_loc_name==NULL ){
                mod_loc_name = estrdup( loc_name );
        }
        
        /* Check if disp_loc_name passed , if not use default locale */
        if( !disp_loc_name){
                disp_loc_name = estrdup(intl_locale_get_default(TSRMLS_C));
                free_loc_name = 1;
        }

    /* Get the disp_value for the given locale.
     * Each pass (re)allocates buflen UChars and lets ICU fill them; ICU's
     * return value replaces buflen, and the loop repeats while that value
     * is larger than the capacity just offered. */
    do{
        disp_name = erealloc( disp_name , buflen * sizeof(UChar)  );
        disp_name_len = buflen;

                if( strcmp(tag_name , LOC_LANG_TAG)==0 ){
                        buflen = uloc_getDisplayLanguage ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
                } else if( strcmp(tag_name , LOC_SCRIPT_TAG)==0 ){
                        buflen = uloc_getDisplayScript ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
                } else if( strcmp(tag_name , LOC_REGION_TAG)==0 ){
                        buflen = uloc_getDisplayCountry ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
                } else if( strcmp(tag_name , LOC_VARIANT_TAG)==0 ){
                        buflen = uloc_getDisplayVariant ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
                } else if( strcmp(tag_name , DISP_NAME)==0 ){
                        buflen = uloc_getDisplayName ( mod_loc_name , disp_loc_name , disp_name , disp_name_len , &status);
                }

                /* U_STRING_NOT_TERMINATED_WARNING is admissible here; don't look for it
                 * (the result is later passed on with an explicit length, buflen) */
                if( U_FAILURE( status ) )
                {
                        if( status == U_BUFFER_OVERFLOW_ERROR )
                        {
                                /* too small: clear the error and retry via the loop condition */
                                status = U_ZERO_ERROR;
                                continue;
                        }

                        /* Any other ICU failure: record it, release everything, report FALSE */
                        spprintf(&msg, 0, "locale_get_display_%s : unable to get locale %s", tag_name , tag_name );
                        intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
                        efree(msg);
                        if( disp_name){
                                efree( disp_name );
                        }
                        if( mod_loc_name){
                                efree( mod_loc_name );
                        }
                        if (free_loc_name) {
                                efree((void *)disp_loc_name);
                                disp_loc_name = NULL;
                        }
                        RETURN_FALSE;
                }
        } while( buflen > disp_name_len );

        /* The locale names are no longer needed once ICU has answered */
        if( mod_loc_name){
                efree( mod_loc_name );
        }
        if (free_loc_name) {
                efree((void *)disp_loc_name);
                disp_loc_name = NULL;
        }
        /* Convert display locale name from UTF-16 to UTF-8. */
        intl_convert_utf16_to_utf8( &utf8value, &utf8value_len, disp_name, buflen, &status );
        efree( disp_name );
        if( U_FAILURE( status ) )
        {
                spprintf(&msg, 0, "locale_get_display_%s :error converting display name for %s to UTF-8", tag_name , tag_name );
                intl_error_set( NULL, status, msg , 1 TSRMLS_CC );
                efree(msg);
                RETURN_FALSE;
        }

        /* FALSE: no duplication, return_value takes over utf8value */
        RETVAL_STRINGL( utf8value, utf8value_len , FALSE);

}
/* }}} */
 600 
/* {{{ proto static string Locale::getDisplayName($locale[, $in_locale = null])
* gets the name for the $locale in $in_locale or default_locale
 }}} */
/* {{{ proto static string locale_get_display_name($locale[, $in_locale = null])
* gets the name for the $locale in $in_locale or default_locale
* Thin wrapper around get_icu_disp_value_src_php(), called with DISP_NAME.
*/
PHP_FUNCTION(locale_get_display_name) 
{
    get_icu_disp_value_src_php( DISP_NAME , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
 612 
/* {{{ proto static string Locale::getDisplayLanguage($locale[, $in_locale = null])
* gets the language for the $locale in $in_locale or default_locale
 }}} */
/* {{{ proto static string locale_get_display_language($locale[, $in_locale = null])
* gets the language for the $locale in $in_locale or default_locale
* Thin wrapper around get_icu_disp_value_src_php(), called with LOC_LANG_TAG.
*/
PHP_FUNCTION(locale_get_display_language) 
{
    get_icu_disp_value_src_php( LOC_LANG_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
 624 
/* {{{ proto static string Locale::getDisplayScript($locale, $in_locale = null)
* gets the script for the $locale in $in_locale or default_locale
 }}} */
/* {{{ proto static string locale_get_display_script($locale, $in_locale = null)
* gets the script for the $locale in $in_locale or default_locale
* Thin wrapper around get_icu_disp_value_src_php(), called with LOC_SCRIPT_TAG.
*/
PHP_FUNCTION(locale_get_display_script) 
{
    get_icu_disp_value_src_php( LOC_SCRIPT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
 636 
/* {{{ proto static string Locale::getDisplayRegion($locale, $in_locale = null)
* gets the region for the $locale in $in_locale or default_locale
 }}} */
/* {{{ proto static string locale_get_display_region($locale, $in_locale = null)
* gets the region for the $locale in $in_locale or default_locale
* Thin wrapper around get_icu_disp_value_src_php(), called with LOC_REGION_TAG.
*/
PHP_FUNCTION(locale_get_display_region) 
{
    get_icu_disp_value_src_php( LOC_REGION_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
 648 
/* {{{
* proto static string Locale::getDisplayVariant($locale, $in_locale = null)
* gets the variant for the $locale in $in_locale or default_locale
 }}} */
/* {{{
* proto static string locale_get_display_variant($locale, $in_locale = null)
* gets the variant for the $locale in $in_locale or default_locale
* Thin wrapper around get_icu_disp_value_src_php(), called with LOC_VARIANT_TAG.
*/
PHP_FUNCTION(locale_get_display_variant) 
{
    get_icu_disp_value_src_php( LOC_VARIANT_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
 662 
 663  /* {{{ proto static array getKeywords(string $locale) {
 664  * return an associative array containing keyword-value
 665  * pairs for this locale. The keys are keys to the array (doh!)
 666  * }}}*/
 667  /* {{{ proto static array locale_get_keywords(string $locale) {
 668  * return an associative array containing keyword-value
 669  * pairs for this locale. The keys are keys to the array (doh!)
 670  */ 
 671 PHP_FUNCTION( locale_get_keywords )
 672 {
 673     UEnumeration*   e        = NULL;
 674     UErrorCode      status   = U_ZERO_ERROR;
 675 
 676     const char*         kw_key        = NULL;
 677     int32_t         kw_key_len    = 0;
 678 
 679     const char*         loc_name        = NULL;
 680     int                 loc_name_len    = 0;
 681 
 682 /* 
 683         ICU expects the buffer to be allocated  before calling the function 
 684         and so the buffer size has been explicitly specified 
 685         ICU uloc.h #define      ULOC_KEYWORD_AND_VALUES_CAPACITY   100 
 686         hence the kw_value buffer size is 100
 687 */
 688         char*           kw_value        = NULL;
 689     int32_t     kw_value_len    = 100;
 690 
 691     intl_error_reset( NULL TSRMLS_CC );
 692 
 693     if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
 694         &loc_name, &loc_name_len ) == FAILURE)
 695     {
 696         intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
 697              "locale_get_keywords: unable to parse input params", 0 TSRMLS_CC );
 698 
 699         RETURN_FALSE;
 700     }
 701 
 702     if(loc_name_len == 0) {
 703         loc_name = intl_locale_get_default(TSRMLS_C);
 704     }
 705 
 706         /* Get the keywords */
 707     e = uloc_openKeywords( loc_name, &status );
 708     if( e != NULL )
 709     {
 710                 /* Traverse it, filling the return array. */
 711         array_init( return_value );
 712 
 713         while( ( kw_key = uenum_next( e, &kw_key_len, &status ) ) != NULL ){
 714                         kw_value = ecalloc( 1 , kw_value_len  );
 715 
 716                         /* Get the keyword value for each keyword */
 717                         kw_value_len=uloc_getKeywordValue( loc_name,kw_key, kw_value, kw_value_len ,  &status );
 718                         if (status == U_BUFFER_OVERFLOW_ERROR) {
 719                                 status = U_ZERO_ERROR;
 720                                 kw_value = erealloc( kw_value , kw_value_len+1);
 721                                 kw_value_len=uloc_getKeywordValue( loc_name,kw_key, kw_value, kw_value_len+1 ,  &status );
 722                         } else if(!U_FAILURE(status)) {
 723                                 kw_value = erealloc( kw_value , kw_value_len+1);
 724                         } 
 725                         if (U_FAILURE(status)) {
 726                                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "locale_get_keywords: Error encountered while getting the keyword  value for the  keyword", 0 TSRMLS_CC );
 727                                 if( kw_value){
 728                                         efree( kw_value );
 729                                 }
 730                                 zval_dtor(return_value);
 731                         RETURN_FALSE;
 732                         }
 733 
 734                 add_assoc_stringl( return_value, (char *)kw_key, kw_value , kw_value_len, 0);
 735                 } /* end of while */
 736 
 737         } /* end of if e!=NULL */
 738 
 739     uenum_close( e );
 740 }
 741 /* }}} */
 742 
 /* {{{ proto static string Locale::canonicalize($locale) 
 * @return string the canonicalized locale 
 * }}} */
 /* {{{ proto static string locale_canonicalize(string $locale) 
 * @param string $locale        The locale string to canonicalize
 * Thin wrapper: get_icu_value_src_php() parses the single string argument
 * and is called here with LOC_CANONICALIZE_TAG.
 */
PHP_FUNCTION(locale_canonicalize)
{
        get_icu_value_src_php( LOC_CANONICALIZE_TAG , INTERNAL_FUNCTION_PARAM_PASSTHRU );
}
/* }}} */
 754 
 755 /* {{{ append_key_value 
 756 * Internal function which is called from locale_compose
 757 * gets the value for the key_name and appends to the loc_name
 758 * returns 1 if successful , -1 if not found , 
 759 * 0 if array element is not a string , -2 if buffer-overflow
 760 */
 761 static int append_key_value(smart_str* loc_name, HashTable* hash_arr, char* key_name)
 762 {
 763         zval**  ele_value       = NULL;
 764 
 765         if(zend_hash_find(hash_arr , key_name , strlen(key_name) + 1 ,(void **)&ele_value ) == SUCCESS ) {
 766                 if(Z_TYPE_PP(ele_value)!= IS_STRING ){
 767                         /* element value is not a string */
 768                         return FAILURE;
 769                 }
 770                 if(strcmp(key_name, LOC_LANG_TAG) != 0 && 
 771                    strcmp(key_name, LOC_GRANDFATHERED_LANG_TAG)!=0 ) {
 772                         /* not lang or grandfathered tag */
 773                         smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
 774                 }
 775                 smart_str_appendl(loc_name, Z_STRVAL_PP(ele_value) , Z_STRLEN_PP(ele_value));
 776                 return SUCCESS;
 777         }
 778 
 779         return LOC_NOT_FOUND;
 780 }
 781 /* }}} */
 782 
 783 /* {{{ append_prefix , appends the prefix needed
 784 * e.g. private adds 'x'
 785 */
 786 static void add_prefix(smart_str* loc_name, char* key_name)
 787 {
 788         if( strncmp(key_name , LOC_PRIVATE_TAG , 7) == 0 ){
 789                 smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
 790                 smart_str_appendl(loc_name, PRIVATE_PREFIX , sizeof(PRIVATE_PREFIX)-1);
 791         }
 792 }
 793 /* }}} */
 794 
 795 /* {{{ append_multiple_key_values 
 796 * Internal function which is called from locale_compose
 797 * gets the multiple values for the key_name and appends to the loc_name
 798 * used for 'variant','extlang','private' 
 799 * returns 1 if successful , -1 if not found , 
 800 * 0 if array element is not a string , -2 if buffer-overflow
 801 */
 802 static int append_multiple_key_values(smart_str* loc_name, HashTable* hash_arr, char* key_name TSRMLS_DC)
 803 {
 804         zval**  ele_value       = NULL;
 805         int     i               = 0;
 806         int     isFirstSubtag   = 0;
 807         int     max_value       = 0;
 808 
 809         /* Variant/ Extlang/Private etc. */
 810         if( zend_hash_find( hash_arr , key_name , strlen(key_name) + 1 ,(void **)&ele_value ) == SUCCESS ) {
 811                 if( Z_TYPE_PP(ele_value) == IS_STRING ){
 812                         add_prefix( loc_name , key_name);
 813 
 814                         smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
 815                         smart_str_appendl(loc_name, Z_STRVAL_PP(ele_value) , Z_STRLEN_PP(ele_value));
 816                         return SUCCESS;
 817                 } else if(Z_TYPE_PP(ele_value) == IS_ARRAY ) {
 818                         HashPosition pos;
 819                         HashTable *arr = HASH_OF(*ele_value);
 820                         zval **data = NULL;
 821 
 822                         zend_hash_internal_pointer_reset_ex(arr, &pos);
 823                         while(zend_hash_get_current_data_ex(arr, (void **)&data, &pos) != FAILURE) {
 824                                 if(Z_TYPE_PP(data) != IS_STRING) {
 825                                         return FAILURE;
 826                                 }
 827                                 if (isFirstSubtag++ == 0){
 828                                         add_prefix(loc_name , key_name);
 829                                 }
 830                                 smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
 831                                 smart_str_appendl(loc_name, Z_STRVAL_PP(data) , Z_STRLEN_PP(data));
 832                                 zend_hash_move_forward_ex(arr, &pos);
 833                         }
 834                         return SUCCESS;
 835                 } else {
 836                         return FAILURE;
 837                 }
 838         } else {
 839                 char cur_key_name[31];
 840                 /* Decide the max_value: the max. no. of elements allowed */
 841                 if( strcmp(key_name , LOC_VARIANT_TAG) ==0 ){
 842                         max_value  = MAX_NO_VARIANT;
 843                 }
 844                 if( strcmp(key_name , LOC_EXTLANG_TAG) ==0 ){
 845                         max_value  = MAX_NO_EXTLANG;
 846                 }
 847                 if( strcmp(key_name , LOC_PRIVATE_TAG) ==0 ){
 848                         max_value  = MAX_NO_PRIVATE;
 849                 }
 850 
 851                 /* Multiple variant values as variant0, variant1 ,variant2 */
 852                 isFirstSubtag = 0;
 853                 for( i=0 ; i< max_value; i++ ){  
 854                         snprintf( cur_key_name , 30, "%s%d", key_name , i);     
 855                         if( zend_hash_find( hash_arr , cur_key_name , strlen(cur_key_name) + 1,(void **)&ele_value ) == SUCCESS ){
 856                                 if( Z_TYPE_PP(ele_value)!= IS_STRING ){
 857                                         /* variant is not a string */
 858                                         return FAILURE;
 859                                 }
 860                                 /* Add the contents */
 861                                 if (isFirstSubtag++ == 0){
 862                                         add_prefix(loc_name , cur_key_name);
 863                                 }
 864                                 smart_str_appendl(loc_name, SEPARATOR , sizeof(SEPARATOR)-1);
 865                                 smart_str_appendl(loc_name, Z_STRVAL_PP(ele_value) , Z_STRLEN_PP(ele_value));
 866                         }
 867                 } /* end of for */
 868         } /* end of else */
 869 
 870         return SUCCESS;
 871 }
 872 /* }}} */
 873 
 874 /*{{{
 875 * If applicable sets error message and aborts locale_compose gracefully
 876 * returns 0  if locale_compose needs to be aborted 
 877 * otherwise returns 1
 878 */
 879 static int handleAppendResult( int result, smart_str* loc_name TSRMLS_DC)
 880 {
 881         intl_error_reset( NULL TSRMLS_CC );
 882         if( result == FAILURE) {
 883                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
 884                          "locale_compose: parameter array element is not a string", 0 TSRMLS_CC );
 885                 smart_str_free(loc_name);
 886                 return 0;
 887         }
 888         return 1;
 889 }
 890 /* }}} */
 891 
 892 #define RETURN_SMART_STR(s) smart_str_0((s)); RETURN_STRINGL((s)->c, (s)->len, 0)
 893 /* {{{ proto static string Locale::composeLocale($array) 
 894 * Creates a locale by combining the parts of locale-ID passed   
 895 * }}} */
 896 /* {{{ proto static string compose_locale($array) 
 897 * Creates a locale by combining the parts of locale-ID passed   
 898 * }}} */
 899 PHP_FUNCTION(locale_compose)
 900 {
 901         smart_str       loc_name_s = {0};
 902         smart_str *loc_name = &loc_name_s;
 903         zval*                   arr     = NULL;
 904         HashTable*              hash_arr = NULL;
 905         int                     result = 0;
 906 
 907         intl_error_reset( NULL TSRMLS_CC );
 908 
 909         if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "a",
 910                 &arr) == FAILURE)
 911         {
 912                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
 913                          "locale_compose: unable to parse input params", 0 TSRMLS_CC );
 914                 RETURN_FALSE;
 915         }
 916 
 917         hash_arr = HASH_OF( arr );
 918 
 919         if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 )
 920                 RETURN_FALSE;
 921 
 922         /* Check for grandfathered first */
 923         result = append_key_value(loc_name, hash_arr,  LOC_GRANDFATHERED_LANG_TAG);     
 924         if( result == SUCCESS){
 925                 RETURN_SMART_STR(loc_name);
 926         }
 927         if( !handleAppendResult( result, loc_name TSRMLS_CC)){
 928                 RETURN_FALSE;
 929         }
 930 
 931         /* Not grandfathered */
 932         result = append_key_value(loc_name, hash_arr , LOC_LANG_TAG);   
 933         if( result == LOC_NOT_FOUND ){
 934                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
 935                 "locale_compose: parameter array does not contain 'language' tag.", 0 TSRMLS_CC );
 936                 smart_str_free(loc_name);
 937                 RETURN_FALSE;
 938         }
 939         if( !handleAppendResult( result, loc_name TSRMLS_CC)){
 940                 RETURN_FALSE;
 941         }
 942 
 943         /* Extlang */
 944         result = append_multiple_key_values(loc_name, hash_arr , LOC_EXTLANG_TAG TSRMLS_CC);
 945         if( !handleAppendResult( result, loc_name TSRMLS_CC)){
 946                 RETURN_FALSE;
 947         }
 948 
 949         /* Script */
 950         result = append_key_value(loc_name, hash_arr , LOC_SCRIPT_TAG); 
 951         if( !handleAppendResult( result, loc_name TSRMLS_CC)){
 952                 RETURN_FALSE;
 953         }
 954         
 955         /* Region */
 956         result = append_key_value( loc_name, hash_arr , LOC_REGION_TAG);
 957         if( !handleAppendResult( result, loc_name TSRMLS_CC)){
 958                 RETURN_FALSE;
 959         }
 960 
 961         /* Variant */
 962         result = append_multiple_key_values( loc_name, hash_arr , LOC_VARIANT_TAG TSRMLS_CC); 
 963         if( !handleAppendResult( result, loc_name TSRMLS_CC)){
 964                 RETURN_FALSE;
 965         }
 966 
 967         /* Private */
 968         result = append_multiple_key_values( loc_name, hash_arr , LOC_PRIVATE_TAG TSRMLS_CC);
 969         if( !handleAppendResult( result, loc_name TSRMLS_CC)){
 970                 RETURN_FALSE;
 971         }
 972 
 973         RETURN_SMART_STR(loc_name);
 974 }
 975 /* }}} */
 976 
 977 
 978 /*{{{
 979 * Parses the locale and returns private subtags  if existing
 980 * else returns NULL
 981 * e.g. for locale='en_US-x-prv1-prv2-prv3'
 982 * returns a pointer to the string 'prv1-prv2-prv3'
 983 */
 984 static char* get_private_subtags(const char* loc_name)
 985 {
 986         char*   result =NULL;
 987         int     singletonPos = 0;
 988         int     len =0; 
 989         const char*     mod_loc_name =NULL;
 990 
 991         if( loc_name && (len = strlen(loc_name)>0 ) ){
 992                 mod_loc_name = loc_name ; 
 993                 len   = strlen(mod_loc_name);
 994                 while( (singletonPos = getSingletonPos(mod_loc_name))!= -1){
 995 
 996                         if( singletonPos!=-1){ 
 997                                 if( (*(mod_loc_name+singletonPos)=='x') || (*(mod_loc_name+singletonPos)=='X') ){               
 998                                         /* private subtag start found */
 999                                         if( singletonPos + 2 ==  len){
1000                                                 /* loc_name ends with '-x-' ; return  NULL */
1001                                         }
1002                                         else{
1003                                                 /* result = mod_loc_name + singletonPos +2; */
1004                                                 result = estrndup(mod_loc_name + singletonPos+2  , (len -( singletonPos +2) ) );
1005                                         }
1006                                         break;
1007                                 }
1008                                 else{
1009                                         if( singletonPos + 1 >=  len){
1010                                                 /* String end */
1011                                                 break;
1012                                         } else {
1013                                                 /* singleton found but not a private subtag , hence check further in the string for the private subtag */
1014                                                 mod_loc_name = mod_loc_name + singletonPos +1;
1015                                                 len = strlen(mod_loc_name);
1016                                         }
1017                                 }
1018                         }
1019 
1020                 } /* end of while */
1021         }
1022         
1023         return result;
1024 }
1025 /* }}} */
1026 
1027 /* {{{ code used by locale_parse
1028 */
1029 static int add_array_entry(const char* loc_name, zval* hash_arr, char* key_name TSRMLS_DC)
1030 {
1031         char*   key_value       = NULL;
1032         char*   cur_key_name    = NULL;
1033         char*   token           = NULL;
1034         char*   last_ptr        = NULL;
1035 
1036         int     result          = 0;
1037         int     cur_result      = 0;
1038         int     cnt             = 0;
1039 
1040 
1041         if( strcmp(key_name , LOC_PRIVATE_TAG)==0 ){
1042                 key_value = get_private_subtags( loc_name );
1043                 result = 1;
1044         } else {
1045                 key_value = get_icu_value_internal( loc_name , key_name , &result,1 );
1046         }
1047         if( (strcmp(key_name , LOC_PRIVATE_TAG)==0) || 
1048                 ( strcmp(key_name , LOC_VARIANT_TAG)==0) ){
1049                 if( result > 0 && key_value){
1050                         /* Tokenize on the "_" or "-"  */
1051                         token = php_strtok_r( key_value , DELIMITER ,&last_ptr);        
1052                         if( cur_key_name ){
1053                                 efree( cur_key_name);
1054                         }
1055                         cur_key_name = (char*)ecalloc( 25,  25);
1056                         sprintf( cur_key_name , "%s%d", key_name , cnt++);      
1057                         add_assoc_string( hash_arr, cur_key_name , token ,TRUE );
1058                         /* tokenize on the "_" or "-" and stop  at singleton if any */
1059                         while( (token = php_strtok_r(NULL , DELIMITER , &last_ptr)) && (strlen(token)>1) ){
1060                                 sprintf( cur_key_name , "%s%d", key_name , cnt++);      
1061                                 add_assoc_string( hash_arr, cur_key_name , token , TRUE );
1062                         }
1063 /*
1064                         if( strcmp(key_name, LOC_PRIVATE_TAG) == 0 ){
1065                         }
1066 */
1067                 }
1068         } else {
1069                 if( result == 1 ){
1070                         add_assoc_string( hash_arr, key_name , key_value , TRUE );
1071                         cur_result = 1;
1072                 }
1073         }
1074 
1075         if( cur_key_name ){
1076                 efree( cur_key_name);
1077         }
1078         /*if( key_name != LOC_PRIVATE_TAG && key_value){*/
1079         if( key_value){
1080                 efree(key_value);       
1081         }
1082         return cur_result;
1083 }
1084 /* }}} */
1085 
1086 /* {{{ proto static array Locale::parseLocale($locale) 
1087 * parses a locale-id into an array the different parts of it
1088  }}} */
1089 /* {{{ proto static array parse_locale($locale) 
1090 * parses a locale-id into an array the different parts of it
1091 */
1092 PHP_FUNCTION(locale_parse)
1093 {
1094     const char* loc_name        = NULL;
1095     int         loc_name_len    = 0;
1096     int         grOffset        = 0;
1097 
1098     intl_error_reset( NULL TSRMLS_CC );
1099 
1100     if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
1101         &loc_name, &loc_name_len ) == FAILURE)
1102     {
1103         intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1104              "locale_parse: unable to parse input params", 0 TSRMLS_CC );
1105 
1106         RETURN_FALSE;
1107     }
1108 
1109     if(loc_name_len == 0) {
1110         loc_name = intl_locale_get_default(TSRMLS_C);
1111     }
1112 
1113         array_init( return_value );
1114 
1115         grOffset =  findOffset( LOC_GRANDFATHERED , loc_name );
1116         if( grOffset >= 0 ){
1117                 add_assoc_string( return_value , LOC_GRANDFATHERED_LANG_TAG , estrdup(loc_name) ,FALSE );
1118         }
1119         else{
1120                 /* Not grandfathered */
1121                 add_array_entry( loc_name , return_value , LOC_LANG_TAG TSRMLS_CC);
1122                 add_array_entry( loc_name , return_value , LOC_SCRIPT_TAG TSRMLS_CC);
1123                 add_array_entry( loc_name , return_value , LOC_REGION_TAG TSRMLS_CC);
1124                 add_array_entry( loc_name , return_value , LOC_VARIANT_TAG TSRMLS_CC);
1125                 add_array_entry( loc_name , return_value , LOC_PRIVATE_TAG TSRMLS_CC);
1126         }
1127 }
1128 /* }}} */
1129 
1130 /* {{{ proto static array Locale::getAllVariants($locale)
1131 * gets an array containing the list of variants, or null
1132  }}} */
1133 /* {{{ proto static array locale_get_all_variants($locale)
1134 * gets an array containing the list of variants, or null
1135 */
1136 PHP_FUNCTION(locale_get_all_variants)
1137 {
1138         const char*     loc_name        = NULL;
1139         int             loc_name_len    = 0;
1140 
1141         int     result          = 0;
1142         char*   token           = NULL;
1143         char*   variant         = NULL;
1144         char*   saved_ptr       = NULL;
1145 
1146         intl_error_reset( NULL TSRMLS_CC );
1147         
1148         if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s",
1149         &loc_name, &loc_name_len ) == FAILURE)
1150         {
1151                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1152              "locale_parse: unable to parse input params", 0 TSRMLS_CC );
1153 
1154                 RETURN_FALSE;
1155         }
1156 
1157         if(loc_name_len == 0) {
1158                 loc_name = intl_locale_get_default(TSRMLS_C);
1159         }
1160 
1161 
1162         array_init( return_value );
1163 
1164         /* If the locale is grandfathered, stop, no variants */
1165         if( findOffset( LOC_GRANDFATHERED , loc_name ) >=  0 ){ 
1166                 /* ("Grandfathered Tag. No variants."); */
1167         }
1168         else {  
1169         /* Call ICU variant */
1170                 variant = get_icu_value_internal( loc_name , LOC_VARIANT_TAG , &result ,0);
1171                 if( result > 0 && variant){
1172                         /* Tokenize on the "_" or "-" */
1173                         token = php_strtok_r( variant , DELIMITER , &saved_ptr);        
1174                         add_next_index_stringl( return_value, token , strlen(token) ,TRUE );
1175                         /* tokenize on the "_" or "-" and stop  at singleton if any     */
1176                         while( (token = php_strtok_r(NULL , DELIMITER, &saved_ptr)) && (strlen(token)>1) ){
1177                                 add_next_index_stringl( return_value, token , strlen(token) ,TRUE );
1178                         }
1179                 }
1180                 if( variant ){
1181                         efree( variant );
1182                 }
1183         }
1184                         
1185 
1186 }
1187 /* }}} */
1188 
/*{{{
* Converts str to lower case and replaces every hyphen with an underscore,
* writing the NUL-terminated result to retstr.
* retstr must have room for strlen(str) + 1 bytes.
* Returns 1 on success. Returns 0, leaving retstr untouched, when str is
* NULL or empty.
*/
static int strToMatch(const char* str ,char *retstr)
{
	if (!str || str[0] == '\0') {
		return 0;
	}

	for (; *str != '\0'; str++, retstr++) {
		if (*str == '-') {
			*retstr = '_';
		} else {
			/* tolower() requires a value representable as unsigned char */
			*retstr = (char)tolower((unsigned char)*str);
		}
	}
	*retstr = '\0';

	return 1;
}
/* }}} */
1221 
1222 /* {{{ proto static boolean Locale::filterMatches(string $langtag, string $locale[, bool $canonicalize])
1223 * Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm 
1224 */
1225 /* }}} */
1226 /* {{{ proto boolean locale_filter_matches(string $langtag, string $locale[, bool $canonicalize])
1227 * Checks if a $langtag filter matches with $locale according to RFC 4647's basic filtering algorithm 
1228 */
1229 PHP_FUNCTION(locale_filter_matches)
1230 {
1231         char*           lang_tag        = NULL;
1232         int             lang_tag_len    = 0;
1233         const char*     loc_range       = NULL;
1234         int             loc_range_len   = 0;
1235 
1236         int             result          = 0;
1237         char*           token           = 0;
1238         char*           chrcheck        = NULL;
1239 
1240         char*           can_lang_tag    = NULL;
1241         char*           can_loc_range   = NULL;
1242 
1243         char*           cur_lang_tag    = NULL;
1244         char*           cur_loc_range   = NULL;
1245 
1246         zend_bool       boolCanonical   = 0;    
1247         UErrorCode      status          = U_ZERO_ERROR;
1248 
1249         intl_error_reset( NULL TSRMLS_CC );
1250         
1251         if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "ss|b",
1252                 &lang_tag, &lang_tag_len , &loc_range , &loc_range_len , 
1253                 &boolCanonical) == FAILURE)
1254         {
1255                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1256                 "locale_filter_matches: unable to parse input params", 0 TSRMLS_CC );
1257 
1258                 RETURN_FALSE;
1259         }
1260 
1261         if(loc_range_len == 0) {
1262                 loc_range = intl_locale_get_default(TSRMLS_C);
1263         }
1264 
1265         if( strcmp(loc_range,"*")==0){
1266                 RETURN_TRUE;
1267         }
1268 
1269         if( boolCanonical ){
1270                 /* canonicalize loc_range */
1271                 can_loc_range=get_icu_value_internal( loc_range , LOC_CANONICALIZE_TAG , &result , 0);
1272                 if( result ==0) {
1273                         intl_error_set( NULL, status, 
1274                                 "locale_filter_matches : unable to canonicalize loc_range" , 0 TSRMLS_CC );
1275                         RETURN_FALSE;
1276                 }
1277 
1278                 /* canonicalize lang_tag */
1279                 can_lang_tag = get_icu_value_internal( lang_tag , LOC_CANONICALIZE_TAG , &result ,  0);
1280                 if( result ==0) {
1281                         intl_error_set( NULL, status, 
1282                                 "locale_filter_matches : unable to canonicalize lang_tag" , 0 TSRMLS_CC );
1283                         RETURN_FALSE;
1284                 }
1285 
1286                 /* Convert to lower case for case-insensitive comparison */
1287                 cur_lang_tag = ecalloc( 1, strlen(can_lang_tag) + 1);
1288 
1289                 /* Convert to lower case for case-insensitive comparison */
1290                 result = strToMatch( can_lang_tag , cur_lang_tag);
1291                 if( result == 0) {
1292                         efree( cur_lang_tag );
1293                         efree( can_lang_tag );
1294                         RETURN_FALSE;
1295                 }
1296 
1297                 cur_loc_range = ecalloc( 1, strlen(can_loc_range) + 1);
1298                 result = strToMatch( can_loc_range , cur_loc_range );
1299                 if( result == 0) {
1300                         efree( cur_lang_tag );
1301                         efree( can_lang_tag );
1302                         efree( cur_loc_range );
1303                         efree( can_loc_range );
1304                         RETURN_FALSE;
1305                 }
1306 
1307                 /* check if prefix */
1308                 token   = strstr( cur_lang_tag , cur_loc_range );
1309         
1310                 if( token && (token==cur_lang_tag) ){
1311                         /* check if the char. after match is SEPARATOR */
1312                         chrcheck = token + (strlen(cur_loc_range));
1313                         if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){ 
1314                                 if( cur_lang_tag){
1315                                         efree( cur_lang_tag );
1316                                 }
1317                                 if( cur_loc_range){
1318                                         efree( cur_loc_range );
1319                                 }
1320                                 if( can_lang_tag){
1321                                         efree( can_lang_tag );
1322                                 }
1323                                 if( can_loc_range){
1324                                         efree( can_loc_range );
1325                                 }
1326                                 RETURN_TRUE;
1327                         }
1328                 }
1329 
1330                 /* No prefix as loc_range */
1331                 if( cur_lang_tag){
1332                         efree( cur_lang_tag );
1333                 }
1334                 if( cur_loc_range){
1335                         efree( cur_loc_range );
1336                 }
1337                 if( can_lang_tag){
1338                         efree( can_lang_tag );
1339                 }
1340                 if( can_loc_range){
1341                         efree( can_loc_range );
1342                 }
1343                 RETURN_FALSE;
1344 
1345         } /* end of if isCanonical */
1346         else{
1347                 /* Convert to lower case for case-insensitive comparison */
1348                 cur_lang_tag = ecalloc( 1, strlen(lang_tag ) + 1);
1349                 
1350                 result = strToMatch( lang_tag , cur_lang_tag);
1351                 if( result == 0) {
1352                         efree( cur_lang_tag );
1353                         RETURN_FALSE;
1354                 }
1355                 cur_loc_range = ecalloc( 1, strlen(loc_range ) + 1);
1356                 result = strToMatch( loc_range , cur_loc_range );
1357                 if( result == 0) {
1358                         efree( cur_lang_tag );
1359                         efree( cur_loc_range );
1360                         RETURN_FALSE;
1361                 }
1362 
1363                 /* check if prefix */
1364                 token   = strstr( cur_lang_tag , cur_loc_range );
1365                 
1366                 if( token && (token==cur_lang_tag) ){
1367                         /* check if the char. after match is SEPARATOR */
1368                         chrcheck = token + (strlen(cur_loc_range));
1369                         if( isIDSeparator(*chrcheck) || isEndOfTag(*chrcheck) ){ 
1370                                 if( cur_lang_tag){
1371                                         efree( cur_lang_tag );
1372                                 }
1373                                 if( cur_loc_range){
1374                                         efree( cur_loc_range );
1375                                 }
1376                                 RETURN_TRUE;
1377                         }
1378                 }
1379 
1380                 /* No prefix as loc_range */
1381                 if( cur_lang_tag){
1382                         efree( cur_lang_tag );
1383                 }
1384                 if( cur_loc_range){
1385                         efree( cur_loc_range );
1386                 }
1387                 RETURN_FALSE;
1388 
1389         }
1390 }
1391 /* }}} */
1392 
/* Releases a table whose entries are laid out in pairs: for each of the
 * first arr_size pairs the even-indexed slot is freed when it is not NULL,
 * the odd-indexed slot is left alone, and finally the table itself is freed.
 */
static void array_cleanup( char* arr[] , int arr_size)
{
	int pair;

	for (pair = 0; pair < arr_size; ++pair) {
		char *owned = arr[pair * 2];

		if (owned != NULL) {
			efree(owned);
		}
	}

	efree(arr);
}
1403 
1404 #define LOOKUP_CLEAN_RETURN(value)      array_cleanup(cur_arr, cur_arr_len); return (value)
1405 /* {{{
1406 * returns the lookup result to lookup_loc_range_src_php 
1407 * internal function
1408 */
1409 static char* lookup_loc_range(const char* loc_range, HashTable* hash_arr, int canonicalize  TSRMLS_DC)
1410 {
1411         int     i = 0;
1412         int     cur_arr_len = 0;
1413         int result = 0;
1414 
1415         char* lang_tag = NULL;
1416         zval** ele_value = NULL;
1417         char** cur_arr = NULL;
1418 
1419         char* cur_loc_range     = NULL;
1420         char* can_loc_range     = NULL;
1421         int     saved_pos = 0;
1422 
1423         char* return_value = NULL;
1424 
1425         cur_arr = ecalloc(zend_hash_num_elements(hash_arr)*2, sizeof(char *));
1426         /* convert the array to lowercase , also replace hyphens with the underscore and store it in cur_arr */
1427         for(zend_hash_internal_pointer_reset(hash_arr);
1428                 zend_hash_has_more_elements(hash_arr) == SUCCESS;
1429                 zend_hash_move_forward(hash_arr)) {
1430                 
1431                 if (zend_hash_get_current_data(hash_arr, (void**)&ele_value) == FAILURE) {
1432                         /* Should never actually fail since the key is known to exist.*/
1433                         continue;
1434                 }
1435                 if(Z_TYPE_PP(ele_value)!= IS_STRING) {
1436                         /* element value is not a string */
1437                         intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: locale array element is not a string", 0 TSRMLS_CC);
1438                         LOOKUP_CLEAN_RETURN(NULL);
1439                 } 
1440                 cur_arr[cur_arr_len*2] = estrndup(Z_STRVAL_PP(ele_value), Z_STRLEN_PP(ele_value));
1441                 result = strToMatch(Z_STRVAL_PP(ele_value), cur_arr[cur_arr_len*2]);
1442                 if(result == 0) {
1443                         intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag", 0 TSRMLS_CC);
1444                         LOOKUP_CLEAN_RETURN(NULL);
1445                 }
1446                 cur_arr[cur_arr_len*2+1] = Z_STRVAL_PP(ele_value);
1447                 cur_arr_len++ ; 
1448         } /* end of for */
1449 
1450         /* Canonicalize array elements */
1451         if(canonicalize) {
1452                 for(i=0; i<cur_arr_len; i++) { 
1453                         lang_tag = get_icu_value_internal(cur_arr[i*2], LOC_CANONICALIZE_TAG, &result, 0);
1454                         if(result != 1 || lang_tag == NULL || !lang_tag[0]) {
1455                                 if(lang_tag) {
1456                                         efree(lang_tag);
1457                                 }
1458                                 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
1459                                 LOOKUP_CLEAN_RETURN(NULL);
1460                         }
1461                         cur_arr[i*2] = erealloc(cur_arr[i*2], strlen(lang_tag)+1);
1462                         result = strToMatch(lang_tag, cur_arr[i*2]);    
1463                         efree(lang_tag);
1464                         if(result == 0) {
1465                                 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
1466                                 LOOKUP_CLEAN_RETURN(NULL);
1467                         }
1468                 }
1469 
1470         }
1471 
1472         if(canonicalize) {
1473                 /* Canonicalize the loc_range */
1474                 can_loc_range = get_icu_value_internal(loc_range, LOC_CANONICALIZE_TAG, &result , 0);
1475                 if( result != 1 || can_loc_range == NULL || !can_loc_range[0]) {
1476                         /* Error */
1477                         intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize loc_range" , 0 TSRMLS_CC );
1478                         if(can_loc_range) {
1479                                 efree(can_loc_range);
1480                         }
1481                         LOOKUP_CLEAN_RETURN(NULL);
1482                 } else {
1483                         loc_range = can_loc_range;
1484                 }
1485         } 
1486 
1487         cur_loc_range = ecalloc(1, strlen(loc_range)+1);
1488         /* convert to lower and replace hyphens */
1489         result = strToMatch(loc_range, cur_loc_range);  
1490         if(can_loc_range) {
1491                 efree(can_loc_range);
1492         }
1493         if(result == 0) {
1494                 intl_error_set(NULL, U_ILLEGAL_ARGUMENT_ERROR, "lookup_loc_range: unable to canonicalize lang_tag" , 0 TSRMLS_CC);
1495                 LOOKUP_CLEAN_RETURN(NULL);
1496         }
1497 
1498         /* Lookup for the lang_tag match */
1499         saved_pos = strlen(cur_loc_range);
1500         while(saved_pos > 0) {
1501                 for(i=0; i< cur_arr_len; i++){ 
1502                         if(cur_arr[i*2] != NULL && strlen(cur_arr[i*2]) == saved_pos && strncmp(cur_loc_range, cur_arr[i*2], saved_pos) == 0) { 
1503                                 /* Match found */
1504                                 return_value = estrdup(canonicalize?cur_arr[i*2]:cur_arr[i*2+1]);
1505                                 efree(cur_loc_range);
1506                                 LOOKUP_CLEAN_RETURN(return_value);
1507                         }
1508                 }
1509                 saved_pos = getStrrtokenPos(cur_loc_range, saved_pos);
1510         }
1511 
1512         /* Match not found */
1513         efree(cur_loc_range);
1514         LOOKUP_CLEAN_RETURN(NULL);
1515 }
1516 /* }}} */
1517 
1518 /* {{{ proto string Locale::lookup(array $langtag, string $locale[, bool $canonicalize[, string $default = null]]) 
1519 * Searches the items in $langtag for the best match to the language
1520 * range 
1521 */
1522 /* }}} */
1523 /* {{{ proto string locale_lookup(array $langtag, string $locale[, bool $canonicalize[, string $default = null]])
1524 * Searchs the items in $langtag for the best match to the language
1525 * range 
1526 */
1527 PHP_FUNCTION(locale_lookup)
1528 {
1529         char*           fallback_loc            = NULL;
1530         int             fallback_loc_len        = 0;
1531         const char*     loc_range               = NULL;
1532         int             loc_range_len           = 0;
1533 
1534         zval*           arr                             = NULL;
1535         HashTable*      hash_arr                = NULL;
1536         zend_bool       boolCanonical   = 0;
1537         char*           result                  =NULL;
1538 
1539         intl_error_reset( NULL TSRMLS_CC );
1540 
1541         if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "as|bs", &arr, &loc_range, &loc_range_len,
1542                 &boolCanonical, &fallback_loc, &fallback_loc_len) == FAILURE) {
1543                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR, "locale_lookup: unable to parse input params", 0 TSRMLS_CC );
1544                 RETURN_FALSE;
1545         }
1546 
1547         if(loc_range_len == 0) {
1548                 loc_range = intl_locale_get_default(TSRMLS_C);
1549         }
1550 
1551         hash_arr = HASH_OF(arr);
1552 
1553         if( !hash_arr || zend_hash_num_elements( hash_arr ) == 0 ) {
1554                 RETURN_EMPTY_STRING();
1555         } 
1556         
1557         result = lookup_loc_range(loc_range, hash_arr, boolCanonical TSRMLS_CC);
1558         if(result == NULL || result[0] == '\0') {
1559                 if( fallback_loc ) {
1560                         result = estrndup(fallback_loc, fallback_loc_len);
1561                 } else {
1562                         RETURN_EMPTY_STRING();
1563                 }
1564         }
1565 
1566         RETVAL_STRINGL(result, strlen(result), 0);
1567 }
1568 /* }}} */
1569 
1570 /* {{{ proto string Locale::acceptFromHttp(string $http_accept)
1571 * Tries to find out best available locale based on HTTP "Accept-Language" header
1572 */
1573 /* }}} */
1574 /* {{{ proto string locale_accept_from_http(string $http_accept)
1575 * Tries to find out best available locale based on HTTP �Accept-Language� header
1576 */
1577 PHP_FUNCTION(locale_accept_from_http)
1578 {
1579         UEnumeration *available;
1580         char *http_accept = NULL;
1581         int http_accept_len;
1582         UErrorCode status = 0;
1583         int len;
1584         char resultLocale[INTL_MAX_LOCALE_LEN+1];
1585         UAcceptResult outResult;
1586 
1587         if(zend_parse_parameters( ZEND_NUM_ARGS() TSRMLS_CC, "s", &http_accept, &http_accept_len) == FAILURE)
1588         {
1589                 intl_error_set( NULL, U_ILLEGAL_ARGUMENT_ERROR,
1590                 "locale_accept_from_http: unable to parse input parameters", 0 TSRMLS_CC );
1591                 RETURN_FALSE;
1592         }
1593         
1594         available = ures_openAvailableLocales(NULL, &status);
1595         INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to retrieve locale list");
1596         len = uloc_acceptLanguageFromHTTP(resultLocale, INTL_MAX_LOCALE_LEN, 
1597                                                 &outResult, http_accept, available, &status);
1598         uenum_close(available);
1599         INTL_CHECK_STATUS(status, "locale_accept_from_http: failed to find acceptable locale");
1600         if (len < 0 || outResult == ULOC_ACCEPT_FAILED) {
1601                 RETURN_FALSE;
1602         }
1603         RETURN_STRINGL(resultLocale, len, 1);
1604 }
1605 /* }}} */
1606 
1607 /*
1608  * Local variables:
1609  * tab-width: 4
1610  * c-basic-offset: 4
1611  * End:
1612  * vim600: noet sw=4 ts=4 fdm=marker
1613  * vim<600: noet sw=4 ts=4
1614  *can_loc_len
1615 */

/* [<][>][^][v][top][bottom][index][help] */