root/ext/mbstring/php_mbregex.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. ZEND_EXTERN_MODULE_GLOBALS
  2. php_mb_regex_free_cache
  3. _php_mb_regex_globals_ctor
  4. _php_mb_regex_globals_dtor
  5. php_mb_regex_globals_alloc
  6. php_mb_regex_globals_free
  7. PHP_MINIT_FUNCTION
  8. PHP_MSHUTDOWN_FUNCTION
  9. PHP_RINIT_FUNCTION
  10. PHP_RSHUTDOWN_FUNCTION
  11. PHP_MINFO_FUNCTION
  12. _php_mb_regex_name2mbctype
  13. _php_mb_regex_mbctype2name
  14. php_mb_regex_set_mbctype
  15. php_mb_regex_set_default_mbctype
  16. php_mb_regex_get_mbctype
  17. php_mb_regex_get_default_mbctype
  18. php_mbregex_compile_pattern
  19. _php_mb_regex_get_option_string
  20. _php_mb_regex_init_options
  21. PHP_FUNCTION
  22. _php_mb_regex_ereg_exec
  23. PHP_FUNCTION
  24. PHP_FUNCTION
  25. _php_mb_regex_ereg_replace_exec
  26. PHP_FUNCTION
  27. PHP_FUNCTION
  28. PHP_FUNCTION
  29. PHP_FUNCTION
  30. PHP_FUNCTION
  31. _php_mb_regex_ereg_search_exec
  32. PHP_FUNCTION
  33. PHP_FUNCTION
  34. PHP_FUNCTION
  35. PHP_FUNCTION
  36. PHP_FUNCTION
  37. PHP_FUNCTION
  38. PHP_FUNCTION
  39. _php_mb_regex_set_options
  40. PHP_FUNCTION

   1 /*
   2    +----------------------------------------------------------------------+
   3    | PHP Version 5                                                        |
   4    +----------------------------------------------------------------------+
   5    | Copyright (c) 1997-2016 The PHP Group                                |
   6    +----------------------------------------------------------------------+
   7    | This source file is subject to version 3.01 of the PHP license,      |
   8    | that is bundled with this package in the file LICENSE, and is        |
   9    | available through the world-wide-web at the following url:           |
  10    | http://www.php.net/license/3_01.txt                                  |
  11    | If you did not receive a copy of the PHP license and are unable to   |
  12    | obtain it through the world-wide-web, please send a note to          |
  13    | license@php.net so we can mail you a copy immediately.               |
  14    +----------------------------------------------------------------------+
  15    | Author: Tsukada Takuya <tsukada@fminn.nagano.nagano.jp>              |
  16    +----------------------------------------------------------------------+
  17  */
  18 
  19 /* $Id$ */
  20 
  21 
  22 #ifdef HAVE_CONFIG_H
  23 #include "config.h"
  24 #endif
  25 
  26 #include "php.h"
  27 #include "php_ini.h"
  28 
  29 #if HAVE_MBREGEX
  30 
  31 #include "ext/standard/php_smart_str.h"
  32 #include "ext/standard/info.h"
  33 #include "php_mbregex.h"
  34 #include "mbstring.h"
  35  
  36 #include "php_onig_compat.h" /* must come prior to the oniguruma header */
  37 #include <oniguruma.h>
  38 #undef UChar
  39 
/* Declares the mbstring module globals; this file reaches them through MBSTRG(). */
  40 ZEND_EXTERN_MODULE_GLOBALS(mbstring)
  41 
/*
 * State shared by the mbregex functions. Accessed in this file through the
 * MBREX() macro, i.e. via MBSTRG(mb_regex_globals).
 */
  42 struct _zend_mb_regex_globals {
  43         OnigEncoding default_mbctype; /* copied back into current_mbctype at request shutdown */
  44         OnigEncoding current_mbctype; /* encoding handed to the pattern compiler */
  45         HashTable ht_rc; /* cache of compiled regexes, keyed by pattern text */
  46         zval *search_str; /* released and reset to NULL at request shutdown */
  47         zval *search_str_val; /* NOTE(review): not referenced in the reviewed part of the file - confirm use */
  48         unsigned int search_pos; /* reset to 0 at request shutdown */
  49         php_mb_regex_t *search_re; /* starts out NULL; presumably the regex of an ongoing search - TODO confirm */
  50         OnigRegion *search_regs; /* freed with onig_region_free() at request shutdown */
  51         OnigOptionType regex_default_options; /* options applied when the caller supplies none */
  52         OnigSyntaxType *regex_default_syntax; /* syntax applied when the caller supplies none */
  53 };
  54 
/* Shorthand for field `g` of the mbregex globals reached through MBSTRG(mb_regex_globals). */
  55 #define MBREX(g) (MBSTRG(mb_regex_globals)->g)
  56 
  57 /* {{{ static void php_mb_regex_free_cache() */
  58 static void php_mb_regex_free_cache(php_mb_regex_t **pre) 
  59 {
  60         onig_free(*pre);
  61 }
  62 /* }}} */
  63 
  64 /* {{{ _php_mb_regex_globals_ctor */
  65 static int _php_mb_regex_globals_ctor(zend_mb_regex_globals *pglobals TSRMLS_DC)
  66 {
  67         pglobals->default_mbctype = ONIG_ENCODING_UTF8;
  68         pglobals->current_mbctype = ONIG_ENCODING_UTF8;
  69         zend_hash_init(&(pglobals->ht_rc), 0, NULL, (void (*)(void *)) php_mb_regex_free_cache, 1);
  70         pglobals->search_str = (zval*) NULL;
  71         pglobals->search_re = (php_mb_regex_t*)NULL;
  72         pglobals->search_pos = 0;
  73         pglobals->search_regs = (OnigRegion*)NULL;
  74         pglobals->regex_default_options = ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
  75         pglobals->regex_default_syntax = ONIG_SYNTAX_RUBY;
  76         return SUCCESS;
  77 }
  78 /* }}} */
  79 
  80 /* {{{ _php_mb_regex_globals_dtor */
  81 static void _php_mb_regex_globals_dtor(zend_mb_regex_globals *pglobals TSRMLS_DC) 
  82 {
  83         zend_hash_destroy(&pglobals->ht_rc);
  84 }
  85 /* }}} */
  86 
  87 /* {{{ php_mb_regex_globals_alloc */
  88 zend_mb_regex_globals *php_mb_regex_globals_alloc(TSRMLS_D)
  89 {
  90         zend_mb_regex_globals *pglobals = pemalloc(
  91                         sizeof(zend_mb_regex_globals), 1);
  92         if (!pglobals) {
  93                 return NULL;
  94         }
  95         if (SUCCESS != _php_mb_regex_globals_ctor(pglobals TSRMLS_CC)) {
  96                 pefree(pglobals, 1);
  97                 return NULL;
  98         }
  99         return pglobals;
 100 }
 101 /* }}} */
 102 
 103 /* {{{ php_mb_regex_globals_free */
 104 void php_mb_regex_globals_free(zend_mb_regex_globals *pglobals TSRMLS_DC)
 105 {
 106         if (!pglobals) {
 107                 return;
 108         }
 109         _php_mb_regex_globals_dtor(pglobals TSRMLS_CC);
 110         pefree(pglobals, 1);
 111 }
 112 /* }}} */
 113 
 114 /* {{{ PHP_MINIT_FUNCTION(mb_regex) */
 115 PHP_MINIT_FUNCTION(mb_regex)
 116 {
 117         onig_init();
 118         return SUCCESS;
 119 }
 120 /* }}} */
 121 
 122 /* {{{ PHP_MSHUTDOWN_FUNCTION(mb_regex) */
 123 PHP_MSHUTDOWN_FUNCTION(mb_regex)
 124 {
 125         onig_end();
 126         return SUCCESS;
 127 }
 128 /* }}} */
 129 
 130 /* {{{ PHP_RINIT_FUNCTION(mb_regex) */
 131 PHP_RINIT_FUNCTION(mb_regex)
 132 {
 133         return MBSTRG(mb_regex_globals) ? SUCCESS: FAILURE;
 134 }
 135 /* }}} */
 136 
 137 /* {{{ PHP_RSHUTDOWN_FUNCTION(mb_regex) */
 138 PHP_RSHUTDOWN_FUNCTION(mb_regex)
 139 {
 140         MBREX(current_mbctype) = MBREX(default_mbctype);
 141 
 142         if (MBREX(search_str) != NULL) {
 143                 zval_ptr_dtor(&MBREX(search_str));
 144                 MBREX(search_str) = (zval *)NULL;
 145         }
 146         MBREX(search_pos) = 0;
 147 
 148         if (MBREX(search_regs) != NULL) {
 149                 onig_region_free(MBREX(search_regs), 1);
 150                 MBREX(search_regs) = (OnigRegion *)NULL;
 151         }
 152         zend_hash_clean(&MBREX(ht_rc));
 153 
 154         return SUCCESS;
 155 }
 156 /* }}} */
 157 
 158 /* {{{ PHP_MINFO_FUNCTION(mb_regex) */
 159 PHP_MINFO_FUNCTION(mb_regex)
 160 {
 161         char buf[32];
 162         php_info_print_table_start();
 163         php_info_print_table_row(2, "Multibyte (japanese) regex support", "enabled");
 164         snprintf(buf, sizeof(buf), "%d.%d.%d",
 165                         ONIGURUMA_VERSION_MAJOR,
 166                         ONIGURUMA_VERSION_MINOR,
 167                         ONIGURUMA_VERSION_TEENY);
 168 #ifdef PHP_ONIG_BUNDLED
 169 #ifdef USE_COMBINATION_EXPLOSION_CHECK
 170         php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "On");
 171 #else   /* USE_COMBINATION_EXPLOSION_CHECK */
 172         php_info_print_table_row(2, "Multibyte regex (oniguruma) backtrack check", "Off");
 173 #endif  /* USE_COMBINATION_EXPLOSION_CHECK */
 174 #endif /* PHP_BUNDLED_ONIG */
 175         php_info_print_table_row(2, "Multibyte regex (oniguruma) version", buf);
 176         php_info_print_table_end();
 177 }
 178 /* }}} */
 179 
 180 /*
 181  * encoding name resolver
 182  */
 183 
 184 /* {{{ encoding name map */
/*
 * One row of the encoding name table: `names` packs one or more
 * NUL-terminated aliases back to back, `code` is the Oniguruma encoding
 * those aliases select.
 */
 185 typedef struct _php_mb_regex_enc_name_map_t {
 186         const char *names;
 187         OnigEncoding code;
 188 } php_mb_regex_enc_name_map_t;
 189 
/*
 * Encoding name table used by the name <-> encoding lookups in this file.
 *
 * Each `names` literal is a list of aliases, every alias followed by an
 * explicit "\0"; the literal's implicit terminator then forms the empty
 * string at which the alias scan stops. Read as a plain C string, `names`
 * yields only the first alias, which is what the reverse lookup returns.
 * Rows are compiled in only for encodings the Oniguruma headers define.
 * The table ends with a { NULL, ONIG_ENCODING_UNDEF } sentinel row.
 */
 190 php_mb_regex_enc_name_map_t enc_name_map[] = {
 191 #ifdef ONIG_ENCODING_EUC_JP
 192         {
 193                 "EUC-JP\0EUCJP\0X-EUC-JP\0UJIS\0EUCJP\0EUCJP-WIN\0",
 194                 ONIG_ENCODING_EUC_JP
 195         },
 196 #endif
 197 #ifdef ONIG_ENCODING_UTF8
 198         {
 199                 "UTF-8\0UTF8\0",
 200                 ONIG_ENCODING_UTF8
 201         },
 202 #endif
 203 #ifdef ONIG_ENCODING_UTF16_BE
 204         {
 205                 "UTF-16\0UTF-16BE\0",
 206                 ONIG_ENCODING_UTF16_BE
 207         },
 208 #endif
 209 #ifdef ONIG_ENCODING_UTF16_LE
 210         {
 211                 "UTF-16LE\0",
 212                 ONIG_ENCODING_UTF16_LE
 213         },
 214 #endif
 215 #ifdef ONIG_ENCODING_UTF32_BE
 216         {
 217                 "UCS-4\0UTF-32\0UTF-32BE\0",
 218                 ONIG_ENCODING_UTF32_BE
 219         },
 220 #endif
 221 #ifdef ONIG_ENCODING_UTF32_LE
 222         {
 223                 "UCS-4LE\0UTF-32LE\0",
 224                 ONIG_ENCODING_UTF32_LE
 225         },
 226 #endif
 227 #ifdef ONIG_ENCODING_SJIS
 228         {
 229                 "SJIS\0CP932\0MS932\0SHIFT_JIS\0SJIS-WIN\0WINDOWS-31J\0",
 230                 ONIG_ENCODING_SJIS
 231         },
 232 #endif
 233 #ifdef ONIG_ENCODING_BIG5
 234         {
 235                 "BIG5\0BIG-5\0BIGFIVE\0CN-BIG5\0BIG-FIVE\0",
 236                 ONIG_ENCODING_BIG5
 237         },
 238 #endif
 239 #ifdef ONIG_ENCODING_EUC_CN
 240         {
 241                 "EUC-CN\0EUCCN\0EUC_CN\0GB-2312\0GB2312\0",
 242                 ONIG_ENCODING_EUC_CN
 243         },
 244 #endif
 245 #ifdef ONIG_ENCODING_EUC_TW
 246         {
 247                 "EUC-TW\0EUCTW\0EUC_TW\0",
 248                 ONIG_ENCODING_EUC_TW
 249         },
 250 #endif
 251 #ifdef ONIG_ENCODING_EUC_KR
 252         {
 253                 "EUC-KR\0EUCKR\0EUC_KR\0",
 254                 ONIG_ENCODING_EUC_KR
 255         },
 256 #endif
 257 #if defined(ONIG_ENCODING_KOI8) && !PHP_ONIG_BAD_KOI8_ENTRY
 258         {
 259                 "KOI8\0KOI-8\0",
 260                 ONIG_ENCODING_KOI8
 261         },
 262 #endif
 263 #ifdef ONIG_ENCODING_KOI8_R
 264         {
 265                 "KOI8R\0KOI8-R\0KOI-8R\0",
 266                 ONIG_ENCODING_KOI8_R
 267         },
 268 #endif
 269 #ifdef ONIG_ENCODING_ISO_8859_1
 270         {
 271                 "ISO-8859-1\0ISO8859-1\0ISO_8859_1\0ISO8859_1\0",
 272                 ONIG_ENCODING_ISO_8859_1
 273         },
 274 #endif
 275 #ifdef ONIG_ENCODING_ISO_8859_2
 276         {
 277                 "ISO-8859-2\0ISO8859-2\0ISO_8859_2\0ISO8859_2\0",
 278                 ONIG_ENCODING_ISO_8859_2
 279         },
 280 #endif
 281 #ifdef ONIG_ENCODING_ISO_8859_3
 282         {
 283                 "ISO-8859-3\0ISO8859-3\0ISO_8859_3\0ISO8859_3\0",
 284                 ONIG_ENCODING_ISO_8859_3
 285         },
 286 #endif
 287 #ifdef ONIG_ENCODING_ISO_8859_4
 288         {
 289                 "ISO-8859-4\0ISO8859-4\0ISO_8859_4\0ISO8859_4\0",
 290                 ONIG_ENCODING_ISO_8859_4
 291         },
 292 #endif
 293 #ifdef ONIG_ENCODING_ISO_8859_5
 294         {
 295                 "ISO-8859-5\0ISO8859-5\0ISO_8859_5\0ISO8859_5\0",
 296                 ONIG_ENCODING_ISO_8859_5
 297         },
 298 #endif
 299 #ifdef ONIG_ENCODING_ISO_8859_6
 300         {
 301                 "ISO-8859-6\0ISO8859-6\0ISO_8859_6\0ISO8859_6\0",
 302                 ONIG_ENCODING_ISO_8859_6
 303         },
 304 #endif
 305 #ifdef ONIG_ENCODING_ISO_8859_7
 306         {
 307                 "ISO-8859-7\0ISO8859-7\0ISO_8859_7\0ISO8859_7\0",
 308                 ONIG_ENCODING_ISO_8859_7
 309         },
 310 #endif
 311 #ifdef ONIG_ENCODING_ISO_8859_8
 312         {
 313                 "ISO-8859-8\0ISO8859-8\0ISO_8859_8\0ISO8859_8\0",
 314                 ONIG_ENCODING_ISO_8859_8
 315         },
 316 #endif
 317 #ifdef ONIG_ENCODING_ISO_8859_9
 318         {
 319                 "ISO-8859-9\0ISO8859-9\0ISO_8859_9\0ISO8859_9\0",
 320                 ONIG_ENCODING_ISO_8859_9
 321         },
 322 #endif
 323 #ifdef ONIG_ENCODING_ISO_8859_10
 324         {
 325                 "ISO-8859-10\0ISO8859-10\0ISO_8859_10\0ISO8859_10\0",
 326                 ONIG_ENCODING_ISO_8859_10
 327         },
 328 #endif
 329 #ifdef ONIG_ENCODING_ISO_8859_11
 330         {
 331                 "ISO-8859-11\0ISO8859-11\0ISO_8859_11\0ISO8859_11\0",
 332                 ONIG_ENCODING_ISO_8859_11
 333         },
 334 #endif
 335 #ifdef ONIG_ENCODING_ISO_8859_13
 336         {
 337                 "ISO-8859-13\0ISO8859-13\0ISO_8859_13\0ISO8859_13\0",
 338                 ONIG_ENCODING_ISO_8859_13
 339         },
 340 #endif
 341 #ifdef ONIG_ENCODING_ISO_8859_14
 342         {
 343                 "ISO-8859-14\0ISO8859-14\0ISO_8859_14\0ISO8859_14\0",
 344                 ONIG_ENCODING_ISO_8859_14
 345         },
 346 #endif
 347 #ifdef ONIG_ENCODING_ISO_8859_15
 348         {
 349                 "ISO-8859-15\0ISO8859-15\0ISO_8859_15\0ISO8859_15\0",
 350                 ONIG_ENCODING_ISO_8859_15
 351         },
 352 #endif
 353 #ifdef ONIG_ENCODING_ISO_8859_16
 354         {
 355                 "ISO-8859-16\0ISO8859-16\0ISO_8859_16\0ISO8859_16\0",
 356                 ONIG_ENCODING_ISO_8859_16
 357         },
 358 #endif
 359 #ifdef ONIG_ENCODING_ASCII
 360         {
 361                 "ASCII\0US-ASCII\0US_ASCII\0ISO646\0",
 362                 ONIG_ENCODING_ASCII
 363         },
 364 #endif
 365         { NULL, ONIG_ENCODING_UNDEF }
 366 };
 367 /* }}} */
 368 
 369 /* {{{ _php_mb_regex_name2mbctype */
 370 static OnigEncoding _php_mb_regex_name2mbctype(const char *pname)
 371 {
 372         const char *p;
 373         php_mb_regex_enc_name_map_t *mapping;
 374 
 375         if (pname == NULL || !*pname) {
 376                 return ONIG_ENCODING_UNDEF;
 377         }
 378 
 379         for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
 380                 for (p = mapping->names; *p != '\0'; p += (strlen(p) + 1)) {
 381                         if (strcasecmp(p, pname) == 0) {
 382                                 return mapping->code;
 383                         }
 384                 }
 385         }
 386 
 387         return ONIG_ENCODING_UNDEF;
 388 }
 389 /* }}} */
 390 
 391 /* {{{ _php_mb_regex_mbctype2name */
 392 static const char *_php_mb_regex_mbctype2name(OnigEncoding mbctype)
 393 {
 394         php_mb_regex_enc_name_map_t *mapping;
 395 
 396         for (mapping = enc_name_map; mapping->names != NULL; mapping++) {
 397                 if (mapping->code == mbctype) {
 398                         return mapping->names;
 399                 }
 400         }
 401 
 402         return NULL;
 403 }
 404 /* }}} */
 405 
 406 /* {{{ php_mb_regex_set_mbctype */
 407 int php_mb_regex_set_mbctype(const char *encname TSRMLS_DC)
 408 {
 409         OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname);
 410         if (mbctype == ONIG_ENCODING_UNDEF) {
 411                 return FAILURE;
 412         }
 413         MBREX(current_mbctype) = mbctype;
 414         return SUCCESS;
 415 }
 416 /* }}} */
 417 
 418 /* {{{ php_mb_regex_set_default_mbctype */
 419 int php_mb_regex_set_default_mbctype(const char *encname TSRMLS_DC)
 420 {
 421         OnigEncoding mbctype = _php_mb_regex_name2mbctype(encname);
 422         if (mbctype == ONIG_ENCODING_UNDEF) {
 423                 return FAILURE;
 424         }
 425         MBREX(default_mbctype) = mbctype;
 426         return SUCCESS;
 427 }
 428 /* }}} */
 429 
 430 /* {{{ php_mb_regex_get_mbctype */
 431 const char *php_mb_regex_get_mbctype(TSRMLS_D)
 432 {
 433         return _php_mb_regex_mbctype2name(MBREX(current_mbctype));
 434 }
 435 /* }}} */
 436 
 437 /* {{{ php_mb_regex_get_default_mbctype */
 438 const char *php_mb_regex_get_default_mbctype(TSRMLS_D)
 439 {
 440         return _php_mb_regex_mbctype2name(MBREX(default_mbctype));
 441 }
 442 /* }}} */
 443 
 444 /*
 445  * regex cache
 446  */
 447 /* {{{ php_mbregex_compile_pattern */
 448 static php_mb_regex_t *php_mbregex_compile_pattern(const char *pattern, int patlen, OnigOptionType options, OnigEncoding enc, OnigSyntaxType *syntax TSRMLS_DC)
 449 {
 450         int err_code = 0;
 451         int found = 0;
 452         php_mb_regex_t *retval = NULL, **rc = NULL;
 453         OnigErrorInfo err_info;
 454         OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
 455 
 456         found = zend_hash_find(&MBREX(ht_rc), (char *)pattern, patlen+1, (void **) &rc);
 457         if (found == FAILURE || (*rc)->options != options || (*rc)->enc != enc || (*rc)->syntax != syntax) {
 458                 if ((err_code = onig_new(&retval, (OnigUChar *)pattern, (OnigUChar *)(pattern + patlen), options, enc, syntax, &err_info)) != ONIG_NORMAL) {
 459                         onig_error_code_to_str(err_str, err_code, err_info);
 460                         php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex compile err: %s", err_str);
 461                         retval = NULL;
 462                         goto out;
 463                 }
 464                 zend_hash_update(&MBREX(ht_rc), (char *) pattern, patlen + 1, (void *) &retval, sizeof(retval), NULL);
 465         } else if (found == SUCCESS) {
 466                 retval = *rc;
 467         }
 468 out:
 469         return retval; 
 470 }
 471 /* }}} */
 472 
 473 /* {{{ _php_mb_regex_get_option_string */
 474 static size_t _php_mb_regex_get_option_string(char *str, size_t len, OnigOptionType option, OnigSyntaxType *syntax)
 475 {
 476         size_t len_left = len;
 477         size_t len_req = 0;
 478         char *p = str;
 479         char c;
 480 
 481         if ((option & ONIG_OPTION_IGNORECASE) != 0) {
 482                 if (len_left > 0) {
 483                         --len_left;
 484                         *(p++) = 'i';
 485                 }
 486                 ++len_req;      
 487         }
 488 
 489         if ((option & ONIG_OPTION_EXTEND) != 0) {
 490                 if (len_left > 0) {
 491                         --len_left;
 492                         *(p++) = 'x';
 493                 }
 494                 ++len_req;      
 495         }
 496 
 497         if ((option & (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) ==
 498                         (ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE)) {
 499                 if (len_left > 0) {
 500                         --len_left;
 501                         *(p++) = 'p';
 502                 }
 503                 ++len_req;      
 504         } else {
 505                 if ((option & ONIG_OPTION_MULTILINE) != 0) {
 506                         if (len_left > 0) {
 507                                 --len_left;
 508                                 *(p++) = 'm';
 509                         }
 510                         ++len_req;      
 511                 }
 512 
 513                 if ((option & ONIG_OPTION_SINGLELINE) != 0) {
 514                         if (len_left > 0) {
 515                                 --len_left;
 516                                 *(p++) = 's';
 517                         }
 518                         ++len_req;      
 519                 }
 520         }       
 521         if ((option & ONIG_OPTION_FIND_LONGEST) != 0) {
 522                 if (len_left > 0) {
 523                         --len_left;
 524                         *(p++) = 'l';
 525                 }
 526                 ++len_req;      
 527         }
 528         if ((option & ONIG_OPTION_FIND_NOT_EMPTY) != 0) {
 529                 if (len_left > 0) {
 530                         --len_left;
 531                         *(p++) = 'n';
 532                 }
 533                 ++len_req;      
 534         }
 535 
 536         c = 0;
 537 
 538         if (syntax == ONIG_SYNTAX_JAVA) {
 539                 c = 'j';
 540         } else if (syntax == ONIG_SYNTAX_GNU_REGEX) {
 541                 c = 'u';
 542         } else if (syntax == ONIG_SYNTAX_GREP) {
 543                 c = 'g';
 544         } else if (syntax == ONIG_SYNTAX_EMACS) {
 545                 c = 'c';
 546         } else if (syntax == ONIG_SYNTAX_RUBY) {
 547                 c = 'r';
 548         } else if (syntax == ONIG_SYNTAX_PERL) {
 549                 c = 'z';
 550         } else if (syntax == ONIG_SYNTAX_POSIX_BASIC) {
 551                 c = 'b';
 552         } else if (syntax == ONIG_SYNTAX_POSIX_EXTENDED) {
 553                 c = 'd';
 554         }
 555 
 556         if (c != 0) {
 557                 if (len_left > 0) {
 558                         --len_left;
 559                         *(p++) = c;
 560                 }
 561                 ++len_req;
 562         }
 563 
 564 
 565         if (len_left > 0) {
 566                 --len_left;
 567                 *(p++) = '\0';
 568         }
 569         ++len_req;      
 570         if (len < len_req) {
 571                 return len_req;
 572         }
 573 
 574         return 0;
 575 }
 576 /* }}} */
 577 
 578 /* {{{ _php_mb_regex_init_options */
 579 static void
 580 _php_mb_regex_init_options(const char *parg, int narg, OnigOptionType *option, OnigSyntaxType **syntax, int *eval) 
 581 {
 582         int n;
 583         char c;
 584         int optm = 0; 
 585 
 586         *syntax = ONIG_SYNTAX_RUBY;
 587 
 588         if (parg != NULL) {
 589                 n = 0;
 590                 while(n < narg) {
 591                         c = parg[n++];
 592                         switch (c) {
 593                                 case 'i':
 594                                         optm |= ONIG_OPTION_IGNORECASE;
 595                                         break;
 596                                 case 'x':
 597                                         optm |= ONIG_OPTION_EXTEND;
 598                                         break;
 599                                 case 'm':
 600                                         optm |= ONIG_OPTION_MULTILINE;
 601                                         break;
 602                                 case 's':
 603                                         optm |= ONIG_OPTION_SINGLELINE;
 604                                         break;
 605                                 case 'p':
 606                                         optm |= ONIG_OPTION_MULTILINE | ONIG_OPTION_SINGLELINE;
 607                                         break;
 608                                 case 'l':
 609                                         optm |= ONIG_OPTION_FIND_LONGEST;
 610                                         break;
 611                                 case 'n':
 612                                         optm |= ONIG_OPTION_FIND_NOT_EMPTY;
 613                                         break;
 614                                 case 'j':
 615                                         *syntax = ONIG_SYNTAX_JAVA;
 616                                         break;
 617                                 case 'u':
 618                                         *syntax = ONIG_SYNTAX_GNU_REGEX;
 619                                         break;
 620                                 case 'g':
 621                                         *syntax = ONIG_SYNTAX_GREP;
 622                                         break;
 623                                 case 'c':
 624                                         *syntax = ONIG_SYNTAX_EMACS;
 625                                         break;
 626                                 case 'r':
 627                                         *syntax = ONIG_SYNTAX_RUBY;
 628                                         break;
 629                                 case 'z':
 630                                         *syntax = ONIG_SYNTAX_PERL;
 631                                         break;
 632                                 case 'b':
 633                                         *syntax = ONIG_SYNTAX_POSIX_BASIC;
 634                                         break;
 635                                 case 'd':
 636                                         *syntax = ONIG_SYNTAX_POSIX_EXTENDED;
 637                                         break;
 638                                 case 'e':
 639                                         if (eval != NULL) *eval = 1; 
 640                                         break;
 641                                 default:
 642                                         break;
 643                         }
 644                 }
 645                 if (option != NULL) *option|=optm; 
 646         }
 647 }
 648 /* }}} */
 649 
 650 /*
 651  * php functions
 652  */
 653 
 654 /* {{{ proto string mb_regex_encoding([string encoding])
 655    Returns the current encoding for regex as a string. */
 656 PHP_FUNCTION(mb_regex_encoding)
 657 {
 658         size_t argc = ZEND_NUM_ARGS();
 659         char *encoding;
 660         int encoding_len;
 661         OnigEncoding mbctype;
 662 
 663         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &encoding, &encoding_len) == FAILURE) {
 664                 return;
 665         }
 666 
 667         if (argc == 0) {
 668                 const char *retval = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
 669 
 670                 if (retval == NULL) {
 671                         RETURN_FALSE;
 672                 }
 673 
 674                 RETURN_STRING((char *)retval, 1);
 675         } else if (argc == 1) {
 676                 mbctype = _php_mb_regex_name2mbctype(encoding);
 677 
 678                 if (mbctype == ONIG_ENCODING_UNDEF) {
 679                         php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
 680                         RETURN_FALSE;
 681                 }
 682 
 683                 MBREX(current_mbctype) = mbctype;
 684                 RETURN_TRUE;
 685         }
 686 }
 687 /* }}} */
 688 
 689 /* {{{ _php_mb_regex_ereg_exec */
 690 static void _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAMETERS, int icase)
 691 {
 692         zval **arg_pattern, *array;
 693         char *string;
 694         int string_len;
 695         php_mb_regex_t *re;
 696         OnigRegion *regs = NULL;
 697         int i, match_len, beg, end;
 698         OnigOptionType options;
 699         char *str;
 700 
 701         array = NULL;
 702 
 703         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zs|z", &arg_pattern, &string, &string_len, &array) == FAILURE) {
 704                 RETURN_FALSE;
 705         }
 706 
 707         options = MBREX(regex_default_options);
 708         if (icase) {
 709                 options |= ONIG_OPTION_IGNORECASE;
 710         }
 711 
 712         /* compile the regular expression from the supplied regex */
 713         if (Z_TYPE_PP(arg_pattern) != IS_STRING) {
 714                 /* we convert numbers to integers and treat them as a string */
 715                 if (Z_TYPE_PP(arg_pattern) == IS_DOUBLE) {
 716                         convert_to_long_ex(arg_pattern);        /* get rid of decimal places */
 717                 }
 718                 convert_to_string_ex(arg_pattern);
 719                 /* don't bother doing an extended regex with just a number */
 720         }
 721 
 722         if (!Z_STRVAL_PP(arg_pattern) || Z_STRLEN_PP(arg_pattern) == 0) {
 723                 php_error_docref(NULL TSRMLS_CC, E_WARNING, "empty pattern");
 724                 RETVAL_FALSE;
 725                 goto out;
 726         }
 727 
 728         re = php_mbregex_compile_pattern(Z_STRVAL_PP(arg_pattern), Z_STRLEN_PP(arg_pattern), options, MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC);
 729         if (re == NULL) {
 730                 RETVAL_FALSE;
 731                 goto out;
 732         }
 733 
 734         regs = onig_region_new();
 735 
 736         /* actually execute the regular expression */
 737         if (onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, (OnigUChar *)(string + string_len), regs, 0) < 0) {
 738                 RETVAL_FALSE;
 739                 goto out;
 740         }
 741 
 742         match_len = 1;
 743         str = string;
 744         if (array != NULL) {
 745                 match_len = regs->end[0] - regs->beg[0];
 746                 zval_dtor(array);
 747                 array_init(array);
 748                 for (i = 0; i < regs->num_regs; i++) {
 749                         beg = regs->beg[i];
 750                         end = regs->end[i];
 751                         if (beg >= 0 && beg < end && end <= string_len) {
 752                                 add_index_stringl(array, i, (char *)&str[beg], end - beg, 1);
 753                         } else {
 754                                 add_index_bool(array, i, 0);
 755                         }
 756                 }
 757         }
 758 
 759         if (match_len == 0) {
 760                 match_len = 1;
 761         }
 762         RETVAL_LONG(match_len);
 763 out:
 764         if (regs != NULL) {
 765                 onig_region_free(regs, 1);
 766         }
 767 }
 768 /* }}} */
 769 
 770 /* {{{ proto int mb_ereg(string pattern, string string [, array registers])
 771    Regular expression match for multibyte string */
 772 PHP_FUNCTION(mb_ereg)
 773 {
 774         _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
 775 }
 776 /* }}} */
 777 
 778 /* {{{ proto int mb_eregi(string pattern, string string [, array registers])
 779    Case-insensitive regular expression match for multibyte string */
 780 PHP_FUNCTION(mb_eregi)
 781 {
 782         _php_mb_regex_ereg_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
 783 }
 784 /* }}} */
 785 
 786 /* {{{ _php_mb_regex_ereg_replace_exec */
 787 static void _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAMETERS, OnigOptionType options, int is_callable)
 788 {
 789         zval **arg_pattern_zval;
 790 
 791         char *arg_pattern;
 792         int arg_pattern_len;
 793 
 794         char *replace;
 795         int replace_len;
 796 
 797         zend_fcall_info arg_replace_fci;
 798         zend_fcall_info_cache arg_replace_fci_cache;
 799 
 800         char *string;
 801         int string_len;
 802 
 803         char *p;
 804         php_mb_regex_t *re;
 805         OnigSyntaxType *syntax;
 806         OnigRegion *regs = NULL;
 807         smart_str out_buf = { 0 };
 808         smart_str eval_buf = { 0 };
 809         smart_str *pbuf;
 810         int i, err, eval, n;
 811         OnigUChar *pos;
 812         OnigUChar *string_lim;
 813         char *description = NULL;
 814         char pat_buf[2];
 815 
 816         const mbfl_encoding *enc;
 817 
 818         {
 819                 const char *current_enc_name;
 820                 current_enc_name = _php_mb_regex_mbctype2name(MBREX(current_mbctype));
 821                 if (current_enc_name == NULL ||
 822                         (enc = mbfl_name2encoding(current_enc_name)) == NULL) {
 823                         php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error");
 824                         RETURN_FALSE;
 825                 }
 826         }
 827         eval = 0;
 828         {
 829                 char *option_str = NULL;
 830                 int option_str_len = 0;
 831 
 832                 if (!is_callable) {
 833                         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zss|s",
 834                                                 &arg_pattern_zval,
 835                                                 &replace, &replace_len,
 836                                                 &string, &string_len,
 837                                                 &option_str, &option_str_len) == FAILURE) {
 838                                 RETURN_FALSE;
 839                         }
 840                 } else {
 841                         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "Zfs|s",
 842                                                 &arg_pattern_zval,
 843                                                 &arg_replace_fci, &arg_replace_fci_cache,
 844                                                 &string, &string_len,
 845                                                 &option_str, &option_str_len) == FAILURE) {
 846                                 RETURN_FALSE;
 847                         }
 848                 }
 849 
 850                 if (option_str != NULL) {
 851                         _php_mb_regex_init_options(option_str, option_str_len, &options, &syntax, &eval);
 852                 } else {
 853                         options |= MBREX(regex_default_options);
 854                         syntax = MBREX(regex_default_syntax);
 855                 }
 856         }
 857         if (Z_TYPE_PP(arg_pattern_zval) == IS_STRING) {
 858                 arg_pattern = Z_STRVAL_PP(arg_pattern_zval);
 859                 arg_pattern_len = Z_STRLEN_PP(arg_pattern_zval);
 860         } else {
 861                 /* FIXME: this code is not multibyte aware! */
 862                 convert_to_long_ex(arg_pattern_zval);
 863                 pat_buf[0] = (char)Z_LVAL_PP(arg_pattern_zval); 
 864                 pat_buf[1] = '\0';
 865 
 866                 arg_pattern = pat_buf;
 867                 arg_pattern_len = 1;    
 868         }
 869         /* create regex pattern buffer */
 870         re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, options, MBREX(current_mbctype), syntax TSRMLS_CC);
 871         if (re == NULL) {
 872                 RETURN_FALSE;
 873         }
 874 
 875         if (eval || is_callable) {
 876                 pbuf = &eval_buf;
 877                 description = zend_make_compiled_string_description("mbregex replace" TSRMLS_CC);
 878         } else {
 879                 pbuf = &out_buf;
 880                 description = NULL;
 881         }
 882 
 883         if (is_callable) {
 884                 if (eval) {
 885                         php_error_docref(NULL TSRMLS_CC, E_WARNING, "Option 'e' cannot be used with replacement callback");
 886                         RETURN_FALSE;
 887                 }
 888         }
 889 
 890         /* do the actual work */
 891         err = 0;
 892         pos = (OnigUChar *)string;
 893         string_lim = (OnigUChar*)(string + string_len);
 894         regs = onig_region_new();
 895         while (err >= 0) {
 896                 err = onig_search(re, (OnigUChar *)string, (OnigUChar *)string_lim, pos, (OnigUChar *)string_lim, regs, 0);
 897                 if (err <= -2) {
 898                         OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
 899                         onig_error_code_to_str(err_str, err);
 900                         php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in php_mbereg_replace_exec(): %s", err_str);
 901                         break;
 902                 }
 903                 if (err >= 0) {
 904 #if moriyoshi_0
 905                         if (regs->beg[0] == regs->end[0]) {
 906                                 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
 907                                 break;
 908                         }
 909 #endif
 910                         /* copy the part of the string before the match */
 911                         smart_str_appendl(&out_buf, pos, (size_t)((OnigUChar *)(string + regs->beg[0]) - pos));
 912 
 913                         if (!is_callable) {
 914                                 /* copy replacement and backrefs */
 915                                 i = 0;
 916                                 p = replace;
 917                                 while (i < replace_len) {
 918                                         int fwd = (int) php_mb_mbchar_bytes_ex(p, enc);
 919                                         n = -1;
 920                                         if ((replace_len - i) >= 2 && fwd == 1 &&
 921                                         p[0] == '\\' && p[1] >= '0' && p[1] <= '9') {
 922                                                 n = p[1] - '0';
 923                                         }
 924                                         if (n >= 0 && n < regs->num_regs) {
 925                                                 if (regs->beg[n] >= 0 && regs->beg[n] < regs->end[n] && regs->end[n] <= string_len) {
 926                                                         smart_str_appendl(pbuf, string + regs->beg[n], regs->end[n] - regs->beg[n]);
 927                                                 }
 928                                                 p += 2;
 929                                                 i += 2;
 930                                         } else {
 931                                                 smart_str_appendl(pbuf, p, fwd);
 932                                                 p += fwd;
 933                                                 i += fwd;
 934                                         }
 935                                 }
 936                         }
 937                                 
 938                         if (eval) {
 939                                 zval v;
 940                                 /* null terminate buffer */
 941                                 smart_str_0(&eval_buf);
 942                                 /* do eval */
 943                                 if (zend_eval_stringl(eval_buf.c, eval_buf.len, &v, description TSRMLS_CC) == FAILURE) {
 944                                         efree(description);
 945                                         php_error_docref(NULL TSRMLS_CC,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, eval_buf.c);
 946                                         /* zend_error() does not return in this case */
 947                                 }
 948 
 949                                 /* result of eval */
 950                                 convert_to_string(&v);
 951                                 smart_str_appendl(&out_buf, Z_STRVAL(v), Z_STRLEN(v));
 952                                 /* Clean up */
 953                                 eval_buf.len = 0;
 954                                 zval_dtor(&v);
 955                         } else if (is_callable) {
 956                                 zval *retval_ptr;
 957                                 zval **args[1];
 958                                 zval *subpats;
 959                                 int i;
 960                                 
 961                                 MAKE_STD_ZVAL(subpats);
 962                                 array_init(subpats);
 963                                 
 964                                 for (i = 0; i < regs->num_regs; i++) {
 965                                         add_next_index_stringl(subpats, string + regs->beg[i], regs->end[i] - regs->beg[i], 1);
 966                                 }                               
 967                                 
 968                                 args[0] = &subpats;
 969                                 /* null terminate buffer */
 970                                 smart_str_0(&eval_buf);
 971                                 
 972                                 arg_replace_fci.param_count = 1;
 973                                 arg_replace_fci.params = args;
 974                                 arg_replace_fci.retval_ptr_ptr = &retval_ptr;
 975                                 if (zend_call_function(&arg_replace_fci, &arg_replace_fci_cache TSRMLS_CC) == SUCCESS && arg_replace_fci.retval_ptr_ptr) {
 976                                         convert_to_string_ex(&retval_ptr);
 977                                         smart_str_appendl(&out_buf, Z_STRVAL_P(retval_ptr), Z_STRLEN_P(retval_ptr));
 978                                         eval_buf.len = 0;
 979                                         zval_ptr_dtor(&retval_ptr);
 980                                 } else {
 981                                         efree(description);
 982                                         if (!EG(exception)) {
 983                                                 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call custom replacement function");
 984                                         }
 985                                 }
 986                                 zval_ptr_dtor(&subpats);
 987                         }
 988 
 989                         n = regs->end[0];
 990                         if ((pos - (OnigUChar *)string) < n) {
 991                                 pos = (OnigUChar *)string + n;
 992                         } else {
 993                                 if (pos < string_lim) {
 994                                         smart_str_appendl(&out_buf, pos, 1); 
 995                                 }
 996                                 pos++;
 997                         }
 998                 } else { /* nomatch */
 999                         /* stick that last bit of string on our output */
1000                         if (string_lim - pos > 0) {
1001                                 smart_str_appendl(&out_buf, pos, string_lim - pos);
1002                         }
1003                 }
1004                 onig_region_free(regs, 0);
1005         }
1006 
1007         if (description) {
1008                 efree(description);
1009         }
1010         if (regs != NULL) {
1011                 onig_region_free(regs, 1);
1012         }
1013         smart_str_free(&eval_buf);
1014 
1015         if (err <= -2) {
1016                 smart_str_free(&out_buf);       
1017                 RETVAL_FALSE;
1018         } else {
1019                 smart_str_appendc(&out_buf, '\0');
1020                 RETVAL_STRINGL((char *)out_buf.c, out_buf.len - 1, 0);
1021         }
1022 }
1023 /* }}} */
1024 
1025 /* {{{ proto string mb_ereg_replace(string pattern, string replacement, string string [, string option])
1026    Replace regular expression for multibyte string */
1027 PHP_FUNCTION(mb_ereg_replace)
1028 {
1029         _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0);
1030 }
1031 /* }}} */
1032 
1033 /* {{{ proto string mb_eregi_replace(string pattern, string replacement, string string)
1034    Case insensitive replace regular expression for multibyte string */
1035 PHP_FUNCTION(mb_eregi_replace)
1036 {
1037         _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, ONIG_OPTION_IGNORECASE, 0);
1038 }
1039 /* }}} */
1040 
1041 /* {{{ proto string mb_ereg_replace_callback(string pattern, string callback, string string [, string option])
1042     regular expression for multibyte string using replacement callback */
1043 PHP_FUNCTION(mb_ereg_replace_callback)
1044 {
1045         _php_mb_regex_ereg_replace_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1);
1046 }
1047 /* }}} */
1048 
1049 /* {{{ proto array mb_split(string pattern, string string [, int limit])
1050    split multibyte string into array by regular expression */
1051 PHP_FUNCTION(mb_split)
1052 {
1053         char *arg_pattern;
1054         int arg_pattern_len;
1055         php_mb_regex_t *re;
1056         OnigRegion *regs = NULL;
1057         char *string;
1058         OnigUChar *pos, *chunk_pos;
1059         int string_len;
1060 
1061         int n, err;
1062         long count = -1;
1063 
1064         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|l", &arg_pattern, &arg_pattern_len, &string, &string_len, &count) == FAILURE) {
1065                 RETURN_FALSE;
1066         } 
1067 
1068         if (count > 0) {
1069                 count--;
1070         }
1071 
1072         /* create regex pattern buffer */
1073         if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, MBREX(regex_default_options), MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC)) == NULL) {
1074                 RETURN_FALSE;
1075         }
1076 
1077         array_init(return_value);
1078 
1079         chunk_pos = pos = (OnigUChar *)string;
1080         err = 0;
1081         regs = onig_region_new();
1082         /* churn through str, generating array entries as we go */
1083         while (count != 0 && (pos - (OnigUChar *)string) < string_len) {
1084                 int beg, end;
1085                 err = onig_search(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), pos, (OnigUChar *)(string + string_len), regs, 0);
1086                 if (err < 0) {
1087                         break;
1088                 }
1089                 beg = regs->beg[0], end = regs->end[0];
1090                 /* add it to the array */
1091                 if ((pos - (OnigUChar *)string) < end) {
1092                         if (beg < string_len && beg >= (chunk_pos - (OnigUChar *)string)) {
1093                                 add_next_index_stringl(return_value, (char *)chunk_pos, ((OnigUChar *)(string + beg) - chunk_pos), 1);
1094                                 --count;
1095                         } else {
1096                                 err = -2;
1097                                 break;
1098                         }
1099                         /* point at our new starting point */
1100                         chunk_pos = pos = (OnigUChar *)string + end;
1101                 } else {
1102                         pos++;
1103                 }
1104                 onig_region_free(regs, 0);
1105         }
1106 
1107         onig_region_free(regs, 1);
1108 
1109         /* see if we encountered an error */
1110         if (err <= -2) {
1111                 OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1112                 onig_error_code_to_str(err_str, err);
1113                 php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in mbsplit(): %s", err_str);
1114                 zval_dtor(return_value);
1115                 RETURN_FALSE;
1116         }
1117 
1118         /* otherwise we just have one last element to add to the array */
1119         n = ((OnigUChar *)(string + string_len) - chunk_pos);
1120         if (n > 0) {
1121                 add_next_index_stringl(return_value, (char *)chunk_pos, n, 1);
1122         } else {
1123                 add_next_index_stringl(return_value, "", 0, 1);
1124         }
1125 }
1126 /* }}} */
1127 
1128 /* {{{ proto bool mb_ereg_match(string pattern, string string [,string option])
1129    Regular expression match for multibyte string */
1130 PHP_FUNCTION(mb_ereg_match)
1131 {
1132         char *arg_pattern;
1133         int arg_pattern_len;
1134 
1135         char *string;
1136         int string_len;
1137 
1138         php_mb_regex_t *re;
1139         OnigSyntaxType *syntax;
1140         OnigOptionType option = 0;
1141         int err;
1142 
1143         {
1144                 char *option_str = NULL;
1145                 int option_str_len = 0;
1146 
1147                 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s",
1148                                           &arg_pattern, &arg_pattern_len, &string, &string_len,
1149                                           &option_str, &option_str_len)==FAILURE) {
1150                         RETURN_FALSE;
1151                 }
1152 
1153                 if (option_str != NULL) {
1154                         _php_mb_regex_init_options(option_str, option_str_len, &option, &syntax, NULL);
1155                 } else {
1156                         option |= MBREX(regex_default_options);
1157                         syntax = MBREX(regex_default_syntax);
1158                 }
1159         }
1160 
1161         if ((re = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax TSRMLS_CC)) == NULL) {
1162                 RETURN_FALSE;
1163         }
1164 
1165         /* match */
1166         err = onig_match(re, (OnigUChar *)string, (OnigUChar *)(string + string_len), (OnigUChar *)string, NULL, 0);
1167         if (err >= 0) {
1168                 RETVAL_TRUE;
1169         } else {
1170                 RETVAL_FALSE;
1171         }
1172 }
1173 /* }}} */
1174 
1175 /* regex search */
1176 /* {{{ _php_mb_regex_ereg_search_exec */
1177 static void
1178 _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAMETERS, int mode)
1179 {
1180         size_t argc = ZEND_NUM_ARGS();
1181         char *arg_pattern, *arg_options;
1182         int arg_pattern_len, arg_options_len;
1183         int n, i, err, pos, len, beg, end;
1184         OnigOptionType option;
1185         OnigUChar *str;
1186         OnigSyntaxType *syntax;
1187 
1188         if (zend_parse_parameters(argc TSRMLS_CC, "|ss", &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
1189                 return;
1190         }
1191 
1192         option = MBREX(regex_default_options);
1193 
1194         if (argc == 2) {
1195                 option = 0;
1196                 _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
1197         }
1198 
1199         if (argc > 0) {
1200                 /* create regex pattern buffer */
1201                 if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), MBREX(regex_default_syntax) TSRMLS_CC)) == NULL) {
1202                         RETURN_FALSE;
1203                 }
1204         }
1205 
1206         pos = MBREX(search_pos);
1207         str = NULL;
1208         len = 0;
1209         if (MBREX(search_str) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING){
1210                 str = (OnigUChar *)Z_STRVAL_P(MBREX(search_str));
1211                 len = Z_STRLEN_P(MBREX(search_str));
1212         }
1213 
1214         if (MBREX(search_re) == NULL) {
1215                 php_error_docref(NULL TSRMLS_CC, E_WARNING, "No regex given");
1216                 RETURN_FALSE;
1217         }
1218 
1219         if (str == NULL) {
1220                 php_error_docref(NULL TSRMLS_CC, E_WARNING, "No string given");
1221                 RETURN_FALSE;
1222         }
1223 
1224         if (MBREX(search_regs)) {
1225                 onig_region_free(MBREX(search_regs), 1);
1226         }
1227         MBREX(search_regs) = onig_region_new();
1228 
1229         err = onig_search(MBREX(search_re), str, str + len, str + pos, str  + len, MBREX(search_regs), 0);
1230         if (err == ONIG_MISMATCH) {
1231                 MBREX(search_pos) = len;
1232                 RETVAL_FALSE;
1233         } else if (err <= -2) {
1234                 OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1235                 onig_error_code_to_str(err_str, err);
1236                 php_error_docref(NULL TSRMLS_CC, E_WARNING, "mbregex search failure in mbregex_search(): %s", err_str);
1237                 RETVAL_FALSE;
1238         } else {
1239                 if (MBREX(search_regs)->beg[0] == MBREX(search_regs)->end[0]) {
1240                         php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty regular expression");
1241                 }
1242                 switch (mode) {
1243                 case 1:
1244                         array_init(return_value);
1245                         beg = MBREX(search_regs)->beg[0];
1246                         end = MBREX(search_regs)->end[0];
1247                         add_next_index_long(return_value, beg);
1248                         add_next_index_long(return_value, end - beg);
1249                         break;
1250                 case 2:
1251                         array_init(return_value);
1252                         n = MBREX(search_regs)->num_regs;
1253                         for (i = 0; i < n; i++) {
1254                                 beg = MBREX(search_regs)->beg[i];
1255                                 end = MBREX(search_regs)->end[i];
1256                                 if (beg >= 0 && beg <= end && end <= len) {
1257                                         add_index_stringl(return_value, i, (char *)&str[beg], end - beg, 1);
1258                                 } else {
1259                                         add_index_bool(return_value, i, 0);
1260                                 }
1261                         }
1262                         break;
1263                 default:
1264                         RETVAL_TRUE;
1265                         break;
1266                 }
1267                 end = MBREX(search_regs)->end[0];
1268                 if (pos < end) {
1269                         MBREX(search_pos) = end;
1270                 } else {
1271                         MBREX(search_pos) = pos + 1;
1272                 }
1273         }
1274 
1275         if (err < 0) {
1276                 onig_region_free(MBREX(search_regs), 1);
1277                 MBREX(search_regs) = (OnigRegion *)NULL;
1278         }
1279 }
1280 /* }}} */
1281 
1282 /* {{{ proto bool mb_ereg_search([string pattern[, string option]])
1283    Regular expression search for multibyte string */
1284 PHP_FUNCTION(mb_ereg_search)
1285 {
1286         _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
1287 }
1288 /* }}} */
1289 
1290 /* {{{ proto array mb_ereg_search_pos([string pattern[, string option]])
1291    Regular expression search for multibyte string */
1292 PHP_FUNCTION(mb_ereg_search_pos)
1293 {
1294         _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
1295 }
1296 /* }}} */
1297 
1298 /* {{{ proto array mb_ereg_search_regs([string pattern[, string option]])
1299    Regular expression search for multibyte string */
1300 PHP_FUNCTION(mb_ereg_search_regs)
1301 {
1302         _php_mb_regex_ereg_search_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 2);
1303 }
1304 /* }}} */
1305 
1306 /* {{{ proto bool mb_ereg_search_init(string string [, string pattern[, string option]])
1307    Initialize string and regular expression for search. */
1308 PHP_FUNCTION(mb_ereg_search_init)
1309 {
1310         size_t argc = ZEND_NUM_ARGS();
1311         zval *arg_str;
1312         char *arg_pattern = NULL, *arg_options = NULL;
1313         int arg_pattern_len = 0, arg_options_len = 0;
1314         OnigSyntaxType *syntax = NULL;
1315         OnigOptionType option;
1316 
1317         if (zend_parse_parameters(argc TSRMLS_CC, "z|ss", &arg_str, &arg_pattern, &arg_pattern_len, &arg_options, &arg_options_len) == FAILURE) {
1318                 return;
1319         }
1320         
1321         if (argc > 1 && arg_pattern_len == 0) {
1322                 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty pattern");
1323                 RETURN_FALSE;
1324         }
1325 
1326         option = MBREX(regex_default_options);
1327         syntax = MBREX(regex_default_syntax);
1328 
1329         if (argc == 3) {
1330                 option = 0;
1331                 _php_mb_regex_init_options(arg_options, arg_options_len, &option, &syntax, NULL);
1332         }
1333 
1334         if (argc > 1) {
1335                 /* create regex pattern buffer */
1336                 if ((MBREX(search_re) = php_mbregex_compile_pattern(arg_pattern, arg_pattern_len, option, MBREX(current_mbctype), syntax TSRMLS_CC)) == NULL) {
1337                         RETURN_FALSE;
1338                 }
1339         }
1340 
1341         if (MBREX(search_str) != NULL) {
1342                 zval_ptr_dtor(&MBREX(search_str));
1343                 MBREX(search_str) = (zval *)NULL;
1344         }
1345 
1346         MBREX(search_str) = arg_str;
1347         Z_ADDREF_P(MBREX(search_str));
1348         SEPARATE_ZVAL_IF_NOT_REF(&MBREX(search_str));
1349 
1350         MBREX(search_pos) = 0;
1351 
1352         if (MBREX(search_regs) != NULL) {
1353                 onig_region_free(MBREX(search_regs), 1);
1354                 MBREX(search_regs) = (OnigRegion *) NULL;
1355         }
1356 
1357         RETURN_TRUE;
1358 }
1359 /* }}} */
1360 
1361 /* {{{ proto array mb_ereg_search_getregs(void)
1362    Get matched substring of the last time */
1363 PHP_FUNCTION(mb_ereg_search_getregs)
1364 {
1365         int n, i, len, beg, end;
1366         OnigUChar *str;
1367 
1368         if (MBREX(search_regs) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING && Z_STRVAL_P(MBREX(search_str)) != NULL) {
1369                 array_init(return_value);
1370 
1371                 str = (OnigUChar *)Z_STRVAL_P(MBREX(search_str));
1372                 len = Z_STRLEN_P(MBREX(search_str));
1373                 n = MBREX(search_regs)->num_regs;
1374                 for (i = 0; i < n; i++) {
1375                         beg = MBREX(search_regs)->beg[i];
1376                         end = MBREX(search_regs)->end[i];
1377                         if (beg >= 0 && beg <= end && end <= len) {
1378                                 add_index_stringl(return_value, i, (char *)&str[beg], end - beg, 1);
1379                         } else {
1380                                 add_index_bool(return_value, i, 0);
1381                         }
1382                 }
1383         } else {
1384                 RETVAL_FALSE;
1385         }
1386 }
1387 /* }}} */
1388 
1389 /* {{{ proto int mb_ereg_search_getpos(void)
1390    Get search start position */
1391 PHP_FUNCTION(mb_ereg_search_getpos)
1392 {
1393         RETVAL_LONG(MBREX(search_pos));
1394 }
1395 /* }}} */
1396 
1397 /* {{{ proto bool mb_ereg_search_setpos(int position)
1398    Set search start position */
1399 PHP_FUNCTION(mb_ereg_search_setpos)
1400 {
1401         long position;
1402 
1403         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "l", &position) == FAILURE) {
1404                 return;
1405         }
1406 
1407         if (position < 0 || (MBREX(search_str) != NULL && Z_TYPE_P(MBREX(search_str)) == IS_STRING && position >= Z_STRLEN_P(MBREX(search_str)))) {
1408                 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Position is out of range");
1409                 MBREX(search_pos) = 0;
1410                 RETURN_FALSE;
1411         }
1412 
1413         MBREX(search_pos) = position;
1414         RETURN_TRUE;
1415 }
1416 /* }}} */
1417 
1418 /* {{{ php_mb_regex_set_options */
1419 static void _php_mb_regex_set_options(OnigOptionType options, OnigSyntaxType *syntax, OnigOptionType *prev_options, OnigSyntaxType **prev_syntax TSRMLS_DC) 
1420 {
1421         if (prev_options != NULL) {
1422                 *prev_options = MBREX(regex_default_options);
1423         }
1424         if (prev_syntax != NULL) {
1425                 *prev_syntax = MBREX(regex_default_syntax);
1426         }
1427         MBREX(regex_default_options) = options;
1428         MBREX(regex_default_syntax) = syntax;
1429 }
1430 /* }}} */
1431 
1432 /* {{{ proto string mb_regex_set_options([string options])
1433    Set or get the default options for mbregex functions */
1434 PHP_FUNCTION(mb_regex_set_options)
1435 {
1436         OnigOptionType opt;
1437         OnigSyntaxType *syntax;
1438         char *string = NULL;
1439         int string_len;
1440         char buf[16];
1441 
1442         if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s",
1443                                   &string, &string_len) == FAILURE) {
1444                 RETURN_FALSE;
1445         }
1446         if (string != NULL) {
1447                 opt = 0;
1448                 syntax = NULL;
1449                 _php_mb_regex_init_options(string, string_len, &opt, &syntax, NULL);
1450                 _php_mb_regex_set_options(opt, syntax, NULL, NULL TSRMLS_CC);
1451         } else {
1452                 opt = MBREX(regex_default_options);
1453                 syntax = MBREX(regex_default_syntax);
1454         }
1455         _php_mb_regex_get_option_string(buf, sizeof(buf), opt, syntax);
1456 
1457         RETVAL_STRING(buf, 1);
1458 }
1459 /* }}} */
1460 
1461 #endif  /* HAVE_MBREGEX */
1462 
1463 /*
1464  * Local variables:
1465  * tab-width: 4
1466  * c-basic-offset: 4
1467  * End:
1468  * vim600: fdm=marker
1469  * vim: noet sw=4 ts=4
1470  */

/* [<][>][^][v][top][bottom][index][help] */