1 /*
2 +----------------------------------------------------------------------+
3 | PHP Version 5 |
4 +----------------------------------------------------------------------+
5 | Copyright (c) 1997-2016 The PHP Group |
6 +----------------------------------------------------------------------+
7 | This source file is subject to version 3.01 of the PHP license, |
8 | that is bundled with this package in the file LICENSE, and is |
9 | available through the world-wide-web at the following url: |
10 | http://www.php.net/license/3_01.txt |
11 | If you did not receive a copy of the PHP license and are unable to |
12 | obtain it through the world-wide-web, please send a note to |
13 | license@php.net so we can mail you a copy immediately. |
14 +----------------------------------------------------------------------+
15 | Author: Andrei Zmievski <andrei@php.net> |
16 +----------------------------------------------------------------------+
17 */
18
19 /* $Id$ */
20
21 #include "php.h"
22 #include "php_ini.h"
23 #include "php_globals.h"
24 #include "php_pcre.h"
25 #include "ext/standard/info.h"
26 #include "ext/standard/php_smart_str.h"
27
28 #if HAVE_PCRE || HAVE_BUNDLED_PCRE
29
30 #include "ext/standard/php_string.h"
31
/* Result ordering modes (low byte of the match flags) and the offset-capture bit. */
#define PREG_PATTERN_ORDER          1
#define PREG_SET_ORDER              2
#define PREG_OFFSET_CAPTURE         (1 << 8)

/* PREG_SPLIT_* option bits. */
#define PREG_SPLIT_NO_EMPTY         (1 << 0)
#define PREG_SPLIT_DELIM_CAPTURE    (1 << 1)
#define PREG_SPLIT_OFFSET_CAPTURE   (1 << 2)

/* Set in a cache entry's preg_options when the pattern carries the 'e' modifier. */
#define PREG_REPLACE_EVAL           (1 << 0)

/* PREG_GREP_* option bit. */
#define PREG_GREP_INVERT            (1 << 0)

/* Number of compiled patterns at which the regex cache starts evicting entries. */
#define PCRE_CACHE_SIZE             4096

/* not fully functional workaround for libpcre < 8.0, see bug #70232 */
#ifndef PCRE_NOTEMPTY_ATSTART
# define PCRE_NOTEMPTY_ATSTART PCRE_NOTEMPTY
#endif
50
/*
 * Error codes stored in PCRE_G(error_code) and exported to userland as the
 * PREG_*_ERROR constants. The numeric values are part of the public
 * interface, so they are spelled out explicitly.
 */
enum {
	PHP_PCRE_NO_ERROR              = 0,
	PHP_PCRE_INTERNAL_ERROR        = 1,
	PHP_PCRE_BACKTRACK_LIMIT_ERROR = 2,
	PHP_PCRE_RECURSION_LIMIT_ERROR = 3,
	PHP_PCRE_BAD_UTF8_ERROR        = 4,
	PHP_PCRE_BAD_UTF8_OFFSET_ERROR = 5
};
59
60
61 ZEND_DECLARE_MODULE_GLOBALS(pcre)
62
63
64 static void pcre_handle_exec_error(int pcre_code TSRMLS_DC) /* {{{ */
65 {
66 int preg_code = 0;
67
68 switch (pcre_code) {
69 case PCRE_ERROR_MATCHLIMIT:
70 preg_code = PHP_PCRE_BACKTRACK_LIMIT_ERROR;
71 break;
72
73 case PCRE_ERROR_RECURSIONLIMIT:
74 preg_code = PHP_PCRE_RECURSION_LIMIT_ERROR;
75 break;
76
77 case PCRE_ERROR_BADUTF8:
78 preg_code = PHP_PCRE_BAD_UTF8_ERROR;
79 break;
80
81 case PCRE_ERROR_BADUTF8_OFFSET:
82 preg_code = PHP_PCRE_BAD_UTF8_OFFSET_ERROR;
83 break;
84
85 default:
86 preg_code = PHP_PCRE_INTERNAL_ERROR;
87 break;
88 }
89
90 PCRE_G(error_code) = preg_code;
91 }
92 /* }}} */
93
94 static void php_free_pcre_cache(void *data) /* {{{ */
95 {
96 pcre_cache_entry *pce = (pcre_cache_entry *) data;
97 if (!pce) return;
98 pefree(pce->re, 1);
99 if (pce->extra) pefree(pce->extra, 1);
100 #if HAVE_SETLOCALE
101 if ((void*)pce->tables) pefree((void*)pce->tables, 1);
102 pefree(pce->locale, 1);
103 #endif
104 }
105 /* }}} */
106
107 static PHP_GINIT_FUNCTION(pcre) /* {{{ */
108 {
109 zend_hash_init(&pcre_globals->pcre_cache, 0, NULL, php_free_pcre_cache, 1);
110 pcre_globals->backtrack_limit = 0;
111 pcre_globals->recursion_limit = 0;
112 pcre_globals->error_code = PHP_PCRE_NO_ERROR;
113 }
114 /* }}} */
115
116 static PHP_GSHUTDOWN_FUNCTION(pcre) /* {{{ */
117 {
118 zend_hash_destroy(&pcre_globals->pcre_cache);
119 }
120 /* }}} */
121
122 PHP_INI_BEGIN()
123 STD_PHP_INI_ENTRY("pcre.backtrack_limit", "1000000", PHP_INI_ALL, OnUpdateLong, backtrack_limit, zend_pcre_globals, pcre_globals)
124 STD_PHP_INI_ENTRY("pcre.recursion_limit", "100000", PHP_INI_ALL, OnUpdateLong, recursion_limit, zend_pcre_globals, pcre_globals)
125 PHP_INI_END()
126
127
/* {{{ PHP_MINFO_FUNCTION(pcre)
 * Prints the extension's info table (support flag and PCRE library version)
 * followed by its INI entries.
 */
static PHP_MINFO_FUNCTION(pcre)
{
	const char *library_version = pcre_version();

	php_info_print_table_start();
	php_info_print_table_row(2, "PCRE (Perl Compatible Regular Expressions) Support", "enabled");
	php_info_print_table_row(2, "PCRE Library Version", library_version);
	php_info_print_table_end();

	DISPLAY_INI_ENTRIES();
}
/* }}} */
139
140 /* {{{ PHP_MINIT_FUNCTION(pcre) */
141 static PHP_MINIT_FUNCTION(pcre)
142 {
143 REGISTER_INI_ENTRIES();
144
145 REGISTER_LONG_CONSTANT("PREG_PATTERN_ORDER", PREG_PATTERN_ORDER, CONST_CS | CONST_PERSISTENT);
146 REGISTER_LONG_CONSTANT("PREG_SET_ORDER", PREG_SET_ORDER, CONST_CS | CONST_PERSISTENT);
147 REGISTER_LONG_CONSTANT("PREG_OFFSET_CAPTURE", PREG_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
148 REGISTER_LONG_CONSTANT("PREG_SPLIT_NO_EMPTY", PREG_SPLIT_NO_EMPTY, CONST_CS | CONST_PERSISTENT);
149 REGISTER_LONG_CONSTANT("PREG_SPLIT_DELIM_CAPTURE", PREG_SPLIT_DELIM_CAPTURE, CONST_CS | CONST_PERSISTENT);
150 REGISTER_LONG_CONSTANT("PREG_SPLIT_OFFSET_CAPTURE", PREG_SPLIT_OFFSET_CAPTURE, CONST_CS | CONST_PERSISTENT);
151 REGISTER_LONG_CONSTANT("PREG_GREP_INVERT", PREG_GREP_INVERT, CONST_CS | CONST_PERSISTENT);
152
153 REGISTER_LONG_CONSTANT("PREG_NO_ERROR", PHP_PCRE_NO_ERROR, CONST_CS | CONST_PERSISTENT);
154 REGISTER_LONG_CONSTANT("PREG_INTERNAL_ERROR", PHP_PCRE_INTERNAL_ERROR, CONST_CS | CONST_PERSISTENT);
155 REGISTER_LONG_CONSTANT("PREG_BACKTRACK_LIMIT_ERROR", PHP_PCRE_BACKTRACK_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
156 REGISTER_LONG_CONSTANT("PREG_RECURSION_LIMIT_ERROR", PHP_PCRE_RECURSION_LIMIT_ERROR, CONST_CS | CONST_PERSISTENT);
157 REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_ERROR", PHP_PCRE_BAD_UTF8_ERROR, CONST_CS | CONST_PERSISTENT);
158 REGISTER_LONG_CONSTANT("PREG_BAD_UTF8_OFFSET_ERROR", PHP_PCRE_BAD_UTF8_OFFSET_ERROR, CONST_CS | CONST_PERSISTENT);
159 REGISTER_STRING_CONSTANT("PCRE_VERSION", (char *)pcre_version(), CONST_CS | CONST_PERSISTENT);
160
161 return SUCCESS;
162 }
163 /* }}} */
164
165 /* {{{ PHP_MSHUTDOWN_FUNCTION(pcre) */
166 static PHP_MSHUTDOWN_FUNCTION(pcre)
167 {
168 UNREGISTER_INI_ENTRIES();
169
170 return SUCCESS;
171 }
172 /* }}} */
173
174 /* {{{ static pcre_clean_cache */
175 static int pcre_clean_cache(void *data, void *arg TSRMLS_DC)
176 {
177 pcre_cache_entry *pce = (pcre_cache_entry *) data;
178 int *num_clean = (int *)arg;
179
180 if (*num_clean > 0 && !pce->refcount) {
181 (*num_clean)--;
182 return ZEND_HASH_APPLY_REMOVE;
183 } else {
184 return ZEND_HASH_APPLY_KEEP;
185 }
186 }
187 /* }}} */
188
189 /* {{{ static make_subpats_table */
190 static char **make_subpats_table(int num_subpats, pcre_cache_entry *pce TSRMLS_DC)
191 {
192 pcre_extra *extra = pce->extra;
193 int name_cnt = 0, name_size, ni = 0;
194 int rc;
195 char *name_table;
196 unsigned short name_idx;
197 char **subpat_names = (char **)ecalloc(num_subpats, sizeof(char *));
198
199 rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMECOUNT, &name_cnt);
200 if (rc < 0) {
201 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
202 efree(subpat_names);
203 return NULL;
204 }
205 if (name_cnt > 0) {
206 int rc1, rc2;
207
208 rc1 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMETABLE, &name_table);
209 rc2 = pcre_fullinfo(pce->re, extra, PCRE_INFO_NAMEENTRYSIZE, &name_size);
210 rc = rc2 ? rc2 : rc1;
211 if (rc < 0) {
212 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
213 efree(subpat_names);
214 return NULL;
215 }
216
217 while (ni++ < name_cnt) {
218 name_idx = 0xff * (unsigned char)name_table[0] + (unsigned char)name_table[1];
219 subpat_names[name_idx] = name_table + 2;
220 if (is_numeric_string(subpat_names[name_idx], strlen(subpat_names[name_idx]), NULL, NULL, 0) > 0) {
221 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Numeric named subpatterns are not allowed");
222 efree(subpat_names);
223 return NULL;
224 }
225 name_table += name_size;
226 }
227 }
228
229 return subpat_names;
230 }
231 /* }}} */
232
233 /* {{{ static calculate_unit_length */
234 /* Calculates the byte length of the next character. Assumes valid UTF-8 for PCRE_UTF8. */
235 static zend_always_inline int calculate_unit_length(pcre_cache_entry *pce, char *start)
236 {
237 int unit_len;
238
239 if (pce->compile_options & PCRE_UTF8) {
240 char *end = start;
241
242 /* skip continuation bytes */
243 while ((*++end & 0xC0) == 0x80);
244 unit_len = end - start;
245 } else {
246 unit_len = 1;
247 }
248 return unit_len;
249 }
250 /* }}} */
251
252 /* {{{ pcre_get_compiled_regex_cache
253 */
254 PHPAPI pcre_cache_entry* pcre_get_compiled_regex_cache(char *regex, int regex_len TSRMLS_DC)
255 {
256 pcre *re = NULL;
257 pcre_extra *extra;
258 int coptions = 0;
259 int soptions = 0;
260 const char *error;
261 int erroffset;
262 char delimiter;
263 char start_delimiter;
264 char end_delimiter;
265 char *p, *pp;
266 char *pattern;
267 int do_study = 0;
268 int poptions = 0;
269 int count = 0;
270 unsigned const char *tables = NULL;
271 #if HAVE_SETLOCALE
272 char *locale;
273 #endif
274 pcre_cache_entry *pce;
275 pcre_cache_entry new_entry;
276 char *tmp = NULL;
277
278 #if HAVE_SETLOCALE
279 # if defined(PHP_WIN32) && defined(ZTS)
280 _configthreadlocale(_ENABLE_PER_THREAD_LOCALE);
281 # endif
282 locale = setlocale(LC_CTYPE, NULL);
283 #endif
284
285 /* Try to lookup the cached regex entry, and if successful, just pass
286 back the compiled pattern, otherwise go on and compile it. */
287 if (zend_hash_find(&PCRE_G(pcre_cache), regex, regex_len+1, (void **)&pce) == SUCCESS) {
288 /*
289 * We use a quick pcre_fullinfo() check to see whether cache is corrupted, and if it
290 * is, we flush it and compile the pattern from scratch.
291 */
292 if (pcre_fullinfo(pce->re, NULL, PCRE_INFO_CAPTURECOUNT, &count) == PCRE_ERROR_BADMAGIC) {
293 zend_hash_clean(&PCRE_G(pcre_cache));
294 } else {
295 #if HAVE_SETLOCALE
296 if (!strcmp(pce->locale, locale)) {
297 #endif
298 return pce;
299 #if HAVE_SETLOCALE
300 }
301 #endif
302 }
303 }
304
305 p = regex;
306
307 /* Parse through the leading whitespace, and display a warning if we
308 get to the end without encountering a delimiter. */
309 while (isspace((int)*(unsigned char *)p)) p++;
310 if (*p == 0) {
311 php_error_docref(NULL TSRMLS_CC, E_WARNING,
312 p < regex + regex_len ? "Null byte in regex" : "Empty regular expression");
313 return NULL;
314 }
315
316 /* Get the delimiter and display a warning if it is alphanumeric
317 or a backslash. */
318 delimiter = *p++;
319 if (isalnum((int)*(unsigned char *)&delimiter) || delimiter == '\\') {
320 php_error_docref(NULL TSRMLS_CC,E_WARNING, "Delimiter must not be alphanumeric or backslash");
321 return NULL;
322 }
323
324 start_delimiter = delimiter;
325 if ((pp = strchr("([{< )]}> )]}>", delimiter)))
326 delimiter = pp[5];
327 end_delimiter = delimiter;
328
329 pp = p;
330
331 if (start_delimiter == end_delimiter) {
332 /* We need to iterate through the pattern, searching for the ending delimiter,
333 but skipping the backslashed delimiters. If the ending delimiter is not
334 found, display a warning. */
335 while (*pp != 0) {
336 if (*pp == '\\' && pp[1] != 0) pp++;
337 else if (*pp == delimiter)
338 break;
339 pp++;
340 }
341 } else {
342 /* We iterate through the pattern, searching for the matching ending
343 * delimiter. For each matching starting delimiter, we increment nesting
344 * level, and decrement it for each matching ending delimiter. If we
345 * reach the end of the pattern without matching, display a warning.
346 */
347 int brackets = 1; /* brackets nesting level */
348 while (*pp != 0) {
349 if (*pp == '\\' && pp[1] != 0) pp++;
350 else if (*pp == end_delimiter && --brackets <= 0)
351 break;
352 else if (*pp == start_delimiter)
353 brackets++;
354 pp++;
355 }
356 }
357
358 if (*pp == 0) {
359 if (pp < regex + regex_len) {
360 php_error_docref(NULL TSRMLS_CC,E_WARNING, "Null byte in regex");
361 } else if (start_delimiter == end_delimiter) {
362 php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending delimiter '%c' found", delimiter);
363 } else {
364 php_error_docref(NULL TSRMLS_CC,E_WARNING, "No ending matching delimiter '%c' found", delimiter);
365 }
366 return NULL;
367 }
368
369 /* Make a copy of the actual pattern. */
370 pattern = estrndup(p, pp-p);
371
372 /* Move on to the options */
373 pp++;
374
375 /* Parse through the options, setting appropriate flags. Display
376 a warning if we encounter an unknown modifier. */
377 while (pp < regex + regex_len) {
378 switch (*pp++) {
379 /* Perl compatible options */
380 case 'i': coptions |= PCRE_CASELESS; break;
381 case 'm': coptions |= PCRE_MULTILINE; break;
382 case 's': coptions |= PCRE_DOTALL; break;
383 case 'x': coptions |= PCRE_EXTENDED; break;
384
385 /* PCRE specific options */
386 case 'A': coptions |= PCRE_ANCHORED; break;
387 case 'D': coptions |= PCRE_DOLLAR_ENDONLY;break;
388 case 'S': do_study = 1; break;
389 case 'U': coptions |= PCRE_UNGREEDY; break;
390 case 'X': coptions |= PCRE_EXTRA; break;
391 case 'u': coptions |= PCRE_UTF8;
392 /* In PCRE, by default, \d, \D, \s, \S, \w, and \W recognize only ASCII
393 characters, even in UTF-8 mode. However, this can be changed by setting
394 the PCRE_UCP option. */
395 #ifdef PCRE_UCP
396 coptions |= PCRE_UCP;
397 #endif
398 break;
399
400 /* Custom preg options */
401 case 'e': poptions |= PREG_REPLACE_EVAL; break;
402
403 case ' ':
404 case '\n':
405 break;
406
407 default:
408 if (pp[-1]) {
409 php_error_docref(NULL TSRMLS_CC,E_WARNING, "Unknown modifier '%c'", pp[-1]);
410 } else {
411 php_error_docref(NULL TSRMLS_CC,E_WARNING, "Null byte in regex");
412 }
413 efree(pattern);
414 return NULL;
415 }
416 }
417
418 #if HAVE_SETLOCALE
419 if (strcmp(locale, "C"))
420 tables = pcre_maketables();
421 #endif
422
423 /* Compile pattern and display a warning if compilation failed. */
424 re = pcre_compile(pattern,
425 coptions,
426 &error,
427 &erroffset,
428 tables);
429
430 if (re == NULL) {
431 php_error_docref(NULL TSRMLS_CC,E_WARNING, "Compilation failed: %s at offset %d", error, erroffset);
432 efree(pattern);
433 if (tables) {
434 pefree((void*)tables, 1);
435 }
436 return NULL;
437 }
438
439 /* If study option was specified, study the pattern and
440 store the result in extra for passing to pcre_exec. */
441 if (do_study) {
442 extra = pcre_study(re, soptions, &error);
443 if (extra) {
444 extra->flags |= PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
445 }
446 if (error != NULL) {
447 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Error while studying pattern");
448 }
449 } else {
450 extra = NULL;
451 }
452
453 efree(pattern);
454
455 /*
456 * If we reached cache limit, clean out the items from the head of the list;
457 * these are supposedly the oldest ones (but not necessarily the least used
458 * ones).
459 */
460 if (zend_hash_num_elements(&PCRE_G(pcre_cache)) == PCRE_CACHE_SIZE) {
461 int num_clean = PCRE_CACHE_SIZE / 8;
462 zend_hash_apply_with_argument(&PCRE_G(pcre_cache), pcre_clean_cache, &num_clean TSRMLS_CC);
463 }
464
465 /* Store the compiled pattern and extra info in the cache. */
466 new_entry.re = re;
467 new_entry.extra = extra;
468 new_entry.preg_options = poptions;
469 new_entry.compile_options = coptions;
470 #if HAVE_SETLOCALE
471 new_entry.locale = pestrdup(locale, 1);
472 new_entry.tables = tables;
473 #endif
474 new_entry.refcount = 0;
475
476 /*
477 * Interned strings are not duplicated when stored in HashTable,
478 * but all the interned strings created during HTTP request are removed
479 * at end of request. However PCRE_G(pcre_cache) must be consistent
480 * on the next request as well. So we disable usage of interned strings
481 * as hash keys especually for this table.
482 * See bug #63180
483 */
484 if (IS_INTERNED(regex)) {
485 regex = tmp = estrndup(regex, regex_len);
486 }
487
488 zend_hash_update(&PCRE_G(pcre_cache), regex, regex_len+1, (void *)&new_entry,
489 sizeof(pcre_cache_entry), (void**)&pce);
490
491 if (tmp) {
492 efree(tmp);
493 }
494
495 return pce;
496 }
497 /* }}} */
498
499 /* {{{ pcre_get_compiled_regex
500 */
501 PHPAPI pcre* pcre_get_compiled_regex(char *regex, pcre_extra **extra, int *preg_options TSRMLS_DC)
502 {
503 pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC);
504
505 if (extra) {
506 *extra = pce ? pce->extra : NULL;
507 }
508 if (preg_options) {
509 *preg_options = pce ? pce->preg_options : 0;
510 }
511
512 return pce ? pce->re : NULL;
513 }
514 /* }}} */
515
516 /* {{{ pcre_get_compiled_regex_ex
517 */
518 PHPAPI pcre* pcre_get_compiled_regex_ex(char *regex, pcre_extra **extra, int *preg_options, int *compile_options TSRMLS_DC)
519 {
520 pcre_cache_entry * pce = pcre_get_compiled_regex_cache(regex, strlen(regex) TSRMLS_CC);
521
522 if (extra) {
523 *extra = pce ? pce->extra : NULL;
524 }
525 if (preg_options) {
526 *preg_options = pce ? pce->preg_options : 0;
527 }
528 if (compile_options) {
529 *compile_options = pce ? pce->compile_options : 0;
530 }
531
532 return pce ? pce->re : NULL;
533 }
534 /* }}} */
535
536 /* {{{ add_offset_pair */
537 static inline void add_offset_pair(zval *result, char *str, int len, int offset, char *name)
538 {
539 zval *match_pair;
540
541 ALLOC_ZVAL(match_pair);
542 array_init(match_pair);
543 INIT_PZVAL(match_pair);
544
545 /* Add (match, offset) to the return value */
546 add_next_index_stringl(match_pair, str, len, 1);
547 add_next_index_long(match_pair, offset);
548
549 if (name) {
550 zval_add_ref(&match_pair);
551 zend_hash_update(Z_ARRVAL_P(result), name, strlen(name)+1, &match_pair, sizeof(zval *), NULL);
552 }
553 zend_hash_next_index_insert(Z_ARRVAL_P(result), &match_pair, sizeof(zval *), NULL);
554 }
555 /* }}} */
556
557 static void php_do_pcre_match(INTERNAL_FUNCTION_PARAMETERS, int global) /* {{{ */
558 {
559 /* parameters */
560 char *regex; /* Regular expression */
561 char *subject; /* String to match against */
562 int regex_len;
563 int subject_len;
564 pcre_cache_entry *pce; /* Compiled regular expression */
565 zval *subpats = NULL; /* Array for subpatterns */
566 long flags = 0; /* Match control flags */
567 long start_offset = 0; /* Where the new search starts */
568
569 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|zll", ®ex, ®ex_len,
570 &subject, &subject_len, &subpats, &flags, &start_offset) == FAILURE) {
571 RETURN_FALSE;
572 }
573
574 /* Compile regex or get it from cache. */
575 if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
576 RETURN_FALSE;
577 }
578
579 pce->refcount++;
580 php_pcre_match_impl(pce, subject, subject_len, return_value, subpats,
581 global, ZEND_NUM_ARGS() >= 4, flags, start_offset TSRMLS_CC);
582 pce->refcount--;
583 }
584 /* }}} */
585
586 /* {{{ php_pcre_match_impl() */
587 PHPAPI void php_pcre_match_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
588 zval *subpats, int global, int use_flags, long flags, long start_offset TSRMLS_DC)
589 {
590 zval *result_set, /* Holds a set of subpatterns after
591 a global match */
592 **match_sets = NULL; /* An array of sets of matches for each
593 subpattern after a global match */
594 pcre_extra *extra = pce->extra;/* Holds results of studying */
595 pcre_extra extra_data; /* Used locally for exec options */
596 int exoptions = 0; /* Execution options */
597 int count = 0; /* Count of matched subpatterns */
598 int *offsets; /* Array of subpattern offsets */
599 int num_subpats; /* Number of captured subpatterns */
600 int size_offsets; /* Size of the offsets array */
601 int matched; /* Has anything matched */
602 int g_notempty = 0; /* If the match should not be empty */
603 const char **stringlist; /* Holds list of subpatterns */
604 char **subpat_names; /* Array for named subpatterns */
605 int i, rc;
606 int subpats_order; /* Order of subpattern matches */
607 int offset_capture; /* Capture match offsets: yes/no */
608 unsigned char *mark = NULL; /* Target for MARK name */
609 zval *marks = NULL; /* Array of marks for PREG_PATTERN_ORDER */
610
611 /* Overwrite the passed-in value for subpatterns with an empty array. */
612 if (subpats != NULL) {
613 zval_dtor(subpats);
614 array_init(subpats);
615 }
616
617 subpats_order = global ? PREG_PATTERN_ORDER : 0;
618
619 if (use_flags) {
620 offset_capture = flags & PREG_OFFSET_CAPTURE;
621
622 /*
623 * subpats_order is pre-set to pattern mode so we change it only if
624 * necessary.
625 */
626 if (flags & 0xff) {
627 subpats_order = flags & 0xff;
628 }
629 if ((global && (subpats_order < PREG_PATTERN_ORDER || subpats_order > PREG_SET_ORDER)) ||
630 (!global && subpats_order != 0)) {
631 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid flags specified");
632 return;
633 }
634 } else {
635 offset_capture = 0;
636 }
637
638 /* Negative offset counts from the end of the string. */
639 if (start_offset < 0) {
640 start_offset = subject_len + start_offset;
641 if (start_offset < 0) {
642 start_offset = 0;
643 }
644 }
645
646 if (extra == NULL) {
647 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
648 extra = &extra_data;
649 }
650 extra->match_limit = PCRE_G(backtrack_limit);
651 extra->match_limit_recursion = PCRE_G(recursion_limit);
652 #ifdef PCRE_EXTRA_MARK
653 extra->mark = &mark;
654 extra->flags |= PCRE_EXTRA_MARK;
655 #endif
656
657 /* Calculate the size of the offsets array, and allocate memory for it. */
658 rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
659 if (rc < 0) {
660 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
661 RETURN_FALSE;
662 }
663 num_subpats++;
664 size_offsets = num_subpats * 3;
665
666 /*
667 * Build a mapping from subpattern numbers to their names. We will always
668 * allocate the table, even though there may be no named subpatterns. This
669 * avoids somewhat more complicated logic in the inner loops.
670 */
671 subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC);
672 if (!subpat_names) {
673 RETURN_FALSE;
674 }
675
676 offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
677 memset(offsets, 0, size_offsets*sizeof(int));
678 /* Allocate match sets array and initialize the values. */
679 if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
680 match_sets = (zval **)safe_emalloc(num_subpats, sizeof(zval *), 0);
681 for (i=0; i<num_subpats; i++) {
682 ALLOC_ZVAL(match_sets[i]);
683 array_init(match_sets[i]);
684 INIT_PZVAL(match_sets[i]);
685 }
686 }
687
688 matched = 0;
689 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
690
691 do {
692 /* Execute the regular expression. */
693 count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
694 exoptions|g_notempty, offsets, size_offsets);
695
696 /* the string was already proved to be valid UTF-8 */
697 exoptions |= PCRE_NO_UTF8_CHECK;
698
699 /* Check for too many substrings condition. */
700 if (count == 0) {
701 php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings");
702 count = size_offsets/3;
703 }
704
705 /* If something has matched */
706 if (count > 0) {
707 matched++;
708
709 /* If subpatterns array has been passed, fill it in with values. */
710 if (subpats != NULL) {
711 /* Try to get the list of substrings and display a warning if failed. */
712 if ((offsets[1] - offsets[0] < 0) || pcre_get_substring_list(subject, offsets, count, &stringlist) < 0) {
713 efree(subpat_names);
714 efree(offsets);
715 if (match_sets) efree(match_sets);
716 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Get subpatterns list failed");
717 RETURN_FALSE;
718 }
719
720 if (global) { /* global pattern matching */
721 if (subpats && subpats_order == PREG_PATTERN_ORDER) {
722 /* For each subpattern, insert it into the appropriate array. */
723 for (i = 0; i < count; i++) {
724 if (offset_capture) {
725 add_offset_pair(match_sets[i], (char *)stringlist[i],
726 offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], NULL);
727 } else {
728 add_next_index_stringl(match_sets[i], (char *)stringlist[i],
729 offsets[(i<<1)+1] - offsets[i<<1], 1);
730 }
731 }
732 /* Add MARK, if available */
733 if (mark) {
734 if (!marks) {
735 MAKE_STD_ZVAL(marks);
736 array_init(marks);
737 }
738 add_index_string(marks, matched - 1, (char *) mark, 1);
739 }
740 /*
741 * If the number of captured subpatterns on this run is
742 * less than the total possible number, pad the result
743 * arrays with empty strings.
744 */
745 if (count < num_subpats) {
746 for (; i < num_subpats; i++) {
747 add_next_index_string(match_sets[i], "", 1);
748 }
749 }
750 } else {
751 /* Allocate the result set array */
752 ALLOC_ZVAL(result_set);
753 array_init(result_set);
754 INIT_PZVAL(result_set);
755
756 /* Add all the subpatterns to it */
757 for (i = 0; i < count; i++) {
758 if (offset_capture) {
759 add_offset_pair(result_set, (char *)stringlist[i],
760 offsets[(i<<1)+1] - offsets[i<<1], offsets[i<<1], subpat_names[i]);
761 } else {
762 if (subpat_names[i]) {
763 add_assoc_stringl(result_set, subpat_names[i], (char *)stringlist[i],
764 offsets[(i<<1)+1] - offsets[i<<1], 1);
765 }
766 add_next_index_stringl(result_set, (char *)stringlist[i],
767 offsets[(i<<1)+1] - offsets[i<<1], 1);
768 }
769 }
770 /* Add MARK, if available */
771 if (mark) {
772 add_assoc_string(result_set, "MARK", (char *) mark, 1);
773 }
774 /* And add it to the output array */
775 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &result_set, sizeof(zval *), NULL);
776 }
777 } else { /* single pattern matching */
778 /* For each subpattern, insert it into the subpatterns array. */
779 for (i = 0; i < count; i++) {
780 if (offset_capture) {
781 add_offset_pair(subpats, (char *)stringlist[i],
782 offsets[(i<<1)+1] - offsets[i<<1],
783 offsets[i<<1], subpat_names[i]);
784 } else {
785 if (subpat_names[i]) {
786 add_assoc_stringl(subpats, subpat_names[i], (char *)stringlist[i],
787 offsets[(i<<1)+1] - offsets[i<<1], 1);
788 }
789 add_next_index_stringl(subpats, (char *)stringlist[i],
790 offsets[(i<<1)+1] - offsets[i<<1], 1);
791 }
792 }
793 /* Add MARK, if available */
794 if (mark) {
795 add_assoc_string(subpats, "MARK", (char *) mark, 1);
796 }
797 }
798
799 pcre_free((void *) stringlist);
800 }
801 } else if (count == PCRE_ERROR_NOMATCH) {
802 /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
803 this is not necessarily the end. We need to advance
804 the start offset, and continue. Fudge the offset values
805 to achieve this, unless we're already at the end of the string. */
806 if (g_notempty != 0 && start_offset < subject_len) {
807 int unit_len = calculate_unit_length(pce, subject + start_offset);
808
809 offsets[0] = start_offset;
810 offsets[1] = start_offset + unit_len;
811 } else
812 break;
813 } else {
814 pcre_handle_exec_error(count TSRMLS_CC);
815 break;
816 }
817
818 /* If we have matched an empty string, mimic what Perl's /g options does.
819 This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
820 the match again at the same point. If this fails (picked up above) we
821 advance to the next character. */
822 g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
823
824 /* Advance to the position right after the last full match */
825 start_offset = offsets[1];
826 } while (global);
827
828 /* Add the match sets to the output array and clean up */
829 if (global && subpats && subpats_order == PREG_PATTERN_ORDER) {
830 for (i = 0; i < num_subpats; i++) {
831 if (subpat_names[i]) {
832 zend_hash_update(Z_ARRVAL_P(subpats), subpat_names[i],
833 strlen(subpat_names[i])+1, &match_sets[i], sizeof(zval *), NULL);
834 Z_ADDREF_P(match_sets[i]);
835 }
836 zend_hash_next_index_insert(Z_ARRVAL_P(subpats), &match_sets[i], sizeof(zval *), NULL);
837 }
838 efree(match_sets);
839
840 if (marks) {
841 add_assoc_zval(subpats, "MARK", marks);
842 }
843 }
844
845 efree(offsets);
846 efree(subpat_names);
847
848 /* Did we encounter an error? */
849 if (PCRE_G(error_code) == PHP_PCRE_NO_ERROR) {
850 RETVAL_LONG(matched);
851 } else {
852 RETVAL_FALSE;
853 }
854 }
855 /* }}} */
856
857 /* {{{ proto int preg_match(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
858 Perform a Perl-style regular expression match */
859 static PHP_FUNCTION(preg_match)
860 {
861 php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
862 }
863 /* }}} */
864
865 /* {{{ proto int preg_match_all(string pattern, string subject [, array &subpatterns [, int flags [, int offset]]])
866 Perform a Perl-style global regular expression match */
867 static PHP_FUNCTION(preg_match_all)
868 {
869 php_do_pcre_match(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
870 }
871 /* }}} */
872
/* {{{ preg_get_backref
 * Parses a one- or two-digit back-reference at *str. The first character is
 * taken to be the introducer and is skipped unchecked; "${" opens the braced
 * form, which must be closed by '}'.
 *
 * On success stores the group number in *backref, advances *str past the
 * reference and returns 1. On failure returns 0 and leaves *str untouched
 * (*backref may already have been written).
 */
static int preg_get_backref(char **str, int *backref)
{
	char *cursor = *str;
	int braced;

	/* An introducer that is the last character cannot start a reference. */
	if (cursor[1] == '\0') {
		return 0;
	}

	braced = (cursor[0] == '$' && cursor[1] == '{');
	cursor += braced ? 2 : 1;

	/* At least one digit is mandatory. */
	if (*cursor < '0' || *cursor > '9') {
		return 0;
	}
	*backref = *cursor - '0';
	cursor++;

	/* An optional second digit extends the number to two places. */
	if (*cursor >= '0' && *cursor <= '9') {
		*backref = *backref * 10 + *cursor - '0';
		cursor++;
	}

	if (braced) {
		if (*cursor != '}') {
			return 0;
		}
		cursor++;
	}

	*str = cursor;
	return 1;
}
/* }}} */
911
912 /* {{{ preg_do_repl_func
913 */
914 static int preg_do_repl_func(zval *function, char *subject, int *offsets, char **subpat_names, int count, unsigned char *mark, char **result TSRMLS_DC)
915 {
916 zval *retval_ptr; /* Function return value */
917 zval **args[1]; /* Argument to pass to function */
918 zval *subpats; /* Captured subpatterns */
919 int result_len; /* Return value length */
920 int i;
921
922 MAKE_STD_ZVAL(subpats);
923 array_init(subpats);
924 for (i = 0; i < count; i++) {
925 if (subpat_names[i]) {
926 add_assoc_stringl(subpats, subpat_names[i], &subject[offsets[i<<1]] , offsets[(i<<1)+1] - offsets[i<<1], 1);
927 }
928 add_next_index_stringl(subpats, &subject[offsets[i<<1]], offsets[(i<<1)+1] - offsets[i<<1], 1);
929 }
930 if (mark) {
931 add_assoc_string(subpats, "MARK", (char *) mark, 1);
932 }
933 args[0] = &subpats;
934
935 if (call_user_function_ex(EG(function_table), NULL, function, &retval_ptr, 1, args, 0, NULL TSRMLS_CC) == SUCCESS && retval_ptr) {
936 convert_to_string_ex(&retval_ptr);
937 *result = estrndup(Z_STRVAL_P(retval_ptr), Z_STRLEN_P(retval_ptr));
938 result_len = Z_STRLEN_P(retval_ptr);
939 zval_ptr_dtor(&retval_ptr);
940 } else {
941 if (!EG(exception)) {
942 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to call custom replacement function");
943 }
944 result_len = offsets[1] - offsets[0];
945 *result = estrndup(&subject[offsets[0]], result_len);
946 }
947
948 zval_ptr_dtor(&subpats);
949
950 return result_len;
951 }
952 /* }}} */
953
954 /* {{{ preg_do_eval
955 */
956 static int preg_do_eval(char *eval_str, int eval_str_len, char *subject,
957 int *offsets, int count, char **result TSRMLS_DC)
958 {
959 zval retval; /* Return value from evaluation */
960 char *eval_str_end, /* End of eval string */
961 *match, /* Current match for a backref */
962 *esc_match, /* Quote-escaped match */
963 *walk, /* Used to walk the code string */
964 *segment, /* Start of segment to append while walking */
965 walk_last; /* Last walked character */
966 int match_len; /* Length of the match */
967 int esc_match_len; /* Length of the quote-escaped match */
968 int result_len; /* Length of the result of the evaluation */
969 int backref; /* Current backref */
970 char *compiled_string_description;
971 smart_str code = {0};
972
973 eval_str_end = eval_str + eval_str_len;
974 walk = segment = eval_str;
975 walk_last = 0;
976
977 while (walk < eval_str_end) {
978 /* If found a backreference.. */
979 if ('\\' == *walk || '$' == *walk) {
980 smart_str_appendl(&code, segment, walk - segment);
981 if (walk_last == '\\') {
982 code.c[code.len-1] = *walk++;
983 segment = walk;
984 walk_last = 0;
985 continue;
986 }
987 segment = walk;
988 if (preg_get_backref(&walk, &backref)) {
989 if (backref < count) {
990 /* Find the corresponding string match and substitute it
991 in instead of the backref */
992 match = subject + offsets[backref<<1];
993 match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
994 if (match_len) {
995 esc_match = php_addslashes(match, match_len, &esc_match_len, 0 TSRMLS_CC);
996 } else {
997 esc_match = match;
998 esc_match_len = 0;
999 }
1000 } else {
1001 esc_match = "";
1002 esc_match_len = 0;
1003 }
1004 smart_str_appendl(&code, esc_match, esc_match_len);
1005
1006 segment = walk;
1007
1008 /* Clean up and reassign */
1009 if (esc_match_len)
1010 efree(esc_match);
1011 continue;
1012 }
1013 }
1014 walk++;
1015 walk_last = walk[-1];
1016 }
1017 smart_str_appendl(&code, segment, walk - segment);
1018 smart_str_0(&code);
1019
1020 compiled_string_description = zend_make_compiled_string_description("regexp code" TSRMLS_CC);
1021 /* Run the code */
1022 if (zend_eval_stringl(code.c, code.len, &retval, compiled_string_description TSRMLS_CC) == FAILURE) {
1023 efree(compiled_string_description);
1024 php_error_docref(NULL TSRMLS_CC,E_ERROR, "Failed evaluating code: %s%s", PHP_EOL, code.c);
1025 /* zend_error() does not return in this case */
1026 }
1027 efree(compiled_string_description);
1028 convert_to_string(&retval);
1029
1030 /* Save the return value and its length */
1031 *result = estrndup(Z_STRVAL(retval), Z_STRLEN(retval));
1032 result_len = Z_STRLEN(retval);
1033
1034 /* Clean up */
1035 zval_dtor(&retval);
1036 smart_str_free(&code);
1037
1038 return result_len;
1039 }
1040 /* }}} */
1041
1042 /* {{{ php_pcre_replace
1043 */
1044 PHPAPI char *php_pcre_replace(char *regex, int regex_len,
1045 char *subject, int subject_len,
1046 zval *replace_val, int is_callable_replace,
1047 int *result_len, int limit, int *replace_count TSRMLS_DC)
1048 {
1049 pcre_cache_entry *pce; /* Compiled regular expression */
1050 char *result; /* Function result */
1051
1052 /* Compile regex or get it from cache. */
1053 if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
1054 return NULL;
1055 }
1056 pce->refcount++;
1057 result = php_pcre_replace_impl(pce, subject, subject_len, replace_val,
1058 is_callable_replace, result_len, limit, replace_count TSRMLS_CC);
1059 pce->refcount--;
1060
1061 return result;
1062 }
1063 /* }}} */
1064
1065 /* {{{ php_pcre_replace_impl() */
1066 PHPAPI char *php_pcre_replace_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *replace_val,
1067 int is_callable_replace, int *result_len, int limit, int *replace_count TSRMLS_DC)
1068 {
1069 pcre_extra *extra = pce->extra;/* Holds results of studying */
1070 pcre_extra extra_data; /* Used locally for exec options */
1071 int exoptions = 0; /* Execution options */
1072 int count = 0; /* Count of matched subpatterns */
1073 int *offsets; /* Array of subpattern offsets */
1074 char **subpat_names; /* Array for named subpatterns */
1075 int num_subpats; /* Number of captured subpatterns */
1076 int size_offsets; /* Size of the offsets array */
1077 int new_len; /* Length of needed storage */
1078 int alloc_len; /* Actual allocated length */
1079 int eval_result_len=0; /* Length of the eval'ed or
1080 function-returned string */
1081 int match_len; /* Length of the current match */
1082 int backref; /* Backreference number */
1083 int eval; /* If the replacement string should be eval'ed */
1084 int start_offset; /* Where the new search starts */
1085 int g_notempty=0; /* If the match should not be empty */
1086 int replace_len=0; /* Length of replacement string */
1087 char *result, /* Result of replacement */
1088 *replace=NULL, /* Replacement string */
1089 *new_buf, /* Temporary buffer for re-allocation */
1090 *walkbuf, /* Location of current replacement in the result */
1091 *walk, /* Used to walk the replacement string */
1092 *match, /* The current match */
1093 *piece, /* The current piece of subject */
1094 *replace_end=NULL, /* End of replacement string */
1095 *eval_result, /* Result of eval or custom function */
1096 walk_last; /* Last walked character */
1097 int rc;
1098 unsigned char *mark = NULL; /* Target for MARK name */
1099
1100 if (extra == NULL) {
1101 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1102 extra = &extra_data;
1103 }
1104 extra->match_limit = PCRE_G(backtrack_limit);
1105 extra->match_limit_recursion = PCRE_G(recursion_limit);
1106 #ifdef PCRE_EXTRA_MARK
1107 extra->mark = &mark;
1108 extra->flags |= PCRE_EXTRA_MARK;
1109 #endif
1110
1111 eval = pce->preg_options & PREG_REPLACE_EVAL;
1112 if (is_callable_replace) {
1113 if (eval) {
1114 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Modifier /e cannot be used with replacement callback");
1115 return NULL;
1116 }
1117 } else {
1118 replace = Z_STRVAL_P(replace_val);
1119 replace_len = Z_STRLEN_P(replace_val);
1120 replace_end = replace + replace_len;
1121 }
1122
1123 if (eval) {
1124 php_error_docref(NULL TSRMLS_CC, E_DEPRECATED, "The /e modifier is deprecated, use preg_replace_callback instead");
1125 }
1126
1127 /* Calculate the size of the offsets array, and allocate memory for it. */
1128 rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &num_subpats);
1129 if (rc < 0) {
1130 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
1131 return NULL;
1132 }
1133 num_subpats++;
1134 size_offsets = num_subpats * 3;
1135
1136 /*
1137 * Build a mapping from subpattern numbers to their names. We will always
1138 * allocate the table, even though there may be no named subpatterns. This
1139 * avoids somewhat more complicated logic in the inner loops.
1140 */
1141 subpat_names = make_subpats_table(num_subpats, pce TSRMLS_CC);
1142 if (!subpat_names) {
1143 return NULL;
1144 }
1145
1146 offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1147
1148 alloc_len = 2 * subject_len + 1;
1149 result = safe_emalloc(alloc_len, sizeof(char), 0);
1150
1151 /* Initialize */
1152 match = NULL;
1153 *result_len = 0;
1154 start_offset = 0;
1155 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1156
1157 while (1) {
1158 /* Execute the regular expression. */
1159 count = pcre_exec(pce->re, extra, subject, subject_len, start_offset,
1160 exoptions|g_notempty, offsets, size_offsets);
1161
1162 /* the string was already proved to be valid UTF-8 */
1163 exoptions |= PCRE_NO_UTF8_CHECK;
1164
1165 /* Check for too many substrings condition. */
1166 if (count == 0) {
1167 php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
1168 count = size_offsets/3;
1169 }
1170
1171 piece = subject + start_offset;
1172
1173 if (count > 0 && (offsets[1] - offsets[0] >= 0) && (limit == -1 || limit > 0)) {
1174 if (replace_count) {
1175 ++*replace_count;
1176 }
1177 /* Set the match location in subject */
1178 match = subject + offsets[0];
1179
1180 new_len = *result_len + offsets[0] - start_offset; /* part before the match */
1181
1182 /* If evaluating, do it and add the return string's length */
1183 if (eval) {
1184 eval_result_len = preg_do_eval(replace, replace_len, subject,
1185 offsets, count, &eval_result TSRMLS_CC);
1186 new_len += eval_result_len;
1187 } else if (is_callable_replace) {
1188 /* Use custom function to get replacement string and its length. */
1189 eval_result_len = preg_do_repl_func(replace_val, subject, offsets, subpat_names, count, mark, &eval_result TSRMLS_CC);
1190 new_len += eval_result_len;
1191 } else { /* do regular substitution */
1192 walk = replace;
1193 walk_last = 0;
1194 while (walk < replace_end) {
1195 if ('\\' == *walk || '$' == *walk) {
1196 if (walk_last == '\\') {
1197 walk++;
1198 walk_last = 0;
1199 continue;
1200 }
1201 if (preg_get_backref(&walk, &backref)) {
1202 if (backref < count)
1203 new_len += offsets[(backref<<1)+1] - offsets[backref<<1];
1204 continue;
1205 }
1206 }
1207 new_len++;
1208 walk++;
1209 walk_last = walk[-1];
1210 }
1211 }
1212
1213 if (new_len + 1 > alloc_len) {
1214 alloc_len = 1 + alloc_len + 2 * new_len;
1215 new_buf = emalloc(alloc_len);
1216 memcpy(new_buf, result, *result_len);
1217 efree(result);
1218 result = new_buf;
1219 }
1220 /* copy the part of the string before the match */
1221 memcpy(&result[*result_len], piece, match-piece);
1222 *result_len += match-piece;
1223
1224 /* copy replacement and backrefs */
1225 walkbuf = result + *result_len;
1226
1227 /* If evaluating or using custom function, copy result to the buffer
1228 * and clean up. */
1229 if (eval || is_callable_replace) {
1230 memcpy(walkbuf, eval_result, eval_result_len);
1231 *result_len += eval_result_len;
1232 STR_FREE(eval_result);
1233 } else { /* do regular backreference copying */
1234 walk = replace;
1235 walk_last = 0;
1236 while (walk < replace_end) {
1237 if ('\\' == *walk || '$' == *walk) {
1238 if (walk_last == '\\') {
1239 *(walkbuf-1) = *walk++;
1240 walk_last = 0;
1241 continue;
1242 }
1243 if (preg_get_backref(&walk, &backref)) {
1244 if (backref < count) {
1245 match_len = offsets[(backref<<1)+1] - offsets[backref<<1];
1246 memcpy(walkbuf, subject + offsets[backref<<1], match_len);
1247 walkbuf += match_len;
1248 }
1249 continue;
1250 }
1251 }
1252 *walkbuf++ = *walk++;
1253 walk_last = walk[-1];
1254 }
1255 *walkbuf = '\0';
1256 /* increment the result length by how much we've added to the string */
1257 *result_len += walkbuf - (result + *result_len);
1258 }
1259
1260 if (limit != -1)
1261 limit--;
1262
1263 } else if (count == PCRE_ERROR_NOMATCH || limit == 0) {
1264 /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
1265 this is not necessarily the end. We need to advance
1266 the start offset, and continue. Fudge the offset values
1267 to achieve this, unless we're already at the end of the string. */
1268 if (g_notempty != 0 && start_offset < subject_len) {
1269 int unit_len = calculate_unit_length(pce, piece);
1270
1271 offsets[0] = start_offset;
1272 offsets[1] = start_offset + unit_len;
1273 memcpy(&result[*result_len], piece, unit_len);
1274 *result_len += unit_len;
1275 } else {
1276 new_len = *result_len + subject_len - start_offset;
1277 if (new_len + 1 > alloc_len) {
1278 alloc_len = new_len + 1; /* now we know exactly how long it is */
1279 new_buf = safe_emalloc(alloc_len, sizeof(char), 0);
1280 memcpy(new_buf, result, *result_len);
1281 efree(result);
1282 result = new_buf;
1283 }
1284 /* stick that last bit of string on our output */
1285 memcpy(&result[*result_len], piece, subject_len - start_offset);
1286 *result_len += subject_len - start_offset;
1287 result[*result_len] = '\0';
1288 break;
1289 }
1290 } else {
1291 pcre_handle_exec_error(count TSRMLS_CC);
1292 efree(result);
1293 result = NULL;
1294 break;
1295 }
1296
1297 /* If we have matched an empty string, mimic what Perl's /g options does.
1298 This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
1299 the match again at the same point. If this fails (picked up above) we
1300 advance to the next character. */
1301 g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
1302
1303 /* Advance to the next piece. */
1304 start_offset = offsets[1];
1305 }
1306
1307 efree(offsets);
1308 efree(subpat_names);
1309
1310 return result;
1311 }
1312 /* }}} */
1313
1314 /* {{{ php_replace_in_subject
1315 */
1316 static char *php_replace_in_subject(zval *regex, zval *replace, zval **subject, int *result_len, int limit, int is_callable_replace, int *replace_count TSRMLS_DC)
1317 {
1318 zval **regex_entry,
1319 **replace_entry = NULL,
1320 *replace_value,
1321 empty_replace;
1322 char *subject_value,
1323 *result;
1324 int subject_len;
1325
1326 /* Make sure we're dealing with strings. */
1327 convert_to_string_ex(subject);
1328 /* FIXME: This might need to be changed to STR_EMPTY_ALLOC(). Check if this zval could be dtor()'ed somehow */
1329 ZVAL_STRINGL(&empty_replace, "", 0, 0);
1330
1331 /* If regex is an array */
1332 if (Z_TYPE_P(regex) == IS_ARRAY) {
1333 /* Duplicate subject string for repeated replacement */
1334 subject_value = estrndup(Z_STRVAL_PP(subject), Z_STRLEN_PP(subject));
1335 subject_len = Z_STRLEN_PP(subject);
1336 *result_len = subject_len;
1337
1338 zend_hash_internal_pointer_reset(Z_ARRVAL_P(regex));
1339
1340 replace_value = replace;
1341 if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace)
1342 zend_hash_internal_pointer_reset(Z_ARRVAL_P(replace));
1343
1344 /* For each entry in the regex array, get the entry */
1345 while (zend_hash_get_current_data(Z_ARRVAL_P(regex), (void **)®ex_entry) == SUCCESS) {
1346 /* Make sure we're dealing with strings. */
1347 convert_to_string_ex(regex_entry);
1348
1349 /* If replace is an array and not a callable construct */
1350 if (Z_TYPE_P(replace) == IS_ARRAY && !is_callable_replace) {
1351 /* Get current entry */
1352 if (zend_hash_get_current_data(Z_ARRVAL_P(replace), (void **)&replace_entry) == SUCCESS) {
1353 if (!is_callable_replace) {
1354 convert_to_string_ex(replace_entry);
1355 }
1356 replace_value = *replace_entry;
1357 zend_hash_move_forward(Z_ARRVAL_P(replace));
1358 } else {
1359 /* We've run out of replacement strings, so use an empty one */
1360 replace_value = &empty_replace;
1361 }
1362 }
1363
1364 /* Do the actual replacement and put the result back into subject_value
1365 for further replacements. */
1366 if ((result = php_pcre_replace(Z_STRVAL_PP(regex_entry),
1367 Z_STRLEN_PP(regex_entry),
1368 subject_value,
1369 subject_len,
1370 replace_value,
1371 is_callable_replace,
1372 result_len,
1373 limit,
1374 replace_count TSRMLS_CC)) != NULL) {
1375 efree(subject_value);
1376 subject_value = result;
1377 subject_len = *result_len;
1378 } else {
1379 efree(subject_value);
1380 return NULL;
1381 }
1382
1383 zend_hash_move_forward(Z_ARRVAL_P(regex));
1384 }
1385
1386 return subject_value;
1387 } else {
1388 result = php_pcre_replace(Z_STRVAL_P(regex),
1389 Z_STRLEN_P(regex),
1390 Z_STRVAL_PP(subject),
1391 Z_STRLEN_PP(subject),
1392 replace,
1393 is_callable_replace,
1394 result_len,
1395 limit,
1396 replace_count TSRMLS_CC);
1397 return result;
1398 }
1399 }
1400 /* }}} */
1401
1402 /* {{{ preg_replace_impl
1403 */
1404 static void preg_replace_impl(INTERNAL_FUNCTION_PARAMETERS, int is_callable_replace, int is_filter)
1405 {
1406 zval **regex,
1407 **replace,
1408 **subject,
1409 **subject_entry,
1410 **zcount = NULL;
1411 char *result;
1412 int result_len;
1413 int limit_val = -1;
1414 long limit = -1;
1415 char *string_key;
1416 uint string_key_len;
1417 ulong num_key;
1418 char *callback_name;
1419 int replace_count=0, old_replace_count;
1420
1421 /* Get function parameters and do error-checking. */
1422 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ZZZ|lZ", ®ex, &replace, &subject, &limit, &zcount) == FAILURE) {
1423 return;
1424 }
1425
1426 if (!is_callable_replace && Z_TYPE_PP(replace) == IS_ARRAY && Z_TYPE_PP(regex) != IS_ARRAY) {
1427 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Parameter mismatch, pattern is a string while replacement is an array");
1428 RETURN_FALSE;
1429 }
1430
1431 SEPARATE_ZVAL(replace);
1432 if (Z_TYPE_PP(replace) != IS_ARRAY && (Z_TYPE_PP(replace) != IS_OBJECT || !is_callable_replace)) {
1433 convert_to_string_ex(replace);
1434 }
1435 if (is_callable_replace) {
1436 if (!zend_is_callable(*replace, 0, &callback_name TSRMLS_CC)) {
1437 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Requires argument 2, '%s', to be a valid callback", callback_name);
1438 efree(callback_name);
1439 MAKE_COPY_ZVAL(subject, return_value);
1440 return;
1441 }
1442 efree(callback_name);
1443 }
1444
1445 SEPARATE_ZVAL(regex);
1446 SEPARATE_ZVAL(subject);
1447
1448 if (ZEND_NUM_ARGS() > 3) {
1449 limit_val = limit;
1450 }
1451
1452 if (Z_TYPE_PP(regex) != IS_ARRAY)
1453 convert_to_string_ex(regex);
1454
1455 /* if subject is an array */
1456 if (Z_TYPE_PP(subject) == IS_ARRAY) {
1457 array_init(return_value);
1458 zend_hash_internal_pointer_reset(Z_ARRVAL_PP(subject));
1459
1460 /* For each subject entry, convert it to string, then perform replacement
1461 and add the result to the return_value array. */
1462 while (zend_hash_get_current_data(Z_ARRVAL_PP(subject), (void **)&subject_entry) == SUCCESS) {
1463 SEPARATE_ZVAL(subject_entry);
1464 old_replace_count = replace_count;
1465 if ((result = php_replace_in_subject(*regex, *replace, subject_entry, &result_len, limit_val, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) {
1466 if (!is_filter || replace_count > old_replace_count) {
1467 /* Add to return array */
1468 switch(zend_hash_get_current_key_ex(Z_ARRVAL_PP(subject), &string_key, &string_key_len, &num_key, 0, NULL))
1469 {
1470 case HASH_KEY_IS_STRING:
1471 add_assoc_stringl_ex(return_value, string_key, string_key_len, result, result_len, 0);
1472 break;
1473
1474 case HASH_KEY_IS_LONG:
1475 add_index_stringl(return_value, num_key, result, result_len, 0);
1476 break;
1477 }
1478 } else {
1479 efree(result);
1480 }
1481 }
1482
1483 zend_hash_move_forward(Z_ARRVAL_PP(subject));
1484 }
1485 } else { /* if subject is not an array */
1486 old_replace_count = replace_count;
1487 if ((result = php_replace_in_subject(*regex, *replace, subject, &result_len, limit_val, is_callable_replace, &replace_count TSRMLS_CC)) != NULL) {
1488 if (!is_filter || replace_count > old_replace_count) {
1489 RETVAL_STRINGL(result, result_len, 0);
1490 } else {
1491 efree(result);
1492 }
1493 }
1494 }
1495 if (ZEND_NUM_ARGS() > 4) {
1496 zval_dtor(*zcount);
1497 ZVAL_LONG(*zcount, replace_count);
1498 }
1499
1500 }
1501 /* }}} */
1502
/* {{{ proto mixed preg_replace(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
   Perform Perl-style regular expression replacement. */
static PHP_FUNCTION(preg_replace)
{
	/* Shared implementation with is_callable_replace = 0 and is_filter = 0. */
	preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 0);
}
/* }}} */
1510
/* {{{ proto mixed preg_replace_callback(mixed regex, mixed callback, mixed subject [, int limit [, int &count]])
   Perform Perl-style regular expression replacement using replacement callback. */
static PHP_FUNCTION(preg_replace_callback)
{
	/* Shared implementation with is_callable_replace = 1 and is_filter = 0. */
	preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1, 0);
}
/* }}} */
1518
/* {{{ proto mixed preg_filter(mixed regex, mixed replace, mixed subject [, int limit [, int &count]])
   Perform Perl-style regular expression replacement and only return matches. */
static PHP_FUNCTION(preg_filter)
{
	/* Shared implementation with is_callable_replace = 0 and is_filter = 1. */
	preg_replace_impl(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0, 1);
}
/* }}} */
1526
1527 /* {{{ proto array preg_split(string pattern, string subject [, int limit [, int flags]])
1528 Split string into an array using a perl-style regular expression as a delimiter */
1529 static PHP_FUNCTION(preg_split)
1530 {
1531 char *regex; /* Regular expression */
1532 char *subject; /* String to match against */
1533 int regex_len;
1534 int subject_len;
1535 long limit_val = -1;/* Integer value of limit */
1536 long flags = 0; /* Match control flags */
1537 pcre_cache_entry *pce; /* Compiled regular expression */
1538
1539 /* Get function parameters and do error checking */
1540 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ll", ®ex, ®ex_len,
1541 &subject, &subject_len, &limit_val, &flags) == FAILURE) {
1542 RETURN_FALSE;
1543 }
1544
1545 /* Compile regex or get it from cache. */
1546 if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
1547 RETURN_FALSE;
1548 }
1549
1550 pce->refcount++;
1551 php_pcre_split_impl(pce, subject, subject_len, return_value, limit_val, flags TSRMLS_CC);
1552 pce->refcount--;
1553 }
1554 /* }}} */
1555
1556 /* {{{ php_pcre_split
1557 */
1558 PHPAPI void php_pcre_split_impl(pcre_cache_entry *pce, char *subject, int subject_len, zval *return_value,
1559 long limit_val, long flags TSRMLS_DC)
1560 {
1561 pcre_extra *extra = NULL; /* Holds results of studying */
1562 pcre *re_bump = NULL; /* Regex instance for empty matches */
1563 pcre_extra *extra_bump = NULL; /* Almost dummy */
1564 pcre_extra extra_data; /* Used locally for exec options */
1565 int *offsets; /* Array of subpattern offsets */
1566 int size_offsets; /* Size of the offsets array */
1567 int exoptions = 0; /* Execution options */
1568 int count = 0; /* Count of matched subpatterns */
1569 int start_offset; /* Where the new search starts */
1570 int next_offset; /* End of the last delimiter match + 1 */
1571 int g_notempty = 0; /* If the match should not be empty */
1572 char *last_match; /* Location of last match */
1573 int rc;
1574 int no_empty; /* If NO_EMPTY flag is set */
1575 int delim_capture; /* If delimiters should be captured */
1576 int offset_capture; /* If offsets should be captured */
1577
1578 no_empty = flags & PREG_SPLIT_NO_EMPTY;
1579 delim_capture = flags & PREG_SPLIT_DELIM_CAPTURE;
1580 offset_capture = flags & PREG_SPLIT_OFFSET_CAPTURE;
1581
1582 if (limit_val == 0) {
1583 limit_val = -1;
1584 }
1585
1586 if (extra == NULL) {
1587 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1588 extra = &extra_data;
1589 }
1590 extra->match_limit = PCRE_G(backtrack_limit);
1591 extra->match_limit_recursion = PCRE_G(recursion_limit);
1592 #ifdef PCRE_EXTRA_MARK
1593 extra->flags &= ~PCRE_EXTRA_MARK;
1594 #endif
1595
1596 /* Initialize return value */
1597 array_init(return_value);
1598
1599 /* Calculate the size of the offsets array, and allocate memory for it. */
1600 rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
1601 if (rc < 0) {
1602 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
1603 RETURN_FALSE;
1604 }
1605 size_offsets = (size_offsets + 1) * 3;
1606 offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1607
1608 /* Start at the beginning of the string */
1609 start_offset = 0;
1610 next_offset = 0;
1611 last_match = subject;
1612 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1613
1614 /* Get next piece if no limit or limit not yet reached and something matched*/
1615 while ((limit_val == -1 || limit_val > 1)) {
1616 count = pcre_exec(pce->re, extra, subject,
1617 subject_len, start_offset,
1618 exoptions|g_notempty, offsets, size_offsets);
1619
1620 /* the string was already proved to be valid UTF-8 */
1621 exoptions |= PCRE_NO_UTF8_CHECK;
1622
1623 /* Check for too many substrings condition. */
1624 if (count == 0) {
1625 php_error_docref(NULL TSRMLS_CC,E_NOTICE, "Matched, but too many substrings");
1626 count = size_offsets/3;
1627 }
1628
1629 /* If something matched */
1630 if (count > 0 && (offsets[1] - offsets[0] >= 0)) {
1631 if (!no_empty || &subject[offsets[0]] != last_match) {
1632
1633 if (offset_capture) {
1634 /* Add (match, offset) pair to the return value */
1635 add_offset_pair(return_value, last_match, &subject[offsets[0]]-last_match, next_offset, NULL);
1636 } else {
1637 /* Add the piece to the return value */
1638 add_next_index_stringl(return_value, last_match,
1639 &subject[offsets[0]]-last_match, 1);
1640 }
1641
1642 /* One less left to do */
1643 if (limit_val != -1)
1644 limit_val--;
1645 }
1646
1647 last_match = &subject[offsets[1]];
1648 next_offset = offsets[1];
1649
1650 if (delim_capture) {
1651 int i, match_len;
1652 for (i = 1; i < count; i++) {
1653 match_len = offsets[(i<<1)+1] - offsets[i<<1];
1654 /* If we have matched a delimiter */
1655 if (!no_empty || match_len > 0) {
1656 if (offset_capture) {
1657 add_offset_pair(return_value, &subject[offsets[i<<1]], match_len, offsets[i<<1], NULL);
1658 } else {
1659 add_next_index_stringl(return_value,
1660 &subject[offsets[i<<1]],
1661 match_len, 1);
1662 }
1663 }
1664 }
1665 }
1666 } else if (count == PCRE_ERROR_NOMATCH) {
1667 /* If we previously set PCRE_NOTEMPTY_ATSTART after a null match,
1668 this is not necessarily the end. We need to advance
1669 the start offset, and continue. Fudge the offset values
1670 to achieve this, unless we're already at the end of the string. */
1671 if (g_notempty != 0 && start_offset < subject_len) {
1672 if (pce->compile_options & PCRE_UTF8) {
1673 if (re_bump == NULL) {
1674 int dummy;
1675
1676 if ((re_bump = pcre_get_compiled_regex("/./us", &extra_bump, &dummy TSRMLS_CC)) == NULL) {
1677 RETURN_FALSE;
1678 }
1679 }
1680 count = pcre_exec(re_bump, extra_bump, subject,
1681 subject_len, start_offset,
1682 exoptions, offsets, size_offsets);
1683 if (count < 1) {
1684 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error");
1685 RETURN_FALSE;
1686 }
1687 } else {
1688 offsets[0] = start_offset;
1689 offsets[1] = start_offset + 1;
1690 }
1691 } else
1692 break;
1693 } else {
1694 pcre_handle_exec_error(count TSRMLS_CC);
1695 break;
1696 }
1697
1698 /* If we have matched an empty string, mimic what Perl's /g options does.
1699 This turns out to be rather cunning. First we set PCRE_NOTEMPTY_ATSTART and try
1700 the match again at the same point. If this fails (picked up above) we
1701 advance to the next character. */
1702 g_notempty = (offsets[1] == offsets[0])? PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED : 0;
1703
1704 /* Advance to the position right after the last full match */
1705 start_offset = offsets[1];
1706 }
1707
1708
1709 start_offset = last_match - subject; /* the offset might have been incremented, but without further successful matches */
1710
1711 if (!no_empty || start_offset < subject_len)
1712 {
1713 if (offset_capture) {
1714 /* Add the last (match, offset) pair to the return value */
1715 add_offset_pair(return_value, &subject[start_offset], subject_len - start_offset, start_offset, NULL);
1716 } else {
1717 /* Add the last piece to the return value */
1718 add_next_index_stringl(return_value, last_match, subject + subject_len - last_match, 1);
1719 }
1720 }
1721
1722
1723 /* Clean up */
1724 efree(offsets);
1725 }
1726 /* }}} */
1727
1728 /* {{{ proto string preg_quote(string str [, string delim_char])
1729 Quote regular expression characters plus an optional character */
1730 static PHP_FUNCTION(preg_quote)
1731 {
1732 int in_str_len;
1733 char *in_str; /* Input string argument */
1734 char *in_str_end; /* End of the input string */
1735 int delim_len = 0;
1736 char *delim = NULL; /* Additional delimiter argument */
1737 char *out_str, /* Output string with quoted characters */
1738 *p, /* Iterator for input string */
1739 *q, /* Iterator for output string */
1740 delim_char=0, /* Delimiter character to be quoted */
1741 c; /* Current character */
1742 zend_bool quote_delim = 0; /* Whether to quote additional delim char */
1743
1744 /* Get the arguments and check for errors */
1745 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", &in_str, &in_str_len,
1746 &delim, &delim_len) == FAILURE) {
1747 return;
1748 }
1749
1750 in_str_end = in_str + in_str_len;
1751
1752 /* Nothing to do if we got an empty string */
1753 if (in_str == in_str_end) {
1754 RETURN_EMPTY_STRING();
1755 }
1756
1757 if (delim && *delim) {
1758 delim_char = delim[0];
1759 quote_delim = 1;
1760 }
1761
1762 /* Allocate enough memory so that even if each character
1763 is quoted, we won't run out of room */
1764 out_str = safe_emalloc(4, in_str_len, 1);
1765
1766 /* Go through the string and quote necessary characters */
1767 for(p = in_str, q = out_str; p != in_str_end; p++) {
1768 c = *p;
1769 switch(c) {
1770 case '.':
1771 case '\\':
1772 case '+':
1773 case '*':
1774 case '?':
1775 case '[':
1776 case '^':
1777 case ']':
1778 case '$':
1779 case '(':
1780 case ')':
1781 case '{':
1782 case '}':
1783 case '=':
1784 case '!':
1785 case '>':
1786 case '<':
1787 case '|':
1788 case ':':
1789 case '-':
1790 *q++ = '\\';
1791 *q++ = c;
1792 break;
1793
1794 case '\0':
1795 *q++ = '\\';
1796 *q++ = '0';
1797 *q++ = '0';
1798 *q++ = '0';
1799 break;
1800
1801 default:
1802 if (quote_delim && c == delim_char)
1803 *q++ = '\\';
1804 *q++ = c;
1805 break;
1806 }
1807 }
1808 *q = '\0';
1809
1810 /* Reallocate string and return it */
1811 RETVAL_STRINGL(erealloc(out_str, q - out_str + 1), q - out_str, 0);
1812 }
1813 /* }}} */
1814
1815 /* {{{ proto array preg_grep(string regex, array input [, int flags])
1816 Searches array and returns entries which match regex */
1817 static PHP_FUNCTION(preg_grep)
1818 {
1819 char *regex; /* Regular expression */
1820 int regex_len;
1821 zval *input; /* Input array */
1822 long flags = 0; /* Match control flags */
1823 pcre_cache_entry *pce; /* Compiled regular expression */
1824
1825 /* Get arguments and do error checking */
1826 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sa|l", ®ex, ®ex_len,
1827 &input, &flags) == FAILURE) {
1828 return;
1829 }
1830
1831 /* Compile regex or get it from cache. */
1832 if ((pce = pcre_get_compiled_regex_cache(regex, regex_len TSRMLS_CC)) == NULL) {
1833 RETURN_FALSE;
1834 }
1835
1836 pce->refcount++;
1837 php_pcre_grep_impl(pce, input, return_value, flags TSRMLS_CC);
1838 pce->refcount--;
1839 }
1840 /* }}} */
1841
1842 PHPAPI void php_pcre_grep_impl(pcre_cache_entry *pce, zval *input, zval *return_value, long flags TSRMLS_DC) /* {{{ */
1843 {
1844 zval **entry; /* An entry in the input array */
1845 pcre_extra *extra = pce->extra;/* Holds results of studying */
1846 pcre_extra extra_data; /* Used locally for exec options */
1847 int *offsets; /* Array of subpattern offsets */
1848 int size_offsets; /* Size of the offsets array */
1849 int count = 0; /* Count of matched subpatterns */
1850 char *string_key;
1851 uint string_key_len;
1852 ulong num_key;
1853 zend_bool invert; /* Whether to return non-matching
1854 entries */
1855 int rc;
1856
1857 invert = flags & PREG_GREP_INVERT ? 1 : 0;
1858
1859 if (extra == NULL) {
1860 extra_data.flags = PCRE_EXTRA_MATCH_LIMIT | PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1861 extra = &extra_data;
1862 }
1863 extra->match_limit = PCRE_G(backtrack_limit);
1864 extra->match_limit_recursion = PCRE_G(recursion_limit);
1865 #ifdef PCRE_EXTRA_MARK
1866 extra->flags &= ~PCRE_EXTRA_MARK;
1867 #endif
1868
1869 /* Calculate the size of the offsets array, and allocate memory for it. */
1870 rc = pcre_fullinfo(pce->re, extra, PCRE_INFO_CAPTURECOUNT, &size_offsets);
1871 if (rc < 0) {
1872 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Internal pcre_fullinfo() error %d", rc);
1873 RETURN_FALSE;
1874 }
1875 size_offsets = (size_offsets + 1) * 3;
1876 offsets = (int *)safe_emalloc(size_offsets, sizeof(int), 0);
1877
1878 /* Initialize return array */
1879 array_init(return_value);
1880
1881 PCRE_G(error_code) = PHP_PCRE_NO_ERROR;
1882
1883 /* Go through the input array */
1884 zend_hash_internal_pointer_reset(Z_ARRVAL_P(input));
1885 while (zend_hash_get_current_data(Z_ARRVAL_P(input), (void **)&entry) == SUCCESS) {
1886 zval subject = **entry;
1887
1888 if (Z_TYPE_PP(entry) != IS_STRING) {
1889 zval_copy_ctor(&subject);
1890 convert_to_string(&subject);
1891 }
1892
1893 /* Perform the match */
1894 count = pcre_exec(pce->re, extra, Z_STRVAL(subject),
1895 Z_STRLEN(subject), 0,
1896 0, offsets, size_offsets);
1897
1898 /* Check for too many substrings condition. */
1899 if (count == 0) {
1900 php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Matched, but too many substrings");
1901 count = size_offsets/3;
1902 } else if (count < 0 && count != PCRE_ERROR_NOMATCH) {
1903 pcre_handle_exec_error(count TSRMLS_CC);
1904 break;
1905 }
1906
1907 /* If the entry fits our requirements */
1908 if ((count > 0 && !invert) || (count == PCRE_ERROR_NOMATCH && invert)) {
1909
1910 Z_ADDREF_PP(entry);
1911
1912 /* Add to return array */
1913 switch (zend_hash_get_current_key_ex(Z_ARRVAL_P(input), &string_key, &string_key_len, &num_key, 0, NULL))
1914 {
1915 case HASH_KEY_IS_STRING:
1916 zend_hash_update(Z_ARRVAL_P(return_value), string_key,
1917 string_key_len, entry, sizeof(zval *), NULL);
1918 break;
1919
1920 case HASH_KEY_IS_LONG:
1921 zend_hash_index_update(Z_ARRVAL_P(return_value), num_key, entry,
1922 sizeof(zval *), NULL);
1923 break;
1924 }
1925 }
1926
1927 if (Z_TYPE_PP(entry) != IS_STRING) {
1928 zval_dtor(&subject);
1929 }
1930
1931 zend_hash_move_forward(Z_ARRVAL_P(input));
1932 }
1933 zend_hash_internal_pointer_reset(Z_ARRVAL_P(input));
1934 /* Clean up */
1935 efree(offsets);
1936 }
1937 /* }}} */
1938
1939 /* {{{ proto int preg_last_error()
1940 Returns the error code of the last regexp execution. */
1941 static PHP_FUNCTION(preg_last_error)
1942 {
1943 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "") == FAILURE) {
1944 return;
1945 }
1946
1947 RETURN_LONG(PCRE_G(error_code));
1948 }
1949 /* }}} */
1950
1951 /* {{{ module definition structures */
1952
1953 /* {{{ arginfo */
1954 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match, 0, 0, 2)
1955 ZEND_ARG_INFO(0, pattern)
1956 ZEND_ARG_INFO(0, subject)
1957 ZEND_ARG_INFO(1, subpatterns) /* array */
1958 ZEND_ARG_INFO(0, flags)
1959 ZEND_ARG_INFO(0, offset)
1960 ZEND_END_ARG_INFO()
1961
1962 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_match_all, 0, 0, 2)
1963 ZEND_ARG_INFO(0, pattern)
1964 ZEND_ARG_INFO(0, subject)
1965 ZEND_ARG_INFO(1, subpatterns) /* array */
1966 ZEND_ARG_INFO(0, flags)
1967 ZEND_ARG_INFO(0, offset)
1968 ZEND_END_ARG_INFO()
1969
1970 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace, 0, 0, 3)
1971 ZEND_ARG_INFO(0, regex)
1972 ZEND_ARG_INFO(0, replace)
1973 ZEND_ARG_INFO(0, subject)
1974 ZEND_ARG_INFO(0, limit)
1975 ZEND_ARG_INFO(1, count)
1976 ZEND_END_ARG_INFO()
1977
1978 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_replace_callback, 0, 0, 3)
1979 ZEND_ARG_INFO(0, regex)
1980 ZEND_ARG_INFO(0, callback)
1981 ZEND_ARG_INFO(0, subject)
1982 ZEND_ARG_INFO(0, limit)
1983 ZEND_ARG_INFO(1, count)
1984 ZEND_END_ARG_INFO()
1985
1986 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_split, 0, 0, 2)
1987 ZEND_ARG_INFO(0, pattern)
1988 ZEND_ARG_INFO(0, subject)
1989 ZEND_ARG_INFO(0, limit)
1990 ZEND_ARG_INFO(0, flags)
1991 ZEND_END_ARG_INFO()
1992
1993 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_quote, 0, 0, 1)
1994 ZEND_ARG_INFO(0, str)
1995 ZEND_ARG_INFO(0, delim_char)
1996 ZEND_END_ARG_INFO()
1997
1998 ZEND_BEGIN_ARG_INFO_EX(arginfo_preg_grep, 0, 0, 2)
1999 ZEND_ARG_INFO(0, regex)
2000 ZEND_ARG_INFO(0, input) /* array */
2001 ZEND_ARG_INFO(0, flags)
2002 ZEND_END_ARG_INFO()
2003
2004 ZEND_BEGIN_ARG_INFO(arginfo_preg_last_error, 0)
2005 ZEND_END_ARG_INFO()
2006 /* }}} */
2007
2008 static const zend_function_entry pcre_functions[] = {
2009 PHP_FE(preg_match, arginfo_preg_match)
2010 PHP_FE(preg_match_all, arginfo_preg_match_all)
2011 PHP_FE(preg_replace, arginfo_preg_replace)
2012 PHP_FE(preg_replace_callback, arginfo_preg_replace_callback)
2013 PHP_FE(preg_filter, arginfo_preg_replace)
2014 PHP_FE(preg_split, arginfo_preg_split)
2015 PHP_FE(preg_quote, arginfo_preg_quote)
2016 PHP_FE(preg_grep, arginfo_preg_grep)
2017 PHP_FE(preg_last_error, arginfo_preg_last_error)
2018 PHP_FE_END
2019 };
2020
2021 zend_module_entry pcre_module_entry = {
2022 STANDARD_MODULE_HEADER,
2023 "pcre",
2024 pcre_functions,
2025 PHP_MINIT(pcre),
2026 PHP_MSHUTDOWN(pcre),
2027 NULL,
2028 NULL,
2029 PHP_MINFO(pcre),
2030 NO_VERSION_YET,
2031 PHP_MODULE_GLOBALS(pcre),
2032 PHP_GINIT(pcre),
2033 PHP_GSHUTDOWN(pcre),
2034 NULL,
2035 STANDARD_MODULE_PROPERTIES_EX
2036 };
2037
/* Emit the module lookup entry point only when COMPILE_DL_PCRE is defined
 * (presumably for builds of pcre as a loadable module; confirm against the
 * build configuration). */
#if defined(COMPILE_DL_PCRE)
ZEND_GET_MODULE(pcre)
#endif /* COMPILE_DL_PCRE */
2041
2042 /* }}} */
2043
2044 #endif /* HAVE_PCRE || HAVE_BUNDLED_PCRE */
2045
2046 /*
2047 * Local variables:
2048 * tab-width: 4
2049 * c-basic-offset: 4
2050 * End:
2051 * vim600: sw=4 ts=4 fdm=marker
2052 * vim<600: sw=4 ts=4
2053 */