This source file includes the following definitions.
- ZEND_DECLARE_MODULE_GLOBALS
- ZEND_GET_MODULE
- get_input_encoding
- get_output_encoding
- _php_mb_allocators_malloc
- _php_mb_allocators_realloc
- _php_mb_allocators_calloc
- _php_mb_allocators_free
- _php_mb_allocators_pmalloc
- _php_mb_allocators_prealloc
- _php_mb_allocators_pfree
- php_mb_parse_encoding_list
- php_mb_parse_encoding_array
- php_mb_zend_encoding_fetcher
- php_mb_zend_encoding_name_getter
- php_mb_zend_encoding_lexer_compatibility_checker
- php_mb_zend_encoding_detector
- php_mb_zend_encoding_converter
- php_mb_zend_encoding_list_parser
- php_mb_zend_internal_encoding_getter
- php_mb_zend_internal_encoding_setter
- _php_mb_compile_regex
- _php_mb_match_regex
- _php_mb_free_regex
- _php_mb_compile_regex
- _php_mb_match_regex
- _php_mb_free_regex
- php_mb_nls_get_default_detect_order_list
- php_mb_rfc1867_substring_conf
- php_mb_rfc1867_getword
- php_mb_rfc1867_getword_conf
- php_mb_rfc1867_basename
- PHP_INI_MH
- PHP_INI_MH
- PHP_INI_MH
- PHP_INI_MH
- _php_mb_ini_mbstring_internal_encoding_set
- PHP_INI_MH
- PHP_INI_MH
- PHP_INI_MH
- PHP_INI_MH
- PHP_INI_BEGIN
- PHP_GSHUTDOWN_FUNCTION
- PHP_MINIT_FUNCTION
- PHP_MSHUTDOWN_FUNCTION
- PHP_RINIT_FUNCTION
- PHP_RSHUTDOWN_FUNCTION
- PHP_MINFO_FUNCTION
- PHP_FUNCTION
- PHP_FUNCTION
- PHP_FUNCTION
- PHP_FUNCTION
- PHP_FUNCTION
- PHP_FUNCTION
- PHP_FUNCTION
- PHP_FUNCTION
- PHP_FUNCTION
- PHP_FUNCTION
- PHP_FUNCTION
- PHP_FUNCTION
- PHP_FUNCTION
- PHP_FUNCTION
- PHP_FUNCTION
- PHP_FUNCTION
- PHP_FUNCTION
- PHP_FUNCTION
- PHP_FUNCTION
- PHP_FUNCTION
- PHP_FUNCTION
- PHP_FUNCTION
- PHP_FUNCTION
- php_mb_convert_encoding
- PHP_FUNCTION
- PHP_FUNCTION
- PHP_FUNCTION
- PHP_FUNCTION
- PHP_FUNCTION
- PHP_FUNCTION
- PHP_FUNCTION
- PHP_FUNCTION
- PHP_FUNCTION
- PHP_FUNCTION
- PHP_FUNCTION
- php_mb_numericentity_exec
- PHP_FUNCTION
- PHP_FUNCTION
- my_smart_str_dtor
- _php_mbstr_parse_mail_headers
- PHP_FUNCTION
- PHP_FUNCTION
- PHP_FUNCTION
- php_mb_populate_current_detect_order_list
- php_mb_encoding_translation
- php_mb_mbchar_bytes_ex
- php_mb_mbchar_bytes
- php_mb_safe_strrchr_ex
- php_mb_safe_strrchr
- php_mb_stripos
- php_mb_gpc_get_detect_order
- php_mb_gpc_set_input_encoding
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48 #ifdef HAVE_CONFIG_H
49 #include "config.h"
50 #endif
51
52 #include "php.h"
53 #include "php_ini.h"
54 #include "php_variables.h"
55 #include "mbstring.h"
56 #include "ext/standard/php_string.h"
57 #include "ext/standard/php_mail.h"
58 #include "ext/standard/exec.h"
59 #include "ext/standard/php_smart_str.h"
60 #include "ext/standard/url.h"
61 #include "main/php_output.h"
62 #include "ext/standard/info.h"
63
64 #include "libmbfl/mbfl/mbfl_allocators.h"
65 #include "libmbfl/mbfl/mbfilter_pass.h"
66
67 #include "php_variables.h"
68 #include "php_globals.h"
69 #include "rfc1867.h"
70 #include "php_content_types.h"
71 #include "SAPI.h"
72 #include "php_unicode.h"
73 #include "TSRM.h"
74
75 #include "mb_gpc.h"
76
77 #if HAVE_MBREGEX
78 #include "php_mbregex.h"
79 #endif
80
81 #include "zend_multibyte.h"
82
83 #if HAVE_ONIG
84 #include "php_onig_compat.h"
85 #include <oniguruma.h>
86 #undef UChar
87 #elif HAVE_PCRE || HAVE_BUNDLED_PCRE
88 #include "ext/pcre/php_pcre.h"
89 #endif
90
91
92 #if HAVE_MBSTRING
93
94
/* Storage for this extension's module globals (read below through MBSTRG()). */
95 ZEND_DECLARE_MODULE_GLOBALS(mbstring)
96
/* Globals constructor / destructor; both are referenced from mbstring_module_entry. */
97 static PHP_GINIT_FUNCTION(mbstring);
98 static PHP_GSHUTDOWN_FUNCTION(mbstring);
99
/* Forward declarations of file-local helpers whose definitions appear later in the file. */
100 static void php_mb_populate_current_detect_order_list(TSRMLS_D);
101
102 static int php_mb_encoding_translation(TSRMLS_D);
103
104 static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size TSRMLS_DC);
105
106 static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding TSRMLS_DC);
107
108
109
110
/* One row of the per-language default detection table: a language and the
 * encoding identifiers that are tried for it. */
111 typedef struct _php_mb_nls_ident_list {
112 enum mbfl_no_language lang; /* language this row applies to */
113 const enum mbfl_no_encoding *list; /* encoding identifiers for that language */
114 size_t list_size; /* number of elements in list */
115 } php_mb_nls_ident_list;
116
/* Per-language default encoding lists. Every list starts with ASCII; all but
 * the Japanese one list UTF-8 second. They are tied to a language by
 * php_mb_default_identify_list. */

/* Japanese */
117 static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
118 mbfl_no_encoding_ascii,
119 mbfl_no_encoding_jis,
120 mbfl_no_encoding_utf8,
121 mbfl_no_encoding_euc_jp,
122 mbfl_no_encoding_sjis
123 };
124
/* Simplified Chinese */
125 static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = {
126 mbfl_no_encoding_ascii,
127 mbfl_no_encoding_utf8,
128 mbfl_no_encoding_euc_cn,
129 mbfl_no_encoding_cp936
130 };
131
/* Traditional Chinese */
132 static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
133 mbfl_no_encoding_ascii,
134 mbfl_no_encoding_utf8,
135 mbfl_no_encoding_euc_tw,
136 mbfl_no_encoding_big5
137 };
138
/* Korean */
139 static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = {
140 mbfl_no_encoding_ascii,
141 mbfl_no_encoding_utf8,
142 mbfl_no_encoding_euc_kr,
143 mbfl_no_encoding_uhc
144 };
145
/* Russian */
146 static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = {
147 mbfl_no_encoding_ascii,
148 mbfl_no_encoding_utf8,
149 mbfl_no_encoding_koi8r,
150 mbfl_no_encoding_cp1251,
151 mbfl_no_encoding_cp866
152 };
153
/* Armenian */
154 static const enum mbfl_no_encoding php_mb_default_identify_list_hy[] = {
155 mbfl_no_encoding_ascii,
156 mbfl_no_encoding_utf8,
157 mbfl_no_encoding_armscii8
158 };
159
/* Turkish */
160 static const enum mbfl_no_encoding php_mb_default_identify_list_tr[] = {
161 mbfl_no_encoding_ascii,
162 mbfl_no_encoding_utf8,
163 mbfl_no_encoding_cp1254,
164 mbfl_no_encoding_8859_9
165 };
166
/* Ukrainian */
167 static const enum mbfl_no_encoding php_mb_default_identify_list_ua[] = {
168 mbfl_no_encoding_ascii,
169 mbfl_no_encoding_utf8,
170 mbfl_no_encoding_koi8u
171 };
172
/* Neutral; also the fallback used by php_mb_nls_get_default_detect_order_list()
 * when no language matches. */
173 static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
174 mbfl_no_encoding_ascii,
175 mbfl_no_encoding_utf8
176 };
177
178
/* Language -> default encoding list table. Each row stores the list pointer
 * together with its element count (computed with sizeof at compile time).
 * Searched linearly by php_mb_nls_get_default_detect_order_list(). */
179 static const php_mb_nls_ident_list php_mb_default_identify_list[] = {
180 { mbfl_no_language_japanese, php_mb_default_identify_list_ja, sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
181 { mbfl_no_language_korean, php_mb_default_identify_list_kr, sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
182 { mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, sizeof(php_mb_default_identify_list_tw_hk) / sizeof(php_mb_default_identify_list_tw_hk[0]) },
183 { mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
184 { mbfl_no_language_russian, php_mb_default_identify_list_ru, sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
185 { mbfl_no_language_armenian, php_mb_default_identify_list_hy, sizeof(php_mb_default_identify_list_hy) / sizeof(php_mb_default_identify_list_hy[0]) },
186 { mbfl_no_language_turkish, php_mb_default_identify_list_tr, sizeof(php_mb_default_identify_list_tr) / sizeof(php_mb_default_identify_list_tr[0]) },
187 { mbfl_no_language_ukrainian, php_mb_default_identify_list_ua, sizeof(php_mb_default_identify_list_ua) / sizeof(php_mb_default_identify_list_ua[0]) },
188 { mbfl_no_language_neutral, php_mb_default_identify_list_neut, sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]) }
189 };
190
191
192
193
/* Function-overload table, terminated by an all-zero/NULL row.
 * Each row holds an overload-group flag (MB_OVERLOAD_MAIL / _STRING / _REGEX)
 * followed by three function names: a standard function, its mb_* counterpart
 * and an "mb_orig_*" name.
 * NOTE(review): presumably the third name is where the original function is
 * saved while overloaded -- confirm against struct mb_overload_def, which is
 * declared outside this file. The regex rows exist only when HAVE_MBREGEX is set. */
194 static const struct mb_overload_def mb_ovld[] = {
195 {MB_OVERLOAD_MAIL, "mail", "mb_send_mail", "mb_orig_mail"},
196 {MB_OVERLOAD_STRING, "strlen", "mb_strlen", "mb_orig_strlen"},
197 {MB_OVERLOAD_STRING, "strpos", "mb_strpos", "mb_orig_strpos"},
198 {MB_OVERLOAD_STRING, "strrpos", "mb_strrpos", "mb_orig_strrpos"},
199 {MB_OVERLOAD_STRING, "stripos", "mb_stripos", "mb_orig_stripos"},
200 {MB_OVERLOAD_STRING, "strripos", "mb_strripos", "mb_orig_strripos"},
201 {MB_OVERLOAD_STRING, "strstr", "mb_strstr", "mb_orig_strstr"},
202 {MB_OVERLOAD_STRING, "strrchr", "mb_strrchr", "mb_orig_strrchr"},
203 {MB_OVERLOAD_STRING, "stristr", "mb_stristr", "mb_orig_stristr"},
204 {MB_OVERLOAD_STRING, "substr", "mb_substr", "mb_orig_substr"},
205 {MB_OVERLOAD_STRING, "strtolower", "mb_strtolower", "mb_orig_strtolower"},
206 {MB_OVERLOAD_STRING, "strtoupper", "mb_strtoupper", "mb_orig_strtoupper"},
207 {MB_OVERLOAD_STRING, "substr_count", "mb_substr_count", "mb_orig_substr_count"},
208 #if HAVE_MBREGEX
209 {MB_OVERLOAD_REGEX, "ereg", "mb_ereg", "mb_orig_ereg"},
210 {MB_OVERLOAD_REGEX, "eregi", "mb_eregi", "mb_orig_eregi"},
211 {MB_OVERLOAD_REGEX, "ereg_replace", "mb_ereg_replace", "mb_orig_ereg_replace"},
212 {MB_OVERLOAD_REGEX, "eregi_replace", "mb_eregi_replace", "mb_orig_eregi_replace"},
213 {MB_OVERLOAD_REGEX, "split", "mb_split", "mb_orig_split"},
214 #endif
215 {0, NULL, NULL, NULL}
216 };
217
218
219
/* Argument descriptors for the mb_* functions registered in mbstring_functions.
 * Per the Zend arginfo macro convention, the last argument of
 * ZEND_BEGIN_ARG_INFO_EX is the number of required parameters and the first
 * argument of ZEND_ARG_INFO / ZEND_ARG_VARIADIC_INFO is 1 when the parameter
 * is passed by reference. */
220 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_language, 0, 0, 0)
221 ZEND_ARG_INFO(0, language)
222 ZEND_END_ARG_INFO()
223
224 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_internal_encoding, 0, 0, 0)
225 ZEND_ARG_INFO(0, encoding)
226 ZEND_END_ARG_INFO()
227
228 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_input, 0, 0, 0)
229 ZEND_ARG_INFO(0, type)
230 ZEND_END_ARG_INFO()
231
232 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_http_output, 0, 0, 0)
233 ZEND_ARG_INFO(0, encoding)
234 ZEND_END_ARG_INFO()
235
236 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_order, 0, 0, 0)
237 ZEND_ARG_INFO(0, encoding)
238 ZEND_END_ARG_INFO()
239
240 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substitute_character, 0, 0, 0)
241 ZEND_ARG_INFO(0, substchar)
242 ZEND_END_ARG_INFO()
243
244 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_preferred_mime_name, 0, 0, 1)
245 ZEND_ARG_INFO(0, encoding)
246 ZEND_END_ARG_INFO()
247
/* "result" is declared by-reference. */
248 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_parse_str, 0, 0, 1)
249 ZEND_ARG_INFO(0, encoded_string)
250 ZEND_ARG_INFO(1, result)
251 ZEND_END_ARG_INFO()
252
253 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_output_handler, 0, 0, 2)
254 ZEND_ARG_INFO(0, contents)
255 ZEND_ARG_INFO(0, status)
256 ZEND_END_ARG_INFO()
257
258 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strlen, 0, 0, 1)
259 ZEND_ARG_INFO(0, str)
260 ZEND_ARG_INFO(0, encoding)
261 ZEND_END_ARG_INFO()
262
263 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strpos, 0, 0, 2)
264 ZEND_ARG_INFO(0, haystack)
265 ZEND_ARG_INFO(0, needle)
266 ZEND_ARG_INFO(0, offset)
267 ZEND_ARG_INFO(0, encoding)
268 ZEND_END_ARG_INFO()
269
270 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrpos, 0, 0, 2)
271 ZEND_ARG_INFO(0, haystack)
272 ZEND_ARG_INFO(0, needle)
273 ZEND_ARG_INFO(0, offset)
274 ZEND_ARG_INFO(0, encoding)
275 ZEND_END_ARG_INFO()
276
277 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stripos, 0, 0, 2)
278 ZEND_ARG_INFO(0, haystack)
279 ZEND_ARG_INFO(0, needle)
280 ZEND_ARG_INFO(0, offset)
281 ZEND_ARG_INFO(0, encoding)
282 ZEND_END_ARG_INFO()
283
284 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strripos, 0, 0, 2)
285 ZEND_ARG_INFO(0, haystack)
286 ZEND_ARG_INFO(0, needle)
287 ZEND_ARG_INFO(0, offset)
288 ZEND_ARG_INFO(0, encoding)
289 ZEND_END_ARG_INFO()
290
291 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strstr, 0, 0, 2)
292 ZEND_ARG_INFO(0, haystack)
293 ZEND_ARG_INFO(0, needle)
294 ZEND_ARG_INFO(0, part)
295 ZEND_ARG_INFO(0, encoding)
296 ZEND_END_ARG_INFO()
297
298 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrchr, 0, 0, 2)
299 ZEND_ARG_INFO(0, haystack)
300 ZEND_ARG_INFO(0, needle)
301 ZEND_ARG_INFO(0, part)
302 ZEND_ARG_INFO(0, encoding)
303 ZEND_END_ARG_INFO()
304
305 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_stristr, 0, 0, 2)
306 ZEND_ARG_INFO(0, haystack)
307 ZEND_ARG_INFO(0, needle)
308 ZEND_ARG_INFO(0, part)
309 ZEND_ARG_INFO(0, encoding)
310 ZEND_END_ARG_INFO()
311
312 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strrichr, 0, 0, 2)
313 ZEND_ARG_INFO(0, haystack)
314 ZEND_ARG_INFO(0, needle)
315 ZEND_ARG_INFO(0, part)
316 ZEND_ARG_INFO(0, encoding)
317 ZEND_END_ARG_INFO()
318
319 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr_count, 0, 0, 2)
320 ZEND_ARG_INFO(0, haystack)
321 ZEND_ARG_INFO(0, needle)
322 ZEND_ARG_INFO(0, encoding)
323 ZEND_END_ARG_INFO()
324
325 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_substr, 0, 0, 2)
326 ZEND_ARG_INFO(0, str)
327 ZEND_ARG_INFO(0, start)
328 ZEND_ARG_INFO(0, length)
329 ZEND_ARG_INFO(0, encoding)
330 ZEND_END_ARG_INFO()
331
332 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strcut, 0, 0, 2)
333 ZEND_ARG_INFO(0, str)
334 ZEND_ARG_INFO(0, start)
335 ZEND_ARG_INFO(0, length)
336 ZEND_ARG_INFO(0, encoding)
337 ZEND_END_ARG_INFO()
338
339 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strwidth, 0, 0, 1)
340 ZEND_ARG_INFO(0, str)
341 ZEND_ARG_INFO(0, encoding)
342 ZEND_END_ARG_INFO()
343
344 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strimwidth, 0, 0, 3)
345 ZEND_ARG_INFO(0, str)
346 ZEND_ARG_INFO(0, start)
347 ZEND_ARG_INFO(0, width)
348 ZEND_ARG_INFO(0, trimmarker)
349 ZEND_ARG_INFO(0, encoding)
350 ZEND_END_ARG_INFO()
351
352 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_encoding, 0, 0, 2)
353 ZEND_ARG_INFO(0, str)
354 ZEND_ARG_INFO(0, to)
355 ZEND_ARG_INFO(0, from)
356 ZEND_END_ARG_INFO()
357
358 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_case, 0, 0, 2)
359 ZEND_ARG_INFO(0, sourcestring)
360 ZEND_ARG_INFO(0, mode)
361 ZEND_ARG_INFO(0, encoding)
362 ZEND_END_ARG_INFO()
363
364 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtoupper, 0, 0, 1)
365 ZEND_ARG_INFO(0, sourcestring)
366 ZEND_ARG_INFO(0, encoding)
367 ZEND_END_ARG_INFO()
368
369 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_strtolower, 0, 0, 1)
370 ZEND_ARG_INFO(0, sourcestring)
371 ZEND_ARG_INFO(0, encoding)
372 ZEND_END_ARG_INFO()
373
374 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_detect_encoding, 0, 0, 1)
375 ZEND_ARG_INFO(0, str)
376 ZEND_ARG_INFO(0, encoding_list)
377 ZEND_ARG_INFO(0, strict)
378 ZEND_END_ARG_INFO()
379
380 ZEND_BEGIN_ARG_INFO(arginfo_mb_list_encodings, 0)
381 ZEND_END_ARG_INFO()
382
383 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encoding_aliases, 0, 0, 1)
384 ZEND_ARG_INFO(0, encoding)
385 ZEND_END_ARG_INFO()
386
387 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_mimeheader, 0, 0, 1)
388 ZEND_ARG_INFO(0, str)
389 ZEND_ARG_INFO(0, charset)
390 ZEND_ARG_INFO(0, transfer)
391 ZEND_ARG_INFO(0, linefeed)
392 ZEND_ARG_INFO(0, indent)
393 ZEND_END_ARG_INFO()
394
395 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_mimeheader, 0, 0, 1)
396 ZEND_ARG_INFO(0, string)
397 ZEND_END_ARG_INFO()
398
399 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_kana, 0, 0, 1)
400 ZEND_ARG_INFO(0, str)
401 ZEND_ARG_INFO(0, option)
402 ZEND_ARG_INFO(0, encoding)
403 ZEND_END_ARG_INFO()
404
/* "vars" is variadic and declared by-reference. */
405 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_convert_variables, 0, 0, 3)
406 ZEND_ARG_INFO(0, to)
407 ZEND_ARG_INFO(0, from)
408 ZEND_ARG_VARIADIC_INFO(1, vars)
409 ZEND_END_ARG_INFO()
410
411 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_encode_numericentity, 0, 0, 2)
412 ZEND_ARG_INFO(0, string)
413 ZEND_ARG_INFO(0, convmap)
414 ZEND_ARG_INFO(0, encoding)
415 ZEND_ARG_INFO(0, is_hex)
416 ZEND_END_ARG_INFO()
417
418 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_decode_numericentity, 0, 0, 2)
419 ZEND_ARG_INFO(0, string)
420 ZEND_ARG_INFO(0, convmap)
421 ZEND_ARG_INFO(0, encoding)
422 ZEND_END_ARG_INFO()
423
424 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_send_mail, 0, 0, 3)
425 ZEND_ARG_INFO(0, to)
426 ZEND_ARG_INFO(0, subject)
427 ZEND_ARG_INFO(0, message)
428 ZEND_ARG_INFO(0, additional_headers)
429 ZEND_ARG_INFO(0, additional_parameters)
430 ZEND_END_ARG_INFO()
431
432 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_get_info, 0, 0, 0)
433 ZEND_ARG_INFO(0, type)
434 ZEND_END_ARG_INFO()
435
436 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_check_encoding, 0, 0, 0)
437 ZEND_ARG_INFO(0, var)
438 ZEND_ARG_INFO(0, encoding)
439 ZEND_END_ARG_INFO()
440
/* Argument descriptors for the multibyte regex functions (mb_regex_*, mb_ereg*,
 * mb_split). They are not named directly in mbstring_functions.
 * NOTE(review): presumably consumed through PHP_MBREGEX_FUNCTION_ENTRIES, which
 * is defined outside this file -- confirm. They are declared here regardless of
 * HAVE_MBREGEX. */
441 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_encoding, 0, 0, 0)
442 ZEND_ARG_INFO(0, encoding)
443 ZEND_END_ARG_INFO()
444
/* "registers" is declared by-reference. */
445 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg, 0, 0, 2)
446 ZEND_ARG_INFO(0, pattern)
447 ZEND_ARG_INFO(0, string)
448 ZEND_ARG_INFO(1, registers)
449 ZEND_END_ARG_INFO()
450
/* "registers" is declared by-reference. */
451 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi, 0, 0, 2)
452 ZEND_ARG_INFO(0, pattern)
453 ZEND_ARG_INFO(0, string)
454 ZEND_ARG_INFO(1, registers)
455 ZEND_END_ARG_INFO()
456
457 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace, 0, 0, 3)
458 ZEND_ARG_INFO(0, pattern)
459 ZEND_ARG_INFO(0, replacement)
460 ZEND_ARG_INFO(0, string)
461 ZEND_ARG_INFO(0, option)
462 ZEND_END_ARG_INFO()
463
464 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_eregi_replace, 0, 0, 3)
465 ZEND_ARG_INFO(0, pattern)
466 ZEND_ARG_INFO(0, replacement)
467 ZEND_ARG_INFO(0, string)
468 ZEND_END_ARG_INFO()
469
470 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_replace_callback, 0, 0, 3)
471 ZEND_ARG_INFO(0, pattern)
472 ZEND_ARG_INFO(0, callback)
473 ZEND_ARG_INFO(0, string)
474 ZEND_ARG_INFO(0, option)
475 ZEND_END_ARG_INFO()
476
477 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_split, 0, 0, 2)
478 ZEND_ARG_INFO(0, pattern)
479 ZEND_ARG_INFO(0, string)
480 ZEND_ARG_INFO(0, limit)
481 ZEND_END_ARG_INFO()
482
483 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_match, 0, 0, 2)
484 ZEND_ARG_INFO(0, pattern)
485 ZEND_ARG_INFO(0, string)
486 ZEND_ARG_INFO(0, option)
487 ZEND_END_ARG_INFO()
488
489 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search, 0, 0, 0)
490 ZEND_ARG_INFO(0, pattern)
491 ZEND_ARG_INFO(0, option)
492 ZEND_END_ARG_INFO()
493
494 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_pos, 0, 0, 0)
495 ZEND_ARG_INFO(0, pattern)
496 ZEND_ARG_INFO(0, option)
497 ZEND_END_ARG_INFO()
498
499 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_regs, 0, 0, 0)
500 ZEND_ARG_INFO(0, pattern)
501 ZEND_ARG_INFO(0, option)
502 ZEND_END_ARG_INFO()
503
504 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_init, 0, 0, 1)
505 ZEND_ARG_INFO(0, string)
506 ZEND_ARG_INFO(0, pattern)
507 ZEND_ARG_INFO(0, option)
508 ZEND_END_ARG_INFO()
509
510 ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getregs, 0)
511 ZEND_END_ARG_INFO()
512
513 ZEND_BEGIN_ARG_INFO(arginfo_mb_ereg_search_getpos, 0)
514 ZEND_END_ARG_INFO()
515
516 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_ereg_search_setpos, 0, 0, 1)
517 ZEND_ARG_INFO(0, position)
518 ZEND_END_ARG_INFO()
519
520 ZEND_BEGIN_ARG_INFO_EX(arginfo_mb_regex_set_options, 0, 0, 0)
521 ZEND_ARG_INFO(0, options)
522 ZEND_END_ARG_INFO()
523
524
525
/* Function table of the extension: pairs every mb_* function with its argument
 * descriptor. The regex entries are appended only when HAVE_MBREGEX is set, and
 * the table is closed with PHP_FE_END. Referenced from mbstring_module_entry. */
526 const zend_function_entry mbstring_functions[] = {
527 PHP_FE(mb_convert_case, arginfo_mb_convert_case)
528 PHP_FE(mb_strtoupper, arginfo_mb_strtoupper)
529 PHP_FE(mb_strtolower, arginfo_mb_strtolower)
530 PHP_FE(mb_language, arginfo_mb_language)
531 PHP_FE(mb_internal_encoding, arginfo_mb_internal_encoding)
532 PHP_FE(mb_http_input, arginfo_mb_http_input)
533 PHP_FE(mb_http_output, arginfo_mb_http_output)
534 PHP_FE(mb_detect_order, arginfo_mb_detect_order)
535 PHP_FE(mb_substitute_character, arginfo_mb_substitute_character)
536 PHP_FE(mb_parse_str, arginfo_mb_parse_str)
537 PHP_FE(mb_output_handler, arginfo_mb_output_handler)
538 PHP_FE(mb_preferred_mime_name, arginfo_mb_preferred_mime_name)
539 PHP_FE(mb_strlen, arginfo_mb_strlen)
540 PHP_FE(mb_strpos, arginfo_mb_strpos)
541 PHP_FE(mb_strrpos, arginfo_mb_strrpos)
542 PHP_FE(mb_stripos, arginfo_mb_stripos)
543 PHP_FE(mb_strripos, arginfo_mb_strripos)
544 PHP_FE(mb_strstr, arginfo_mb_strstr)
545 PHP_FE(mb_strrchr, arginfo_mb_strrchr)
546 PHP_FE(mb_stristr, arginfo_mb_stristr)
547 PHP_FE(mb_strrichr, arginfo_mb_strrichr)
548 PHP_FE(mb_substr_count, arginfo_mb_substr_count)
549 PHP_FE(mb_substr, arginfo_mb_substr)
550 PHP_FE(mb_strcut, arginfo_mb_strcut)
551 PHP_FE(mb_strwidth, arginfo_mb_strwidth)
552 PHP_FE(mb_strimwidth, arginfo_mb_strimwidth)
553 PHP_FE(mb_convert_encoding, arginfo_mb_convert_encoding)
554 PHP_FE(mb_detect_encoding, arginfo_mb_detect_encoding)
555 PHP_FE(mb_list_encodings, arginfo_mb_list_encodings)
556 PHP_FE(mb_encoding_aliases, arginfo_mb_encoding_aliases)
557 PHP_FE(mb_convert_kana, arginfo_mb_convert_kana)
558 PHP_FE(mb_encode_mimeheader, arginfo_mb_encode_mimeheader)
559 PHP_FE(mb_decode_mimeheader, arginfo_mb_decode_mimeheader)
560 PHP_FE(mb_convert_variables, arginfo_mb_convert_variables)
561 PHP_FE(mb_encode_numericentity, arginfo_mb_encode_numericentity)
562 PHP_FE(mb_decode_numericentity, arginfo_mb_decode_numericentity)
563 PHP_FE(mb_send_mail, arginfo_mb_send_mail)
564 PHP_FE(mb_get_info, arginfo_mb_get_info)
565 PHP_FE(mb_check_encoding, arginfo_mb_check_encoding)
566 #if HAVE_MBREGEX
567 PHP_MBREGEX_FUNCTION_ENTRIES
568 #endif
569 PHP_FE_END
570 };
571
572
573
/* Module descriptor for the "mbstring" extension. It wires together the
 * function table, the MINIT/MSHUTDOWN/RINIT/RSHUTDOWN/MINFO callbacks and the
 * module-globals constructor/destructor. The version slot is NO_VERSION_YET. */
574 zend_module_entry mbstring_module_entry = {
575 STANDARD_MODULE_HEADER,
576 "mbstring",
577 mbstring_functions,
578 PHP_MINIT(mbstring),
579 PHP_MSHUTDOWN(mbstring),
580 PHP_RINIT(mbstring),
581 PHP_RSHUTDOWN(mbstring),
582 PHP_MINFO(mbstring),
583 NO_VERSION_YET,
584 PHP_MODULE_GLOBALS(mbstring),
585 PHP_GINIT(mbstring),
586 PHP_GSHUTDOWN(mbstring),
587 NULL,
588 STANDARD_MODULE_PROPERTIES_EX
589 };
590
591
592
/* POST content-type table using the stock handlers: php_std_post_handler for
 * the default form content type and rfc1867_post_handler for multipart data.
 * Terminated by an all-NULL row.
 * NOTE(review): presumably used to restore the standard handlers when encoding
 * translation is off -- the code that registers it is outside this view. */
593 static sapi_post_entry php_post_entries[] = {
594 { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_std_post_handler },
595 { MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler },
596 { NULL, 0, NULL, NULL }
597 };
598
599
/* Emitted only when COMPILE_DL_MBSTRING is defined.
 * NOTE(review): by Zend convention this is the shared-object build and the
 * macro exports the module lookup entry point -- confirm against the build setup. */
600 #ifdef COMPILE_DL_MBSTRING
601 ZEND_GET_MODULE(mbstring)
602 #endif
603
604 static char *get_internal_encoding(TSRMLS_D) {
605 if (PG(internal_encoding) && PG(internal_encoding)[0]) {
606 return PG(internal_encoding);
607 } else if (SG(default_charset)) {
608 return SG(default_charset);
609 }
610 return "";
611 }
612
613 static char *get_input_encoding(TSRMLS_D) {
614 if (PG(input_encoding) && PG(input_encoding)[0]) {
615 return PG(input_encoding);
616 } else if (SG(default_charset)) {
617 return SG(default_charset);
618 }
619 return "";
620 }
621
622 static char *get_output_encoding(TSRMLS_D) {
623 if (PG(output_encoding) && PG(output_encoding)[0]) {
624 return PG(output_encoding);
625 } else if (SG(default_charset)) {
626 return SG(default_charset);
627 }
628 return "";
629 }
630
631
632
/* Allocator hook (malloc slot of _php_mb_allocators): forwards to emalloc(). */
static void *_php_mb_allocators_malloc(unsigned int sz)
{
	void *chunk = emalloc(sz);

	return chunk;
}
637
/* Allocator hook (realloc slot of _php_mb_allocators): forwards to erealloc(). */
static void *_php_mb_allocators_realloc(void *ptr, unsigned int sz)
{
	void *resized = erealloc(ptr, sz);

	return resized;
}
642
/* Allocator hook (calloc slot of _php_mb_allocators): forwards to ecalloc()
 * with the element count and element size unchanged. */
static void *_php_mb_allocators_calloc(unsigned int nelems, unsigned int szelem)
{
	void *zeroed = ecalloc(nelems, szelem);

	return zeroed;
}
647
/* Allocator hook (free slot of _php_mb_allocators): releases ptr with efree(). */
static void _php_mb_allocators_free(void *ptr)
{
	efree(ptr);
	return;
}
652
/* Allocator hook (pmalloc slot of _php_mb_allocators): forwards to pemalloc()
 * with the persistent flag set to 1. */
static void *_php_mb_allocators_pmalloc(unsigned int sz)
{
	void *chunk = pemalloc(sz, 1);

	return chunk;
}
657
/* Allocator hook (prealloc slot of _php_mb_allocators): forwards to perealloc()
 * with the persistent flag set to 1. */
static void *_php_mb_allocators_prealloc(void *ptr, unsigned int sz)
{
	void *resized = perealloc(ptr, sz, 1);

	return resized;
}
662
/* Allocator hook (pfree slot of _php_mb_allocators): releases ptr with pefree()
 * and the persistent flag set to 1. */
static void _php_mb_allocators_pfree(void *ptr)
{
	pefree(ptr, 1);
	return;
}
667
/* Allocator table handed to libmbfl: the first four slots wrap the e*()
 * allocator functions, the last three wrap the pe*() variants called with
 * persistent = 1. Slot order follows the mbfl_allocators struct
 * (declared in libmbfl/mbfl/mbfl_allocators.h, not visible here). */
668 static mbfl_allocators _php_mb_allocators = {
669 _php_mb_allocators_malloc,
670 _php_mb_allocators_realloc,
671 _php_mb_allocators_calloc,
672 _php_mb_allocators_free,
673 _php_mb_allocators_pmalloc,
674 _php_mb_allocators_prealloc,
675 _php_mb_allocators_pfree
676 };
677
678
679
/* POST content-type table for mbstring: identical to php_post_entries except
 * that the default form content type is routed to php_mb_post_handler.
 * Terminated by an all-NULL row. */
680 static sapi_post_entry mbstr_post_entries[] = {
681 { DEFAULT_POST_CONTENT_TYPE, sizeof(DEFAULT_POST_CONTENT_TYPE)-1, sapi_read_standard_form_data, php_mb_post_handler },
682 { MULTIPART_CONTENT_TYPE, sizeof(MULTIPART_CONTENT_TYPE)-1, NULL, rfc1867_post_handler },
683 { NULL, 0, NULL, NULL }
684 };
685
686
687
688
689
690
691
692 static int
693 php_mb_parse_encoding_list(const char *value, size_t value_length, const mbfl_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC)
694 {
695 int size, bauto, ret = SUCCESS;
696 size_t n;
697 char *p, *p1, *p2, *endp, *tmpstr;
698 const mbfl_encoding **entry, **list;
699
700 list = NULL;
701 if (value == NULL || value_length <= 0) {
702 if (return_list) {
703 *return_list = NULL;
704 }
705 if (return_size) {
706 *return_size = 0;
707 }
708 return FAILURE;
709 } else {
710
711 if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
712 tmpstr = (char *)estrndup(value+1, value_length-2);
713 value_length -= 2;
714 }
715 else
716 tmpstr = (char *)estrndup(value, value_length);
717 if (tmpstr == NULL) {
718 return FAILURE;
719 }
720
721 endp = tmpstr + value_length;
722 n = 1;
723 p1 = tmpstr;
724 while ((p2 = php_memnstr(p1, ",", 1, endp)) != NULL) {
725 p1 = p2 + 1;
726 n++;
727 }
728 size = n + MBSTRG(default_detect_order_list_size);
729
730 list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
731 if (list != NULL) {
732 entry = list;
733 n = 0;
734 bauto = 0;
735 p1 = tmpstr;
736 do {
737 p2 = p = php_memnstr(p1, ",", 1, endp);
738 if (p == NULL) {
739 p = endp;
740 }
741 *p = '\0';
742
743 while (p1 < p && (*p1 == ' ' || *p1 == '\t')) {
744 p1++;
745 }
746 p--;
747 while (p > p1 && (*p == ' ' || *p == '\t')) {
748 *p = '\0';
749 p--;
750 }
751
752 if (strcasecmp(p1, "auto") == 0) {
753 if (!bauto) {
754 const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
755 const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
756 size_t i;
757 bauto = 1;
758 for (i = 0; i < identify_list_size; i++) {
759 *entry++ = mbfl_no2encoding(*src++);
760 n++;
761 }
762 }
763 } else {
764 const mbfl_encoding *encoding = mbfl_name2encoding(p1);
765 if (encoding) {
766 *entry++ = encoding;
767 n++;
768 } else {
769 ret = 0;
770 }
771 }
772 p1 = p2 + 1;
773 } while (n < size && p2 != NULL);
774 if (n > 0) {
775 if (return_list) {
776 *return_list = list;
777 } else {
778 pefree(list, persistent);
779 }
780 } else {
781 pefree(list, persistent);
782 if (return_list) {
783 *return_list = NULL;
784 }
785 ret = 0;
786 }
787 if (return_size) {
788 *return_size = n;
789 }
790 } else {
791 if (return_list) {
792 *return_list = NULL;
793 }
794 if (return_size) {
795 *return_size = 0;
796 }
797 ret = 0;
798 }
799 efree(tmpstr);
800 }
801
802 return ret;
803 }
804
805
806
807
808
809
810
811 static int
812 php_mb_parse_encoding_array(zval *array, const mbfl_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC)
813 {
814 zval **hash_entry;
815 HashTable *target_hash;
816 int i, n, size, bauto, ret = SUCCESS;
817 const mbfl_encoding **list, **entry;
818
819 list = NULL;
820 if (Z_TYPE_P(array) == IS_ARRAY) {
821 target_hash = Z_ARRVAL_P(array);
822 zend_hash_internal_pointer_reset(target_hash);
823 i = zend_hash_num_elements(target_hash);
824 size = i + MBSTRG(default_detect_order_list_size);
825 list = (const mbfl_encoding **)pecalloc(size, sizeof(mbfl_encoding*), persistent);
826 if (list != NULL) {
827 entry = list;
828 bauto = 0;
829 n = 0;
830 while (i > 0) {
831 if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
832 break;
833 }
834 convert_to_string_ex(hash_entry);
835 if (strcasecmp(Z_STRVAL_PP(hash_entry), "auto") == 0) {
836 if (!bauto) {
837 const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
838 const size_t identify_list_size = MBSTRG(default_detect_order_list_size);
839 size_t j;
840
841 bauto = 1;
842 for (j = 0; j < identify_list_size; j++) {
843 *entry++ = mbfl_no2encoding(*src++);
844 n++;
845 }
846 }
847 } else {
848 const mbfl_encoding *encoding = mbfl_name2encoding(Z_STRVAL_PP(hash_entry));
849 if (encoding) {
850 *entry++ = encoding;
851 n++;
852 } else {
853 ret = FAILURE;
854 }
855 }
856 zend_hash_move_forward(target_hash);
857 i--;
858 }
859 if (n > 0) {
860 if (return_list) {
861 *return_list = list;
862 } else {
863 pefree(list, persistent);
864 }
865 } else {
866 pefree(list, persistent);
867 if (return_list) {
868 *return_list = NULL;
869 }
870 ret = FAILURE;
871 }
872 if (return_size) {
873 *return_size = n;
874 }
875 } else {
876 if (return_list) {
877 *return_list = NULL;
878 }
879 if (return_size) {
880 *return_size = 0;
881 }
882 ret = FAILURE;
883 }
884 }
885
886 return ret;
887 }
888
889
890
891 static const zend_encoding* php_mb_zend_encoding_fetcher(const char *encoding_name TSRMLS_DC)
892 {
893 return (const zend_encoding*)mbfl_name2encoding(encoding_name);
894 }
895
896 static const char *php_mb_zend_encoding_name_getter(const zend_encoding *encoding)
897 {
898 return ((const mbfl_encoding *)encoding)->name;
899 }
900
901 static int php_mb_zend_encoding_lexer_compatibility_checker(const zend_encoding *_encoding)
902 {
903 const mbfl_encoding *encoding = (const mbfl_encoding*)_encoding;
904 if (encoding->flag & MBFL_ENCTYPE_SBCS) {
905 return 1;
906 }
907 if ((encoding->flag & (MBFL_ENCTYPE_MBCS | MBFL_ENCTYPE_GL_UNSAFE)) == MBFL_ENCTYPE_MBCS) {
908 return 1;
909 }
910 return 0;
911 }
912
913 static const zend_encoding *php_mb_zend_encoding_detector(const unsigned char *arg_string, size_t arg_length, const zend_encoding **list, size_t list_size TSRMLS_DC)
914 {
915 mbfl_string string;
916
917 if (!list) {
918 list = (const zend_encoding **)MBSTRG(current_detect_order_list);
919 list_size = MBSTRG(current_detect_order_list_size);
920 }
921
922 mbfl_string_init(&string);
923 string.no_language = MBSTRG(language);
924 string.val = (unsigned char *)arg_string;
925 string.len = arg_length;
926 return (const zend_encoding *) mbfl_identify_encoding2(&string, (const mbfl_encoding **)list, list_size, 0);
927 }
928
929 static size_t php_mb_zend_encoding_converter(unsigned char **to, size_t *to_length, const unsigned char *from, size_t from_length, const zend_encoding *encoding_to, const zend_encoding *encoding_from TSRMLS_DC)
930 {
931 mbfl_string string, result;
932 mbfl_buffer_converter *convd;
933 int status, loc;
934
935
936
937 mbfl_string_init(&string);
938 mbfl_string_init(&result);
939 string.no_encoding = ((const mbfl_encoding*)encoding_from)->no_encoding;
940 string.no_language = MBSTRG(language);
941 string.val = (unsigned char*)from;
942 string.len = from_length;
943
944
945 convd = mbfl_buffer_converter_new2((const mbfl_encoding *)encoding_from, (const mbfl_encoding *)encoding_to, string.len);
946 if (convd == NULL) {
947 return -1;
948 }
949 mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
950 mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
951
952
953 status = mbfl_buffer_converter_feed2(convd, &string, &loc);
954 if (status) {
955 mbfl_buffer_converter_delete(convd);
956 return (size_t)-1;
957 }
958
959 mbfl_buffer_converter_flush(convd);
960 if (!mbfl_buffer_converter_result(convd, &result)) {
961 mbfl_buffer_converter_delete(convd);
962 return (size_t)-1;
963 }
964
965 *to = result.val;
966 *to_length = result.len;
967
968 mbfl_buffer_converter_delete(convd);
969
970 return loc;
971 }
972
973 static int php_mb_zend_encoding_list_parser(const char *encoding_list, size_t encoding_list_len, const zend_encoding ***return_list, size_t *return_size, int persistent TSRMLS_DC)
974 {
975 return php_mb_parse_encoding_list(encoding_list, encoding_list_len, (const mbfl_encoding ***)return_list, return_size, persistent TSRMLS_CC);
976 }
977
978 static const zend_encoding *php_mb_zend_internal_encoding_getter(TSRMLS_D)
979 {
980 return (const zend_encoding *)MBSTRG(internal_encoding);
981 }
982
983 static int php_mb_zend_internal_encoding_setter(const zend_encoding *encoding TSRMLS_DC)
984 {
985 MBSTRG(internal_encoding) = (const mbfl_encoding *)encoding;
986 return SUCCESS;
987 }
988
/* Callback table through which the Zend engine's multibyte support reaches
 * mbstring: provider name followed by the fetcher, name getter, lexer
 * compatibility checker, detector, converter, list parser and the internal
 * encoding getter/setter defined above. Slot order follows
 * zend_multibyte_functions (declared in zend_multibyte.h, not visible here). */
989 static zend_multibyte_functions php_mb_zend_multibyte_functions = {
990 "mbstring",
991 php_mb_zend_encoding_fetcher,
992 php_mb_zend_encoding_name_getter,
993 php_mb_zend_encoding_lexer_compatibility_checker,
994 php_mb_zend_encoding_detector,
995 php_mb_zend_encoding_converter,
996 php_mb_zend_encoding_list_parser,
997 php_mb_zend_internal_encoding_getter,
998 php_mb_zend_internal_encoding_setter
999 };
1000
1001
/* Regex helpers working on an opaque compiled-pattern handle. The definitions
 * below are selected at compile time: Oniguruma when HAVE_ONIG is set,
 * otherwise PCRE when HAVE_PCRE or HAVE_BUNDLED_PCRE is set. No definition is
 * provided in this section when neither applies. */
1002 static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC);
1003 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len);
1004 static void _php_mb_free_regex(void *opaque);
1005
1006 #if HAVE_ONIG
1007
1008 static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC)
1009 {
1010 php_mb_regex_t *retval;
1011 OnigErrorInfo err_info;
1012 int err_code;
1013
1014 if ((err_code = onig_new(&retval,
1015 (const OnigUChar *)pattern,
1016 (const OnigUChar *)pattern + strlen(pattern),
1017 ONIG_OPTION_IGNORECASE | ONIG_OPTION_DONT_CAPTURE_GROUP,
1018 ONIG_ENCODING_ASCII, &OnigSyntaxPerl, &err_info))) {
1019 OnigUChar err_str[ONIG_MAX_ERROR_MESSAGE_LEN];
1020 onig_error_code_to_str(err_str, err_code, err_info);
1021 php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s: %s", pattern, err_str);
1022 retval = NULL;
1023 }
1024 return retval;
1025 }
1026
1027
1028
1029 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
1030 {
1031 return onig_search((php_mb_regex_t *)opaque, (const OnigUChar *)str,
1032 (const OnigUChar*)str + str_len, (const OnigUChar *)str,
1033 (const OnigUChar*)str + str_len, NULL, ONIG_OPTION_NONE) >= 0;
1034 }
1035
1036
1037
1038 static void _php_mb_free_regex(void *opaque)
1039 {
1040 onig_free((php_mb_regex_t *)opaque);
1041 }
1042
1043 #elif HAVE_PCRE || HAVE_BUNDLED_PCRE
1044
1045 static void *_php_mb_compile_regex(const char *pattern TSRMLS_DC)
1046 {
1047 pcre *retval;
1048 const char *err_str;
1049 int err_offset;
1050
1051 if (!(retval = pcre_compile(pattern,
1052 PCRE_CASELESS, &err_str, &err_offset, NULL))) {
1053 php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s (offset=%d): %s", pattern, err_offset, err_str);
1054 }
1055 return retval;
1056 }
1057
1058
1059
1060 static int _php_mb_match_regex(void *opaque, const char *str, size_t str_len)
1061 {
1062 return pcre_exec((pcre *)opaque, NULL, str, (int)str_len, 0,
1063 0, NULL, 0) >= 0;
1064 }
1065
1066
1067
/* PCRE backend: release a pattern obtained from _php_mb_compile_regex(). */
static void _php_mb_free_regex(void *compiled)
{
	pcre_free(compiled);
}
1072
1073 #endif
1074
1075
1076 static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum mbfl_no_encoding **plist, size_t *plist_size)
1077 {
1078 size_t i;
1079
1080 *plist = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1081 *plist_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1082
1083 for (i = 0; i < sizeof(php_mb_default_identify_list) / sizeof(php_mb_default_identify_list[0]); i++) {
1084 if (php_mb_default_identify_list[i].lang == lang) {
1085 *plist = (enum mbfl_no_encoding *)php_mb_default_identify_list[i].list;
1086 *plist_size = php_mb_default_identify_list[i].list_size;
1087 return 1;
1088 }
1089 }
1090 return 0;
1091 }
1092
1093
1094 static char *php_mb_rfc1867_substring_conf(const zend_encoding *encoding, char *start, int len, char quote TSRMLS_DC)
1095 {
1096 char *result = emalloc(len + 2);
1097 char *resp = result;
1098 int i;
1099
1100 for (i = 0; i < len && start[i] != quote; ++i) {
1101 if (start[i] == '\\' && (start[i + 1] == '\\' || (quote && start[i + 1] == quote))) {
1102 *resp++ = start[++i];
1103 } else {
1104 size_t j = php_mb_mbchar_bytes_ex(start+i, (const mbfl_encoding *)encoding);
1105
1106 while (j-- > 0 && i < len) {
1107 *resp++ = start[i++];
1108 }
1109 --i;
1110 }
1111 }
1112
1113 *resp = '\0';
1114 return result;
1115 }
1116
1117 static char *php_mb_rfc1867_getword(const zend_encoding *encoding, char **line, char stop TSRMLS_DC)
1118 {
1119 char *pos = *line, quote;
1120 char *res;
1121
1122 while (*pos && *pos != stop) {
1123 if ((quote = *pos) == '"' || quote == '\'') {
1124 ++pos;
1125 while (*pos && *pos != quote) {
1126 if (*pos == '\\' && pos[1] && pos[1] == quote) {
1127 pos += 2;
1128 } else {
1129 ++pos;
1130 }
1131 }
1132 if (*pos) {
1133 ++pos;
1134 }
1135 } else {
1136 pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
1137
1138 }
1139 }
1140 if (*pos == '\0') {
1141 res = estrdup(*line);
1142 *line += strlen(*line);
1143 return res;
1144 }
1145
1146 res = estrndup(*line, pos - *line);
1147
1148 while (*pos == stop) {
1149 pos += php_mb_mbchar_bytes_ex(pos, (const mbfl_encoding *)encoding);
1150 }
1151
1152 *line = pos;
1153 return res;
1154 }
1155
1156
1157 static char *php_mb_rfc1867_getword_conf(const zend_encoding *encoding, char *str TSRMLS_DC)
1158 {
1159 while (*str && isspace(*(unsigned char *)str)) {
1160 ++str;
1161 }
1162
1163 if (!*str) {
1164 return estrdup("");
1165 }
1166
1167 if (*str == '"' || *str == '\'') {
1168 char quote = *str;
1169
1170 str++;
1171 return php_mb_rfc1867_substring_conf(encoding, str, strlen(str), quote TSRMLS_CC);
1172 } else {
1173 char *strend = str;
1174
1175 while (*strend && !isspace(*(unsigned char *)strend)) {
1176 ++strend;
1177 }
1178 return php_mb_rfc1867_substring_conf(encoding, str, strend - str, 0 TSRMLS_CC);
1179 }
1180 }
1181
1182
1183 static char *php_mb_rfc1867_basename(const zend_encoding *encoding, char *filename TSRMLS_DC)
1184 {
1185 char *s, *s2;
1186 const size_t filename_len = strlen(filename);
1187
1188
1189
1190
1191
1192
1193 s = php_mb_safe_strrchr_ex(filename, '\\', filename_len, (const mbfl_encoding *)encoding);
1194 s2 = php_mb_safe_strrchr_ex(filename, '/', filename_len, (const mbfl_encoding *)encoding);
1195
1196 if (s && s2) {
1197 if (s > s2) {
1198 return ++s;
1199 } else {
1200 return ++s2;
1201 }
1202 } else if (s) {
1203 return ++s;
1204 } else if (s2) {
1205 return ++s2;
1206 } else {
1207 return filename;
1208 }
1209 }
1210
1211
1212
1213
1214 static PHP_INI_MH(OnUpdate_mbstring_language)
1215 {
1216 enum mbfl_no_language no_language;
1217
1218 no_language = mbfl_name2no_language(new_value);
1219 if (no_language == mbfl_no_language_invalid) {
1220 MBSTRG(language) = mbfl_no_language_neutral;
1221 return FAILURE;
1222 }
1223 MBSTRG(language) = no_language;
1224 php_mb_nls_get_default_detect_order_list(no_language, &MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
1225 return SUCCESS;
1226 }
1227
1228
1229
1230 static PHP_INI_MH(OnUpdate_mbstring_detect_order)
1231 {
1232 const mbfl_encoding **list;
1233 size_t size;
1234
1235 if (!new_value) {
1236 if (MBSTRG(detect_order_list)) {
1237 pefree(MBSTRG(detect_order_list), 1);
1238 }
1239 MBSTRG(detect_order_list) = NULL;
1240 MBSTRG(detect_order_list_size) = 0;
1241 return SUCCESS;
1242 }
1243
1244 if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
1245 return FAILURE;
1246 }
1247
1248 if (MBSTRG(detect_order_list)) {
1249 pefree(MBSTRG(detect_order_list), 1);
1250 }
1251 MBSTRG(detect_order_list) = list;
1252 MBSTRG(detect_order_list_size) = size;
1253 return SUCCESS;
1254 }
1255
1256
1257
1258 static PHP_INI_MH(OnUpdate_mbstring_http_input)
1259 {
1260 const mbfl_encoding **list;
1261 size_t size;
1262
1263 if (!new_value) {
1264 if (MBSTRG(http_input_list)) {
1265 pefree(MBSTRG(http_input_list), 1);
1266 }
1267 if (SUCCESS == php_mb_parse_encoding_list(get_input_encoding(TSRMLS_C), strlen(get_input_encoding(TSRMLS_C))+1, &list, &size, 1 TSRMLS_CC)) {
1268 MBSTRG(http_input_list) = list;
1269 MBSTRG(http_input_list_size) = size;
1270 return SUCCESS;
1271 }
1272 MBSTRG(http_input_list) = NULL;
1273 MBSTRG(http_input_list_size) = 0;
1274 return SUCCESS;
1275 }
1276
1277 if (FAILURE == php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 TSRMLS_CC)) {
1278 return FAILURE;
1279 }
1280
1281 if (MBSTRG(http_input_list)) {
1282 pefree(MBSTRG(http_input_list), 1);
1283 }
1284 MBSTRG(http_input_list) = list;
1285 MBSTRG(http_input_list_size) = size;
1286
1287 if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1288 php_error_docref("ref.mbstring" TSRMLS_CC, E_DEPRECATED, "Use of mbstring.http_input is deprecated");
1289 }
1290
1291 return SUCCESS;
1292 }
1293
1294
1295
1296 static PHP_INI_MH(OnUpdate_mbstring_http_output)
1297 {
1298 const mbfl_encoding *encoding;
1299
1300 if (new_value == NULL || new_value_length == 0) {
1301 encoding = mbfl_name2encoding(get_output_encoding(TSRMLS_C));
1302 if (!encoding) {
1303 MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
1304 MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
1305 return SUCCESS;
1306 }
1307 } else {
1308 encoding = mbfl_name2encoding(new_value);
1309 if (!encoding) {
1310 MBSTRG(http_output_encoding) = &mbfl_encoding_pass;
1311 MBSTRG(current_http_output_encoding) = &mbfl_encoding_pass;
1312 return FAILURE;
1313 }
1314 }
1315 MBSTRG(http_output_encoding) = encoding;
1316 MBSTRG(current_http_output_encoding) = encoding;
1317
1318 if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1319 php_error_docref("ref.mbstring" TSRMLS_CC, E_DEPRECATED, "Use of mbstring.http_output is deprecated");
1320 }
1321
1322 return SUCCESS;
1323 }
1324
1325
1326
1327 int _php_mb_ini_mbstring_internal_encoding_set(const char *new_value, uint new_value_length TSRMLS_DC)
1328 {
1329 const mbfl_encoding *encoding;
1330
1331 if (!new_value || new_value_length == 0 || !(encoding = mbfl_name2encoding(new_value))) {
1332
1333 encoding = mbfl_no2encoding(mbfl_no_encoding_utf8);
1334 }
1335 MBSTRG(internal_encoding) = encoding;
1336 MBSTRG(current_internal_encoding) = encoding;
1337 #if HAVE_MBREGEX
1338 {
1339 const char *enc_name = new_value;
1340 if (FAILURE == php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC)) {
1341
1342 enc_name = "UTF-8";
1343 php_mb_regex_set_default_mbctype(enc_name TSRMLS_CC);
1344 }
1345 php_mb_regex_set_mbctype(new_value TSRMLS_CC);
1346 }
1347 #endif
1348 return SUCCESS;
1349 }
1350
1351
1352
1353 static PHP_INI_MH(OnUpdate_mbstring_internal_encoding)
1354 {
1355 if (stage & (PHP_INI_STAGE_ACTIVATE | PHP_INI_STAGE_RUNTIME)) {
1356 php_error_docref("ref.mbstring" TSRMLS_CC, E_DEPRECATED, "Use of mbstring.internal_encoding is deprecated");
1357 }
1358
1359 if (OnUpdateString(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC) == FAILURE) {
1360 return FAILURE;
1361 }
1362
1363 if (stage & (PHP_INI_STAGE_STARTUP | PHP_INI_STAGE_SHUTDOWN | PHP_INI_STAGE_RUNTIME)) {
1364 if (new_value_length) {
1365 return _php_mb_ini_mbstring_internal_encoding_set(new_value, new_value_length TSRMLS_CC);
1366 } else {
1367 return _php_mb_ini_mbstring_internal_encoding_set(get_internal_encoding(TSRMLS_C), strlen(get_internal_encoding(TSRMLS_C))+1 TSRMLS_CC);
1368 }
1369 } else {
1370
1371
1372
1373
1374
1375
1376 return SUCCESS;
1377 }
1378 }
1379
1380
1381
1382 static PHP_INI_MH(OnUpdate_mbstring_substitute_character)
1383 {
1384 int c;
1385 char *endptr = NULL;
1386
1387 if (new_value != NULL) {
1388 if (strcasecmp("none", new_value) == 0) {
1389 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1390 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1391 } else if (strcasecmp("long", new_value) == 0) {
1392 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1393 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1394 } else if (strcasecmp("entity", new_value) == 0) {
1395 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1396 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1397 } else {
1398 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1399 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1400 if (new_value_length >0) {
1401 c = strtol(new_value, &endptr, 0);
1402 if (*endptr == '\0') {
1403 MBSTRG(filter_illegal_substchar) = c;
1404 MBSTRG(current_filter_illegal_substchar) = c;
1405 }
1406 }
1407 }
1408 } else {
1409 MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1410 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1411 MBSTRG(filter_illegal_substchar) = 0x3f;
1412 MBSTRG(current_filter_illegal_substchar) = 0x3f;
1413 }
1414
1415 return SUCCESS;
1416 }
1417
1418
1419
1420 static PHP_INI_MH(OnUpdate_mbstring_encoding_translation)
1421 {
1422 if (new_value == NULL) {
1423 return FAILURE;
1424 }
1425
1426 OnUpdateBool(entry, new_value, new_value_length, mh_arg1, mh_arg2, mh_arg3, stage TSRMLS_CC);
1427
1428 if (MBSTRG(encoding_translation)) {
1429 sapi_unregister_post_entry(php_post_entries TSRMLS_CC);
1430 sapi_register_post_entries(mbstr_post_entries TSRMLS_CC);
1431 } else {
1432 sapi_unregister_post_entry(mbstr_post_entries TSRMLS_CC);
1433 sapi_register_post_entries(php_post_entries TSRMLS_CC);
1434 }
1435
1436 return SUCCESS;
1437 }
1438
1439
1440
1441 static PHP_INI_MH(OnUpdate_mbstring_http_output_conv_mimetypes)
1442 {
1443 zval tmp;
1444 void *re = NULL;
1445
1446 if (!new_value) {
1447 new_value = entry->orig_value;
1448 new_value_length = entry->orig_value_length;
1449 }
1450 php_trim(new_value, new_value_length, NULL, 0, &tmp, 3 TSRMLS_CC);
1451
1452 if (Z_STRLEN(tmp) > 0) {
1453 if (!(re = _php_mb_compile_regex(Z_STRVAL(tmp) TSRMLS_CC))) {
1454 zval_dtor(&tmp);
1455 return FAILURE;
1456 }
1457 }
1458
1459 if (MBSTRG(http_output_conv_mimetypes)) {
1460 _php_mb_free_regex(MBSTRG(http_output_conv_mimetypes));
1461 }
1462
1463 MBSTRG(http_output_conv_mimetypes) = re;
1464
1465 zval_dtor(&tmp);
1466 return SUCCESS;
1467 }
1468
1469
1470
1471
1472 PHP_INI_BEGIN()
1473 PHP_INI_ENTRY("mbstring.language", "neutral", PHP_INI_ALL, OnUpdate_mbstring_language)
1474 PHP_INI_ENTRY("mbstring.detect_order", NULL, PHP_INI_ALL, OnUpdate_mbstring_detect_order)
1475 PHP_INI_ENTRY("mbstring.http_input", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_input)
1476 PHP_INI_ENTRY("mbstring.http_output", NULL, PHP_INI_ALL, OnUpdate_mbstring_http_output)
1477 STD_PHP_INI_ENTRY("mbstring.internal_encoding", NULL, PHP_INI_ALL, OnUpdate_mbstring_internal_encoding, internal_encoding_name, zend_mbstring_globals, mbstring_globals)
1478 PHP_INI_ENTRY("mbstring.substitute_character", NULL, PHP_INI_ALL, OnUpdate_mbstring_substitute_character)
1479 STD_PHP_INI_ENTRY("mbstring.func_overload", "0",
1480 PHP_INI_SYSTEM, OnUpdateLong, func_overload, zend_mbstring_globals, mbstring_globals)
1481
1482 STD_PHP_INI_BOOLEAN("mbstring.encoding_translation", "0",
1483 PHP_INI_SYSTEM | PHP_INI_PERDIR,
1484 OnUpdate_mbstring_encoding_translation,
1485 encoding_translation, zend_mbstring_globals, mbstring_globals)
1486 PHP_INI_ENTRY("mbstring.http_output_conv_mimetypes",
1487 "^(text/|application/xhtml\\+xml)",
1488 PHP_INI_ALL,
1489 OnUpdate_mbstring_http_output_conv_mimetypes)
1490
1491 STD_PHP_INI_BOOLEAN("mbstring.strict_detection", "0",
1492 PHP_INI_ALL,
1493 OnUpdateLong,
1494 strict_detection, zend_mbstring_globals, mbstring_globals)
1495 PHP_INI_END()
1496
1497
1498
1499 static PHP_GINIT_FUNCTION(mbstring)
1500 {
1501 mbstring_globals->language = mbfl_no_language_uni;
1502 mbstring_globals->internal_encoding = NULL;
1503 mbstring_globals->current_internal_encoding = mbstring_globals->internal_encoding;
1504 mbstring_globals->http_output_encoding = &mbfl_encoding_pass;
1505 mbstring_globals->current_http_output_encoding = &mbfl_encoding_pass;
1506 mbstring_globals->http_input_identify = NULL;
1507 mbstring_globals->http_input_identify_get = NULL;
1508 mbstring_globals->http_input_identify_post = NULL;
1509 mbstring_globals->http_input_identify_cookie = NULL;
1510 mbstring_globals->http_input_identify_string = NULL;
1511 mbstring_globals->http_input_list = NULL;
1512 mbstring_globals->http_input_list_size = 0;
1513 mbstring_globals->detect_order_list = NULL;
1514 mbstring_globals->detect_order_list_size = 0;
1515 mbstring_globals->current_detect_order_list = NULL;
1516 mbstring_globals->current_detect_order_list_size = 0;
1517 mbstring_globals->default_detect_order_list = (enum mbfl_no_encoding *) php_mb_default_identify_list_neut;
1518 mbstring_globals->default_detect_order_list_size = sizeof(php_mb_default_identify_list_neut) / sizeof(php_mb_default_identify_list_neut[0]);
1519 mbstring_globals->filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1520 mbstring_globals->filter_illegal_substchar = 0x3f;
1521 mbstring_globals->current_filter_illegal_mode = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
1522 mbstring_globals->current_filter_illegal_substchar = 0x3f;
1523 mbstring_globals->illegalchars = 0;
1524 mbstring_globals->func_overload = 0;
1525 mbstring_globals->encoding_translation = 0;
1526 mbstring_globals->strict_detection = 0;
1527 mbstring_globals->outconv = NULL;
1528 mbstring_globals->http_output_conv_mimetypes = NULL;
1529 #if HAVE_MBREGEX
1530 mbstring_globals->mb_regex_globals = php_mb_regex_globals_alloc(TSRMLS_C);
1531 #endif
1532 }
1533
1534
1535
1536 static PHP_GSHUTDOWN_FUNCTION(mbstring)
1537 {
1538 if (mbstring_globals->http_input_list) {
1539 free(mbstring_globals->http_input_list);
1540 }
1541 if (mbstring_globals->detect_order_list) {
1542 free(mbstring_globals->detect_order_list);
1543 }
1544 if (mbstring_globals->http_output_conv_mimetypes) {
1545 _php_mb_free_regex(mbstring_globals->http_output_conv_mimetypes);
1546 }
1547 #if HAVE_MBREGEX
1548 php_mb_regex_globals_free(mbstring_globals->mb_regex_globals TSRMLS_CC);
1549 #endif
1550 }
1551
1552
1553
1554 PHP_MINIT_FUNCTION(mbstring)
1555 {
1556 __mbfl_allocators = &_php_mb_allocators;
1557
1558 REGISTER_INI_ENTRIES();
1559
1560
1561 sapi_register_treat_data(mbstr_treat_data TSRMLS_CC);
1562
1563
1564 if (MBSTRG(encoding_translation)) {
1565 sapi_register_post_entries(mbstr_post_entries TSRMLS_CC);
1566 }
1567
1568 REGISTER_LONG_CONSTANT("MB_OVERLOAD_MAIL", MB_OVERLOAD_MAIL, CONST_CS | CONST_PERSISTENT);
1569 REGISTER_LONG_CONSTANT("MB_OVERLOAD_STRING", MB_OVERLOAD_STRING, CONST_CS | CONST_PERSISTENT);
1570 REGISTER_LONG_CONSTANT("MB_OVERLOAD_REGEX", MB_OVERLOAD_REGEX, CONST_CS | CONST_PERSISTENT);
1571
1572 REGISTER_LONG_CONSTANT("MB_CASE_UPPER", PHP_UNICODE_CASE_UPPER, CONST_CS | CONST_PERSISTENT);
1573 REGISTER_LONG_CONSTANT("MB_CASE_LOWER", PHP_UNICODE_CASE_LOWER, CONST_CS | CONST_PERSISTENT);
1574 REGISTER_LONG_CONSTANT("MB_CASE_TITLE", PHP_UNICODE_CASE_TITLE, CONST_CS | CONST_PERSISTENT);
1575
1576 #if HAVE_MBREGEX
1577 PHP_MINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1578 #endif
1579
1580 if (FAILURE == zend_multibyte_set_functions(&php_mb_zend_multibyte_functions TSRMLS_CC)) {
1581 return FAILURE;
1582 }
1583
1584 php_rfc1867_set_multibyte_callbacks(
1585 php_mb_encoding_translation,
1586 php_mb_gpc_get_detect_order,
1587 php_mb_gpc_set_input_encoding,
1588 php_mb_rfc1867_getword,
1589 php_mb_rfc1867_getword_conf,
1590 php_mb_rfc1867_basename);
1591
1592 return SUCCESS;
1593 }
1594
1595
1596
1597 PHP_MSHUTDOWN_FUNCTION(mbstring)
1598 {
1599 UNREGISTER_INI_ENTRIES();
1600
1601 #if HAVE_MBREGEX
1602 PHP_MSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1603 #endif
1604
1605 return SUCCESS;
1606 }
1607
1608
1609
1610 PHP_RINIT_FUNCTION(mbstring)
1611 {
1612 zend_function *func, *orig;
1613 const struct mb_overload_def *p;
1614
1615 MBSTRG(current_internal_encoding) = MBSTRG(internal_encoding);
1616 MBSTRG(current_http_output_encoding) = MBSTRG(http_output_encoding);
1617 MBSTRG(current_filter_illegal_mode) = MBSTRG(filter_illegal_mode);
1618 MBSTRG(current_filter_illegal_substchar) = MBSTRG(filter_illegal_substchar);
1619
1620 MBSTRG(illegalchars) = 0;
1621
1622 php_mb_populate_current_detect_order_list(TSRMLS_C);
1623
1624
1625 if (MBSTRG(func_overload)){
1626 p = &(mb_ovld[0]);
1627
1628 while (p->type > 0) {
1629 if ((MBSTRG(func_overload) & p->type) == p->type &&
1630 zend_hash_find(EG(function_table), p->save_func,
1631 strlen(p->save_func)+1, (void **)&orig) != SUCCESS) {
1632
1633 zend_hash_find(EG(function_table), p->ovld_func, strlen(p->ovld_func)+1 , (void **)&func);
1634
1635 if (zend_hash_find(EG(function_table), p->orig_func, strlen(p->orig_func)+1, (void **)&orig) != SUCCESS) {
1636 php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't find function %s.", p->orig_func);
1637 return FAILURE;
1638 } else {
1639 zend_hash_add(EG(function_table), p->save_func, strlen(p->save_func)+1, orig, sizeof(zend_function), NULL);
1640
1641 if (zend_hash_update(EG(function_table), p->orig_func, strlen(p->orig_func)+1, func, sizeof(zend_function),
1642 NULL) == FAILURE) {
1643 php_error_docref("ref.mbstring" TSRMLS_CC, E_WARNING, "mbstring couldn't replace function %s.", p->orig_func);
1644 return FAILURE;
1645 }
1646 }
1647 }
1648 p++;
1649 }
1650 }
1651 #if HAVE_MBREGEX
1652 PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1653 #endif
1654 zend_multibyte_set_internal_encoding((const zend_encoding *)MBSTRG(internal_encoding) TSRMLS_CC);
1655
1656 return SUCCESS;
1657 }
1658
1659
1660
1661 PHP_RSHUTDOWN_FUNCTION(mbstring)
1662 {
1663 const struct mb_overload_def *p;
1664 zend_function *orig;
1665
1666 if (MBSTRG(current_detect_order_list) != NULL) {
1667 efree(MBSTRG(current_detect_order_list));
1668 MBSTRG(current_detect_order_list) = NULL;
1669 MBSTRG(current_detect_order_list_size) = 0;
1670 }
1671 if (MBSTRG(outconv) != NULL) {
1672 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
1673 mbfl_buffer_converter_delete(MBSTRG(outconv));
1674 MBSTRG(outconv) = NULL;
1675 }
1676
1677
1678 MBSTRG(http_input_identify) = NULL;
1679 MBSTRG(http_input_identify_post) = NULL;
1680 MBSTRG(http_input_identify_get) = NULL;
1681 MBSTRG(http_input_identify_cookie) = NULL;
1682 MBSTRG(http_input_identify_string) = NULL;
1683
1684
1685 if (MBSTRG(func_overload)){
1686 p = &(mb_ovld[0]);
1687 while (p->type > 0) {
1688 if ((MBSTRG(func_overload) & p->type) == p->type &&
1689 zend_hash_find(EG(function_table), p->save_func,
1690 strlen(p->save_func)+1, (void **)&orig) == SUCCESS) {
1691
1692 zend_hash_update(EG(function_table), p->orig_func, strlen(p->orig_func)+1, orig, sizeof(zend_function), NULL);
1693 zend_hash_del(EG(function_table), p->save_func, strlen(p->save_func)+1);
1694 }
1695 p++;
1696 }
1697 }
1698
1699 #if HAVE_MBREGEX
1700 PHP_RSHUTDOWN(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
1701 #endif
1702
1703 return SUCCESS;
1704 }
1705
1706
1707
1708 PHP_MINFO_FUNCTION(mbstring)
1709 {
1710 php_info_print_table_start();
1711 php_info_print_table_row(2, "Multibyte Support", "enabled");
1712 php_info_print_table_row(2, "Multibyte string engine", "libmbfl");
1713 php_info_print_table_row(2, "HTTP input encoding translation", MBSTRG(encoding_translation) ? "enabled": "disabled");
1714 {
1715 char tmp[256];
1716 snprintf(tmp, sizeof(tmp), "%d.%d.%d", MBFL_VERSION_MAJOR, MBFL_VERSION_MINOR, MBFL_VERSION_TEENY);
1717 php_info_print_table_row(2, "libmbfl version", tmp);
1718 }
1719 php_info_print_table_end();
1720
1721 php_info_print_table_start();
1722 php_info_print_table_header(1, "mbstring extension makes use of \"streamable kanji code filter and converter\", which is distributed under the GNU Lesser General Public License version 2.1.");
1723 php_info_print_table_end();
1724
1725 #if HAVE_MBREGEX
1726 PHP_MINFO(mb_regex)(ZEND_MODULE_INFO_FUNC_ARGS_PASSTHRU);
1727 #endif
1728
1729 DISPLAY_INI_ENTRIES();
1730 }
1731
1732
1733
1734
1735 PHP_FUNCTION(mb_language)
1736 {
1737 char *name = NULL;
1738 int name_len = 0;
1739
1740 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
1741 return;
1742 }
1743 if (name == NULL) {
1744 RETVAL_STRING((char *)mbfl_no_language2name(MBSTRG(language)), 1);
1745 } else {
1746 if (FAILURE == zend_alter_ini_entry(
1747 "mbstring.language", sizeof("mbstring.language"),
1748 name, name_len, PHP_INI_USER, PHP_INI_STAGE_RUNTIME)) {
1749 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown language \"%s\"", name);
1750 RETVAL_FALSE;
1751 } else {
1752 RETVAL_TRUE;
1753 }
1754 }
1755 }
1756
1757
1758
1759
1760 PHP_FUNCTION(mb_internal_encoding)
1761 {
1762 const char *name = NULL;
1763 int name_len;
1764 const mbfl_encoding *encoding;
1765
1766 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &name, &name_len) == FAILURE) {
1767 RETURN_FALSE;
1768 }
1769 if (name == NULL) {
1770 name = MBSTRG(current_internal_encoding) ? MBSTRG(current_internal_encoding)->name: NULL;
1771 if (name != NULL) {
1772 RETURN_STRING(name, 1);
1773 } else {
1774 RETURN_FALSE;
1775 }
1776 } else {
1777 encoding = mbfl_name2encoding(name);
1778 if (!encoding) {
1779 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
1780 RETURN_FALSE;
1781 } else {
1782 MBSTRG(current_internal_encoding) = encoding;
1783 RETURN_TRUE;
1784 }
1785 }
1786 }
1787
1788
1789
1790
1791 PHP_FUNCTION(mb_http_input)
1792 {
1793 char *typ = NULL;
1794 int typ_len;
1795 int retname;
1796 char *list, *temp;
1797 const mbfl_encoding *result = NULL;
1798
1799 retname = 1;
1800 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
1801 RETURN_FALSE;
1802 }
1803 if (typ == NULL) {
1804 result = MBSTRG(http_input_identify);
1805 } else {
1806 switch (*typ) {
1807 case 'G':
1808 case 'g':
1809 result = MBSTRG(http_input_identify_get);
1810 break;
1811 case 'P':
1812 case 'p':
1813 result = MBSTRG(http_input_identify_post);
1814 break;
1815 case 'C':
1816 case 'c':
1817 result = MBSTRG(http_input_identify_cookie);
1818 break;
1819 case 'S':
1820 case 's':
1821 result = MBSTRG(http_input_identify_string);
1822 break;
1823 case 'I':
1824 case 'i':
1825 {
1826 const mbfl_encoding **entry = MBSTRG(http_input_list);
1827 const size_t n = MBSTRG(http_input_list_size);
1828 size_t i;
1829 array_init(return_value);
1830 for (i = 0; i < n; i++) {
1831 add_next_index_string(return_value, (*entry)->name, 1);
1832 entry++;
1833 }
1834 retname = 0;
1835 }
1836 break;
1837 case 'L':
1838 case 'l':
1839 {
1840 const mbfl_encoding **entry = MBSTRG(http_input_list);
1841 const size_t n = MBSTRG(http_input_list_size);
1842 size_t i;
1843 list = NULL;
1844 for (i = 0; i < n; i++) {
1845 if (list) {
1846 temp = list;
1847 spprintf(&list, 0, "%s,%s", temp, (*entry)->name);
1848 efree(temp);
1849 if (!list) {
1850 break;
1851 }
1852 } else {
1853 list = estrdup((*entry)->name);
1854 }
1855 entry++;
1856 }
1857 }
1858 if (!list) {
1859 RETURN_FALSE;
1860 }
1861 RETVAL_STRING(list, 0);
1862 retname = 0;
1863 break;
1864 default:
1865 result = MBSTRG(http_input_identify);
1866 break;
1867 }
1868 }
1869
1870 if (retname) {
1871 if (result) {
1872 RETVAL_STRING(result->name, 1);
1873 } else {
1874 RETVAL_FALSE;
1875 }
1876 }
1877 }
1878
1879
1880
1881
1882 PHP_FUNCTION(mb_http_output)
1883 {
1884 const char *name = NULL;
1885 int name_len;
1886 const mbfl_encoding *encoding;
1887
1888 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", (char **)&name, &name_len) == FAILURE) {
1889 RETURN_FALSE;
1890 }
1891
1892 if (name == NULL) {
1893 name = MBSTRG(current_http_output_encoding) ? MBSTRG(current_http_output_encoding)->name: NULL;
1894 if (name != NULL) {
1895 RETURN_STRING(name, 1);
1896 } else {
1897 RETURN_FALSE;
1898 }
1899 } else {
1900 encoding = mbfl_name2encoding(name);
1901 if (!encoding) {
1902 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
1903 RETURN_FALSE;
1904 } else {
1905 MBSTRG(current_http_output_encoding) = encoding;
1906 RETURN_TRUE;
1907 }
1908 }
1909 }
1910
1911
1912
1913
1914 PHP_FUNCTION(mb_detect_order)
1915 {
1916 zval **arg1 = NULL;
1917
1918 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|Z", &arg1) == FAILURE) {
1919 return;
1920 }
1921
1922 if (!arg1) {
1923 size_t i;
1924 size_t n = MBSTRG(current_detect_order_list_size);
1925 const mbfl_encoding **entry = MBSTRG(current_detect_order_list);
1926 array_init(return_value);
1927 for (i = 0; i < n; i++) {
1928 add_next_index_string(return_value, (*entry)->name, 1);
1929 entry++;
1930 }
1931 } else {
1932 const mbfl_encoding **list = NULL;
1933 size_t size = 0;
1934 switch (Z_TYPE_PP(arg1)) {
1935 case IS_ARRAY:
1936 if (FAILURE == php_mb_parse_encoding_array(*arg1, &list, &size, 0 TSRMLS_CC)) {
1937 if (list) {
1938 efree(list);
1939 }
1940 RETURN_FALSE;
1941 }
1942 break;
1943 default:
1944 convert_to_string_ex(arg1);
1945 if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1), &list, &size, 0 TSRMLS_CC)) {
1946 if (list) {
1947 efree(list);
1948 }
1949 RETURN_FALSE;
1950 }
1951 break;
1952 }
1953
1954 if (list == NULL) {
1955 RETURN_FALSE;
1956 }
1957
1958 if (MBSTRG(current_detect_order_list)) {
1959 efree(MBSTRG(current_detect_order_list));
1960 }
1961 MBSTRG(current_detect_order_list) = list;
1962 MBSTRG(current_detect_order_list_size) = size;
1963 RETURN_TRUE;
1964 }
1965 }
1966
1967
1968
1969
1970 PHP_FUNCTION(mb_substitute_character)
1971 {
1972 zval **arg1 = NULL;
1973
1974 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|Z", &arg1) == FAILURE) {
1975 return;
1976 }
1977
1978 if (!arg1) {
1979 if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
1980 RETURN_STRING("none", 1);
1981 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
1982 RETURN_STRING("long", 1);
1983 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
1984 RETURN_STRING("entity", 1);
1985 } else {
1986 RETURN_LONG(MBSTRG(current_filter_illegal_substchar));
1987 }
1988 } else {
1989 RETVAL_TRUE;
1990
1991 switch (Z_TYPE_PP(arg1)) {
1992 case IS_STRING:
1993 if (strncasecmp("none", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
1994 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE;
1995 } else if (strncasecmp("long", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
1996 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG;
1997 } else if (strncasecmp("entity", Z_STRVAL_PP(arg1), Z_STRLEN_PP(arg1)) == 0) {
1998 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY;
1999 } else {
2000 convert_to_long_ex(arg1);
2001
2002 if (Z_LVAL_PP(arg1) < 0xffff && Z_LVAL_PP(arg1) > 0x0) {
2003 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
2004 MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1);
2005 } else {
2006 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character.");
2007 RETURN_FALSE;
2008 }
2009 }
2010 break;
2011 default:
2012 convert_to_long_ex(arg1);
2013 if (Z_LVAL_PP(arg1) < 0xffff && Z_LVAL_PP(arg1) > 0x0) {
2014 MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
2015 MBSTRG(current_filter_illegal_substchar) = Z_LVAL_PP(arg1);
2016 } else {
2017 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown character.");
2018 RETURN_FALSE;
2019 }
2020 break;
2021 }
2022 }
2023 }
2024
2025
2026
2027
2028 PHP_FUNCTION(mb_preferred_mime_name)
2029 {
2030 enum mbfl_no_encoding no_encoding;
2031 char *name = NULL;
2032 int name_len;
2033
2034 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) {
2035 return;
2036 } else {
2037 no_encoding = mbfl_name2no_encoding(name);
2038 if (no_encoding == mbfl_no_encoding_invalid) {
2039 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
2040 RETVAL_FALSE;
2041 } else {
2042 const char *preferred_name = mbfl_no2preferred_mime_name(no_encoding);
2043 if (preferred_name == NULL || *preferred_name == '\0') {
2044 php_error_docref(NULL TSRMLS_CC, E_WARNING, "No MIME preferred name corresponding to \"%s\"", name);
2045 RETVAL_FALSE;
2046 } else {
2047 RETVAL_STRING((char *)preferred_name, 1);
2048 }
2049 }
2050 }
2051 }
2052
2053
/*
 * Byte range predicates; each evaluates to 1 or 0.
 *   IS_SJIS1: c is in 0x81..0x9f or 0xe0..0xf5
 *   IS_SJIS2: c is in 0x40..0x7e or 0x80..0xfc
 * Note: the argument is evaluated more than once.
 */
#define IS_SJIS1(c) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xf5))
#define IS_SJIS2(c) (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))
2056
2057
2058
2059 PHP_FUNCTION(mb_parse_str)
2060 {
2061 zval *track_vars_array = NULL;
2062 char *encstr = NULL;
2063 int encstr_len;
2064 php_mb_encoding_handler_info_t info;
2065 const mbfl_encoding *detected;
2066
2067 track_vars_array = NULL;
2068 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|z", &encstr, &encstr_len, &track_vars_array) == FAILURE) {
2069 return;
2070 }
2071
2072 if (track_vars_array != NULL) {
2073
2074 zval_dtor(track_vars_array);
2075 array_init(track_vars_array);
2076 }
2077
2078 encstr = estrndup(encstr, encstr_len);
2079
2080 info.data_type = PARSE_STRING;
2081 info.separator = PG(arg_separator).input;
2082 info.report_errors = 1;
2083 info.to_encoding = MBSTRG(current_internal_encoding);
2084 info.to_language = MBSTRG(language);
2085 info.from_encodings = MBSTRG(http_input_list);
2086 info.num_from_encodings = MBSTRG(http_input_list_size);
2087 info.from_language = MBSTRG(language);
2088
2089 if (track_vars_array != NULL) {
2090 detected = _php_mb_encoding_handler_ex(&info, track_vars_array, encstr TSRMLS_CC);
2091 } else {
2092 zval tmp;
2093 if (!EG(active_symbol_table)) {
2094 zend_rebuild_symbol_table(TSRMLS_C);
2095 }
2096 Z_ARRVAL(tmp) = EG(active_symbol_table);
2097 detected = _php_mb_encoding_handler_ex(&info, &tmp, encstr TSRMLS_CC);
2098 }
2099
2100 MBSTRG(http_input_identify) = detected;
2101
2102 RETVAL_BOOL(detected);
2103
2104 if (encstr != NULL) efree(encstr);
2105 }
2106
2107
2108
2109
2110 PHP_FUNCTION(mb_output_handler)
2111 {
2112 char *arg_string;
2113 int arg_string_len;
2114 long arg_status;
2115 mbfl_string string, result;
2116 const char *charset;
2117 char *p;
2118 const mbfl_encoding *encoding;
2119 int last_feed, len;
2120 unsigned char send_text_mimetype = 0;
2121 char *s, *mimetype = NULL;
2122
2123 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl", &arg_string, &arg_string_len, &arg_status) == FAILURE) {
2124 return;
2125 }
2126
2127 encoding = MBSTRG(current_http_output_encoding);
2128
2129
2130 if ((arg_status & PHP_OUTPUT_HANDLER_START) != 0) {
2131
2132 if (MBSTRG(outconv)) {
2133 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2134 mbfl_buffer_converter_delete(MBSTRG(outconv));
2135 MBSTRG(outconv) = NULL;
2136 }
2137 if (encoding == &mbfl_encoding_pass) {
2138 RETURN_STRINGL(arg_string, arg_string_len, 1);
2139 }
2140
2141
2142 if (SG(sapi_headers).mimetype &&
2143 _php_mb_match_regex(
2144 MBSTRG(http_output_conv_mimetypes),
2145 SG(sapi_headers).mimetype,
2146 strlen(SG(sapi_headers).mimetype))) {
2147 if ((s = strchr(SG(sapi_headers).mimetype,';')) == NULL){
2148 mimetype = estrdup(SG(sapi_headers).mimetype);
2149 } else {
2150 mimetype = estrndup(SG(sapi_headers).mimetype,s-SG(sapi_headers).mimetype);
2151 }
2152 send_text_mimetype = 1;
2153 } else if (SG(sapi_headers).send_default_content_type) {
2154 mimetype = SG(default_mimetype) ? SG(default_mimetype) : SAPI_DEFAULT_MIMETYPE;
2155 }
2156
2157
2158 if (SG(sapi_headers).send_default_content_type || send_text_mimetype) {
2159 charset = encoding->mime_name;
2160 if (charset) {
2161 len = spprintf( &p, 0, "Content-Type: %s; charset=%s", mimetype, charset );
2162 if (sapi_add_header(p, len, 0) != FAILURE) {
2163 SG(sapi_headers).send_default_content_type = 0;
2164 }
2165 }
2166
2167 MBSTRG(outconv) = mbfl_buffer_converter_new2(MBSTRG(current_internal_encoding), encoding, 0);
2168 if (send_text_mimetype){
2169 efree(mimetype);
2170 }
2171 }
2172 }
2173
2174
2175 if (MBSTRG(outconv) == NULL) {
2176 RETURN_STRINGL(arg_string, arg_string_len, 1);
2177 }
2178
2179
2180 last_feed = ((arg_status & PHP_OUTPUT_HANDLER_END) != 0);
2181
2182 mbfl_buffer_converter_illegal_mode(MBSTRG(outconv), MBSTRG(current_filter_illegal_mode));
2183 mbfl_buffer_converter_illegal_substchar(MBSTRG(outconv), MBSTRG(current_filter_illegal_substchar));
2184
2185
2186 mbfl_string_init(&string);
2187
2188
2189
2190
2191 string.val = (unsigned char *)arg_string;
2192 string.len = arg_string_len;
2193 mbfl_buffer_converter_feed(MBSTRG(outconv), &string);
2194 if (last_feed) {
2195 mbfl_buffer_converter_flush(MBSTRG(outconv));
2196 }
2197
2198 mbfl_buffer_converter_result(MBSTRG(outconv), &result);
2199 RETVAL_STRINGL((char *)result.val, result.len, 0);
2200
2201
2202 if (last_feed) {
2203 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(MBSTRG(outconv));
2204 mbfl_buffer_converter_delete(MBSTRG(outconv));
2205 MBSTRG(outconv) = NULL;
2206 }
2207 }
2208
2209
2210
2211
2212 PHP_FUNCTION(mb_strlen)
2213 {
2214 int n;
2215 mbfl_string string;
2216 char *enc_name = NULL;
2217 int enc_name_len;
2218
2219 mbfl_string_init(&string);
2220
2221 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
2222 RETURN_FALSE;
2223 }
2224
2225 string.no_language = MBSTRG(language);
2226 if (enc_name == NULL) {
2227 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2228 } else {
2229 string.no_encoding = mbfl_name2no_encoding(enc_name);
2230 if (string.no_encoding == mbfl_no_encoding_invalid) {
2231 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2232 RETURN_FALSE;
2233 }
2234 }
2235
2236 n = mbfl_strlen(&string);
2237 if (n >= 0) {
2238 RETVAL_LONG(n);
2239 } else {
2240 RETVAL_FALSE;
2241 }
2242 }
2243
2244
2245
2246
2247 PHP_FUNCTION(mb_strpos)
2248 {
2249 int n, reverse = 0;
2250 long offset;
2251 mbfl_string haystack, needle;
2252 char *enc_name = NULL;
2253 int enc_name_len;
2254
2255 mbfl_string_init(&haystack);
2256 mbfl_string_init(&needle);
2257 haystack.no_language = MBSTRG(language);
2258 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2259 needle.no_language = MBSTRG(language);
2260 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2261 offset = 0;
2262
2263 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &offset, &enc_name, &enc_name_len) == FAILURE) {
2264 RETURN_FALSE;
2265 }
2266
2267 if (enc_name != NULL) {
2268 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2269 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2270 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2271 RETURN_FALSE;
2272 }
2273 }
2274
2275 if (offset < 0 || offset > mbfl_strlen(&haystack)) {
2276 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string");
2277 RETURN_FALSE;
2278 }
2279 if (needle.len == 0) {
2280 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2281 RETURN_FALSE;
2282 }
2283
2284 n = mbfl_strpos(&haystack, &needle, offset, reverse);
2285 if (n >= 0) {
2286 RETVAL_LONG(n);
2287 } else {
2288 switch (-n) {
2289 case 1:
2290 break;
2291 case 2:
2292 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Needle has not positive length");
2293 break;
2294 case 4:
2295 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding or conversion error");
2296 break;
2297 case 8:
2298 php_error_docref(NULL TSRMLS_CC, E_NOTICE, "Argument is empty");
2299 break;
2300 default:
2301 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown error in mb_strpos");
2302 break;
2303 }
2304 RETVAL_FALSE;
2305 }
2306 }
2307
2308
2309
2310
2311 PHP_FUNCTION(mb_strrpos)
2312 {
2313 int n;
2314 mbfl_string haystack, needle;
2315 char *enc_name = NULL;
2316 int enc_name_len;
2317 zval **zoffset = NULL;
2318 long offset = 0, str_flg;
2319 char *enc_name2 = NULL;
2320 int enc_name_len2;
2321
2322 mbfl_string_init(&haystack);
2323 mbfl_string_init(&needle);
2324 haystack.no_language = MBSTRG(language);
2325 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2326 needle.no_language = MBSTRG(language);
2327 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2328
2329 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|Zs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &zoffset, &enc_name, &enc_name_len) == FAILURE) {
2330 RETURN_FALSE;
2331 }
2332
2333 if (zoffset) {
2334 if (Z_TYPE_PP(zoffset) == IS_STRING) {
2335 enc_name2 = Z_STRVAL_PP(zoffset);
2336 enc_name_len2 = Z_STRLEN_PP(zoffset);
2337 str_flg = 1;
2338
2339 if (enc_name2 != NULL) {
2340 switch (*enc_name2) {
2341 case '0':
2342 case '1':
2343 case '2':
2344 case '3':
2345 case '4':
2346 case '5':
2347 case '6':
2348 case '7':
2349 case '8':
2350 case '9':
2351 case ' ':
2352 case '-':
2353 case '.':
2354 break;
2355 default :
2356 str_flg = 0;
2357 break;
2358 }
2359 }
2360
2361 if (str_flg) {
2362 convert_to_long_ex(zoffset);
2363 offset = Z_LVAL_PP(zoffset);
2364 } else {
2365 enc_name = enc_name2;
2366 enc_name_len = enc_name_len2;
2367 }
2368 } else {
2369 convert_to_long_ex(zoffset);
2370 offset = Z_LVAL_PP(zoffset);
2371 }
2372 }
2373
2374 if (enc_name != NULL) {
2375 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2376 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2377 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2378 RETURN_FALSE;
2379 }
2380 }
2381
2382 if (haystack.len <= 0) {
2383 RETURN_FALSE;
2384 }
2385 if (needle.len <= 0) {
2386 RETURN_FALSE;
2387 }
2388
2389 {
2390 int haystack_char_len = mbfl_strlen(&haystack);
2391 if ((offset > 0 && offset > haystack_char_len) ||
2392 (offset < 0 && -offset > haystack_char_len)) {
2393 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is greater than the length of haystack string");
2394 RETURN_FALSE;
2395 }
2396 }
2397
2398 n = mbfl_strpos(&haystack, &needle, offset, 1);
2399 if (n >= 0) {
2400 RETVAL_LONG(n);
2401 } else {
2402 RETVAL_FALSE;
2403 }
2404 }
2405
2406
2407
2408
2409 PHP_FUNCTION(mb_stripos)
2410 {
2411 int n;
2412 long offset;
2413 mbfl_string haystack, needle;
2414 const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2415 int from_encoding_len;
2416 n = -1;
2417 offset = 0;
2418
2419 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
2420 RETURN_FALSE;
2421 }
2422 if (needle.len == 0) {
2423 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2424 RETURN_FALSE;
2425 }
2426 n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding TSRMLS_CC);
2427
2428 if (n >= 0) {
2429 RETVAL_LONG(n);
2430 } else {
2431 RETVAL_FALSE;
2432 }
2433 }
2434
2435
2436
2437
2438 PHP_FUNCTION(mb_strripos)
2439 {
2440 int n;
2441 long offset;
2442 mbfl_string haystack, needle;
2443 const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2444 int from_encoding_len;
2445 n = -1;
2446 offset = 0;
2447
2448 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|ls", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &offset, &from_encoding, &from_encoding_len) == FAILURE) {
2449 RETURN_FALSE;
2450 }
2451
2452 n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, offset, from_encoding TSRMLS_CC);
2453
2454 if (n >= 0) {
2455 RETVAL_LONG(n);
2456 } else {
2457 RETVAL_FALSE;
2458 }
2459 }
2460
2461
2462
2463
2464 PHP_FUNCTION(mb_strstr)
2465 {
2466 int n, len, mblen;
2467 mbfl_string haystack, needle, result, *ret = NULL;
2468 char *enc_name = NULL;
2469 int enc_name_len;
2470 zend_bool part = 0;
2471
2472 mbfl_string_init(&haystack);
2473 mbfl_string_init(&needle);
2474 haystack.no_language = MBSTRG(language);
2475 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2476 needle.no_language = MBSTRG(language);
2477 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2478
2479 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, (int *)&haystack.len, (char **)&needle.val, (int *)&needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
2480 RETURN_FALSE;
2481 }
2482
2483 if (enc_name != NULL) {
2484 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2485 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2486 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2487 RETURN_FALSE;
2488 }
2489 }
2490
2491 if (needle.len <= 0) {
2492 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2493 RETURN_FALSE;
2494 }
2495 n = mbfl_strpos(&haystack, &needle, 0, 0);
2496 if (n >= 0) {
2497 mblen = mbfl_strlen(&haystack);
2498 if (part) {
2499 ret = mbfl_substr(&haystack, &result, 0, n);
2500 if (ret != NULL) {
2501 RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2502 } else {
2503 RETVAL_FALSE;
2504 }
2505 } else {
2506 len = (mblen - n);
2507 ret = mbfl_substr(&haystack, &result, n, len);
2508 if (ret != NULL) {
2509 RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2510 } else {
2511 RETVAL_FALSE;
2512 }
2513 }
2514 } else {
2515 RETVAL_FALSE;
2516 }
2517 }
2518
2519
2520
2521
2522 PHP_FUNCTION(mb_strrchr)
2523 {
2524 int n, len, mblen;
2525 mbfl_string haystack, needle, result, *ret = NULL;
2526 char *enc_name = NULL;
2527 int enc_name_len;
2528 zend_bool part = 0;
2529
2530 mbfl_string_init(&haystack);
2531 mbfl_string_init(&needle);
2532 haystack.no_language = MBSTRG(language);
2533 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2534 needle.no_language = MBSTRG(language);
2535 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2536
2537 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &enc_name, &enc_name_len) == FAILURE) {
2538 RETURN_FALSE;
2539 }
2540
2541 if (enc_name != NULL) {
2542 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2543 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2544 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2545 RETURN_FALSE;
2546 }
2547 }
2548
2549 if (haystack.len <= 0) {
2550 RETURN_FALSE;
2551 }
2552 if (needle.len <= 0) {
2553 RETURN_FALSE;
2554 }
2555 n = mbfl_strpos(&haystack, &needle, 0, 1);
2556 if (n >= 0) {
2557 mblen = mbfl_strlen(&haystack);
2558 if (part) {
2559 ret = mbfl_substr(&haystack, &result, 0, n);
2560 if (ret != NULL) {
2561 RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2562 } else {
2563 RETVAL_FALSE;
2564 }
2565 } else {
2566 len = (mblen - n);
2567 ret = mbfl_substr(&haystack, &result, n, len);
2568 if (ret != NULL) {
2569 RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2570 } else {
2571 RETVAL_FALSE;
2572 }
2573 }
2574 } else {
2575 RETVAL_FALSE;
2576 }
2577 }
2578
2579
2580
2581
2582 PHP_FUNCTION(mb_stristr)
2583 {
2584 zend_bool part = 0;
2585 unsigned int from_encoding_len, len, mblen;
2586 int n;
2587 mbfl_string haystack, needle, result, *ret = NULL;
2588 const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
2589 mbfl_string_init(&haystack);
2590 mbfl_string_init(&needle);
2591 haystack.no_language = MBSTRG(language);
2592 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2593 needle.no_language = MBSTRG(language);
2594 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2595
2596
2597 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
2598 RETURN_FALSE;
2599 }
2600
2601 if (!needle.len) {
2602 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty delimiter");
2603 RETURN_FALSE;
2604 }
2605
2606 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
2607 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2608 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
2609 RETURN_FALSE;
2610 }
2611
2612 n = php_mb_stripos(0, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding TSRMLS_CC);
2613
2614 if (n <0) {
2615 RETURN_FALSE;
2616 }
2617
2618 mblen = mbfl_strlen(&haystack);
2619
2620 if (part) {
2621 ret = mbfl_substr(&haystack, &result, 0, n);
2622 if (ret != NULL) {
2623 RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2624 } else {
2625 RETVAL_FALSE;
2626 }
2627 } else {
2628 len = (mblen - n);
2629 ret = mbfl_substr(&haystack, &result, n, len);
2630 if (ret != NULL) {
2631 RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2632 } else {
2633 RETVAL_FALSE;
2634 }
2635 }
2636 }
2637
2638
2639
2640
2641 PHP_FUNCTION(mb_strrichr)
2642 {
2643 zend_bool part = 0;
2644 int n, from_encoding_len, len, mblen;
2645 mbfl_string haystack, needle, result, *ret = NULL;
2646 const char *from_encoding = MBSTRG(current_internal_encoding)->name;
2647 mbfl_string_init(&haystack);
2648 mbfl_string_init(&needle);
2649 haystack.no_language = MBSTRG(language);
2650 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2651 needle.no_language = MBSTRG(language);
2652 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2653
2654
2655 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|bs", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &part, &from_encoding, &from_encoding_len) == FAILURE) {
2656 RETURN_FALSE;
2657 }
2658
2659 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
2660 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2661 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
2662 RETURN_FALSE;
2663 }
2664
2665 n = php_mb_stripos(1, (char *)haystack.val, haystack.len, (char *)needle.val, needle.len, 0, from_encoding TSRMLS_CC);
2666
2667 if (n <0) {
2668 RETURN_FALSE;
2669 }
2670
2671 mblen = mbfl_strlen(&haystack);
2672
2673 if (part) {
2674 ret = mbfl_substr(&haystack, &result, 0, n);
2675 if (ret != NULL) {
2676 RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2677 } else {
2678 RETVAL_FALSE;
2679 }
2680 } else {
2681 len = (mblen - n);
2682 ret = mbfl_substr(&haystack, &result, n, len);
2683 if (ret != NULL) {
2684 RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2685 } else {
2686 RETVAL_FALSE;
2687 }
2688 }
2689 }
2690
2691
2692
2693
2694 PHP_FUNCTION(mb_substr_count)
2695 {
2696 int n;
2697 mbfl_string haystack, needle;
2698 char *enc_name = NULL;
2699 int enc_name_len;
2700
2701 mbfl_string_init(&haystack);
2702 mbfl_string_init(&needle);
2703 haystack.no_language = MBSTRG(language);
2704 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2705 needle.no_language = MBSTRG(language);
2706 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2707
2708 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|s", (char **)&haystack.val, &haystack.len, (char **)&needle.val, &needle.len, &enc_name, &enc_name_len) == FAILURE) {
2709 return;
2710 }
2711
2712 if (enc_name != NULL) {
2713 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(enc_name);
2714 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
2715 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2716 RETURN_FALSE;
2717 }
2718 }
2719
2720 if (needle.len <= 0) {
2721 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty substring");
2722 RETURN_FALSE;
2723 }
2724
2725 n = mbfl_substr_count(&haystack, &needle);
2726 if (n >= 0) {
2727 RETVAL_LONG(n);
2728 } else {
2729 RETVAL_FALSE;
2730 }
2731 }
2732
2733
2734
2735
2736 PHP_FUNCTION(mb_substr)
2737 {
2738 size_t argc = ZEND_NUM_ARGS();
2739 char *str, *encoding;
2740 long from, len;
2741 int mblen, str_len, encoding_len;
2742 zval **z_len = NULL;
2743 mbfl_string string, result, *ret;
2744
2745 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|Zs", &str, &str_len, &from, &z_len, &encoding, &encoding_len) == FAILURE) {
2746 return;
2747 }
2748
2749 mbfl_string_init(&string);
2750 string.no_language = MBSTRG(language);
2751 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2752
2753 if (argc == 4) {
2754 string.no_encoding = mbfl_name2no_encoding(encoding);
2755 if (string.no_encoding == mbfl_no_encoding_invalid) {
2756 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
2757 RETURN_FALSE;
2758 }
2759 }
2760
2761 string.val = (unsigned char *)str;
2762 string.len = str_len;
2763
2764 if (argc < 3 || Z_TYPE_PP(z_len) == IS_NULL) {
2765 len = str_len;
2766 } else {
2767 convert_to_long_ex(z_len);
2768 len = Z_LVAL_PP(z_len);
2769 }
2770
2771
2772 mblen = 0;
2773 if (from < 0 || len < 0) {
2774 mblen = mbfl_strlen(&string);
2775 }
2776
2777
2778
2779
2780 if (from < 0) {
2781 from = mblen + from;
2782 if (from < 0) {
2783 from = 0;
2784 }
2785 }
2786
2787
2788
2789
2790 if (len < 0) {
2791 len = (mblen - from) + len;
2792 if (len < 0) {
2793 len = 0;
2794 }
2795 }
2796
2797 if (((MBSTRG(func_overload) & MB_OVERLOAD_STRING) == MB_OVERLOAD_STRING)
2798 && (from >= mbfl_strlen(&string))) {
2799 RETURN_FALSE;
2800 }
2801
2802 ret = mbfl_substr(&string, &result, from, len);
2803 if (NULL == ret) {
2804 RETURN_FALSE;
2805 }
2806
2807 RETURN_STRINGL((char *)ret->val, ret->len, 0);
2808 }
2809
2810
2811
2812
2813 PHP_FUNCTION(mb_strcut)
2814 {
2815 size_t argc = ZEND_NUM_ARGS();
2816 char *encoding;
2817 long from, len;
2818 int encoding_len;
2819 zval **z_len = NULL;
2820 mbfl_string string, result, *ret;
2821
2822 mbfl_string_init(&string);
2823 string.no_language = MBSTRG(language);
2824 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2825
2826 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|Zs", (char **)&string.val, (int **)&string.len, &from, &z_len, &encoding, &encoding_len) == FAILURE) {
2827 return;
2828 }
2829
2830 if (argc == 4) {
2831 string.no_encoding = mbfl_name2no_encoding(encoding);
2832 if (string.no_encoding == mbfl_no_encoding_invalid) {
2833 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
2834 RETURN_FALSE;
2835 }
2836 }
2837
2838 if (argc < 3 || Z_TYPE_PP(z_len) == IS_NULL) {
2839 len = string.len;
2840 } else {
2841 convert_to_long_ex(z_len);
2842 len = Z_LVAL_PP(z_len);
2843 }
2844
2845
2846
2847
2848 if (from < 0) {
2849 from = string.len + from;
2850 if (from < 0) {
2851 from = 0;
2852 }
2853 }
2854
2855
2856
2857
2858 if (len < 0) {
2859 len = (string.len - from) + len;
2860 if (len < 0) {
2861 len = 0;
2862 }
2863 }
2864
2865 if ((unsigned int)from > string.len) {
2866 RETURN_FALSE;
2867 }
2868
2869 ret = mbfl_strcut(&string, &result, from, len);
2870 if (ret == NULL) {
2871 RETURN_FALSE;
2872 }
2873
2874 RETURN_STRINGL((char *)ret->val, ret->len, 0);
2875 }
2876
2877
2878
2879
2880 PHP_FUNCTION(mb_strwidth)
2881 {
2882 int n;
2883 mbfl_string string;
2884 char *enc_name = NULL;
2885 int enc_name_len;
2886
2887 mbfl_string_init(&string);
2888
2889 string.no_language = MBSTRG(language);
2890 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2891
2892 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s", (char **)&string.val, &string.len, &enc_name, &enc_name_len) == FAILURE) {
2893 return;
2894 }
2895
2896 if (enc_name != NULL) {
2897 string.no_encoding = mbfl_name2no_encoding(enc_name);
2898 if (string.no_encoding == mbfl_no_encoding_invalid) {
2899 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", enc_name);
2900 RETURN_FALSE;
2901 }
2902 }
2903
2904 n = mbfl_strwidth(&string);
2905 if (n >= 0) {
2906 RETVAL_LONG(n);
2907 } else {
2908 RETVAL_FALSE;
2909 }
2910 }
2911
2912
2913
2914
2915 PHP_FUNCTION(mb_strimwidth)
2916 {
2917 char *str, *trimmarker, *encoding;
2918 long from, width;
2919 int str_len, trimmarker_len, encoding_len;
2920 mbfl_string string, result, marker, *ret;
2921
2922 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sll|ss", &str, &str_len, &from, &width, &trimmarker, &trimmarker_len, &encoding, &encoding_len) == FAILURE) {
2923 return;
2924 }
2925
2926 mbfl_string_init(&string);
2927 mbfl_string_init(&marker);
2928 string.no_language = MBSTRG(language);
2929 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2930 marker.no_language = MBSTRG(language);
2931 marker.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
2932 marker.val = NULL;
2933 marker.len = 0;
2934
2935 if (ZEND_NUM_ARGS() == 5) {
2936 string.no_encoding = marker.no_encoding = mbfl_name2no_encoding(encoding);
2937 if (string.no_encoding == mbfl_no_encoding_invalid) {
2938 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
2939 RETURN_FALSE;
2940 }
2941 }
2942
2943 string.val = (unsigned char *)str;
2944 string.len = str_len;
2945
2946 if (from < 0 || from > str_len) {
2947 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Start position is out of range");
2948 RETURN_FALSE;
2949 }
2950
2951 if (width < 0) {
2952 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Width is negative value");
2953 RETURN_FALSE;
2954 }
2955
2956 if (ZEND_NUM_ARGS() >= 4) {
2957 marker.val = (unsigned char *)trimmarker;
2958 marker.len = trimmarker_len;
2959 }
2960
2961 ret = mbfl_strimwidth(&string, &marker, &result, from, width);
2962
2963 if (ret == NULL) {
2964 RETURN_FALSE;
2965 }
2966
2967 RETVAL_STRINGL((char *)ret->val, ret->len, 0);
2968 }
2969
2970
2971
2972 MBSTRING_API char * php_mb_convert_encoding(const char *input, size_t length, const char *_to_encoding, const char *_from_encodings, size_t *output_len TSRMLS_DC)
2973 {
2974 mbfl_string string, result, *ret;
2975 const mbfl_encoding *from_encoding, *to_encoding;
2976 mbfl_buffer_converter *convd;
2977 size_t size;
2978 const mbfl_encoding **list;
2979 char *output=NULL;
2980
2981 if (output_len) {
2982 *output_len = 0;
2983 }
2984 if (!input) {
2985 return NULL;
2986 }
2987
2988 if (_to_encoding && strlen(_to_encoding)) {
2989 to_encoding = mbfl_name2encoding(_to_encoding);
2990 if (!to_encoding) {
2991 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", _to_encoding);
2992 return NULL;
2993 }
2994 } else {
2995 to_encoding = MBSTRG(current_internal_encoding);
2996 }
2997
2998
2999 mbfl_string_init(&string);
3000 mbfl_string_init(&result);
3001 from_encoding = MBSTRG(current_internal_encoding);
3002 string.no_encoding = from_encoding->no_encoding;
3003 string.no_language = MBSTRG(language);
3004 string.val = (unsigned char *)input;
3005 string.len = length;
3006
3007
3008 if (_from_encodings) {
3009 list = NULL;
3010 size = 0;
3011 php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), &list, &size, 0 TSRMLS_CC);
3012 if (size == 1) {
3013 from_encoding = *list;
3014 string.no_encoding = from_encoding->no_encoding;
3015 } else if (size > 1) {
3016
3017 from_encoding = mbfl_identify_encoding2(&string, list, size, MBSTRG(strict_detection));
3018 if (from_encoding) {
3019 string.no_encoding = from_encoding->no_encoding;
3020 } else {
3021 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect character encoding");
3022 from_encoding = &mbfl_encoding_pass;
3023 to_encoding = from_encoding;
3024 string.no_encoding = from_encoding->no_encoding;
3025 }
3026 } else {
3027 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal character encoding specified");
3028 }
3029 if (list != NULL) {
3030 efree((void *)list);
3031 }
3032 }
3033
3034
3035 convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, string.len);
3036 if (convd == NULL) {
3037 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create character encoding converter");
3038 return NULL;
3039 }
3040 mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
3041 mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
3042
3043
3044 ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3045 if (ret) {
3046 if (output_len) {
3047 *output_len = ret->len;
3048 }
3049 output = (char *)ret->val;
3050 }
3051
3052 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
3053 mbfl_buffer_converter_delete(convd);
3054 return output;
3055 }
3056
3057
3058
3059
3060 PHP_FUNCTION(mb_convert_encoding)
3061 {
3062 char *arg_str, *arg_new;
3063 int str_len, new_len;
3064 zval *arg_old;
3065 int i;
3066 size_t size, l, n;
3067 char *_from_encodings = NULL, *ret, *s_free = NULL;
3068
3069 zval **hash_entry;
3070 HashTable *target_hash;
3071
3072 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "ss|z", &arg_str, &str_len, &arg_new, &new_len, &arg_old) == FAILURE) {
3073 return;
3074 }
3075
3076 if (ZEND_NUM_ARGS() == 3) {
3077 switch (Z_TYPE_P(arg_old)) {
3078 case IS_ARRAY:
3079 target_hash = Z_ARRVAL_P(arg_old);
3080 zend_hash_internal_pointer_reset(target_hash);
3081 i = zend_hash_num_elements(target_hash);
3082 _from_encodings = NULL;
3083
3084 while (i > 0) {
3085 if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
3086 break;
3087 }
3088
3089 convert_to_string_ex(hash_entry);
3090
3091 if ( _from_encodings) {
3092 l = strlen(_from_encodings);
3093 n = strlen(Z_STRVAL_PP(hash_entry));
3094 _from_encodings = erealloc(_from_encodings, l+n+2);
3095 strcpy(_from_encodings+l, ",");
3096 strcpy(_from_encodings+l+1, Z_STRVAL_PP(hash_entry));
3097 } else {
3098 _from_encodings = estrdup(Z_STRVAL_PP(hash_entry));
3099 }
3100
3101 zend_hash_move_forward(target_hash);
3102 i--;
3103 }
3104
3105 if (_from_encodings != NULL && !strlen(_from_encodings)) {
3106 efree(_from_encodings);
3107 _from_encodings = NULL;
3108 }
3109 s_free = _from_encodings;
3110 break;
3111 default:
3112 convert_to_string(arg_old);
3113 _from_encodings = Z_STRVAL_P(arg_old);
3114 break;
3115 }
3116 }
3117
3118
3119 ret = php_mb_convert_encoding(arg_str, str_len, arg_new, _from_encodings, &size TSRMLS_CC);
3120 if (ret != NULL) {
3121 RETVAL_STRINGL(ret, size, 0);
3122 } else {
3123 RETVAL_FALSE;
3124 }
3125
3126 if ( s_free) {
3127 efree(s_free);
3128 }
3129 }
3130
3131
3132
3133
3134 PHP_FUNCTION(mb_convert_case)
3135 {
3136 const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
3137 char *str;
3138 int str_len, from_encoding_len;
3139 long case_mode = 0;
3140 char *newstr;
3141 size_t ret_len;
3142
3143 RETVAL_FALSE;
3144 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sl|s!", &str, &str_len,
3145 &case_mode, &from_encoding, &from_encoding_len) == FAILURE)
3146 RETURN_FALSE;
3147
3148 newstr = php_unicode_convert_case(case_mode, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
3149
3150 if (newstr) {
3151 RETVAL_STRINGL(newstr, ret_len, 0);
3152 }
3153 }
3154
3155
3156
3157
3158
3159 PHP_FUNCTION(mb_strtoupper)
3160 {
3161 const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
3162 char *str;
3163 int str_len, from_encoding_len;
3164 char *newstr;
3165 size_t ret_len;
3166
3167 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len,
3168 &from_encoding, &from_encoding_len) == FAILURE) {
3169 return;
3170 }
3171 newstr = php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
3172
3173 if (newstr) {
3174 RETURN_STRINGL(newstr, ret_len, 0);
3175 }
3176 RETURN_FALSE;
3177 }
3178
3179
3180
3181
3182
3183 PHP_FUNCTION(mb_strtolower)
3184 {
3185 const char *from_encoding = MBSTRG(current_internal_encoding)->mime_name;
3186 char *str;
3187 int str_len, from_encoding_len;
3188 char *newstr;
3189 size_t ret_len;
3190
3191 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|s!", &str, &str_len,
3192 &from_encoding, &from_encoding_len) == FAILURE) {
3193 return;
3194 }
3195 newstr = php_unicode_convert_case(PHP_UNICODE_CASE_LOWER, str, (size_t) str_len, &ret_len, from_encoding TSRMLS_CC);
3196
3197 if (newstr) {
3198 RETURN_STRINGL(newstr, ret_len, 0);
3199 }
3200 RETURN_FALSE;
3201 }
3202
3203
3204
3205
3206 PHP_FUNCTION(mb_detect_encoding)
3207 {
3208 char *str;
3209 int str_len;
3210 zend_bool strict=0;
3211 zval *encoding_list;
3212
3213 mbfl_string string;
3214 const mbfl_encoding *ret;
3215 const mbfl_encoding **elist, **list;
3216 size_t size;
3217
3218 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|zb", &str, &str_len, &encoding_list, &strict) == FAILURE) {
3219 return;
3220 }
3221
3222
3223 list = NULL;
3224 size = 0;
3225 if (ZEND_NUM_ARGS() >= 2 && !ZVAL_IS_NULL(encoding_list)) {
3226 switch (Z_TYPE_P(encoding_list)) {
3227 case IS_ARRAY:
3228 if (FAILURE == php_mb_parse_encoding_array(encoding_list, &list, &size, 0 TSRMLS_CC)) {
3229 if (list) {
3230 efree(list);
3231 list = NULL;
3232 size = 0;
3233 }
3234 }
3235 break;
3236 default:
3237 convert_to_string(encoding_list);
3238 if (FAILURE == php_mb_parse_encoding_list(Z_STRVAL_P(encoding_list), Z_STRLEN_P(encoding_list), &list, &size, 0 TSRMLS_CC)) {
3239 if (list) {
3240 efree(list);
3241 list = NULL;
3242 size = 0;
3243 }
3244 }
3245 break;
3246 }
3247 if (size <= 0) {
3248 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Illegal argument");
3249 }
3250 }
3251
3252 if (ZEND_NUM_ARGS() < 3) {
3253 strict = (zend_bool)MBSTRG(strict_detection);
3254 }
3255
3256 if (size > 0 && list != NULL) {
3257 elist = list;
3258 } else {
3259 elist = MBSTRG(current_detect_order_list);
3260 size = MBSTRG(current_detect_order_list_size);
3261 }
3262
3263 mbfl_string_init(&string);
3264 string.no_language = MBSTRG(language);
3265 string.val = (unsigned char *)str;
3266 string.len = str_len;
3267 ret = mbfl_identify_encoding2(&string, elist, size, strict);
3268
3269 if (list != NULL) {
3270 efree((void *)list);
3271 }
3272
3273 if (ret == NULL) {
3274 RETURN_FALSE;
3275 }
3276
3277 RETVAL_STRING((char *)ret->name, 1);
3278 }
3279
3280
3281
3282
3283 PHP_FUNCTION(mb_list_encodings)
3284 {
3285 const mbfl_encoding **encodings;
3286 const mbfl_encoding *encoding;
3287 int i;
3288
3289 array_init(return_value);
3290 i = 0;
3291 encodings = mbfl_get_supported_encodings();
3292 while ((encoding = encodings[i++]) != NULL) {
3293 add_next_index_string(return_value, (char *) encoding->name, 1);
3294 }
3295 }
3296
3297
3298
3299
3300 PHP_FUNCTION(mb_encoding_aliases)
3301 {
3302 const mbfl_encoding *encoding;
3303 char *name = NULL;
3304 int name_len;
3305
3306 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", &name, &name_len) == FAILURE) {
3307 RETURN_FALSE;
3308 }
3309
3310 encoding = mbfl_name2encoding(name);
3311 if (!encoding) {
3312 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", name);
3313 RETURN_FALSE;
3314 }
3315
3316 array_init(return_value);
3317 if (encoding->aliases != NULL) {
3318 const char **alias;
3319 for (alias = *encoding->aliases; *alias; ++alias) {
3320 add_next_index_string(return_value, (char *)*alias, 1);
3321 }
3322 }
3323 }
3324
3325
3326
3327
3328 PHP_FUNCTION(mb_encode_mimeheader)
3329 {
3330 enum mbfl_no_encoding charset, transenc;
3331 mbfl_string string, result, *ret;
3332 char *charset_name = NULL;
3333 int charset_name_len;
3334 char *trans_enc_name = NULL;
3335 int trans_enc_name_len;
3336 char *linefeed = "\r\n";
3337 int linefeed_len;
3338 long indent = 0;
3339
3340 mbfl_string_init(&string);
3341 string.no_language = MBSTRG(language);
3342 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3343
3344 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|sssl", (char **)&string.val, &string.len, &charset_name, &charset_name_len, &trans_enc_name, &trans_enc_name_len, &linefeed, &linefeed_len, &indent) == FAILURE) {
3345 return;
3346 }
3347
3348 charset = mbfl_no_encoding_pass;
3349 transenc = mbfl_no_encoding_base64;
3350
3351 if (charset_name != NULL) {
3352 charset = mbfl_name2no_encoding(charset_name);
3353 if (charset == mbfl_no_encoding_invalid) {
3354 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", charset_name);
3355 RETURN_FALSE;
3356 }
3357 } else {
3358 const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
3359 if (lang != NULL) {
3360 charset = lang->mail_charset;
3361 transenc = lang->mail_header_encoding;
3362 }
3363 }
3364
3365 if (trans_enc_name != NULL) {
3366 if (*trans_enc_name == 'B' || *trans_enc_name == 'b') {
3367 transenc = mbfl_no_encoding_base64;
3368 } else if (*trans_enc_name == 'Q' || *trans_enc_name == 'q') {
3369 transenc = mbfl_no_encoding_qprint;
3370 }
3371 }
3372
3373 mbfl_string_init(&result);
3374 ret = mbfl_mime_header_encode(&string, &result, charset, transenc, linefeed, indent);
3375 if (ret != NULL) {
3376 RETVAL_STRINGL((char *)ret->val, ret->len, 0);
3377 } else {
3378 RETVAL_FALSE;
3379 }
3380 }
3381
3382
3383
3384
3385 PHP_FUNCTION(mb_decode_mimeheader)
3386 {
3387 mbfl_string string, result, *ret;
3388
3389 mbfl_string_init(&string);
3390 string.no_language = MBSTRG(language);
3391 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3392
3393 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s", (char **)&string.val, &string.len) == FAILURE) {
3394 return;
3395 }
3396
3397 mbfl_string_init(&result);
3398 ret = mbfl_mime_header_decode(&string, &result, MBSTRG(current_internal_encoding)->no_encoding);
3399 if (ret != NULL) {
3400 RETVAL_STRINGL((char *)ret->val, ret->len, 0);
3401 } else {
3402 RETVAL_FALSE;
3403 }
3404 }
3405
3406
3407
3408
3409 PHP_FUNCTION(mb_convert_kana)
3410 {
3411 int opt, i;
3412 mbfl_string string, result, *ret;
3413 char *optstr = NULL;
3414 int optstr_len;
3415 char *encname = NULL;
3416 int encname_len;
3417
3418 mbfl_string_init(&string);
3419 string.no_language = MBSTRG(language);
3420 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3421
3422 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "s|ss", (char **)&string.val, &string.len, &optstr, &optstr_len, &encname, &encname_len) == FAILURE) {
3423 return;
3424 }
3425
3426
3427 if (optstr != NULL) {
3428 char *p = optstr;
3429 int n = optstr_len;
3430 i = 0;
3431 opt = 0;
3432 while (i < n) {
3433 i++;
3434 switch (*p++) {
3435 case 'A':
3436 opt |= 0x1;
3437 break;
3438 case 'a':
3439 opt |= 0x10;
3440 break;
3441 case 'R':
3442 opt |= 0x2;
3443 break;
3444 case 'r':
3445 opt |= 0x20;
3446 break;
3447 case 'N':
3448 opt |= 0x4;
3449 break;
3450 case 'n':
3451 opt |= 0x40;
3452 break;
3453 case 'S':
3454 opt |= 0x8;
3455 break;
3456 case 's':
3457 opt |= 0x80;
3458 break;
3459 case 'K':
3460 opt |= 0x100;
3461 break;
3462 case 'k':
3463 opt |= 0x1000;
3464 break;
3465 case 'H':
3466 opt |= 0x200;
3467 break;
3468 case 'h':
3469 opt |= 0x2000;
3470 break;
3471 case 'V':
3472 opt |= 0x800;
3473 break;
3474 case 'C':
3475 opt |= 0x10000;
3476 break;
3477 case 'c':
3478 opt |= 0x20000;
3479 break;
3480 case 'M':
3481 opt |= 0x100000;
3482 break;
3483 case 'm':
3484 opt |= 0x200000;
3485 break;
3486 }
3487 }
3488 } else {
3489 opt = 0x900;
3490 }
3491
3492
3493 if (encname != NULL) {
3494 string.no_encoding = mbfl_name2no_encoding(encname);
3495 if (string.no_encoding == mbfl_no_encoding_invalid) {
3496 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encname);
3497 RETURN_FALSE;
3498 }
3499 }
3500
3501 ret = mbfl_ja_jp_hantozen(&string, &result, opt);
3502 if (ret != NULL) {
3503 RETVAL_STRINGL((char *)ret->val, ret->len, 0);
3504 } else {
3505 RETVAL_FALSE;
3506 }
3507 }
3508
3509
3510 #define PHP_MBSTR_STACK_BLOCK_SIZE 32
3511
3512
3513
3514 PHP_FUNCTION(mb_convert_variables)
3515 {
3516 zval ***args, ***stack, **var, **hash_entry, **zfrom_enc;
3517 HashTable *target_hash;
3518 mbfl_string string, result, *ret;
3519 const mbfl_encoding *from_encoding, *to_encoding;
3520 mbfl_encoding_detector *identd;
3521 mbfl_buffer_converter *convd;
3522 int n, to_enc_len, argc, stack_level, stack_max;
3523 size_t elistsz;
3524 const mbfl_encoding **elist;
3525 char *to_enc;
3526 void *ptmp;
3527
3528 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sZ+", &to_enc, &to_enc_len, &zfrom_enc, &args, &argc) == FAILURE) {
3529 return;
3530 }
3531
3532
3533 to_encoding = mbfl_name2encoding(to_enc);
3534 if (!to_encoding) {
3535 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", to_enc);
3536 efree(args);
3537 RETURN_FALSE;
3538 }
3539
3540
3541 mbfl_string_init(&string);
3542 mbfl_string_init(&result);
3543 from_encoding = MBSTRG(current_internal_encoding);
3544 string.no_encoding = from_encoding->no_encoding;
3545 string.no_language = MBSTRG(language);
3546
3547
3548 elist = NULL;
3549 elistsz = 0;
3550 switch (Z_TYPE_PP(zfrom_enc)) {
3551 case IS_ARRAY:
3552 php_mb_parse_encoding_array(*zfrom_enc, &elist, &elistsz, 0 TSRMLS_CC);
3553 break;
3554 default:
3555 convert_to_string_ex(zfrom_enc);
3556 php_mb_parse_encoding_list(Z_STRVAL_PP(zfrom_enc), Z_STRLEN_PP(zfrom_enc), &elist, &elistsz, 0 TSRMLS_CC);
3557 break;
3558 }
3559 if (elistsz <= 0) {
3560 from_encoding = &mbfl_encoding_pass;
3561 } else if (elistsz == 1) {
3562 from_encoding = *elist;
3563 } else {
3564
3565 from_encoding = NULL;
3566 stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
3567 stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0);
3568 stack_level = 0;
3569 identd = mbfl_encoding_detector_new2(elist, elistsz, MBSTRG(strict_detection));
3570 if (identd != NULL) {
3571 n = 0;
3572 while (n < argc || stack_level > 0) {
3573 if (stack_level <= 0) {
3574 var = args[n++];
3575 if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3576 target_hash = HASH_OF(*var);
3577 if (target_hash != NULL) {
3578 zend_hash_internal_pointer_reset(target_hash);
3579 }
3580 }
3581 } else {
3582 stack_level--;
3583 var = stack[stack_level];
3584 }
3585 if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3586 target_hash = HASH_OF(*var);
3587 if (target_hash != NULL) {
3588 while (zend_hash_get_current_data(target_hash, (void **) &hash_entry) != FAILURE) {
3589 zend_hash_move_forward(target_hash);
3590 if (Z_TYPE_PP(hash_entry) == IS_ARRAY || Z_TYPE_PP(hash_entry) == IS_OBJECT) {
3591 if (stack_level >= stack_max) {
3592 stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
3593 ptmp = erealloc(stack, sizeof(zval **)*stack_max);
3594 stack = (zval ***)ptmp;
3595 }
3596 stack[stack_level] = var;
3597 stack_level++;
3598 var = hash_entry;
3599 target_hash = HASH_OF(*var);
3600 if (target_hash != NULL) {
3601 zend_hash_internal_pointer_reset(target_hash);
3602 continue;
3603 }
3604 } else if (Z_TYPE_PP(hash_entry) == IS_STRING) {
3605 string.val = (unsigned char *)Z_STRVAL_PP(hash_entry);
3606 string.len = Z_STRLEN_PP(hash_entry);
3607 if (mbfl_encoding_detector_feed(identd, &string)) {
3608 goto detect_end;
3609 }
3610 }
3611 }
3612 }
3613 } else if (Z_TYPE_PP(var) == IS_STRING) {
3614 string.val = (unsigned char *)Z_STRVAL_PP(var);
3615 string.len = Z_STRLEN_PP(var);
3616 if (mbfl_encoding_detector_feed(identd, &string)) {
3617 goto detect_end;
3618 }
3619 }
3620 }
3621 detect_end:
3622 from_encoding = mbfl_encoding_detector_judge2(identd);
3623 mbfl_encoding_detector_delete(identd);
3624 }
3625 efree(stack);
3626
3627 if (!from_encoding) {
3628 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to detect encoding");
3629 from_encoding = &mbfl_encoding_pass;
3630 }
3631 }
3632 if (elist != NULL) {
3633 efree((void *)elist);
3634 }
3635
3636 convd = NULL;
3637 if (from_encoding != &mbfl_encoding_pass) {
3638 convd = mbfl_buffer_converter_new2(from_encoding, to_encoding, 0);
3639 if (convd == NULL) {
3640 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
3641 RETURN_FALSE;
3642 }
3643 mbfl_buffer_converter_illegal_mode(convd, MBSTRG(current_filter_illegal_mode));
3644 mbfl_buffer_converter_illegal_substchar(convd, MBSTRG(current_filter_illegal_substchar));
3645 }
3646
3647
3648 if (convd != NULL) {
3649 stack_max = PHP_MBSTR_STACK_BLOCK_SIZE;
3650 stack = (zval ***)safe_emalloc(stack_max, sizeof(zval **), 0);
3651 stack_level = 0;
3652 n = 0;
3653 while (n < argc || stack_level > 0) {
3654 if (stack_level <= 0) {
3655 var = args[n++];
3656 if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3657 target_hash = HASH_OF(*var);
3658 if (target_hash != NULL) {
3659 zend_hash_internal_pointer_reset(target_hash);
3660 }
3661 }
3662 } else {
3663 stack_level--;
3664 var = stack[stack_level];
3665 }
3666 if (Z_TYPE_PP(var) == IS_ARRAY || Z_TYPE_PP(var) == IS_OBJECT) {
3667 target_hash = HASH_OF(*var);
3668 if (target_hash != NULL) {
3669 while (zend_hash_get_current_data(target_hash, (void **) &hash_entry) != FAILURE) {
3670 zend_hash_move_forward(target_hash);
3671 if (Z_TYPE_PP(hash_entry) == IS_ARRAY || Z_TYPE_PP(hash_entry) == IS_OBJECT) {
3672 if (stack_level >= stack_max) {
3673 stack_max += PHP_MBSTR_STACK_BLOCK_SIZE;
3674 ptmp = erealloc(stack, sizeof(zval **)*stack_max);
3675 stack = (zval ***)ptmp;
3676 }
3677 stack[stack_level] = var;
3678 stack_level++;
3679 var = hash_entry;
3680 SEPARATE_ZVAL(hash_entry);
3681 target_hash = HASH_OF(*var);
3682 if (target_hash != NULL) {
3683 zend_hash_internal_pointer_reset(target_hash);
3684 continue;
3685 }
3686 } else if (Z_TYPE_PP(hash_entry) == IS_STRING) {
3687 string.val = (unsigned char *)Z_STRVAL_PP(hash_entry);
3688 string.len = Z_STRLEN_PP(hash_entry);
3689 ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3690 if (ret != NULL) {
3691 if (Z_REFCOUNT_PP(hash_entry) > 1) {
3692 Z_DELREF_PP(hash_entry);
3693 MAKE_STD_ZVAL(*hash_entry);
3694 } else {
3695 zval_dtor(*hash_entry);
3696 }
3697 ZVAL_STRINGL(*hash_entry, (char *)ret->val, ret->len, 0);
3698 }
3699 }
3700 }
3701 }
3702 } else if (Z_TYPE_PP(var) == IS_STRING) {
3703 string.val = (unsigned char *)Z_STRVAL_PP(var);
3704 string.len = Z_STRLEN_PP(var);
3705 ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
3706 if (ret != NULL) {
3707 zval_dtor(*var);
3708 ZVAL_STRINGL(*var, (char *)ret->val, ret->len, 0);
3709 }
3710 }
3711 }
3712 efree(stack);
3713
3714 MBSTRG(illegalchars) += mbfl_buffer_illegalchars(convd);
3715 mbfl_buffer_converter_delete(convd);
3716 }
3717
3718 efree(args);
3719
3720 if (from_encoding) {
3721 RETURN_STRING(from_encoding->name, 1);
3722 } else {
3723 RETURN_FALSE;
3724 }
3725 }
3726
3727
3728
3729
3730 static void
3731 php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAMETERS, int type)
3732 {
3733 char *str, *encoding;
3734 int str_len, encoding_len;
3735 zval *zconvmap, **hash_entry;
3736 HashTable *target_hash;
3737 size_t argc = ZEND_NUM_ARGS();
3738 int i, *convmap, *mapelm, mapsize=0;
3739 zend_bool is_hex = 0;
3740 mbfl_string string, result, *ret;
3741 enum mbfl_no_encoding no_encoding;
3742
3743 if (zend_parse_parameters(argc TSRMLS_CC, "sz|sb", &str, &str_len, &zconvmap, &encoding, &encoding_len, &is_hex) == FAILURE) {
3744 return;
3745 }
3746
3747 mbfl_string_init(&string);
3748 string.no_language = MBSTRG(language);
3749 string.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
3750 string.val = (unsigned char *)str;
3751 string.len = str_len;
3752
3753
3754 if ((argc == 3 || argc == 4) && encoding_len > 0) {
3755 no_encoding = mbfl_name2no_encoding(encoding);
3756 if (no_encoding == mbfl_no_encoding_invalid) {
3757 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", encoding);
3758 RETURN_FALSE;
3759 } else {
3760 string.no_encoding = no_encoding;
3761 }
3762 }
3763
3764 if (argc == 4) {
3765 if (type == 0 && is_hex) {
3766 type = 2;
3767 }
3768 }
3769
3770
3771 convmap = NULL;
3772 if (Z_TYPE_P(zconvmap) == IS_ARRAY) {
3773 target_hash = Z_ARRVAL_P(zconvmap);
3774 zend_hash_internal_pointer_reset(target_hash);
3775 i = zend_hash_num_elements(target_hash);
3776 if (i > 0) {
3777 convmap = (int *)safe_emalloc(i, sizeof(int), 0);
3778 mapelm = convmap;
3779 mapsize = 0;
3780 while (i > 0) {
3781 if (zend_hash_get_current_data(target_hash, (void **) &hash_entry) == FAILURE) {
3782 break;
3783 }
3784 convert_to_long_ex(hash_entry);
3785 *mapelm++ = Z_LVAL_PP(hash_entry);
3786 mapsize++;
3787 i--;
3788 zend_hash_move_forward(target_hash);
3789 }
3790 }
3791 }
3792 if (convmap == NULL) {
3793 RETURN_FALSE;
3794 }
3795 mapsize /= 4;
3796
3797 ret = mbfl_html_numeric_entity(&string, &result, convmap, mapsize, type);
3798 if (ret != NULL) {
3799 RETVAL_STRINGL((char *)ret->val, ret->len, 0);
3800 } else {
3801 RETVAL_FALSE;
3802 }
3803 efree((void *)convmap);
3804 }
3805
3806
3807
3808
/* proto string mb_encode_numericentity(string str, array convmap [, string encoding [, bool is_hex]])
 *
 * Forwards all arguments to php_mb_numericentity_exec() with type 0. */
PHP_FUNCTION(mb_encode_numericentity)
{
	php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 0);
}
3813
3814
3815
3816
/* proto string mb_decode_numericentity(string str, array convmap [, string encoding])
 *
 * Forwards all arguments to php_mb_numericentity_exec() with type 1. */
PHP_FUNCTION(mb_decode_numericentity)
{
	php_mb_numericentity_exec(INTERNAL_FUNCTION_PARAM_PASSTHRU, 1);
}
3821
3822
3823
3824
3825
3826
3827
/* If str[pos] starts a CR LF followed by a space or tab, advance pos to the
 * last character of that whitespace run and `continue` the enclosing loop.
 * Otherwise it does nothing.  Must be used inside a loop; `pos` is modified
 * and both arguments are evaluated several times. */
#define SKIP_LONG_HEADER_SEP_MBSTRING(str, pos) \
	if (str[pos] == '\r' && str[pos + 1] == '\n' && (str[pos + 2] == ' ' || str[pos + 2] == '\t')) { \
		pos += 2; \
		while (str[pos + 1] == ' ' || str[pos + 1] == '\t') { \
			pos++; \
		} \
		continue; \
	}
3836
/* Replaces every NUL byte within the first `len` bytes of `str` by a space,
 * in place.  Relies on `char *pp, *ee;` being declared in the calling scope.
 * Expands to several statements (not wrapped in do/while), so it must only
 * be used as a statement inside braces. */
#define MAIL_ASCIIZ_CHECK_MBSTRING(str, len) \
	pp = str; \
	ee = pp + len; \
	while ((pp = memchr(pp, '\0', (ee - pp)))) { \
		*pp = ' '; \
	} \
3842 } \
3843
/* Extends the local smart_str `token` by one character.  When token owns a
 * buffer (token.a > 0) the character is appended to it; otherwise only
 * token.len is incremented and `ch` itself is not stored. */
#define APPEND_ONE_CHAR(ch) do { \
	if (token.a > 0) { \
		smart_str_appendc(&token, ch); \
	} else {\
		token.len++; \
	} \
} while (0)
3851
/* Gives a smart_str that does not yet own a buffer ((str)->a == 0) a private
 * copy of its contents: the capacity becomes the smallest power of two that
 * is >= len (at least 1), a + 1 bytes are allocated with emalloc(), and len
 * bytes are copied from the old location.  No terminating NUL is written.
 * A smart_str with a != 0 is left untouched. */
#define SEPARATE_SMART_STR(str) do {\
	if ((str)->a == 0) { \
		char *tmp_ptr; \
		(str)->a = 1; \
		while ((str)->a < (str)->len) { \
			(str)->a <<= 1; \
		} \
		tmp_ptr = emalloc((str)->a + 1); \
		memcpy(tmp_ptr, (str)->c, (str)->len); \
		(str)->c = tmp_ptr; \
	} \
} while (0)
3864
3865 static void my_smart_str_dtor(smart_str *s)
3866 {
3867 if (s->a > 0) {
3868 smart_str_free(s);
3869 }
3870 }
3871
/* Splits the first str_len bytes of str into "Name: value" header fields and
 * stores each value, as a smart_str, in ht under the upper-cased field name.
 * Parsing stops at the end of the input or at a '\n' that is seen while
 * crlf_state is -1 (start of input, or directly after another '\n').
 * Returns the final parser state. */
static int _php_mbstr_parse_mail_headers(HashTable *ht, const char *str, size_t str_len)
{
	const char *ps;
	size_t icnt;
	int state = 0;
	int crlf_state = -1;

	smart_str token = { 0, 0, 0 };
	smart_str fld_name = { 0, 0, 0 }, fld_val = { 0, 0, 0 };

	ps = str;
	icnt = str_len;

	/*
	 * state:
	 *   0  nothing read yet
	 *   1  reading a field name
	 *   2  ':' seen, waiting for the first character of the value
	 *   3  reading a field value
	 *   4  a line starting with space/tab was seen while in state 3
	 *      (continuation line); the next ordinary character first appends
	 *      a single ' ' and returns to state 3
	 *   5  a line starting with space/tab was seen in any other state
	 *
	 * crlf_state:
	 *  -1  at the start of the input or directly after '\n'
	 *   1  directly after a '\r'
	 *   0  after any other character
	 *
	 * While token.a == 0 the token only references bytes of str (pointer
	 * plus length); SEPARATE_SMART_STR turns it into an owned copy.
	 */
	while (icnt > 0) {
		switch (*ps) {
			case ':':
				/* a '\r' not followed by '\n' belongs to the token */
				if (crlf_state == 1) {
					APPEND_ONE_CHAR('\r');
				}

				if (state == 0 || state == 1) {
					/* the first ':' ends the field name */
					fld_name = token;

					state = 2;
				} else {
					APPEND_ONE_CHAR(*ps);
				}

				crlf_state = 0;
				break;

			case '\n':
				if (crlf_state == -1) {
					/* '\n' at the start of a line: stop parsing */
					goto out;
				}
				crlf_state = -1;
				break;

			case '\r':
				if (crlf_state == 1) {
					/* second '\r' in a row: account for the previous one */
					APPEND_ONE_CHAR('\r');
				} else {
					crlf_state = 1;
				}
				break;

			case ' ': case '\t':
				if (crlf_state == -1) {
					if (state == 3) {
						/* continuation of the value from the previous line;
						 * the token needs its own buffer from here on */
						SEPARATE_SMART_STR(&token);
						state = 4;
					} else {
						/* whitespace at line start outside of a value */
						state = 5;
					}
				} else {
					if (crlf_state == 1) {
						APPEND_ONE_CHAR('\r');
					}
					/* whitespace is kept only inside a name or a value */
					if (state == 1 || state == 3) {
						APPEND_ONE_CHAR(*ps);
					}
				}
				crlf_state = 0;
				break;

			default:
				switch (state) {
					case 0:
						/* start of the first field name */
						token.c = (char *)ps;
						token.len = 0;
						token.a = 0;
						state = 1;
						break;

					case 2:
						if (crlf_state != -1) {
							/* first character of the field value */
							token.c = (char *)ps;
							token.len = 0;
							token.a = 0;

							state = 3;
							break;
						}
						/* no break: at the start of a new line control falls
						 * through to case 3 */

					case 3:
						if (crlf_state == -1) {
							/* a new line starts with an ordinary character:
							 * the previous field is complete, store it */
							fld_val = token;

							if (fld_name.c != NULL && fld_val.c != NULL) {
								char *dummy;

								/* upper-case a private copy of the name and
								 * use it as the hash key (length fld_name.len,
								 * no terminating NUL included) */
								SEPARATE_SMART_STR(&fld_name);
								php_strtoupper(fld_name.c, fld_name.len);

								zend_hash_update(ht, (char *)fld_name.c, fld_name.len, &fld_val, sizeof(smart_str), (void **)&dummy);

								my_smart_str_dtor(&fld_name);
							}

							memset(&fld_name, 0, sizeof(smart_str));
							memset(&fld_val, 0, sizeof(smart_str));

							/* the current character starts the next name */
							token.c = (char *)ps;
							token.len = 0;
							token.a = 0;

							state = 1;
						}
						break;

					case 4:
						/* join the continuation line with a single space */
						APPEND_ONE_CHAR(' ');
						state = 3;
						break;
				}

				if (crlf_state == 1) {
					APPEND_ONE_CHAR('\r');
				}

				APPEND_ONE_CHAR(*ps);

				crlf_state = 0;
				break;
		}
		ps++, icnt--;
	}
out:
	/* a trailing "Name:" without any value gets an empty value */
	if (state == 2) {
		token.c = "";
		token.len = 0;
		token.a = 0;

		state = 3;
	}
	/* store the field that was still being read when parsing stopped */
	if (state == 3) {
		fld_val = token;

		if (fld_name.c != NULL && fld_val.c != NULL) {
			void *dummy;

			/* same key handling as inside the loop */
			SEPARATE_SMART_STR(&fld_name);
			php_strtoupper(fld_name.c, fld_name.len);

			zend_hash_update(ht, (char *)fld_name.c, fld_name.len, &fld_val, sizeof(smart_str), (void **)&dummy);

			my_smart_str_dtor(&fld_name);
		}
	}
	return state;
}
4041
/* proto bool mb_send_mail(string to, string subject, string message [, string additional_headers [, string additional_parameters]])
 *
 * Sends a mail through php_mail() after MIME-encoding the subject and
 * transcoding/encoding the body.  Charset and transfer encodings default to
 * the mail settings of the current language (UTF-8 / base64 when the
 * language is unknown) and can be overridden by Content-Type and
 * Content-Transfer-Encoding lines in additional_headers.  MIME-Version,
 * Content-Type and Content-Transfer-Encoding headers are appended unless the
 * caller already supplied them.  Returns TRUE when php_mail() succeeds and
 * no required field was missing, FALSE otherwise. */
PHP_FUNCTION(mb_send_mail)
{
	int n;
	char *to = NULL;
	int to_len;
	char *message = NULL;
	int message_len;
	char *headers = NULL;
	int headers_len;
	char *subject = NULL;
	int subject_len;
	char *extra_cmd = NULL;
	int extra_cmd_len;
	int i;
	char *to_r = NULL;
	char *force_extra_parameters = INI_STR("mail.force_extra_parameters");
	/* set when the caller's headers already contain the respective field */
	struct {
		int cnt_type:1;
		int cnt_trans_enc:1;
	} suppressed_hdrs = { 0, 0 };

	char *message_buf = NULL, *subject_buf = NULL, *p;
	mbfl_string orig_str, conv_str;
	mbfl_string *pstr;
	enum mbfl_no_encoding
		tran_cs,	/* charset the subject and body are converted to */
		head_enc,	/* transfer encoding used for the subject */
		body_enc;	/* transfer encoding used for the body */
	mbfl_memory_device device;
	const mbfl_language *lang;
	int err = 0;
	HashTable ht_headers;
	smart_str *s;
	extern void mbfl_memory_device_unput(mbfl_memory_device *device);
	char *pp, *ee;	/* scratch pointers used by MAIL_ASCIIZ_CHECK_MBSTRING */

	/* initialize */
	mbfl_memory_device_init(&device, 0, 0);
	mbfl_string_init(&orig_str);
	mbfl_string_init(&conv_str);

	/* character set and transfer encodings */
	tran_cs = mbfl_no_encoding_utf8;
	head_enc = mbfl_no_encoding_base64;
	body_enc = mbfl_no_encoding_base64;
	lang = mbfl_no2language(MBSTRG(language));
	if (lang != NULL) {
		tran_cs = lang->mail_charset;
		head_enc = lang->mail_header_encoding;
		body_enc = lang->mail_body_encoding;
	}

	if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "sss|ss", &to, &to_len, &subject, &subject_len, &message, &message_len, &headers, &headers_len, &extra_cmd, &extra_cmd_len) == FAILURE) {
		return;
	}

	/* replace embedded NUL bytes in every argument by spaces */
	MAIL_ASCIIZ_CHECK_MBSTRING(to, to_len);
	MAIL_ASCIIZ_CHECK_MBSTRING(subject, subject_len);
	MAIL_ASCIIZ_CHECK_MBSTRING(message, message_len);
	if (headers) {
		MAIL_ASCIIZ_CHECK_MBSTRING(headers, headers_len);
	}
	if (extra_cmd) {
		MAIL_ASCIIZ_CHECK_MBSTRING(extra_cmd, extra_cmd_len);
	}

	/* parsed caller headers: upper-cased field name -> smart_str value */
	zend_hash_init(&ht_headers, 0, NULL, (dtor_func_t) my_smart_str_dtor, 0);

	if (headers != NULL) {
		_php_mbstr_parse_mail_headers(&ht_headers, headers, headers_len);
	}

	/* a caller-supplied Content-Type may carry a charset parameter that
	 * replaces tran_cs; only the first parameter after ';' is examined */
	if (zend_hash_find(&ht_headers, "CONTENT-TYPE", sizeof("CONTENT-TYPE") - 1, (void **)&s) == SUCCESS) {
		char *tmp;
		char *param_name;
		char *charset = NULL;

		/* make the value an owned, NUL-terminated buffer before tokenizing */
		SEPARATE_SMART_STR(s);
		smart_str_0(s);

		p = strchr(s->c, ';');

		if (p != NULL) {
			/* skip the ';' and any whitespace after it */
			do {
				++p;
			} while (*p == ' ' || *p == '\t');

			if (*p != '\0') {
				if ((param_name = php_strtok_r(p, "= ", &tmp)) != NULL) {
					if (strcasecmp(param_name, "charset") == 0) {
						enum mbfl_no_encoding _tran_cs = tran_cs;

						charset = php_strtok_r(NULL, "= \"", &tmp);
						if (charset != NULL) {
							_tran_cs = mbfl_name2no_encoding(charset);
						}

						if (_tran_cs == mbfl_no_encoding_invalid) {
							php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported charset \"%s\" - will be regarded as ascii", charset);
							_tran_cs = mbfl_no_encoding_ascii;
						}
						tran_cs = _tran_cs;
					}
				}
			}
		}
		/* do not emit our own Content-Type header later */
		suppressed_hdrs.cnt_type = 1;
	}

	/* a caller-supplied Content-Transfer-Encoding selects the body encoding;
	 * only base64, 7bit and 8bit are accepted */
	if (zend_hash_find(&ht_headers, "CONTENT-TRANSFER-ENCODING", sizeof("CONTENT-TRANSFER-ENCODING") - 1, (void **)&s) == SUCCESS) {
		enum mbfl_no_encoding _body_enc;
		SEPARATE_SMART_STR(s);
		smart_str_0(s);

		_body_enc = mbfl_name2no_encoding(s->c);
		switch (_body_enc) {
			case mbfl_no_encoding_base64:
			case mbfl_no_encoding_7bit:
			case mbfl_no_encoding_8bit:
				body_enc = _body_enc;
				break;

			default:
				php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unsupported transfer encoding \"%s\" - will be regarded as 8bit", s->c);
				body_enc = mbfl_no_encoding_8bit;
				break;
		}
		/* do not emit our own Content-Transfer-Encoding header later */
		suppressed_hdrs.cnt_trans_enc = 1;
	}

	/* To: work on a private copy, strip trailing whitespace and replace
	 * control characters by spaces */
	if (to != NULL) {
		if (to_len > 0) {
			to_r = estrndup(to, to_len);
			for (; to_len; to_len--) {
				if (!isspace((unsigned char) to_r[to_len - 1])) {
					break;
				}
				to_r[to_len - 1] = '\0';
			}
			for (i = 0; to_r[i]; i++) {
				if (iscntrl((unsigned char) to_r[i])) {
					/* A CR LF followed by space/tab (a folded header line)
					 * is skipped by the macro, which `continue`s this loop,
					 * so it is not overwritten with spaces. */
					SKIP_LONG_HEADER_SEP_MBSTRING(to_r, i);
					to_r[i] = ' ';
				}
			}
		} else {
			/* empty string: use the argument itself (nothing to free later) */
			to_r = to;
		}
	} else {
		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing To: field");
		err = 1;
	}

	/* Subject: MIME-encode with the selected charset and header encoding */
	if (subject != NULL && subject_len >= 0) {
		orig_str.no_language = MBSTRG(language);
		orig_str.val = (unsigned char *)subject;
		orig_str.len = subject_len;
		orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
		/* internal encoding unusable as a source: try to detect it instead */
		if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) {
			const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
			orig_str.no_encoding = encoding ? encoding->no_encoding: mbfl_no_encoding_invalid;
		}
		/* the last argument is the indent passed to the encoder: the size of
		 * a sample "Subject: ..." prefix */
		pstr = mbfl_mime_header_encode(&orig_str, &conv_str, tran_cs, head_enc, "\n", sizeof("Subject: [PHP-jp nnnnnnnn]"));
		if (pstr != NULL) {
			/* subject_buf remembers the buffer so it is freed at the end */
			subject_buf = subject = (char *)pstr->val;
		}
	} else {
		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Missing Subject: field");
		err = 1;
	}

	/* message body: convert to tran_cs, then apply the body transfer encoding */
	if (message != NULL) {
		orig_str.no_language = MBSTRG(language);
		orig_str.val = (unsigned char *)message;
		orig_str.len = (unsigned int)message_len;
		orig_str.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;

		if (orig_str.no_encoding == mbfl_no_encoding_invalid || orig_str.no_encoding == mbfl_no_encoding_pass) {
			const mbfl_encoding *encoding = mbfl_identify_encoding2(&orig_str, MBSTRG(current_detect_order_list), MBSTRG(current_detect_order_list_size), MBSTRG(strict_detection));
			orig_str.no_encoding = encoding ? encoding->no_encoding: mbfl_no_encoding_invalid;
		}

		pstr = NULL;
		{
			mbfl_string tmpstr;

			if (mbfl_convert_encoding(&orig_str, &tmpstr, tran_cs) != NULL) {
				/* treat the transcoded bytes as 8bit data for the second pass */
				tmpstr.no_encoding=mbfl_no_encoding_8bit;
				pstr = mbfl_convert_encoding(&tmpstr, &conv_str, body_enc);
				efree(tmpstr.val);
			}
		}
		if (pstr != NULL) {
			/* message_buf remembers the buffer so it is freed at the end */
			message_buf = message = (char *)pstr->val;
		}
	} else {
		/* this is not really an error, so it is allowed. */
		php_error_docref(NULL TSRMLS_CC, E_WARNING, "Empty message body");
		message = NULL;
	}

	/* other headers: start from the caller's header text, then append the
	 * MIME headers that are still missing */
#define PHP_MBSTR_MAIL_MIME_HEADER1 "MIME-Version: 1.0"
#define PHP_MBSTR_MAIL_MIME_HEADER2 "Content-Type: text/plain"
#define PHP_MBSTR_MAIL_MIME_HEADER3 "; charset="
#define PHP_MBSTR_MAIL_MIME_HEADER4 "Content-Transfer-Encoding: "
	if (headers != NULL) {
		p = headers;
		n = headers_len;
		mbfl_memory_device_strncat(&device, p, n);
		/* make sure the caller's block ends with a newline */
		if (n > 0 && p[n - 1] != '\n') {
			mbfl_memory_device_strncat(&device, "\n", 1);
		}
	}

	if (!zend_hash_exists(&ht_headers, "MIME-VERSION", sizeof("MIME-VERSION") - 1)) {
		mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER1, sizeof(PHP_MBSTR_MAIL_MIME_HEADER1) - 1);
		mbfl_memory_device_strncat(&device, "\n", 1);
	}

	if (!suppressed_hdrs.cnt_type) {
		mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER2, sizeof(PHP_MBSTR_MAIL_MIME_HEADER2) - 1);

		/* the charset parameter is added only when a MIME name is known */
		p = (char *)mbfl_no2preferred_mime_name(tran_cs);
		if (p != NULL) {
			mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER3, sizeof(PHP_MBSTR_MAIL_MIME_HEADER3) - 1);
			mbfl_memory_device_strcat(&device, p);
		}
		mbfl_memory_device_strncat(&device, "\n", 1);
	}
	if (!suppressed_hdrs.cnt_trans_enc) {
		mbfl_memory_device_strncat(&device, PHP_MBSTR_MAIL_MIME_HEADER4, sizeof(PHP_MBSTR_MAIL_MIME_HEADER4) - 1);
		p = (char *)mbfl_no2preferred_mime_name(body_enc);
		if (p == NULL) {
			p = "7bit";
		}
		mbfl_memory_device_strcat(&device, p);
		mbfl_memory_device_strncat(&device, "\n", 1);
	}

	/* NOTE(review): presumably unput drops the trailing "\n" written last -
	 * confirm against libmbfl; the buffer is then NUL-terminated */
	mbfl_memory_device_unput(&device);
	mbfl_memory_device_output('\0', &device);
	headers = (char *)device.buffer;

	/* the ini setting mail.force_extra_parameters takes precedence over the
	 * caller's extra parameters; either one is shell-escaped */
	if (force_extra_parameters) {
		extra_cmd = php_escape_shell_cmd(force_extra_parameters);
	} else if (extra_cmd) {
		extra_cmd = php_escape_shell_cmd(extra_cmd);
	}

	if (!err && php_mail(to_r, subject, message, headers, extra_cmd TSRMLS_CC)) {
		RETVAL_TRUE;
	} else {
		RETVAL_FALSE;
	}

	/* release everything allocated above */
	if (extra_cmd) {
		efree(extra_cmd);
	}
	if (to_r != to) {
		efree(to_r);
	}
	if (subject_buf) {
		efree((void *)subject_buf);
	}
	if (message_buf) {
		efree((void *)message_buf);
	}
	mbfl_memory_device_clear(&device);
	zend_hash_destroy(&ht_headers);
}
4323
4324 #undef SKIP_LONG_HEADER_SEP_MBSTRING
4325 #undef MAIL_ASCIIZ_CHECK_MBSTRING
4326 #undef APPEND_ONE_CHAR
4327 #undef SEPARATE_SMART_STR
4328 #undef PHP_MBSTR_MAIL_MIME_HEADER1
4329 #undef PHP_MBSTR_MAIL_MIME_HEADER2
4330 #undef PHP_MBSTR_MAIL_MIME_HEADER3
4331 #undef PHP_MBSTR_MAIL_MIME_HEADER4
4332
4333
4334
4335
4336 PHP_FUNCTION(mb_get_info)
4337 {
4338 char *typ = NULL;
4339 int typ_len;
4340 size_t n;
4341 char *name;
4342 const struct mb_overload_def *over_func;
4343 zval *row1, *row2;
4344 const mbfl_language *lang = mbfl_no2language(MBSTRG(language));
4345 const mbfl_encoding **entry;
4346
4347 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|s", &typ, &typ_len) == FAILURE) {
4348 RETURN_FALSE;
4349 }
4350
4351 if (!typ || !strcasecmp("all", typ)) {
4352 array_init(return_value);
4353 if (MBSTRG(current_internal_encoding)) {
4354 add_assoc_string(return_value, "internal_encoding", (char *)MBSTRG(current_internal_encoding)->name, 1);
4355 }
4356 if (MBSTRG(http_input_identify)) {
4357 add_assoc_string(return_value, "http_input", (char *)MBSTRG(http_input_identify)->name, 1);
4358 }
4359 if (MBSTRG(current_http_output_encoding)) {
4360 add_assoc_string(return_value, "http_output", (char *)MBSTRG(current_http_output_encoding)->name, 1);
4361 }
4362 if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) {
4363 add_assoc_string(return_value, "http_output_conv_mimetypes", name, 1);
4364 }
4365 add_assoc_long(return_value, "func_overload", MBSTRG(func_overload));
4366 if (MBSTRG(func_overload)){
4367 over_func = &(mb_ovld[0]);
4368 MAKE_STD_ZVAL(row1);
4369 array_init(row1);
4370 while (over_func->type > 0) {
4371 if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4372 add_assoc_string(row1, over_func->orig_func, over_func->ovld_func, 1);
4373 }
4374 over_func++;
4375 }
4376 add_assoc_zval(return_value, "func_overload_list", row1);
4377 } else {
4378 add_assoc_string(return_value, "func_overload_list", "no overload", 1);
4379 }
4380 if (lang != NULL) {
4381 if ((name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4382 add_assoc_string(return_value, "mail_charset", name, 1);
4383 }
4384 if ((name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4385 add_assoc_string(return_value, "mail_header_encoding", name, 1);
4386 }
4387 if ((name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4388 add_assoc_string(return_value, "mail_body_encoding", name, 1);
4389 }
4390 }
4391 add_assoc_long(return_value, "illegal_chars", MBSTRG(illegalchars));
4392 if (MBSTRG(encoding_translation)) {
4393 add_assoc_string(return_value, "encoding_translation", "On", 1);
4394 } else {
4395 add_assoc_string(return_value, "encoding_translation", "Off", 1);
4396 }
4397 if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4398 add_assoc_string(return_value, "language", name, 1);
4399 }
4400 n = MBSTRG(current_detect_order_list_size);
4401 entry = MBSTRG(current_detect_order_list);
4402 if (n > 0) {
4403 size_t i;
4404 MAKE_STD_ZVAL(row2);
4405 array_init(row2);
4406 for (i = 0; i < n; i++) {
4407 add_next_index_string(row2, (*entry)->name, 1);
4408 entry++;
4409 }
4410 add_assoc_zval(return_value, "detect_order", row2);
4411 }
4412 if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4413 add_assoc_string(return_value, "substitute_character", "none", 1);
4414 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4415 add_assoc_string(return_value, "substitute_character", "long", 1);
4416 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4417 add_assoc_string(return_value, "substitute_character", "entity", 1);
4418 } else {
4419 add_assoc_long(return_value, "substitute_character", MBSTRG(current_filter_illegal_substchar));
4420 }
4421 if (MBSTRG(strict_detection)) {
4422 add_assoc_string(return_value, "strict_detection", "On", 1);
4423 } else {
4424 add_assoc_string(return_value, "strict_detection", "Off", 1);
4425 }
4426 } else if (!strcasecmp("internal_encoding", typ)) {
4427 if (MBSTRG(current_internal_encoding)) {
4428 RETVAL_STRING((char *)MBSTRG(current_internal_encoding)->name, 1);
4429 }
4430 } else if (!strcasecmp("http_input", typ)) {
4431 if (MBSTRG(http_input_identify)) {
4432 RETVAL_STRING((char *)MBSTRG(http_input_identify)->name, 1);
4433 }
4434 } else if (!strcasecmp("http_output", typ)) {
4435 if (MBSTRG(current_http_output_encoding)) {
4436 RETVAL_STRING((char *)MBSTRG(current_http_output_encoding)->name, 1);
4437 }
4438 } else if (!strcasecmp("http_output_conv_mimetypes", typ)) {
4439 if ((name = (char *)zend_ini_string("mbstring.http_output_conv_mimetypes", sizeof("mbstring.http_output_conv_mimetypes"), 0)) != NULL) {
4440 RETVAL_STRING(name, 1);
4441 }
4442 } else if (!strcasecmp("func_overload", typ)) {
4443 RETVAL_LONG(MBSTRG(func_overload));
4444 } else if (!strcasecmp("func_overload_list", typ)) {
4445 if (MBSTRG(func_overload)){
4446 over_func = &(mb_ovld[0]);
4447 array_init(return_value);
4448 while (over_func->type > 0) {
4449 if ((MBSTRG(func_overload) & over_func->type) == over_func->type ) {
4450 add_assoc_string(return_value, over_func->orig_func, over_func->ovld_func, 1);
4451 }
4452 over_func++;
4453 }
4454 } else {
4455 RETVAL_STRING("no overload", 1);
4456 }
4457 } else if (!strcasecmp("mail_charset", typ)) {
4458 if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_charset)) != NULL) {
4459 RETVAL_STRING(name, 1);
4460 }
4461 } else if (!strcasecmp("mail_header_encoding", typ)) {
4462 if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_header_encoding)) != NULL) {
4463 RETVAL_STRING(name, 1);
4464 }
4465 } else if (!strcasecmp("mail_body_encoding", typ)) {
4466 if (lang != NULL && (name = (char *)mbfl_no_encoding2name(lang->mail_body_encoding)) != NULL) {
4467 RETVAL_STRING(name, 1);
4468 }
4469 } else if (!strcasecmp("illegal_chars", typ)) {
4470 RETVAL_LONG(MBSTRG(illegalchars));
4471 } else if (!strcasecmp("encoding_translation", typ)) {
4472 if (MBSTRG(encoding_translation)) {
4473 RETVAL_STRING("On", 1);
4474 } else {
4475 RETVAL_STRING("Off", 1);
4476 }
4477 } else if (!strcasecmp("language", typ)) {
4478 if ((name = (char *)mbfl_no_language2name(MBSTRG(language))) != NULL) {
4479 RETVAL_STRING(name, 1);
4480 }
4481 } else if (!strcasecmp("detect_order", typ)) {
4482 n = MBSTRG(current_detect_order_list_size);
4483 entry = MBSTRG(current_detect_order_list);
4484 if (n > 0) {
4485 size_t i;
4486 array_init(return_value);
4487 for (i = 0; i < n; i++) {
4488 add_next_index_string(return_value, (*entry)->name, 1);
4489 entry++;
4490 }
4491 }
4492 } else if (!strcasecmp("substitute_character", typ)) {
4493 if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE) {
4494 RETVAL_STRING("none", 1);
4495 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_LONG) {
4496 RETVAL_STRING("long", 1);
4497 } else if (MBSTRG(current_filter_illegal_mode) == MBFL_OUTPUTFILTER_ILLEGAL_MODE_ENTITY) {
4498 RETVAL_STRING("entity", 1);
4499 } else {
4500 RETVAL_LONG(MBSTRG(current_filter_illegal_substchar));
4501 }
4502 } else if (!strcasecmp("strict_detection", typ)) {
4503 if (MBSTRG(strict_detection)) {
4504 RETVAL_STRING("On", 1);
4505 } else {
4506 RETVAL_STRING("Off", 1);
4507 }
4508 } else {
4509 RETURN_FALSE;
4510 }
4511 }
4512
4513
4514
4515
4516 PHP_FUNCTION(mb_check_encoding)
4517 {
4518 char *var = NULL;
4519 int var_len;
4520 char *enc = NULL;
4521 int enc_len;
4522 mbfl_buffer_converter *convd;
4523 const mbfl_encoding *encoding = MBSTRG(current_internal_encoding);
4524 mbfl_string string, result, *ret = NULL;
4525 long illegalchars = 0;
4526
4527 if (zend_parse_parameters(ZEND_NUM_ARGS() TSRMLS_CC, "|ss", &var, &var_len, &enc, &enc_len) == FAILURE) {
4528 RETURN_FALSE;
4529 }
4530
4531 if (var == NULL) {
4532 RETURN_BOOL(MBSTRG(illegalchars) == 0);
4533 }
4534
4535 if (enc != NULL) {
4536 encoding = mbfl_name2encoding(enc);
4537 if (!encoding || encoding == &mbfl_encoding_pass) {
4538 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Invalid encoding \"%s\"", enc);
4539 RETURN_FALSE;
4540 }
4541 }
4542
4543 convd = mbfl_buffer_converter_new2(encoding, encoding, 0);
4544 if (convd == NULL) {
4545 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unable to create converter");
4546 RETURN_FALSE;
4547 }
4548 mbfl_buffer_converter_illegal_mode(convd, MBFL_OUTPUTFILTER_ILLEGAL_MODE_NONE);
4549 mbfl_buffer_converter_illegal_substchar(convd, 0);
4550
4551
4552 mbfl_string_init_set(&string, mbfl_no_language_neutral, encoding->no_encoding);
4553 mbfl_string_init(&result);
4554
4555 string.val = (unsigned char *)var;
4556 string.len = var_len;
4557 ret = mbfl_buffer_converter_feed_result(convd, &string, &result);
4558 illegalchars = mbfl_buffer_illegalchars(convd);
4559 mbfl_buffer_converter_delete(convd);
4560
4561 RETVAL_FALSE;
4562 if (ret != NULL) {
4563 if (illegalchars == 0 && string.len == result.len && memcmp(string.val, result.val, string.len) == 0) {
4564 RETVAL_TRUE;
4565 }
4566 mbfl_string_clear(&result);
4567 }
4568 }
4569
4570
4571
4572
4573 static void php_mb_populate_current_detect_order_list(TSRMLS_D)
4574 {
4575 const mbfl_encoding **entry = 0;
4576 size_t nentries;
4577
4578 if (MBSTRG(current_detect_order_list)) {
4579 return;
4580 }
4581
4582 if (MBSTRG(detect_order_list) && MBSTRG(detect_order_list_size)) {
4583 nentries = MBSTRG(detect_order_list_size);
4584 entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
4585 memcpy(entry, MBSTRG(detect_order_list), sizeof(mbfl_encoding*) * nentries);
4586 } else {
4587 const enum mbfl_no_encoding *src = MBSTRG(default_detect_order_list);
4588 size_t i;
4589 nentries = MBSTRG(default_detect_order_list_size);
4590 entry = (const mbfl_encoding **)safe_emalloc(nentries, sizeof(mbfl_encoding*), 0);
4591 for (i = 0; i < nentries; i++) {
4592 entry[i] = mbfl_no2encoding(src[i]);
4593 }
4594 }
4595 MBSTRG(current_detect_order_list) = entry;
4596 MBSTRG(current_detect_order_list_size) = nentries;
4597 }
4598
4599
4600 static int php_mb_encoding_translation(TSRMLS_D)
4601 {
4602 return MBSTRG(encoding_translation);
4603 }
4604
4605
4606
4607 MBSTRING_API size_t php_mb_mbchar_bytes_ex(const char *s, const mbfl_encoding *enc)
4608 {
4609 if (enc != NULL) {
4610 if (enc->flag & MBFL_ENCTYPE_MBCS) {
4611 if (enc->mblen_table != NULL) {
4612 if (s != NULL) return enc->mblen_table[*(unsigned char *)s];
4613 }
4614 } else if (enc->flag & (MBFL_ENCTYPE_WCS2BE | MBFL_ENCTYPE_WCS2LE)) {
4615 return 2;
4616 } else if (enc->flag & (MBFL_ENCTYPE_WCS4BE | MBFL_ENCTYPE_WCS4LE)) {
4617 return 4;
4618 }
4619 }
4620 return 1;
4621 }
4622
4623
4624
4625 MBSTRING_API size_t php_mb_mbchar_bytes(const char *s TSRMLS_DC)
4626 {
4627 return php_mb_mbchar_bytes_ex(s, MBSTRG(internal_encoding));
4628 }
4629
4630
4631
4632 MBSTRING_API char *php_mb_safe_strrchr_ex(const char *s, unsigned int c, size_t nbytes, const mbfl_encoding *enc)
4633 {
4634 register const char *p = s;
4635 char *last=NULL;
4636
4637 if (nbytes == (size_t)-1) {
4638 size_t nb = 0;
4639
4640 while (*p != '\0') {
4641 if (nb == 0) {
4642 if ((unsigned char)*p == (unsigned char)c) {
4643 last = (char *)p;
4644 }
4645 nb = php_mb_mbchar_bytes_ex(p, enc);
4646 if (nb == 0) {
4647 return NULL;
4648 }
4649 }
4650 --nb;
4651 ++p;
4652 }
4653 } else {
4654 register size_t bcnt = nbytes;
4655 register size_t nbytes_char;
4656 while (bcnt > 0) {
4657 if ((unsigned char)*p == (unsigned char)c) {
4658 last = (char *)p;
4659 }
4660 nbytes_char = php_mb_mbchar_bytes_ex(p, enc);
4661 if (bcnt < nbytes_char) {
4662 return NULL;
4663 }
4664 p += nbytes_char;
4665 bcnt -= nbytes_char;
4666 }
4667 }
4668 return last;
4669 }
4670
4671
4672
4673 MBSTRING_API char *php_mb_safe_strrchr(const char *s, unsigned int c, size_t nbytes TSRMLS_DC)
4674 {
4675 return php_mb_safe_strrchr_ex(s, c, nbytes, MBSTRG(internal_encoding));
4676 }
4677
4678
4679
4680
4681 MBSTRING_API int php_mb_stripos(int mode, const char *old_haystack, unsigned int old_haystack_len, const char *old_needle, unsigned int old_needle_len, long offset, const char *from_encoding TSRMLS_DC)
4682 {
4683 int n;
4684 mbfl_string haystack, needle;
4685 n = -1;
4686
4687 mbfl_string_init(&haystack);
4688 mbfl_string_init(&needle);
4689 haystack.no_language = MBSTRG(language);
4690 haystack.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4691 needle.no_language = MBSTRG(language);
4692 needle.no_encoding = MBSTRG(current_internal_encoding)->no_encoding;
4693
4694 do {
4695 size_t len = 0;
4696 haystack.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_haystack, old_haystack_len, &len, from_encoding TSRMLS_CC);
4697 haystack.len = len;
4698
4699 if (!haystack.val) {
4700 break;
4701 }
4702
4703 if (haystack.len <= 0) {
4704 break;
4705 }
4706
4707 needle.val = (unsigned char *)php_unicode_convert_case(PHP_UNICODE_CASE_UPPER, (char *)old_needle, old_needle_len, &len, from_encoding TSRMLS_CC);
4708 needle.len = len;
4709
4710 if (!needle.val) {
4711 break;
4712 }
4713
4714 if (needle.len <= 0) {
4715 break;
4716 }
4717
4718 haystack.no_encoding = needle.no_encoding = mbfl_name2no_encoding(from_encoding);
4719 if (haystack.no_encoding == mbfl_no_encoding_invalid) {
4720 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown encoding \"%s\"", from_encoding);
4721 break;
4722 }
4723
4724 {
4725 int haystack_char_len = mbfl_strlen(&haystack);
4726
4727 if (mode) {
4728 if ((offset > 0 && offset > haystack_char_len) ||
4729 (offset < 0 && -offset > haystack_char_len)) {
4730 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset is greater than the length of haystack string");
4731 break;
4732 }
4733 } else {
4734 if (offset < 0 || offset > haystack_char_len) {
4735 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Offset not contained in string");
4736 break;
4737 }
4738 }
4739 }
4740
4741 n = mbfl_strpos(&haystack, &needle, offset, mode);
4742 } while(0);
4743
4744 if (haystack.val) {
4745 efree(haystack.val);
4746 }
4747
4748 if (needle.val) {
4749 efree(needle.val);
4750 }
4751
4752 return n;
4753 }
4754
4755
4756 static void php_mb_gpc_get_detect_order(const zend_encoding ***list, size_t *list_size TSRMLS_DC)
4757 {
4758 *list = (const zend_encoding **)MBSTRG(http_input_list);
4759 *list_size = MBSTRG(http_input_list_size);
4760 }
4761
4762
4763 static void php_mb_gpc_set_input_encoding(const zend_encoding *encoding TSRMLS_DC)
4764 {
4765 MBSTRG(http_input_identify) = (const mbfl_encoding*)encoding;
4766 }
4767
4768
4769 #endif
4770
4771
4772
4773
4774
4775
4776
4777
4778