This source file includes the following definitions.
- find_minlength
- set_table_bit
- set_type_bits
- set_nottype_bits
- set_start_bits
- pcre_study
- pcre_free_study
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45 #ifdef HAVE_CONFIG_H
46 #include "config.h"
47 #endif
48
49 #include "pcre_internal.h"
50
/* Set bit number (c) in the bitmap named start_bits that is in scope at the
point of use. The argument and the whole expansion are parenthesized so that
compound expressions such as SET_BIT(a + b) index and shift correctly. The
argument is evaluated twice, so it must not have side effects. */

#define SET_BIT(c) (start_bits[(c)/8] |= (1 << ((c)&7)))
52
53
54
/* Result codes returned by set_start_bits(). */

enum {
  SSB_FAIL = 0,      /* An item was found for which no bitmap can be built */
  SSB_DONE = 1,      /* A definite set of starting code units was recorded */
  SSB_CONTINUE = 2,  /* A branch ended without a definite starting item */
  SSB_UNKNOWN = 3    /* An opcode was not recognized */
};
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82 static int
83 find_minlength(const REAL_PCRE *re, const pcre_uchar *code,
84 const pcre_uchar *startcode, int options, recurse_check *recurses,
85 int *countptr)
86 {
87 int length = -1;
88
89 BOOL utf = (options & PCRE_UTF8) != 0;
90 BOOL had_recurse = FALSE;
91 recurse_check this_recurse;
92 register int branchlength = 0;
93 register pcre_uchar *cc = (pcre_uchar *)code + 1 + LINK_SIZE;
94
95 if ((*countptr)++ > 1000) return -1;
96
97 if (*code == OP_CBRA || *code == OP_SCBRA ||
98 *code == OP_CBRAPOS || *code == OP_SCBRAPOS) cc += IMM2_SIZE;
99
100
101
102
103 for (;;)
104 {
105 int d, min;
106 pcre_uchar *cs, *ce;
107 register pcre_uchar op = *cc;
108
109 switch (op)
110 {
111 case OP_COND:
112 case OP_SCOND:
113
114
115
116
117
118 cs = cc + GET(cc, 1);
119 if (*cs != OP_ALT)
120 {
121 cc = cs + 1 + LINK_SIZE;
122 break;
123 }
124
125
126
127
128 case OP_CBRA:
129 case OP_SCBRA:
130 case OP_BRA:
131 case OP_SBRA:
132 case OP_CBRAPOS:
133 case OP_SCBRAPOS:
134 case OP_BRAPOS:
135 case OP_SBRAPOS:
136 case OP_ONCE:
137 case OP_ONCE_NC:
138 d = find_minlength(re, cc, startcode, options, recurses, countptr);
139 if (d < 0) return d;
140 branchlength += d;
141 do cc += GET(cc, 1); while (*cc == OP_ALT);
142 cc += 1 + LINK_SIZE;
143 break;
144
145
146
147 case OP_ACCEPT:
148 case OP_ASSERT_ACCEPT:
149 return -1;
150
151
152
153
154
155
156
157 case OP_ALT:
158 case OP_KET:
159 case OP_KETRMAX:
160 case OP_KETRMIN:
161 case OP_KETRPOS:
162 case OP_END:
163 if (length < 0 || (!had_recurse && branchlength < length))
164 length = branchlength;
165 if (op != OP_ALT) return length;
166 cc += 1 + LINK_SIZE;
167 branchlength = 0;
168 had_recurse = FALSE;
169 break;
170
171
172
173 case OP_ASSERT:
174 case OP_ASSERT_NOT:
175 case OP_ASSERTBACK:
176 case OP_ASSERTBACK_NOT:
177 do cc += GET(cc, 1); while (*cc == OP_ALT);
178
179
180
181
182 case OP_REVERSE:
183 case OP_CREF:
184 case OP_DNCREF:
185 case OP_RREF:
186 case OP_DNRREF:
187 case OP_DEF:
188 case OP_CALLOUT:
189 case OP_SOD:
190 case OP_SOM:
191 case OP_EOD:
192 case OP_EODN:
193 case OP_CIRC:
194 case OP_CIRCM:
195 case OP_DOLL:
196 case OP_DOLLM:
197 case OP_NOT_WORD_BOUNDARY:
198 case OP_WORD_BOUNDARY:
199 cc += PRIV(OP_lengths)[*cc];
200 break;
201
202
203
204 case OP_BRAZERO:
205 case OP_BRAMINZERO:
206 case OP_BRAPOSZERO:
207 case OP_SKIPZERO:
208 cc += PRIV(OP_lengths)[*cc];
209 do cc += GET(cc, 1); while (*cc == OP_ALT);
210 cc += 1 + LINK_SIZE;
211 break;
212
213
214
215 case OP_CHAR:
216 case OP_CHARI:
217 case OP_NOT:
218 case OP_NOTI:
219 case OP_PLUS:
220 case OP_PLUSI:
221 case OP_MINPLUS:
222 case OP_MINPLUSI:
223 case OP_POSPLUS:
224 case OP_POSPLUSI:
225 case OP_NOTPLUS:
226 case OP_NOTPLUSI:
227 case OP_NOTMINPLUS:
228 case OP_NOTMINPLUSI:
229 case OP_NOTPOSPLUS:
230 case OP_NOTPOSPLUSI:
231 branchlength++;
232 cc += 2;
233 #ifdef SUPPORT_UTF
234 if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
235 #endif
236 break;
237
238 case OP_TYPEPLUS:
239 case OP_TYPEMINPLUS:
240 case OP_TYPEPOSPLUS:
241 branchlength++;
242 cc += (cc[1] == OP_PROP || cc[1] == OP_NOTPROP)? 4 : 2;
243 break;
244
245
246
247
248 case OP_EXACT:
249 case OP_EXACTI:
250 case OP_NOTEXACT:
251 case OP_NOTEXACTI:
252 branchlength += GET2(cc,1);
253 cc += 2 + IMM2_SIZE;
254 #ifdef SUPPORT_UTF
255 if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
256 #endif
257 break;
258
259 case OP_TYPEEXACT:
260 branchlength += GET2(cc,1);
261 cc += 2 + IMM2_SIZE + ((cc[1 + IMM2_SIZE] == OP_PROP
262 || cc[1 + IMM2_SIZE] == OP_NOTPROP)? 2 : 0);
263 break;
264
265
266
267 case OP_PROP:
268 case OP_NOTPROP:
269 cc += 2;
270
271
272 case OP_NOT_DIGIT:
273 case OP_DIGIT:
274 case OP_NOT_WHITESPACE:
275 case OP_WHITESPACE:
276 case OP_NOT_WORDCHAR:
277 case OP_WORDCHAR:
278 case OP_ANY:
279 case OP_ALLANY:
280 case OP_EXTUNI:
281 case OP_HSPACE:
282 case OP_NOT_HSPACE:
283 case OP_VSPACE:
284 case OP_NOT_VSPACE:
285 branchlength++;
286 cc++;
287 break;
288
289
290
291
292 case OP_ANYNL:
293 branchlength += 1;
294 cc++;
295 break;
296
297
298
299
300
301 case OP_ANYBYTE:
302 #ifdef SUPPORT_UTF
303 if (utf) return -1;
304 #endif
305 branchlength++;
306 cc++;
307 break;
308
309
310
311
312 case OP_TYPESTAR:
313 case OP_TYPEMINSTAR:
314 case OP_TYPEQUERY:
315 case OP_TYPEMINQUERY:
316 case OP_TYPEPOSSTAR:
317 case OP_TYPEPOSQUERY:
318 if (cc[1] == OP_PROP || cc[1] == OP_NOTPROP) cc += 2;
319 cc += PRIV(OP_lengths)[op];
320 break;
321
322 case OP_TYPEUPTO:
323 case OP_TYPEMINUPTO:
324 case OP_TYPEPOSUPTO:
325 if (cc[1 + IMM2_SIZE] == OP_PROP
326 || cc[1 + IMM2_SIZE] == OP_NOTPROP) cc += 2;
327 cc += PRIV(OP_lengths)[op];
328 break;
329
330
331
332 case OP_CLASS:
333 case OP_NCLASS:
334 #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
335 case OP_XCLASS:
336
337
338 if (op == OP_XCLASS)
339 cc += GET(cc, 1);
340 else
341 cc += PRIV(OP_lengths)[OP_CLASS];
342 #else
343 cc += PRIV(OP_lengths)[OP_CLASS];
344 #endif
345
346 switch (*cc)
347 {
348 case OP_CRPLUS:
349 case OP_CRMINPLUS:
350 case OP_CRPOSPLUS:
351 branchlength++;
352
353
354 case OP_CRSTAR:
355 case OP_CRMINSTAR:
356 case OP_CRQUERY:
357 case OP_CRMINQUERY:
358 case OP_CRPOSSTAR:
359 case OP_CRPOSQUERY:
360 cc++;
361 break;
362
363 case OP_CRRANGE:
364 case OP_CRMINRANGE:
365 case OP_CRPOSRANGE:
366 branchlength += GET2(cc,1);
367 cc += 1 + 2 * IMM2_SIZE;
368 break;
369
370 default:
371 branchlength++;
372 break;
373 }
374 break;
375
376
377
378
379
380
381
382
383
384
385
386
387
388 case OP_DNREF:
389 case OP_DNREFI:
390 if ((options & PCRE_JAVASCRIPT_COMPAT) == 0)
391 {
392 int count = GET2(cc, 1+IMM2_SIZE);
393 pcre_uchar *slot = (pcre_uchar *)re +
394 re->name_table_offset + GET2(cc, 1) * re->name_entry_size;
395 d = INT_MAX;
396 while (count-- > 0)
397 {
398 ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(slot, 0));
399 if (cs == NULL) return -2;
400 do ce += GET(ce, 1); while (*ce == OP_ALT);
401 if (cc > cs && cc < ce)
402 {
403 d = 0;
404 had_recurse = TRUE;
405 break;
406 }
407 else
408 {
409 recurse_check *r = recurses;
410 for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
411 if (r != NULL)
412 {
413 d = 0;
414 had_recurse = TRUE;
415 break;
416 }
417 else
418 {
419 int dd;
420 this_recurse.prev = recurses;
421 this_recurse.group = cs;
422 dd = find_minlength(re, cs, startcode, options, &this_recurse,
423 countptr);
424 if (dd < d) d = dd;
425 }
426 }
427 slot += re->name_entry_size;
428 }
429 }
430 else d = 0;
431 cc += 1 + 2*IMM2_SIZE;
432 goto REPEAT_BACK_REFERENCE;
433
434 case OP_REF:
435 case OP_REFI:
436 if ((options & PCRE_JAVASCRIPT_COMPAT) == 0)
437 {
438 ce = cs = (pcre_uchar *)PRIV(find_bracket)(startcode, utf, GET2(cc, 1));
439 if (cs == NULL) return -2;
440 do ce += GET(ce, 1); while (*ce == OP_ALT);
441 if (cc > cs && cc < ce)
442 {
443 d = 0;
444 had_recurse = TRUE;
445 }
446 else
447 {
448 recurse_check *r = recurses;
449 for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
450 if (r != NULL)
451 {
452 d = 0;
453 had_recurse = TRUE;
454 }
455 else
456 {
457 this_recurse.prev = recurses;
458 this_recurse.group = cs;
459 d = find_minlength(re, cs, startcode, options, &this_recurse,
460 countptr);
461 }
462 }
463 }
464 else d = 0;
465 cc += 1 + IMM2_SIZE;
466
467
468
469 REPEAT_BACK_REFERENCE:
470 switch (*cc)
471 {
472 case OP_CRSTAR:
473 case OP_CRMINSTAR:
474 case OP_CRQUERY:
475 case OP_CRMINQUERY:
476 case OP_CRPOSSTAR:
477 case OP_CRPOSQUERY:
478 min = 0;
479 cc++;
480 break;
481
482 case OP_CRPLUS:
483 case OP_CRMINPLUS:
484 case OP_CRPOSPLUS:
485 min = 1;
486 cc++;
487 break;
488
489 case OP_CRRANGE:
490 case OP_CRMINRANGE:
491 case OP_CRPOSRANGE:
492 min = GET2(cc, 1);
493 cc += 1 + 2 * IMM2_SIZE;
494 break;
495
496 default:
497 min = 1;
498 break;
499 }
500
501 branchlength += min * d;
502 break;
503
504
505
506
507 case OP_RECURSE:
508 cs = ce = (pcre_uchar *)startcode + GET(cc, 1);
509 do ce += GET(ce, 1); while (*ce == OP_ALT);
510 if (cc > cs && cc < ce)
511 had_recurse = TRUE;
512 else
513 {
514 recurse_check *r = recurses;
515 for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
516 if (r != NULL)
517 had_recurse = TRUE;
518 else
519 {
520 this_recurse.prev = recurses;
521 this_recurse.group = cs;
522 branchlength += find_minlength(re, cs, startcode, options,
523 &this_recurse, countptr);
524 }
525 }
526 cc += 1 + LINK_SIZE;
527 break;
528
529
530
531
532
533
534
535
536
537 case OP_UPTO:
538 case OP_UPTOI:
539 case OP_NOTUPTO:
540 case OP_NOTUPTOI:
541 case OP_MINUPTO:
542 case OP_MINUPTOI:
543 case OP_NOTMINUPTO:
544 case OP_NOTMINUPTOI:
545 case OP_POSUPTO:
546 case OP_POSUPTOI:
547 case OP_NOTPOSUPTO:
548 case OP_NOTPOSUPTOI:
549
550 case OP_STAR:
551 case OP_STARI:
552 case OP_NOTSTAR:
553 case OP_NOTSTARI:
554 case OP_MINSTAR:
555 case OP_MINSTARI:
556 case OP_NOTMINSTAR:
557 case OP_NOTMINSTARI:
558 case OP_POSSTAR:
559 case OP_POSSTARI:
560 case OP_NOTPOSSTAR:
561 case OP_NOTPOSSTARI:
562
563 case OP_QUERY:
564 case OP_QUERYI:
565 case OP_NOTQUERY:
566 case OP_NOTQUERYI:
567 case OP_MINQUERY:
568 case OP_MINQUERYI:
569 case OP_NOTMINQUERY:
570 case OP_NOTMINQUERYI:
571 case OP_POSQUERY:
572 case OP_POSQUERYI:
573 case OP_NOTPOSQUERY:
574 case OP_NOTPOSQUERYI:
575
576 cc += PRIV(OP_lengths)[op];
577 #ifdef SUPPORT_UTF
578 if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
579 #endif
580 break;
581
582
583
584 case OP_MARK:
585 case OP_PRUNE_ARG:
586 case OP_SKIP_ARG:
587 case OP_THEN_ARG:
588 cc += PRIV(OP_lengths)[op] + cc[1];
589 break;
590
591
592
593 case OP_CLOSE:
594 case OP_COMMIT:
595 case OP_FAIL:
596 case OP_PRUNE:
597 case OP_SET_SOM:
598 case OP_SKIP:
599 case OP_THEN:
600 cc += PRIV(OP_lengths)[op];
601 break;
602
603
604
605
606 default:
607 return -3;
608 }
609 }
610
611 }
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634 static const pcre_uchar *
635 set_table_bit(pcre_uint8 *start_bits, const pcre_uchar *p, BOOL caseless,
636 compile_data *cd, BOOL utf)
637 {
638 pcre_uint32 c = *p;
639
640 #ifdef COMPILE_PCRE8
641 SET_BIT(c);
642
643 #ifdef SUPPORT_UTF
644 if (utf && c > 127)
645 {
646 GETCHARINC(c, p);
647 #ifdef SUPPORT_UCP
648 if (caseless)
649 {
650 pcre_uchar buff[6];
651 c = UCD_OTHERCASE(c);
652 (void)PRIV(ord2utf)(c, buff);
653 SET_BIT(buff[0]);
654 }
655 #endif
656 return p;
657 }
658 #else
659 (void)(utf);
660 #endif
661
662
663
664 if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]);
665 return p + 1;
666 #endif
667
668 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
669 if (c > 0xff)
670 {
671 c = 0xff;
672 caseless = FALSE;
673 }
674 SET_BIT(c);
675
676 #ifdef SUPPORT_UTF
677 if (utf && c > 127)
678 {
679 GETCHARINC(c, p);
680 #ifdef SUPPORT_UCP
681 if (caseless)
682 {
683 c = UCD_OTHERCASE(c);
684 if (c > 0xff)
685 c = 0xff;
686 SET_BIT(c);
687 }
688 #endif
689 return p;
690 }
691 #else
692 (void)(utf);
693 #endif
694
695 if (caseless && (cd->ctypes[c] & ctype_letter) != 0) SET_BIT(cd->fcc[c]);
696 return p + 1;
697 #endif
698 }
699
700
701
702
703
704
705
706
707
708
709
710
711
712
713
714
715
716
717
718
719
720
721
722 static void
723 set_type_bits(pcre_uint8 *start_bits, int cbit_type, unsigned int table_limit,
724 compile_data *cd)
725 {
726 register pcre_uint32 c;
727 for (c = 0; c < table_limit; c++) start_bits[c] |= cd->cbits[c+cbit_type];
728 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
729 if (table_limit == 32) return;
730 for (c = 128; c < 256; c++)
731 {
732 if ((cd->cbits[c/8] & (1 << (c&7))) != 0)
733 {
734 pcre_uchar buff[6];
735 (void)PRIV(ord2utf)(c, buff);
736 SET_BIT(buff[0]);
737 }
738 }
739 #endif
740 }
741
742
743
744
745
746
747
748
749
750
751
752
753
754
755
756
757
758
759
760
761
762
763
764 static void
765 set_nottype_bits(pcre_uint8 *start_bits, int cbit_type, unsigned int table_limit,
766 compile_data *cd)
767 {
768 register pcre_uint32 c;
769 for (c = 0; c < table_limit; c++) start_bits[c] |= ~cd->cbits[c+cbit_type];
770 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
771 if (table_limit != 32) for (c = 24; c < 32; c++) start_bits[c] = 0xff;
772 #endif
773 }
774
775
776
777
778
779
780
781
782
783
784
785
786
787
788
789
790
791
792
793
794
795
796
797
798
799
800
801 static int
802 set_start_bits(const pcre_uchar *code, pcre_uint8 *start_bits, BOOL utf,
803 compile_data *cd)
804 {
805 register pcre_uint32 c;
806 int yield = SSB_DONE;
807 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
808 int table_limit = utf? 16:32;
809 #else
810 int table_limit = 32;
811 #endif
812
813 #if 0
814
815
816
817
818
819
820
821
822
823
824
825
826 volatile int dummy;
827
828 #endif
829
830 do
831 {
832 BOOL try_next = TRUE;
833 const pcre_uchar *tcode = code + 1 + LINK_SIZE;
834
835 if (*code == OP_CBRA || *code == OP_SCBRA ||
836 *code == OP_CBRAPOS || *code == OP_SCBRAPOS) tcode += IMM2_SIZE;
837
838 while (try_next)
839 {
840 int rc;
841
842 switch(*tcode)
843 {
844
845
846
847
848 default:
849 return SSB_UNKNOWN;
850
851
852
853 case OP_ACCEPT:
854 case OP_ASSERT_ACCEPT:
855 case OP_ALLANY:
856 case OP_ANY:
857 case OP_ANYBYTE:
858 case OP_CIRC:
859 case OP_CIRCM:
860 case OP_CLOSE:
861 case OP_COMMIT:
862 case OP_COND:
863 case OP_CREF:
864 case OP_DEF:
865 case OP_DNCREF:
866 case OP_DNREF:
867 case OP_DNREFI:
868 case OP_DNRREF:
869 case OP_DOLL:
870 case OP_DOLLM:
871 case OP_END:
872 case OP_EOD:
873 case OP_EODN:
874 case OP_EXTUNI:
875 case OP_FAIL:
876 case OP_MARK:
877 case OP_NOT:
878 case OP_NOTEXACT:
879 case OP_NOTEXACTI:
880 case OP_NOTI:
881 case OP_NOTMINPLUS:
882 case OP_NOTMINPLUSI:
883 case OP_NOTMINQUERY:
884 case OP_NOTMINQUERYI:
885 case OP_NOTMINSTAR:
886 case OP_NOTMINSTARI:
887 case OP_NOTMINUPTO:
888 case OP_NOTMINUPTOI:
889 case OP_NOTPLUS:
890 case OP_NOTPLUSI:
891 case OP_NOTPOSPLUS:
892 case OP_NOTPOSPLUSI:
893 case OP_NOTPOSQUERY:
894 case OP_NOTPOSQUERYI:
895 case OP_NOTPOSSTAR:
896 case OP_NOTPOSSTARI:
897 case OP_NOTPOSUPTO:
898 case OP_NOTPOSUPTOI:
899 case OP_NOTPROP:
900 case OP_NOTQUERY:
901 case OP_NOTQUERYI:
902 case OP_NOTSTAR:
903 case OP_NOTSTARI:
904 case OP_NOTUPTO:
905 case OP_NOTUPTOI:
906 case OP_NOT_HSPACE:
907 case OP_NOT_VSPACE:
908 case OP_PRUNE:
909 case OP_PRUNE_ARG:
910 case OP_RECURSE:
911 case OP_REF:
912 case OP_REFI:
913 case OP_REVERSE:
914 case OP_RREF:
915 case OP_SCOND:
916 case OP_SET_SOM:
917 case OP_SKIP:
918 case OP_SKIP_ARG:
919 case OP_SOD:
920 case OP_SOM:
921 case OP_THEN:
922 case OP_THEN_ARG:
923 return SSB_FAIL;
924
925
926
927
928
929
930 case OP_PROP:
931 if (tcode[1] != PT_CLIST) return SSB_FAIL;
932 {
933 const pcre_uint32 *p = PRIV(ucd_caseless_sets) + tcode[2];
934 while ((c = *p++) < NOTACHAR)
935 {
936 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
937 if (utf)
938 {
939 pcre_uchar buff[6];
940 (void)PRIV(ord2utf)(c, buff);
941 c = buff[0];
942 }
943 #endif
944 if (c > 0xff) SET_BIT(0xff); else SET_BIT(c);
945 }
946 }
947 try_next = FALSE;
948 break;
949
950
951
952 case OP_WORD_BOUNDARY:
953 case OP_NOT_WORD_BOUNDARY:
954 tcode++;
955 break;
956
957
958
959
960
961
962 case OP_BRA:
963 case OP_SBRA:
964 case OP_CBRA:
965 case OP_SCBRA:
966 case OP_BRAPOS:
967 case OP_SBRAPOS:
968 case OP_CBRAPOS:
969 case OP_SCBRAPOS:
970 case OP_ONCE:
971 case OP_ONCE_NC:
972 case OP_ASSERT:
973 rc = set_start_bits(tcode, start_bits, utf, cd);
974 if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
975 if (rc == SSB_DONE) try_next = FALSE; else
976 {
977 do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
978 tcode += 1 + LINK_SIZE;
979 }
980 break;
981
982
983
984
985
986
987
988
989 case OP_ALT:
990 yield = SSB_CONTINUE;
991 try_next = FALSE;
992 break;
993
994 case OP_KET:
995 case OP_KETRMAX:
996 case OP_KETRMIN:
997 case OP_KETRPOS:
998 return SSB_CONTINUE;
999
1000
1001
1002 case OP_CALLOUT:
1003 tcode += 2 + 2*LINK_SIZE;
1004 break;
1005
1006
1007
1008 case OP_ASSERT_NOT:
1009 case OP_ASSERTBACK:
1010 case OP_ASSERTBACK_NOT:
1011 do tcode += GET(tcode, 1); while (*tcode == OP_ALT);
1012 tcode += 1 + LINK_SIZE;
1013 break;
1014
1015
1016
1017 case OP_BRAZERO:
1018 case OP_BRAMINZERO:
1019 case OP_BRAPOSZERO:
1020 rc = set_start_bits(++tcode, start_bits, utf, cd);
1021 if (rc == SSB_FAIL || rc == SSB_UNKNOWN) return rc;
1022
1023
1024
1025
1026
1027 do tcode += GET(tcode,1); while (*tcode == OP_ALT);
1028 tcode += 1 + LINK_SIZE;
1029 break;
1030
1031
1032
1033 case OP_SKIPZERO:
1034 tcode++;
1035 do tcode += GET(tcode,1); while (*tcode == OP_ALT);
1036 tcode += 1 + LINK_SIZE;
1037 break;
1038
1039
1040
1041 case OP_STAR:
1042 case OP_MINSTAR:
1043 case OP_POSSTAR:
1044 case OP_QUERY:
1045 case OP_MINQUERY:
1046 case OP_POSQUERY:
1047 tcode = set_table_bit(start_bits, tcode + 1, FALSE, cd, utf);
1048 break;
1049
1050 case OP_STARI:
1051 case OP_MINSTARI:
1052 case OP_POSSTARI:
1053 case OP_QUERYI:
1054 case OP_MINQUERYI:
1055 case OP_POSQUERYI:
1056 tcode = set_table_bit(start_bits, tcode + 1, TRUE, cd, utf);
1057 break;
1058
1059
1060
1061 case OP_UPTO:
1062 case OP_MINUPTO:
1063 case OP_POSUPTO:
1064 tcode = set_table_bit(start_bits, tcode + 1 + IMM2_SIZE, FALSE, cd, utf);
1065 break;
1066
1067 case OP_UPTOI:
1068 case OP_MINUPTOI:
1069 case OP_POSUPTOI:
1070 tcode = set_table_bit(start_bits, tcode + 1 + IMM2_SIZE, TRUE, cd, utf);
1071 break;
1072
1073
1074
1075 case OP_EXACT:
1076 tcode += IMM2_SIZE;
1077
1078 case OP_CHAR:
1079 case OP_PLUS:
1080 case OP_MINPLUS:
1081 case OP_POSPLUS:
1082 (void)set_table_bit(start_bits, tcode + 1, FALSE, cd, utf);
1083 try_next = FALSE;
1084 break;
1085
1086 case OP_EXACTI:
1087 tcode += IMM2_SIZE;
1088
1089 case OP_CHARI:
1090 case OP_PLUSI:
1091 case OP_MINPLUSI:
1092 case OP_POSPLUSI:
1093 (void)set_table_bit(start_bits, tcode + 1, TRUE, cd, utf);
1094 try_next = FALSE;
1095 break;
1096
1097
1098
1099
1100
1101
1102
1103 case OP_HSPACE:
1104 SET_BIT(CHAR_HT);
1105 SET_BIT(CHAR_SPACE);
1106 #ifdef SUPPORT_UTF
1107 if (utf)
1108 {
1109 #ifdef COMPILE_PCRE8
1110 SET_BIT(0xC2);
1111 SET_BIT(0xE1);
1112 SET_BIT(0xE2);
1113 SET_BIT(0xE3);
1114 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1115 SET_BIT(0xA0);
1116 SET_BIT(0xFF);
1117 #endif
1118 }
1119 else
1120 #endif
1121 {
1122 #ifndef EBCDIC
1123 SET_BIT(0xA0);
1124 #endif
1125 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1126 SET_BIT(0xFF);
1127 #endif
1128 }
1129 try_next = FALSE;
1130 break;
1131
1132 case OP_ANYNL:
1133 case OP_VSPACE:
1134 SET_BIT(CHAR_LF);
1135 SET_BIT(CHAR_VT);
1136 SET_BIT(CHAR_FF);
1137 SET_BIT(CHAR_CR);
1138 #ifdef SUPPORT_UTF
1139 if (utf)
1140 {
1141 #ifdef COMPILE_PCRE8
1142 SET_BIT(0xC2);
1143 SET_BIT(0xE2);
1144 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1145 SET_BIT(CHAR_NEL);
1146 SET_BIT(0xFF);
1147 #endif
1148 }
1149 else
1150 #endif
1151 {
1152 SET_BIT(CHAR_NEL);
1153 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1154 SET_BIT(0xFF);
1155 #endif
1156 }
1157 try_next = FALSE;
1158 break;
1159
1160
1161
1162
1163
1164
1165 case OP_NOT_DIGIT:
1166 set_nottype_bits(start_bits, cbit_digit, table_limit, cd);
1167 try_next = FALSE;
1168 break;
1169
1170 case OP_DIGIT:
1171 set_type_bits(start_bits, cbit_digit, table_limit, cd);
1172 try_next = FALSE;
1173 break;
1174
1175
1176
1177
1178
1179 case OP_NOT_WHITESPACE:
1180 set_nottype_bits(start_bits, cbit_space, table_limit, cd);
1181 try_next = FALSE;
1182 break;
1183
1184 case OP_WHITESPACE:
1185 set_type_bits(start_bits, cbit_space, table_limit, cd);
1186 try_next = FALSE;
1187 break;
1188
1189 case OP_NOT_WORDCHAR:
1190 set_nottype_bits(start_bits, cbit_word, table_limit, cd);
1191 try_next = FALSE;
1192 break;
1193
1194 case OP_WORDCHAR:
1195 set_type_bits(start_bits, cbit_word, table_limit, cd);
1196 try_next = FALSE;
1197 break;
1198
1199
1200
1201
1202 case OP_TYPEPLUS:
1203 case OP_TYPEMINPLUS:
1204 case OP_TYPEPOSPLUS:
1205 tcode++;
1206 break;
1207
1208 case OP_TYPEEXACT:
1209 tcode += 1 + IMM2_SIZE;
1210 break;
1211
1212
1213
1214
1215 case OP_TYPEUPTO:
1216 case OP_TYPEMINUPTO:
1217 case OP_TYPEPOSUPTO:
1218 tcode += IMM2_SIZE;
1219
1220 case OP_TYPESTAR:
1221 case OP_TYPEMINSTAR:
1222 case OP_TYPEPOSSTAR:
1223 case OP_TYPEQUERY:
1224 case OP_TYPEMINQUERY:
1225 case OP_TYPEPOSQUERY:
1226 switch(tcode[1])
1227 {
1228 default:
1229 case OP_ANY:
1230 case OP_ALLANY:
1231 return SSB_FAIL;
1232
1233 case OP_HSPACE:
1234 SET_BIT(CHAR_HT);
1235 SET_BIT(CHAR_SPACE);
1236 #ifdef SUPPORT_UTF
1237 if (utf)
1238 {
1239 #ifdef COMPILE_PCRE8
1240 SET_BIT(0xC2);
1241 SET_BIT(0xE1);
1242 SET_BIT(0xE2);
1243 SET_BIT(0xE3);
1244 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1245 SET_BIT(0xA0);
1246 SET_BIT(0xFF);
1247 #endif
1248 }
1249 else
1250 #endif
1251 #ifndef EBCDIC
1252 SET_BIT(0xA0);
1253 #endif
1254 break;
1255
1256 case OP_ANYNL:
1257 case OP_VSPACE:
1258 SET_BIT(CHAR_LF);
1259 SET_BIT(CHAR_VT);
1260 SET_BIT(CHAR_FF);
1261 SET_BIT(CHAR_CR);
1262 #ifdef SUPPORT_UTF
1263 if (utf)
1264 {
1265 #ifdef COMPILE_PCRE8
1266 SET_BIT(0xC2);
1267 SET_BIT(0xE2);
1268 #elif defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1269 SET_BIT(CHAR_NEL);
1270 SET_BIT(0xFF);
1271 #endif
1272 }
1273 else
1274 #endif
1275 SET_BIT(CHAR_NEL);
1276 break;
1277
1278 case OP_NOT_DIGIT:
1279 set_nottype_bits(start_bits, cbit_digit, table_limit, cd);
1280 break;
1281
1282 case OP_DIGIT:
1283 set_type_bits(start_bits, cbit_digit, table_limit, cd);
1284 break;
1285
1286
1287
1288
1289
1290 case OP_NOT_WHITESPACE:
1291 set_nottype_bits(start_bits, cbit_space, table_limit, cd);
1292 break;
1293
1294 case OP_WHITESPACE:
1295 set_type_bits(start_bits, cbit_space, table_limit, cd);
1296 break;
1297
1298 case OP_NOT_WORDCHAR:
1299 set_nottype_bits(start_bits, cbit_word, table_limit, cd);
1300 break;
1301
1302 case OP_WORDCHAR:
1303 set_type_bits(start_bits, cbit_word, table_limit, cd);
1304 break;
1305 }
1306
1307 tcode += 2;
1308 break;
1309
1310
1311
1312
1313
1314
1315
1316 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1317 case OP_XCLASS:
1318 if ((tcode[1 + LINK_SIZE] & XCL_HASPROP) != 0)
1319 return SSB_FAIL;
1320
1321 if ((tcode[1 + LINK_SIZE] & XCL_MAP) == 0 && (tcode[1 + LINK_SIZE] & XCL_NOT) != 0)
1322 return SSB_FAIL;
1323 #endif
1324
1325
1326 case OP_NCLASS:
1327 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1328 if (utf)
1329 {
1330 start_bits[24] |= 0xf0;
1331 memset(start_bits+25, 0xff, 7);
1332 }
1333 #endif
1334 #if defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1335 SET_BIT(0xFF);
1336 #endif
1337
1338
1339 case OP_CLASS:
1340 {
1341 pcre_uint8 *map;
1342 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1343 map = NULL;
1344 if (*tcode == OP_XCLASS)
1345 {
1346 if ((tcode[1 + LINK_SIZE] & XCL_MAP) != 0)
1347 map = (pcre_uint8 *)(tcode + 1 + LINK_SIZE + 1);
1348 tcode += GET(tcode, 1);
1349 }
1350 else
1351 #endif
1352 {
1353 tcode++;
1354 map = (pcre_uint8 *)tcode;
1355 tcode += 32 / sizeof(pcre_uchar);
1356 }
1357
1358
1359
1360
1361
1362
1363
1364 #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
1365 if (map != NULL)
1366 #endif
1367 {
1368 #if defined SUPPORT_UTF && defined COMPILE_PCRE8
1369 if (utf)
1370 {
1371 for (c = 0; c < 16; c++) start_bits[c] |= map[c];
1372 for (c = 128; c < 256; c++)
1373 {
1374 if ((map[c/8] && (1 << (c&7))) != 0)
1375 {
1376 int d = (c >> 6) | 0xc0;
1377 start_bits[d/8] |= (1 << (d&7));
1378 c = (c & 0xc0) + 0x40 - 1;
1379 }
1380 }
1381 }
1382 else
1383 #endif
1384 {
1385
1386 for (c = 0; c < 32; c++) start_bits[c] |= map[c];
1387 }
1388 }
1389
1390
1391
1392
1393 switch (*tcode)
1394 {
1395 case OP_CRSTAR:
1396 case OP_CRMINSTAR:
1397 case OP_CRQUERY:
1398 case OP_CRMINQUERY:
1399 case OP_CRPOSSTAR:
1400 case OP_CRPOSQUERY:
1401 tcode++;
1402 break;
1403
1404 case OP_CRRANGE:
1405 case OP_CRMINRANGE:
1406 case OP_CRPOSRANGE:
1407 if (GET2(tcode, 1) == 0) tcode += 1 + 2 * IMM2_SIZE;
1408 else try_next = FALSE;
1409 break;
1410
1411 default:
1412 try_next = FALSE;
1413 break;
1414 }
1415 }
1416 break;
1417
1418 }
1419 }
1420
1421 code += GET(code, 1);
1422 }
1423 while (*code == OP_ALT);
1424 return yield;
1425 }
1426
1427
1428
1429
1430
1431
1432
1433
1434
1435
1436
1437
1438
1439
1440
1441
1442
1443
1444
1445
1446
1447
1448
1449
1450 #if defined COMPILE_PCRE8
1451 PCRE_EXP_DEFN pcre_extra * PCRE_CALL_CONVENTION
1452 pcre_study(const pcre *external_re, int options, const char **errorptr)
1453 #elif defined COMPILE_PCRE16
1454 PCRE_EXP_DEFN pcre16_extra * PCRE_CALL_CONVENTION
1455 pcre16_study(const pcre16 *external_re, int options, const char **errorptr)
1456 #elif defined COMPILE_PCRE32
1457 PCRE_EXP_DEFN pcre32_extra * PCRE_CALL_CONVENTION
1458 pcre32_study(const pcre32 *external_re, int options, const char **errorptr)
1459 #endif
1460 {
1461 int min;
1462 int count = 0;
1463 BOOL bits_set = FALSE;
1464 pcre_uint8 start_bits[32];
1465 PUBL(extra) *extra = NULL;
1466 pcre_study_data *study;
1467 const pcre_uint8 *tables;
1468 pcre_uchar *code;
1469 compile_data compile_block;
1470 const REAL_PCRE *re = (const REAL_PCRE *)external_re;
1471
1472
1473 *errorptr = NULL;
1474
1475 if (re == NULL || re->magic_number != MAGIC_NUMBER)
1476 {
1477 *errorptr = "argument is not a compiled regular expression";
1478 return NULL;
1479 }
1480
1481 if ((re->flags & PCRE_MODE) == 0)
1482 {
1483 #if defined COMPILE_PCRE8
1484 *errorptr = "argument not compiled in 8 bit mode";
1485 #elif defined COMPILE_PCRE16
1486 *errorptr = "argument not compiled in 16 bit mode";
1487 #elif defined COMPILE_PCRE32
1488 *errorptr = "argument not compiled in 32 bit mode";
1489 #endif
1490 return NULL;
1491 }
1492
1493 if ((options & ~PUBLIC_STUDY_OPTIONS) != 0)
1494 {
1495 *errorptr = "unknown or incorrect option bit(s) set";
1496 return NULL;
1497 }
1498
1499 code = (pcre_uchar *)re + re->name_table_offset +
1500 (re->name_count * re->name_entry_size);
1501
1502
1503
1504
1505
1506 if ((re->options & PCRE_ANCHORED) == 0 &&
1507 (re->flags & (PCRE_FIRSTSET|PCRE_STARTLINE)) == 0)
1508 {
1509 int rc;
1510
1511
1512
1513 tables = re->tables;
1514
1515 #if defined COMPILE_PCRE8
1516 if (tables == NULL)
1517 (void)pcre_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
1518 (void *)(&tables));
1519 #elif defined COMPILE_PCRE16
1520 if (tables == NULL)
1521 (void)pcre16_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
1522 (void *)(&tables));
1523 #elif defined COMPILE_PCRE32
1524 if (tables == NULL)
1525 (void)pcre32_fullinfo(external_re, NULL, PCRE_INFO_DEFAULT_TABLES,
1526 (void *)(&tables));
1527 #endif
1528
1529 compile_block.lcc = tables + lcc_offset;
1530 compile_block.fcc = tables + fcc_offset;
1531 compile_block.cbits = tables + cbits_offset;
1532 compile_block.ctypes = tables + ctypes_offset;
1533
1534
1535
1536 memset(start_bits, 0, 32 * sizeof(pcre_uint8));
1537 rc = set_start_bits(code, start_bits, (re->options & PCRE_UTF8) != 0,
1538 &compile_block);
1539 bits_set = rc == SSB_DONE;
1540 if (rc == SSB_UNKNOWN)
1541 {
1542 *errorptr = "internal error: opcode not recognized";
1543 return NULL;
1544 }
1545 }
1546
1547
1548
1549 switch(min = find_minlength(re, code, code, re->options, NULL, &count))
1550 {
1551 case -2: *errorptr = "internal error: missing capturing bracket"; return NULL;
1552 case -3: *errorptr = "internal error: opcode not recognized"; return NULL;
1553 default: break;
1554 }
1555
1556
1557
1558
1559
1560
1561
1562
1563
1564
1565 if (bits_set || min > 0 || (options & (
1566 #ifdef SUPPORT_JIT
1567 PCRE_STUDY_JIT_COMPILE | PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE |
1568 PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE |
1569 #endif
1570 PCRE_STUDY_EXTRA_NEEDED)) != 0)
1571 {
1572 extra = (PUBL(extra) *)(PUBL(malloc))
1573 (sizeof(PUBL(extra)) + sizeof(pcre_study_data));
1574 if (extra == NULL)
1575 {
1576 *errorptr = "failed to get memory";
1577 return NULL;
1578 }
1579
1580 study = (pcre_study_data *)((char *)extra + sizeof(PUBL(extra)));
1581 extra->flags = PCRE_EXTRA_STUDY_DATA;
1582 extra->study_data = study;
1583
1584 study->size = sizeof(pcre_study_data);
1585 study->flags = 0;
1586
1587
1588
1589
1590
1591 if (bits_set)
1592 {
1593 study->flags |= PCRE_STUDY_MAPPED;
1594 memcpy(study->start_bits, start_bits, sizeof(start_bits));
1595 }
1596 else memset(study->start_bits, 0, 32 * sizeof(pcre_uint8));
1597
1598 #ifdef PCRE_DEBUG
1599 if (bits_set)
1600 {
1601 pcre_uint8 *ptr = start_bits;
1602 int i;
1603
1604 printf("Start bits:\n");
1605 for (i = 0; i < 32; i++)
1606 printf("%3d: %02x%s", i * 8, *ptr++, ((i + 1) & 0x7) != 0? " " : "\n");
1607 }
1608 #endif
1609
1610
1611
1612
1613
1614
1615 if (min > 0)
1616 {
1617 study->flags |= PCRE_STUDY_MINLEN;
1618 study->minlength = min;
1619 }
1620 else study->minlength = 0;
1621
1622
1623
1624
1625
1626
1627 #ifdef SUPPORT_JIT
1628 extra->executable_jit = NULL;
1629 if ((options & PCRE_STUDY_JIT_COMPILE) != 0)
1630 PRIV(jit_compile)(re, extra, JIT_COMPILE);
1631 if ((options & PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE) != 0)
1632 PRIV(jit_compile)(re, extra, JIT_PARTIAL_SOFT_COMPILE);
1633 if ((options & PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE) != 0)
1634 PRIV(jit_compile)(re, extra, JIT_PARTIAL_HARD_COMPILE);
1635
1636 if (study->flags == 0 && (extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) == 0 &&
1637 (options & PCRE_STUDY_EXTRA_NEEDED) == 0)
1638 {
1639 #if defined COMPILE_PCRE8
1640 pcre_free_study(extra);
1641 #elif defined COMPILE_PCRE16
1642 pcre16_free_study(extra);
1643 #elif defined COMPILE_PCRE32
1644 pcre32_free_study(extra);
1645 #endif
1646 extra = NULL;
1647 }
1648 #endif
1649 }
1650
1651 return extra;
1652 }
1653
1654
1655
1656
1657
1658
1659
1660
1661
1662
1663
1664
1665 #if defined COMPILE_PCRE8
1666 PCRE_EXP_DEFN void
1667 pcre_free_study(pcre_extra *extra)
1668 #elif defined COMPILE_PCRE16
1669 PCRE_EXP_DEFN void
1670 pcre16_free_study(pcre16_extra *extra)
1671 #elif defined COMPILE_PCRE32
1672 PCRE_EXP_DEFN void
1673 pcre32_free_study(pcre32_extra *extra)
1674 #endif
1675 {
1676 if (extra == NULL)
1677 return;
1678 #ifdef SUPPORT_JIT
1679 if ((extra->flags & PCRE_EXTRA_EXECUTABLE_JIT) != 0 &&
1680 extra->executable_jit != NULL)
1681 PRIV(jit_free)(extra->executable_jit);
1682 #endif
1683 PUBL(free)(extra);
1684 }
1685
1686