root/ext/ereg/regex/engine.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes following definitions.
  1. matcher
  2. dissect
  3. backref
  4. fast
  5. slow
  6. step
  7. print
  8. at
  9. pchar

   1 /*
   2  * The matching engine and friends.  This file is #included by regexec.c
   3  * after suitable #defines of a variety of macros used herein, so that
   4  * different state representations can be used without duplicating masses
   5  * of code.
   6  */
   7 
   8 #ifdef SNAMES
   9 #define matcher smatcher
  10 #define fast    sfast
  11 #define slow    sslow
  12 #define dissect sdissect
  13 #define backref sbackref
  14 #define step    sstep
  15 #define print   sprint
  16 #define at      sat
  17 #define match   smat
  18 #endif
  19 #ifdef LNAMES
  20 #define matcher lmatcher
  21 #define fast    lfast
  22 #define slow    lslow
  23 #define dissect ldissect
  24 #define backref lbackref
  25 #define step    lstep
  26 #define print   lprint
  27 #define at      lat
  28 #define match   lmat
  29 #endif
  30 
  31 /* another structure passed up and down to avoid zillions of parameters */
  32 struct match {
  33         struct re_guts *g;
  34         int eflags;
  35         regmatch_t *pmatch;     /* [nsub+1] (0 element unused) */
  36         unsigned char *offp;            /* offsets work from here */
  37         unsigned char *beginp;          /* start of string -- virtual NUL precedes */
  38         unsigned char *endp;            /* end of string -- virtual NUL here */
  39         unsigned char *coldp;           /* can be no match starting before here */
  40         unsigned char **lastpos;                /* [nplus+1] */
  41         STATEVARS;
  42         states st;              /* current states */
  43         states fresh;           /* states for a fresh start */
  44         states tmp;             /* temporary */
  45         states empty;           /* empty set of states */
  46 };
  47 
  48 #include "engine.ih"
  49 
  50 #ifdef REDEBUG
  51 #define SP(t, s, c)     print(m, t, s, c, stdout)
  52 #define AT(t, p1, p2, s1, s2)   at(m, t, p1, p2, s1, s2)
  53 #define NOTE(str)       { if (m->eflags&REG_TRACE) printf("=%s\n", (str)); }
  54 #else
  55 #define SP(t, s, c)     /* nothing */
  56 #define AT(t, p1, p2, s1, s2)   /* nothing */
  57 #define NOTE(s) /* nothing */
  58 #endif
  59 
  60 /*
  61  - matcher - the actual matching engine
  62  == static int matcher(register struct re_guts *g, char *string, \
  63  ==     size_t nmatch, regmatch_t pmatch[], int eflags);
  64  */
  65 static int                      /* 0 success, REG_NOMATCH failure */
  66 matcher(g, string, nmatch, pmatch, eflags)
  67 register struct re_guts *g;
  68 unsigned char *string;
  69 size_t nmatch;
  70 regmatch_t pmatch[];
  71 int eflags;
  72 {
  73         register unsigned char *endp;
  74         register size_t i;
  75         struct match mv;
  76         register struct match *m = &mv;
  77         register unsigned char *dp;
  78         const register sopno gf = g->firststate+1;      /* +1 for OEND */
  79         const register sopno gl = g->laststate;
  80         unsigned char *start;
  81         unsigned char *stop;
  82 
  83         /* simplify the situation where possible */
  84         if (g->cflags&REG_NOSUB)
  85                 nmatch = 0;
  86         if (eflags&REG_STARTEND) {
  87                 start = string + pmatch[0].rm_so;
  88                 stop = string + pmatch[0].rm_eo;
  89         } else {
  90                 start = string;
  91                 stop = start + strlen(start);
  92         }
  93         if (stop < start)
  94                 return(REG_INVARG);
  95 
  96         /* prescreening; this does wonders for this rather slow code */
  97         if (g->must != NULL) {
  98                 for (dp = start; dp < stop; dp++)
  99                         if (*dp == g->must[0] && stop - dp >= g->mlen &&
 100                                 memcmp(dp, g->must, (size_t)g->mlen) == 0)
 101                                 break;
 102                 if (dp == stop)         /* we didn't find g->must */
 103                         return(REG_NOMATCH);
 104         }
 105 
 106         /* match struct setup */
 107         m->g = g;
 108         m->eflags = eflags;
 109         m->pmatch = NULL;
 110         m->lastpos = NULL;
 111         m->offp = string;
 112         m->beginp = start;
 113         m->endp = stop;
 114         STATESETUP(m, 4);
 115         SETUP(m->st);
 116         SETUP(m->fresh);
 117         SETUP(m->tmp);
 118         SETUP(m->empty);
 119         CLEAR(m->empty);
 120 
 121         /* this loop does only one repetition except for backrefs */
 122         for (;;) {
 123                 endp = fast(m, start, stop, gf, gl);
 124                 if (endp == NULL) {             /* a miss */
 125                         STATETEARDOWN(m);
 126                         return(REG_NOMATCH);
 127                 }
 128                 if (nmatch == 0 && !g->backrefs)
 129                         break;          /* no further info needed */
 130 
 131                 /* where? */
 132                 assert(m->coldp != NULL);
 133                 for (;;) {
 134                         NOTE("finding start");
 135                         endp = slow(m, m->coldp, stop, gf, gl);
 136                         if (endp != NULL)
 137                                 break;
 138                         assert(m->coldp < m->endp);
 139                         m->coldp++;
 140                 }
 141                 if (nmatch == 1 && !g->backrefs)
 142                         break;          /* no further info needed */
 143 
 144                 /* oh my, he wants the subexpressions... */
 145                 if (m->pmatch == NULL)
 146                         m->pmatch = (regmatch_t *)malloc((m->g->nsub + 1) *
 147                                                         sizeof(regmatch_t));
 148                 if (m->pmatch == NULL) {
 149                         STATETEARDOWN(m);
 150                         return(REG_ESPACE);
 151                 }
 152                 for (i = 1; i <= m->g->nsub; i++)
 153                         m->pmatch[i].rm_so = m->pmatch[i].rm_eo = -1;
 154                 if (!g->backrefs && !(m->eflags&REG_BACKR)) {
 155                         NOTE("dissecting");
 156                         dp = dissect(m, m->coldp, endp, gf, gl);
 157                 } else {
 158                         if (g->nplus > 0 && m->lastpos == NULL)
 159                                 m->lastpos = (unsigned char **)malloc((g->nplus+1) *
 160                                                         sizeof(unsigned char *));
 161                         if (g->nplus > 0 && m->lastpos == NULL) {
 162                                 free((char *)m->pmatch);
 163                                 STATETEARDOWN(m);
 164                                 return(REG_ESPACE);
 165                         }
 166                         NOTE("backref dissect");
 167                         dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
 168                 }
 169                 if (dp != NULL)
 170                         break;
 171 
 172                 /* uh-oh... we couldn't find a subexpression-level match */
 173                 assert(g->backrefs);    /* must be back references doing it */
 174                 assert(g->nplus == 0 || m->lastpos != NULL);
 175                 for (;;) {
 176                         if (dp != NULL || endp <= m->coldp)
 177                                 break;          /* defeat */
 178                         NOTE("backoff");
 179                         endp = slow(m, m->coldp, endp-1, gf, gl);
 180                         if (endp == NULL)
 181                                 break;          /* defeat */
 182                         /* try it on a shorter possibility */
 183 #ifndef NDEBUG
 184                         for (i = 1; i <= m->g->nsub; i++) {
 185                                 assert(m->pmatch[i].rm_so == -1);
 186                                 assert(m->pmatch[i].rm_eo == -1);
 187                         }
 188 #endif
 189                         NOTE("backoff dissect");
 190                         dp = backref(m, m->coldp, endp, gf, gl, (sopno)0);
 191                 }
 192                 assert(dp == NULL || dp == endp);
 193                 if (dp != NULL)         /* found a shorter one */
 194                         break;
 195 
 196                 /* despite initial appearances, there is no match here */
 197                 NOTE("false alarm");
 198                 start = m->coldp + 1;   /* recycle starting later */
 199                 assert(start <= stop);
 200         }
 201 
 202         /* fill in the details if requested */
 203         if (nmatch > 0) {
 204                 pmatch[0].rm_so = m->coldp - m->offp;
 205                 pmatch[0].rm_eo = endp - m->offp;
 206         }
 207         if (nmatch > 1) {
 208                 assert(m->pmatch != NULL);
 209                 for (i = 1; i < nmatch; i++)
 210                         if (i <= m->g->nsub)
 211                                 pmatch[i] = m->pmatch[i];
 212                         else {
 213                                 pmatch[i].rm_so = -1;
 214                                 pmatch[i].rm_eo = -1;
 215                         }
 216         }
 217 
 218         if (m->pmatch != NULL)
 219                 free((char *)m->pmatch);
 220         if (m->lastpos != NULL)
 221                 free((char *)m->lastpos);
 222         STATETEARDOWN(m);
 223         return(0);
 224 }
 225 
 226 /*
 227  - dissect - figure out what matched what, no back references
 228  == static unsigned char *dissect(register struct match *m, unsigned char *start, \
 229  ==     unsigned char *stop, sopno startst, sopno stopst);
 230  */
 231 static unsigned char *                  /* == stop (success) always */
 232 dissect(m, start, stop, startst, stopst)
 233 register struct match *m;
 234 unsigned char *start;
 235 unsigned char *stop;
 236 sopno startst;
 237 sopno stopst;
 238 {
 239         register int i;
 240         register sopno ss;      /* start sop of current subRE */
 241         register sopno es;      /* end sop of current subRE */
 242         register unsigned char *sp;     /* start of string matched by it */
 243         register unsigned char *stp;    /* string matched by it cannot pass here */
 244         register unsigned char *rest;   /* start of rest of string */
 245         register unsigned char *tail;   /* string unmatched by rest of RE */
 246         register sopno ssub;    /* start sop of subsubRE */
 247         register sopno esub;    /* end sop of subsubRE */
 248         register unsigned char *ssp;    /* start of string matched by subsubRE */
 249         register unsigned char *sep;    /* end of string matched by subsubRE */
 250         register unsigned char *oldssp; /* previous ssp */
 251         register unsigned char *dp;
 252 
 253         AT("diss", start, stop, startst, stopst);
 254         sp = start;
 255         for (ss = startst; ss < stopst; ss = es) {
 256                 /* identify end of subRE */
 257                 es = ss;
 258                 switch (OP(m->g->strip[es])) {
 259                 case OPLUS_:
 260                 case OQUEST_:
 261                         es += OPND(m->g->strip[es]);
 262                         break;
 263                 case OCH_:
 264                         while (OP(m->g->strip[es]) != O_CH)
 265                                 es += OPND(m->g->strip[es]);
 266                         break;
 267                 }
 268                 es++;
 269 
 270                 /* figure out what it matched */
 271                 switch (OP(m->g->strip[ss])) {
 272                 case OEND:
 273                         assert(PHP_REGEX_NOPE);
 274                         break;
 275                 case OCHAR:
 276                         sp++;
 277                         break;
 278                 case OBOL:
 279                 case OEOL:
 280                 case OBOW:
 281                 case OEOW:
 282                         break;
 283                 case OANY:
 284                 case OANYOF:
 285                         sp++;
 286                         break;
 287                 case OBACK_:
 288                 case O_BACK:
 289                         assert(PHP_REGEX_NOPE);
 290                         break;
 291                 /* cases where length of match is hard to find */
 292                 case OQUEST_:
 293                         stp = stop;
 294                         for (;;) {
 295                                 /* how long could this one be? */
 296                                 rest = slow(m, sp, stp, ss, es);
 297                                 assert(rest != NULL);   /* it did match */
 298                                 /* could the rest match the rest? */
 299                                 tail = slow(m, rest, stop, es, stopst);
 300                                 if (tail == stop)
 301                                         break;          /* yes! */
 302                                 /* no -- try a shorter match for this one */
 303                                 stp = rest - 1;
 304                                 assert(stp >= sp);      /* it did work */
 305                         }
 306                         ssub = ss + 1;
 307                         esub = es - 1;
 308                         /* did innards match? */
 309                         if (slow(m, sp, rest, ssub, esub) != NULL) {
 310                                 dp = dissect(m, sp, rest, ssub, esub);
 311                                 assert(dp == rest);
 312                         } else          /* no */
 313                                 assert(sp == rest);
 314                         sp = rest;
 315                         break;
 316                 case OPLUS_:
 317                         stp = stop;
 318                         for (;;) {
 319                                 /* how long could this one be? */
 320                                 rest = slow(m, sp, stp, ss, es);
 321                                 assert(rest != NULL);   /* it did match */
 322                                 /* could the rest match the rest? */
 323                                 tail = slow(m, rest, stop, es, stopst);
 324                                 if (tail == stop)
 325                                         break;          /* yes! */
 326                                 /* no -- try a shorter match for this one */
 327                                 stp = rest - 1;
 328                                 assert(stp >= sp);      /* it did work */
 329                         }
 330                         ssub = ss + 1;
 331                         esub = es - 1;
 332                         ssp = sp;
 333                         oldssp = ssp;
 334                         for (;;) {      /* find last match of innards */
 335                                 sep = slow(m, ssp, rest, ssub, esub);
 336                                 if (sep == NULL || sep == ssp)
 337                                         break;  /* failed or matched null */
 338                                 oldssp = ssp;   /* on to next try */
 339                                 ssp = sep;
 340                         }
 341                         if (sep == NULL) {
 342                                 /* last successful match */
 343                                 sep = ssp;
 344                                 ssp = oldssp;
 345                         }
 346                         assert(sep == rest);    /* must exhaust substring */
 347                         assert(slow(m, ssp, sep, ssub, esub) == rest);
 348                         dp = dissect(m, ssp, sep, ssub, esub);
 349                         assert(dp == sep);
 350                         sp = rest;
 351                         break;
 352                 case OCH_:
 353                         stp = stop;
 354                         for (;;) {
 355                                 /* how long could this one be? */
 356                                 rest = slow(m, sp, stp, ss, es);
 357                                 assert(rest != NULL);   /* it did match */
 358                                 /* could the rest match the rest? */
 359                                 tail = slow(m, rest, stop, es, stopst);
 360                                 if (tail == stop)
 361                                         break;          /* yes! */
 362                                 /* no -- try a shorter match for this one */
 363                                 stp = rest - 1;
 364                                 assert(stp >= sp);      /* it did work */
 365                         }
 366                         ssub = ss + 1;
 367                         esub = ss + OPND(m->g->strip[ss]) - 1;
 368                         assert(OP(m->g->strip[esub]) == OOR1);
 369                         for (;;) {      /* find first matching branch */
 370                                 if (slow(m, sp, rest, ssub, esub) == rest)
 371                                         break;  /* it matched all of it */
 372                                 /* that one missed, try next one */
 373                                 assert(OP(m->g->strip[esub]) == OOR1);
 374                                 esub++;
 375                                 assert(OP(m->g->strip[esub]) == OOR2);
 376                                 ssub = esub + 1;
 377                                 esub += OPND(m->g->strip[esub]);
 378                                 if (OP(m->g->strip[esub]) == OOR2)
 379                                         esub--;
 380                                 else
 381                                         assert(OP(m->g->strip[esub]) == O_CH);
 382                         }
 383                         dp = dissect(m, sp, rest, ssub, esub);
 384                         assert(dp == rest);
 385                         sp = rest;
 386                         break;
 387                 case O_PLUS:
 388                 case O_QUEST:
 389                 case OOR1:
 390                 case OOR2:
 391                 case O_CH:
 392                         assert(PHP_REGEX_NOPE);
 393                         break;
 394                 case OLPAREN:
 395                         i = OPND(m->g->strip[ss]);
 396                         assert(0 < i && i <= m->g->nsub);
 397                         m->pmatch[i].rm_so = sp - m->offp;
 398                         break;
 399                 case ORPAREN:
 400                         i = OPND(m->g->strip[ss]);
 401                         assert(0 < i && i <= m->g->nsub);
 402                         m->pmatch[i].rm_eo = sp - m->offp;
 403                         break;
 404                 default:                /* uh oh */
 405                         assert(PHP_REGEX_NOPE);
 406                         break;
 407                 }
 408         }
 409 
 410         assert(sp == stop);
 411         return(sp);
 412 }
 413 
 414 /*
 415  - backref - figure out what matched what, figuring in back references
 416  == static unsigned char *backref(register struct match *m, unsigned char *start, \
 417  ==     unsigned char *stop, sopno startst, sopno stopst, sopno lev);
 418  */
 419 static unsigned char *                  /* == stop (success) or NULL (failure) */
 420 backref(m, start, stop, startst, stopst, lev)
 421 register struct match *m;
 422 unsigned char *start;
 423 unsigned char *stop;
 424 sopno startst;
 425 sopno stopst;
 426 sopno lev;                      /* PLUS nesting level */
 427 {
 428         register int i;
 429         register sopno ss;      /* start sop of current subRE */
 430         register unsigned char *sp;     /* start of string matched by it */
 431         register sopno ssub;    /* start sop of subsubRE */
 432         register sopno esub;    /* end sop of subsubRE */
 433         register unsigned char *ssp;    /* start of string matched by subsubRE */
 434         register unsigned char *dp;
 435         register size_t len;
 436         register int hard;
 437         register sop s;
 438         register regoff_t offsave;
 439         register cset *cs;
 440 
 441         AT("back", start, stop, startst, stopst);
 442         sp = start;
 443 
 444         /* get as far as we can with easy stuff */
 445         hard = 0;
 446         for (ss = startst; !hard && ss < stopst; ss++)
 447                 switch (OP(s = m->g->strip[ss])) {
 448                 case OCHAR:
 449                         if (sp == stop || *sp++ != (unsigned char)OPND(s))
 450                                 return(NULL);
 451                         break;
 452                 case OANY:
 453                         if (sp == stop)
 454                                 return(NULL);
 455                         sp++;
 456                         break;
 457                 case OANYOF:
 458                         cs = &m->g->sets[OPND(s)];
 459                         if (sp == stop || !CHIN(cs, *sp++))
 460                                 return(NULL);
 461                         break;
 462                 case OBOL:
 463                         if ( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
 464                                         (sp < m->endp && *(sp-1) == '\n' &&
 465                                                 (m->g->cflags&REG_NEWLINE)) )
 466                                 { /* yes */ }
 467                         else
 468                                 return(NULL);
 469                         break;
 470                 case OEOL:
 471                         if ( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
 472                                         (sp < m->endp && *sp == '\n' &&
 473                                                 (m->g->cflags&REG_NEWLINE)) )
 474                                 { /* yes */ }
 475                         else
 476                                 return(NULL);
 477                         break;
 478                 case OBOW:
 479                         if (( (sp == m->beginp && !(m->eflags&REG_NOTBOL)) ||
 480                                         (sp < m->endp && *(sp-1) == '\n' &&
 481                                                 (m->g->cflags&REG_NEWLINE)) ||
 482                                         (sp > m->beginp &&
 483                                                         !ISWORD(*(sp-1))) ) &&
 484                                         (sp < m->endp && ISWORD(*sp)) )
 485                                 { /* yes */ }
 486                         else
 487                                 return(NULL);
 488                         break;
 489                 case OEOW:
 490                         if (( (sp == m->endp && !(m->eflags&REG_NOTEOL)) ||
 491                                         (sp < m->endp && *sp == '\n' &&
 492                                                 (m->g->cflags&REG_NEWLINE)) ||
 493                                         (sp < m->endp && !ISWORD(*sp)) ) &&
 494                                         (sp > m->beginp && ISWORD(*(sp-1))) )
 495                                 { /* yes */ }
 496                         else
 497                                 return(NULL);
 498                         break;
 499                 case O_QUEST:
 500                         break;
 501                 case OOR1:      /* matches null but needs to skip */
 502                         ss++;
 503                         s = m->g->strip[ss];
 504                         do {
 505                                 assert(OP(s) == OOR2);
 506                                 ss += OPND(s);
 507                         } while (OP(s = m->g->strip[ss]) != O_CH);
 508                         /* note that the ss++ gets us past the O_CH */
 509                         break;
 510                 default:        /* have to make a choice */
 511                         hard = 1;
 512                         break;
 513                 }
 514         if (!hard) {            /* that was it! */
 515                 if (sp != stop)
 516                         return(NULL);
 517                 return(sp);
 518         }
 519         ss--;                   /* adjust for the for's final increment */
 520 
 521         /* the hard stuff */
 522         AT("hard", sp, stop, ss, stopst);
 523         s = m->g->strip[ss];
 524         switch (OP(s)) {
 525         case OBACK_:            /* the vilest depths */
 526                 i = OPND(s);
 527                 assert(0 < i && i <= m->g->nsub);
 528                 if (m->pmatch[i].rm_eo == -1)
 529                         return(NULL);
 530                 assert(m->pmatch[i].rm_so != -1);
 531                 len = m->pmatch[i].rm_eo - m->pmatch[i].rm_so;
 532                 assert(stop - m->beginp >= len);
 533                 if (sp > stop - len)
 534                         return(NULL);   /* not enough left to match */
 535                 ssp = m->offp + m->pmatch[i].rm_so;
 536                 if (memcmp(sp, ssp, len) != 0)
 537                         return(NULL);
 538                 while (m->g->strip[ss] != SOP(O_BACK, i))
 539                         ss++;
 540                 return(backref(m, sp+len, stop, ss+1, stopst, lev));
 541                 break;
 542         case OQUEST_:           /* to null or not */
 543                 dp = backref(m, sp, stop, ss+1, stopst, lev);
 544                 if (dp != NULL)
 545                         return(dp);     /* not */
 546                 return(backref(m, sp, stop, ss+OPND(s)+1, stopst, lev));
 547                 break;
 548         case OPLUS_:
 549                 assert(m->lastpos != NULL);
 550                 assert(lev+1 <= m->g->nplus);
 551                 m->lastpos[lev+1] = sp;
 552                 return(backref(m, sp, stop, ss+1, stopst, lev+1));
 553                 break;
 554         case O_PLUS:
 555                 if (sp == m->lastpos[lev])      /* last pass matched null */
 556                         return(backref(m, sp, stop, ss+1, stopst, lev-1));
 557                 /* try another pass */
 558                 m->lastpos[lev] = sp;
 559                 dp = backref(m, sp, stop, ss-OPND(s)+1, stopst, lev);
 560                 if (dp == NULL)
 561                         return(backref(m, sp, stop, ss+1, stopst, lev-1));
 562                 else
 563                         return(dp);
 564                 break;
 565         case OCH_:              /* find the right one, if any */
 566                 ssub = ss + 1;
 567                 esub = ss + OPND(s) - 1;
 568                 assert(OP(m->g->strip[esub]) == OOR1);
 569                 for (;;) {      /* find first matching branch */
 570                         dp = backref(m, sp, stop, ssub, esub, lev);
 571                         if (dp != NULL)
 572                                 return(dp);
 573                         /* that one missed, try next one */
 574                         if (OP(m->g->strip[esub]) == O_CH)
 575                                 return(NULL);   /* there is none */
 576                         esub++;
 577                         assert(OP(m->g->strip[esub]) == OOR2);
 578                         ssub = esub + 1;
 579                         esub += OPND(m->g->strip[esub]);
 580                         if (OP(m->g->strip[esub]) == OOR2)
 581                                 esub--;
 582                         else
 583                                 assert(OP(m->g->strip[esub]) == O_CH);
 584                 }
 585                 break;
 586         case OLPAREN:           /* must undo assignment if rest fails */
 587                 i = OPND(s);
 588                 assert(0 < i && i <= m->g->nsub);
 589                 offsave = m->pmatch[i].rm_so;
 590                 m->pmatch[i].rm_so = sp - m->offp;
 591                 dp = backref(m, sp, stop, ss+1, stopst, lev);
 592                 if (dp != NULL)
 593                         return(dp);
 594                 m->pmatch[i].rm_so = offsave;
 595                 return(NULL);
 596                 break;
 597         case ORPAREN:           /* must undo assignment if rest fails */
 598                 i = OPND(s);
 599                 assert(0 < i && i <= m->g->nsub);
 600                 offsave = m->pmatch[i].rm_eo;
 601                 m->pmatch[i].rm_eo = sp - m->offp;
 602                 dp = backref(m, sp, stop, ss+1, stopst, lev);
 603                 if (dp != NULL)
 604                         return(dp);
 605                 m->pmatch[i].rm_eo = offsave;
 606                 return(NULL);
 607                 break;
 608         default:                /* uh oh */
 609                 assert(PHP_REGEX_NOPE);
 610                 break;
 611         }
 612 
 613         /* "can't happen" */
 614         assert(PHP_REGEX_NOPE);
 615         /* NOTREACHED */
 616         return((unsigned char *)NULL);  /* dummy */
 617 }
 618 
 619 /*
 620  - fast - step through the string at top speed
 621  == static unsigned char *fast(register struct match *m, unsigned char *start, \
 622  ==     unsigned char *stop, sopno startst, sopno stopst);
 623  */
 624 static unsigned char *                  /* where tentative match ended, or NULL */
 625 fast(m, start, stop, startst, stopst)
 626 register struct match *m;
 627 unsigned char *start;
 628 unsigned char *stop;
 629 sopno startst;
 630 sopno stopst;
 631 {
 632         register states st = m->st;
 633         register states fresh = m->fresh;
 634         register states tmp = m->tmp;
 635         register unsigned char *p = start;
 636         register int c = (start == m->beginp) ? OUT : *(start-1);
 637         register int lastc;     /* previous c */
 638         register int flagch;
 639         register int i;
 640         register unsigned char *coldp;  /* last p after which no match was underway */
 641 
 642         CLEAR(st);
 643         SET1(st, startst);
 644         st = step(m->g, startst, stopst, st, NOTHING, st);
 645         ASSIGN(fresh, st);
 646         SP("start", st, *p);
 647         coldp = NULL;
 648         for (;;) {
 649                 /* next character */
 650                 lastc = c;
 651                 c = (p == m->endp) ? OUT : *p;
 652                 if (EQ(st, fresh))
 653                         coldp = p;
 654 
 655                 /* is there an EOL and/or BOL between lastc and c? */
 656                 flagch = '\0';
 657                 i = 0;
 658                 if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
 659                                 (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
 660                         flagch = BOL;
 661                         i = m->g->nbol;
 662                 }
 663                 if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
 664                                 (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
 665                         flagch = (flagch == BOL) ? BOLEOL : EOL;
 666                         i += m->g->neol;
 667                 }
 668                 if (i != 0) {
 669                         for (; i > 0; i--)
 670                                 st = step(m->g, startst, stopst, st, flagch, st);
 671                         SP("boleol", st, c);
 672                 }
 673 
 674                 /* how about a word boundary? */
 675                 if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
 676                                         (c != OUT && ISWORD(c)) ) {
 677                         flagch = BOW;
 678                 }
 679                 if ( (lastc != OUT && ISWORD(lastc)) &&
 680                                 (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
 681                         flagch = EOW;
 682                 }
 683                 if (flagch == BOW || flagch == EOW) {
 684                         st = step(m->g, startst, stopst, st, flagch, st);
 685                         SP("boweow", st, c);
 686                 }
 687 
 688                 /* are we done? */
 689                 if (ISSET(st, stopst) || p == stop)
 690                         break;          /* NOTE BREAK OUT */
 691 
 692                 /* no, we must deal with this character */
 693                 ASSIGN(tmp, st);
 694                 ASSIGN(st, fresh);
 695                 assert(c != OUT);
 696                 st = step(m->g, startst, stopst, tmp, c, st);
 697                 SP("aft", st, c);
 698                 assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
 699                 p++;
 700         }
 701 
 702         assert(coldp != NULL);
 703         m->coldp = coldp;
 704         if (ISSET(st, stopst))
 705                 return(p+1);
 706         else
 707                 return(NULL);
 708 }
 709 
 710 /*
 711  - slow - step through the string more deliberately
 712  == static unsigned char *slow(register struct match *m, unsigned char *start, \
 713  ==     unsigned char *stop, sopno startst, sopno stopst);
 714  */
 715 static unsigned char *                  /* where it ended */
 716 slow(m, start, stop, startst, stopst)
 717 register struct match *m;
 718 unsigned char *start;
 719 unsigned char *stop;
 720 sopno startst;
 721 sopno stopst;
 722 {
 723         register states st = m->st;
 724         register states empty = m->empty;
 725         register states tmp = m->tmp;
 726         register unsigned char *p = start;
 727         register int c = (start == m->beginp) ? OUT : *(start-1);
 728         register int lastc;     /* previous c */
 729         register int flagch;
 730         register int i;
 731         register unsigned char *matchp; /* last p at which a match ended */
 732 
 733         AT("slow", start, stop, startst, stopst);
 734         CLEAR(st);
 735         SET1(st, startst);
 736         SP("sstart", st, *p);
 737         st = step(m->g, startst, stopst, st, NOTHING, st);
 738         matchp = NULL;
 739         for (;;) {
 740                 /* next character */
 741                 lastc = c;
 742                 c = (p == m->endp) ? OUT : *p;
 743 
 744                 /* is there an EOL and/or BOL between lastc and c? */
 745                 flagch = '\0';
 746                 i = 0;
 747                 if ( (lastc == '\n' && m->g->cflags&REG_NEWLINE) ||
 748                                 (lastc == OUT && !(m->eflags&REG_NOTBOL)) ) {
 749                         flagch = BOL;
 750                         i = m->g->nbol;
 751                 }
 752                 if ( (c == '\n' && m->g->cflags&REG_NEWLINE) ||
 753                                 (c == OUT && !(m->eflags&REG_NOTEOL)) ) {
 754                         flagch = (flagch == BOL) ? BOLEOL : EOL;
 755                         i += m->g->neol;
 756                 }
 757                 if (i != 0) {
 758                         for (; i > 0; i--)
 759                                 st = step(m->g, startst, stopst, st, flagch, st);
 760                         SP("sboleol", st, c);
 761                 }
 762 
 763                 /* how about a word boundary? */
 764                 if ( (flagch == BOL || (lastc != OUT && !ISWORD(lastc))) &&
 765                                         (c != OUT && ISWORD(c)) ) {
 766                         flagch = BOW;
 767                 }
 768                 if ( (lastc != OUT && ISWORD(lastc)) &&
 769                                 (flagch == EOL || (c != OUT && !ISWORD(c))) ) {
 770                         flagch = EOW;
 771                 }
 772                 if (flagch == BOW || flagch == EOW) {
 773                         st = step(m->g, startst, stopst, st, flagch, st);
 774                         SP("sboweow", st, c);
 775                 }
 776 
 777                 /* are we done? */
 778                 if (ISSET(st, stopst))
 779                         matchp = p;
 780                 if (EQ(st, empty) || p == stop)
 781                         break;          /* NOTE BREAK OUT */
 782 
 783                 /* no, we must deal with this character */
 784                 ASSIGN(tmp, st);
 785                 ASSIGN(st, empty);
 786                 assert(c != OUT);
 787                 st = step(m->g, startst, stopst, tmp, c, st);
 788                 SP("saft", st, c);
 789                 assert(EQ(step(m->g, startst, stopst, st, NOTHING, st), st));
 790                 p++;
 791         }
 792 
 793         return(matchp);
 794 }
 795 
 796 
 797 /*
 798  - step - map set of states reachable before char to set reachable after
 799  == static states step(register struct re_guts *g, sopno start, sopno stop, \
 800  ==     register states bef, int ch, register states aft);
 801  == #define     BOL     (OUT+1)
 802  == #define     EOL     (BOL+1)
 803  == #define     BOLEOL  (BOL+2)
 804  == #define     NOTHING (BOL+3)
 805  == #define     BOW     (BOL+4)
 806  == #define     EOW     (BOL+5)
 807  == #define     CODEMAX (BOL+5)         // highest code used
 808  == #define     NONCHAR(c)      ((c) > UCHAR_MAX)
 809  == #define     NNONCHAR        (CODEMAX-UCHAR_MAX)
 810  */
 811 static states
 812 step(g, start, stop, bef, ch, aft)
 813 register struct re_guts *g;
 814 sopno start;                    /* start state within strip */
 815 sopno stop;                     /* state after stop state within strip */
 816 register states bef;            /* states reachable before */
 817 int ch;                         /* character or NONCHAR code */
 818 register states aft;            /* states already known reachable after */
 819 {
 820         register cset *cs;
 821         register sop s;
 822         register sopno pc;
 823         register onestate here;         /* note, macros know this name */
 824         register sopno look;
 825         register long i;
 826 
 827         for (pc = start, INIT(here, pc); pc != stop; pc++, INC(here)) {
 828                 s = g->strip[pc];
 829                 switch (OP(s)) {
 830                 case OEND:
 831                         assert(pc == stop-1);
 832                         break;
 833                 case OCHAR:
 834                         /* only characters can match */
 835                         assert(!NONCHAR(ch) || ch != (unsigned char)OPND(s));
 836                         if (ch == (unsigned char)OPND(s))
 837                                 FWD(aft, bef, 1);
 838                         break;
 839                 case OBOL:
 840                         if (ch == BOL || ch == BOLEOL)
 841                                 FWD(aft, bef, 1);
 842                         break;
 843                 case OEOL:
 844                         if (ch == EOL || ch == BOLEOL)
 845                                 FWD(aft, bef, 1);
 846                         break;
 847                 case OBOW:
 848                         if (ch == BOW)
 849                                 FWD(aft, bef, 1);
 850                         break;
 851                 case OEOW:
 852                         if (ch == EOW)
 853                                 FWD(aft, bef, 1);
 854                         break;
 855                 case OANY:
 856                         if (!NONCHAR(ch))
 857                                 FWD(aft, bef, 1);
 858                         break;
 859                 case OANYOF:
 860                         cs = &g->sets[OPND(s)];
 861                         if (!NONCHAR(ch) && CHIN(cs, ch))
 862                                 FWD(aft, bef, 1);
 863                         break;
 864                 case OBACK_:            /* ignored here */
 865                 case O_BACK:
 866                         FWD(aft, aft, 1);
 867                         break;
 868                 case OPLUS_:            /* forward, this is just an empty */
 869                         FWD(aft, aft, 1);
 870                         break;
 871                 case O_PLUS:            /* both forward and back */
 872                         FWD(aft, aft, 1);
 873                         i = ISSETBACK(aft, OPND(s));
 874                         BACK(aft, aft, OPND(s));
 875                         if (!i && ISSETBACK(aft, OPND(s))) {
 876                                 /* oho, must reconsider loop body */
 877                                 pc -= OPND(s) + 1;
 878                                 INIT(here, pc);
 879                         }
 880                         break;
 881                 case OQUEST_:           /* two branches, both forward */
 882                         FWD(aft, aft, 1);
 883                         FWD(aft, aft, OPND(s));
 884                         break;
 885                 case O_QUEST:           /* just an empty */
 886                         FWD(aft, aft, 1);
 887                         break;
 888                 case OLPAREN:           /* not significant here */
 889                 case ORPAREN:
 890                         FWD(aft, aft, 1);
 891                         break;
 892                 case OCH_:              /* mark the first two branches */
 893                         FWD(aft, aft, 1);
 894                         assert(OP(g->strip[pc+OPND(s)]) == OOR2);
 895                         FWD(aft, aft, OPND(s));
 896                         break;
 897                 case OOR1:              /* done a branch, find the O_CH */
 898                         if (ISSTATEIN(aft, here)) {
 899                                 for (look = 1;
 900                                                 OP(s = g->strip[pc+look]) != O_CH;
 901                                                 look += OPND(s))
 902                                         assert(OP(s) == OOR2);
 903                                 FWD(aft, aft, look);
 904                         }
 905                         break;
 906                 case OOR2:              /* propagate OCH_'s marking */
 907                         FWD(aft, aft, 1);
 908                         if (OP(g->strip[pc+OPND(s)]) != O_CH) {
 909                                 assert(OP(g->strip[pc+OPND(s)]) == OOR2);
 910                                 FWD(aft, aft, OPND(s));
 911                         }
 912                         break;
 913                 case O_CH:              /* just empty */
 914                         FWD(aft, aft, 1);
 915                         break;
 916                 default:                /* ooooops... */
 917                         assert(PHP_REGEX_NOPE);
 918                         break;
 919                 }
 920         }
 921 
 922         return(aft);
 923 }
 924 
 925 #ifdef REDEBUG
 926 /*
 927  - print - print a set of states
 928  == #ifdef REDEBUG
 929  == static void print(struct match *m, unsigned char *caption, states st, \
 930  ==     int ch, FILE *d);
 931  == #endif
 932  */
 933 static void
 934 print(m, caption, st, ch, d)
 935 struct match *m;
 936 unsigned char *caption;
 937 states st;
 938 int ch;
 939 FILE *d;
 940 {
 941         register struct re_guts *g = m->g;
 942         register int i;
 943         register int first = 1;
 944 
 945         if (!(m->eflags&REG_TRACE))
 946                 return;
 947 
 948         fprintf(d, "%s", caption);
 949         if (ch != '\0')
 950                 fprintf(d, " %s", pchar(ch));
 951         for (i = 0; i < g->nstates; i++)
 952                 if (ISSET(st, i)) {
 953                         fprintf(d, "%s%d", (first) ? "\t" : ", ", i);
 954                         first = 0;
 955                 }
 956         fprintf(d, "\n");
 957 }
 958 
 959 /* 
 960  - at - print current situation
 961  == #ifdef REDEBUG
 962  == static void at(struct match *m, unsigned char *title, unsigned char *start, unsigned char *stop, \
 963  ==                                             sopno startst, sopno stopst);
 964  == #endif
 965  */
 966 static void
 967 at(m, title, start, stop, startst, stopst)
 968 struct match *m;
 969 unsigned char *title;
 970 unsigned char *start;
 971 unsigned char *stop;
 972 sopno startst;
 973 sopno stopst;
 974 {
 975         if (!(m->eflags&REG_TRACE))
 976                 return;
 977 
 978         printf("%s %s-", title, pchar(*start));
 979         printf("%s ", pchar(*stop));
 980         printf("%ld-%ld\n", (long)startst, (long)stopst);
 981 }
 982 
 983 #ifndef PCHARDONE
 984 #define PCHARDONE       /* never again */
 985 /*
 986  - pchar - make a character printable
 987  == #ifdef REDEBUG
 988  == static unsigned char *pchar(int ch);
 989  == #endif
 990  *
 991  * Is this identical to regchar() over in debug.c?  Well, yes.  But a
 992  * duplicate here avoids having a debugging-capable regexec.o tied to
 993  * a matching debug.o, and this is convenient.  It all disappears in
 994  * the non-debug compilation anyway, so it doesn't matter much.
 995  */
 996 static unsigned char *                  /* -> representation */
 997 pchar(ch)
 998 int ch;
 999 {
1000         static unsigned char pbuf[10];
1001 
1002         if (isprint(ch) || ch == ' ')
1003                 sprintf(pbuf, "%c", ch);
1004         else
1005                 sprintf(pbuf, "\\%o", ch);
1006         return(pbuf);
1007 }
1008 #endif
1009 #endif
1010 
1011 #undef  matcher
1012 #undef  fast
1013 #undef  slow
1014 #undef  dissect
1015 #undef  backref
1016 #undef  step
1017 #undef  print
1018 #undef  at
1019 #undef  match

/* [<][>][^][v][top][bottom][index][help] */