root/ext/ereg/regex/main.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. main
  2. regress
  3. try
  4. options
  5. opt
  6. fixstr
  7. check
  8. eprint
  9. efind

   1 #include <stdio.h>
   2 #include <string.h>
   3 #include <sys/types.h>
   4 #include <regex.h>
   5 #include <assert.h>
   6 #include <stdlib.h>
   7 
   8 #include "main.ih"
   9 
  10 char *progname;                 /* argv[0]; used in the usage message */
  11 int debug = 0;                  /* bumped by -x; regress() echoes line numbers when nonzero */
  12 int line = 0;                   /* current input line number, counted by regress() */
  13 int status = 0;                 /* process exit status; set to 1 when a test fails */
  14 
  15 int copts = REG_EXTENDED;       /* base regcomp() flags; starting value for options('c', ...) */
  16 int eopts = 0;                  /* base regexec() flags; starting value for options('e', ...) */
  17 regoff_t startoff = 0;          /* -S value: rm_so handed to regexec() under REG_STARTEND */
  18 regoff_t endoff = 0;            /* -E value: subtracted from the string length to form rm_eo */
  19 
  20 
  21 extern int split();             /* defined elsewhere; called here as split(string, fields, nfields, sep) */
  22 extern void regprint();         /* defined elsewhere; called here as regprint(&re, stdout) */
  23 
  24 /*
  25  - main - do the simple case, hand off to regress() for regression
  26  */
  27 int main(argc, argv)
  28 int argc;
  29 char *argv[];
  30 {
  31         regex_t re;
  32 #       define  NS      10
  33         regmatch_t subs[NS];
  34         char erbuf[100];
  35         int err;
  36         size_t len;
  37         int c;
  38         int errflg = 0;
  39         register int i;
  40         extern int optind;
  41         extern char *optarg;
  42 
  43         progname = argv[0];
  44 
  45         while ((c = getopt(argc, argv, "c:e:S:E:x")) != EOF)
  46                 switch (c) {
  47                 case 'c':       /* compile options */
  48                         copts = options('c', optarg);
  49                         break;
  50                 case 'e':       /* execute options */
  51                         eopts = options('e', optarg);
  52                         break;
  53                 case 'S':       /* start offset */
  54                         startoff = (regoff_t)atoi(optarg);
  55                         break;
  56                 case 'E':       /* end offset */
  57                         endoff = (regoff_t)atoi(optarg);
  58                         break;
  59                 case 'x':       /* Debugging. */
  60                         debug++;
  61                         break;
  62                 case '?':
  63                 default:
  64                         errflg++;
  65                         break;
  66                 }
  67         if (errflg) {
  68                 fprintf(stderr, "usage: %s ", progname);
  69                 fprintf(stderr, "[-c copt][-C][-d] [re]\n");
  70                 exit(2);
  71         }
  72 
  73         if (optind >= argc) {
  74                 regress(stdin);
  75                 exit(status);
  76         }
  77 
  78         err = regcomp(&re, argv[optind++], copts);
  79         if (err) {
  80                 len = regerror(err, &re, erbuf, sizeof(erbuf));
  81                 fprintf(stderr, "error %s, %d/%d `%s'\n",
  82                         eprint(err), len, sizeof(erbuf), erbuf);
  83                 exit(status);
  84         }
  85         regprint(&re, stdout);  
  86 
  87         if (optind >= argc) {
  88                 regfree(&re);
  89                 exit(status);
  90         }
  91 
  92         if (eopts&REG_STARTEND) {
  93                 subs[0].rm_so = startoff;
  94                 subs[0].rm_eo = strlen(argv[optind]) - endoff;
  95         }
  96         err = regexec(&re, argv[optind], (size_t)NS, subs, eopts);
  97         if (err) {
  98                 len = regerror(err, &re, erbuf, sizeof(erbuf));
  99                 fprintf(stderr, "error %s, %d/%d `%s'\n",
 100                         eprint(err), len, sizeof(erbuf), erbuf);
 101                 exit(status);
 102         }
 103         if (!(copts&REG_NOSUB)) {
 104                 len = (int)(subs[0].rm_eo - subs[0].rm_so);
 105                 if (subs[0].rm_so != -1) {
 106                         if (len != 0)
 107                                 printf("match `%.*s'\n", (int)len,
 108                                         argv[optind] + subs[0].rm_so);
 109                         else
 110                                 printf("match `'@%.1s\n",
 111                                         argv[optind] + subs[0].rm_so);
 112                 }
 113                 for (i = 1; i < NS; i++)
 114                         if (subs[i].rm_so != -1)
 115                                 printf("(%d) `%.*s'\n", i,
 116                                         (int)(subs[i].rm_eo - subs[i].rm_so),
 117                                         argv[optind] + subs[i].rm_so);
 118         }
 119         exit(status);
 120 }
 121 
 122 /*
 123  - regress - main loop of regression test
 124  == void regress(FILE *in);
 125  */
 126 void
 127 regress(in)
 128 FILE *in;
 129 {
 130         char inbuf[1000];
 131 #       define  MAXF    10
 132         char *f[MAXF];
 133         int nf;
 134         int i;
 135         char erbuf[100];
 136         size_t ne;
 137         char *badpat = "invalid regular expression";
 138 #       define  SHORT   10
 139         char *bpname = "REG_BADPAT";
 140         regex_t re;
 141 
 142         while (fgets(inbuf, sizeof(inbuf), in) != NULL) {
 143                 line++;
 144                 if (inbuf[0] == '#' || inbuf[0] == '\n')
 145                         continue;                       /* NOTE CONTINUE */
 146                 inbuf[strlen(inbuf)-1] = '\0';  /* get rid of stupid \n */
 147                 if (debug)
 148                         fprintf(stdout, "%d:\n", line);
 149                 nf = split(inbuf, f, MAXF, "\t\t");
 150                 if (nf < 3) {
 151                         fprintf(stderr, "bad input, line %d\n", line);
 152                         exit(1);
 153                 }
 154                 for (i = 0; i < nf; i++)
 155                         if (strcmp(f[i], "\"\"") == 0)
 156                                 f[i] = "";
 157                 if (nf <= 3)
 158                         f[3] = NULL;
 159                 if (nf <= 4)
 160                         f[4] = NULL;
 161                 try(f[0], f[1], f[2], f[3], f[4], options('c', f[1]));
 162                 if (opt('&', f[1]))     /* try with either type of RE */
 163                         try(f[0], f[1], f[2], f[3], f[4],
 164                                         options('c', f[1]) &~ REG_EXTENDED);
 165         }
 166 
 167         ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
 168         if (strcmp(erbuf, badpat) != 0 || ne != strlen(badpat)+1) {
 169                 fprintf(stderr, "end: regerror() test gave `%s' not `%s'\n",
 170                                                         erbuf, badpat);
 171                 status = 1;
 172         }
 173         ne = regerror(REG_BADPAT, (regex_t *)NULL, erbuf, (size_t)SHORT);
 174         if (strncmp(erbuf, badpat, SHORT-1) != 0 || erbuf[SHORT-1] != '\0' ||
 175                                                 ne != strlen(badpat)+1) {
 176                 fprintf(stderr, "end: regerror() short test gave `%s' not `%.*s'\n",
 177                                                 erbuf, SHORT-1, badpat);
 178                 status = 1;
 179         }
 180         ne = regerror(REG_ITOA|REG_BADPAT, (regex_t *)NULL, erbuf, sizeof(erbuf));
 181         if (strcmp(erbuf, bpname) != 0 || ne != strlen(bpname)+1) {
 182                 fprintf(stderr, "end: regerror() ITOA test gave `%s' not `%s'\n",
 183                                                 erbuf, bpname);
 184                 status = 1;
 185         }
 186         re.re_endp = bpname;
 187         ne = regerror(REG_ATOI, &re, erbuf, sizeof(erbuf));
 188         if (atoi(erbuf) != (int)REG_BADPAT) {
 189                 fprintf(stderr, "end: regerror() ATOI test gave `%s' not `%ld'\n",
 190                                                 erbuf, (long)REG_BADPAT);
 191                 status = 1;
 192         } else if (ne != strlen(erbuf)+1) {
 193                 fprintf(stderr, "end: regerror() ATOI test len(`%s') = %ld\n",
 194                                                 erbuf, (long)REG_BADPAT);
 195                 status = 1;
 196         }
 197 }
 198 
 199 /*
 200  - try - try it, and report on problems
 201  == void try(char *f0, char *f1, char *f2, char *f3, char *f4, int opts);
 202  */
 203 void
 204 try(f0, f1, f2, f3, f4, opts)
 205 char *f0;
 206 char *f1;
 207 char *f2;
 208 char *f3;
 209 char *f4;
 210 int opts;                       /* may not match f1 */
 211 {
 212         regex_t re;
 213 #       define  NSUBS   10
 214         regmatch_t subs[NSUBS];
 215 #       define  NSHOULD 15
 216         char *should[NSHOULD];
 217         int nshould;
 218         char erbuf[100];
 219         int err;
 220         int len;
 221         char *type = (opts & REG_EXTENDED) ? "ERE" : "BRE";
 222         register int i;
 223         char *grump;
 224         char f0copy[1000];
 225         char f2copy[1000];
 226 
 227         strcpy(f0copy, f0);
 228         re.re_endp = (opts&REG_PEND) ? f0copy + strlen(f0copy) : NULL;
 229         fixstr(f0copy);
 230         err = regcomp(&re, f0copy, opts);
 231         if (err != 0 && (!opt('C', f1) || err != efind(f2))) {
 232                 /* unexpected error or wrong error */
 233                 len = regerror(err, &re, erbuf, sizeof(erbuf));
 234                 fprintf(stderr, "%d: %s error %s, %d/%d `%s'\n",
 235                                         line, type, eprint(err), len,
 236                                         sizeof(erbuf), erbuf);
 237                 status = 1;
 238         } else if (err == 0 && opt('C', f1)) {
 239                 /* unexpected success */
 240                 fprintf(stderr, "%d: %s should have given REG_%s\n",
 241                                                 line, type, f2);
 242                 status = 1;
 243                 err = 1;        /* so we won't try regexec */
 244         }
 245 
 246         if (err != 0) {
 247                 regfree(&re);
 248                 return;
 249         }
 250 
 251         strcpy(f2copy, f2);
 252         fixstr(f2copy);
 253 
 254         if (options('e', f1)&REG_STARTEND) {
 255                 if (strchr(f2, '(') == NULL || strchr(f2, ')') == NULL)
 256                         fprintf(stderr, "%d: bad STARTEND syntax\n", line);
 257                 subs[0].rm_so = strchr(f2, '(') - f2 + 1;
 258                 subs[0].rm_eo = strchr(f2, ')') - f2;
 259         }
 260         err = regexec(&re, f2copy, NSUBS, subs, options('e', f1));
 261 
 262         if (err != 0 && (f3 != NULL || err != REG_NOMATCH)) {
 263                 /* unexpected error or wrong error */
 264                 len = regerror(err, &re, erbuf, sizeof(erbuf));
 265                 fprintf(stderr, "%d: %s exec error %s, %d/%d `%s'\n",
 266                                         line, type, eprint(err), len,
 267                                         sizeof(erbuf), erbuf);
 268                 status = 1;
 269         } else if (err != 0) {
 270                 /* nothing more to check */
 271         } else if (f3 == NULL) {
 272                 /* unexpected success */
 273                 fprintf(stderr, "%d: %s exec should have failed\n",
 274                                                 line, type);
 275                 status = 1;
 276                 err = 1;                /* just on principle */
 277         } else if (opts&REG_NOSUB) {
 278                 /* nothing more to check */
 279         } else if ((grump = check(f2, subs[0], f3)) != NULL) {
 280                 fprintf(stderr, "%d: %s %s\n", line, type, grump);
 281                 status = 1;
 282                 err = 1;
 283         }
 284 
 285         if (err != 0 || f4 == NULL) {
 286                 regfree(&re);
 287                 return;
 288         }
 289 
 290         for (i = 1; i < NSHOULD; i++)
 291                 should[i] = NULL;
 292         nshould = split(f4, should+1, NSHOULD-1, ",");
 293         if (nshould == 0) {
 294                 nshould = 1;
 295                 should[1] = "";
 296         }
 297         for (i = 1; i < NSUBS; i++) {
 298                 grump = check(f2, subs[i], should[i]);
 299                 if (grump != NULL) {
 300                         fprintf(stderr, "%d: %s $%d %s\n", line,
 301                                                         type, i, grump);
 302                         status = 1;
 303                         err = 1;
 304                 }
 305         }
 306 
 307         regfree(&re);
 308 }
 309 
 310 /*
 311  - options - pick options out of a regression-test string
 312  == int options(int type, char *s);
 313  */
 314 int
 315 options(type, s)
 316 int type;                       /* 'c' compile, 'e' exec */
 317 char *s;
 318 {
 319         register char *p;
 320         register int o = (type == 'c') ? copts : eopts;
 321         register char *legal = (type == 'c') ? "bisnmp" : "^$#tl";
 322 
 323         for (p = s; *p != '\0'; p++)
 324                 if (strchr(legal, *p) != NULL)
 325                         switch (*p) {
 326                         case 'b':
 327                                 o &= ~REG_EXTENDED;
 328                                 break;
 329                         case 'i':
 330                                 o |= REG_ICASE;
 331                                 break;
 332                         case 's':
 333                                 o |= REG_NOSUB;
 334                                 break;
 335                         case 'n':
 336                                 o |= REG_NEWLINE;
 337                                 break;
 338                         case 'm':
 339                                 o &= ~REG_EXTENDED;
 340                                 o |= REG_NOSPEC;
 341                                 break;
 342                         case 'p':
 343                                 o |= REG_PEND;
 344                                 break;
 345                         case '^':
 346                                 o |= REG_NOTBOL;
 347                                 break;
 348                         case '$':
 349                                 o |= REG_NOTEOL;
 350                                 break;
 351                         case '#':
 352                                 o |= REG_STARTEND;
 353                                 break;
 354                         case 't':       /* trace */
 355                                 o |= REG_TRACE;
 356                                 break;
 357                         case 'l':       /* force long representation */
 358                                 o |= REG_LARGE;
 359                                 break;
 360                         case 'r':       /* force backref use */
 361                                 o |= REG_BACKR;
 362                                 break;
 363                         }
 364         return(o);
 365 }
 366 
 367 /*
 368  - opt - is a particular option in a regression string?
 369  == int opt(int c, char *s);
 370  */
 371 int                             /* predicate */
 372 opt(c, s)
 373 int c;
 374 char *s;
 375 {
 376         return(strchr(s, c) != NULL);
 377 }
 378 
 379 /*
 380  - fixstr - transform magic characters in strings
 381  == void fixstr(register char *p);
 382  */
 383 void
 384 fixstr(p)
 385 register char *p;
 386 {
 387         if (p == NULL)
 388                 return;
 389 
 390         for (; *p != '\0'; p++)
 391                 if (*p == 'N')
 392                         *p = '\n';
 393                 else if (*p == 'T')
 394                         *p = '\t';
 395                 else if (*p == 'S')
 396                         *p = ' ';
 397                 else if (*p == 'Z')
 398                         *p = '\0';
 399 }
 400 
 401 /*
 402  - check - check a substring match
 403  == char *check(char *str, regmatch_t sub, char *should);
 404  */
 405 char *                          /* NULL or complaint */
 406 check(str, sub, should)
 407 char *str;
 408 regmatch_t sub;
 409 char *should;
 410 {
 411         register int len;
 412         register int shlen;
 413         register char *p;
 414         static char grump[500];
 415         register char *at = NULL;
 416 
 417         if (should != NULL && strcmp(should, "-") == 0)
 418                 should = NULL;
 419         if (should != NULL && should[0] == '@') {
 420                 at = should + 1;
 421                 should = "";
 422         }
 423 
 424         /* check rm_so and rm_eo for consistency */
 425         if (sub.rm_so > sub.rm_eo || (sub.rm_so == -1 && sub.rm_eo != -1) ||
 426                                 (sub.rm_so != -1 && sub.rm_eo == -1) ||
 427                                 (sub.rm_so != -1 && sub.rm_so < 0) ||
 428                                 (sub.rm_eo != -1 && sub.rm_eo < 0) ) {
 429                 sprintf(grump, "start %ld end %ld", (long)sub.rm_so,
 430                                                         (long)sub.rm_eo);
 431                 return(grump);
 432         }
 433 
 434         /* check for no match */
 435         if (sub.rm_so == -1 && should == NULL)
 436                 return(NULL);
 437         if (sub.rm_so == -1)
 438                 return("did not match");
 439 
 440         /* check for in range */
 441         if (sub.rm_eo > strlen(str)) {
 442                 sprintf(grump, "start %ld end %ld, past end of string",
 443                                         (long)sub.rm_so, (long)sub.rm_eo);
 444                 return(grump);
 445         }
 446 
 447         len = (int)(sub.rm_eo - sub.rm_so);
 448         shlen = (int)strlen(should);
 449         p = str + sub.rm_so;
 450 
 451         /* check for not supposed to match */
 452         if (should == NULL) {
 453                 sprintf(grump, "matched `%.*s'", len, p);
 454                 return(grump);
 455         }
 456 
 457         /* check for wrong match */
 458         if (len != shlen || strncmp(p, should, (size_t)shlen) != 0) {
 459                 sprintf(grump, "matched `%.*s' instead", len, p);
 460                 return(grump);
 461         }
 462         if (shlen > 0)
 463                 return(NULL);
 464 
 465         /* check null match in right place */
 466         if (at == NULL)
 467                 return(NULL);
 468         shlen = strlen(at);
 469         if (shlen == 0)
 470                 shlen = 1;      /* force check for end-of-string */
 471         if (strncmp(p, at, shlen) != 0) {
 472                 sprintf(grump, "matched null at `%.20s'", p);
 473                 return(grump);
 474         }
 475         return(NULL);
 476 }
 477 
 478 /*
 479  - eprint - convert error number to name
 480  == static char *eprint(int err);
 481  */
 482 static char *
 483 eprint(err)
 484 int err;
 485 {
 486         static char epbuf[100];
 487         size_t len;
 488 
 489         len = regerror(REG_ITOA|err, (regex_t *)NULL, epbuf, sizeof(epbuf));
 490         assert(len <= sizeof(epbuf));
 491         return(epbuf);
 492 }
 493 
 494 /*
 495  - efind - convert error name to number
 496  == static int efind(char *name);
 497  */
 498 static int
 499 efind(name)
 500 char *name;
 501 {
 502         static char efbuf[100];
 503         regex_t re;
 504 
 505         sprintf(efbuf, "REG_%s", name);
 506         assert(strlen(efbuf) < sizeof(efbuf));
 507         re.re_endp = efbuf;
 508         (void) regerror(REG_ATOI, &re, efbuf, sizeof(efbuf));
 509         return(atoi(efbuf));
 510 }

/* [<][>][^][v][top][bottom][index][help] */