root/ext/ereg/regex/split.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. split
  2. main
  3. dosplit
  4. print
  5. regress

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
   3 
   4 /*
   5  - split - divide a string into fields, like awk split()
   6  = int split(char *string, char *fields[], int nfields, char *sep);
   7  */
   8 int                             /* number of fields, including overflow */
   9 split(string, fields, nfields, sep)
  10 char *string;
  11 char *fields[];                 /* list is not NULL-terminated */
  12 int nfields;                    /* number of entries available in fields[] */
  13 char *sep;                      /* "" white, "c" single char, "ab" [ab]+ */
  14 {
  15         register char *p = string;
  16         register char c;                        /* latest character */
  17         register char sepc = sep[0];
  18         register char sepc2;
  19         register int fn;
  20         register char **fp = fields;
  21         register char *sepp;
  22         register int trimtrail;
  23 
  24         /* white space */
  25         if (sepc == '\0') {
  26                 while ((c = *p++) == ' ' || c == '\t')
  27                         continue;
  28                 p--;
  29                 trimtrail = 1;
  30                 sep = " \t";    /* note, code below knows this is 2 long */
  31                 sepc = ' ';
  32         } else
  33                 trimtrail = 0;
  34         sepc2 = sep[1];         /* now we can safely pick this up */
  35 
  36         /* catch empties */
  37         if (*p == '\0')
  38                 return(0);
  39 
  40         /* single separator */
  41         if (sepc2 == '\0') {
  42                 fn = nfields;
  43                 for (;;) {
  44                         *fp++ = p;
  45                         fn--;
  46                         if (fn == 0)
  47                                 break;
  48                         while ((c = *p++) != sepc)
  49                                 if (c == '\0')
  50                                         return(nfields - fn);
  51                         *(p-1) = '\0';
  52                 }
  53                 /* we have overflowed the fields vector -- just count them */
  54                 fn = nfields;
  55                 for (;;) {
  56                         while ((c = *p++) != sepc)
  57                                 if (c == '\0')
  58                                         return(fn);
  59                         fn++;
  60                 }
  61                 /* not reached */
  62         }
  63 
  64         /* two separators */
  65         if (sep[2] == '\0') {
  66                 fn = nfields;
  67                 for (;;) {
  68                         *fp++ = p;
  69                         fn--;
  70                         while ((c = *p++) != sepc && c != sepc2)
  71                                 if (c == '\0') {
  72                                         if (trimtrail && **(fp-1) == '\0')
  73                                                 fn++;
  74                                         return(nfields - fn);
  75                                 }
  76                         if (fn == 0)
  77                                 break;
  78                         *(p-1) = '\0';
  79                         while ((c = *p++) == sepc || c == sepc2)
  80                                 continue;
  81                         p--;
  82                 }
  83                 /* we have overflowed the fields vector -- just count them */
  84                 fn = nfields;
  85                 while (c != '\0') {
  86                         while ((c = *p++) == sepc || c == sepc2)
  87                                 continue;
  88                         p--;
  89                         fn++;
  90                         while ((c = *p++) != '\0' && c != sepc && c != sepc2)
  91                                 continue;
  92                 }
  93                 /* might have to trim trailing white space */
  94                 if (trimtrail) {
  95                         p--;
  96                         while ((c = *--p) == sepc || c == sepc2)
  97                                 continue;
  98                         p++;
  99                         if (*p != '\0') {
 100                                 if (fn == nfields+1)
 101                                         *p = '\0';
 102                                 fn--;
 103                         }
 104                 }
 105                 return(fn);
 106         }
 107 
 108         /* n separators */
 109         fn = 0;
 110         for (;;) {
 111                 if (fn < nfields)
 112                         *fp++ = p;
 113                 fn++;
 114                 for (;;) {
 115                         c = *p++;
 116                         if (c == '\0')
 117                                 return(fn);
 118                         sepp = sep;
 119                         while ((sepc = *sepp++) != '\0' && sepc != c)
 120                                 continue;
 121                         if (sepc != '\0')       /* it was a separator */
 122                                 break;
 123                 }
 124                 if (fn < nfields)
 125                         *(p-1) = '\0';
 126                 for (;;) {
 127                         c = *p++;
 128                         sepp = sep;
 129                         while ((sepc = *sepp++) != '\0' && sepc != c)
 130                                 continue;
 131                         if (sepc == '\0')       /* it wasn't a separator */
 132                                 break;
 133                 }
 134                 p--;
 135         }
 136 
 137         /* not reached */
 138 }
 139 
 140 #ifdef TEST_SPLIT
 141 
 142 
 143 /*
 144  * test program
 145  * pgm          runs regression
 146  * pgm sep      splits stdin lines by sep
 147  * pgm str sep  splits str by sep
 148  * pgm str sep n        splits str by sep n times
 149  */
 150 int
 151 main(argc, argv)
 152 int argc;
 153 char *argv[];
 154 {
 155         char buf[512];
 156         register int n;
 157 #       define  MNF     10
 158         char *fields[MNF];
 159 
 160         if (argc > 4)
 161                 for (n = atoi(argv[3]); n > 0; n--) {
 162                         (void) strcpy(buf, argv[1]);
 163                 }
 164         else if (argc > 3)
 165                 for (n = atoi(argv[3]); n > 0; n--) {
 166                         (void) strcpy(buf, argv[1]);
 167                         (void) split(buf, fields, MNF, argv[2]);
 168                 }
 169         else if (argc > 2)
 170                 dosplit(argv[1], argv[2]);
 171         else if (argc > 1)
 172                 while (fgets(buf, sizeof(buf), stdin) != NULL) {
 173                         buf[strlen(buf)-1] = '\0';      /* stomp newline */
 174                         dosplit(buf, argv[1]);
 175                 }
 176         else
 177                 regress();
 178 
 179         exit(0);
 180 }
 181 
 182 dosplit(string, seps)
 183 char *string;
 184 char *seps;
 185 {
 186 #       define  NF      5
 187         char *fields[NF];
 188         register int nf;
 189 
 190         nf = split(string, fields, NF, seps);
 191         print(nf, NF, fields);
 192 }
 193 
 194 print(nf, nfp, fields)
 195 int nf;
 196 int nfp;
 197 char *fields[];
 198 {
 199         register int fn;
 200         register int bound;
 201 
 202         bound = (nf > nfp) ? nfp : nf;
 203         printf("%d:\t", nf);
 204         for (fn = 0; fn < bound; fn++)
 205                 printf("\"%s\"%s", fields[fn], (fn+1 < nf) ? ", " : "\n");
 206 }
 207 
 208 #define RNF     5               /* some table entries know this */
 209 struct {
 210         char *str;
 211         char *seps;
 212         int nf;
 213         char *fi[RNF];
 214 } tests[] = {
 215         "",             " ",    0,      { "" },
 216         " ",            " ",    2,      { "", "" },
 217         "x",            " ",    1,      { "x" },
 218         "xy",           " ",    1,      { "xy" },
 219         "x y",          " ",    2,      { "x", "y" },
 220         "abc def  g ",  " ",    5,      { "abc", "def", "", "g", "" },
 221         "  a bcd",      " ",    4,      { "", "", "a", "bcd" },
 222         "a b c d e f",  " ",    6,      { "a", "b", "c", "d", "e f" },
 223         " a b c d ",    " ",    6,      { "", "a", "b", "c", "d " },
 224 
 225         "",             " _",   0,      { "" },
 226         " ",            " _",   2,      { "", "" },
 227         "x",            " _",   1,      { "x" },
 228         "x y",          " _",   2,      { "x", "y" },
 229         "ab _ cd",      " _",   2,      { "ab", "cd" },
 230         " a_b  c ",     " _",   5,      { "", "a", "b", "c", "" },
 231         "a b c_d e f",  " _",   6,      { "a", "b", "c", "d", "e f" },
 232         " a b c d ",    " _",   6,      { "", "a", "b", "c", "d " },
 233 
 234         "",             " _~",  0,      { "" },
 235         " ",            " _~",  2,      { "", "" },
 236         "x",            " _~",  1,      { "x" },
 237         "x y",          " _~",  2,      { "x", "y" },
 238         "ab _~ cd",     " _~",  2,      { "ab", "cd" },
 239         " a_b  c~",     " _~",  5,      { "", "a", "b", "c", "" },
 240         "a b_c d~e f",  " _~",  6,      { "a", "b", "c", "d", "e f" },
 241         "~a b c d ",    " _~",  6,      { "", "a", "b", "c", "d " },
 242 
 243         "",             " _~-", 0,      { "" },
 244         " ",            " _~-", 2,      { "", "" },
 245         "x",            " _~-", 1,      { "x" },
 246         "x y",          " _~-", 2,      { "x", "y" },
 247         "ab _~- cd",    " _~-", 2,      { "ab", "cd" },
 248         " a_b  c~",     " _~-", 5,      { "", "a", "b", "c", "" },
 249         "a b_c-d~e f",  " _~-", 6,      { "a", "b", "c", "d", "e f" },
 250         "~a-b c d ",    " _~-", 6,      { "", "a", "b", "c", "d " },
 251 
 252         "",             "  ",   0,      { "" },
 253         " ",            "  ",   2,      { "", "" },
 254         "x",            "  ",   1,      { "x" },
 255         "xy",           "  ",   1,      { "xy" },
 256         "x y",          "  ",   2,      { "x", "y" },
 257         "abc def  g ",  "  ",   4,      { "abc", "def", "g", "" },
 258         "  a bcd",      "  ",   3,      { "", "a", "bcd" },
 259         "a b c d e f",  "  ",   6,      { "a", "b", "c", "d", "e f" },
 260         " a b c d ",    "  ",   6,      { "", "a", "b", "c", "d " },
 261 
 262         "",             "",     0,      { "" },
 263         " ",            "",     0,      { "" },
 264         "x",            "",     1,      { "x" },
 265         "xy",           "",     1,      { "xy" },
 266         "x y",          "",     2,      { "x", "y" },
 267         "abc def  g ",  "",     3,      { "abc", "def", "g" },
 268         "\t a bcd",     "",     2,      { "a", "bcd" },
 269         "  a \tb\t c ", "",     3,      { "a", "b", "c" },
 270         "a b c d e ",   "",     5,      { "a", "b", "c", "d", "e" },
 271         "a b\tc d e f", "",     6,      { "a", "b", "c", "d", "e f" },
 272         " a b c d e f ",        "",     6,      { "a", "b", "c", "d", "e f " },
 273 
 274         NULL,           NULL,   0,      { NULL },
 275 };
 276 
 277 regress()
 278 {
 279         char buf[512];
 280         register int n;
 281         char *fields[RNF+1];
 282         register int nf;
 283         register int i;
 284         register int printit;
 285         register char *f;
 286 
 287         for (n = 0; tests[n].str != NULL; n++) {
 288                 (void) strcpy(buf, tests[n].str);
 289                 fields[RNF] = NULL;
 290                 nf = split(buf, fields, RNF, tests[n].seps);
 291                 printit = 0;
 292                 if (nf != tests[n].nf) {
 293                         printf("split `%s' by `%s' gave %d fields, not %d\n",
 294                                 tests[n].str, tests[n].seps, nf, tests[n].nf);
 295                         printit = 1;
 296                 } else if (fields[RNF] != NULL) {
 297                         printf("split() went beyond array end\n");
 298                         printit = 1;
 299                 } else {
 300                         for (i = 0; i < nf && i < RNF; i++) {
 301                                 f = fields[i];
 302                                 if (f == NULL)
 303                                         f = "(NULL)";
 304                                 if (strcmp(f, tests[n].fi[i]) != 0) {
 305                                         printf("split `%s' by `%s', field %d is `%s', not `%s'\n",
 306                                                 tests[n].str, tests[n].seps,
 307                                                 i, fields[i], tests[n].fi[i]);
 308                                         printit = 1;
 309                                 }
 310                         }
 311                 }
 312                 if (printit)
 313                         print(nf, RNF, fields);
 314         }
 315 }
 316 #endif

/* [<][>][^][v][top][bottom][index][help] */