root/ext/standard/scanf.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. BuildCharSet
  2. CharInSet
  3. ReleaseCharSet
  4. ValidateFormat
  5. php_sscanf_internal
  6. scan_set_error_return

   1 /*
   2    +----------------------------------------------------------------------+
   3    | PHP Version 5                                                        |
   4    +----------------------------------------------------------------------+
   5    | Copyright (c) 1997-2016 The PHP Group                                |
   6    +----------------------------------------------------------------------+
   7    | This source file is subject to version 3.01 of the PHP license,      |
   8    | that is bundled with this package in the file LICENSE, and is        |
   9    | available through the world-wide-web at the following url:           |
  10    | http://www.php.net/license/3_01.txt                                  |
  11    | If you did not receive a copy of the PHP license and are unable to   |
  12    | obtain it through the world-wide-web, please send a note to          |
  13    | license@php.net so we can mail you a copy immediately.               |
  14    +----------------------------------------------------------------------+
  15    | Author: Clayton Collie <clcollie@mindspring.com>                     |
  16    +----------------------------------------------------------------------+
  17 */
  18 
  19 /* $Id$ */
  20 
  21 /*
  22         scanf.c --
  23 
  24         This file contains the base code which implements sscanf and by extension
  25         fscanf. Original code is from TCL8.3.0 and bears the following copyright:
  26 
  27         This software is copyrighted by the Regents of the University of
  28         California, Sun Microsystems, Inc., Scriptics Corporation,
  29         and other parties.  The following terms apply to all files associated
  30         with the software unless explicitly disclaimed in individual files.
  31 
  32         The authors hereby grant permission to use, copy, modify, distribute,
  33         and license this software and its documentation for any purpose, provided
  34         that existing copyright notices are retained in all copies and that this
  35         notice is included verbatim in any distributions. No written agreement,
  36         license, or royalty fee is required for any of the authorized uses.
  37         Modifications to this software may be copyrighted by their authors
  38         and need not follow the licensing terms described here, provided that
  39         the new terms are clearly indicated on the first page of each file where
  40         they apply.
  41 
  42         IN NO EVENT SHALL THE AUTHORS OR DISTRIBUTORS BE LIABLE TO ANY PARTY
  43         FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES
  44         ARISING OUT OF THE USE OF THIS SOFTWARE, ITS DOCUMENTATION, OR ANY
  45         DERIVATIVES THEREOF, EVEN IF THE AUTHORS HAVE BEEN ADVISED OF THE
  46         POSSIBILITY OF SUCH DAMAGE.
  47 
  48         THE AUTHORS AND DISTRIBUTORS SPECIFICALLY DISCLAIM ANY WARRANTIES,
  49         INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY,
  50         FITNESS FOR A PARTICULAR PURPOSE, AND NON-INFRINGEMENT.  THIS SOFTWARE
  51         IS PROVIDED ON AN "AS IS" BASIS, AND THE AUTHORS AND DISTRIBUTORS HAVE
  52         NO OBLIGATION TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR
  53         MODIFICATIONS.
  54 
  55         GOVERNMENT USE: If you are acquiring this software on behalf of the
  56         U.S. government, the Government shall have only "Restricted Rights"
  57         in the software and related documentation as defined in the Federal
  58         Acquisition Regulations (FARs) in Clause 52.227.19 (c) (2).  If you
  59         are acquiring the software on behalf of the Department of Defense, the
  60         software shall be classified as "Commercial Computer Software" and the
  61         Government shall have only "Restricted Rights" as defined in Clause
  62         252.227-7013 (c) (1) of DFARs.  Notwithstanding the foregoing, the
  63         authors grant the U.S. Government and others acting in its behalf
  64         permission to use and distribute the software in accordance with the
  65         terms specified in this license.
  66 */
  67 
  68 #include <stdio.h>
  69 #include <limits.h>
  70 #include <ctype.h>
  71 #include "php.h"
  72 #include "php_variables.h"
  73 #ifdef HAVE_LOCALE_H
  74 #include <locale.h>
  75 #endif
  76 #include "zend_execute.h"
  77 #include "zend_operators.h"
  78 #include "zend_strtod.h"
  79 #include "php_globals.h"
  80 #include "basic_functions.h"
  81 #include "scanf.h"
  82 
  83 /*
  84  * Flag values used internally by [f|s]canf.
  85  */
  86 #define SCAN_NOSKIP     0x1       /* Don't skip blanks. */
  87 #define SCAN_SUPPRESS   0x2       /* Suppress assignment. */
  88 #define SCAN_UNSIGNED   0x4       /* Read an unsigned value. */
  89 #define SCAN_WIDTH      0x8       /* A width value was supplied. */
  90 
  91 #define SCAN_SIGNOK     0x10      /* A +/- character is allowed. */
  92 #define SCAN_NODIGITS   0x20      /* No digits have been scanned. */
  93 #define SCAN_NOZERO     0x40      /* No zero digits have been scanned. */
  94 #define SCAN_XOK        0x80      /* An 'x' is allowed. */
  95 #define SCAN_PTOK       0x100     /* Decimal point is allowed. */
  96 #define SCAN_EXPOK      0x200     /* An exponent is allowed. */
  97 
  98 #define UCHAR(x)                (zend_uchar)(x)
  99 
 100 /*
 101  * The following structure contains the information associated with
 102  * a character set.
 103  */
 104 typedef struct CharSet {
 105         int exclude;            /* 1 if this is an exclusion set. */
 106         int nchars;
 107         char *chars;
 108         int nranges;
 109         struct Range {
 110                 char start;
 111                 char end;
 112         } *ranges;
 113 } CharSet;
 114 
 115 /*
 116  * Declarations for functions used only in this file.
 117  */
 118 static char *BuildCharSet(CharSet *cset, char *format);
 119 static int      CharInSet(CharSet *cset, int ch);
 120 static void     ReleaseCharSet(CharSet *cset);
 121 static inline void scan_set_error_return(int numVars, zval **return_value);
 122 
 123 
 124 /* {{{ BuildCharSet
 125  *----------------------------------------------------------------------
 126  *
 127  * BuildCharSet --
 128  *
 129  *      This function examines a character set format specification
 130  *      and builds a CharSet containing the individual characters and
 131  *      character ranges specified.
 132  *
 133  * Results:
 134  *      Returns the next format position.
 135  *
 136  * Side effects:
 137  *      Initializes the charset.
 138  *
 139  *----------------------------------------------------------------------
 140  */
 141 static char * BuildCharSet(CharSet *cset, char *format)
 142 {
 143         char *ch, start;
 144         int  nranges;
 145         char *end;
 146 
 147         memset(cset, 0, sizeof(CharSet));
 148 
 149         ch = format;
 150         if (*ch == '^') {
 151                 cset->exclude = 1;
 152                 ch = ++format;
 153         }
 154         end = format + 1;       /* verify this - cc */
 155 
 156         /*
 157          * Find the close bracket so we can overallocate the set.
 158          */
 159         if (*ch == ']') {
 160                 ch = end++;
 161         }
 162         nranges = 0;
 163         while (*ch != ']') {
 164                 if (*ch == '-') {
 165                         nranges++;
 166                 }
 167                 ch = end++;
 168         }
 169 
 170         cset->chars = (char *) safe_emalloc(sizeof(char), (end - format - 1), 0);
 171         if (nranges > 0) {
 172                 cset->ranges = (struct Range *) safe_emalloc(sizeof(struct Range), nranges, 0);
 173         } else {
 174                 cset->ranges = NULL;
 175         }
 176 
 177         /*
 178          * Now build the character set.
 179          */
 180         cset->nchars = cset->nranges = 0;
 181         ch    = format++;
 182         start = *ch;
 183         if (*ch == ']' || *ch == '-') {
 184                 cset->chars[cset->nchars++] = *ch;
 185                 ch = format++;
 186         }
 187         while (*ch != ']') {
 188                 if (*format == '-') {
 189                         /*
 190                          * This may be the first character of a range, so don't add
 191                          * it yet.
 192                          */
 193                         start = *ch;
 194                 } else if (*ch == '-') {
 195                         /*
 196                          * Check to see if this is the last character in the set, in which
 197                          * case it is not a range and we should add the previous character
 198                          * as well as the dash.
 199                          */
 200                         if (*format == ']') {
 201                                 cset->chars[cset->nchars++] = start;
 202                                 cset->chars[cset->nchars++] = *ch;
 203                         } else {
 204                                 ch = format++;
 205 
 206                                 /*
 207                                  * Check to see if the range is in reverse order.
 208                                  */
 209                                 if (start < *ch) {
 210                                         cset->ranges[cset->nranges].start = start;
 211                                         cset->ranges[cset->nranges].end = *ch;
 212                                 } else {
 213                                         cset->ranges[cset->nranges].start = *ch;
 214                                         cset->ranges[cset->nranges].end = start;
 215                                 }
 216                                 cset->nranges++;
 217                         }
 218                 } else {
 219                         cset->chars[cset->nchars++] = *ch;
 220                 }
 221                 ch = format++;
 222         }
 223         return format;
 224 }
 225 /* }}} */
 226 
 227 /* {{{ CharInSet
 228  *----------------------------------------------------------------------
 229  *
 230  * CharInSet --
 231  *
 232  *      Check to see if a character matches the given set.
 233  *
 234  * Results:
 235  *      Returns non-zero if the character matches the given set.
 236  *
 237  * Side effects:
 238  *      None.
 239  *
 240  *----------------------------------------------------------------------
 241  */
 242 static int CharInSet(CharSet *cset, int c)
 243 {
 244         char ch = (char) c;
 245         int i, match = 0;
 246 
 247         for (i = 0; i < cset->nchars; i++) {
 248                 if (cset->chars[i] == ch) {
 249                         match = 1;
 250                         break;
 251                 }
 252         }
 253         if (!match) {
 254                 for (i = 0; i < cset->nranges; i++) {
 255                         if ((cset->ranges[i].start <= ch)
 256                                 && (ch <= cset->ranges[i].end)) {
 257                                 match = 1;
 258                                 break;
 259                         }
 260                 }
 261         }
 262         return (cset->exclude ? !match : match);
 263 }
 264 /* }}} */
 265 
 266 /* {{{ ReleaseCharSet
 267  *----------------------------------------------------------------------
 268  *
 269  * ReleaseCharSet --
 270  *
 271  *      Free the storage associated with a character set.
 272  *
 273  * Results:
 274  *      None.
 275  *
 276  * Side effects:
 277  *      None.
 278  *
 279  *----------------------------------------------------------------------
 280  */
 281 static void ReleaseCharSet(CharSet *cset)
 282 {
 283         efree((char *)cset->chars);
 284         if (cset->ranges) {
 285                 efree((char *)cset->ranges);
 286         }
 287 }
 288 /* }}} */
 289 
 290 /* {{{ ValidateFormat
 291  *----------------------------------------------------------------------
 292  *
 293  * ValidateFormat --
 294  *
 295  *      Parse the format string and verify that it is properly formed
 296  *      and that there are exactly enough variables on the command line.
 297  *
 298  * Results:
 299  *    FAILURE or SUCCESS.
 300  *
 301  * Side effects:
 302  *     May set php_error based on abnormal conditions.
 303  *
 304  * Parameters :
 305  *     format     The format string.
 306  *     numVars    The number of variables passed to the scan command.
 307  *     totalSubs  The number of variables that will be required.
 308  *
 309  *----------------------------------------------------------------------
 310 */
 311 PHPAPI int ValidateFormat(char *format, int numVars, int *totalSubs)
 312 {
 313 #define STATIC_LIST_SIZE 16
 314         int gotXpg, gotSequential, value, i, flags;
 315         char *end, *ch = NULL;
 316         int staticAssign[STATIC_LIST_SIZE];
 317         int *nassign = staticAssign;
 318         int objIndex, xpgSize, nspace = STATIC_LIST_SIZE;
 319         TSRMLS_FETCH();
 320 
 321         /*
 322          * Initialize an array that records the number of times a variable
 323          * is assigned to by the format string.  We use this to detect if
 324          * a variable is multiply assigned or left unassigned.
 325          */
 326         if (numVars > nspace) {
 327                 nassign = (int*)safe_emalloc(sizeof(int), numVars, 0);
 328                 nspace = numVars;
 329         }
 330         for (i = 0; i < nspace; i++) {
 331                 nassign[i] = 0;
 332         }
 333 
 334         xpgSize = objIndex = gotXpg = gotSequential = 0;
 335 
 336         while (*format != '\0') {
 337                 ch = format++;
 338                 flags = 0;
 339 
 340                 if (*ch != '%') {
 341                         continue;
 342                 }
 343                 ch = format++;
 344                 if (*ch == '%') {
 345                         continue;
 346                 }
 347                 if (*ch == '*') {
 348                         flags |= SCAN_SUPPRESS;
 349                         ch = format++;
 350                         goto xpgCheckDone;
 351                 }
 352 
 353                 if ( isdigit( (int)*ch ) ) {
 354                         /*
 355                          * Check for an XPG3-style %n$ specification.  Note: there
 356                          * must not be a mixture of XPG3 specs and non-XPG3 specs
 357                          * in the same format string.
 358                          */
 359                         value = strtoul(format-1, &end, 10);
 360                         if (*end != '$') {
 361                                 goto notXpg;
 362                         }
 363                         format = end+1;
 364                         ch     = format++;
 365                         gotXpg = 1;
 366                         if (gotSequential) {
 367                                 goto mixedXPG;
 368                         }
 369                         objIndex = value - 1;
 370                         if ((objIndex < 0) || (numVars && (objIndex >= numVars))) {
 371                                 goto badIndex;
 372                         } else if (numVars == 0) {
 373                                 /*
 374                                  * In the case where no vars are specified, the user can
 375                                  * specify %9999$ legally, so we have to consider special
 376                                  * rules for growing the assign array.  'value' is
 377                                  * guaranteed to be > 0.
 378                                  */
 379 
 380                                 /* set a lower artificial limit on this
 381                                  * in the interest of security and resource friendliness
 382                                  * 255 arguments should be more than enough. - cc
 383                                  */
 384                                 if (value > SCAN_MAX_ARGS) {
 385                                         goto badIndex;
 386                                 }
 387 
 388                                 xpgSize = (xpgSize > value) ? xpgSize : value;
 389                         }
 390                         goto xpgCheckDone;
 391                 }
 392 
 393 notXpg:
 394                 gotSequential = 1;
 395                 if (gotXpg) {
 396 mixedXPG:
 397                         php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", "cannot mix \"%\" and \"%n$\" conversion specifiers");
 398                         goto error;
 399                 }
 400 
 401 xpgCheckDone:
 402                 /*
 403                  * Parse any width specifier.
 404                  */
 405                 if (isdigit(UCHAR(*ch))) {
 406                         value = strtoul(format-1, &format, 10);
 407                         flags |= SCAN_WIDTH;
 408                         ch = format++;
 409                 }
 410 
 411                 /*
 412                  * Ignore size specifier.
 413                  */
 414                 if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
 415                         ch = format++;
 416                 }
 417 
 418                 if (!(flags & SCAN_SUPPRESS) && numVars && (objIndex >= numVars)) {
 419                         goto badIndex;
 420                 }
 421 
 422                 /*
 423                  * Handle the various field types.
 424                  */
 425                 switch (*ch) {
 426                         case 'n':
 427                         case 'd':
 428                         case 'D':
 429                         case 'i':
 430                         case 'o':
 431                         case 'x':
 432                         case 'X':
 433                         case 'u':
 434                         case 'f':
 435                         case 'e':
 436                         case 'E':
 437                         case 'g':
 438                         case 's':
 439                                 break;
 440 
 441                         case 'c':
 442                                 /* we differ here with the TCL implementation in allowing for */
 443                                 /* a character width specification, to be more consistent with */
 444                                 /* ANSI. since Zend auto allocates space for vars, this is no */
 445                                 /* problem - cc                                               */
 446                                 /*
 447                                 if (flags & SCAN_WIDTH) {
 448                                         php_error_docref(NULL TSRMLS_CC, E_WARNING, "Field width may not be specified in %c conversion");
 449                                         goto error;
 450                                 }
 451                                 */
 452                                 break;
 453 
 454                         case '[':
 455                                 if (*format == '\0') {
 456                                         goto badSet;
 457                                 }
 458                                 ch = format++;
 459                                 if (*ch == '^') {
 460                                         if (*format == '\0') {
 461                                                 goto badSet;
 462                                         }
 463                                         ch = format++;
 464                                 }
 465                                 if (*ch == ']') {
 466                                         if (*format == '\0') {
 467                                                 goto badSet;
 468                                         }
 469                                         ch = format++;
 470                                 }
 471                                 while (*ch != ']') {
 472                                         if (*format == '\0') {
 473                                                 goto badSet;
 474                                         }
 475                                         ch = format++;
 476                                 }
 477                                 break;
 478 badSet:
 479                                 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unmatched [ in format string");
 480                                 goto error;
 481 
 482                         default: {
 483                                 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Bad scan conversion character \"%c\"", *ch);
 484                                 goto error;
 485                         }
 486                 }
 487 
 488                 if (!(flags & SCAN_SUPPRESS)) {
 489                         if (objIndex >= nspace) {
 490                                 /*
 491                                  * Expand the nassign buffer.  If we are using XPG specifiers,
 492                                  * make sure that we grow to a large enough size.  xpgSize is
 493                                  * guaranteed to be at least one larger than objIndex.
 494                                  */
 495                                 value = nspace;
 496                                 if (xpgSize) {
 497                                         nspace = xpgSize;
 498                                 } else {
 499                                         nspace += STATIC_LIST_SIZE;
 500                                 }
 501                                 if (nassign == staticAssign) {
 502                                         nassign = (void *)safe_emalloc(nspace, sizeof(int), 0);
 503                                         for (i = 0; i < STATIC_LIST_SIZE; ++i) {
 504                                                 nassign[i] = staticAssign[i];
 505                                         }
 506                                 } else {
 507                                         nassign = (void *)erealloc((void *)nassign, nspace * sizeof(int));
 508                                 }
 509                                 for (i = value; i < nspace; i++) {
 510                                         nassign[i] = 0;
 511                                 }
 512                         }
 513                         nassign[objIndex]++;
 514                         objIndex++;
 515                 }
 516         } /* while (*format != '\0') */
 517 
 518         /*
 519          * Verify that all of the variable were assigned exactly once.
 520          */
 521         if (numVars == 0) {
 522                 if (xpgSize) {
 523                         numVars = xpgSize;
 524                 } else {
 525                         numVars = objIndex;
 526                 }
 527         }
 528         if (totalSubs) {
 529                 *totalSubs = numVars;
 530         }
 531         for (i = 0; i < numVars; i++) {
 532                 if (nassign[i] > 1) {
 533                         php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", "Variable is assigned by multiple \"%n$\" conversion specifiers");
 534                         goto error;
 535                 } else if (!xpgSize && (nassign[i] == 0)) {
 536                         /*
 537                          * If the space is empty, and xpgSize is 0 (means XPG wasn't
 538                          * used, and/or numVars != 0), then too many vars were given
 539                          */
 540                         php_error_docref(NULL TSRMLS_CC, E_WARNING, "Variable is not assigned by any conversion specifiers");
 541                         goto error;
 542                 }
 543         }
 544 
 545         if (nassign != staticAssign) {
 546                 efree((char *)nassign);
 547         }
 548         return SCAN_SUCCESS;
 549 
 550 badIndex:
 551         if (gotXpg) {
 552                 php_error_docref(NULL TSRMLS_CC, E_WARNING, "%s", "\"%n$\" argument index out of range");
 553         } else {
 554                 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Different numbers of variable names and field specifiers");
 555         }
 556 
 557 error:
 558         if (nassign != staticAssign) {
 559                 efree((char *)nassign);
 560         }
 561         return SCAN_ERROR_INVALID_FORMAT;
 562 #undef STATIC_LIST_SIZE
 563 }
 564 /* }}} */
 565 
 566 /* {{{ php_sscanf_internal
 567  * This is the internal function which does processing on behalf of
 568  * both sscanf() and fscanf()
 569  *
 570  * parameters :
 571  *              string          literal string to be processed
 572  *              format          format string
 573  *              argCount        total number of elements in the args array
 574  *              args            arguments passed in from user function (f|s)scanf
 575  *              varStart        offset (in args) of 1st variable passed in to (f|s)scanf
 576  *              return_value set with the results of the scan
 577  */
 578 
 579 PHPAPI int php_sscanf_internal( char *string, char *format,
 580                                 int argCount, zval ***args,
 581                                 int varStart, zval **return_value TSRMLS_DC)
 582 {
 583         int  numVars, nconversions, totalVars = -1;
 584         int  i, result;
 585         long value;
 586         int  objIndex;
 587         char *end, *baseString;
 588         zval **current;
 589         char op   = 0;
 590         int  base = 0;
 591         int  underflow = 0;
 592         size_t width;
 593         long (*fn)() = NULL;
 594         char *ch, sch;
 595         int  flags;
 596         char buf[64];   /* Temporary buffer to hold scanned number
 597                                          * strings before they are passed to strtoul() */
 598 
 599         /* do some sanity checking */
 600         if ((varStart > argCount) || (varStart < 0)){
 601                 varStart = SCAN_MAX_ARGS + 1;
 602         }
 603         numVars = argCount - varStart;
 604         if (numVars < 0) {
 605                 numVars = 0;
 606         }
 607 
 608 #if 0
 609         zend_printf("<br>in sscanf_internal : <br> string is \"%s\", format = \"%s\"<br> NumVars = %d. VarStart = %d<br>-------------------------<br>",
 610                                         string, format, numVars, varStart);
 611 #endif
 612         /*
 613          * Check for errors in the format string.
 614          */
 615         if (ValidateFormat(format, numVars, &totalVars) != SCAN_SUCCESS) {
 616                 scan_set_error_return( numVars, return_value );
 617                 return SCAN_ERROR_INVALID_FORMAT;
 618         }
 619 
 620         objIndex = numVars ? varStart : 0;
 621 
 622         /*
 623          * If any variables are passed, make sure they are all passed by reference
 624          */
 625         if (numVars) {
 626                 for (i = varStart;i < argCount;i++){
 627                         if ( ! PZVAL_IS_REF( *args[ i ] ) ) {
 628                                 php_error_docref(NULL TSRMLS_CC, E_WARNING, "Parameter %d must be passed by reference", i);
 629                                 scan_set_error_return(numVars, return_value);
 630                                 return SCAN_ERROR_VAR_PASSED_BYVAL;
 631                         }
 632                 }
 633         }
 634 
 635         /*
 636          * Allocate space for the result objects. Only happens when no variables
 637          * are specified
 638          */
 639         if (!numVars) {
 640                 zval *tmp;
 641 
 642                 /* allocate an array for return */
 643                 array_init(*return_value);
 644 
 645                 for (i = 0; i < totalVars; i++) {
 646                         MAKE_STD_ZVAL(tmp);
 647                         ZVAL_NULL(tmp);
 648                         if (add_next_index_zval(*return_value, tmp) == FAILURE) {
 649                                 scan_set_error_return(0, return_value);
 650                                 return FAILURE;
 651                         }
 652                 }
 653                 varStart = 0; /* Array index starts from 0 */
 654         }
 655 
 656         baseString = string;
 657 
 658         /*
 659          * Iterate over the format string filling in the result objects until
 660          * we reach the end of input, the end of the format string, or there
 661          * is a mismatch.
 662          */
 663         nconversions = 0;
 664         /* note ! - we need to limit the loop for objIndex to keep it in bounds */
 665 
 666         while (*format != '\0') {
 667                 ch    = format++;
 668                 flags = 0;
 669 
 670                 /*
 671                  * If we see whitespace in the format, skip whitespace in the string.
 672                  */
 673                 if ( isspace( (int)*ch ) ) {
 674                         sch = *string;
 675                         while ( isspace( (int)sch ) ) {
 676                                 if (*string == '\0') {
 677                                         goto done;
 678                                 }
 679                                 string++;
 680                                 sch = *string;
 681                         }
 682                         continue;
 683                 }
 684 
 685                 if (*ch != '%') {
 686 literal:
 687                         if (*string == '\0') {
 688                                 underflow = 1;
 689                                 goto done;
 690                         }
 691                         sch = *string;
 692                         string++;
 693                         if (*ch != sch) {
 694                                 goto done;
 695                         }
 696                         continue;
 697                 }
 698 
 699                 ch = format++;
 700                 if (*ch == '%') {
 701                         goto literal;
 702                 }
 703 
 704                 /*
 705                  * Check for assignment suppression ('*') or an XPG3-style
 706                  * assignment ('%n$').
 707                  */
 708                 if (*ch == '*') {
 709                         flags |= SCAN_SUPPRESS;
 710                         ch = format++;
 711                 } else if ( isdigit(UCHAR(*ch))) {
 712                         value = strtoul(format-1, &end, 10);
 713                         if (*end == '$') {
 714                                 format = end+1;
 715                                 ch = format++;
 716                                 objIndex = varStart + value - 1;
 717                         }
 718                 }
 719 
 720                 /*
 721                  * Parse any width specifier.
 722                  */
 723                 if ( isdigit(UCHAR(*ch))) {
 724                         width = strtoul(format-1, &format, 10);
 725                         ch = format++;
 726                 } else {
 727                         width = 0;
 728                 }
 729 
 730                 /*
 731                  * Ignore size specifier.
 732                  */
 733                 if ((*ch == 'l') || (*ch == 'L') || (*ch == 'h')) {
 734                         ch = format++;
 735                 }
 736 
 737                 /*
 738                  * Handle the various field types.
 739                  */
 740                 switch (*ch) {
 741                         case 'n':
 742                                 if (!(flags & SCAN_SUPPRESS)) {
 743                                         if (numVars && objIndex >= argCount) {
 744                                                 break;
 745                                         } else if (numVars) {
 746                                                 zend_uint refcount;
 747 
 748                                                 current = args[objIndex++];
 749                                                 refcount = Z_REFCOUNT_PP(current);
 750                                                 zval_dtor( *current );
 751                                                 ZVAL_LONG( *current, (long)(string - baseString) );
 752                                                 Z_SET_REFCOUNT_PP(current, refcount);
 753                                                 Z_SET_ISREF_PP(current);
 754                                         } else {
 755                                                 add_index_long(*return_value, objIndex++, string - baseString);
 756                                         }
 757                                 }
 758                                 nconversions++;
 759                                 continue;
 760 
 761                         case 'd':
 762                         case 'D':
 763                                 op = 'i';
 764                                 base = 10;
 765                                 fn = (long (*)())strtol;
 766                                 break;
 767                         case 'i':
 768                                 op = 'i';
 769                                 base = 0;
 770                                 fn = (long (*)())strtol;
 771                                 break;
 772                         case 'o':
 773                                 op = 'i';
 774                                 base = 8;
 775                                 fn = (long (*)())strtol;
 776                                 break;
 777                         case 'x':
 778                         case 'X':
 779                                 op = 'i';
 780                                 base = 16;
 781                                 fn = (long (*)())strtol;
 782                                 break;
 783                         case 'u':
 784                                 op = 'i';
 785                                 base = 10;
 786                                 flags |= SCAN_UNSIGNED;
 787                                 fn = (long (*)())strtoul;
 788                                 break;
 789 
 790                         case 'f':
 791                         case 'e':
 792                         case 'E':
 793                         case 'g':
 794                                 op = 'f';
 795                                 break;
 796 
 797                         case 's':
 798                                 op = 's';
 799                                 break;
 800 
 801                         case 'c':
 802                                 op = 's';
 803                                 flags |= SCAN_NOSKIP;
 804                                 /*-cc-*/
 805                                 if (0 == width) {
 806                                         width = 1;
 807                                 }
 808                                 /*-cc-*/
 809                                 break;
 810                         case '[':
 811                                 op = '[';
 812                                 flags |= SCAN_NOSKIP;
 813                                 break;
 814                 }   /* switch */
 815 
 816                 /*
 817                  * At this point, we will need additional characters from the
 818                  * string to proceed.
 819                  */
 820                 if (*string == '\0') {
 821                         underflow = 1;
 822                         goto done;
 823                 }
 824 
 825                 /*
 826                  * Skip any leading whitespace at the beginning of a field unless
 827                  * the format suppresses this behavior.
 828                  */
 829                 if (!(flags & SCAN_NOSKIP)) {
 830                         while (*string != '\0') {
 831                                 sch = *string;
 832                                 if (! isspace((int)sch) ) {
 833                                         break;
 834                                 }
 835                                 string++;
 836                         }
 837                         if (*string == '\0') {
 838                                 underflow = 1;
 839                                 goto done;
 840                         }
 841                 }
 842 
 843                 /*
 844                  * Perform the requested scanning operation.
 845                  */
 846                 switch (op) {
 847                         case 'c':
 848                         case 's':
 849                                 /*
 850                                  * Scan a string up to width characters or whitespace.
 851                                  */
 852                                 if (width == 0) {
 853                                         width = (size_t) ~0;
 854                                 }
 855                                 end = string;
 856                                 while (*end != '\0') {
 857                                         sch = *end;
 858                                         if ( isspace( (int)sch ) ) {
 859                                                 break;
 860                                         }
 861                                         end++;
 862                                         if (--width == 0) {
 863                                            break;
 864                                         }
 865                                 }
 866                                 if (!(flags & SCAN_SUPPRESS)) {
 867                                         if (numVars && objIndex >= argCount) {
 868                                                 break;
 869                                         } else if (numVars) {
 870                                                 zend_uint refcount;
 871 
 872                                                 current = args[objIndex++];
 873                                                 refcount = Z_REFCOUNT_PP(current);
 874                                                 zval_dtor( *current );
 875                                                 ZVAL_STRINGL( *current, string, end-string, 1);
 876                                                 Z_SET_REFCOUNT_PP(current, refcount);
 877                                                 Z_SET_ISREF_PP(current);
 878                                         } else {
 879                                                 add_index_stringl( *return_value, objIndex++, string, end-string, 1);
 880                                         }
 881                                 }
 882                                 string = end;
 883                                 break;
 884 
 885                         case '[': {
 886                                 CharSet cset;
 887 
 888                                 if (width == 0) {
 889                                         width = (size_t) ~0;
 890                                 }
 891                                 end = string;
 892 
 893                                 format = BuildCharSet(&cset, format);
 894                                 while (*end != '\0') {
 895                                         sch = *end;
 896                                         if (!CharInSet(&cset, (int)sch)) {
 897                                                 break;
 898                                         }
 899                                         end++;
 900                                         if (--width == 0) {
 901                                                 break;
 902                                         }
 903                                 }
 904                                 ReleaseCharSet(&cset);
 905 
 906                                 if (string == end) {
 907                                         /*
 908                                          * Nothing matched the range, stop processing
 909                                          */
 910                                         goto done;
 911                                 }
 912                                 if (!(flags & SCAN_SUPPRESS)) {
 913                                         if (numVars && objIndex >= argCount) {
 914                                                 break;
 915                                         } else if (numVars) {
 916                                                 current = args[objIndex++];
 917                                                 zval_dtor( *current );
 918                                                 ZVAL_STRINGL( *current, string, end-string, 1);
 919                                         } else {
 920                                                 add_index_stringl(*return_value, objIndex++, string, end-string, 1);
 921                                         }
 922                                 }
 923                                 string = end;
 924                                 break;
 925                         }
 926 /*
 927                         case 'c':
 928                            / Scan a single character./
 929 
 930                                 sch = *string;
 931                                 string++;
 932                                 if (!(flags & SCAN_SUPPRESS)) {
 933                                         if (numVars) {
 934                                                 char __buf[2];
 935                                                 __buf[0] = sch;
 936                                                 __buf[1] = '\0';;
 937                                                 current = args[objIndex++];
 938                                                 zval_dtor(*current);
 939                                                 ZVAL_STRINGL( *current, __buf, 1, 1);
 940                                         } else {
 941                                                 add_index_stringl(*return_value, objIndex++, &sch, 1, 1);
 942                                         }
 943                                 }
 944                                 break;
 945 */
 946                         case 'i':
 947                                 /*
 948                                  * Scan an unsigned or signed integer.
 949                                  */
 950                                 /*-cc-*/
 951                                 buf[0] = '\0';
 952                                 /*-cc-*/
 953                                 if ((width == 0) || (width > sizeof(buf) - 1)) {
 954                                         width = sizeof(buf) - 1;
 955                                 }
 956 
 957                                 flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO;
 958                                 for (end = buf; width > 0; width--) {
 959                                         switch (*string) {
 960                                                 /*
 961                                                  * The 0 digit has special meaning at the beginning of
 962                                                  * a number.  If we are unsure of the base, it
 963                                                  * indicates that we are in base 8 or base 16 (if it is
 964                                                  * followed by an 'x').
 965                                                  */
 966                                                 case '0':
 967                                                         /*-cc-*/
 968                                                         if (base == 16) {
 969                                                                 flags |= SCAN_XOK;
 970                                                         }
 971                                                         /*-cc-*/
 972                                                         if (base == 0) {
 973                                                                 base = 8;
 974                                                                 flags |= SCAN_XOK;
 975                                                         }
 976                                                         if (flags & SCAN_NOZERO) {
 977                                                                 flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS | SCAN_NOZERO);
 978                                                         } else {
 979                                                                 flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
 980                                                         }
 981                                                         goto addToInt;
 982 
 983                                                 case '1': case '2': case '3': case '4':
 984                                                 case '5': case '6': case '7':
 985                                                         if (base == 0) {
 986                                                                 base = 10;
 987                                                         }
 988                                                         flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
 989                                                         goto addToInt;
 990 
 991                                                 case '8': case '9':
 992                                                         if (base == 0) {
 993                                                                 base = 10;
 994                                                         }
 995                                                         if (base <= 8) {
 996                                                            break;
 997                                                         }
 998                                                         flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
 999                                                         goto addToInt;
1000 
1001                                                 case 'A': case 'B': case 'C':
1002                                                 case 'D': case 'E': case 'F':
1003                                                 case 'a': case 'b': case 'c':
1004                                                 case 'd': case 'e': case 'f':
1005                                                         if (base <= 10) {
1006                                                                 break;
1007                                                         }
1008                                                         flags &= ~(SCAN_SIGNOK | SCAN_XOK | SCAN_NODIGITS);
1009                                                         goto addToInt;
1010 
1011                                                 case '+': case '-':
1012                                                         if (flags & SCAN_SIGNOK) {
1013                                                                 flags &= ~SCAN_SIGNOK;
1014                                                                 goto addToInt;
1015                                                         }
1016                                                         break;
1017 
1018                                                 case 'x': case 'X':
1019                                                         if ((flags & SCAN_XOK) && (end == buf+1)) {
1020                                                                 base = 16;
1021                                                                 flags &= ~SCAN_XOK;
1022                                                                 goto addToInt;
1023                                                         }
1024                                                         break;
1025                                         }
1026 
1027                                         /*
1028                                          * We got an illegal character so we are done accumulating.
1029                                          */
1030                                         break;
1031 
1032 addToInt:
1033                                         /*
1034                                          * Add the character to the temporary buffer.
1035                                          */
1036                                         *end++ = *string++;
1037                                         if (*string == '\0') {
1038                                                 break;
1039                                         }
1040                                 }
1041 
1042                                 /*
1043                                  * Check to see if we need to back up because we only got a
1044                                  * sign or a trailing x after a 0.
1045                                  */
1046                                 if (flags & SCAN_NODIGITS) {
1047                                         if (*string == '\0') {
1048                                                 underflow = 1;
1049                                         }
1050                                         goto done;
1051                                 } else if (end[-1] == 'x' || end[-1] == 'X') {
1052                                         end--;
1053                                         string--;
1054                                 }
1055 
1056                                 /*
1057                                  * Scan the value from the temporary buffer.  If we are
1058                                  * returning a large unsigned value, we have to convert it back
1059                                  * to a string since PHP only supports signed values.
1060                                  */
1061                                 if (!(flags & SCAN_SUPPRESS)) {
1062                                         *end = '\0';
1063                                         value = (long) (*fn)(buf, NULL, base);
1064                                         if ((flags & SCAN_UNSIGNED) && (value < 0)) {
1065                                                 snprintf(buf, sizeof(buf), "%lu", value); /* INTL: ISO digit */
1066                                                 if (numVars && objIndex >= argCount) {
1067                                                         break;
1068                                                 } else if (numVars) {
1069                                                   /* change passed value type to string */
1070                                                         current = args[objIndex++];
1071                                                         zval_dtor(*current);
1072                                                         ZVAL_STRING( *current, buf, 1 );
1073                                                 } else {
1074                                                         add_index_string(*return_value, objIndex++, buf, 1);
1075                                                 }
1076                                         } else {
1077                                                 if (numVars && objIndex >= argCount) {
1078                                                         break;
1079                                                 } else if (numVars) {
1080                                                         current = args[objIndex++];
1081                                                         zval_dtor(*current);
1082                                                         ZVAL_LONG(*current, value);
1083                                                 } else {
1084                                                         add_index_long(*return_value, objIndex++, value);
1085                                                 }
1086                                         }
1087                                 }
1088                                 break;
1089 
1090                         case 'f':
1091                                 /*
1092                                  * Scan a floating point number
1093                                  */
1094                                 buf[0] = '\0';     /* call me pedantic */
1095                                 if ((width == 0) || (width > sizeof(buf) - 1)) {
1096                                         width = sizeof(buf) - 1;
1097                                 }
1098                                 flags |= SCAN_SIGNOK | SCAN_NODIGITS | SCAN_PTOK | SCAN_EXPOK;
1099                                 for (end = buf; width > 0; width--) {
1100                                         switch (*string) {
1101                                                 case '0': case '1': case '2': case '3':
1102                                                 case '4': case '5': case '6': case '7':
1103                                                 case '8': case '9':
1104                                                         flags &= ~(SCAN_SIGNOK | SCAN_NODIGITS);
1105                                                         goto addToFloat;
1106                                                 case '+':
1107                                                 case '-':
1108                                                         if (flags & SCAN_SIGNOK) {
1109                                                                 flags &= ~SCAN_SIGNOK;
1110                                                                 goto addToFloat;
1111                                                         }
1112                                                         break;
1113                                                 case '.':
1114                                                         if (flags & SCAN_PTOK) {
1115                                                                 flags &= ~(SCAN_SIGNOK | SCAN_PTOK);
1116                                                                 goto addToFloat;
1117                                                         }
1118                                                         break;
1119                                                 case 'e':
1120                                                 case 'E':
1121                                                         /*
1122                                                          * An exponent is not allowed until there has
1123                                                          * been at least one digit.
1124                                                          */
1125                                                         if ((flags & (SCAN_NODIGITS | SCAN_EXPOK)) == SCAN_EXPOK) {
1126                                                                 flags = (flags & ~(SCAN_EXPOK|SCAN_PTOK))
1127                                                                         | SCAN_SIGNOK | SCAN_NODIGITS;
1128                                                                 goto addToFloat;
1129                                                         }
1130                                                         break;
1131                                         }
1132 
1133                                         /*
1134                                          * We got an illegal character so we are done accumulating.
1135                                          */
1136                                         break;
1137 
1138 addToFloat:
1139                                         /*
1140                                          * Add the character to the temporary buffer.
1141                                          */
1142                                         *end++ = *string++;
1143                                         if (*string == '\0') {
1144                                                 break;
1145                                         }
1146                                 }
1147 
1148                                 /*
1149                                  * Check to see if we need to back up because we saw a
1150                                  * trailing 'e' or sign.
1151                                  */
1152                                 if (flags & SCAN_NODIGITS) {
1153                                         if (flags & SCAN_EXPOK) {
1154                                                 /*
1155                                                  * There were no digits at all so scanning has
1156                                                  * failed and we are done.
1157                                                  */
1158                                                 if (*string == '\0') {
1159                                                         underflow = 1;
1160                                                 }
1161                                                 goto done;
1162                                         }
1163 
1164                                         /*
1165                                          * We got a bad exponent ('e' and maybe a sign).
1166                                          */
1167                                         end--;
1168                                         string--;
1169                                         if (*end != 'e' && *end != 'E') {
1170                                                 end--;
1171                                                 string--;
1172                                         }
1173                                 }
1174 
1175                                 /*
1176                                  * Scan the value from the temporary buffer.
1177                                  */
1178                                 if (!(flags & SCAN_SUPPRESS)) {
1179                                         double dvalue;
1180                                         *end = '\0';
1181                                         dvalue = zend_strtod(buf, NULL);
1182                                         if (numVars && objIndex >= argCount) {
1183                                                 break;
1184                                         } else if (numVars) {
1185                                                 current = args[objIndex++];
1186                                                 zval_dtor(*current);
1187                                                 ZVAL_DOUBLE(*current, dvalue);
1188                                         } else {
1189                                                 add_index_double( *return_value, objIndex++, dvalue );
1190                                         }
1191                                 }
1192                                 break;
1193                 } /* switch (op) */
1194                 nconversions++;
1195         } /*  while (*format != '\0') */
1196 
1197 done:
1198         result = SCAN_SUCCESS;
1199 
1200         if (underflow && (0==nconversions)) {
1201                 scan_set_error_return( numVars, return_value );
1202                 result = SCAN_ERROR_EOF;
1203         } else if (numVars) {
1204                 convert_to_long( *return_value );
1205                 Z_LVAL_PP(return_value) = nconversions;
1206         } else if (nconversions < totalVars) {
1207                 /* TODO: not all elements converted. we need to prune the list - cc */
1208         }
1209         return result;
1210 }
1211 /* }}} */
1212 
1213 /* the compiler choked when i tried to make this a macro    */
1214 static inline void scan_set_error_return(int numVars, zval **return_value) /* {{{ */
1215 {
1216         if (numVars) {
1217                 Z_TYPE_PP(return_value) = IS_LONG;
1218                 Z_LVAL_PP(return_value) = SCAN_ERROR_EOF;  /* EOF marker */
1219         } else {
1220                 /* convert_to_null calls destructor */
1221                 convert_to_null( *return_value );
1222         }
1223 }
1224 /* }}} */
1225 
1226 /*
1227  * Local variables:
1228  * tab-width: 4
1229  * c-basic-offset: 4
1230  * End:
1231  * vim600: sw=4 ts=4 fdm=marker
1232  * vim<600: sw=4 ts=4
1233  */

/* [<][>][^][v][top][bottom][index][help] */