root/ext/filter/sanitizing_filters.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. php_filter_encode_html
  2. php_filter_encode_url
  3. php_filter_strip
  4. filter_map_init
  5. filter_map_update
  6. filter_map_apply
  7. php_filter_string
  8. php_filter_encoded
  9. php_filter_special_chars
  10. php_filter_full_special_chars
  11. php_filter_unsafe_raw
  12. php_filter_email
  13. php_filter_url
  14. php_filter_number_int
  15. php_filter_number_float
  16. php_filter_magic_quotes

   1 /*
   2   +----------------------------------------------------------------------+
   3   | PHP Version 5                                                        |
   4   +----------------------------------------------------------------------+
   5   | Copyright (c) 1997-2016 The PHP Group                                |
   6   +----------------------------------------------------------------------+
   7   | This source file is subject to version 3.01 of the PHP license,      |
   8   | that is bundled with this package in the file LICENSE, and is        |
   9   | available through the world-wide-web at the following url:           |
  10   | http://www.php.net/license/3_01.txt                                  |
  11   | If you did not receive a copy of the PHP license and are unable to   |
  12   | obtain it through the world-wide-web, please send a note to          |
  13   | license@php.net so we can mail you a copy immediately.               |
  14   +----------------------------------------------------------------------+
  15   | Authors: Derick Rethans <derick@php.net>                             |
  16   +----------------------------------------------------------------------+
  17 */
  18 
  19 /* $Id$ */
  20 
  21 #include "php_filter.h"
  22 #include "filter_private.h"
  23 #include "ext/standard/php_smart_str.h"
  24 
  25 /* {{{ STRUCTS */
  /* Lookup table with one slot per possible byte value (0-255). The
   * filter_map_* helpers in this file treat a non-zero slot as "keep
   * this byte" and a zero slot as "drop it". */
typedef unsigned long filter_map[256];
  27 /* }}} */
  28 
  29 /* {{{ HELPER FUNCTIONS */
  30 static void php_filter_encode_html(zval *value, const unsigned char *chars)
  31 {
  32         smart_str str = {0};
  33         int len = Z_STRLEN_P(value);
  34         unsigned char *s = (unsigned char *)Z_STRVAL_P(value);
  35         unsigned char *e = s + len;
  36 
  37         if (Z_STRLEN_P(value) == 0) {
  38                 return;
  39         }
  40 
  41         while (s < e) {
  42                 if (chars[*s]) {
  43                         smart_str_appendl(&str, "&#", 2);
  44                         smart_str_append_unsigned(&str, (unsigned long)*s);
  45                         smart_str_appendc(&str, ';');
  46                 } else {
  47                         /* XXX: this needs to be optimized to work with blocks of 'safe' chars */
  48                         smart_str_appendc(&str, *s);
  49                 }
  50                 s++;
  51         }
  52 
  53         smart_str_0(&str);
  54         str_efree(Z_STRVAL_P(value));
  55         Z_STRVAL_P(value) = str.c;
  56         Z_STRLEN_P(value) = str.len;
  57 }
  58 
  59 static const unsigned char hexchars[] = "0123456789ABCDEF";
  60 
  61 #define LOWALPHA    "abcdefghijklmnopqrstuvwxyz"
  62 #define HIALPHA     "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
  63 #define DIGIT       "0123456789"
  64 
  65 #define DEFAULT_URL_ENCODE    LOWALPHA HIALPHA DIGIT "-._"
  66 
  67 static void php_filter_encode_url(zval *value, const unsigned char* chars, const int char_len, int high, int low, int encode_nul)
  68 {
  69         unsigned char *str, *p;
  70         unsigned char tmp[256];
  71         unsigned char *s = (unsigned char *)chars;
  72         unsigned char *e = s + char_len;
  73 
  74         memset(tmp, 1, sizeof(tmp)-1);
  75 
  76         while (s < e) {
  77                 tmp[*s++] = 0;
  78         }
  79 /* XXX: This is not needed since these chars in the allowed list never include the high/low/null value
  80         if (encode_nul) {
  81                 tmp[0] = 1;
  82         }
  83         if (high) {
  84                 memset(tmp + 127, 1, sizeof(tmp) - 127);
  85         }
  86         if (low) {
  87                 memset(tmp, 1, 32);
  88         }
  89 */
  90         p = str = (unsigned char *) safe_emalloc(3, Z_STRLEN_P(value), 1);
  91         s = (unsigned char *)Z_STRVAL_P(value);
  92         e = s + Z_STRLEN_P(value);
  93 
  94         while (s < e) {
  95                 if (tmp[*s]) {
  96                         *p++ = '%';
  97                         *p++ = hexchars[(unsigned char) *s >> 4];
  98                         *p++ = hexchars[(unsigned char) *s & 15];
  99                 } else {
 100                         *p++ = *s;      
 101                 }
 102                 s++;    
 103         }
 104         *p = '\0';
 105         str_efree(Z_STRVAL_P(value));
 106         Z_STRVAL_P(value) = (char *)str;
 107         Z_STRLEN_P(value) = p - str;
 108 }
 109 
 110 static void php_filter_strip(zval *value, long flags)
 111 {
 112         unsigned char *buf, *str;
 113         int   i, c;
 114         
 115         /* Optimization for if no strip flags are set */
 116         if (!(flags & (FILTER_FLAG_STRIP_LOW | FILTER_FLAG_STRIP_HIGH | FILTER_FLAG_STRIP_BACKTICK))) {
 117                 return;
 118         }
 119 
 120         str = (unsigned char *)Z_STRVAL_P(value);
 121         buf = safe_emalloc(1, Z_STRLEN_P(value) + 1, 1);
 122         c = 0;
 123         for (i = 0; i < Z_STRLEN_P(value); i++) {
 124                 if ((str[i] >= 127) && (flags & FILTER_FLAG_STRIP_HIGH)) {
 125                 } else if ((str[i] < 32) && (flags & FILTER_FLAG_STRIP_LOW)) {
 126                 } else if ((str[i] == '`') && (flags & FILTER_FLAG_STRIP_BACKTICK)) {
 127                 } else {
 128                         buf[c] = str[i];
 129                         ++c;
 130                 }
 131         }
 132         /* update zval string data */
 133         buf[c] = '\0';
 134         str_efree(Z_STRVAL_P(value));
 135         Z_STRVAL_P(value) = (char *)buf;
 136         Z_STRLEN_P(value) = c;
 137 }
 138 /* }}} */
 139 
 140 /* {{{ FILTER MAP HELPERS */
 141 static void filter_map_init(filter_map *map)
 142 {
 143         memset(map, 0, sizeof(filter_map));
 144 }
 145 
 146 static void filter_map_update(filter_map *map, int flag, const unsigned char *allowed_list)
 147 {
 148         int l, i;
 149 
 150         l = strlen((const char*)allowed_list);
 151         for (i = 0; i < l; ++i) {
 152                 (*map)[allowed_list[i]] = flag;
 153         }
 154 }
 155 
 156 static void filter_map_apply(zval *value, filter_map *map)
 157 {
 158         unsigned char *buf, *str;
 159         int   i, c;
 160         
 161         str = (unsigned char *)Z_STRVAL_P(value);
 162         buf = safe_emalloc(1, Z_STRLEN_P(value) + 1, 1);
 163         c = 0;
 164         for (i = 0; i < Z_STRLEN_P(value); i++) {
 165                 if ((*map)[str[i]]) {
 166                         buf[c] = str[i];
 167                         ++c;
 168                 }
 169         }
 170         /* update zval string data */
 171         buf[c] = '\0';
 172         str_efree(Z_STRVAL_P(value));
 173         Z_STRVAL_P(value) = (char *)buf;
 174         Z_STRLEN_P(value) = c;
 175 }
 176 /* }}} */
 177 
 178 /* {{{ php_filter_string */
 179 void php_filter_string(PHP_INPUT_FILTER_PARAM_DECL)
 180 {
 181         size_t new_len;
 182         unsigned char enc[256] = {0};
 183 
 184         /* strip high/strip low ( see flags )*/
 185         php_filter_strip(value, flags);
 186 
 187         if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
 188                 enc['\''] = enc['"'] = 1;
 189         }
 190         if (flags & FILTER_FLAG_ENCODE_AMP) {
 191                 enc['&'] = 1;
 192         }
 193         if (flags & FILTER_FLAG_ENCODE_LOW) {
 194                 memset(enc, 1, 32);
 195         }
 196         if (flags & FILTER_FLAG_ENCODE_HIGH) {
 197                 memset(enc + 127, 1, sizeof(enc) - 127);
 198         }
 199 
 200         php_filter_encode_html(value, enc);
 201 
 202         /* strip tags, implicitly also removes \0 chars */
 203         new_len = php_strip_tags_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), NULL, NULL, 0, 1);
 204         Z_STRLEN_P(value) = new_len;
 205 
 206         if (new_len == 0) {
 207                 zval_dtor(value);
 208                 if (flags & FILTER_FLAG_EMPTY_STRING_NULL) {
 209                         ZVAL_NULL(value);
 210                 } else {
 211                         ZVAL_EMPTY_STRING(value);                       
 212                 }
 213                 return;
 214         }
 215 }
 216 /* }}} */
 217 
 218 /* {{{ php_filter_encoded */
 219 void php_filter_encoded(PHP_INPUT_FILTER_PARAM_DECL)
 220 {
 221         /* apply strip_high and strip_low filters */
 222         php_filter_strip(value, flags);
 223         /* urlencode */
 224         php_filter_encode_url(value, (unsigned char *)DEFAULT_URL_ENCODE, sizeof(DEFAULT_URL_ENCODE)-1, flags & FILTER_FLAG_ENCODE_HIGH, flags & FILTER_FLAG_ENCODE_LOW, 1);
 225 }
 226 /* }}} */
 227 
 228 /* {{{ php_filter_special_chars */
 229 void php_filter_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
 230 {
 231         unsigned char enc[256] = {0};
 232 
 233         php_filter_strip(value, flags);
 234 
 235         /* encodes ' " < > & \0 to numerical entities */
 236         enc['\''] = enc['"'] = enc['<'] = enc['>'] = enc['&'] = enc[0] = 1;
 237 
 238         /* if strip low is not set, then we encode them as &#xx; */
 239         memset(enc, 1, 32);
 240 
 241         if (flags & FILTER_FLAG_ENCODE_HIGH) {
 242                 memset(enc + 127, 1, sizeof(enc) - 127);
 243         }
 244         
 245         php_filter_encode_html(value, enc);     
 246 }
 247 /* }}} */
 248 
 249 /* {{{ php_filter_full_special_chars */
 250 void php_filter_full_special_chars(PHP_INPUT_FILTER_PARAM_DECL)
 251 {
 252         char *buf;
 253         size_t len;
 254         int quotes;
 255         
 256         if (!(flags & FILTER_FLAG_NO_ENCODE_QUOTES)) {
 257                 quotes = ENT_QUOTES;
 258         } else {
 259                 quotes = ENT_NOQUOTES;
 260         }
 261         buf = php_escape_html_entities_ex(Z_STRVAL_P(value), Z_STRLEN_P(value), &len, 1, quotes, SG(default_charset), 0 TSRMLS_CC);
 262         str_efree(Z_STRVAL_P(value));
 263         Z_STRVAL_P(value) = buf;
 264         Z_STRLEN_P(value) = len;
 265 }
 266 /* }}} */
 267 
 268 /* {{{ php_filter_unsafe_raw */
 269 void php_filter_unsafe_raw(PHP_INPUT_FILTER_PARAM_DECL)
 270 {
 271         /* Only if no flags are set (optimization) */
 272         if (flags != 0 && Z_STRLEN_P(value) > 0) {
 273                 unsigned char enc[256] = {0};
 274 
 275                 php_filter_strip(value, flags);
 276 
 277                 if (flags & FILTER_FLAG_ENCODE_AMP) {
 278                         enc['&'] = 1;
 279                 }
 280                 if (flags & FILTER_FLAG_ENCODE_LOW) {
 281                         memset(enc, 1, 32);
 282                 }
 283                 if (flags & FILTER_FLAG_ENCODE_HIGH) {
 284                         memset(enc + 127, 1, sizeof(enc) - 127);
 285                 }
 286 
 287                 php_filter_encode_html(value, enc);     
 288         } else if (flags & FILTER_FLAG_EMPTY_STRING_NULL && Z_STRLEN_P(value) == 0) {
 289                 zval_dtor(value);
 290                 ZVAL_NULL(value);
 291         }
 292 }
 293 /* }}} */
 294 
 295 
 296 
 297 /* {{{ php_filter_email */
 298 #define SAFE        "$-_.+"
 299 #define EXTRA       "!*'(),"
 300 #define NATIONAL    "{}|\\^~[]`"
 301 #define PUNCTUATION "<>#%\""
 302 #define RESERVED    ";/?:@&="
 303 
 304 void php_filter_email(PHP_INPUT_FILTER_PARAM_DECL)
 305 {
 306         /* Check section 6 of rfc 822 http://www.faqs.org/rfcs/rfc822.html */
 307         const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT "!#$%&'*+-=?^_`{|}~@.[]";
 308         filter_map     map;
 309 
 310         filter_map_init(&map);
 311         filter_map_update(&map, 1, allowed_list);
 312         filter_map_apply(value, &map);
 313 }
 314 /* }}} */
 315 
 316 /* {{{ php_filter_url */
 317 void php_filter_url(PHP_INPUT_FILTER_PARAM_DECL)
 318 {
 319         /* Strip all chars not part of section 5 of
 320          * http://www.faqs.org/rfcs/rfc1738.html */
 321         const unsigned char allowed_list[] = LOWALPHA HIALPHA DIGIT SAFE EXTRA NATIONAL PUNCTUATION RESERVED;
 322         filter_map     map;
 323 
 324         filter_map_init(&map);
 325         filter_map_update(&map, 1, allowed_list);
 326         filter_map_apply(value, &map);
 327 }
 328 /* }}} */
 329 
 330 /* {{{ php_filter_number_int */
 331 void php_filter_number_int(PHP_INPUT_FILTER_PARAM_DECL)
 332 {
 333         /* strip everything [^0-9+-] */
 334         const unsigned char allowed_list[] = "+-" DIGIT;
 335         filter_map     map;
 336 
 337         filter_map_init(&map);
 338         filter_map_update(&map, 1, allowed_list);
 339         filter_map_apply(value, &map);
 340 }
 341 /* }}} */
 342 
 343 /* {{{ php_filter_number_float */
 344 void php_filter_number_float(PHP_INPUT_FILTER_PARAM_DECL)
 345 {
 346         /* strip everything [^0-9+-] */
 347         const unsigned char allowed_list[] = "+-" DIGIT;
 348         filter_map     map;
 349 
 350         filter_map_init(&map);
 351         filter_map_update(&map, 1, allowed_list);
 352 
 353         /* depending on flags, strip '.', 'e', ",", "'" */
 354         if (flags & FILTER_FLAG_ALLOW_FRACTION) {
 355                 filter_map_update(&map, 2, (const unsigned char *) ".");
 356         }
 357         if (flags & FILTER_FLAG_ALLOW_THOUSAND) {
 358                 filter_map_update(&map, 3,  (const unsigned char *) ",");
 359         }
 360         if (flags & FILTER_FLAG_ALLOW_SCIENTIFIC) {
 361                 filter_map_update(&map, 4,  (const unsigned char *) "eE");
 362         }
 363         filter_map_apply(value, &map);
 364 }
 365 /* }}} */
 366 
 367 /* {{{ php_filter_magic_quotes */
 368 void php_filter_magic_quotes(PHP_INPUT_FILTER_PARAM_DECL)
 369 {
 370         char *buf;
 371         int   len;
 372         
 373         /* just call php_addslashes quotes */
 374         buf = php_addslashes(Z_STRVAL_P(value), Z_STRLEN_P(value), &len, 0 TSRMLS_CC);
 375 
 376         str_efree(Z_STRVAL_P(value));
 377         Z_STRVAL_P(value) = buf;
 378         Z_STRLEN_P(value) = len;
 379 }
 380 /* }}} */
 381 
 382 /*
 383  * Local variables:
 384  * tab-width: 4
 385  * c-basic-offset: 4
 386  * End:
 387  * vim600: noet sw=4 ts=4 fdm=marker
 388  * vim<600: noet sw=4 ts=4
 389  */

/* [<][>][^][v][top][bottom][index][help] */