root/ext/tokenizer/tokenizer.c

/* [<][>][^][v][top][bottom][index][help] */

DEFINITIONS

This source file includes the following definitions.
  1. ZEND_GET_MODULE
  2. PHP_MINFO_FUNCTION
  3. tokenize
  4. PHP_FUNCTION
  5. PHP_FUNCTION

/*
   +----------------------------------------------------------------------+
   | PHP Version 5                                                        |
   +----------------------------------------------------------------------+
   | Copyright (c) 1997-2016 The PHP Group                                |
   +----------------------------------------------------------------------+
   | This source file is subject to version 3.01 of the PHP license,      |
   | that is bundled with this package in the file LICENSE, and is        |
   | available through the world-wide-web at the following url:           |
   | http://www.php.net/license/3_01.txt                                  |
   | If you did not receive a copy of the PHP license and are unable to   |
   | obtain it through the world-wide-web, please send a note to          |
   | license@php.net so we can mail you a copy immediately.               |
   +----------------------------------------------------------------------+
   | Author: Andrei Zmievski <andrei@php.net>                             |
   +----------------------------------------------------------------------+
*/

/* $Id$ */

  21 #ifdef HAVE_CONFIG_H
  22 #include "config.h"
  23 #endif
  24 
  25 #include "php.h"
  26 #include "php_ini.h"
  27 #include "ext/standard/info.h"
  28 #include "php_tokenizer.h"
  29 
  30 #include "zend.h"
  31 #include "zend_language_scanner.h"
  32 #include "zend_language_scanner_defs.h"
  33 #include <zend_language_parser.h>
  34 
  35 #define zendtext   LANG_SCNG(yy_text)
  36 #define zendleng   LANG_SCNG(yy_leng)
  37 #define zendcursor LANG_SCNG(yy_cursor)
  38 #define zendlimit  LANG_SCNG(yy_limit)
  39 
  40 /* {{{ arginfo */
  41 ZEND_BEGIN_ARG_INFO_EX(arginfo_token_get_all, 0, 0, 1)
  42         ZEND_ARG_INFO(0, source)
  43 ZEND_END_ARG_INFO()
  44 
  45 ZEND_BEGIN_ARG_INFO_EX(arginfo_token_name, 0, 0, 1)
  46         ZEND_ARG_INFO(0, token)
  47 ZEND_END_ARG_INFO()
  48 /* }}} */
  49 
  50 /* {{{ tokenizer_functions[]
  51  *
  52  * Every user visible function must have an entry in tokenizer_functions[].
  53  */
  54 const zend_function_entry tokenizer_functions[] = {
  55         PHP_FE(token_get_all,   arginfo_token_get_all)
  56         PHP_FE(token_name,              arginfo_token_name)
  57         PHP_FE_END
  58 };
  59 /* }}} */
  60 
  61 /* {{{ tokenizer_module_entry
  62  */
  63 zend_module_entry tokenizer_module_entry = {
  64 #if ZEND_MODULE_API_NO >= 20010901
  65         STANDARD_MODULE_HEADER,
  66 #endif
  67         "tokenizer",
  68         tokenizer_functions,
  69         PHP_MINIT(tokenizer),
  70         NULL,
  71         NULL,
  72         NULL,
  73         PHP_MINFO(tokenizer),
  74 #if ZEND_MODULE_API_NO >= 20010901
  75         "0.1", /* Replace with version number for your extension */
  76 #endif
  77         STANDARD_MODULE_PROPERTIES
  78 };
  79 /* }}} */
  80 
  81 #ifdef COMPILE_DL_TOKENIZER
  82 ZEND_GET_MODULE(tokenizer)
  83 #endif
  84 
  85 /* {{{ PHP_MINIT_FUNCTION
  86  */
  87 PHP_MINIT_FUNCTION(tokenizer)
  88 {
  89         tokenizer_register_constants(INIT_FUNC_ARGS_PASSTHRU);
  90         return SUCCESS;
  91 }
  92 /* }}} */
  93 
  94 /* {{{ PHP_MINFO_FUNCTION
  95  */
  96 PHP_MINFO_FUNCTION(tokenizer)
  97 {
  98         php_info_print_table_start();
  99         php_info_print_table_row(2, "Tokenizer Support", "enabled");
 100         php_info_print_table_end();
 101 }
 102 /* }}} */
 103 
 104 static void tokenize(zval *return_value TSRMLS_DC)
 105 {
 106         zval token;
 107         zval *keyword;
 108         int token_type;
 109         zend_bool destroy;
 110         int token_line = 1;
 111         int need_tokens = -1; // for __halt_compiler lexing. -1 = disabled
 112 
 113         array_init(return_value);
 114 
 115         ZVAL_NULL(&token);
 116         while ((token_type = lex_scan(&token TSRMLS_CC))) {
 117                 destroy = 1;
 118                 switch (token_type) {
 119                         case T_CLOSE_TAG:
 120                                 if (zendtext[zendleng - 1] != '>') {
 121                                         CG(zend_lineno)++;
 122                                 }
 123                         case T_OPEN_TAG:
 124                         case T_OPEN_TAG_WITH_ECHO:
 125                         case T_WHITESPACE:
 126                         case T_COMMENT:
 127                         case T_DOC_COMMENT:
 128                                 destroy = 0;
 129                                 break;
 130                 }
 131 
 132                 if (token_type >= 256) {
 133                         MAKE_STD_ZVAL(keyword);
 134                         array_init(keyword);
 135                         add_next_index_long(keyword, token_type);
 136                         if (token_type == T_END_HEREDOC) {
 137                                 if (CG(increment_lineno)) {
 138                                         token_line = ++CG(zend_lineno);
 139                                         CG(increment_lineno) = 0;
 140                                 }
 141                         }
 142                         add_next_index_stringl(keyword, (char *)zendtext, zendleng, 1);
 143                         add_next_index_long(keyword, token_line);
 144                         add_next_index_zval(return_value, keyword);
 145                 } else {
 146                         add_next_index_stringl(return_value, (char *)zendtext, zendleng, 1);
 147                 }
 148                 if (destroy && Z_TYPE(token) != IS_NULL) {
 149                         zval_dtor(&token);
 150                 }
 151                 ZVAL_NULL(&token);
 152 
 153                 // after T_HALT_COMPILER collect the next three non-dropped tokens
 154                 if (need_tokens != -1) {
 155                         if (token_type != T_WHITESPACE && token_type != T_OPEN_TAG
 156                             && token_type != T_COMMENT && token_type != T_DOC_COMMENT
 157                             && --need_tokens == 0
 158                         ) {
 159                                 // fetch the rest into a T_INLINE_HTML
 160                                 if (zendcursor != zendlimit) {
 161                                         MAKE_STD_ZVAL(keyword);
 162                                         array_init(keyword);
 163                                         add_next_index_long(keyword, T_INLINE_HTML);
 164                                         add_next_index_stringl(keyword, (char *)zendcursor, zendlimit - zendcursor, 1);
 165                                         add_next_index_long(keyword, token_line);
 166                                         add_next_index_zval(return_value, keyword);
 167                                 }
 168                                 break;
 169                         }
 170                 } else if (token_type == T_HALT_COMPILER) {
 171                         need_tokens = 3;
 172                 }
 173 
 174                 token_line = CG(zend_lineno);
 175         }
 176 }
 177 
 178 /* {{{ proto array token_get_all(string source)
 179  */
 180 PHP_FUNCTION(token_get_all)
 181 {
 182         char *source = NULL;
 183         int argc = ZEND_NUM_ARGS();
 184         int source_len;
 185         zval source_z;
 186         zend_lex_state original_lex_state;
 187 
 188         if (zend_parse_parameters(argc TSRMLS_CC, "s", &source, &source_len) == FAILURE) {
 189                 return;
 190         }
 191 
 192         ZVAL_STRINGL(&source_z, source, source_len, 1);
 193         zend_save_lexical_state(&original_lex_state TSRMLS_CC);
 194 
 195         if (zend_prepare_string_for_scanning(&source_z, "" TSRMLS_CC) == FAILURE) {
 196                 zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
 197                 RETURN_FALSE;
 198         }
 199 
 200         LANG_SCNG(yy_state) = yycINITIAL;
 201 
 202         tokenize(return_value TSRMLS_CC);
 203         
 204         zend_restore_lexical_state(&original_lex_state TSRMLS_CC);
 205         zval_dtor(&source_z);
 206 }
 207 /* }}} */
 208 
 209 /* {{{ proto string token_name(int type)
 210  */
 211 PHP_FUNCTION(token_name)
 212 {
 213         int argc = ZEND_NUM_ARGS();
 214         long type;
 215 
 216         if (zend_parse_parameters(argc TSRMLS_CC, "l", &type) == FAILURE) {
 217                 return;
 218         }
 219         RETVAL_STRING(get_token_type_name(type), 1);
 220 }
 221 /* }}} */
 222 
/*
 * Local variables:
 * tab-width: 4
 * c-basic-offset: 4
 * End:
 * vim600: noet sw=4 ts=4 fdm=marker
 * vim<600: noet sw=4 ts=4
 */

/* [<][>][^][v][top][bottom][index][help] */