moriyoshi    Mon, 20 Dec 2010 03:16:09 +0000

Revision: http://svn.php.net/viewvc?view=revision&revision=306486

Log:
- Avoid allocating extra buffers. This makes parsing with zend.multibyte enabled as fast as with it disabled.

Changed paths:
    U   php/php-src/trunk/Zend/zend_language_scanner.h
    U   php/php-src/trunk/Zend/zend_language_scanner.l

Modified: php/php-src/trunk/Zend/zend_language_scanner.h =================================================================== --- php/php-src/trunk/Zend/zend_language_scanner.h 2010-12-20 03:11:41 UTC (rev 306485) +++ php/php-src/trunk/Zend/zend_language_scanner.h 2010-12-20 03:16:09 UTC (rev 306486) @@ -56,9 +56,7 @@ ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state TSRMLS_DC); ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC); ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_DC); -ZEND_API int zend_multibyte_read_script(unsigned char *buf, size_t n TSRMLS_DC); ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, zend_encoding *old_encoding TSRMLS_DC); -ZEND_API int zend_multibyte_yyinput(zend_file_handle *file_handle, char *buf, size_t len TSRMLS_DC); ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding TSRMLS_DC); END_EXTERN_C() Modified: php/php-src/trunk/Zend/zend_language_scanner.l =================================================================== --- php/php-src/trunk/Zend/zend_language_scanner.l 2010-12-20 03:11:41 UTC (rev 306485) +++ php/php-src/trunk/Zend/zend_language_scanner.l 2010-12-20 03:16:09 UTC (rev 306486) @@ -207,10 +207,6 @@ CG(zend_lineno) = lex_state->lineno; zend_restore_compiled_filename(lex_state->filename TSRMLS_CC); - if (SCNG(script_org)) { - efree(SCNG(script_org)); - SCNG(script_org) = NULL; - } if (SCNG(script_filtered)) { efree(SCNG(script_filtered)); SCNG(script_filtered) = NULL; @@ -462,31 +458,23 @@ if (size != -1) { if (CG(multibyte)) { - if (zend_multibyte_read_script((unsigned char *)buf, size TSRMLS_CC) != 0) { - return FAILURE; - } + SCNG(script_org) = buf; + SCNG(script_org_size) = n; + SCNG(script_filtered) = NULL; - SCNG(yy_in) = NULL; - zend_multibyte_set_filter(NULL TSRMLS_CC); - if (!SCNG(input_filter)) { - SCNG(script_filtered) = (unsigned char*)emalloc(SCNG(script_org_size)+1); - 
memcpy(SCNG(script_filtered), SCNG(script_org), SCNG(script_org_size)+1); - SCNG(script_filtered_size) = SCNG(script_org_size); - } else { - SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC); - if (SCNG(script_filtered) == NULL) { + if (SCNG(input_filter)) { + if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) { zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected " "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding))); } + buf = SCNG(script_filtered); + size = SCNG(script_filtered_size); } - SCNG(yy_start) = SCNG(script_filtered) - offset; - yy_scan_buffer((char *)SCNG(script_filtered), SCNG(script_filtered_size) TSRMLS_CC); - } else { - SCNG(yy_start) = (unsigned char *)buf - offset; - yy_scan_buffer(buf, size TSRMLS_CC); } + SCNG(yy_start) = (unsigned char *)buf - offset; + yy_scan_buffer(buf, size TSRMLS_CC); } else { zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed"); } @@ -615,6 +603,9 @@ ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_DC) { + char *buf; + size_t size; + /* enforce two trailing NULLs for flex... 
*/ if (IS_INTERNED(str->value.str.val)) { char *tmp = safe_emalloc(1, str->value.str.len, ZEND_MMAP_AHEAD); @@ -626,28 +617,31 @@ memset(str->value.str.val + str->value.str.len, 0, ZEND_MMAP_AHEAD); - SCNG(yy_in)=NULL; + SCNG(yy_in) = NULL; SCNG(yy_start) = NULL; + buf = str->value.str.val; + size = str->value.str.len; + if (CG(multibyte)) { - SCNG(script_org) = (unsigned char *)estrdup(str->value.str.val); - SCNG(script_org_size) = str->value.str.len; + SCNG(script_org) = buf; + SCNG(script_org_size) = size; + SCNG(script_filtered) = NULL; zend_multibyte_set_filter(zend_multibyte_get_internal_encoding(TSRMLS_C) TSRMLS_CC); - if (!SCNG(input_filter)) { - SCNG(script_filtered) = (unsigned char*)emalloc(SCNG(script_org_size)+1); - memcpy(SCNG(script_filtered), SCNG(script_org), SCNG(script_org_size)+1); - SCNG(script_filtered_size) = SCNG(script_org_size); - } else { - SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC); + if (SCNG(input_filter)) { + if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) { + zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected " + "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding))); + } + buf = SCNG(script_filtered); + size = SCNG(script_filtered_size); } - - yy_scan_buffer((char *)SCNG(script_filtered), SCNG(script_filtered_size) TSRMLS_CC); - } else { - yy_scan_buffer(str->value.str.val, str->value.str.len TSRMLS_CC); } + yy_scan_buffer(buf, size TSRMLS_CC); + zend_set_compiled_filename(filename TSRMLS_CC); CG(zend_lineno) = 1; CG(increment_lineno) = 0; @@ -659,11 +653,11 @@ { size_t offset = SCNG(yy_cursor) - SCNG(yy_start); if (SCNG(input_filter)) { - size_t original_offset = offset, length = 0; do { + size_t original_offset = offset, length = 0; + do { unsigned char *p = NULL; - 
SCNG(input_filter)(&p, &length, SCNG(script_org), offset TSRMLS_CC); - if (!p) { - break; + if ((size_t)-1 == SCNG(input_filter)(&p, &length, SCNG(script_org), offset TSRMLS_CC)) { + return (size_t)-1; } efree(p); if (length > original_offset) { @@ -714,10 +708,6 @@ BEGIN(ST_IN_SCRIPTING); compiler_result = zendparse(TSRMLS_C); - if (SCNG(script_org)) { - efree(SCNG(script_org)); - SCNG(script_org) = NULL; - } if (SCNG(script_filtered)) { efree(SCNG(script_filtered)); SCNG(script_filtered) = NULL; @@ -759,10 +749,6 @@ return FAILURE; } zend_highlight(syntax_highlighter_ini TSRMLS_CC); - if (SCNG(script_org)) { - efree(SCNG(script_org)); - SCNG(script_org) = NULL; - } if (SCNG(script_filtered)) { efree(SCNG(script_filtered)); SCNG(script_filtered) = NULL; @@ -786,10 +772,6 @@ } BEGIN(INITIAL); zend_highlight(syntax_highlighter_ini TSRMLS_CC); - if (SCNG(script_org)) { - efree(SCNG(script_org)); - SCNG(script_org) = NULL; - } if (SCNG(script_filtered)) { efree(SCNG(script_filtered)); SCNG(script_filtered) = NULL; @@ -801,8 +783,8 @@ ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, zend_encoding *old_encoding TSRMLS_DC) { - size_t original_offset, offset, free_flag, new_len, length; - unsigned char *p; + size_t original_offset, offset, length; + unsigned char *new_yy_start; /* calculate current position */ offset = original_offset = YYCURSOR - SCNG(yy_start); @@ -818,87 +800,31 @@ /* convert and set */ if (!SCNG(input_filter)) { + if (SCNG(script_filtered)) { + efree(SCNG(script_filtered)); + SCNG(script_filtered) = NULL; + } + SCNG(script_filtered_size) = 0; length = SCNG(script_org_size) - offset; - p = SCNG(script_org) + offset; - free_flag = 0; + new_yy_start = SCNG(script_org) + offset; } else { - SCNG(input_filter)(&p, &length, SCNG(script_org) + offset, SCNG(script_org_size) - offset TSRMLS_CC); - free_flag = 1; - } - - new_len = original_offset + length; - - if (new_len > YYLIMIT - SCNG(yy_start)) { - unsigned char 
*new_yy_start = erealloc(SCNG(yy_start), new_len); - SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start)); - SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start)); - SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start)); - SCNG(yy_start) = new_yy_start; + if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org) + offset, SCNG(script_org_size) - offset TSRMLS_CC)) { + zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected " + "encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding))); + } SCNG(script_filtered) = new_yy_start; - SCNG(script_filtered_size) = new_len; + SCNG(script_filtered_size) = length; } - SCNG(yy_limit) = SCNG(yy_start) + new_len; - memmove(SCNG(yy_start) + original_offset, p, length); + SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start)); + SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start)); + SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start)); + SCNG(yy_limit) = new_yy_start + (SCNG(yy_limit) - SCNG(yy_start)); - if (free_flag) { - efree(p); - } + SCNG(yy_start) = new_yy_start; } -ZEND_API int zend_multibyte_yyinput(zend_file_handle *file_handle, char *buf, size_t len TSRMLS_DC) -{ - size_t n; - - if (CG(interactive) == 0) { - if (zend_stream_fixup(file_handle, &buf, &len TSRMLS_CC) == FAILURE) { - return FAILURE; - } - n = len; - return n; - } - - /* interactive */ - if (SCNG(script_org)) { - efree(SCNG(script_org)); - } - if (SCNG(script_filtered)) { - efree(SCNG(script_filtered)); - } - SCNG(script_org) = NULL; - SCNG(script_org_size) = 0; - - /* TODO: support widechars */ - if (zend_stream_fixup(file_handle, &buf, &len TSRMLS_CC) == FAILURE) { - return FAILURE; - } - n = len; - - SCNG(script_org_size) = n; - SCNG(script_org) = (unsigned char*)emalloc(SCNG(script_org_size) + 1); - memcpy(SCNG(script_org), buf, n); - - return n; -} - - -ZEND_API int 
zend_multibyte_read_script(unsigned char *buf, size_t n TSRMLS_DC) -{ - if (SCNG(script_org)) { - efree(SCNG(script_org)); - SCNG(script_org) = NULL; - } - SCNG(script_org_size) = n; - - SCNG(script_org) = (unsigned char*)emalloc(SCNG(script_org_size) + 1); - memcpy(SCNG(script_org), buf, n); - *(SCNG(script_org)+SCNG(script_org_size)) = '\0'; - - return 0; -} - - # define zend_copy_value(zendlval, yytext, yyleng) \ if (SCNG(output_filter)) { \ size_t sz = 0; \
-- PHP CVS Mailing List (http://www.php.net/) To unsubscribe, visit: http://www.php.net/unsub.php