moriyoshi                                Mon, 20 Dec 2010 03:16:09 +0000

Revision: http://svn.php.net/viewvc?view=revision&revision=306486

Log:
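- Avoid allocating extra buffers: the scanner now uses the input buffer
directly as script_org instead of copying it, and script_filtered is only
filled in when an input filter is set. This makes parsing with zend.multibyte
enabled as fast as with it disabled.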

Changed paths:
    U   php/php-src/trunk/Zend/zend_language_scanner.h
    U   php/php-src/trunk/Zend/zend_language_scanner.l

Modified: php/php-src/trunk/Zend/zend_language_scanner.h
===================================================================
--- php/php-src/trunk/Zend/zend_language_scanner.h	2010-12-20 03:11:41 UTC (rev 306485)
+++ php/php-src/trunk/Zend/zend_language_scanner.h	2010-12-20 03:16:09 UTC (rev 306486)
@@ -56,9 +56,7 @@
 ZEND_API void zend_save_lexical_state(zend_lex_state *lex_state TSRMLS_DC);
 ZEND_API void zend_restore_lexical_state(zend_lex_state *lex_state TSRMLS_DC);
 ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_DC);
-ZEND_API int zend_multibyte_read_script(unsigned char *buf, size_t n TSRMLS_DC);
 ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, zend_encoding *old_encoding TSRMLS_DC);
-ZEND_API int zend_multibyte_yyinput(zend_file_handle *file_handle, char *buf, size_t len TSRMLS_DC);
 ZEND_API int zend_multibyte_set_filter(const zend_encoding *onetime_encoding TSRMLS_DC);

 END_EXTERN_C()

Modified: php/php-src/trunk/Zend/zend_language_scanner.l
===================================================================
--- php/php-src/trunk/Zend/zend_language_scanner.l	2010-12-20 03:11:41 UTC (rev 306485)
+++ php/php-src/trunk/Zend/zend_language_scanner.l	2010-12-20 03:16:09 UTC (rev 306486)
@@ -207,10 +207,6 @@
 	CG(zend_lineno) = lex_state->lineno;
 	zend_restore_compiled_filename(lex_state->filename TSRMLS_CC);

-	if (SCNG(script_org)) {
-		efree(SCNG(script_org));
-		SCNG(script_org) = NULL;
-	}
 	if (SCNG(script_filtered)) {
 		efree(SCNG(script_filtered));
 		SCNG(script_filtered) = NULL;
@@ -462,31 +458,23 @@

 	if (size != -1) {
 		if (CG(multibyte)) {
-			if (zend_multibyte_read_script((unsigned char *)buf, size TSRMLS_CC) != 0) {
-				return FAILURE;
-			}
+			SCNG(script_org) = buf;
+			SCNG(script_org_size) = n;
+			SCNG(script_filtered) = NULL;

-			SCNG(yy_in) = NULL;
-
 			zend_multibyte_set_filter(NULL TSRMLS_CC);

-			if (!SCNG(input_filter)) {
-				SCNG(script_filtered) = (unsigned char*)emalloc(SCNG(script_org_size)+1);
-				memcpy(SCNG(script_filtered), SCNG(script_org), SCNG(script_org_size)+1);
-				SCNG(script_filtered_size) = SCNG(script_org_size);
-			} else {
-				SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC);
-				if (SCNG(script_filtered) == NULL) {
+			if (SCNG(input_filter)) {
+				if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
 					zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
 							"encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
 				}
+				buf = SCNG(script_filtered);
+				size = SCNG(script_filtered_size);
 			}
-			SCNG(yy_start) = SCNG(script_filtered) - offset;
-			yy_scan_buffer((char *)SCNG(script_filtered), SCNG(script_filtered_size) TSRMLS_CC);
-		} else {
-			SCNG(yy_start) = (unsigned char *)buf - offset;
-			yy_scan_buffer(buf, size TSRMLS_CC);
 		}
+		SCNG(yy_start) = (unsigned char *)buf - offset;
+		yy_scan_buffer(buf, size TSRMLS_CC);
 	} else {
 		zend_error_noreturn(E_COMPILE_ERROR, "zend_stream_mmap() failed");
 	}
@@ -615,6 +603,9 @@

 ZEND_API int zend_prepare_string_for_scanning(zval *str, char *filename TSRMLS_DC)
 {
+	char *buf;
+	size_t size;
+
 	/* enforce two trailing NULLs for flex... */
 	if (IS_INTERNED(str->value.str.val)) {
 		char *tmp = safe_emalloc(1, str->value.str.len, ZEND_MMAP_AHEAD);
@@ -626,28 +617,31 @@

 	memset(str->value.str.val + str->value.str.len, 0, ZEND_MMAP_AHEAD);

-	SCNG(yy_in)=NULL;
+	SCNG(yy_in) = NULL;
 	SCNG(yy_start) = NULL;

+	buf = str->value.str.val;
+	size = str->value.str.len;
+
 	if (CG(multibyte)) {
-		SCNG(script_org) = (unsigned char *)estrdup(str->value.str.val);
-		SCNG(script_org_size) = str->value.str.len;
+		SCNG(script_org) = buf;
+		SCNG(script_org_size) = size;
+		SCNG(script_filtered) = NULL;

 		zend_multibyte_set_filter(zend_multibyte_get_internal_encoding(TSRMLS_C) TSRMLS_CC);

-		if (!SCNG(input_filter)) {
-			SCNG(script_filtered) = (unsigned char*)emalloc(SCNG(script_org_size)+1);
-			memcpy(SCNG(script_filtered), SCNG(script_org), SCNG(script_org_size)+1);
-			SCNG(script_filtered_size) = SCNG(script_org_size);
-		} else {
-			SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC);
+		if (SCNG(input_filter)) {
+			if ((size_t)-1 == SCNG(input_filter)(&SCNG(script_filtered), &SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC)) {
+				zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
+						"encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
+			}
+			buf = SCNG(script_filtered);
+			size = SCNG(script_filtered_size);
 		}
-
-		yy_scan_buffer((char *)SCNG(script_filtered), SCNG(script_filtered_size) TSRMLS_CC);
-	} else {
-		yy_scan_buffer(str->value.str.val, str->value.str.len TSRMLS_CC);
 	}

+	yy_scan_buffer(buf, size TSRMLS_CC);
+
 	zend_set_compiled_filename(filename TSRMLS_CC);
 	CG(zend_lineno) = 1;
 	CG(increment_lineno) = 0;
@@ -659,11 +653,11 @@
 {
 	size_t offset = SCNG(yy_cursor) - SCNG(yy_start);
 	if (SCNG(input_filter)) {
-		size_t original_offset = offset, length = 0; do {
+		size_t original_offset = offset, length = 0;
+		do {
 			unsigned char *p = NULL;
-			SCNG(input_filter)(&p, &length, SCNG(script_org), offset TSRMLS_CC);
-			if (!p) {
-				break;
+			if ((size_t)-1 == SCNG(input_filter)(&p, &length, SCNG(script_org), offset TSRMLS_CC)) {
+				return (size_t)-1;
 			}
 			efree(p);
 			if (length > original_offset) {
@@ -714,10 +708,6 @@
 		BEGIN(ST_IN_SCRIPTING);
 		compiler_result = zendparse(TSRMLS_C);

-		if (SCNG(script_org)) {
-			efree(SCNG(script_org));
-			SCNG(script_org) = NULL;
-		}
 		if (SCNG(script_filtered)) {
 			efree(SCNG(script_filtered));
 			SCNG(script_filtered) = NULL;
@@ -759,10 +749,6 @@
 		return FAILURE;
 	}
 	zend_highlight(syntax_highlighter_ini TSRMLS_CC);
-	if (SCNG(script_org)) {
-		efree(SCNG(script_org));
-		SCNG(script_org) = NULL;
-	}
 	if (SCNG(script_filtered)) {
 		efree(SCNG(script_filtered));
 		SCNG(script_filtered) = NULL;
@@ -786,10 +772,6 @@
 	}
 	BEGIN(INITIAL);
 	zend_highlight(syntax_highlighter_ini TSRMLS_CC);
-	if (SCNG(script_org)) {
-		efree(SCNG(script_org));
-		SCNG(script_org) = NULL;
-	}
 	if (SCNG(script_filtered)) {
 		efree(SCNG(script_filtered));
 		SCNG(script_filtered) = NULL;
@@ -801,8 +783,8 @@

 ZEND_API void zend_multibyte_yyinput_again(zend_encoding_filter old_input_filter, zend_encoding *old_encoding TSRMLS_DC)
 {
-	size_t original_offset, offset, free_flag, new_len, length;
-	unsigned char *p;
+	size_t original_offset, offset, length;
+	unsigned char *new_yy_start;

 	/* calculate current position */
 	offset = original_offset = YYCURSOR - SCNG(yy_start);
@@ -818,87 +800,31 @@

 	/* convert and set */
 	if (!SCNG(input_filter)) {
+		if (SCNG(script_filtered)) {
+			efree(SCNG(script_filtered));
+			SCNG(script_filtered) = NULL;
+		}
+		SCNG(script_filtered_size) = 0;
 		length = SCNG(script_org_size) - offset;
-		p = SCNG(script_org) + offset;
-		free_flag = 0;
+		new_yy_start = SCNG(script_org) + offset;
 	} else {
-		SCNG(input_filter)(&p, &length, SCNG(script_org) + offset, SCNG(script_org_size) - offset TSRMLS_CC);
-		free_flag = 1;
-	}
-
-	new_len = original_offset + length;
-
-	if (new_len > YYLIMIT - SCNG(yy_start)) {
-		unsigned char *new_yy_start = erealloc(SCNG(yy_start), new_len);
-		SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start));
-		SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start));
-		SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start));
-		SCNG(yy_start) = new_yy_start;
+		if ((size_t)-1 == SCNG(input_filter)(&new_yy_start, &length, SCNG(script_org) + offset, SCNG(script_org_size) - offset TSRMLS_CC)) {
+			zend_error_noreturn(E_COMPILE_ERROR, "Could not convert the script from the detected "
+					"encoding \"%s\" to a compatible encoding", zend_multibyte_get_encoding_name(LANG_SCNG(script_encoding)));
+		}
 		SCNG(script_filtered) = new_yy_start;
-		SCNG(script_filtered_size) = new_len;
+		SCNG(script_filtered_size) = length;
 	}

-	SCNG(yy_limit) = SCNG(yy_start) + new_len;
-	memmove(SCNG(yy_start) + original_offset, p, length);
+	SCNG(yy_cursor) = new_yy_start + (SCNG(yy_cursor) - SCNG(yy_start));
+	SCNG(yy_marker) = new_yy_start + (SCNG(yy_marker) - SCNG(yy_start));
+	SCNG(yy_text) = new_yy_start + (SCNG(yy_text) - SCNG(yy_start));
+	SCNG(yy_limit) = new_yy_start + (SCNG(yy_limit) - SCNG(yy_start));

-	if (free_flag) {
-		efree(p);
-	}
+	SCNG(yy_start) = new_yy_start;
 }


-ZEND_API int zend_multibyte_yyinput(zend_file_handle *file_handle, char *buf, size_t len TSRMLS_DC)
-{
-	size_t n;
-
-	if (CG(interactive) == 0) {
-		if (zend_stream_fixup(file_handle, &buf, &len TSRMLS_CC) == FAILURE) {
-			return FAILURE;
-		}
-		n = len;
-		return n;
-	}
-
-	/* interactive */
-	if (SCNG(script_org)) {
-		efree(SCNG(script_org));
-	}
-	if (SCNG(script_filtered)) {
-		efree(SCNG(script_filtered));
-	}
-	SCNG(script_org) = NULL;
-	SCNG(script_org_size) = 0;
-
-	/* TODO: support widechars */
-	if (zend_stream_fixup(file_handle, &buf, &len TSRMLS_CC) == FAILURE) {
-		return FAILURE;
-	}
-	n = len;
-
-	SCNG(script_org_size) = n;
-	SCNG(script_org) = (unsigned char*)emalloc(SCNG(script_org_size) + 1);
-	memcpy(SCNG(script_org), buf, n);
-
-	return n;
-}
-
-
-ZEND_API int zend_multibyte_read_script(unsigned char *buf, size_t n TSRMLS_DC)
-{
-	if (SCNG(script_org)) {
-		efree(SCNG(script_org));
-		SCNG(script_org) = NULL;
-	}
-	SCNG(script_org_size) = n;
-
-	SCNG(script_org) = (unsigned char*)emalloc(SCNG(script_org_size) + 1);
-	memcpy(SCNG(script_org), buf, n);
-	*(SCNG(script_org)+SCNG(script_org_size)) = '\0';
-
-	return 0;
-}
-
-
 # define zend_copy_value(zendlval, yytext, yyleng) \
 	if (SCNG(output_filter)) { \
 		size_t sz = 0; \
-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to