Hi,
The proposed patch allows compiling PHP with --enable-zend-multibyte and
then enabling or disabling multibyte support at run-time using
zend.multibyte=0/1 in php.ini. As a result, a single binary will be able
to either support multibyte encodings or run without the zend-multibyte
overhead, depending on configuration.
The patch doesn't affect PHP compiled without --enable-zend-multibyte.
I'm going to commit it into trunk before alpha.
Any objections?
Thanks. Dmitry.
Index: ext/standard/info.c
===================================================================
--- ext/standard/info.c (revision 305494)
+++ ext/standard/info.c (working copy)
@@ -760,7 +760,7 @@
php_info_print_table_row(2, "Zend Memory Manager",
is_zend_mm(TSRMLS_C) ? "enabled" : "disabled" );
#ifdef ZEND_MULTIBYTE
- php_info_print_table_row(2, "Zend Multibyte Support",
"enabled");
+ php_info_print_table_row(2, "Zend Multibyte Support",
CG(multibyte) ? "enabled" : "disabled");
#else
php_info_print_table_row(2, "Zend Multibyte Support",
"disabled");
#endif
Index: ext/mbstring/mbstring.c
===================================================================
--- ext/mbstring/mbstring.c (revision 305494)
+++ ext/mbstring/mbstring.c (working copy)
@@ -1132,6 +1132,9 @@
{
int *list, size;
+ if (!CG(multibyte)) {
+ return FAILURE;
+ }
if (php_mb_parse_encoding_list(new_value, new_value_length, &list,
&size, 1 TSRMLS_CC)) {
if (MBSTRG(script_encoding_list) != NULL) {
free(MBSTRG(script_encoding_list));
@@ -1442,8 +1445,10 @@
PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
#endif
#ifdef ZEND_MULTIBYTE
-
zend_multibyte_set_internal_encoding(mbfl_no_encoding2name(MBSTRG(internal_encoding))
TSRMLS_CC);
- php_mb_set_zend_encoding(TSRMLS_C);
+ if (CG(multibyte)) {
+
zend_multibyte_set_internal_encoding(mbfl_no_encoding2name(MBSTRG(internal_encoding))
TSRMLS_CC);
+ php_mb_set_zend_encoding(TSRMLS_C);
+ }
#endif /* ZEND_MULTIBYTE */
return SUCCESS;
@@ -1570,7 +1575,7 @@
MBSTRG(current_internal_encoding) = no_encoding;
#ifdef ZEND_MULTIBYTE
/* TODO: make independent from
mbstring.encoding_translation? */
- if (MBSTRG(encoding_translation)) {
+ if (CG(multibyte) && MBSTRG(encoding_translation)) {
zend_multibyte_set_internal_encoding(name
TSRMLS_CC);
}
#endif /* ZEND_MULTIBYTE */
Index: Zend/zend.c
===================================================================
--- Zend/zend.c (revision 305494)
+++ Zend/zend.c (working copy)
@@ -93,6 +93,7 @@
ZEND_INI_ENTRY("error_reporting", NULL,
ZEND_INI_ALL, OnUpdateErrorReporting)
STD_ZEND_INI_BOOLEAN("zend.enable_gc", "1",
ZEND_INI_ALL, OnUpdateGCEnabled, gc_enabled,
zend_gc_globals, gc_globals)
#ifdef ZEND_MULTIBYTE
+ STD_ZEND_INI_BOOLEAN("zend.multibyte", "0", ZEND_INI_PERDIR,
OnUpdateBool, multibyte, zend_compiler_globals, compiler_globals)
STD_ZEND_INI_BOOLEAN("detect_unicode", "1", ZEND_INI_ALL, OnUpdateBool,
detect_unicode, zend_compiler_globals, compiler_globals)
#endif
ZEND_INI_END()
Index: Zend/zend_language_scanner.l
===================================================================
--- Zend/zend_language_scanner.l (revision 305494)
+++ Zend/zend_language_scanner.l (working copy)
@@ -181,7 +181,7 @@
lex_state->filename = zend_get_compiled_filename(TSRMLS_C);
lex_state->lineno = CG(zend_lineno);
-#ifdef ZEND_MULTIBYTE
+#ifdef ZEND_MULTIBYTE
lex_state->script_org = SCNG(script_org);
lex_state->script_org_size = SCNG(script_org_size);
lex_state->script_filtered = SCNG(script_filtered);
@@ -270,27 +270,32 @@
if (size != -1) {
#ifdef ZEND_MULTIBYTE
- if (zend_multibyte_read_script((unsigned char *)buf, size
TSRMLS_CC) != 0) {
- return FAILURE;
- }
+ if (CG(multibyte)) {
+ if (zend_multibyte_read_script((unsigned char *)buf,
size TSRMLS_CC) != 0) {
+ return FAILURE;
+ }
- SCNG(yy_in) = NULL;
+ SCNG(yy_in) = NULL;
- zend_multibyte_set_filter(NULL TSRMLS_CC);
+ zend_multibyte_set_filter(NULL TSRMLS_CC);
- if (!SCNG(input_filter)) {
- SCNG(script_filtered) = (unsigned
char*)emalloc(SCNG(script_org_size)+1);
- memcpy(SCNG(script_filtered), SCNG(script_org),
SCNG(script_org_size)+1);
- SCNG(script_filtered_size) = SCNG(script_org_size);
+ if (!SCNG(input_filter)) {
+ SCNG(script_filtered) = (unsigned
char*)emalloc(SCNG(script_org_size)+1);
+ memcpy(SCNG(script_filtered), SCNG(script_org),
SCNG(script_org_size)+1);
+ SCNG(script_filtered_size) =
SCNG(script_org_size);
+ } else {
+ SCNG(input_filter)(&SCNG(script_filtered),
&SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC);
+ if (SCNG(script_filtered) == NULL) {
+ zend_error_noreturn(E_COMPILE_ERROR,
"Could not convert the script from the detected "
+ "encoding \"%s\" to a
compatible encoding", LANG_SCNG(script_encoding)->name);
+ }
+ }
+ SCNG(yy_start) = SCNG(script_filtered) - offset;
+ yy_scan_buffer((char *)SCNG(script_filtered),
SCNG(script_filtered_size) TSRMLS_CC);
} else {
- SCNG(input_filter)(&SCNG(script_filtered),
&SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC);
- if (SCNG(script_filtered) == NULL) {
- zend_error_noreturn(E_COMPILE_ERROR, "Could not
convert the script from the detected "
- "encoding \"%s\" to a
compatible encoding", LANG_SCNG(script_encoding)->name);
- }
+ SCNG(yy_start) = buf - offset;
+ yy_scan_buffer(buf, size TSRMLS_CC);
}
- SCNG(yy_start) = SCNG(script_filtered) - offset;
- yy_scan_buffer((char *)SCNG(script_filtered),
SCNG(script_filtered_size) TSRMLS_CC);
#else /* !ZEND_MULTIBYTE */
SCNG(yy_start) = buf - offset;
yy_scan_buffer(buf, size TSRMLS_CC);
@@ -438,20 +443,24 @@
SCNG(yy_start) = NULL;
#ifdef ZEND_MULTIBYTE
- SCNG(script_org) = (unsigned char *)estrdup(str->value.str.val);
- SCNG(script_org_size) = str->value.str.len;
+ if (CG(multibyte)) {
+ SCNG(script_org) = (unsigned char *)estrdup(str->value.str.val);
+ SCNG(script_org_size) = str->value.str.len;
- zend_multibyte_set_filter(CG(internal_encoding) TSRMLS_CC);
+ zend_multibyte_set_filter(CG(internal_encoding) TSRMLS_CC);
- if (!SCNG(input_filter)) {
- SCNG(script_filtered) = (unsigned
char*)emalloc(SCNG(script_org_size)+1);
- memcpy(SCNG(script_filtered), SCNG(script_org),
SCNG(script_org_size)+1);
- SCNG(script_filtered_size) = SCNG(script_org_size);
+ if (!SCNG(input_filter)) {
+ SCNG(script_filtered) = (unsigned
char*)emalloc(SCNG(script_org_size)+1);
+ memcpy(SCNG(script_filtered), SCNG(script_org),
SCNG(script_org_size)+1);
+ SCNG(script_filtered_size) = SCNG(script_org_size);
+ } else {
+ SCNG(input_filter)(&SCNG(script_filtered),
&SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC);
+ }
+
+ yy_scan_buffer((char *)SCNG(script_filtered),
SCNG(script_filtered_size) TSRMLS_CC);
} else {
- SCNG(input_filter)(&SCNG(script_filtered),
&SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC);
+ yy_scan_buffer(str->value.str.val, str->value.str.len
TSRMLS_CC);
}
-
- yy_scan_buffer((char *)SCNG(script_filtered),
SCNG(script_filtered_size) TSRMLS_CC);
#else /* !ZEND_MULTIBYTE */
yy_scan_buffer(str->value.str.val, str->value.str.len TSRMLS_CC);
#endif /* ZEND_MULTIBYTE */
Index: Zend/zend_compile.c
===================================================================
--- Zend/zend_compile.c (revision 305494)
+++ Zend/zend_compile.c (working copy)
@@ -149,14 +149,12 @@
/* NULL, name length, filename length, last accepting char position
length */
result->value.str.len = 1+name_length+strlen(filename)+char_pos_len;
-#ifdef ZEND_MULTIBYTE
+
/* must be binary safe */
result->value.str.val = (char *) safe_emalloc(result->value.str.len, 1,
1);
result->value.str.val[0] = '\0';
sprintf(result->value.str.val+1, "%s%s%s", name, filename,
char_pos_buf);
-#else
- zend_spprintf(&result->value.str.val, 0, "%c%s%s%s", '\0', name,
filename, char_pos_buf);
-#endif /* ZEND_MULTIBYTE */
+
result->type = IS_STRING;
Z_SET_REFCOUNT_P(result, 1);
}
@@ -5861,51 +5859,53 @@
CG(declarables).ticks = val->u.constant;
#ifdef ZEND_MULTIBYTE
} else if (!zend_binary_strcasecmp(var->u.constant.value.str.val,
var->u.constant.value.str.len, "encoding", sizeof("encoding")-1)) {
- zend_encoding *new_encoding, *old_encoding;
- zend_encoding_filter old_input_filter;
+ if (CG(multibyte)) {
+ zend_encoding *new_encoding, *old_encoding;
+ zend_encoding_filter old_input_filter;
- if ((Z_TYPE(val->u.constant) & IS_CONSTANT_TYPE_MASK) ==
IS_CONSTANT) {
- zend_error(E_COMPILE_ERROR, "Cannot use constants as
encoding");
- }
-
- /*
- * Check that the pragma comes before any opcodes. If the
compilation
- * got as far as this, the previous portion of the script must
have been
- * parseable according to the .ini script_encoding setting. We
still
- * want to tell them to put declare() at the top.
- */
- {
- int num = CG(active_op_array)->last;
- /* ignore ZEND_EXT_STMT and ZEND_TICKS */
- while (num > 0 &&
- (CG(active_op_array)->opcodes[num-1].opcode ==
ZEND_EXT_STMT ||
- CG(active_op_array)->opcodes[num-1].opcode ==
ZEND_TICKS)) {
- --num;
+ if ((Z_TYPE(val->u.constant) & IS_CONSTANT_TYPE_MASK)
== IS_CONSTANT) {
+ zend_error(E_COMPILE_ERROR, "Cannot use
constants as encoding");
}
- if (num > 0) {
- zend_error(E_COMPILE_ERROR, "Encoding
declaration pragma must be the very first statement in the script");
+ /*
+ * Check that the pragma comes before any opcodes. If
the compilation
+ * got as far as this, the previous portion of the
script must have been
+ * parseable according to the .ini script_encoding
setting. We still
+ * want to tell them to put declare() at the top.
+ */
+ {
+ int num = CG(active_op_array)->last;
+ /* ignore ZEND_EXT_STMT and ZEND_TICKS */
+ while (num > 0 &&
+
(CG(active_op_array)->opcodes[num-1].opcode == ZEND_EXT_STMT ||
+
CG(active_op_array)->opcodes[num-1].opcode == ZEND_TICKS)) {
+ --num;
+ }
+
+ if (num > 0) {
+ zend_error(E_COMPILE_ERROR, "Encoding
declaration pragma must be the very first statement in the script");
+ }
}
- }
- CG(encoding_declared) = 1;
+ CG(encoding_declared) = 1;
- convert_to_string(&val->u.constant);
- new_encoding =
zend_multibyte_fetch_encoding(val->u.constant.value.str.val);
- if (!new_encoding) {
- zend_error(E_COMPILE_WARNING, "Unsupported encoding
[%s]", val->u.constant.value.str.val);
- } else {
- old_input_filter = LANG_SCNG(input_filter);
- old_encoding = LANG_SCNG(script_encoding);
- zend_multibyte_set_filter(new_encoding TSRMLS_CC);
+ convert_to_string(&val->u.constant);
+ new_encoding =
zend_multibyte_fetch_encoding(val->u.constant.value.str.val);
+ if (!new_encoding) {
+ zend_error(E_COMPILE_WARNING, "Unsupported
encoding [%s]", val->u.constant.value.str.val);
+ } else {
+ old_input_filter = LANG_SCNG(input_filter);
+ old_encoding = LANG_SCNG(script_encoding);
+ zend_multibyte_set_filter(new_encoding
TSRMLS_CC);
- /* need to re-scan if input filter changed */
- if (old_input_filter != LANG_SCNG(input_filter) ||
- ((old_input_filter ==
zend_multibyte_script_encoding_filter) &&
- (new_encoding != old_encoding))) {
- zend_multibyte_yyinput_again(old_input_filter,
old_encoding TSRMLS_CC);
+ /* need to re-scan if input filter changed */
+ if (old_input_filter != LANG_SCNG(input_filter)
||
+ ((old_input_filter ==
zend_multibyte_script_encoding_filter) &&
+ (new_encoding != old_encoding))) {
+
zend_multibyte_yyinput_again(old_input_filter, old_encoding TSRMLS_CC);
+ }
}
}
- efree(val->u.constant.value.str.val);
+ zval_dtor(&val->u.constant);
#else /* !ZEND_MULTIBYTE */
} else if (!zend_binary_strcasecmp(var->u.constant.value.str.val,
var->u.constant.value.str.len, "encoding", sizeof("encoding")-1)) {
/* Do not generate any kind of warning for encoding declares */
Index: Zend/tests/multibyte/multibyte_encoding_002.phpt
===================================================================
--- Zend/tests/multibyte/multibyte_encoding_002.phpt (revision 305496)
+++ Zend/tests/multibyte/multibyte_encoding_002.phpt (working copy)
@@ -10,6 +10,7 @@
}
?>
--INI--
+zend.multibyte=1
mbstring.internal_encoding=iso-8859-1
--FILE--
ï>¿<?php
Index: Zend/tests/multibyte/multibyte_encoding_003.phpt
===================================================================
--- Zend/tests/multibyte/multibyte_encoding_003.phpt (revision 305496)
+++ Zend/tests/multibyte/multibyte_encoding_003.phpt (working copy)
@@ -10,6 +10,7 @@
}
?>
--INI--
+zend.multibyte=1
mbstring.internal_encoding=iso-8859-1
--FILE--
ÿþ<
Index: Zend/tests/multibyte/multibyte_encoding_004.phpt
===================================================================
--- Zend/tests/multibyte/multibyte_encoding_004.phpt (revision 305496)
+++ Zend/tests/multibyte/multibyte_encoding_004.phpt (working copy)
@@ -10,6 +10,7 @@
}
?>
--INI--
+zend.multibyte=1
mbstring.script_encoding=Shift_JIS
mbstring.internal_encoding=Shift_JIS
--FILE--
Index: Zend/tests/multibyte/multibyte_encoding_005.phpt
===================================================================
--- Zend/tests/multibyte/multibyte_encoding_005.phpt (revision 305496)
+++ Zend/tests/multibyte/multibyte_encoding_005.phpt (working copy)
@@ -10,6 +10,7 @@
}
?>
--INI--
+zend.multibyte=1
mbstring.encoding_translation = On
mbstring.script_encoding=Shift_JIS
mbstring.internal_encoding=UTF-8
Index: Zend/tests/multibyte/multibyte_encoding_001.phpt
===================================================================
--- Zend/tests/multibyte/multibyte_encoding_001.phpt (revision 305496)
+++ Zend/tests/multibyte/multibyte_encoding_001.phpt (working copy)
@@ -10,6 +10,7 @@
}
?>
--INI--
+zend.multibyte=1
mbstring.internal_encoding=SJIS
--FILE--
<?php
Index: Zend/zend_globals.h
===================================================================
--- Zend/zend_globals.h (revision 305494)
+++ Zend/zend_globals.h (working copy)
@@ -153,6 +153,7 @@
#ifdef ZEND_MULTIBYTE
zend_encoding **script_encoding_list;
size_t script_encoding_list_size;
+ zend_bool multibyte;
zend_bool detect_unicode;
zend_bool encoding_declared;
--
PHP Internals - PHP Runtime Development Mailing List
To unsubscribe, visit: http://www.php.net/unsub.php