Hi,

The proposed patch allows compiling PHP with --enable-zend-multibyte and then enabling or disabling multibyte support at run time using zend.multibyte=0/1 in php.ini. As a result, a single binary will be able either to support multibyte encodings or to run without the zend-multibyte overhead, depending on its configuration.

The patch doesn't affect PHP compiled without --enable-zend-multibyte.

I'm going to commit it into trunk before alpha.
Any objections?

Thanks. Dmitry.
Index: ext/standard/info.c
===================================================================
--- ext/standard/info.c (revision 305494)
+++ ext/standard/info.c (working copy)
@@ -760,7 +760,7 @@
                php_info_print_table_row(2, "Zend Memory Manager", 
is_zend_mm(TSRMLS_C) ? "enabled" : "disabled" );
 
 #ifdef ZEND_MULTIBYTE
-               php_info_print_table_row(2, "Zend Multibyte Support", 
"enabled");
+               php_info_print_table_row(2, "Zend Multibyte Support", 
CG(multibyte) ? "enabled" : "disabled");
 #else
                php_info_print_table_row(2, "Zend Multibyte Support", 
"disabled");
 #endif
Index: ext/mbstring/mbstring.c
===================================================================
--- ext/mbstring/mbstring.c     (revision 305494)
+++ ext/mbstring/mbstring.c     (working copy)
@@ -1132,6 +1132,9 @@
 {
        int *list, size;
 
+       if (!CG(multibyte)) {
+               return FAILURE;
+       }
        if (php_mb_parse_encoding_list(new_value, new_value_length, &list, 
&size, 1 TSRMLS_CC)) {
                if (MBSTRG(script_encoding_list) != NULL) {
                        free(MBSTRG(script_encoding_list));
@@ -1442,8 +1445,10 @@
        PHP_RINIT(mb_regex) (INIT_FUNC_ARGS_PASSTHRU);
 #endif
 #ifdef ZEND_MULTIBYTE
-       
zend_multibyte_set_internal_encoding(mbfl_no_encoding2name(MBSTRG(internal_encoding))
 TSRMLS_CC);
-       php_mb_set_zend_encoding(TSRMLS_C);
+       if (CG(multibyte)) {
+               
zend_multibyte_set_internal_encoding(mbfl_no_encoding2name(MBSTRG(internal_encoding))
 TSRMLS_CC);
+               php_mb_set_zend_encoding(TSRMLS_C);
+       }
 #endif /* ZEND_MULTIBYTE */
 
        return SUCCESS;
@@ -1570,7 +1575,7 @@
                        MBSTRG(current_internal_encoding) = no_encoding;
 #ifdef ZEND_MULTIBYTE
                        /* TODO: make independent from 
mbstring.encoding_translation? */
-                       if (MBSTRG(encoding_translation)) {
+                       if (CG(multibyte) && MBSTRG(encoding_translation)) {
                                zend_multibyte_set_internal_encoding(name 
TSRMLS_CC);
                        }
 #endif /* ZEND_MULTIBYTE */
Index: Zend/zend.c
===================================================================
--- Zend/zend.c (revision 305494)
+++ Zend/zend.c (working copy)
@@ -93,6 +93,7 @@
        ZEND_INI_ENTRY("error_reporting",                               NULL,   
        ZEND_INI_ALL,           OnUpdateErrorReporting)
        STD_ZEND_INI_BOOLEAN("zend.enable_gc",                          "1",    
ZEND_INI_ALL,           OnUpdateGCEnabled,      gc_enabled,     
zend_gc_globals,        gc_globals)
 #ifdef ZEND_MULTIBYTE
+       STD_ZEND_INI_BOOLEAN("zend.multibyte", "0", ZEND_INI_PERDIR, 
OnUpdateBool, multibyte,      zend_compiler_globals, compiler_globals)
        STD_ZEND_INI_BOOLEAN("detect_unicode", "1", ZEND_INI_ALL, OnUpdateBool, 
detect_unicode, zend_compiler_globals, compiler_globals)
 #endif
 ZEND_INI_END()
Index: Zend/zend_language_scanner.l
===================================================================
--- Zend/zend_language_scanner.l        (revision 305494)
+++ Zend/zend_language_scanner.l        (working copy)
@@ -181,7 +181,7 @@
        lex_state->filename = zend_get_compiled_filename(TSRMLS_C);
        lex_state->lineno = CG(zend_lineno);
 
-#ifdef ZEND_MULTIBYTE
+#ifdef ZEND_MULTIBYTE  
        lex_state->script_org = SCNG(script_org);
        lex_state->script_org_size = SCNG(script_org_size);
        lex_state->script_filtered = SCNG(script_filtered);
@@ -270,27 +270,32 @@
 
        if (size != -1) {
 #ifdef ZEND_MULTIBYTE
-               if (zend_multibyte_read_script((unsigned char *)buf, size 
TSRMLS_CC) != 0) {
-                       return FAILURE;
-               }
+               if (CG(multibyte)) {
+                       if (zend_multibyte_read_script((unsigned char *)buf, 
size TSRMLS_CC) != 0) {
+                               return FAILURE;
+                       }
 
-               SCNG(yy_in) = NULL;
+                       SCNG(yy_in) = NULL;
 
-               zend_multibyte_set_filter(NULL TSRMLS_CC);
+                       zend_multibyte_set_filter(NULL TSRMLS_CC);
 
-               if (!SCNG(input_filter)) {
-                       SCNG(script_filtered) = (unsigned 
char*)emalloc(SCNG(script_org_size)+1);
-                       memcpy(SCNG(script_filtered), SCNG(script_org), 
SCNG(script_org_size)+1);
-                       SCNG(script_filtered_size) = SCNG(script_org_size);
+                       if (!SCNG(input_filter)) {
+                               SCNG(script_filtered) = (unsigned 
char*)emalloc(SCNG(script_org_size)+1);
+                               memcpy(SCNG(script_filtered), SCNG(script_org), 
SCNG(script_org_size)+1);
+                               SCNG(script_filtered_size) = 
SCNG(script_org_size);
+                       } else {
+                               SCNG(input_filter)(&SCNG(script_filtered), 
&SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC);
+                               if (SCNG(script_filtered) == NULL) {
+                                       zend_error_noreturn(E_COMPILE_ERROR, 
"Could not convert the script from the detected "
+                                                       "encoding \"%s\" to a 
compatible encoding", LANG_SCNG(script_encoding)->name);
+                               }
+                       }
+                       SCNG(yy_start) = SCNG(script_filtered) - offset;
+                       yy_scan_buffer((char *)SCNG(script_filtered), 
SCNG(script_filtered_size) TSRMLS_CC);
                } else {
-                       SCNG(input_filter)(&SCNG(script_filtered), 
&SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC);
-                       if (SCNG(script_filtered) == NULL) {
-                               zend_error_noreturn(E_COMPILE_ERROR, "Could not 
convert the script from the detected "
-                                               "encoding \"%s\" to a 
compatible encoding", LANG_SCNG(script_encoding)->name);
-                       }
+                       SCNG(yy_start) = buf - offset;
+                       yy_scan_buffer(buf, size TSRMLS_CC);
                }
-               SCNG(yy_start) = SCNG(script_filtered) - offset;
-               yy_scan_buffer((char *)SCNG(script_filtered), 
SCNG(script_filtered_size) TSRMLS_CC);
 #else /* !ZEND_MULTIBYTE */
                SCNG(yy_start) = buf - offset;
                yy_scan_buffer(buf, size TSRMLS_CC);
@@ -438,20 +443,24 @@
        SCNG(yy_start) = NULL;
 
 #ifdef ZEND_MULTIBYTE
-       SCNG(script_org) = (unsigned char *)estrdup(str->value.str.val);
-       SCNG(script_org_size) = str->value.str.len;
+       if (CG(multibyte)) {
+               SCNG(script_org) = (unsigned char *)estrdup(str->value.str.val);
+               SCNG(script_org_size) = str->value.str.len;
 
-       zend_multibyte_set_filter(CG(internal_encoding) TSRMLS_CC);
+               zend_multibyte_set_filter(CG(internal_encoding) TSRMLS_CC);
 
-       if (!SCNG(input_filter)) {
-               SCNG(script_filtered) = (unsigned 
char*)emalloc(SCNG(script_org_size)+1);
-               memcpy(SCNG(script_filtered), SCNG(script_org), 
SCNG(script_org_size)+1);
-               SCNG(script_filtered_size) = SCNG(script_org_size);
+               if (!SCNG(input_filter)) {
+                       SCNG(script_filtered) = (unsigned 
char*)emalloc(SCNG(script_org_size)+1);
+                       memcpy(SCNG(script_filtered), SCNG(script_org), 
SCNG(script_org_size)+1);
+                       SCNG(script_filtered_size) = SCNG(script_org_size);
+               } else {
+                       SCNG(input_filter)(&SCNG(script_filtered), 
&SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC);
+               }
+
+               yy_scan_buffer((char *)SCNG(script_filtered), 
SCNG(script_filtered_size) TSRMLS_CC);    
        } else {
-               SCNG(input_filter)(&SCNG(script_filtered), 
&SCNG(script_filtered_size), SCNG(script_org), SCNG(script_org_size) TSRMLS_CC);
+               yy_scan_buffer(str->value.str.val, str->value.str.len 
TSRMLS_CC);
        }
-
-       yy_scan_buffer((char *)SCNG(script_filtered), 
SCNG(script_filtered_size) TSRMLS_CC);    
 #else /* !ZEND_MULTIBYTE */
        yy_scan_buffer(str->value.str.val, str->value.str.len TSRMLS_CC);
 #endif /* ZEND_MULTIBYTE */
Index: Zend/zend_compile.c
===================================================================
--- Zend/zend_compile.c (revision 305494)
+++ Zend/zend_compile.c (working copy)
@@ -149,14 +149,12 @@
 
        /* NULL, name length, filename length, last accepting char position 
length */
        result->value.str.len = 1+name_length+strlen(filename)+char_pos_len;
-#ifdef ZEND_MULTIBYTE
+
        /* must be binary safe */
        result->value.str.val = (char *) safe_emalloc(result->value.str.len, 1, 
1);
        result->value.str.val[0] = '\0';
        sprintf(result->value.str.val+1, "%s%s%s", name, filename, 
char_pos_buf);
-#else
-       zend_spprintf(&result->value.str.val, 0, "%c%s%s%s", '\0', name, 
filename, char_pos_buf);
-#endif /* ZEND_MULTIBYTE */
+
        result->type = IS_STRING;
        Z_SET_REFCOUNT_P(result, 1);
 }
@@ -5861,51 +5859,53 @@
                CG(declarables).ticks = val->u.constant;
 #ifdef ZEND_MULTIBYTE
        } else if (!zend_binary_strcasecmp(var->u.constant.value.str.val, 
var->u.constant.value.str.len, "encoding", sizeof("encoding")-1)) {
-               zend_encoding *new_encoding, *old_encoding;
-               zend_encoding_filter old_input_filter;
+               if (CG(multibyte)) {
+                       zend_encoding *new_encoding, *old_encoding;
+                       zend_encoding_filter old_input_filter;
 
-               if ((Z_TYPE(val->u.constant) & IS_CONSTANT_TYPE_MASK) == 
IS_CONSTANT) {
-                       zend_error(E_COMPILE_ERROR, "Cannot use constants as 
encoding");
-               }
-
-               /*
-                * Check that the pragma comes before any opcodes. If the 
compilation
-                * got as far as this, the previous portion of the script must 
have been
-                * parseable according to the .ini script_encoding setting. We 
still
-                * want to tell them to put declare() at the top.
-                */
-               {
-                       int num = CG(active_op_array)->last;
-                       /* ignore ZEND_EXT_STMT and ZEND_TICKS */
-                       while (num > 0 &&
-                              (CG(active_op_array)->opcodes[num-1].opcode == 
ZEND_EXT_STMT ||
-                               CG(active_op_array)->opcodes[num-1].opcode == 
ZEND_TICKS)) {
-                               --num;
+                       if ((Z_TYPE(val->u.constant) & IS_CONSTANT_TYPE_MASK) 
== IS_CONSTANT) {
+                               zend_error(E_COMPILE_ERROR, "Cannot use 
constants as encoding");
                        }
 
-                       if (num > 0) {
-                               zend_error(E_COMPILE_ERROR, "Encoding 
declaration pragma must be the very first statement in the script");
+                       /*
+                        * Check that the pragma comes before any opcodes. If 
the compilation
+                        * got as far as this, the previous portion of the 
script must have been
+                        * parseable according to the .ini script_encoding 
setting. We still
+                        * want to tell them to put declare() at the top.
+                        */
+                       {
+                               int num = CG(active_op_array)->last;
+                               /* ignore ZEND_EXT_STMT and ZEND_TICKS */
+                               while (num > 0 &&
+                                      
(CG(active_op_array)->opcodes[num-1].opcode == ZEND_EXT_STMT ||
+                                       
CG(active_op_array)->opcodes[num-1].opcode == ZEND_TICKS)) {
+                                       --num;
+                               }
+
+                               if (num > 0) {
+                                       zend_error(E_COMPILE_ERROR, "Encoding 
declaration pragma must be the very first statement in the script");
+                               }
                        }
-               }
-               CG(encoding_declared) = 1;
+                       CG(encoding_declared) = 1;
 
-               convert_to_string(&val->u.constant);
-               new_encoding = 
zend_multibyte_fetch_encoding(val->u.constant.value.str.val);
-               if (!new_encoding) {
-                       zend_error(E_COMPILE_WARNING, "Unsupported encoding 
[%s]", val->u.constant.value.str.val);
-               } else {
-                       old_input_filter = LANG_SCNG(input_filter);
-                       old_encoding = LANG_SCNG(script_encoding);
-                       zend_multibyte_set_filter(new_encoding TSRMLS_CC);
+                       convert_to_string(&val->u.constant);
+                       new_encoding = 
zend_multibyte_fetch_encoding(val->u.constant.value.str.val);
+                       if (!new_encoding) {
+                               zend_error(E_COMPILE_WARNING, "Unsupported 
encoding [%s]", val->u.constant.value.str.val);
+                       } else {
+                               old_input_filter = LANG_SCNG(input_filter);
+                               old_encoding = LANG_SCNG(script_encoding);
+                               zend_multibyte_set_filter(new_encoding 
TSRMLS_CC);
 
-                       /* need to re-scan if input filter changed */
-                       if (old_input_filter != LANG_SCNG(input_filter) ||
-                               ((old_input_filter == 
zend_multibyte_script_encoding_filter) &&
-                                (new_encoding != old_encoding))) {
-                               zend_multibyte_yyinput_again(old_input_filter, 
old_encoding TSRMLS_CC);
+                               /* need to re-scan if input filter changed */
+                               if (old_input_filter != LANG_SCNG(input_filter) 
||
+                                       ((old_input_filter == 
zend_multibyte_script_encoding_filter) &&
+                                        (new_encoding != old_encoding))) {
+                                       
zend_multibyte_yyinput_again(old_input_filter, old_encoding TSRMLS_CC);
+                               }
                        }
                }
-               efree(val->u.constant.value.str.val);
+               zval_dtor(&val->u.constant);
 #else  /* !ZEND_MULTIBYTE */
        } else if (!zend_binary_strcasecmp(var->u.constant.value.str.val, 
var->u.constant.value.str.len, "encoding", sizeof("encoding")-1)) {
                /* Do not generate any kind of warning for encoding declares */
Index: Zend/tests/multibyte/multibyte_encoding_002.phpt
===================================================================
--- Zend/tests/multibyte/multibyte_encoding_002.phpt    (revision 305496)
+++ Zend/tests/multibyte/multibyte_encoding_002.phpt    (working copy)
@@ -10,6 +10,7 @@
 }
 ?>
 --INI--
+zend.multibyte=1
 mbstring.internal_encoding=iso-8859-1
 --FILE--
 ï>¿<?php
Index: Zend/tests/multibyte/multibyte_encoding_003.phpt
===================================================================
--- Zend/tests/multibyte/multibyte_encoding_003.phpt    (revision 305496)
+++ Zend/tests/multibyte/multibyte_encoding_003.phpt    (working copy)
@@ -10,6 +10,7 @@
 }
 ?>
 --INI--
+zend.multibyte=1
 mbstring.internal_encoding=iso-8859-1
 --FILE--
 ÿþ<
Index: Zend/tests/multibyte/multibyte_encoding_004.phpt
===================================================================
--- Zend/tests/multibyte/multibyte_encoding_004.phpt    (revision 305496)
+++ Zend/tests/multibyte/multibyte_encoding_004.phpt    (working copy)
@@ -10,6 +10,7 @@
 }
 ?>
 --INI--
+zend.multibyte=1
 mbstring.script_encoding=Shift_JIS
 mbstring.internal_encoding=Shift_JIS
 --FILE--
Index: Zend/tests/multibyte/multibyte_encoding_005.phpt
===================================================================
--- Zend/tests/multibyte/multibyte_encoding_005.phpt    (revision 305496)
+++ Zend/tests/multibyte/multibyte_encoding_005.phpt    (working copy)
@@ -10,6 +10,7 @@
 }
 ?>
 --INI--
+zend.multibyte=1
 mbstring.encoding_translation = On
 mbstring.script_encoding=Shift_JIS
 mbstring.internal_encoding=UTF-8
Index: Zend/tests/multibyte/multibyte_encoding_001.phpt
===================================================================
--- Zend/tests/multibyte/multibyte_encoding_001.phpt    (revision 305496)
+++ Zend/tests/multibyte/multibyte_encoding_001.phpt    (working copy)
@@ -10,6 +10,7 @@
 }
 ?>
 --INI--
+zend.multibyte=1
 mbstring.internal_encoding=SJIS
 --FILE--
 <?php
Index: Zend/zend_globals.h
===================================================================
--- Zend/zend_globals.h (revision 305494)
+++ Zend/zend_globals.h (working copy)
@@ -153,6 +153,7 @@
 #ifdef ZEND_MULTIBYTE
        zend_encoding **script_encoding_list;
        size_t script_encoding_list_size;
+       zend_bool multibyte;
        zend_bool detect_unicode;
        zend_bool encoding_declared;
 

-- 
PHP Internals - PHP Runtime Development Mailing List
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to