moriyoshi               Fri Sep 26 10:42:15 2003 EDT

  Modified files:              
    /php-src/ext/mbstring       mbstring.c 
  Log:
  Fix some incompatibilities with the pre-libmbfl behaviour regarding encoding detection.
  
  
Index: php-src/ext/mbstring/mbstring.c
diff -u php-src/ext/mbstring/mbstring.c:1.198 php-src/ext/mbstring/mbstring.c:1.199
--- php-src/ext/mbstring/mbstring.c:1.198       Tue Sep 23 09:23:30 2003
+++ php-src/ext/mbstring/mbstring.c     Fri Sep 26 10:42:14 2003
@@ -17,7 +17,7 @@
    +----------------------------------------------------------------------+
  */
 
-/* $Id: mbstring.c,v 1.198 2003/09/23 13:23:30 moriyoshi Exp $ */
+/* $Id: mbstring.c,v 1.199 2003/09/26 14:42:14 moriyoshi Exp $ */
 
 /*
  * PHP4 Multibyte String module "mbstring"
@@ -84,62 +84,65 @@
 static void _php_mb_globals_dtor(zend_mbstring_globals *pglobals TSRMLS_DC);
 /* }}} */
 
-/* {{{ php_mb_default_identify_list[] */
-#if defined(HAVE_MBSTR_JA)
-static const enum mbfl_no_encoding php_mb_default_identify_list[] = {
+/* {{{ php_mb_default_identify_list */
+typedef struct _php_mb_nls_ident_list {
+       enum mbfl_no_language lang;
+       enum mbfl_no_encoding* list;
+       int list_size;
+} php_mb_nls_ident_list;
+
+static const enum mbfl_no_encoding php_mb_default_identify_list_ja[] = {
        mbfl_no_encoding_ascii,
        mbfl_no_encoding_jis,
        mbfl_no_encoding_utf8,
        mbfl_no_encoding_euc_jp,
        mbfl_no_encoding_sjis
 };
-#endif
 
-#if defined(HAVE_MBSTR_CN) && !defined(HAVE_MBSTR_JA)
-static const enum mbfl_no_encoding php_mb_default_identify_list[] = {
+static const enum mbfl_no_encoding php_mb_default_identify_list_cn[] = {
        mbfl_no_encoding_ascii,
        mbfl_no_encoding_utf8,
        mbfl_no_encoding_euc_cn,
        mbfl_no_encoding_cp936
 };
-#endif
 
-#if defined(HAVE_MBSTR_TW) && !defined(HAVE_MBSTR_CN) && !defined(HAVE_MBSTR_JA)
-static const enum mbfl_no_encoding php_mb_default_identify_list[] = {
+static const enum mbfl_no_encoding php_mb_default_identify_list_tw_hk[] = {
        mbfl_no_encoding_ascii,
        mbfl_no_encoding_utf8,
        mbfl_no_encoding_euc_tw,
        mbfl_no_encoding_big5
 };
-#endif
 
-#if defined(HAVE_MBSTR_KR) && !defined(HAVE_MBSTR_TW) && !defined(HAVE_MBSTR_CN) && 
!defined(HAVE_MBSTR_JA)
-static const enum mbfl_no_encoding php_mb_default_identify_list[] = {
+static const enum mbfl_no_encoding php_mb_default_identify_list_kr[] = {
        mbfl_no_encoding_ascii,
        mbfl_no_encoding_utf8,
        mbfl_no_encoding_euc_kr,
        mbfl_no_encoding_uhc
 };
-#endif
 
-#if defined(HAVE_MBSTR_RU) && !defined(HAVE_MBSTR_KR) && !defined(HAVE_MBSTR_TW) && 
!defined(HAVE_MBSTR_CN) && !defined(HAVE_MBSTR_JA)
-static const enum mbfl_no_encoding php_mb_default_identify_list[] = {
+static const enum mbfl_no_encoding php_mb_default_identify_list_ru[] = {
        mbfl_no_encoding_ascii,
        mbfl_no_encoding_utf8,
        mbfl_no_encoding_koi8r,
        mbfl_no_encoding_cp1251,
        mbfl_no_encoding_cp866
 };
-#endif
 
-#if !defined(HAVE_MBSTR_RU) && !defined(HAVE_MBSTR_KR) && !defined(HAVE_MBSTR_TW) && 
!defined(HAVE_MBSTR_CN) && !defined(HAVE_MBSTR_JA)
-static const enum mbfl_no_encoding php_mb_default_identify_list[] = {
+static const enum mbfl_no_encoding php_mb_default_identify_list_neut[] = {
        mbfl_no_encoding_ascii,
        mbfl_no_encoding_utf8
 };
-#endif
 
-static const int php_mb_default_identify_list_size = 
sizeof(php_mb_default_identify_list)/sizeof(enum mbfl_no_encoding);
+
+php_mb_nls_ident_list php_mb_default_identify_list[] = {
+       { mbfl_no_language_japanese, php_mb_default_identify_list_ja, 
sizeof(php_mb_default_identify_list_ja) / sizeof(php_mb_default_identify_list_ja[0]) },
+       { mbfl_no_language_korean, php_mb_default_identify_list_kr, 
sizeof(php_mb_default_identify_list_kr) / sizeof(php_mb_default_identify_list_kr[0]) },
+       { mbfl_no_language_traditional_chinese, php_mb_default_identify_list_tw_hk, 
sizeof(php_mb_default_identify_list_tw_hk) / 
sizeof(php_mb_default_identify_list_tw_hk[0]) },
+       { mbfl_no_language_simplified_chinese, php_mb_default_identify_list_cn, 
sizeof(php_mb_default_identify_list_cn) / sizeof(php_mb_default_identify_list_cn[0]) },
+       { mbfl_no_language_russian, php_mb_default_identify_list_ru, 
sizeof(php_mb_default_identify_list_ru) / sizeof(php_mb_default_identify_list_ru[0]) },
+       { mbfl_no_language_neutral, php_mb_default_identify_list_neut, 
sizeof(php_mb_default_identify_list_neut) / 
sizeof(php_mb_default_identify_list_neut[0]) }
+};
+
 /* }}} */
 
 static
@@ -285,11 +288,12 @@
  *  of parsed encodings.
  */
 static int
-php_mb_parse_encoding_list(const char *value, int value_length, int **return_list, 
int *return_size, int persistent)
+php_mb_parse_encoding_list(const char *value, int value_length, enum mbfl_no_encoding 
**return_list, int *return_size, int persistent TSRMLS_DC)
 {
-       int n, l, size, bauto, *src, *list, *entry, ret = 1;
+       int n, l, size, bauto, ret = 1;
        char *p, *p1, *p2, *endp, *tmpstr;
        enum mbfl_no_encoding no_encoding;
+       enum mbfl_no_encoding *src, *entry, *list;
 
        list = NULL;
        if (value == NULL || value_length <= 0) {
@@ -301,6 +305,12 @@
                }
                return 0;
        } else {
+               enum mbfl_no_encoding *identify_list;
+               int identify_list_size;
+
+               identify_list = MBSTRG(default_detect_order_list);
+               identify_list_size = MBSTRG(default_detect_order_list_size);
+
                /* copy the value string for work */
                if (value[0]=='"' && value[value_length-1]=='"' && value_length>2) {
                        tmpstr = (char *)estrndup(value+1, value_length-2);
@@ -319,7 +329,7 @@
                        p1 = p2 + 1;
                        n++;
                }
-               size = n + php_mb_default_identify_list_size;
+               size = n + identify_list_size;
                /* make list */
                list = (int *)pecalloc(size, sizeof(int), persistent);
                if (list != NULL) {
@@ -343,23 +353,25 @@
                                        p--;
                                }
                                /* convert to the encoding number and check encoding */
-                               no_encoding = mbfl_name2no_encoding(p1);
-                               if (no_encoding == mbfl_no_encoding_auto) {
+                               if (strcasecmp(p1, "auto") == 0) {
                                        if (!bauto) {
                                                bauto = 1;
-                                               l = php_mb_default_identify_list_size;
-                                               src = 
(int*)php_mb_default_identify_list;
+                                               l = identify_list_size;
+                                               src = identify_list;
                                                while (l > 0) {
                                                        *entry++ = *src++;
                                                        l--;
                                                        n++;
                                                }
                                        }
-                               } else if (no_encoding != mbfl_no_encoding_invalid) {
-                                       *entry++ = no_encoding;
-                                       n++;
                                } else {
-                                       ret = 0;
+                                       no_encoding = mbfl_name2no_encoding(p1);
+                                       if (no_encoding != mbfl_no_encoding_invalid) {
+                                               *entry++ = no_encoding;
+                                               n++;
+                                       } else {
+                                               ret = 0;
+                                       }
                                }
                                p1 = p2 + 1;
                        } while (n < size && p2 != NULL);
@@ -397,7 +409,7 @@
 
 /* {{{ MBSTRING_API php_mb_check_encoding_list */
 MBSTRING_API int php_mb_check_encoding_list(const char *encoding_list TSRMLS_DC) {
-       return php_mb_parse_encoding_list(encoding_list, strlen(encoding_list), NULL, 
NULL, 0); 
+       return php_mb_parse_encoding_list(encoding_list, strlen(encoding_list), NULL, 
NULL, 0 TSRMLS_CC);       
 }
 /* }}} */
 
@@ -407,19 +419,26 @@
  *  of parsed encodings.
  */
 static int
-php_mb_parse_encoding_array(zval *array, int **return_list, int *return_size, int 
persistent)
+php_mb_parse_encoding_array(zval *array, enum mbfl_no_encoding **return_list, int 
*return_size, int persistent TSRMLS_DC)
 {
        zval **hash_entry;
        HashTable *target_hash;
-       int i, n, l, size, bauto, *list, *entry, *src, ret = 1;
+       int i, n, l, size, bauto,ret = 1;
        enum mbfl_no_encoding no_encoding;
+       enum mbfl_no_encoding *src, *list, *entry;
 
        list = NULL;
        if (Z_TYPE_P(array) == IS_ARRAY) {
+               enum mbfl_no_encoding *identify_list;
+               int identify_list_size;
+
+               identify_list = MBSTRG(default_detect_order_list);
+               identify_list_size = MBSTRG(default_detect_order_list_size);
+
                target_hash = Z_ARRVAL_P(array);
                zend_hash_internal_pointer_reset(target_hash);
                i = zend_hash_num_elements(target_hash);
-               size = i + php_mb_default_identify_list_size;
+               size = i + identify_list_size;
                list = (int *)pecalloc(size, sizeof(int), persistent);
                if (list != NULL) {
                        entry = list;
@@ -430,23 +449,25 @@
                                        break;
                                }
                                convert_to_string_ex(hash_entry);
-                               no_encoding = 
mbfl_name2no_encoding(Z_STRVAL_PP(hash_entry));
-                               if (no_encoding == mbfl_no_encoding_auto) {
+                               if (strcasecmp(Z_STRVAL_PP(hash_entry), "auto") == 0) {
                                        if (!bauto) {
                                                bauto = 1;
-                                               l = php_mb_default_identify_list_size;
-                                               src = 
(int*)php_mb_default_identify_list;
+                                               l = identify_list_size; 
+                                               src = identify_list;
                                                while (l > 0) {
                                                        *entry++ = *src++;
                                                        l--;
                                                        n++;
                                                }
                                        }
-                               } else if (no_encoding != mbfl_no_encoding_invalid) {
-                                       *entry++ = no_encoding;
-                                       n++;
                                } else {
-                                       ret = 0;;
+                                       no_encoding = 
mbfl_name2no_encoding(Z_STRVAL_PP(hash_entry));
+                                       if (no_encoding != mbfl_no_encoding_invalid) {
+                                               *entry++ = no_encoding;
+                                               n++;
+                                       } else {
+                                               ret = 0;
+                                       }
                                }
                                zend_hash_move_forward(target_hash);
                                i--;
@@ -482,6 +503,25 @@
 }
 /* }}} */
 
+/* {{{ php_mb_nls_get_default_detect_order_list */
+static int php_mb_nls_get_default_detect_order_list(enum mbfl_no_language lang, enum 
mbfl_no_encoding **plist, int* plist_size)
+{
+       size_t i;
+
+       *plist = php_mb_default_identify_list_neut;
+       *plist_size = sizeof(php_mb_default_identify_list_neut) / 
sizeof(php_mb_default_identify_list_neut[0]);
+
+       for (i = 0; i < sizeof(php_mb_default_identify_list) / 
sizeof(php_mb_default_identify_list[0]); i++) {
+               if (php_mb_default_identify_list[i].lang == lang) {
+                       *plist = php_mb_default_identify_list[i].list;
+                       *plist_size = php_mb_default_identify_list[i].list_size;
+                       return 1;
+               }
+       }
+       return 0;
+}
+/* }}} */
+
 /* {{{ php.ini directive handler */
 static PHP_INI_MH(OnUpdate_mbstring_language)
 {
@@ -492,6 +532,7 @@
                return FAILURE;
        }
        MBSTRG(language) = no_language;
+       php_mb_nls_get_default_detect_order_list(no_language, 
&MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
        return SUCCESS;
 }
 /* }}} */
@@ -501,7 +542,7 @@
 {
        int *list, size;
 
-       if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1)) {
+       if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 
TSRMLS_CC)) {
                if (MBSTRG(detect_order_list) != NULL) {
                        free(MBSTRG(detect_order_list));
                }
@@ -520,7 +561,7 @@
 {
        int *list, size;
 
-       if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1)) {
+       if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 
TSRMLS_CC)) {
                if (MBSTRG(http_input_list) != NULL) {
                        free(MBSTRG(http_input_list));
                }
@@ -594,7 +635,7 @@
 {
        int *list, size;
 
-       if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1)) {
+       if (php_mb_parse_encoding_list(new_value, new_value_length, &list, &size, 1 
TSRMLS_CC)) {
                if (MBSTRG(script_encoding_list) != NULL) {
                        free(MBSTRG(script_encoding_list));
                }
@@ -690,6 +731,8 @@
        MBSTRG(detect_order_list_size) = 0;
        MBSTRG(current_detect_order_list) = NULL;
        MBSTRG(current_detect_order_list_size) = 0;
+       MBSTRG(default_detect_order_list) = php_mb_default_identify_list_neut;
+       MBSTRG(default_detect_order_list_size) = 
sizeof(php_mb_default_identify_list_neut) / 
sizeof(php_mb_default_identify_list_neut[0]);
        MBSTRG(filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
        MBSTRG(filter_illegal_substchar) = 0x3f;        /* '?' */
        MBSTRG(current_filter_illegal_mode) = MBFL_OUTPUTFILTER_ILLEGAL_MODE_CHAR;
@@ -838,8 +881,8 @@
                n = MBSTRG(detect_order_list_size);
        }
        if (n <= 0) {
-               list = (int*)php_mb_default_identify_list;
-               n = php_mb_default_identify_list_size;
+               list = MBSTRG(default_detect_order_list);
+               n = MBSTRG(default_detect_order_list_size);
        }
        entry = (int *)safe_emalloc(n, sizeof(int), 0);
        MBSTRG(current_detect_order_list) = entry;
@@ -990,6 +1033,7 @@
                        php_error_docref(NULL TSRMLS_CC, E_WARNING, "Unknown language 
\"%s\"", name);
                        RETURN_FALSE;
                } else {
+                       php_mb_nls_get_default_detect_order_list(no_language, 
&MBSTRG(default_detect_order_list), &MBSTRG(default_detect_order_list_size));
                        MBSTRG(current_language) = no_language;
                        RETURN_TRUE;
                }
@@ -1185,7 +1229,7 @@
                size = 0;
                switch (Z_TYPE_PP(arg1)) {
                case IS_ARRAY:
-                       if (!php_mb_parse_encoding_array(*arg1, &list, &size, 0)) {
+                       if (!php_mb_parse_encoding_array(*arg1, &list, &size, 0 
TSRMLS_CC)) {
                                if (list) {
                                        efree(list);
                                }
@@ -1194,7 +1238,7 @@
                        break;
                default:
                        convert_to_string_ex(arg1);
-                       if (!php_mb_parse_encoding_list(Z_STRVAL_PP(arg1), 
Z_STRLEN_PP(arg1), &list, &size, 0)) {
+                       if (!php_mb_parse_encoding_list(Z_STRVAL_PP(arg1), 
Z_STRLEN_PP(arg1), &list, &size, 0 TSRMLS_CC)) {
                                if (list) {
                                        efree(list);
                                }
@@ -1929,7 +1973,7 @@
        if (_from_encodings) {
                list = NULL;
                size = 0;
-           php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), 
&list, &size, 0);
+           php_mb_parse_encoding_list(_from_encodings, strlen(_from_encodings), 
&list, &size, 0 TSRMLS_CC);
                if (size == 1) {
                        from_encoding = *list;
                        string.no_encoding = from_encoding;
@@ -2151,7 +2195,7 @@
        if (ZEND_NUM_ARGS() >= 2 &&  Z_STRVAL_PP(arg_list)) {
                switch (Z_TYPE_PP(arg_list)) {
                case IS_ARRAY:
-                       if (!php_mb_parse_encoding_array(*arg_list, &list, &size, 0)) {
+                       if (!php_mb_parse_encoding_array(*arg_list, &list, &size, 0 
TSRMLS_CC)) {
                                if (list) {
                                        efree(list);
                                        size = 0;
@@ -2160,7 +2204,7 @@
                        break;
                default:
                        convert_to_string_ex(arg_list);
-                       if (!php_mb_parse_encoding_list(Z_STRVAL_PP(arg_list), 
Z_STRLEN_PP(arg_list), &list, &size, 0)) {
+                       if (!php_mb_parse_encoding_list(Z_STRVAL_PP(arg_list), 
Z_STRLEN_PP(arg_list), &list, &size, 0 TSRMLS_CC)) {
                                if (list) {
                                        efree(list);
                                        size = 0;
@@ -2432,11 +2476,11 @@
        elistsz = 0;
        switch (Z_TYPE_PP(args[1])) {
        case IS_ARRAY:
-               php_mb_parse_encoding_array(*args[1], &elist, &elistsz, 0);
+               php_mb_parse_encoding_array(*args[1], &elist, &elistsz, 0 TSRMLS_CC);
                break;
        default:
                convert_to_string_ex(args[1]);
-               php_mb_parse_encoding_list(Z_STRVAL_PP(args[1]), Z_STRLEN_PP(args[1]), 
&elist, &elistsz, 0);
+               php_mb_parse_encoding_list(Z_STRVAL_PP(args[1]), Z_STRLEN_PP(args[1]), 
&elist, &elistsz, 0 TSRMLS_CC);
                break;
        }
        if (elistsz <= 0) {
@@ -3366,7 +3410,7 @@
                /* make encoding list */
                list = NULL;
                size = 0;
-               php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 
0);
+               php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0 
TSRMLS_CC);
                
                if (size > 0 && list != NULL) {
                        elist = list;
@@ -3374,16 +3418,16 @@
                        elist = MBSTRG(current_detect_order_list);
                        size = MBSTRG(current_detect_order_list_size);
                        if (size <= 0){
-                               elist = (int*)php_mb_default_identify_list;
-                               size = php_mb_default_identify_list_size;
+                               elist = MBSTRG(default_detect_order_list);
+                               size = MBSTRG(default_detect_order_list_size);
                        }
                }
        } else {
                elist = MBSTRG(current_detect_order_list);
                size = MBSTRG(current_detect_order_list_size);
                if (size <= 0){
-                       elist = (int*)php_mb_default_identify_list;
-                       size = php_mb_default_identify_list_size;
+                       elist = MBSTRG(default_detect_order_list);
+                       size = MBSTRG(default_detect_order_list_size);
                }
        }
 
@@ -3465,7 +3509,7 @@
        /* make encoding list */
        list = NULL;
        size = 0;
-       php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0);
+       php_mb_parse_encoding_list(arg_list, strlen(arg_list), &list, &size, 0 
TSRMLS_CC);
        if (size <= 0) {
                return NULL;
        }

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to