hirokawa                                 Sun, 21 Aug 2011 05:02:33 +0000

Revision: http://svn.php.net/viewvc?view=revision&revision=315252

Log:
MFH: cleanup jisx0213 table.

Changed paths:
    U   
php/php-src/branches/PHP_5_4/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c
    U   
php/php-src/branches/PHP_5_4/ext/mbstring/libmbfl/filters/unicode_table_jis2004.h

Modified: 
php/php-src/branches/PHP_5_4/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c
===================================================================
--- 
php/php-src/branches/PHP_5_4/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c  
    2011-08-21 05:02:21 UTC (rev 315251)
+++ 
php/php-src/branches/PHP_5_4/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c  
    2011-08-21 05:02:33 UTC (rev 315252)
@@ -547,7 +547,7 @@
                }
        }

-       /* check for major japanese chars */
+       /* check for major japanese chars: U+4E00 - U+9FFF */
        if (s1 <= 0) {
                for (k=0; k < uni2jis_tbl_len ;k++) {
                        if (c >= uni2jis_tbl_range[k][0] && c <= 
uni2jis_tbl_range[k][1]) {
@@ -557,7 +557,7 @@
                }
        }

-       /* check for japanese chars in compressed area */
+       /* check for japanese chars in compressed mapping area: U+1E00 - U+4DBF 
*/
        if (s1 <= 0 && c >= ucs_c1_jisx0213_min && c <= ucs_c1_jisx0213_max) {
                k = mbfl_bisec_srch(c, ucs_c1_jisx0213_tbl, 
ucs_c1_jisx0213_tbl_len);
                if (k >= 0) {
@@ -574,14 +574,24 @@
        }

        if (s1 <= 0) {
+               /* CJK Compatibility Forms: U+FE30 - U+FE4F */
+               if (c == 0xfe45) {
+                       s1 = 0x233e;
+               } else if (c == 0xfe46) {
+                       s1 = 0x233d;
+               } else if (c >= 0xf91d && c <= 0xf9dc) {
+                       /* CJK Compatibility Ideographs: U+F91D - U+F9DC (U+F9XX mapping) */
+                       k = mbfl_bisec_srch2(c, ucs_r2b_jisx0213_cmap_key, 
ucs_r2b_jisx0213_cmap_len);
+                       if (k >= 0) {
+                               s1 = ucs_r2b_jisx0213_cmap_val[k];
+                       }
+               }
+       }
+
+       if (s1 <= 0) {
                c1 = c & ~MBFL_WCSPLANE_MASK;
                if (c1 == MBFL_WCSPLANE_JIS0213) {
                        s1 = c & MBFL_WCSPLANE_MASK;
-               } else {
-                       k = mbfl_bisec_srch2(c, jisx0213_uni2sjis_cmap_key, 
jisx0213_uni2sjis_cmap_len);
-                       if (k >= 0) {
-                               s1 = jisx0213_uni2sjis_cmap_val[k];
-                       }
                }
                if (c == 0) {
                        s1 = 0;

Modified: 
php/php-src/branches/PHP_5_4/ext/mbstring/libmbfl/filters/unicode_table_jis2004.h
===================================================================
--- 
php/php-src/branches/PHP_5_4/ext/mbstring/libmbfl/filters/unicode_table_jis2004.h
   2011-08-21 05:02:21 UTC (rev 315251)
+++ 
php/php-src/branches/PHP_5_4/ext/mbstring/libmbfl/filters/unicode_table_jis2004.h
   2011-08-21 05:02:33 UTC (rev 315252)
@@ -4585,6 +4585,7 @@
 static const int ucs_i_jisx0213_table_max = 0x4E00 + 
(sizeof(ucs_i_jisx0213_table)/
                                                                                
           sizeof(unsigned short));

+/* Halfwidth and Fullwidth Forms */
 static const unsigned short ucs_r_jisx0213_table[] = { // 0xff00 - 0xffe5

 /* FF00h */
@@ -4622,6 +4623,7 @@
 static const int ucs_r_jisx0213_table_max = 0xFF00 + 
(sizeof(ucs_r_jisx0213_table)/
                                                                                
                          sizeof(unsigned short));

+/* CJK Compatibility Ideographs : U+F900 - U+FAFF  */
 static const unsigned short ucs_r2_jisx0213_table[] = { // 0xfa0f - 0xfa6a
 0x2F4B,
 0x2F57,0x4F72,0x0000,0x8679,0x757A,0x775A,0x776F,0x0000,
@@ -4640,8 +4642,47 @@
 static const int ucs_r2_jisx0213_min = 0xFA0F;
 static const int ucs_r2_jisx0213_max = 0xFA6A;

-static const unsigned short ucs_c1_jisx0213_tbl[] = {
- // 0x1e00 - 0x4dff
+/*
+   CJK Compatibility Ideographs: U+F900 - U+FAFF (separate mapping for U+F9XX)
+*/
+static const unsigned short ucs_r2b_jisx0213_cmap_key[] = {
+       0xf91d,0xf928,0xf929,0xf936,0xf970,0xf9d0,0xf9dc};
+
+static const unsigned short ucs_r2b_jisx0213_cmap_val[] = {
+       0x763b,0x742e,0x754e,0x7b4f,0x7649,0x7e24,0x7d5d};
+
+static const int ucs_r2b_jisx0213_cmap_len =
+       sizeof(ucs_r2b_jisx0213_cmap_key)/sizeof(unsigned short);
+
+/*
+  U+1E00 - U+4DBF in compressed mapping
+
+  Latin Extended Additional: U+1E00 - U+1EFF
+  Greek Extended: U+1F00 - U+1FFF
+  General Punctuation: U+2000 - U+206F
+  Currency Symbols: U+20A0 - U+20CF
+  Combining Diacritical Marks for Symbols: U+20D0 - U+20FF
+  Number Forms: U+2150 - U+218F
+  Arrows : U+2190 - U+21FF
+  Mathematical Operators : U+2200 - U+22FF
+  Miscellaneous Technical : U+2300 - U+23FF
+  Enclosed Alphanumerics : U+2460 - U+24FF
+  Box Drawing: U+2500 - U+257F
+  Geometric Shapes: U+25A0 - U+25FF
+  Miscellaneous Symbols : U+2600 - U+26FF
+  Dingbats : U+2700 - U+27BF
+  Supplemental Arrows-B: U+2900 - U+297F
+  Miscellaneous Mathematical Symbols-B: U+2980 - U+29FF
+  CJK Symbols and Punctuation: U+3000 - U+303F
+  Hiragana (*1) : U+3040 - U+309F
+  Katakana (*1): U+30A0 - U+30FF
+  CJK Strokes: U+31C0 - U+31EF
+  Katakana Phonetic Extensions : U+31F0 - U+31FF
+  CJK Unified Ideographs Extension A: U+3400 - U+4DBF
+
+  *1 U+3000 - U+30FF is also defined in ucs_hk to optimize for speed.
+ */
+static const unsigned short ucs_c1_jisx0213_tbl[] = { // 0x1e00 - 0x4dff
 0x1E3E,0x1E3F,0x1F70,0x1F71,0x1F72,0x1F73,0x2010,0x2010,
 0x2013,0x2013,0x2014,0x2014,0x2016,0x2016,0x2018,0x2019,
 0x201C,0x201D,0x2020,0x2021,0x2022,0x2022,0x2025,0x2025,
@@ -4970,12 +5011,14 @@
 static const int jisx0213_u5_tbl_max = 0x2A6B2;
 static const int jisx0213_u5_tbl_len = 
sizeof(jisx0213_u5_jis_key)/sizeof(unsigned short);

+/* combined chars in JIS X 0213 */
 static const unsigned short jisx0213_u2_key[] = {
        0x2477,0x2478,0x2479,0x247A,0x247B,0x2577,0x2578,0x2579,
        0x257A,0x257B,0x257C,0x257D,0x257E,0x2678,0x2B44,0x2B48,
        0x2B49,0x2B4A,0x2B4B,0x2B4C,0x2B4D,0x2B4E,0x2B4F,0x2B65,
        0x2B66};

+/* combined pairs in Unicode */
 static const unsigned short jisx0213_u2_tbl[] = {
        0x304B,0x309A,0x304D,0x309A,0x304F,0x309A,0x3051,0x309A,
        0x3053,0x309A,0x30AB,0x309A,0x30AD,0x309A,0x30AF,0x309A,
@@ -4985,6 +5028,7 @@
        0x0259,0x0301,0x025A,0x0300,0x025A,0x0301,0x02E9,0x02E5,
        0x02E5,0x02E9};

+/* fallback chars for combined chars in Unicode */
 static const unsigned short jisx0213_u2_fb_tbl[] = {
        0x242B,0x242D,0x242F,0x2431,0x2433,0x252B,0x252D,0x252F,
        0x2531,0x2533,0x253B,0x2544,0x2548,0x2675,0x295C,0x2B38,
@@ -4993,20 +5037,7 @@

 static const int jisx0213_u2_tbl_len = sizeof(jisx0213_u2_key)/sizeof(unsigned 
short);

-static const unsigned short jisx0213_uni2sjis_cmap_key[] = {
-       0xf91d,0xf928,0xf929,0xf936,0xf970,0xf9d0,0xf9dc,
-       0xfe45,0xfe46,0xffe5,
-};

-static const unsigned short jisx0213_uni2sjis_cmap_val[] = {
-       0x763b,0x742e,0x754e,0x7b4f,0x7649,0x7e24,0x7d5d,
-       0x233e,0x233d,0x216f,
-};
-
-static const int jisx0213_uni2sjis_cmap_len =
-       sizeof(jisx0213_uni2sjis_cmap_key)/sizeof(unsigned short);
-
-
 static const unsigned short jisx0213_p2_ofst[] = {
        1, 8, 3, 4, 5, 12, 13, 14, 15, 78, 79, 80, 81, 82,
        83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94};

-- 
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php

Reply via email to