hirokawa Sun, 21 Aug 2011 05:02:33 +0000
Revision: http://svn.php.net/viewvc?view=revision&revision=315252
Log:
MFH: cleanup jisx0213 table.
Changed paths:
U
php/php-src/branches/PHP_5_4/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c
U
php/php-src/branches/PHP_5_4/ext/mbstring/libmbfl/filters/unicode_table_jis2004.h
Modified:
php/php-src/branches/PHP_5_4/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c
===================================================================
---
php/php-src/branches/PHP_5_4/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c
2011-08-21 05:02:21 UTC (rev 315251)
+++
php/php-src/branches/PHP_5_4/ext/mbstring/libmbfl/filters/mbfilter_sjis_2004.c
2011-08-21 05:02:33 UTC (rev 315252)
@@ -547,7 +547,7 @@
}
}
- /* check for major japanese chars */
+ /* check for major japanese chars: U+4E00 - U+9FFF */
if (s1 <= 0) {
for (k=0; k < uni2jis_tbl_len ;k++) {
if (c >= uni2jis_tbl_range[k][0] && c <=
uni2jis_tbl_range[k][1]) {
@@ -557,7 +557,7 @@
}
}
- /* check for japanese chars in compressed area */
+ /* check for japanese chars in compressed mapping area: U+1E00 - U+4DBF
*/
if (s1 <= 0 && c >= ucs_c1_jisx0213_min && c <= ucs_c1_jisx0213_max) {
k = mbfl_bisec_srch(c, ucs_c1_jisx0213_tbl,
ucs_c1_jisx0213_tbl_len);
if (k >= 0) {
@@ -574,14 +574,24 @@
}
if (s1 <= 0) {
+ /* CJK Compatibility Forms: U+FE30 - U+FE4F */
+ if (c == 0xfe45) {
+ s1 = 0x233e;
+ } else if (c == 0xfe46) {
+ s1 = 0x233d;
+ } else if (c >= 0xf91d && c <= 0xf9dc) {
+ /* CJK Compatibility Ideographs (U+F900 - U+FAFF): U+F91D - U+F9DC subset */
+ k = mbfl_bisec_srch2(c, ucs_r2b_jisx0213_cmap_key,
ucs_r2b_jisx0213_cmap_len);
+ if (k >= 0) {
+ s1 = ucs_r2b_jisx0213_cmap_val[k];
+ }
+ }
+ }
+
+ if (s1 <= 0) {
c1 = c & ~MBFL_WCSPLANE_MASK;
if (c1 == MBFL_WCSPLANE_JIS0213) {
s1 = c & MBFL_WCSPLANE_MASK;
- } else {
- k = mbfl_bisec_srch2(c, jisx0213_uni2sjis_cmap_key,
jisx0213_uni2sjis_cmap_len);
- if (k >= 0) {
- s1 = jisx0213_uni2sjis_cmap_val[k];
- }
}
if (c == 0) {
s1 = 0;
Modified:
php/php-src/branches/PHP_5_4/ext/mbstring/libmbfl/filters/unicode_table_jis2004.h
===================================================================
---
php/php-src/branches/PHP_5_4/ext/mbstring/libmbfl/filters/unicode_table_jis2004.h
2011-08-21 05:02:21 UTC (rev 315251)
+++
php/php-src/branches/PHP_5_4/ext/mbstring/libmbfl/filters/unicode_table_jis2004.h
2011-08-21 05:02:33 UTC (rev 315252)
@@ -4585,6 +4585,7 @@
static const int ucs_i_jisx0213_table_max = 0x4E00 +
(sizeof(ucs_i_jisx0213_table)/
sizeof(unsigned short));
+/* Halfwidth and Fullwidth Forms */
static const unsigned short ucs_r_jisx0213_table[] = { // 0xff00 - 0xffe5
/* FF00h */
@@ -4622,6 +4623,7 @@
static const int ucs_r_jisx0213_table_max = 0xFF00 +
(sizeof(ucs_r_jisx0213_table)/
sizeof(unsigned short));
+/* CJK Compatibility Ideographs : U+F900 - U+FAFF */
static const unsigned short ucs_r2_jisx0213_table[] = { // 0xfa0f - 0xfa6a
0x2F4B,
0x2F57,0x4F72,0x0000,0x8679,0x757A,0x775A,0x776F,0x0000,
@@ -4640,8 +4642,47 @@
static const int ucs_r2_jisx0213_min = 0xFA0F;
static const int ucs_r2_jisx0213_max = 0xFA6A;
-static const unsigned short ucs_c1_jisx0213_tbl[] = {
- // 0x1e00 - 0x4dff
+/*
+ CJK Compatibility Ideographs: U+F900 - U+FAFF (separate mapping for U+F9XX)
+*/
+static const unsigned short ucs_r2b_jisx0213_cmap_key[] = {
+ 0xf91d,0xf928,0xf929,0xf936,0xf970,0xf9d0,0xf9dc};
+
+static const unsigned short ucs_r2b_jisx0213_cmap_val[] = {
+ 0x763b,0x742e,0x754e,0x7b4f,0x7649,0x7e24,0x7d5d};
+
+static const int ucs_r2b_jisx0213_cmap_len =
+ sizeof(ucs_r2b_jisx0213_cmap_key)/sizeof(unsigned short);
+
+/*
+ U+1E00 - U+4DBF in compressed mapping
+
+ Latin Extended Additional: U+1E00 - U+1EFF
+ Greek Extended: U+1F00 - U+1FFF
+ General Punctuation: U+2000 - U+206F
+ Currency Symbols: U+20A0 - U+20CF
+ Combining Diacritical Marks for Symbols: U+20D0 - U+20FF
+ Number Forms: U+2150 - U+218F
+ Arrows : U+2190 - U+21FF
+ Mathematical Operators : U+2200 - U+22FF
+ Miscellaneous Technical : U+2300 - U+23FF
+ Enclosed Alphanumerics : U+2460 - U+24FF
+ Box Drawing: U+2500 - U+257F
+ Geometric Shapes: U+25A0 - U+25FF
+ Miscellaneous Symbols : U+2600 - U+26FF
+ Dingbats : U+2700 - U+27BF
+ Supplemental Arrows-B: U+2900 - U+297F
+ Miscellaneous Mathematical Symbols-B: U+2980 - U+29FF
+ CJK Symbols and Punctuation: U+3000 - U+303F
+ Hiragana (*1) : U+3040 - U+309F
+ Katakana (*1): U+30A0 - U+30FF
+ CJK Strokes: U+31C0 - U+31EF
+ Katakana Phonetic Extensions : U+31F0 - U+31FF
+ CJK Unified Ideographs Extension A: U+3400 - U+4DBF
+
+ *1 U+3000 - U+30FF is also defined in ucs_hk to optimize for speed.
+ */
+static const unsigned short ucs_c1_jisx0213_tbl[] = { // 0x1e00 - 0x4dff
0x1E3E,0x1E3F,0x1F70,0x1F71,0x1F72,0x1F73,0x2010,0x2010,
0x2013,0x2013,0x2014,0x2014,0x2016,0x2016,0x2018,0x2019,
0x201C,0x201D,0x2020,0x2021,0x2022,0x2022,0x2025,0x2025,
@@ -4970,12 +5011,14 @@
static const int jisx0213_u5_tbl_max = 0x2A6B2;
static const int jisx0213_u5_tbl_len =
sizeof(jisx0213_u5_jis_key)/sizeof(unsigned short);
+/* combined chars in JIS X 0213 */
static const unsigned short jisx0213_u2_key[] = {
0x2477,0x2478,0x2479,0x247A,0x247B,0x2577,0x2578,0x2579,
0x257A,0x257B,0x257C,0x257D,0x257E,0x2678,0x2B44,0x2B48,
0x2B49,0x2B4A,0x2B4B,0x2B4C,0x2B4D,0x2B4E,0x2B4F,0x2B65,
0x2B66};
+/* combined pairs in Unicode */
static const unsigned short jisx0213_u2_tbl[] = {
0x304B,0x309A,0x304D,0x309A,0x304F,0x309A,0x3051,0x309A,
0x3053,0x309A,0x30AB,0x309A,0x30AD,0x309A,0x30AF,0x309A,
@@ -4985,6 +5028,7 @@
0x0259,0x0301,0x025A,0x0300,0x025A,0x0301,0x02E9,0x02E5,
0x02E5,0x02E9};
+/* fallback chars for combined chars in Unicode */
static const unsigned short jisx0213_u2_fb_tbl[] = {
0x242B,0x242D,0x242F,0x2431,0x2433,0x252B,0x252D,0x252F,
0x2531,0x2533,0x253B,0x2544,0x2548,0x2675,0x295C,0x2B38,
@@ -4993,20 +5037,7 @@
static const int jisx0213_u2_tbl_len = sizeof(jisx0213_u2_key)/sizeof(unsigned
short);
-static const unsigned short jisx0213_uni2sjis_cmap_key[] = {
- 0xf91d,0xf928,0xf929,0xf936,0xf970,0xf9d0,0xf9dc,
- 0xfe45,0xfe46,0xffe5,
-};
-static const unsigned short jisx0213_uni2sjis_cmap_val[] = {
- 0x763b,0x742e,0x754e,0x7b4f,0x7649,0x7e24,0x7d5d,
- 0x233e,0x233d,0x216f,
-};
-
-static const int jisx0213_uni2sjis_cmap_len =
- sizeof(jisx0213_uni2sjis_cmap_key)/sizeof(unsigned short);
-
-
static const unsigned short jisx0213_p2_ofst[] = {
1, 8, 3, 4, 5, 12, 13, 14, 15, 78, 79, 80, 81, 82,
83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94};
--
PHP CVS Mailing List (http://www.php.net/)
To unsubscribe, visit: http://www.php.net/unsub.php