Hi,

I have filed a bug and submitted patches for 'hkscs and gb18030 support'
for luit.

Bug ID is 1048. The patch is also attached. Please note that the patch
depends on the GB18030 font maps to work properly; these reside in the
XFree86 CVS under 'xc/fonts/encoding/large/gb18030*'.

Regards,
Zarick
-- 
ThizLinux Laboratory Limited
Address:     Unit 502-505, 5/F, Tower 3,
             Enterprise Square, 9 Sheung Yuet Road,
             Kowloon Bay, Hong Kong
Telephone:   (852) 2735 2725
Direct Line: (852) 3184 1311
Fax:         (852) 2111 0702
diff -u xc.orig/programs/luit/charset.c xc/programs/luit/charset.c
--- charset.c	22 Dec 2003 17:48:12 -0000	1.8
+++ charset.c	2 Jan 2004 03:42:49 -0000
@@ -154,6 +154,8 @@
     {"GBK", init_gbk, mapping_gbk, reverse_gbk, stack_gbk},
     {"UTF-8", init_utf8, mapping_utf8, reverse_utf8, stack_utf8},
     {"SJIS", init_sjis, mapping_sjis, reverse_sjis, stack_sjis},
+    {"BIG5-HKSCS", init_hkscs, mapping_hkscs, reverse_hkscs, stack_hkscs},
+    {"GB18030", init_gb18030, mapping_gb18030, reverse_gb18030, stack_gb18030},
     {0, 0, 0, 0, 0}
 };
 
@@ -418,6 +420,8 @@
     { "gbk", 0, 1, NULL, NULL, NULL, NULL, "GBK"},
     { "UTF-8", 0, 1, NULL, NULL, NULL, NULL, "UTF-8"},
     { "SJIS", 0, 1, NULL, NULL, NULL, NULL, "SJIS"},
+    { "Big5-HKSCS", 0, 1, NULL, NULL, NULL, NULL, "BIG5-HKSCS"},
+    { "gb18030", 0, 1, NULL, NULL, NULL, NULL, "GB18030"},
     { 0, 0, 0, 0, 0, 0, 0}
 };
 
diff -u xc.orig/programs/luit/iso2022.c xc/programs/luit/iso2022.c
--- iso2022.c	8 Dec 2002 20:19:49 -0000	1.9
+++ iso2022.c	2 Jan 2004 03:42:49 -0000
@@ -623,7 +623,7 @@
                     is->parserState = P_ESC;
                 } else if(OTHER(is) != NULL) {
                     int c = OTHER(is)->other_stack(*s, OTHER(is)->other_aux);
-                    if(c >= 0) {
+                    if(c != -1) {
                         outbufUTF8(is, fd, OTHER(is)->other_recode(c, OTHER(is)->other_aux));
                         is->shiftState = S_NORMAL;
                     }
diff -u xc.orig/programs/luit/other.c xc/programs/luit/other.c
--- other.c	17 Oct 2002 01:06:09 -0000	1.1
+++ other.c	2 Jan 2004 03:42:49 -0000
@@ -244,3 +244,182 @@
     }
 }
 
+int
+init_hkscs(OtherStatePtr s)
+{   /* Set up the Big5-HKSCS state; returns 1 on success, 0 if either lookup fails. */
+    s->hkscs.mapping =
+        FontEncMapFind("big5hkscs-0", FONT_ENCODING_UNICODE, -1, -1, NULL);
+    if(!s->hkscs.mapping) return 0;
+    /* Build the reverse lookup from the forward map found above. */
+    s->hkscs.reverse = FontMapReverse(s->hkscs.mapping);
+    if(!s->hkscs.reverse) return 0;
+    /* buf == -1 is what stack_hkscs tests (buf < 0) for "no pending byte". */
+    s->hkscs.buf = -1;
+    return 1;
+}
+
+unsigned int
+mapping_hkscs(unsigned int n, OtherStatePtr s)
+{   /* Recode n through s->hkscs.mapping (the map loaded by init_hkscs). */
+    unsigned int r;
+    if(n < 128) return n;               /* below 128: returned unchanged */
+    if(n == 128) return EURO_10646;     /* 128 is special-cased to EURO_10646 */
+    r = FontEncRecode(n, s->hkscs.mapping);
+    return r;
+}
+
+unsigned int
+reverse_hkscs(unsigned int n, OtherStatePtr s)
+{   /* Opposite direction of mapping_hkscs, using the s->hkscs.reverse table. */
+    if(n < 128) return n;               /* below 128: returned unchanged */
+    if(n == EURO_10646) return 128;     /* mirror of the 128 special case */
+    return s->hkscs.reverse->reverse(n, s->hkscs.reverse->data);
+}
+
+int
+stack_hkscs(unsigned char c, OtherStatePtr s)
+{   /* Feed one input byte; returns a byte or two-byte value, or -1 if none is produced. */
+    if(s->hkscs.buf < 0) {              /* no byte pending */
+        if(c < 129) return c;           /* values below 129 pass straight through */
+        s->hkscs.buf = c;               /* otherwise hold c as the first byte */
+	return -1;
+    } else {
+        int b;
+        if(c < 0x40 || c == 0x7F) {     /* not accepted as a second byte: */
+            s->hkscs.buf = -1;          /* drop the pending byte, return c alone */
+            return c;
+        }
+        if(s->hkscs.buf < 0xFF && c < 0xFF)
+            b = (s->hkscs.buf << 8) + c;    /* combine as (first << 8) + second */
+        else
+            b = -1;                     /* 0xFF in either position: no result */
+        s->hkscs.buf = -1;
+        return b;
+    }
+}
+
+
+/*
+ *  Because GB18030 characters are 1 to 4 bytes long, the character set
+ *  encoding is split into two subsets (besides Latin):
+ *  two-byte characters are defined in gb18030.2000-0;
+ *  four-byte characters are defined in gb18030.2000-1.
+ *  Please note that the mapping in 2000-1 is not a mapping from the four-byte
+ *  sequence to a two-byte value.
+ *  To use 2000-1 we need to linearize the four-byte sequence and then look
+ *  up the Unicode value with the resulting index.
+ *
+ *  For more information on the GB18030 standard, please see:
+ *    http://oss.software.ibm.com/icu/docs/papers/gb18030.html
+ *
+ *  For more information on GB18030 implementation issues in XFree86, see:
+ *    http://www.ibm.com/developerWorks/cn/linux/i18n/gb18030/xfree86/part1
+ */
+int
+init_gb18030(OtherStatePtr s)
+{   /* Load both GB18030 maps and their reverses; 1 on success, 0 on any failure. */
+    s->gb18030.cs0_mapping =
+        FontEncMapFind("gb18030.2000-0", FONT_ENCODING_UNICODE, -1, -1, NULL);
+    if(!s->gb18030.cs0_mapping) return 0;
+
+    s->gb18030.cs0_reverse = FontMapReverse(s->gb18030.cs0_mapping);
+    if(!s->gb18030.cs0_reverse) return 0;
+    /* Same two steps for the second map, gb18030.2000-1. */
+    s->gb18030.cs1_mapping =
+        FontEncMapFind("gb18030.2000-1", FONT_ENCODING_UNICODE, -1, -1, NULL);
+    if(!s->gb18030.cs1_mapping) return 0;
+
+    s->gb18030.cs1_reverse = FontMapReverse(s->gb18030.cs1_mapping);
+    if(!s->gb18030.cs1_reverse) return 0;
+    /* Start with no buffered bytes and the linear flag cleared. */
+    s->gb18030.linear  = 0;
+    s->gb18030.buf_ptr = 0;
+    return 1;
+}
+
+unsigned int
+mapping_gb18030(unsigned int n, OtherStatePtr s)
+{   /* Recode n through the map selected by the gb18030.linear flag. */
+    if(n <= 0x80)   return n;       /* 0x80 is a valid but unassigned code point */
+    if(n >= 0xFFFF) return '?';     /* values of 0xFFFF and above become '?' */
+    /* NOTE(review): the n <= 0x80 shortcut also applies when linear is set - confirm. */
+    return FontEncRecode(n,         /* cs1 (2000-1) when linear, else cs0 (2000-0) */
+            (s->gb18030.linear)?s->gb18030.cs1_mapping:s->gb18030.cs0_mapping);
+}
+
+unsigned int
+reverse_gb18030(unsigned int n, OtherStatePtr s)
+{
+    /* Try the 2000-0 reverse table first.  If that returns 0, try the 2000-1 */
+    /* table and expand its linear result back into a four-byte sequence. */
+    unsigned int r;
+    if(n <= 0x80) return n;
+
+    r = s->gb18030.cs0_reverse->reverse(n, s->gb18030.cs0_reverse->data);
+    if (r != 0)
+        return r;
+
+    r = s->gb18030.cs1_reverse->reverse(n, s->gb18030.cs1_reverse->data);
+    if (r != 0) {
+        unsigned char bytes[4];
+        /* Undo the linear formula used by stack_gb18030, last byte first. */
+        bytes[3] = 0x30 + r % 10;   r /= 10;
+        bytes[2] = 0x81 + r % 126;  r /= 126;
+        bytes[1] = 0x30 + r % 10;   r /= 10;
+        bytes[0] = 0x81 + r;
+        /* Pack the four bytes into one value, bytes[0] in the top 8 bits. */
+        r  = (unsigned int)bytes[0] << 24;
+        r |= (unsigned int)bytes[1] << 16;
+        r |= (unsigned int)bytes[2] << 8;
+        r |= (unsigned int)bytes[3];
+    }
+    return r;                       /* still 0 here if both lookups returned 0 */
+}
+
+int
+stack_gb18030(unsigned char c, OtherStatePtr s)
+{   /* Feed one input byte; returns a completed value, or -1 if none is produced. */
+    /* When gb18030.linear is set to 1, the returned value is a linear index. */
+    if(s->gb18030.buf_ptr == 0) {
+        if(c <= 0x80) return c;             /* single byte: passed through */
+        if (c == 0xFF) return -1;           /* 0xFF cannot start a sequence */
+        s->gb18030.linear = 0;
+        s->gb18030.buf[s->gb18030.buf_ptr++] = c;
+        return -1;
+    } else if (s->gb18030.buf_ptr == 1) {
+        if (c >= 0x40) {                    /* second byte of a two-byte code */
+            s->gb18030.buf_ptr = 0;
+            if ((c == 0x7F) || (c == 0xFF)) /* trail is 0x40-0x7E or 0x80-0xFE */
+                return -1;
+            else
+                return (s->gb18030.buf[0] << 8) + c;
+        } else if ((c >= 0x30) && (c <= 0x39)) {   /* 2nd of 4 bytes: 0x30-0x39 */
+            s->gb18030.buf[s->gb18030.buf_ptr++] = c;
+            return -1;
+        } else {
+            s->gb18030.buf_ptr = 0;         /* invalid: drop the lead, return c */
+            return c;
+        }
+    } else if (s->gb18030.buf_ptr == 2) {
+        if ((c >= 0x81) && (c <= 0xFE)) {   /* third byte of a four-byte code */
+            s->gb18030.buf[s->gb18030.buf_ptr++] = c;
+            return -1;
+        } else {
+            s->gb18030.buf_ptr = 0;         /* invalid: drop buffered bytes */
+            return c;
+        }
+    } else {
+        int r = 0;
+        s->gb18030.buf_ptr = 0;
+        if ((c >= 0x30) && (c <= 0x39)) {   /* fourth byte completes the code */
+            s->gb18030.linear = 1;
+            r = (((s->gb18030.buf[0] - 0x81) * 10
+                        + (s->gb18030.buf[1] - 0x30)) * 126
+                    + (s->gb18030.buf[2] - 0x81)) * 10
+                + (c - 0x30);
+            return r;
+        }
+        return -1;
+    }
+}
+
diff -u xc.orig/programs/luit/other.h xc/programs/luit/other.h
--- other.h	17 Oct 2002 01:06:09 -0000	1.1
+++ other.h	2 Jan 2004 03:42:49 -0000
@@ -40,10 +40,30 @@
     int buf;
 } aux_sjis;
 
+typedef struct {                /* state used by the *_hkscs functions */
+    FontMapPtr mapping;         /* forward map, set by init_hkscs */
+    FontMapReversePtr reverse;  /* reverse of mapping, set by init_hkscs */
+    int buf;                    /* pending first byte, or -1 when none */
+} aux_hkscs;
+
+typedef struct {                        /* state used by the *_gb18030 functions */
+    FontMapPtr          cs0_mapping;    /* gb18030.2000-0 */
+    FontMapReversePtr   cs0_reverse;    /* reverse of cs0_mapping */
+
+    FontMapPtr          cs1_mapping;    /* gb18030.2000-1 */
+    FontMapReversePtr   cs1_reverse;    /* reverse of cs1_mapping */
+
+    int     linear;     /* set to 1 when stack_gb18030 linearized a 4-byte sequence */
+    int     buf[3];     /* bytes of the current sequence buffered by stack_gb18030 */
+    int     buf_ptr;    /* number of bytes currently held in buf (0 to 3) */
+} aux_gb18030;
+
 typedef union {
     aux_gbk gbk;
     aux_utf8 utf8;
     aux_sjis sjis;
+    aux_hkscs hkscs;
+    aux_gb18030 gb18030;
 } OtherState, *OtherStatePtr;
 
 int init_gbk(OtherStatePtr);
@@ -60,4 +80,14 @@
 unsigned int mapping_sjis(unsigned int, OtherStatePtr);
 unsigned int reverse_sjis(unsigned int, OtherStatePtr);
 int stack_sjis(unsigned char, OtherStatePtr);
+
+int init_hkscs(OtherStatePtr);
+unsigned int mapping_hkscs(unsigned int, OtherStatePtr);
+unsigned int reverse_hkscs(unsigned int, OtherStatePtr);
+int stack_hkscs(unsigned char, OtherStatePtr);
+
+int init_gb18030(OtherStatePtr);
+unsigned int mapping_gb18030(unsigned int, OtherStatePtr);
+unsigned int reverse_gb18030(unsigned int, OtherStatePtr);
+int stack_gb18030(unsigned char, OtherStatePtr);
 

Reply via email to