Hello,
As PostgreSQL is widely used around the world, many Chinese users are looking
forward to using such a high-performance database management
system. However, since the new Chinese code page standard GB18030 is not
completely supported, the use of PostgreSQL in China is limited.
Now I have managed to implement GB18030 support on top of the latest
version, so the following features become available once the patches are applied:
- Chinese GB18030 encoding is available on the front-end side, while on the
backend side, EUC_CN or MIC is used.
- Encoding conversion between MIC and GB18030 is implemented.
- GB18030 locale support is available on the front-end side.
- A GB18030 locale test is added.
Any help with testing these patches, and any suggestions for GB18030
support, are greatly appreciated.
Best Regards,
Bill
--
/---------------------------/
Bill Huang
E-mail:[EMAIL PROTECTED]
Cell phone:090-9979-4631
/---------------------------/
--- postgresql-7.2.1/src/backend/utils/mb/conv.c.org Thu Jun 6 11:52:24 2002
+++ postgresql-7.2.1/src/backend/utils/mb/conv.c Thu Jun 6 12:20:36 2002
@@ -502,6 +502,96 @@
}
/*
+ * GB18030 ---> MIC
+ * Added by Bill Huang <[EMAIL PROTECTED]>,<[EMAIL PROTECTED]>
+ */
+static void
+gb180302mic(unsigned char *gb18030, unsigned char *p, int len)
+{
+	int			c1;
+	int			c2;
+
+	/*
+	 * Copy GB18030 text to the output buffer one character at a time.
+	 *
+	 * gb18030: input bytes; conversion stops at a NUL byte or after len bytes.
+	 * p:       output buffer; receives the copied bytes plus a trailing NUL.
+	 * len:     number of input bytes available.
+	 *
+	 * Well-formed characters are copied byte for byte (no prefix byte is
+	 * added).  A byte that does not start a complete, well-formed character
+	 * is dropped and scanning resumes at the following byte.
+	 */
+	while (len > 0 && (c1 = *gb18030) != '\0')
+	{
+		if (c1 < 0x80)
+		{
+			/* ASCII: one byte in, one byte out */
+			*p++ = *gb18030++;
+			len--;
+			continue;
+		}
+
+		/* Only look at the second byte when it lies inside the input. */
+		c2 = (c1 >= 0x81 && c1 <= 0xfe && len >= 2) ? gb18030[1] : 0;
+
+		if ((c2 >= 0x40 && c2 <= 0x7e) || (c2 >= 0x80 && c2 <= 0xfe))
+		{
+			/* two-byte character: copy exactly two bytes */
+			*p++ = *gb18030++;
+			*p++ = *gb18030++;
+			len -= 2;
+		}
+		else if (c2 >= 0x30 && c2 <= 0x39 && len >= 4 &&
+				 gb18030[2] >= 0x81 && gb18030[2] <= 0xfe &&
+				 gb18030[3] >= 0x30 && gb18030[3] <= 0x39)
+		{
+			/*
+			 * four-byte character: second and fourth bytes are 0x30..0x39,
+			 * third byte is 0x81..0xfe.  Copy exactly four bytes.
+			 */
+			*p++ = *gb18030++;
+			*p++ = *gb18030++;
+			*p++ = *gb18030++;
+			*p++ = *gb18030++;
+			len -= 4;
+		}
+		else
+		{
+			/* stray or truncated byte: throw it away and resynchronize */
+			gb18030++;
+			len--;
+		}
+	}
+	*p = '\0';
+}
+
+/*
+ * MIC ---> GB18030
+ * Added by Bill Huang <[EMAIL PROTECTED]>,<[EMAIL PROTECTED]>
+ */
+static void
+mic2gb18030(unsigned char *mic, unsigned char *p, int len)
+{
+	int			c1;
+	int			c2;
+
+	/*
+	 * Copy text back out as GB18030, one character at a time.
+	 *
+	 * mic: input bytes; conversion stops at a NUL byte or after len bytes.
+	 * p:   output buffer; receives the converted bytes plus a trailing NUL.
+	 * len: number of input bytes available.
+	 *
+	 * ASCII and well-formed two-/four-byte sequences are copied unchanged,
+	 * and len is reduced by the number of bytes actually consumed.  Anything
+	 * else is handed to printBogusChar() starting at the offending lead
+	 * byte; len is then reduced by pg_mic_mblen() for that byte.
+	 * NOTE(review): printBogusChar() and pg_mic_mblen() are defined outside
+	 * this patch -- presumably printBogusChar() advances mic by the same
+	 * amount pg_mic_mblen() reports; confirm.
+	 */
+	while (len > 0 && (c1 = *mic) != '\0')
+	{
+		if (c1 <= 0x7f)
+		{
+			/* ASCII */
+			*p++ = *mic++;
+			len--;
+			continue;
+		}
+
+		/* Only look at the second byte when it lies inside the input. */
+		c2 = (c1 >= 0x81 && c1 <= 0xfe && len >= 2) ? mic[1] : 0;
+
+		if ((c2 >= 0x40 && c2 <= 0x7e) || (c2 >= 0x80 && c2 <= 0xfe))
+		{
+			/* two-byte character */
+			*p++ = *mic++;
+			*p++ = *mic++;
+			len -= 2;
+		}
+		else if (c2 >= 0x30 && c2 <= 0x39 && len >= 4 &&
+				 mic[2] >= 0x81 && mic[2] <= 0xfe &&
+				 mic[3] >= 0x30 && mic[3] <= 0x39)
+		{
+			/* four-byte character */
+			*p++ = *mic++;
+			*p++ = *mic++;
+			*p++ = *mic++;
+			*p++ = *mic++;
+			len -= 4;
+		}
+		else
+		{
+			/* report the bad character once, starting at its lead byte */
+			len -= pg_mic_mblen(mic);
+			printBogusChar(&mic, &p);
+		}
+	}
+	*p = '\0';
+}
+
+/*
* EUC_TW ---> MIC
*/
static void
@@ -1583,6 +1673,26 @@
}
/*
+ * UTF-8 ---> GB18030
+ */
+static void
+utf_to_gb18030(unsigned char *utf, unsigned char *euc, int len)
+{
+	/*
+	 * Convert len bytes of UTF-8 at utf into euc by table lookup.
+	 *
+	 * NOTE(review): the lookup table passed here is the EUC_CN one
+	 * (ULmapEUC_CN); no GB18030-specific table is used.
+	 */
+	int			map_entries = sizeof(ULmapEUC_CN) / sizeof(pg_utf_to_local);
+
+	utf_to_local(utf, euc, ULmapEUC_CN, map_entries, len);
+}
+
+/*
+ * GB18030 ---> UTF-8
+ */
+static void
+gb18030_to_utf(unsigned char *euc, unsigned char *utf, int len)
+{
+	/*
+	 * Convert len bytes at euc into UTF-8 at utf by table lookup.
+	 *
+	 * NOTE(review): both the lookup table (LUmapEUC_CN) and the encoding id
+	 * (PG_EUC_CN) passed here are the EUC_CN ones; nothing GB18030-specific
+	 * is used.
+	 */
+	int			map_entries = sizeof(LUmapEUC_CN) / sizeof(pg_local_to_utf);
+
+	local_to_utf(euc, utf, LUmapEUC_CN, map_entries, PG_EUC_CN, len);
+}
+/*
* UTF-8 ---> EUC_KR
*/
static void
@@ -1754,6 +1864,9 @@
PG_BIG5, big52mic, mic2big5, big5_to_utf, utf_to_big5
},
{
+ PG_GB18030, gb180302mic, mic2gb18030, gb18030_to_utf, utf_to_gb18030
+ },
+ {
PG_WIN1250, win12502mic, mic2win1250, 0, 0
},
};
@@ -1841,6 +1954,9 @@
PG_BIG5, big52mic, mic2big5, 0, 0
},
{
+ PG_GB18030, gb180302mic, mic2gb18030, 0, 0
+ },
+ {
PG_WIN1250, win12502mic, mic2win1250, 0, 0
},
};
--- postgresql-7.2.1/src/backend/utils/mb/encnames.c.org Mon Jun 3 19:24:10
2002
+++ postgresql-7.2.1/src/backend/utils/mb/encnames.c Mon Jun 3 19:25:26 2002
@@ -173,6 +173,9 @@
{
"windows1251", PG_WIN1251
}, /* Windows-1251;
Microsoft */
+ {
+ "gb18030", PG_GB18030
+ }, /* GB18030; GB18030 */
{
NULL, 0
@@ -268,6 +271,9 @@
"BIG5", PG_BIG5
},
{
+ "GB18030", PG_GB18030
+ },
+ {
"WIN1250", PG_WIN1250
}
};
--- postgresql-7.2.1/src/interfaces/odbc/multibyte.c.org Wed Jun 5 18:28:30
2002
+++ postgresql-7.2.1/src/interfaces/odbc/multibyte.c Wed Jun 5 19:48:01 2002
@@ -48,6 +48,28 @@
mb_st = 0;
}
break;
+ /* Chinese GB18030 support
+ * By Bill Huang
+<[EMAIL PROTECTED]>,<[EMAIL PROTECTED]>
+ * */
+		case GB18030:
+			{
+				/*
+				 * State transitions: a byte above 0x80 seen in state 0/1
+				 * moves to state 2.  From state 2 a digit byte
+				 * (0x30..0x39) moves to state 3, anything else to state 1.
+				 * State 3 persists until the next digit byte, which moves
+				 * to state 1.  Everything else resets to state 0.
+				 */
+				if (mb_st < 2 && s[i] > 0x80)
+					mb_st = 2;
+				else if (mb_st == 2)
+				{
+					if (s[i] >= 0x30 && s[i] <= 0x39)
+						mb_st = 3;
+					else
+						mb_st = 1;
+				}
+				else if (mb_st == 3)
+				{
+					if (s[i] >= 0x30 && s[i] <= 0x39)
+						mb_st = 1;
+					else
+						mb_st = 3;
+				}
+				else
+					mb_st = 0;
+			}
+ break;
+
default:
mb_st = 0;
}
@@ -87,6 +109,16 @@
{
multibyte_client_encoding = BIG5;
return ("BIG5");
+ }/* Chinese GB18030 support.
+ * Added by Bill Huang <[EMAIL PROTECTED]>,<[EMAIL PROTECTED]>
+ */
+ if (strstr(str, "%27GB18030%27") ||
+ strstr(str, "%27gb18030%27") ||
+ strstr(str, "'GB18030'") ||
+ strstr(str, "'gb18030'") )
+ {
+ multibyte_client_encoding = GB18030;
+ return ("GB18030");
}
return ("OTHER");
}
@@ -127,6 +159,25 @@
else
multibyte_status = 0;
}
+ break;
+ /*Chinese GB18030 support.Added by Bill Huang
+<[EMAIL PROTECTED]> <[EMAIL PROTECTED]>*/
+		case GB18030:
+			{
+				/*
+				 * State transitions: a byte above 0x80 seen in state 0/1
+				 * moves to state 2.  From state 2 a digit byte
+				 * (0x30..0x39) moves to state 3, anything else to state 1.
+				 * State 3 persists until the next digit byte, which moves
+				 * to state 1.  Everything else resets to state 0.
+				 *
+				 * The state tests must be comparisons (==); an assignment
+				 * here would force the state to 2 on every byte.
+				 */
+				if (multibyte_status < 2 && s > 0x80)
+					multibyte_status = 2;
+				else if (multibyte_status == 2)
+				{
+					if (s >= 0x30 && s <= 0x39)
+						multibyte_status = 3;
+					else
+						multibyte_status = 1;
+				}
+				else if (multibyte_status == 3)
+				{
+					if (s >= 0x30 && s <= 0x39)
+						multibyte_status = 1;
+					else
+						multibyte_status = 3;
+				}
+				else
+					multibyte_status = 0;
+			}
break;
default:
multibyte_status = 0;
--- postgresql-7.2.1/src/interfaces/odbc/multibyte.h.org Wed Jun 5 19:51:20
2002
+++ postgresql-7.2.1/src/interfaces/odbc/multibyte.h Wed Jun 5 19:51:35 2002
@@ -28,6 +28,7 @@
#define SJIS 32 /* Shift JIS */
#define BIG5 33 /* Big5 */
#define WIN1250 34 /* windows-1250 */
+#define GB18030 35 /* GB18030 */
extern int multibyte_client_encoding; /* Multibyte client encoding. */
extern int multibyte_status; /* Multibyte charcter status. */
--- postgresql-7.2.1/src/include/mb/pg_wchar.h.org Mon May 27 20:07:58 2002
+++ postgresql-7.2.1/src/include/mb/pg_wchar.h Mon May 27 20:08:59 2002
@@ -182,6 +182,7 @@
/* followings are for client encoding only */
PG_SJIS, /* Shift JIS */
PG_BIG5, /* Big5 */
+ PG_GB18030, /* GB18030 */
PG_WIN1250, /* windows-1250 */
_PG_LAST_ENCODING_ /* mark only */
--- postgresql-7.2.1/src/backend/utils/mb/wchar.c.org Mon May 27 20:02:44 2002
+++ postgresql-7.2.1/src/backend/utils/mb/wchar.c Mon May 27 20:03:12 2002
@@ -457,6 +457,33 @@
return (len);
}
+/*
+ * GB18030
+ * Added by Bill Huang <[EMAIL PROTECTED]>,<[EMAIL PROTECTED]>
+ */
+static int
+pg_gb18030_mblen(const unsigned char *s)
+{
+	int			len;
+
+	/*
+	 * Return the byte length of the GB18030 character starting at s:
+	 * 1 for an ASCII byte (<= 0x7f), 4 when the second byte is a digit
+	 * (0x30..0x39, both ends included), otherwise 2.
+	 */
+	if (*s <= 0x7f)
+	{							/* ASCII */
+		len = 1;
+	}
+	else if (*(s + 1) >= 0x30 && *(s + 1) <= 0x39)
+	{							/* four-byte character */
+		len = 4;
+	}
+	else
+	{							/* two-byte character */
+		len = 2;
+	}
+	return (len);
+}
+
pg_wchar_tbl pg_wchar_table[] = {
{pg_ascii2wchar_with_len, pg_ascii_mblen, 1}, /* 0; PG_SQL_ASCII
*/
{pg_eucjp2wchar_with_len, pg_eucjp_mblen, 3}, /* 1; PG_EUC_JP */
@@ -483,6 +510,7 @@
{pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 22; ISO-8859-15 */
{pg_latin12wchar_with_len, pg_latin1_mblen, 1}, /* 23; ISO-8859-16 */
{0, pg_sjis_mblen, 2}, /* 24; PG_SJIS */
{0, pg_big5_mblen, 2}, /* 25; PG_BIG5 */
+	{0, pg_gb18030_mblen, 4},	/* 26; PG_GB18030 -- follows PG_BIG5 as in the encoding enum; characters are up to 4 bytes */
{pg_latin12wchar_with_len, pg_latin1_mblen, 1} /* 26; PG_WIN1250 */
};
---------------------------(end of broadcast)---------------------------
TIP 1: subscribe and unsubscribe commands go to [EMAIL PROTECTED]