Here are the final bits of the CJK megapatch, which should fix the problems
with cut & paste of CJK characters.
It includes everything Belcon talked about, plus it adds the correct parameter
for \fcharset under a Chinese locale, so that RTFs saved by AW under a Chinese
locale can be imported into AW without any problem. Currently the same charset
code is used for both GB2312 and Big5, but it was tested only with the GB2312
version of Word, so please test it with the Big5 version of Word as well.
This patch fixes things in a wise way by modifying various tables, so there is
only a small chance that I've made a mistake. Still, please test it (all the
aspects it should fix, i.e. cut & paste).
It doesn't fix the problem of "?" being shown instead of smart quotes on
screen, nor the crash when importing RTFs with CJK text produced by Word.
It's a diff against CVS.
Best regards,
-Vlad
diff -ru abi-0.7.11-orig~/abi-0.7.11/src/af/xap/xp/xap_EncodingManager.cpp
abi-0.7.11-orig-orig/abi-0.7.11/src/af/xap/xp/xap_EncodingManager.cpp
--- abi-0.7.11-orig~/abi-0.7.11/src/af/xap/xp/xap_EncodingManager.cpp Thu Nov 9
19:38:47 2000
+++ abi-0.7.11-orig-orig/abi-0.7.11/src/af/xap/xp/xap_EncodingManager.cpp Thu
+Nov 9 21:16:31 2000
@@ -395,6 +395,12 @@
static const char* wincharsetcode_th[]= /* thai charset*/
{ "th", NULL };
+/*I'm not sure that charset code is the same for Big5 and GB2312.
+ Tested with GB2312 only.
+*/
+static const char* wincharsetcode_zh[]= /* chinese*/
+{ "zh", NULL };
+
static const _rmap langcode_to_wincharsetcode[]=
{
{"0"}, /* default value - ansi charset*/
@@ -403,6 +409,7 @@
{"162",wincharsetcode_tr},
{"163",wincharsetcode_vi},
{"222",wincharsetcode_th},
+ {"134",wincharsetcode_zh},
{NULL}
};
@@ -449,12 +456,16 @@
{NULL}
};
+/*
+ This table is useful since iconv implementations don't know some cpNNNN
+ charsets but under some different name.
+*/
static const _map MSCodepagename_to_charset_name_map[]=
{
/*key, value*/
{NULL,NULL},
- {"CP936","BIG5"}, /* most probably it's correct - VH*/
- {"CP950","GB2312"}, /* 100% correct */
+ {"CP936","GB2312"},
+ {"CP950","BIG5"},
{NULL,NULL}
};
@@ -463,7 +474,10 @@
{
/*key, value*/
{NULL},
-/* {"0x404","zh_CN"},*/ /*I guess - VH*/
+ {"0x404","zh_CN.BIG5"},
+ {"0x804","zh_CN.GB2312"},
+ {"0x404","zh_TW.BIG5"},
+ {"0x804","zh_TW.GB2312"},
{NULL}
};
@@ -815,7 +829,9 @@
const char* XAP_EncodingManager::charsetFromCodepage(int lid) const
{
- char* cpname = wvLIDToCodePageConverter(lid);
+ static char buf[100];
+ sprintf(buf,"CP%d",lid);
+ char* cpname = buf;
UT_Bool is_default;
const char* ret =
search_map(MSCodepagename_to_charset_name_map,cpname,&is_default);
return is_default ? cpname : ret;
diff -ru abi-0.7.11-orig~/abi-0.7.11/src/wp/impexp/xp/ie_exp_RTF_listenerWriteDoc.cpp
abi-0.7.11-orig-orig/abi-0.7.11/src/wp/impexp/xp/ie_exp_RTF_listenerWriteDoc.cpp
--- abi-0.7.11-orig~/abi-0.7.11/src/wp/impexp/xp/ie_exp_RTF_listenerWriteDoc.cpp
Thu Nov 9 19:38:49 2000
+++ abi-0.7.11-orig-orig/abi-0.7.11/src/wp/impexp/xp/ie_exp_RTF_listenerWriteDoc.cpp
+ Thu Nov 9 20:24:06 2000
@@ -197,13 +197,11 @@
/*FIXME: can it happen that wctomb will fail under CJK
locales? */
m_wctomb.wctomb_or_fallback(mbbuf,mblen,*pData++);
for(int i=0;i<mblen;++i) {
+ /* unfortunately all english letters will be
+output as \'hh too. */
unsigned char c = mbbuf[i];
- if ( c > 0x007f)
- m_pie->_rtf_nonascii_hex2(c);
- else
- *pBuf++ = c;
-
- };
+ FlushBuffer();
+ m_pie->_rtf_nonascii_hex2(c);
+ }
} else if (!m_pie->m_atticFormat)
{
if (*pData > 0x00ff) // emit unicode
character