On Thu, 9 Nov 2000, Vlad Harchev wrote:
> On Thu, 9 Nov 2000, Vlad Harchev wrote:
>
> Here is corrected version of the patch - I think it's only important for
> those who link with libiconv, but nevertheless.
>
> What's new: hunks to ut_*.cpp
>
In fact, I sent the same old version of that patch with the letter quoted
above. Sorry. Here is the correct version (with those ut_*.cpp bits); it also
adds 136 as the \fcharset value for BIG5 docs.
Please test it.
Best regards,
-Vlad
diff -ru abi-0.7.11-orig~/abi-0.7.11/src/af/util/xp/ut_mbtowc.cpp
abi-0.7.11-orig-orig/abi-0.7.11/src/af/util/xp/ut_mbtowc.cpp
--- abi-0.7.11-orig~/abi-0.7.11/src/af/util/xp/ut_mbtowc.cpp Thu Nov 9 19:38:46
2000
+++ abi-0.7.11-orig-orig/abi-0.7.11/src/af/util/xp/ut_mbtowc.cpp Thu Nov 9
+22:09:50 2000
@@ -240,7 +240,9 @@
UT_Mbtowc::~UT_Mbtowc()
{
- iconv_close(cd);
+ /*libiconv is stupid - we'll get segfault if we don't check - VH */
+ if (cd!=(iconv_t)-1)
+ iconv_close(cd);
};
int UT_Mbtowc::mbtowc(wchar_t &wc,char mb)
diff -ru abi-0.7.11-orig~/abi-0.7.11/src/af/util/xp/ut_wctomb.cpp
abi-0.7.11-orig-orig/abi-0.7.11/src/af/util/xp/ut_wctomb.cpp
--- abi-0.7.11-orig~/abi-0.7.11/src/af/util/xp/ut_wctomb.cpp Thu Nov 9 19:38:47
2000
+++ abi-0.7.11-orig-orig/abi-0.7.11/src/af/util/xp/ut_wctomb.cpp Thu Nov 9
+22:09:30 2000
@@ -185,7 +185,9 @@
UT_Wctomb::~UT_Wctomb()
{
- iconv_close(cd);
+ /*libiconv is stupid - we'll get segfault if we don't check - VH */
+ if (cd!=(iconv_t)-1)
+ iconv_close(cd);
};
int UT_Wctomb::wctomb(char * pC,int &length,wchar_t wc)
diff -ru abi-0.7.11-orig~/abi-0.7.11/src/af/xap/xp/xap_EncodingManager.cpp
abi-0.7.11-orig-orig/abi-0.7.11/src/af/xap/xp/xap_EncodingManager.cpp
--- abi-0.7.11-orig~/abi-0.7.11/src/af/xap/xp/xap_EncodingManager.cpp Thu Nov 9
19:38:47 2000
+++ abi-0.7.11-orig-orig/abi-0.7.11/src/af/xap/xp/xap_EncodingManager.cpp Fri
+Nov 10 11:20:06 2000
@@ -395,6 +395,15 @@
static const char* wincharsetcode_th[]= /* thai charset*/
{ "th", NULL };
+/*I'm not sure that charset code is the same for Big5 and GB2312.
+ Tested with GB2312 only.
+*/
+static const char* wincharsetcode_zh_GB2312[]= /* chinese*/
+{ "zh_CN.GB2312", "zh_TW.GB2312", NULL };
+
+static const char* wincharsetcode_zh_BIG5[]= /* chinese*/
+{ "zh_CN.BIG5", "zh_TW.BIG5", NULL };
+
static const _rmap langcode_to_wincharsetcode[]=
{
{"0"}, /* default value - ansi charset*/
@@ -403,6 +412,8 @@
{"162",wincharsetcode_tr},
{"163",wincharsetcode_vi},
{"222",wincharsetcode_th},
+ {"134",wincharsetcode_zh_GB2312},
+ {"136",wincharsetcode_zh_BIG5},
{NULL}
};
@@ -449,12 +460,16 @@
{NULL}
};
+/*
+ This table is useful because some iconv implementations don't know certain
+ cpNNNN charsets by that name, only under a different name.
+*/
static const _map MSCodepagename_to_charset_name_map[]=
{
/*key, value*/
{NULL,NULL},
- {"CP936","BIG5"}, /* most probably it's correct - VH*/
- {"CP950","GB2312"}, /* 100% correct */
+ {"CP936","GB2312"},
+ {"CP950","BIG5"},
{NULL,NULL}
};
@@ -463,7 +478,10 @@
{
/*key, value*/
{NULL},
-/* {"0x404","zh_CN"},*/ /*I guess - VH*/
+ {"0x404","zh_CN.BIG5"},
+ {"0x804","zh_CN.GB2312"},
+ {"0x404","zh_TW.BIG5"},
+ {"0x804","zh_TW.GB2312"},
{NULL}
};
@@ -815,7 +833,9 @@
const char* XAP_EncodingManager::charsetFromCodepage(int lid) const
{
- char* cpname = wvLIDToCodePageConverter(lid);
+ static char buf[100];
+ sprintf(buf,"CP%d",lid);
+ char* cpname = buf;
UT_Bool is_default;
const char* ret =
search_map(MSCodepagename_to_charset_name_map,cpname,&is_default);
return is_default ? cpname : ret;
diff -ru abi-0.7.11-orig~/abi-0.7.11/src/wp/impexp/xp/ie_exp_RTF_listenerWriteDoc.cpp
abi-0.7.11-orig-orig/abi-0.7.11/src/wp/impexp/xp/ie_exp_RTF_listenerWriteDoc.cpp
--- abi-0.7.11-orig~/abi-0.7.11/src/wp/impexp/xp/ie_exp_RTF_listenerWriteDoc.cpp
Thu Nov 9 19:38:49 2000
+++ abi-0.7.11-orig-orig/abi-0.7.11/src/wp/impexp/xp/ie_exp_RTF_listenerWriteDoc.cpp
+ Thu Nov 9 20:24:06 2000
@@ -197,13 +197,11 @@
/*FIXME: can it happen that wctomb will fail under CJK
locales? */
m_wctomb.wctomb_or_fallback(mbbuf,mblen,*pData++);
for(int i=0;i<mblen;++i) {
+ /* unfortunately all english letters will be
+output as \'hh too. */
unsigned char c = mbbuf[i];
- if ( c > 0x007f)
- m_pie->_rtf_nonascii_hex2(c);
- else
- *pBuf++ = c;
-
- };
+ FlushBuffer();
+ m_pie->_rtf_nonascii_hex2(c);
+ }
} else if (!m_pie->m_atticFormat)
{
if (*pData > 0x00ff) // emit unicode
character