Re: final bits for CJK patch

Vlad Harchev Fri, 10 Nov 2000 02:05:54 -0600 (CST)

On Thu, 9 Nov 2000, Vlad Harchev wrote:

> On Thu, 9 Nov 2000, Vlad Harchev wrote:
> 
>  Here is corrected version of the patch - I think it's only important for
> those who link with libiconv, but nevertheless.
>  
>  What's new: hunks to ut_*.cpp
> 

 In fact, I sent the same old version of that patch with this letter. Sorry.
 Here is the correct version (with those ut_*.cpp bits); it also adds 136 as
the \fcharset value for BIG5 docs.
 Please test it.

 Best regards,
  -Vlad

diff -ru abi-0.7.11-orig~/abi-0.7.11/src/af/util/xp/ut_mbtowc.cpp 
abi-0.7.11-orig-orig/abi-0.7.11/src/af/util/xp/ut_mbtowc.cpp
--- abi-0.7.11-orig~/abi-0.7.11/src/af/util/xp/ut_mbtowc.cpp    Thu Nov  9 19:38:46 
2000
+++ abi-0.7.11-orig-orig/abi-0.7.11/src/af/util/xp/ut_mbtowc.cpp        Thu Nov  9 
+22:09:50 2000
@@ -240,7 +240,9 @@
 
 UT_Mbtowc::~UT_Mbtowc()
 {
-    iconv_close(cd);
+    /*libiconv is stupid - we'll get segfault if we don't check  - VH */
+    if (cd!=(iconv_t)-1)
+           iconv_close(cd);
 };
 
 int UT_Mbtowc::mbtowc(wchar_t &wc,char mb)
diff -ru abi-0.7.11-orig~/abi-0.7.11/src/af/util/xp/ut_wctomb.cpp 
abi-0.7.11-orig-orig/abi-0.7.11/src/af/util/xp/ut_wctomb.cpp
--- abi-0.7.11-orig~/abi-0.7.11/src/af/util/xp/ut_wctomb.cpp    Thu Nov  9 19:38:47 
2000
+++ abi-0.7.11-orig-orig/abi-0.7.11/src/af/util/xp/ut_wctomb.cpp        Thu Nov  9 
+22:09:30 2000
@@ -185,7 +185,9 @@
 
 UT_Wctomb::~UT_Wctomb()
 {
-    iconv_close(cd);
+    /*libiconv is stupid - we'll get segfault if we don't check  - VH */
+    if (cd!=(iconv_t)-1)
+           iconv_close(cd);
 };
 
 int UT_Wctomb::wctomb(char * pC,int &length,wchar_t wc)
diff -ru abi-0.7.11-orig~/abi-0.7.11/src/af/xap/xp/xap_EncodingManager.cpp 
abi-0.7.11-orig-orig/abi-0.7.11/src/af/xap/xp/xap_EncodingManager.cpp
--- abi-0.7.11-orig~/abi-0.7.11/src/af/xap/xp/xap_EncodingManager.cpp   Thu Nov  9 
19:38:47 2000
+++ abi-0.7.11-orig-orig/abi-0.7.11/src/af/xap/xp/xap_EncodingManager.cpp       Fri 
+Nov 10 11:20:06 2000
@@ -395,6 +395,15 @@
 static const char* wincharsetcode_th[]=  /* thai charset*/
 { "th", NULL };
 
+/*I'm not sure that charset code is the same for Big5 and GB2312.
+  Tested with GB2312 only.  
+*/
+static const char* wincharsetcode_zh_GB2312[]= /* chinese*/
+{ "zh_CN.GB2312", "zh_TW.GB2312", NULL };
+
+static const char* wincharsetcode_zh_BIG5[]= /* chinese*/
+{ "zh_CN.BIG5", "zh_TW.BIG5", NULL };
+
 static const _rmap langcode_to_wincharsetcode[]=
 {
        {"0"}, /* default value - ansi charset*/
@@ -403,6 +412,8 @@
        {"162",wincharsetcode_tr},
        {"163",wincharsetcode_vi},
        {"222",wincharsetcode_th},      
+       {"134",wincharsetcode_zh_GB2312},
+       {"136",wincharsetcode_zh_BIG5}, 
        {NULL}
 };
 
@@ -449,12 +460,16 @@
        {NULL}
 };
 
+/*
+ This table is useful since iconv implementations don't know some cpNNNN 
+ charsets but under some different name.
+*/
 static const _map MSCodepagename_to_charset_name_map[]=
 {
 /*key, value*/
     {NULL,NULL},
-    {"CP936","BIG5"}, /* most probably it's correct  - VH*/
-    {"CP950","GB2312"},    /* 100% correct */
+    {"CP936","GB2312"},
+    {"CP950","BIG5"},  
     {NULL,NULL}
 };
 
@@ -463,7 +478,10 @@
 {
 /*key, value*/
     {NULL},
-/*   {"0x404","zh_CN"},*/  /*I guess - VH*/
+   {"0x404","zh_CN.BIG5"},  
+   {"0x804","zh_CN.GB2312"},     
+   {"0x404","zh_TW.BIG5"},  
+   {"0x804","zh_TW.GB2312"}, 
     {NULL}
 };
 
@@ -815,7 +833,9 @@
 
 const char* XAP_EncodingManager::charsetFromCodepage(int lid) const
 {
-    char* cpname = wvLIDToCodePageConverter(lid);
+    static char buf[100];
+    sprintf(buf,"CP%d",lid);    
+    char* cpname = buf;
     UT_Bool is_default;
     const char* ret = 
search_map(MSCodepagename_to_charset_name_map,cpname,&is_default);
     return is_default ? cpname : ret;
diff -ru abi-0.7.11-orig~/abi-0.7.11/src/wp/impexp/xp/ie_exp_RTF_listenerWriteDoc.cpp 
abi-0.7.11-orig-orig/abi-0.7.11/src/wp/impexp/xp/ie_exp_RTF_listenerWriteDoc.cpp
--- abi-0.7.11-orig~/abi-0.7.11/src/wp/impexp/xp/ie_exp_RTF_listenerWriteDoc.cpp       
 Thu Nov  9 19:38:49 2000
+++ abi-0.7.11-orig-orig/abi-0.7.11/src/wp/impexp/xp/ie_exp_RTF_listenerWriteDoc.cpp   
+ Thu Nov  9 20:24:06 2000
@@ -197,13 +197,11 @@
                                /*FIXME: can it happen that wctomb will fail under CJK 
locales? */
                                m_wctomb.wctomb_or_fallback(mbbuf,mblen,*pData++);
                                for(int i=0;i<mblen;++i) {
+                                       /* unfortunately all english letters will be 
+output as \'hh too. */
                                        unsigned char c = mbbuf[i];
-                                       if ( c > 0x007f)
-                                               m_pie->_rtf_nonascii_hex2(c);
-                                       else
-                                               *pBuf++ = c;
-                                       
-                               };
+                                       FlushBuffer();
+                                       m_pie->_rtf_nonascii_hex2(c);
+                               }
                        } else if (!m_pie->m_atticFormat) 
                        {
                                if (*pData > 0x00ff)            // emit unicode 
character

Re: final bits for CJK patch

Reply via email to