Re: final bits for CJK patch

Vlad Harchev Thu, 9 Nov 2000 12:37:26 -0600 (CST)

On Thu, 9 Nov 2000, Vlad Harchev wrote:

 Here is a corrected version of the patch. I think the correction only matters
for those who link with libiconv, but I am posting it nevertheless.
 
 What's new: hunks for ut_*.cpp

 Best regards,
  -Vlad

diff -ru abi-0.7.11-orig~/abi-0.7.11/src/af/xap/xp/xap_EncodingManager.cpp 
abi-0.7.11-orig-orig/abi-0.7.11/src/af/xap/xp/xap_EncodingManager.cpp
--- abi-0.7.11-orig~/abi-0.7.11/src/af/xap/xp/xap_EncodingManager.cpp   Thu Nov  9 
19:38:47 2000
+++ abi-0.7.11-orig-orig/abi-0.7.11/src/af/xap/xp/xap_EncodingManager.cpp       Thu 
+Nov  9 21:16:31 2000
@@ -395,6 +395,12 @@
 static const char* wincharsetcode_th[]=  /* thai charset*/
 { "th", NULL };
 
+/*I'm not sure that charset code is the same for Big5 and GB2312.
+  Tested with GB2312 only.  
+*/
+static const char* wincharsetcode_zh[]= /* chinese*/
+{ "zh", NULL };
+
 static const _rmap langcode_to_wincharsetcode[]=
 {
        {"0"}, /* default value - ansi charset*/
@@ -403,6 +409,7 @@
        {"162",wincharsetcode_tr},
        {"163",wincharsetcode_vi},
        {"222",wincharsetcode_th},      
+       {"134",wincharsetcode_zh},
        {NULL}
 };
 
@@ -449,12 +456,16 @@
        {NULL}
 };
 
+/*
+ This table is useful since iconv implementations don't know some cpNNNN 
+ charsets but under some different name.
+*/
 static const _map MSCodepagename_to_charset_name_map[]=
 {
 /*key, value*/
     {NULL,NULL},
-    {"CP936","BIG5"}, /* most probably it's correct  - VH*/
-    {"CP950","GB2312"},    /* 100% correct */
+    {"CP936","GB2312"},
+    {"CP950","BIG5"},  
     {NULL,NULL}
 };
 
@@ -463,7 +474,10 @@
 {
 /*key, value*/
     {NULL},
-/*   {"0x404","zh_CN"},*/  /*I guess - VH*/
+   {"0x404","zh_CN.BIG5"},  
+   {"0x804","zh_CN.GB2312"},     
+   {"0x404","zh_TW.BIG5"},  
+   {"0x804","zh_TW.GB2312"}, 
     {NULL}
 };
 
@@ -815,7 +829,9 @@
 
 const char* XAP_EncodingManager::charsetFromCodepage(int lid) const
 {
-    char* cpname = wvLIDToCodePageConverter(lid);
+    static char buf[100];
+    sprintf(buf,"CP%d",lid);    
+    char* cpname = buf;
     UT_Bool is_default;
     const char* ret = 
search_map(MSCodepagename_to_charset_name_map,cpname,&is_default);
     return is_default ? cpname : ret;
diff -ru abi-0.7.11-orig~/abi-0.7.11/src/wp/impexp/xp/ie_exp_RTF_listenerWriteDoc.cpp 
abi-0.7.11-orig-orig/abi-0.7.11/src/wp/impexp/xp/ie_exp_RTF_listenerWriteDoc.cpp
--- abi-0.7.11-orig~/abi-0.7.11/src/wp/impexp/xp/ie_exp_RTF_listenerWriteDoc.cpp       
 Thu Nov  9 19:38:49 2000
+++ abi-0.7.11-orig-orig/abi-0.7.11/src/wp/impexp/xp/ie_exp_RTF_listenerWriteDoc.cpp   
+ Thu Nov  9 20:24:06 2000
@@ -197,13 +197,11 @@
                                /*FIXME: can it happen that wctomb will fail under CJK 
locales? */
                                m_wctomb.wctomb_or_fallback(mbbuf,mblen,*pData++);
                                for(int i=0;i<mblen;++i) {
+                                       /* unfortunately all english letters will be 
+output as \'hh too. */
                                        unsigned char c = mbbuf[i];
-                                       if ( c > 0x007f)
-                                               m_pie->_rtf_nonascii_hex2(c);
-                                       else
-                                               *pBuf++ = c;
-                                       
-                               };
+                                       FlushBuffer();
+                                       m_pie->_rtf_nonascii_hex2(c);
+                               }
                        } else if (!m_pie->m_atticFormat) 
                        {
                                if (*pData > 0x00ff)            // emit unicode 
character

Re: final bits for CJK patch

Reply via email to