Hi,

FYI

oliver


---------- Forwarded message ----------
Date: Mon, 11 Jun 2001 20:40:49 +1000
From: Andrew Dunbar <[EMAIL PROTECTED]>
To: abiword-dev <[EMAIL PROTECTED]>
Subject: Patch: Encoding Manager enhancements

This patch detects the correct names for the UCS-2 little-endian and
big-endian encodings and provides functions to get these names when
needed, instead of hard-coding them.  Different iconv implementations
know different names.  Specifically, the libiconv we are using has the wrong names.

I've also provided a getNativeUnicodeEncodingName() function.  It will
return UTF-8 on *nix, Be, and QNX; UCS-2LE on Windows; and whatever
Macs use on Macs (:

Andrew Dunbar.

-- 
http://linguaphile.sourceforge.net
Index: src/af/ev/win/Makefile
===================================================================
RCS file: /cvsroot/abi/src/af/ev/win/Makefile,v
retrieving revision 1.12
diff -u -r1.12 Makefile
--- src/af/ev/win/Makefile      2000/02/14 11:24:49     1.12
+++ src/af/ev/win/Makefile      2001/06/11 10:21:05
@@ -28,6 +28,8 @@
                        ev_Win32Toolbar.cpp             \
                        ev_Win32Toolbar_ViewListener.cpp
 
+INCLUDES=              -I$(ABI_XX_ROOT)/../libiconv/include
+
 TARGETS=               $(OBJS)
 
 include $(ABI_ROOT)/src/config/abi_rules.mk
Index: src/af/ev/win/ev_Win32Keyboard.cpp
===================================================================
RCS file: /cvsroot/abi/src/af/ev/win/ev_Win32Keyboard.cpp,v
retrieving revision 1.25
diff -u -r1.25 ev_Win32Keyboard.cpp
--- src/af/ev/win/ev_Win32Keyboard.cpp  2001/06/07 15:51:42     1.25
+++ src/af/ev/win/ev_Win32Keyboard.cpp  2001/06/11 10:21:12
@@ -31,6 +31,7 @@
 #include "ev_EditMethod.h"
 #include "ev_EditBinding.h"
 #include "ev_EditEventMapper.h"
+#include "xap_EncodingManager.h"
 
 #ifdef UT_DEBUG
 #define MSG(keydata,args)      do { if ( ! (keyData & 0x40000000)) UT_DEBUGMSG args ; 
} while (0)
@@ -193,11 +194,12 @@
                if( GetLocaleInfo( LOWORD( hKeyboardLayout ), 
LOCALE_IDEFAULTANSICODEPAGE, &szCodePage[2], sizeof( szCodePage ) / sizeof( 
szCodePage[0] ) - 2 ) )
                {
                        // Unicode locale?
-                       // TODO Does NT use UCS-2-BE internally on non-Intel CPUs?
                        if( !strcmp( szCodePage, "CP0" ) )
                        {
+                               const char *szUCS2Name = 
+XAP_EncodingManager::get_instance()->getNativeUnicodeEncodingName();
+                               UT_ASSERT(szUCS2Name);
                                m_bIsUnicodeInput = true;
-                               strcpy( szCodePage, "UCS-2-LE" );
+                               strcpy( szCodePage, szUCS2Name );
                        }
                        else
                                m_bIsUnicodeInput = false;
Index: src/af/util/xp/ut_Encoding.cpp
===================================================================
RCS file: /cvsroot/abi/src/af/util/xp/ut_Encoding.cpp,v
retrieving revision 1.1
diff -u -r1.1 ut_Encoding.cpp
--- src/af/util/xp/ut_Encoding.cpp      2001/06/07 15:51:59     1.1
+++ src/af/util/xp/ut_Encoding.cpp      2001/06/11 10:21:16
@@ -167,8 +167,10 @@
        {enc_tis620,                    NULL, XAP_STRING_ID_ENC_21},
        {enc_ucs2be,                    NULL, XAP_STRING_ID_ENC_53},
        {enc_ucs2le,                    NULL, XAP_STRING_ID_ENC_54},
+       // UCS-4 be and le
        {enc_utf7,                              NULL, XAP_STRING_ID_ENC_51},
        {enc_utf8,                              NULL, XAP_STRING_ID_ENC_52},
+       // UTF-16, UTF-32 be and le
        {enc_viscii,                    NULL, XAP_STRING_ID_ENC_24},
 };
 
Index: src/af/xap/win/xap_Win32EncodingManager.cpp
===================================================================
RCS file: /cvsroot/abi/src/af/xap/win/xap_Win32EncodingManager.cpp,v
retrieving revision 1.4
diff -u -r1.4 xap_Win32EncodingManager.cpp
--- src/af/xap/win/xap_Win32EncodingManager.cpp 2001/06/07 15:52:18     1.4
+++ src/af/xap/win/xap_Win32EncodingManager.cpp 2001/06/11 10:21:22
@@ -28,11 +28,14 @@
 
 XAP_Win32EncodingManager::~XAP_Win32EncodingManager() {}
 
-static const char* NativeEncodingName, *LanguageISOName, *LanguageISOTerritory;
+static const char* NativeEncodingName, *NativeUnicodeEncodingName, *LanguageISOName, 
+*LanguageISOTerritory;
 
 const char* XAP_Win32EncodingManager::getNativeEncodingName() const
 {     return NativeEncodingName; };
 
+const char* XAP_Win32EncodingManager::getNativeUnicodeEncodingName() const
+{     return NativeUnicodeEncodingName; };
+
 const char* XAP_Win32EncodingManager::getLanguageISOName() const
 {      return LanguageISOName; };
 
@@ -52,14 +55,17 @@
        LanguageISOName = "en";
        LanguageISOTerritory = NULL;
 
+       // Unicode Encoding Name
+       // TODO Does NT use UCS-2BE internally on non-Intel CPUs?
+       NativeUnicodeEncodingName = getUCS2LEName();
+
        // Encoding 
        if 
(GetLocaleInfo(LOCALE_USER_DEFAULT,LOCALE_IDEFAULTANSICODEPAGE,szLocaleInfo,sizeof(szLocaleInfo)/sizeof(szLocaleInfo[0])))
        {
                // Windows Unicode locale?
                if (!strcmp(szLocaleInfo,"0"))
                {
-                       // TODO Does NT use UCS-2-BE internally on non-Intel CPUs?
-                       NativeEncodingName = "UCS-2-LE";
+                       NativeEncodingName = NativeUnicodeEncodingName;
                        m_bIsUnicodeLocale = true;
                }
                else
Index: src/af/xap/win/xap_Win32EncodingManager.h
===================================================================
RCS file: /cvsroot/abi/src/af/xap/win/xap_Win32EncodingManager.h,v
retrieving revision 1.1
diff -u -r1.1 xap_Win32EncodingManager.h
--- src/af/xap/win/xap_Win32EncodingManager.h   2001/05/25 18:12:44     1.1
+++ src/af/xap/win/xap_Win32EncodingManager.h   2001/06/11 10:21:22
@@ -15,6 +15,7 @@
 public:
 
     const char* getNativeEncodingName()        const;
+    const char* getNativeUnicodeEncodingName() const;
        inline virtual bool isUnicodeLocale()   const {return m_bIsUnicodeLocale;}
     const char* getLanguageISOName()           const;
     const char* getLanguageISOTerritory()      const;
Index: src/af/xap/xp/xap_EncodingManager.cpp
===================================================================
RCS file: /cvsroot/abi/src/af/xap/xp/xap_EncodingManager.cpp,v
retrieving revision 1.34
diff -u -r1.34 xap_EncodingManager.cpp
--- src/af/xap/xp/xap_EncodingManager.cpp       2001/06/07 15:52:24     1.34
+++ src/af/xap/xp/xap_EncodingManager.cpp       2001/06/11 10:21:39
@@ -40,6 +40,23 @@
     return "ISO-8859-1"; /* this will definitely work*/
 }
 
+const char* XAP_EncodingManager::getNativeUnicodeEncodingName() const
+{
+    return "UTF-8"; /* this will definitely work*/
+}
+
+static const char* UCS2BEName, *UCS2LEName;
+
+const char* XAP_EncodingManager::getUCS2BEName() const
+{
+       return UCS2BEName;
+}
+
+const char* XAP_EncodingManager::getUCS2LEName() const
+{
+       return UCS2LEName;
+}
+
 #define VALID_ICONV_HANDLE(i) ((i) != (iconv_t)-1)
 XAP_EncodingManager::~XAP_EncodingManager()
 {
@@ -754,6 +1757,51 @@
        *terrname = getLanguageISOTerritory(),
        *enc = getNativeEncodingName();
        
+       // UCS-2 Encoding Names
+       static const char * (szUCS2BENames[]) = {
+               "UCS-2BE",                      // preferred
+               "UCS-2-BE",                     // older libiconv
+               "UNICODEBIG",           // older glibc
+               "UNICODE-1-1",          // in libiconv source
+               "UTF-16BE",                     // superset
+               "UTF-16-BE",            // my guess
+               0 };
+       static const char * (szUCS2LENames[]) = {
+               "UCS-2LE",                      // preferred
+               "UCS-2-LE",                     // older libiconv
+               "UNICODELITTLE",        // older glibc
+               "UTF-16LE",                     // superset
+               "UTF-16-LE",            // my guess
+               0 };
+       const char ** p;
+       iconv_t iconv_handle;
+       for (p = szUCS2BENames; *p; ++p)
+       {
+               if ((iconv_handle = iconv_open(*p,*p)) != (iconv_t)-1)
+               {
+                       iconv_close(iconv_handle);
+                       UCS2BEName = *p;
+                       break;
+               }
+       }
+       for (p = szUCS2LENames; *p; ++p)
+       {
+               if ((iconv_handle = iconv_open(*p,*p)) != (iconv_t)-1)
+               {
+                       iconv_close(iconv_handle);
+                       UCS2LEName = *p;
+                       break;
+               }
+       }
+       if (UCS2BEName)
+               UT_DEBUGMSG(("This iconv supports UCS-2BE as \"%s\"\n",UCS2BEName));
+       else
+               UT_DEBUGMSG(("This iconv does not support UCS-2BE!\n"));
+       if (UCS2LEName)
+               UT_DEBUGMSG(("This iconv supports UCS-2LE as \"%s\"\n",UCS2LEName));
+       else
+               UT_DEBUGMSG(("This iconv does not support UCS-2LE!\n"));
+
        if(!strcmp(enc, "UTF-8") || !strcmp(enc, "UTF8") || !strcmp(enc, "utf-8") || 
!strcmp(enc, "utf8"))
                m_bIsUnicodeLocale = true;
        else
Index: src/af/xap/xp/xap_EncodingManager.h
===================================================================
RCS file: /cvsroot/abi/src/af/xap/xp/xap_EncodingManager.h,v
retrieving revision 1.20
diff -u -r1.20 xap_EncodingManager.h
--- src/af/xap/xp/xap_EncodingManager.h 2001/06/07 15:52:24     1.20
+++ src/af/xap/xp/xap_EncodingManager.h 2001/06/11 10:21:43
@@ -52,9 +52,26 @@
     /*
        this shouldn't return NULL. Don't free or write to returned string. 
        The string should be uppercased (extra font tarballs assume this).
-       TODO isn't iconv case sensitive?  Mac encoding names are mixed case!
     */
     virtual const char* getNativeEncodingName() const;
+
+    /*
+       this can return NULL. Don't free or write to returned string. 
+       The string should be uppercased (extra font tarballs assume this).
+    */
+    virtual const char* getNativeUnicodeEncodingName() const;
+
+    /*
+       this can return NULL. Don't free or write to returned string. 
+       The string should be uppercased (extra font tarballs assume this).
+    */
+    virtual const char* getUCS2BEName() const;
+
+    /*
+       this can return NULL. Don't free or write to returned string. 
+       The string should be uppercased (extra font tarballs assume this).
+    */
+    virtual const char* getUCS2LEName() const;
 
        /*
        This should return true for any Unicode locale:
Index: src/wp/impexp/xp/ie_imp_Text.cpp
===================================================================
RCS file: /cvsroot/abi/src/wp/impexp/xp/ie_imp_Text.cpp,v
retrieving revision 1.26
diff -u -r1.26 ie_imp_Text.cpp
--- src/wp/impexp/xp/ie_imp_Text.cpp    2001/06/07 15:52:42     1.26
+++ src/wp/impexp/xp/ie_imp_Text.cpp    2001/06/11 10:22:44
@@ -355,11 +363,10 @@
 
                eUcs2 = IE_Imp_Text_Sniffer::_recognizeUCS2(szBuf, iNumbytes, true);
                
-               // TODO Old libiconv uses UCS-2-BE, new uses UCS-2BE
                if (eUcs2 == IE_Imp_Text_Sniffer::UE_BigEnd)
-                       _setEncoding("UCS-2-BE");
+                       
+_setEncoding(XAP_EncodingManager::get_instance()->getUCS2BEName());
                else if (eUcs2 == IE_Imp_Text_Sniffer::UE_LittleEnd)
-                       _setEncoding("UCS-2-LE");
+                       
+_setEncoding(XAP_EncodingManager::get_instance()->getUCS2LEName());
        }
 
        return UT_OK;
@@ -558,11 +566,10 @@
        // Attempt to guess whether we're pasting 8 bit or unicode text
        IE_Imp_Text_Sniffer::UCS2_Endian eUcs2 = 
IE_Imp_Text_Sniffer::_recognizeUCS2((const char *)pData, lenData, true);
        
-       // TODO Old libiconv uses UCS-2-BE, new uses UCS-2BE
        if (eUcs2 == IE_Imp_Text_Sniffer::UE_BigEnd)
-               _setEncoding("UCS-2-BE");
+               _setEncoding(XAP_EncodingManager::get_instance()->getUCS2BEName());
        else if (eUcs2 == IE_Imp_Text_Sniffer::UE_LittleEnd)
-               _setEncoding("UCS-2-LE");
+               _setEncoding(XAP_EncodingManager::get_instance()->getUCS2LEName());
        else
                
_setEncoding(XAP_EncodingManager::get_instance()->getNativeEncodingName());
 

Reply via email to