Hi,
FYI
oliver
---------- Forwarded message ----------
Date: Mon, 11 Jun 2001 20:40:49 +1000
From: Andrew Dunbar <[EMAIL PROTECTED]>
To: abiword-dev <[EMAIL PROTECTED]>
Subject: Patch: Encoding Manager enhancements
This patch detects the correct names for the little-endian and big-endian
UCS-2 encodings and provides functions to get these names when needed,
instead of hard-coding them. Different iconv implementations know different
names. Specifically, the libiconv we are using has the wrong names.
I've also provided getNativeUnicodeEncodingName(). This will
return UTF-8 on *nix, Be, and QNX; UCS-2LE on Windows; and whatever
Macs use on Macs (:
Andrew Dunbar.
--
http://linguaphile.sourceforge.net
Index: src/af/ev/win/Makefile
===================================================================
RCS file: /cvsroot/abi/src/af/ev/win/Makefile,v
retrieving revision 1.12
diff -u -r1.12 Makefile
--- src/af/ev/win/Makefile 2000/02/14 11:24:49 1.12
+++ src/af/ev/win/Makefile 2001/06/11 10:21:05
@@ -28,6 +28,8 @@
ev_Win32Toolbar.cpp \
ev_Win32Toolbar_ViewListener.cpp
+INCLUDES= -I$(ABI_XX_ROOT)/../libiconv/include
+
TARGETS= $(OBJS)
include $(ABI_ROOT)/src/config/abi_rules.mk
Index: src/af/ev/win/ev_Win32Keyboard.cpp
===================================================================
RCS file: /cvsroot/abi/src/af/ev/win/ev_Win32Keyboard.cpp,v
retrieving revision 1.25
diff -u -r1.25 ev_Win32Keyboard.cpp
--- src/af/ev/win/ev_Win32Keyboard.cpp 2001/06/07 15:51:42 1.25
+++ src/af/ev/win/ev_Win32Keyboard.cpp 2001/06/11 10:21:12
@@ -31,6 +31,7 @@
#include "ev_EditMethod.h"
#include "ev_EditBinding.h"
#include "ev_EditEventMapper.h"
+#include "xap_EncodingManager.h"
#ifdef UT_DEBUG
#define MSG(keydata,args) do { if ( ! (keyData & 0x40000000)) UT_DEBUGMSG args ;
} while (0)
@@ -193,11 +194,12 @@
if( GetLocaleInfo( LOWORD( hKeyboardLayout ),
LOCALE_IDEFAULTANSICODEPAGE, &szCodePage[2], sizeof( szCodePage ) / sizeof(
szCodePage[0] ) - 2 ) )
{
// Unicode locale?
- // TODO Does NT use UCS-2-BE internally on non-Intel CPUs?
if( !strcmp( szCodePage, "CP0" ) )
{
+ const char *szUCS2Name =
+XAP_EncodingManager::get_instance()->getNativeUnicodeEncodingName();
+ UT_ASSERT(szUCS2Name);
m_bIsUnicodeInput = true;
- strcpy( szCodePage, "UCS-2-LE" );
+ strcpy( szCodePage, szUCS2Name );
}
else
m_bIsUnicodeInput = false;
Index: src/af/util/xp/ut_Encoding.cpp
===================================================================
RCS file: /cvsroot/abi/src/af/util/xp/ut_Encoding.cpp,v
retrieving revision 1.1
diff -u -r1.1 ut_Encoding.cpp
--- src/af/util/xp/ut_Encoding.cpp 2001/06/07 15:51:59 1.1
+++ src/af/util/xp/ut_Encoding.cpp 2001/06/11 10:21:16
@@ -167,8 +167,10 @@
{enc_tis620, NULL, XAP_STRING_ID_ENC_21},
{enc_ucs2be, NULL, XAP_STRING_ID_ENC_53},
{enc_ucs2le, NULL, XAP_STRING_ID_ENC_54},
+ // UCS-4 be and le
{enc_utf7, NULL, XAP_STRING_ID_ENC_51},
{enc_utf8, NULL, XAP_STRING_ID_ENC_52},
+ // UTF-16, UTF-32 be and le
{enc_viscii, NULL, XAP_STRING_ID_ENC_24},
};
Index: src/af/xap/win/xap_Win32EncodingManager.cpp
===================================================================
RCS file: /cvsroot/abi/src/af/xap/win/xap_Win32EncodingManager.cpp,v
retrieving revision 1.4
diff -u -r1.4 xap_Win32EncodingManager.cpp
--- src/af/xap/win/xap_Win32EncodingManager.cpp 2001/06/07 15:52:18 1.4
+++ src/af/xap/win/xap_Win32EncodingManager.cpp 2001/06/11 10:21:22
@@ -28,11 +28,14 @@
XAP_Win32EncodingManager::~XAP_Win32EncodingManager() {}
-static const char* NativeEncodingName, *LanguageISOName, *LanguageISOTerritory;
+static const char* NativeEncodingName, *NativeUnicodeEncodingName, *LanguageISOName,
+*LanguageISOTerritory;
const char* XAP_Win32EncodingManager::getNativeEncodingName() const
{ return NativeEncodingName; };
+const char* XAP_Win32EncodingManager::getNativeUnicodeEncodingName() const
+{ return NativeUnicodeEncodingName; };
+
const char* XAP_Win32EncodingManager::getLanguageISOName() const
{ return LanguageISOName; };
@@ -52,14 +55,17 @@
LanguageISOName = "en";
LanguageISOTerritory = NULL;
+ // Unicode Encoding Name
+ // TODO Does NT use UCS-2BE internally on non-Intel CPUs?
+ NativeUnicodeEncodingName = getUCS2LEName();
+
// Encoding
if
(GetLocaleInfo(LOCALE_USER_DEFAULT,LOCALE_IDEFAULTANSICODEPAGE,szLocaleInfo,sizeof(szLocaleInfo)/sizeof(szLocaleInfo[0])))
{
// Windows Unicode locale?
if (!strcmp(szLocaleInfo,"0"))
{
- // TODO Does NT use UCS-2-BE internally on non-Intel CPUs?
- NativeEncodingName = "UCS-2-LE";
+ NativeEncodingName = NativeUnicodeEncodingName;
m_bIsUnicodeLocale = true;
}
else
Index: src/af/xap/win/xap_Win32EncodingManager.h
===================================================================
RCS file: /cvsroot/abi/src/af/xap/win/xap_Win32EncodingManager.h,v
retrieving revision 1.1
diff -u -r1.1 xap_Win32EncodingManager.h
--- src/af/xap/win/xap_Win32EncodingManager.h 2001/05/25 18:12:44 1.1
+++ src/af/xap/win/xap_Win32EncodingManager.h 2001/06/11 10:21:22
@@ -15,6 +15,7 @@
public:
const char* getNativeEncodingName() const;
+ const char* getNativeUnicodeEncodingName() const;
inline virtual bool isUnicodeLocale() const {return m_bIsUnicodeLocale;}
const char* getLanguageISOName() const;
const char* getLanguageISOTerritory() const;
Index: src/af/xap/xp/xap_EncodingManager.cpp
===================================================================
RCS file: /cvsroot/abi/src/af/xap/xp/xap_EncodingManager.cpp,v
retrieving revision 1.34
diff -u -r1.34 xap_EncodingManager.cpp
--- src/af/xap/xp/xap_EncodingManager.cpp 2001/06/07 15:52:24 1.34
+++ src/af/xap/xp/xap_EncodingManager.cpp 2001/06/11 10:21:39
@@ -40,6 +40,23 @@
return "ISO-8859-1"; /* this will definitely work*/
}
+const char* XAP_EncodingManager::getNativeUnicodeEncodingName() const
+{
+ return "UTF-8"; /* this will definitely work*/
+}
+
+static const char* UCS2BEName, *UCS2LEName;
+
+const char* XAP_EncodingManager::getUCS2BEName() const
+{
+ return UCS2BEName;
+}
+
+const char* XAP_EncodingManager::getUCS2LEName() const
+{
+ return UCS2LEName;
+}
+
#define VALID_ICONV_HANDLE(i) ((i) != (iconv_t)-1)
XAP_EncodingManager::~XAP_EncodingManager()
{
@@ -754,6 +1757,51 @@
*terrname = getLanguageISOTerritory(),
*enc = getNativeEncodingName();
+ // UCS-2 Encoding Names
+ static const char * (szUCS2BENames[]) = {
+ "UCS-2BE", // preferred
+ "UCS-2-BE", // older libiconv
+ "UNICODEBIG", // older glibc
+ "UNICODE-1-1", // in libiconv source
+ "UTF-16BE", // superset
+ "UTF-16-BE", // my guess
+ 0 };
+ static const char * (szUCS2LENames[]) = {
+ "UCS-2LE", // preferred
+ "UCS-2-LE", // older libiconv
+ "UNICODELITTLE", // older glibc
+ "UTF-16LE", // superset
+ "UTF-16-LE", // my guess
+ 0 };
+ const char ** p;
+ iconv_t iconv_handle;
+ for (p = szUCS2BENames; *p; ++p)
+ {
+ if ((iconv_handle = iconv_open(*p,*p)) != (iconv_t)-1)
+ {
+ iconv_close(iconv_handle);
+ UCS2BEName = *p;
+ break;
+ }
+ }
+ for (p = szUCS2LENames; *p; ++p)
+ {
+ if ((iconv_handle = iconv_open(*p,*p)) != (iconv_t)-1)
+ {
+ iconv_close(iconv_handle);
+ UCS2LEName = *p;
+ break;
+ }
+ }
+ if (UCS2BEName)
+ UT_DEBUGMSG(("This iconv supports UCS-2BE as \"%s\"\n",UCS2BEName));
+ else
+ UT_DEBUGMSG(("This iconv does not support UCS-2BE!\n"));
+ if (UCS2LEName)
+ UT_DEBUGMSG(("This iconv supports UCS-2LE as \"%s\"\n",UCS2LEName));
+ else
+ UT_DEBUGMSG(("This iconv does not support UCS-2LE!\n"));
+
if(!strcmp(enc, "UTF-8") || !strcmp(enc, "UTF8") || !strcmp(enc, "utf-8") ||
!strcmp(enc, "utf8"))
m_bIsUnicodeLocale = true;
else
Index: src/af/xap/xp/xap_EncodingManager.h
===================================================================
RCS file: /cvsroot/abi/src/af/xap/xp/xap_EncodingManager.h,v
retrieving revision 1.20
diff -u -r1.20 xap_EncodingManager.h
--- src/af/xap/xp/xap_EncodingManager.h 2001/06/07 15:52:24 1.20
+++ src/af/xap/xp/xap_EncodingManager.h 2001/06/11 10:21:43
@@ -52,9 +52,26 @@
/*
this shouldn't return NULL. Don't free or write to returned string.
The string should be uppercased (extra font tarballs assume this).
- TODO isn't iconv case sensitive? Mac encoding names are mixed case!
*/
virtual const char* getNativeEncodingName() const;
+
+ /*
+ this can return NULL. Don't free or write to returned string.
+ The string should be uppercased (extra font tarballs assume this).
+ */
+ virtual const char* getNativeUnicodeEncodingName() const;
+
+ /*
+ this can return NULL. Don't free or write to returned string.
+ The string should be uppercased (extra font tarballs assume this).
+ */
+ virtual const char* getUCS2BEName() const;
+
+ /*
+ this can return NULL. Don't free or write to returned string.
+ The string should be uppercased (extra font tarballs assume this).
+ */
+ virtual const char* getUCS2LEName() const;
/*
This should return true for any Unicode locale:
Index: src/wp/impexp/xp/ie_imp_Text.cpp
===================================================================
RCS file: /cvsroot/abi/src/wp/impexp/xp/ie_imp_Text.cpp,v
retrieving revision 1.26
diff -u -r1.26 ie_imp_Text.cpp
--- src/wp/impexp/xp/ie_imp_Text.cpp 2001/06/07 15:52:42 1.26
+++ src/wp/impexp/xp/ie_imp_Text.cpp 2001/06/11 10:22:44
@@ -355,11 +363,10 @@
eUcs2 = IE_Imp_Text_Sniffer::_recognizeUCS2(szBuf, iNumbytes, true);
- // TODO Old libiconv uses UCS-2-BE, new uses UCS-2BE
if (eUcs2 == IE_Imp_Text_Sniffer::UE_BigEnd)
- _setEncoding("UCS-2-BE");
+
+_setEncoding(XAP_EncodingManager::get_instance()->getUCS2BEName());
else if (eUcs2 == IE_Imp_Text_Sniffer::UE_LittleEnd)
- _setEncoding("UCS-2-LE");
+
+_setEncoding(XAP_EncodingManager::get_instance()->getUCS2LEName());
}
return UT_OK;
@@ -558,11 +566,10 @@
// Attempt to guess whether we're pasting 8 bit or unicode text
IE_Imp_Text_Sniffer::UCS2_Endian eUcs2 =
IE_Imp_Text_Sniffer::_recognizeUCS2((const char *)pData, lenData, true);
- // TODO Old libiconv uses UCS-2-BE, new uses UCS-2BE
if (eUcs2 == IE_Imp_Text_Sniffer::UE_BigEnd)
- _setEncoding("UCS-2-BE");
+ _setEncoding(XAP_EncodingManager::get_instance()->getUCS2BEName());
else if (eUcs2 == IE_Imp_Text_Sniffer::UE_LittleEnd)
- _setEncoding("UCS-2-LE");
+ _setEncoding(XAP_EncodingManager::get_instance()->getUCS2LEName());
else
_setEncoding(XAP_EncodingManager::get_instance()->getNativeEncodingName());