Date: Thursday, January 18, 2007 @ 16:48:54
Author: marc
Path: /cvsroot/carob/carob
Modified: include/Common.hpp (1.55 -> 1.56) src/Common.cpp (1.63 -> 1.64)
src/JavaSocket.cpp (1.74 -> 1.75)
test/01-Unit/TestStringCodecs.cpp (1.8 -> 1.9)
test/01-Unit/TestStringCodecs.hpp (1.3 -> 1.4)
Moved tryUTF8locale() from Common.cpp to JavaSocket.cpp: the only
place it is used. Removed #include <locale> pollution from
Common.hpp. Split single TestStringCodecs test into two tests: UTF8
and others. Related to CAROB-74.
-----------------------------------+
include/Common.hpp | 5 -
src/Common.cpp | 114 -----------------------------------
src/JavaSocket.cpp | 116 ++++++++++++++++++++++++++++++++++++
test/01-Unit/TestStringCodecs.cpp | 86 +++++++++++++++++++-------
test/01-Unit/TestStringCodecs.hpp | 1
5 files changed, 181 insertions(+), 141 deletions(-)
Index: carob/include/Common.hpp
diff -u carob/include/Common.hpp:1.55 carob/include/Common.hpp:1.56
--- carob/include/Common.hpp:1.55 Wed Jan 17 17:23:31 2007
+++ carob/include/Common.hpp Thu Jan 18 16:48:54 2007
@@ -57,11 +57,6 @@
}
#endif
-// should move this to JavaSocket.cpp at some point
-#include <locale>
-namespace CarobNS {
-std::locale tryUTF8locale();
-}
namespace CarobNS {
Index: carob/src/Common.cpp
diff -u carob/src/Common.cpp:1.63 carob/src/Common.cpp:1.64
--- carob/src/Common.cpp:1.63 Thu Jan 18 15:23:02 2007
+++ carob/src/Common.cpp Thu Jan 18 16:48:54 2007
@@ -92,19 +92,6 @@
-// On linux or MACOSX>10.4 try "locale -a" to get the list of all available
locales.
-// TODO: it would be nice to be able to get this at runtime
-// (in a non-portable way quite obviously...)
-
-// An empty string "" will get the "default" C++ locale, typically set
-// at run-time using environnement variables like LANG/LC_XXX. This
-// will work fine as long as this "default" is set to some UTF8 locale.
-
-// Using "C" should work fine as long as every string involved is pure
-// ASCII.
-
-#define NAME_OF_ANY_UTF8_LOCALE_AVAILABLE "en_US.utf8"
-
void CarobNS::setLogLevel(const LogLevel l)
{
#ifdef CAROB_LOG4CXX_NAME
@@ -230,111 +217,14 @@
}
namespace {
- std::locale tryuserlocale();
-}
-
-namespace CarobNS {
-
- std::locale tryUTF8locale()
- {
- // __func__ is unfortunately C99
- wstring funcname(__WFILE__ L":tryUTF8locale()");
-
- // 1. trying hardwired macro
- try {
- logInfo(funcname,
- L"1. trying to use locale: " NAME_OF_ANY_UTF8_LOCALE_AVAILABLE);
- return trylocale(NAME_OF_ANY_UTF8_LOCALE_AVAILABLE);
- } catch (CodecException& ce) {
- logWarn(funcname,
- L"please install the " NAME_OF_ANY_UTF8_LOCALE_AVAILABLE "
locale");
- }
-
- // 2. try to fallback on the user defined locale
- string user_loc;
- try{
- user_loc = std::locale("").name();
- } catch (std::runtime_error&) {
- user_loc = std::locale().name();
- }
- logInfo(funcname,
- std::wstring(L"2. trying to fallback on locale: ")
- + fromString(user_loc));
-
- try { // search for "utf8" or variants
-
- string::size_type utf8pos;
-
- // sorry for this
- (utf8pos = user_loc.rfind("UTF")) != string::npos
- || (utf8pos = user_loc.rfind("Utf")) != string::npos
- || ( utf8pos = user_loc.rfind("utf")) != string::npos;
-
- if (utf8pos != string::npos)
- if (user_loc.at(utf8pos+3) == '8'
- || user_loc.at(utf8pos+4) == '8')
- return std::locale(user_loc.c_str());
-
- } catch (std::out_of_range& oor) {
- // out_of_range => not found
- }
-
- logWarn(funcname, fromString(user_loc)
- + L" does not seem to be an utf8 locale either");
-
-
- // 3. Else try to hack the user locale name a bit
- string::size_type delimpos;
- string nocodeset;
- if ((delimpos = user_loc.find_last_of('.')) != string::npos)
- nocodeset = user_loc.substr(0, delimpos);
- else if ((delimpos = user_loc.find_last_of('@')) != string::npos)
- nocodeset = user_loc.substr(0, delimpos);
- else
- nocodeset = user_loc;
-
- string hacked_names[2];
- hacked_names[0] = nocodeset + ".UTF-8";
- hacked_names[1] = nocodeset + ".utf8";
-
- for (int i=0; i<2; i++)
- try {
- logInfo(funcname, std::wstring(L"3n. trying to fall back on locale ")
- + fromString(hacked_names[i]));
- return trylocale(hacked_names[i].c_str());
- } catch (CodecException&) {
- logWarn(funcname,
- std::wstring(L"or please install locale ")
- + fromString(hacked_names[i]));
- }
-
- // 4. nothing found
- logFatal(funcname,
- std::wstring(L"Please install one of the following locales: "
- NAME_OF_ANY_UTF8_LOCALE_AVAILABLE ", ")
- + fromString(user_loc) + L", "
- + fromString(nocodeset)
- + L".utf8"
- );
- throw CodecException(L"no UTF8 codec found");
-
- // TODO: fall back on:
- // - configuration file
- // - iconv
- // - MultiByteToWideChar(CP_UTF8,...)
- // - others?
- // See CAROB-74
-
- } // tryUTF8locale()
-} // namespace CarobNS
-
-namespace {
std::locale tryuserlocale()
{
try {
return std::locale("");
} catch (std::runtime_error& ) {
+ // FIXME: since this is static initialization time, log4cxx may
+ // not have been initialized at this point
CarobNS::logWarn(__WFILE__ L":tryuserlocale()",
L"Missing user-preferred locale (check LANG). "
"Falling back on current global locale.");
Index: carob/src/JavaSocket.cpp
diff -u carob/src/JavaSocket.cpp:1.74 carob/src/JavaSocket.cpp:1.75
--- carob/src/JavaSocket.cpp:1.74 Thu Jan 18 14:46:38 2007
+++ carob/src/JavaSocket.cpp Thu Jan 18 16:48:54 2007
@@ -25,6 +25,10 @@
#include "Common.hpp"
#include "SystemDependantDefs.hpp"
+#include <locale>
+#include <string>
+#include <stdexcept>
+
#ifdef __MINGW32__
#include <ws2tcpip.h>
#else
@@ -45,6 +49,7 @@
#include <unistd.h> // close()
using std::wstring;
+using std::string;
using namespace CarobNS;
@@ -70,6 +75,117 @@
}
}
+// On linux or MACOSX>10.4 try "locale -a" to get the list of all available
locales.
+// TODO: it would be nice to be able to get this at runtime
+// (in a non-portable way quite obviously...)
+
+// An empty string "" will get the "default" C++ locale, typically set
+// at run-time using environnement variables like LANG/LC_XXX. This
+// will work fine as long as this "default" is set to some UTF8 locale.
+
+// Using "C" should work fine as long as every string involved is pure
+// ASCII.
+
+#define NAME_OF_ANY_UTF8_LOCALE_AVAILABLE "en_US.utf8"
+
+namespace CarobNS { // tryUTF8locale() exported only for testing purposes
+
+ // TODO: add a *run-time* UTF8 locale definition option. See CAROB-74.
+ std::locale tryUTF8locale()
+ {
+ // __func__ is unfortunately C99
+ wstring funcname(__WFILE__ L":tryUTF8locale()");
+
+ // 1. trying hardwired macro
+ try {
+ logInfo(funcname,
+ L"1. trying to use locale: " NAME_OF_ANY_UTF8_LOCALE_AVAILABLE);
+ return trylocale(NAME_OF_ANY_UTF8_LOCALE_AVAILABLE);
+ } catch (CodecException& ce) {
+ logWarn(funcname,
+ L"please install the " NAME_OF_ANY_UTF8_LOCALE_AVAILABLE "
locale");
+ }
+
+ // 2. try to fallback on the user defined locale
+ string user_loc;
+ try{
+ user_loc = std::locale("").name();
+ } catch (std::runtime_error&) {
+ user_loc = std::locale().name();
+ }
+ logInfo(funcname,
+ std::wstring(L"2. trying to fallback on locale: ")
+ + fromString(user_loc));
+
+ try { // search for "utf8" or variants
+
+ string::size_type utf8pos;
+
+ // sorry for this
+ (utf8pos = user_loc.rfind("UTF")) != string::npos
+ || (utf8pos = user_loc.rfind("Utf")) != string::npos
+ || ( utf8pos = user_loc.rfind("utf")) != string::npos;
+
+ if (utf8pos != string::npos)
+ if (user_loc.at(utf8pos+3) == '8'
+ || user_loc.at(utf8pos+4) == '8')
+ return std::locale(user_loc.c_str());
+
+ } catch (std::out_of_range& oor) {
+ // out_of_range => not found
+ }
+
+ logWarn(funcname, fromString(user_loc)
+ + L" does not seem to be an utf8 locale either");
+
+
+ // 3. Else try to hack the user locale name a bit
+ string::size_type delimpos;
+ string nocodeset;
+ if ((delimpos = user_loc.find_last_of('.')) != string::npos)
+ nocodeset = user_loc.substr(0, delimpos);
+ else if ((delimpos = user_loc.find_last_of('@')) != string::npos)
+ nocodeset = user_loc.substr(0, delimpos);
+ else
+ nocodeset = user_loc;
+
+ string hacked_names[2];
+ hacked_names[0] = nocodeset + ".UTF-8";
+ hacked_names[1] = nocodeset + ".utf8";
+
+ for (int i=0; i<2; i++)
+ try {
+ logInfo(funcname, std::wstring(L"3n. trying to fall back on locale ")
+ + fromString(hacked_names[i]));
+ return trylocale(hacked_names[i].c_str());
+ } catch (CodecException&) {
+ logWarn(funcname,
+ std::wstring(L"or please install locale ")
+ + fromString(hacked_names[i]));
+ }
+
+ // 4. nothing found
+ logFatal(funcname,
+ std::wstring(L"Please install one of the following locales: "
+ NAME_OF_ANY_UTF8_LOCALE_AVAILABLE ", ")
+ + fromString(user_loc) + L", "
+ + fromString(nocodeset)
+ + L".utf8"
+ );
+ throw CodecException(L"no UTF8 codec found");
+
+ // TODO: fall back on:
+ // - configuration file
+ // - iconv
+ // - MultiByteToWideChar(CP_UTF8,...)
+ // - others?
+ // See CAROB-74
+
+ } // tryUTF8locale()
+} // CarobNS namespace
+
+
+
JavaSocket::JavaSocket() throw (CodecException) :
#ifdef CAROB_USE_ICONV
utf8_codec("UTF-8"),
Index: carob/test/01-Unit/TestStringCodecs.cpp
diff -u carob/test/01-Unit/TestStringCodecs.cpp:1.8
carob/test/01-Unit/TestStringCodecs.cpp:1.9
--- carob/test/01-Unit/TestStringCodecs.cpp:1.8 Thu Jan 18 15:04:31 2007
+++ carob/test/01-Unit/TestStringCodecs.cpp Thu Jan 18 16:48:54 2007
@@ -28,43 +28,83 @@
#include <iostream>
+/*
+ Build/Install a locale.
-using namespace CarobNS;
-// wchar_t is supposed to be Unicode (UCS-4 or UCS-2) else these tests won't
work
+ - On debian-based distros, type as root:
+ # dpkg-reconfigure locales
-void TestStringCodecs::encode_decode()
-{
- using std::string;
- using std::wstring;
- // to install a missing locale,
- // on debian-based distros, type as root:
- // # dpkg-reconfigure locales
- // on redhat:
- // # TODO
+ - Manually:
+ cd /usr/share/i18n
+ ls locales/
+ fr_FR ...
+ ls charmaps/
+ ISO-8859-5.gz ...
+ localedef -i fr_FR -f ISO-8859-15 my.locale
+ locale -a
+ my.locale ...
+ export LANG=my.locale
+
+ For SuSE See:
<http://www.google.com/search?q=SUSE+LINUX+Language+and+Country-Specific+Settings++localedef>
+*/
- const MBSCodec latin9_codec("[EMAIL PROTECTED]");
- const MBSCodec iso7_codec("el_GR.iso88597");
+using namespace CarobNS;
+using std::string;
+using std::wstring;
-#ifdef CAROB_USE_ICONV
- IconvCodec utf8_codec("UTF-8");
-#else
- MBSCodec utf8_codec(tryUTF8locale());
-#endif
+// wchar_t is supposed to be Unicode (UCS-4 or UCS-2) else these tests won't
work
- // TEST STRINGS
+namespace CarobNS {
+ std::locale tryUTF8locale(); // from JavaSocket.cpp
+}
+namespace {
// CAE
// c, a` e', + zero + last character (e') again
// in latin1, latin2 and UCS: 231, 224, 233
wchar_t wide_cae_[] = { 0xe7, 0xe0, 0xe9, 0, 0xe9 };
wstring wide_cae(wide_cae_, 5);
+}
+
+void TestStringCodecs::UTF8codec()
+{
+
+#ifdef CAROB_USE_ICONV
+ IconvCodec utf8_codec("UTF-8");
+#else
+ MBSCodec utf8_codec(tryUTF8locale());
+#endif
+ char utf8_pbk_[] = { 0xcf, 0x86, 0xce, 0xb2, 0xce, 0xba, 0, 0xce, 0xba };
+ string utf8_pbk(utf8_pbk_, 9);
+
// 195, 167, 195, 160, 195, 169
char utf8_cae_[] = { 0xc3, 0xa7, 0xc3, 0xa0, 0xc3, 0xa9, 0, 0xc3, 0xa9 };
string utf8_cae(utf8_cae_, 9);
+
+ string utf8s(utf8_codec.encode(wide_cae));
+
+ CPPUNIT_ASSERT(0 == utf8_cae.compare(utf8s));
+
+ // DECODE BACK AND COMPARE
+
+ CPPUNIT_ASSERT(0 == wide_cae.compare(utf8_codec.decode(utf8s)));
+}
+
+
+void TestStringCodecs::encode_decode()
+{
+
+ const MBSCodec latin9_codec("[EMAIL PROTECTED]");
+ const MBSCodec iso7_codec("el_GR.iso88597");
+
+
+ // TEST STRINGS
+
+
char latin1or2_cae_[] = { 0xe7, 0xe0, 0xe9, 0, 0xe9 };
string latin1or2_cae(latin1or2_cae_, 5);
@@ -73,15 +113,12 @@
wchar_t wide_pbk_[] = { 0x03c6, 0x03b2, 0x03ba, 0, 0x03ba };
wstring wide_pbk(wide_pbk_, 5);
- char utf8_pbk_[] = { 0xcf, 0x86, 0xce, 0xb2, 0xce, 0xba, 0, 0xce, 0xba };
- string utf8_pbk(utf8_pbk_, 9);
char iso7_pbk_[] = { 0xf6, 0xe2, 0xea, 0, 0xea};
string iso7_pbk(iso7_pbk_, 5);
// ENCODE
- string utf8s(utf8_codec.encode(wide_cae));
string latin9s(latin9_codec.encode(wide_cae));
string iso7s;
@@ -98,13 +135,11 @@
// COMPARE
- CPPUNIT_ASSERT(0 == utf8_cae.compare(utf8s));
CPPUNIT_ASSERT(0 == latin1or2_cae.compare(latin9s));
CPPUNIT_ASSERT(0 == iso7s.compare(iso7_pbk));
// DECODE BACK AND COMPARE
- CPPUNIT_ASSERT(0 == wide_cae.compare(utf8_codec.decode(utf8s)));
CPPUNIT_ASSERT(0 == wide_cae.compare(latin9_codec.decode(latin9s)));
CPPUNIT_ASSERT(0 == wide_pbk.compare(iso7_codec.decode(iso7s)));
@@ -142,6 +177,9 @@
suiteOfTests->addTest(new CppUnit::TestCaller<TestStringCodecs>(
"TestStringCodecs::encode_decode",
&TestStringCodecs::encode_decode));
+ suiteOfTests->addTest(new CppUnit::TestCaller<TestStringCodecs>(
+ "TestStringCodecs::UTF8codec",
+ &TestStringCodecs::UTF8codec));
return suiteOfTests;
}
Index: carob/test/01-Unit/TestStringCodecs.hpp
diff -u carob/test/01-Unit/TestStringCodecs.hpp:1.3
carob/test/01-Unit/TestStringCodecs.hpp:1.4
--- carob/test/01-Unit/TestStringCodecs.hpp:1.3 Thu Jul 27 17:10:53 2006
+++ carob/test/01-Unit/TestStringCodecs.hpp Thu Jan 18 16:48:54 2007
@@ -33,6 +33,7 @@
/** Suite of tests to be run */
static CppUnit::Test* suite();
+ void UTF8codec();
/**
* Encode various strings in different locales and checks that decoding
* returns the same string
_______________________________________________
Carob-commits mailing list
[email protected]
https://forge.continuent.org/mailman/listinfo/carob-commits