Date: Thursday, January 18, 2007 @ 16:48:54
  Author: marc
    Path: /cvsroot/carob/carob

Modified: include/Common.hpp (1.55 -> 1.56) src/Common.cpp (1.63 -> 1.64)
          src/JavaSocket.cpp (1.74 -> 1.75)
          test/01-Unit/TestStringCodecs.cpp (1.8 -> 1.9)
          test/01-Unit/TestStringCodecs.hpp (1.3 -> 1.4)

Moved tryUTF8locale() from Common.cpp to JavaSocket.cpp: the only
place it is used. Removed #include <locale> pollution from
Common.hpp. Split single TestStringCodecs test into two tests: UTF8
and others. Related to CAROB-74.


-----------------------------------+
 include/Common.hpp                |    5 -
 src/Common.cpp                    |  114 -----------------------------------
 src/JavaSocket.cpp                |  116 ++++++++++++++++++++++++++++++++++++
 test/01-Unit/TestStringCodecs.cpp |   86 +++++++++++++++++++-------
 test/01-Unit/TestStringCodecs.hpp |    1 
 5 files changed, 181 insertions(+), 141 deletions(-)


Index: carob/include/Common.hpp
diff -u carob/include/Common.hpp:1.55 carob/include/Common.hpp:1.56
--- carob/include/Common.hpp:1.55       Wed Jan 17 17:23:31 2007
+++ carob/include/Common.hpp    Thu Jan 18 16:48:54 2007
@@ -57,11 +57,6 @@
 }
 #endif
 
-// should move this to JavaSocket.cpp at some point
-#include <locale>
-namespace CarobNS {
-std::locale tryUTF8locale();
-}
 
 namespace CarobNS {
 
Index: carob/src/Common.cpp
diff -u carob/src/Common.cpp:1.63 carob/src/Common.cpp:1.64
--- carob/src/Common.cpp:1.63   Thu Jan 18 15:23:02 2007
+++ carob/src/Common.cpp        Thu Jan 18 16:48:54 2007
@@ -92,19 +92,6 @@
 
 
 
-// On linux or MACOSX>10.4 try "locale -a" to get the list of all available locales.
-// TODO: it would be nice to be able to get this at runtime
-// (in a non-portable way quite obviously...)
-
-// An empty string "" will get the "default" C++ locale, typically set
-// at run-time using environnement variables like LANG/LC_XXX. This
-// will work fine as long as this "default" is set to some UTF8 locale.
-
-// Using "C" should work fine as long as every string involved is pure
-// ASCII.
-
-#define NAME_OF_ANY_UTF8_LOCALE_AVAILABLE "en_US.utf8"
-
 void CarobNS::setLogLevel(const LogLevel l)
 {
 #ifdef CAROB_LOG4CXX_NAME
@@ -230,111 +217,14 @@
 }
 
 namespace {
-  std::locale tryuserlocale();
-}
-
-namespace CarobNS {
-
-  std::locale tryUTF8locale()
-  {
-    // __func__ is unfortunately C99
-    wstring funcname(__WFILE__ L":tryUTF8locale()");
-
-    // 1. trying hardwired macro
-    try {
-      logInfo(funcname,
-              L"1. trying to use locale: " NAME_OF_ANY_UTF8_LOCALE_AVAILABLE);
-      return trylocale(NAME_OF_ANY_UTF8_LOCALE_AVAILABLE);
-    } catch (CodecException& ce) {
-      logWarn(funcname,
-              L"please install the " NAME_OF_ANY_UTF8_LOCALE_AVAILABLE " locale");
-    }
-
-    // 2. try to fallback on the user defined locale
-    string user_loc;
-    try{
-      user_loc = std::locale("").name();
-    } catch (std::runtime_error&) {
-      user_loc = std::locale().name();
-    }
-    logInfo(funcname,
-            std::wstring(L"2. trying to fallback on locale: ")
-            + fromString(user_loc));
-
-    try { // search for "utf8" or variants
-
-      string::size_type utf8pos;
-
-      // sorry for this
-      (utf8pos = user_loc.rfind("UTF")) != string::npos 
-        || (utf8pos = user_loc.rfind("Utf")) != string::npos 
-        || ( utf8pos = user_loc.rfind("utf")) != string::npos;
-
-      if (utf8pos != string::npos)
-        if (user_loc.at(utf8pos+3) == '8'
-            || user_loc.at(utf8pos+4) == '8')
-          return std::locale(user_loc.c_str());
-
-    } catch (std::out_of_range& oor) {
-      // out_of_range => not found
-    }
-
-    logWarn(funcname, fromString(user_loc)
-            + L" does not seem to be an utf8 locale either");
-
-
-    // 3. Else try to hack the user locale name a bit
-    string::size_type delimpos;
-    string nocodeset;
-    if ((delimpos = user_loc.find_last_of('.')) != string::npos)
-      nocodeset = user_loc.substr(0, delimpos);
-    else if ((delimpos = user_loc.find_last_of('@')) != string::npos)
-      nocodeset = user_loc.substr(0, delimpos);
-    else
-      nocodeset = user_loc;
-
-    string hacked_names[2];
-    hacked_names[0] = nocodeset + ".UTF-8";
-    hacked_names[1] = nocodeset + ".utf8";
-
-    for (int i=0; i<2; i++)
-      try {
-        logInfo(funcname, std::wstring(L"3n. trying to fall back on locale ")
-                + fromString(hacked_names[i]));
-        return trylocale(hacked_names[i].c_str());
-      } catch (CodecException&) {
-        logWarn(funcname,
-                std::wstring(L"or please install locale ")
-                + fromString(hacked_names[i]));
-      }
-
-    // 4. nothing found
-    logFatal(funcname,
-             std::wstring(L"Please install one of the following locales: "
-                          NAME_OF_ANY_UTF8_LOCALE_AVAILABLE ", ")
-             + fromString(user_loc) + L", "
-             + fromString(nocodeset)
-             + L".utf8"
-      );
-    throw CodecException(L"no UTF8 codec found");
-
-    // TODO: fall back on:
-    // - configuration file
-    // - iconv
-    // - MultiByteToWideChar(CP_UTF8,...)
-    // - others?
-    // See CAROB-74
-
-  } // tryUTF8locale()
-} // namespace CarobNS
-
-namespace {
 
   std::locale tryuserlocale()
   {
     try {
       return std::locale("");
     } catch (std::runtime_error& ) {
+      // FIXME: since this is static initialization time, log4cxx may
+      // not have been initialized at this point
       CarobNS::logWarn(__WFILE__ L":tryuserlocale()",
               L"Missing user-preferred locale (check LANG). "
               "Falling back on current global locale.");
Index: carob/src/JavaSocket.cpp
diff -u carob/src/JavaSocket.cpp:1.74 carob/src/JavaSocket.cpp:1.75
--- carob/src/JavaSocket.cpp:1.74       Thu Jan 18 14:46:38 2007
+++ carob/src/JavaSocket.cpp    Thu Jan 18 16:48:54 2007
@@ -25,6 +25,10 @@
 #include "Common.hpp"
 #include "SystemDependantDefs.hpp"
 
+#include <locale>
+#include <string>
+#include <stdexcept>
+
 #ifdef __MINGW32__
   #include <ws2tcpip.h>
 #else
@@ -45,6 +49,7 @@
 #include <unistd.h>  // close()
 
 using std::wstring;
+using std::string;
 
 using namespace CarobNS;
 
@@ -70,6 +75,117 @@
 }
 }
 
+// On linux or MACOSX>10.4 try "locale -a" to get the list of all available locales.
+// TODO: it would be nice to be able to get this at runtime
+// (in a non-portable way quite obviously...)
+
+// An empty string "" will get the "default" C++ locale, typically set
+// at run-time using environnement variables like LANG/LC_XXX. This
+// will work fine as long as this "default" is set to some UTF8 locale.
+
+// Using "C" should work fine as long as every string involved is pure
+// ASCII.
+
+#define NAME_OF_ANY_UTF8_LOCALE_AVAILABLE "en_US.utf8"
+
+namespace CarobNS { // tryUTF8locale() exported only for testing purposes
+
+  // TODO: add a *run-time* UTF8 locale definition option. See CAROB-74.
+  std::locale tryUTF8locale()
+  {
+    // __func__ is unfortunately C99
+    wstring funcname(__WFILE__ L":tryUTF8locale()");
+
+    // 1. trying hardwired macro
+    try {
+      logInfo(funcname,
+              L"1. trying to use locale: " NAME_OF_ANY_UTF8_LOCALE_AVAILABLE);
+      return trylocale(NAME_OF_ANY_UTF8_LOCALE_AVAILABLE);
+    } catch (CodecException& ce) {
+      logWarn(funcname,
+              L"please install the " NAME_OF_ANY_UTF8_LOCALE_AVAILABLE " locale");
+    }
+
+    // 2. try to fallback on the user defined locale
+    string user_loc;
+    try{
+      user_loc = std::locale("").name();
+    } catch (std::runtime_error&) {
+      user_loc = std::locale().name();
+    }
+    logInfo(funcname,
+            std::wstring(L"2. trying to fallback on locale: ")
+            + fromString(user_loc));
+
+    try { // search for "utf8" or variants
+
+      string::size_type utf8pos;
+
+      // sorry for this
+      (utf8pos = user_loc.rfind("UTF")) != string::npos 
+        || (utf8pos = user_loc.rfind("Utf")) != string::npos 
+        || ( utf8pos = user_loc.rfind("utf")) != string::npos;
+
+      if (utf8pos != string::npos)
+        if (user_loc.at(utf8pos+3) == '8'
+            || user_loc.at(utf8pos+4) == '8')
+          return std::locale(user_loc.c_str());
+
+    } catch (std::out_of_range& oor) {
+      // out_of_range => not found
+    }
+
+    logWarn(funcname, fromString(user_loc)
+            + L" does not seem to be an utf8 locale either");
+
+
+    // 3. Else try to hack the user locale name a bit
+    string::size_type delimpos;
+    string nocodeset;
+    if ((delimpos = user_loc.find_last_of('.')) != string::npos)
+      nocodeset = user_loc.substr(0, delimpos);
+    else if ((delimpos = user_loc.find_last_of('@')) != string::npos)
+      nocodeset = user_loc.substr(0, delimpos);
+    else
+      nocodeset = user_loc;
+
+    string hacked_names[2];
+    hacked_names[0] = nocodeset + ".UTF-8";
+    hacked_names[1] = nocodeset + ".utf8";
+
+    for (int i=0; i<2; i++)
+      try {
+        logInfo(funcname, std::wstring(L"3n. trying to fall back on locale ")
+                + fromString(hacked_names[i]));
+        return trylocale(hacked_names[i].c_str());
+      } catch (CodecException&) {
+        logWarn(funcname,
+                std::wstring(L"or please install locale ")
+                + fromString(hacked_names[i]));
+      }
+
+    // 4. nothing found
+    logFatal(funcname,
+             std::wstring(L"Please install one of the following locales: "
+                          NAME_OF_ANY_UTF8_LOCALE_AVAILABLE ", ")
+             + fromString(user_loc) + L", "
+             + fromString(nocodeset)
+             + L".utf8"
+      );
+    throw CodecException(L"no UTF8 codec found");
+
+    // TODO: fall back on:
+    // - configuration file
+    // - iconv
+    // - MultiByteToWideChar(CP_UTF8,...)
+    // - others?
+    // See CAROB-74
+
+  } // tryUTF8locale()
+} // CarobNS namespace
+
+
+
 JavaSocket::JavaSocket() throw (CodecException) :
 #ifdef CAROB_USE_ICONV
   utf8_codec("UTF-8"),
Index: carob/test/01-Unit/TestStringCodecs.cpp
diff -u carob/test/01-Unit/TestStringCodecs.cpp:1.8 carob/test/01-Unit/TestStringCodecs.cpp:1.9
--- carob/test/01-Unit/TestStringCodecs.cpp:1.8 Thu Jan 18 15:04:31 2007
+++ carob/test/01-Unit/TestStringCodecs.cpp     Thu Jan 18 16:48:54 2007
@@ -28,43 +28,83 @@
 
 #include <iostream>
 
+/*
+  Build/Install a locale.
 
-using namespace CarobNS;
 
-// wchar_t is supposed to be Unicode (UCS-4 or UCS-2) else these tests won't work
+  - On debian-based distros, type as root:
+  # dpkg-reconfigure locales
 
-void TestStringCodecs::encode_decode()
-{
-    using std::string;
-    using std::wstring;
 
-    // to install a missing locale,
-    // on debian-based distros, type as root:
-    // # dpkg-reconfigure locales
-    // on redhat:
-    // # TODO
+  - Manually:
+  cd /usr/share/i18n 
+  ls locales/ 
+  fr_FR ... 
+  ls charmaps/ 
+  ISO-8859-15.gz ... 
+  localedef -i fr_FR -f ISO-8859-15 my.locale 
+  locale -a 
+  my.locale ... 
+  export LANG=my.locale 
+  
+  For SuSE See: <http://www.google.com/search?q=SUSE+LINUX+Language+and+Country-Specific+Settings++localedef>
+*/
 
 
-    const MBSCodec latin9_codec("[EMAIL PROTECTED]");
-    const MBSCodec iso7_codec("el_GR.iso88597");
+using namespace CarobNS;
+using std::string;
+using std::wstring;
 
-#ifdef CAROB_USE_ICONV
-    IconvCodec utf8_codec("UTF-8");
-#else
-    MBSCodec utf8_codec(tryUTF8locale());
-#endif
+// wchar_t is supposed to be Unicode (UCS-4 or UCS-2) else these tests won't work
 
-    // TEST STRINGS
+namespace CarobNS {
+    std::locale tryUTF8locale(); // from JavaSocket.cpp
+}
 
+namespace {
     // CAE
     //  c, a` e', + zero + last character (e') again
     // in latin1, latin2 and UCS: 231, 224, 233
     wchar_t wide_cae_[] = {  0xe7, 0xe0, 0xe9, 0,  0xe9 };
     wstring wide_cae(wide_cae_, 5);
+}
+
+void TestStringCodecs::UTF8codec()
+{
+
+#ifdef CAROB_USE_ICONV
+    IconvCodec utf8_codec("UTF-8");
+#else
+    MBSCodec utf8_codec(tryUTF8locale());
+#endif
 
+    char utf8_pbk_[] = { 0xcf, 0x86, 0xce, 0xb2, 0xce, 0xba, 0, 0xce, 0xba };
+    string utf8_pbk(utf8_pbk_, 9);
+    
     // 195, 167, 195, 160, 195, 169
     char utf8_cae_[] = { 0xc3, 0xa7, 0xc3, 0xa0, 0xc3, 0xa9, 0, 0xc3, 0xa9 };
     string utf8_cae(utf8_cae_, 9);
+
+    string utf8s(utf8_codec.encode(wide_cae));
+
+    CPPUNIT_ASSERT(0 == utf8_cae.compare(utf8s));
+
+    // DECODE BACK AND COMPARE
+    
+    CPPUNIT_ASSERT(0 == wide_cae.compare(utf8_codec.decode(utf8s)));
+}
+
+
+void TestStringCodecs::encode_decode()
+{
+
+    const MBSCodec latin9_codec("[EMAIL PROTECTED]");
+    const MBSCodec iso7_codec("el_GR.iso88597");
+
+
+    // TEST STRINGS
+
+
     char latin1or2_cae_[] = { 0xe7, 0xe0, 0xe9, 0, 0xe9 };
     string latin1or2_cae(latin1or2_cae_, 5);
 
@@ -73,15 +113,12 @@
     wchar_t wide_pbk_[] = { 0x03c6, 0x03b2, 0x03ba, 0, 0x03ba };
     wstring wide_pbk(wide_pbk_, 5);
 
-    char utf8_pbk_[] = { 0xcf, 0x86, 0xce, 0xb2, 0xce, 0xba, 0, 0xce, 0xba };
-    string utf8_pbk(utf8_pbk_, 9);
     char iso7_pbk_[] = { 0xf6, 0xe2, 0xea, 0, 0xea};
     string iso7_pbk(iso7_pbk_, 5);
 
 
     // ENCODE
 
-    string utf8s(utf8_codec.encode(wide_cae));
     string latin9s(latin9_codec.encode(wide_cae));
 
     string iso7s;
@@ -98,13 +135,11 @@
     
     // COMPARE
 
-    CPPUNIT_ASSERT(0 == utf8_cae.compare(utf8s));
     CPPUNIT_ASSERT(0 == latin1or2_cae.compare(latin9s));
     CPPUNIT_ASSERT(0 == iso7s.compare(iso7_pbk));
 
     // DECODE BACK AND COMPARE
     
-    CPPUNIT_ASSERT(0 == wide_cae.compare(utf8_codec.decode(utf8s)));
     CPPUNIT_ASSERT(0 == wide_cae.compare(latin9_codec.decode(latin9s)));
     CPPUNIT_ASSERT(0 == wide_pbk.compare(iso7_codec.decode(iso7s)));
     
@@ -142,6 +177,9 @@
     suiteOfTests->addTest(new CppUnit::TestCaller<TestStringCodecs>(
                               "TestStringCodecs::encode_decode", 
                               &TestStringCodecs::encode_decode));
+    suiteOfTests->addTest(new CppUnit::TestCaller<TestStringCodecs>(
+                              "TestStringCodecs::UTF8codec", 
+                              &TestStringCodecs::UTF8codec));
     return suiteOfTests;
 }
 
Index: carob/test/01-Unit/TestStringCodecs.hpp
diff -u carob/test/01-Unit/TestStringCodecs.hpp:1.3 carob/test/01-Unit/TestStringCodecs.hpp:1.4
--- carob/test/01-Unit/TestStringCodecs.hpp:1.3 Thu Jul 27 17:10:53 2006
+++ carob/test/01-Unit/TestStringCodecs.hpp     Thu Jan 18 16:48:54 2007
@@ -33,6 +33,7 @@
   /** Suite of tests to be run */
   static CppUnit::Test* suite();
 
+  void UTF8codec();
   /**
    * Encode various strings in different locales and checks that decoding
    * returns the same string

_______________________________________________
Carob-commits mailing list
[email protected]
https://forge.continuent.org/mailman/listinfo/carob-commits

Reply via email to