Date: Thursday, December 15, 2005 @ 21:01:18
  Author: marc
    Path: /cvsroot/carob/carob

   Added: include/StringCodecs.hpp (1.1) src/StringCodecs.cpp (1.1)

"Simplicity is the ultimate sophistication." Leonardo da Vinci


--------------------------+
 include/StringCodecs.hpp |   93 ++++++++++++++++++++++++++++++++
 src/StringCodecs.cpp     |  129 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 222 insertions(+)


Index: carob/include/StringCodecs.hpp
diff -u /dev/null carob/include/StringCodecs.hpp:1.1
--- /dev/null   Thu Dec 15 21:01:18 2005
+++ carob/include/StringCodecs.hpp      Thu Dec 15 21:01:17 2005
@@ -0,0 +1,93 @@
+
+/*
+ * Sequoia: Database clustering technology Copyright 2002-2005
+ * Continuent, Inc.
+
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+
+ * http://www.apache.org/licenses/LICENSE-2.0 
+
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+
+ * Initial developer(s): Marc Herbert
+ * Contributor(s):
+ */
+
+#ifndef CAROB_STRING_CODEC_HPP_
+#define CAROB_STRING_CODEC_HPP_
+
+#include <locale>
+// #include <stdexcept> // runtime_error
+
+
+// TODO: it would be nice to be able to get this at runtime
+// (in a non-portable way quite obviously...)
+// On linux try "locale -a"
+#define NAME_OF_ANY_UTF8_LOCALE_AVAILABLE "fr_FR.utf8"
+
+namespace CarobNS {
+
+// Empty exception type thrown by trylocale() and by MBSCodec::encode()/decode() on failure; it carries no message.
+// TODO: get some features, at least a what(). Derive from CarobException or overkill?
+class CodecException {}; 
+
+typedef std::codecvt<wchar_t, char, mbstate_t> mbs_codecvt; // wchar_t <-> char conversion facet type used by MBSCodec
+typedef std::codecvt_byname<wchar_t, char, mbstate_t> mbs_codecvt_byname; // by-name variant of the same facet type
+
+// This class should be split into an interface and an implementation,
+// with the "MBS" part restricted to the implementation.
+
+// For direct access to iconv, read this:
+// file://localhost/usr/share/doc/gcc-4.0-base/libstdc++/html/22_locale/codecvt.html
+
+// This code probably does not work with UTF-16 wide chars, only UCS-2
+// and UCS-4
+
+std::locale trylocale(const char * const name) throw (CodecException); // returns std::locale(name); throws CodecException if that construction raises std::runtime_error
+
+/**
+ * Converts between wide strings and multibyte (narrow) strings using
+ * the codecvt facet of one std::locale, fixed at construction time.
+ */
+class MBSCodec
+{
+public:
+    /** Wide string -> multibyte string; throws CodecException on failure. */
+    std::string encode(const std::wstring&) const throw (CodecException);
+
+    /** Multibyte string -> wide string; throws CodecException on failure. */
+    std::wstring decode(const std::string&) const throw (CodecException);
+
+    /**
+     * Default converter: uses the user-defined locale, i.e. std::locale("")
+     * (typically driven by LANG, LC_etc).
+     */
+    MBSCodec()
+        : loc(std::locale("")),
+          codecvt(std::use_facet<mbs_codecvt>(loc))
+    {
+    }
+
+    /** Converter bound to a copy of the given locale. */
+    MBSCodec(const std::locale& locarg)
+        : loc(locarg),
+          codecvt(std::use_facet<mbs_codecvt>(loc))
+    {
+    }
+
+    /**
+     * Converter bound to the locale called locname, obtained through
+     * trylocale(); declared to throw CodecException.
+     */
+    MBSCodec(const char* locname) throw (CodecException)
+        : loc(trylocale(locname)),
+          codecvt(std::use_facet<mbs_codecvt>(loc))
+    {
+    }
+
+private:
+    // Declaration order matters: members are initialized in this order and
+    // the constructors obtain the facet reference from "loc".
+    const std::locale loc;      // our own (and cheap) copy, held for safety
+    const mbs_codecvt& codecvt; // facet obtained with std::use_facet from loc
+};
+
+
+} // namespace
+
+#endif // include only once
+
+/*
+ * Local Variables:
+ * c-file-style: "bsd"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
Index: carob/src/StringCodecs.cpp
diff -u /dev/null carob/src/StringCodecs.cpp:1.1
--- /dev/null   Thu Dec 15 21:01:18 2005
+++ carob/src/StringCodecs.cpp  Thu Dec 15 21:01:17 2005
@@ -0,0 +1,129 @@
+
+/*
+ * Sequoia: Database clustering technology Copyright 2002-2005
+ * Continuent, Inc.
+
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+
+ * http://www.apache.org/licenses/LICENSE-2.0 
+
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+
+ * Initial developer(s): Marc Herbert
+ * Contributor(s):
+ */
+
+#include <stdexcept>
+
+#include <iostream> // TODO: remove me once we have decent exceptions
+
+#include "StringCodecs.hpp"
+
+
+using namespace CarobNS;
+
+
+/**
+ * Allocates a facet of type facet_byname_t for the locale called "name".
+ *
+ * WARNING: catching std::runtime_error from std::codecvt_byname() segfaults
+ * with gcc 4.0.2, see
+ * <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=343108>
+ * So unfortunately do NOT do this. This code is a reminder.
+ *
+ * @param name locale name passed to the facet constructor
+ * @return pointer to the facet allocated with new
+ *
+ * If the constructor throws std::runtime_error, the error text and the
+ * locale name are written to std::cerr and the process exits with
+ * status 912.
+ * NOTE(review): 912 does not fit in 8 bits; on POSIX systems the parent
+ * would presumably observe 912 & 0xFF == 144 -- confirm this is intended.
+ */
+namespace {
+template <typename facet_byname_t>
+facet_byname_t * trynewfacet_byname(const char * const name)
+{
+    try {
+        return new facet_byname_t(name);
+    } catch (const std::runtime_error& re) { // by const reference: no copy, no slicing
+        std::cerr << re.what() << std::endl;
+        std::cerr << "locale name was: " << name << std::endl;
+        exit(912);
+    }
+}
+}
+
+
+/**
+ * Builds the std::locale called "name".
+ *
+ * @param name locale name passed to the std::locale constructor
+ * @return the constructed locale
+ * @throws CodecException if std::locale(name) throws std::runtime_error;
+ *         the error text and the locale name are first written to std::cerr
+ */
+std::locale CarobNS::trylocale(const char * const name) throw (CodecException)
+{
+    try {
+        return std::locale(name);
+    } catch (const std::runtime_error& re) { // by const reference: no copy, no slicing
+        std::cerr << re.what() << std::endl;
+        // std::locale(const char*) also throws for a null name, and
+        // streaming a null char* is undefined behaviour: print a placeholder.
+        std::cerr << "locale name was: " << (name ? name : "(null)") << std::endl;
+        throw CodecException();
+    }
+}
+
+
+/**
+ * Converts a wide string to a multibyte string with this codec's
+ * codecvt facet.
+ *
+ * @param w_arg wide string to convert
+ * @return the converted multibyte string (empty for empty input)
+ * @throws CodecException if codecvt.out() reports anything other than
+ *         codecvt_base::ok; a diagnostic is first written to std::cerr
+ */
+std::string MBSCodec::encode(const std::wstring& w_arg) const throw (CodecException)
+{
+    // Nothing to convert; this also keeps &res_buf[0] below well defined.
+    if (w_arg.empty())
+        return std::string();
+
+    mbstate_t mbst = mbstate_t(); // value-initialized (zeroed) conversion state
+
+    // Worst-case size, preferred to codecvt.length(), which is assumed to be
+    // too costly because of the scan (it's a memory/CPU tradeoff).
+    const std::string::size_type res_maxlength =
+        static_cast<std::string::size_type>(codecvt.max_length()) * w_arg.length();
+
+    // Heap-backed buffer: a variable-length array is not standard C++ and
+    // would place an input-sized buffer on the stack.
+    std::string res_buf(res_maxlength, '\0');
+    char * const res_begin = &res_buf[0];
+
+    const wchar_t * const w_argdata = w_arg.data();
+    const wchar_t *w_argnext;
+    char *res_next;
+
+    const std::codecvt_base::result success =
+        codecvt.out(mbst, w_argdata, w_argdata + w_arg.length(), w_argnext,
+                    res_begin, res_begin + res_maxlength, res_next);
+
+    if (success != std::codecvt_base::ok) // codecvt_base::noconv is not possible
+    {
+        // TODO something sensible here: messages in the exception instead of cerr
+        std::cerr << "Error " << success << " in "
+                  << loc.name() << " encode method" << std::endl;
+        throw CodecException();
+    }
+
+    // Keep only the bytes actually produced by the conversion.
+    res_buf.resize(res_next - res_begin);
+    return res_buf;
+}
+
+
+/**
+ * Converts a multibyte string to a wide string with this codec's
+ * codecvt facet.
+ *
+ * @param arg multibyte string to convert
+ * @return the converted wide string (empty for empty input)
+ * @throws CodecException if codecvt.in() reports anything other than
+ *         codecvt_base::ok; a diagnostic is first written to std::cerr
+ */
+std::wstring MBSCodec::decode(const std::string& arg) const throw (CodecException)
+{
+    // Nothing to convert; this also keeps &wres_buf[0] below well defined.
+    if (arg.empty())
+        return std::wstring();
+
+    mbstate_t mbst = mbstate_t(); // value-initialized (zeroed) conversion state
+
+    const std::string::size_type arglength = arg.length();
+
+    // Heap-backed buffer: a variable-length array is not standard C++ and
+    // would place an input-sized buffer on the stack. The output is sized
+    // to one wide char per input byte (assumes every wide char consumes at
+    // least one byte of input).
+    std::wstring wres_buf(arglength, L'\0');
+    wchar_t * const wres_begin = &wres_buf[0];
+
+    const char * const argdata = arg.data();
+    const char *argnext;
+    wchar_t *wres_next;
+
+    const std::codecvt_base::result success =
+        codecvt.in(mbst, argdata, argdata + arglength, argnext,
+                   wres_begin, wres_begin + arglength, wres_next);
+
+    if (success != std::codecvt_base::ok) // codecvt_base::noconv is not possible
+    {
+        // TODO something sensible here: put messages in the exception instead of cerr
+        std::cerr << "Error " << success << " in "
+                  << loc.name() << " decode method" << std::endl;
+        throw CodecException();
+    }
+
+    // Keep only the wide chars actually produced by the conversion.
+    wres_buf.resize(wres_next - wres_begin);
+    return wres_buf;
+}
+
+
+/*
+ * Local Variables:
+ * c-file-style: "bsd"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */

_______________________________________________
Carob-commits mailing list
[email protected]
https://forge.continuent.org/mailman/listinfo/carob-commits

Reply via email to