Date: Thursday, December 15, 2005 @ 21:01:18
Author: marc
Path: /cvsroot/carob/carob
Added: include/StringCodecs.hpp (1.1) src/StringCodecs.cpp (1.1)
"Simplicity is the ultimate sophistication." Leonardo da Vinci
--------------------------+
include/StringCodecs.hpp | 93 ++++++++++++++++++++++++++++++++
src/StringCodecs.cpp | 129 +++++++++++++++++++++++++++++++++++++++++++++
2 files changed, 222 insertions(+)
Index: carob/include/StringCodecs.hpp
diff -u /dev/null carob/include/StringCodecs.hpp:1.1
--- /dev/null Thu Dec 15 21:01:18 2005
+++ carob/include/StringCodecs.hpp Thu Dec 15 21:01:17 2005
@@ -0,0 +1,93 @@
+
+/*
+ * Sequoia: Database clustering technology Copyright 2002-2005
+ * Continuent, Inc.
+
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+
+ * http://www.apache.org/licenses/LICENSE-2.0
+
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+
+ * Initial developer(s): Marc Herbert
+ * Contributor(s):
+ */
+
+#ifndef CAROB_STRING_CODEC_HPP_
+#define CAROB_STRING_CODEC_HPP_
+
+#include <locale>
+// #include <stdexcept> // runtime_error
+
+
+// Hard-coded name of a UTF-8 locale (presumably one installed on the target host -- confirm).
+// TODO: it would be nice to get this at run time (in a non-portable way, quite obviously...).
+// On Linux, "locale -a" lists the locale names that are available.
+#define NAME_OF_ANY_UTF8_LOCALE_AVAILABLE "fr_FR.utf8"
+
+namespace CarobNS {
+
+// Exception type named in the throw() specifications of trylocale() and MBSCodec; it has no members.
+// TODO: give it some features, at least a what(). Derive from CarobException, or is that overkill?
+class CodecException {};
+
+typedef std::codecvt<wchar_t, char, mbstate_t> mbs_codecvt; // facet converting between wchar_t and char
+typedef std::codecvt_byname<wchar_t, char, mbstate_t> mbs_codecvt_byname; // same conversion, facet built from a locale name
+
+// This class should be split into an interface and an implementation,
+// the "MBS" part being restricted to the implementation.
+
+// For direct access to iconv, read this:
+// file://localhost/usr/share/doc/gcc-4.0-base/libstdc++/html/22_locale/codecvt.html
+
+// This code probably does not work with UTF-16 wide chars, only UCS-2
+// and UCS-4
+
+std::locale trylocale(const char * const name) throw (CodecException); // returns std::locale(name); only CodecException may escape
+
+class MBSCodec // converts between std::wstring and std::string through the codecvt facet of one locale
+{
+ // Here order matters (see init below): members are initialized in declaration order, and codecvt is taken from loc
+ const std::locale loc; // hold our own (and cheap) copy for safety
+ const mbs_codecvt& codecvt; // facet obtained from loc with std::use_facet in every constructor
+
+public:
+ std::string encode(const std::wstring&) const throw (CodecException); // wide chars -> chars, CodecException on failure
+ std::wstring decode(const std::string&) const throw (CodecException); // chars -> wide chars, CodecException on failure
+
+ // default converter using user-defined locale (typically LANG, LC_etc)
+ MBSCodec() :
+ loc(std::locale("")),
+ codecvt (std::use_facet<mbs_codecvt>(loc))
+ { }
+
+ MBSCodec(const std::locale& locarg) : // converter working on a copy of the locale passed in
+ loc(locarg),
+ codecvt (std::use_facet<mbs_codecvt>(loc))
+ { }
+
+ MBSCodec(const char* locname) throw (CodecException) : // converter for the locale built by trylocale(locname)
+ loc(trylocale(locname)),
+ codecvt (std::use_facet<mbs_codecvt>(loc))
+ { }
+
+};
+
+
+} // namespace
+
+#endif // include only once
+
+/*
+ * Local Variables:
+ * c-file-style: "bsd"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
Index: carob/src/StringCodecs.cpp
diff -u /dev/null carob/src/StringCodecs.cpp:1.1
--- /dev/null Thu Dec 15 21:01:18 2005
+++ carob/src/StringCodecs.cpp Thu Dec 15 21:01:17 2005
@@ -0,0 +1,129 @@
+
+/*
+ * Sequoia: Database clustering technology Copyright 2002-2005
+ * Continuent, Inc.
+
+ * Licensed under the Apache License, Version 2.0 (the "License"); you
+ * may not use this file except in compliance with the License. You
+ * may obtain a copy of the License at
+
+ * http://www.apache.org/licenses/LICENSE-2.0
+
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+
+ * Initial developer(s): Marc Herbert
+ * Contributor(s):
+ */
+
+#include <iostream> // TODO: remove me once we have decent exceptions
+#include <stdexcept>
+#include <vector>
+
+#include "StringCodecs.hpp"
+
+
+using namespace CarobNS;
+
+
+/**
+ * Catching std::runtime_error from std::codecvt_byname() segfaults
+ * with gcc 4.0.2, see
+ * <http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=343108>
+ * So unfortunately do NOT do this. This code is a reminder.
+ */
+namespace {
+// Returns `new facet_byname_t(name)`. If that constructor throws
+// std::runtime_error, the message and the locale name are printed on
+// std::cerr and the process exits with status argument 912.
+template <typename facet_byname_t>
+facet_byname_t * trynewfacet_byname(const char * const name)
+{
+    try {
+        return new facet_byname_t(name);
+    } catch (const std::runtime_error& re) { // by reference: no copy, no slicing
+        std::cerr << re.what() << std::endl;
+        // Streaming a null char pointer is undefined, so print a placeholder.
+        std::cerr << "locale name was: " << (name ? name : "(null)") << std::endl;
+        exit(912);
+    }
+}
+}
+
+
+/**
+ * Builds the locale called `name`.
+ *
+ * @param name locale name handed to the std::locale constructor
+ * @return std::locale(name)
+ * @throws CodecException if std::locale throws std::runtime_error; the
+ *         message and the locale name are printed on std::cerr first
+ */
+std::locale CarobNS::trylocale(const char * const name) throw (CodecException)
+{
+    try {
+        return std::locale(name);
+    } catch (const std::runtime_error& re) { // by reference: no copy, no slicing
+        std::cerr << re.what() << std::endl;
+        // std::locale rejects a null name by throwing, so name may be null
+        // here; streaming a null char pointer is undefined.
+        std::cerr << "locale name was: " << (name ? name : "(null)") << std::endl;
+        throw CodecException();
+    }
+}
+
+
+/**
+ * Encodes a wide string into chars with this codec's codecvt facet.
+ *
+ * @param w_arg wide string to convert
+ * @return the chars written by codecvt.out()
+ * @throws CodecException if codecvt.out() returns anything but
+ *         std::codecvt_base::ok (details are printed on std::cerr)
+ */
+std::string MBSCodec::encode(const std::wstring& w_arg) const throw (CodecException)
+{
+    // Nothing to convert; also keeps &res_buf[0] below off an empty vector.
+    if (w_arg.empty())
+        return std::string();
+
+    mbstate_t mbst = { 0 };
+
+    // Worst-case sizing is preferred to codecvt.length(), assumed to be too
+    // costly because of the scan (it's a memory/CPU tradeoff).
+    // The buffer is a std::vector rather than a variable-length stack array:
+    // such arrays are not standard C++ and overflow the stack on big strings.
+    // The size is computed in size_type so that it cannot overflow an int.
+    std::vector<char> res_buf(codecvt.max_length() * w_arg.length());
+    char * const res_begin = &res_buf[0];
+
+    const wchar_t * const w_argdata = w_arg.data();
+    const wchar_t *w_argnext = 0;
+    char *res_next = 0;
+
+    const std::codecvt_base::result success =
+        codecvt.out(mbst, w_argdata, w_argdata + w_arg.length(), w_argnext,
+                    res_begin, res_begin + res_buf.size(), res_next);
+
+    if (success != std::codecvt_base::ok) // codecvt_base::noconv is not possible
+    {
+        // TODO something sensible here: messages in the exception instead of cerr
+        std::cerr << "Error " << success << " in "
+                  << loc.name() << " encode method" << std::endl;
+        throw CodecException();
+    }
+
+    return std::string(res_begin, res_next - res_begin);
+}
+
+
+/**
+ * Decodes chars into a wide string with this codec's codecvt facet.
+ *
+ * @param arg chars to convert
+ * @return the wide chars written by codecvt.in()
+ * @throws CodecException if codecvt.in() returns anything but
+ *         std::codecvt_base::ok (details are printed on std::cerr)
+ */
+std::wstring MBSCodec::decode(const std::string& arg) const throw (CodecException)
+{
+    // Nothing to convert; also keeps &wres_buf[0] below off an empty vector.
+    if (arg.empty())
+        return std::wstring();
+
+    mbstate_t mbst = { 0 };
+
+    // Same sizing as before: one wide char slot per input char.
+    // The buffer is a std::vector rather than a variable-length stack array:
+    // such arrays are not standard C++ and overflow the stack on big strings.
+    std::vector<wchar_t> wres_buf(arg.length());
+    wchar_t * const wres_begin = &wres_buf[0];
+
+    const char * const argdata = arg.data();
+    const char *argnext = 0;
+    wchar_t *wres_next = 0;
+
+    const std::codecvt_base::result success =
+        codecvt.in(mbst, argdata, argdata + arg.length(), argnext,
+                   wres_begin, wres_begin + wres_buf.size(), wres_next);
+
+    if (success != std::codecvt_base::ok) // codecvt_base::noconv is not possible
+    {
+        // TODO something sensible here: put messages in the exception instead of cerr
+        std::cerr << "Error " << success << " in "
+                  << loc.name() << " decode method" << std::endl;
+        throw CodecException();
+    }
+
+    return std::wstring(wres_begin, wres_next - wres_begin);
+}
+
+
+/*
+ * Local Variables:
+ * c-file-style: "bsd"
+ * c-basic-offset: 4
+ * indent-tabs-mode: nil
+ * End:
+ */
_______________________________________________
Carob-commits mailing list
[email protected]
https://forge.continuent.org/mailman/listinfo/carob-commits