Author: sebor
Date: Mon May 21 09:23:16 2007
New Revision: 540193
URL: http://svn.apache.org/viewvc?view=rev&rev=540193
Log:
2007-05-21 Martin Sebor <[EMAIL PROTECTED]>
STDCXX-296
* insert_wchar.cpp: New example program demonstrating an implementation
of an inserter operator overloaded for arrays of wchar_t that performs
codeset conversion from arrays of wchar_t to multibyte characters.
* insert_wchar.out: Expected output of the example program.
Added:
incubator/stdcxx/trunk/examples/manual/insert_wchar.cpp (with props)
incubator/stdcxx/trunk/examples/manual/out/insert_wchar.out
Added: incubator/stdcxx/trunk/examples/manual/insert_wchar.cpp
URL:
http://svn.apache.org/viewvc/incubator/stdcxx/trunk/examples/manual/insert_wchar.cpp?view=auto&rev=540193
==============================================================================
--- incubator/stdcxx/trunk/examples/manual/insert_wchar.cpp (added)
+++ incubator/stdcxx/trunk/examples/manual/insert_wchar.cpp Mon May 21 09:23:16
2007
@@ -0,0 +1,211 @@
+/**************************************************************************
+ *
+ * insert_wchar.cpp
+ *
+ * Example program demonstrating an implementation of an inserter
+ * operator overloaded for arrays of wchar_t that performs codeset
+ * conversion from wchar_t to multibyte characters.
+ *
+ * $Id$
+ *
+ ***************************************************************************
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed
+ * with this work for additional information regarding copyright
+ * ownership. The ASF licenses this file to you under the Apache
+ * License, Version 2.0 (the "License"); you may not use this file
+ * except in compliance with the License. You may obtain a copy of
+ * the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+ * implied. See the License for the specific language governing
+ * permissions and limitations under the License.
+ *
+ **************************************************************************/
+
+#include <cassert> // for assert()
+#include <cwchar> // for mbstate_t, size_t
+#include <ios> // for hex
+#include <iostream> // for cout
+#include <locale> // for codecvt, isalnum(), locale
+#include <ostream> // for basic_ostream
+#include <sstream> // for ostringstream
+
+
+// Inserts the null-terminated wide character string s into the stream
+// buffer of strm, converting it to the stream's character type with
+// the std::codecvt<wchar_t, charT, Traits::state_type> facet of the
+// locale imbued in strm.
+//
+// Sets badbit in strm and returns early when the conversion reports
+// an error, when it makes no progress, or when the stream buffer
+// accepts fewer characters than it was given. Exceptions (e.g., from
+// use_facet() or setstate()) are not caught here; that is left to
+// the caller.
+template <class charT, class Traits>
+void
+streambuf_insert (std::basic_ostream<charT, Traits> &strm,
+                  const wchar_t *s)
+{
+    typedef typename Traits::state_type          StateT;
+    typedef std::codecvt<wchar_t, charT, StateT> Codecvt;
+
+    const Codecvt &cvt = std::use_facet<Codecvt>(strm.getloc ());
+
+    const std::size_t slen = std::char_traits<wchar_t>::length (s);
+
+    // perform codeset conversion in chunks to avoid dynamic
+    // memory allocation
+
+    const std::size_t xbufsize = 32;
+
+    charT                xbuf [xbufsize];
+    charT* const         xbuf_end  = xbuf + xbufsize;
+    charT*               to_next   = 0;
+    const wchar_t*       from_next = 0;
+    const wchar_t* const end       = s + slen;
+
+    // every string is converted starting from the initial state
+    StateT state = StateT ();
+
+    // from_next starts out null so that the body is entered at least
+    // once, even for an empty string
+    for (const wchar_t* base = s; from_next != end; base = from_next) {
+
+        const std::codecvt_base::result res =
+            cvt.out (state, base, end, from_next,
+                     xbuf, xbuf_end, to_next);
+
+        // number of converted characters stored in xbuf by this call
+        const std::streamsize nbytes = to_next - xbuf;
+
+        switch (res) {
+        case Codecvt::error:
+            // write out the sequence successfully converted up
+            // to the point of the error in the internal sequence
+            // and fail; return right away so that control doesn't
+            // fall through into the next case and write the same
+            // characters a second time
+            strm.rdbuf ()->sputn (xbuf, nbytes);
+            strm.setstate (strm.badbit);
+            return;
+
+        case Codecvt::noconv:
+            // write whatever out() stored in xbuf and stop
+            // NOTE(review): the C++ standard specifies that out()
+            // leaves to_next equal to its `to' argument when it
+            // returns noconv, in which case nbytes is 0 and nothing
+            // gets written here -- confirm whether the source
+            // characters should be copied to the buffer instead
+            if (nbytes != strm.rdbuf ()->sputn (xbuf, nbytes)) {
+                strm.setstate (strm.badbit);
+                return;
+            }
+
+            from_next = end;   // effectively break
+            break;
+
+        default:
+            assert (cvt.ok == res || cvt.partial == res);
+
+            // partial conversion will result if there isn't enough
+            // space in the conversion buffer to hold the converted
+            // sequence, but we're O.K. since we'll be passing any
+            // remaining unconverted characters (starting at
+            // from_next) in the next iteration
+
+            if (cvt.partial == res && from_next == base && 0 == nbytes) {
+                // nothing was consumed and nothing was produced:
+                // trying again with the same arguments couldn't
+                // make any progress, so fail instead of looping
+                // forever
+                strm.setstate (strm.badbit);
+                return;
+            }
+
+            if (nbytes != strm.rdbuf ()->sputn (xbuf, nbytes)) {
+                strm.setstate (strm.badbit);
+                return;
+            }
+        }
+    }
+}
+
+
+// Stream insertion operator overloaded for arrays of wchar_t characters.
+//
+// Inserts the null-terminated wide character string s into strm by
+// calling streambuf_insert(), which performs the codeset conversion,
+// and returns strm. Nothing is inserted unless the stream's sentry
+// object indicates that the stream is ready for output. A null s sets
+// badbit instead of being dereferenced.
+//
+// An exception thrown during the insertion causes badbit to be set in
+// strm. The exception is swallowed unless setting badbit itself throws
+// std::ios_base::failure, in which case the original exception (not
+// the failure) is propagated to the caller.
+template <class charT, class Traits>
+std::basic_ostream<charT, Traits>&
+operator<< (std::basic_ostream<charT, Traits> &strm,
+            const wchar_t *s)
+{
+    const typename std::basic_ostream<charT, Traits>::sentry opfx (strm);
+
+    if (!opfx)
+        return strm;
+
+    if (0 == s) {
+        // a null pointer doesn't point to a string: fail the
+        // insertion rather than computing the length of nothing
+        strm.setstate (strm.badbit);
+        return strm;
+    }
+
+    try {
+        // try to insert character array into stream buffer
+        streambuf_insert (strm, s);
+    }
+    catch (...) {
+        bool threw;
+
+        try {
+            // set badbit on exception without throwing ios::failure
+            strm.setstate (strm.badbit);
+            threw = false;
+        }
+        catch (std::ios_base::failure&) {
+            // make a note of the exception thrown from setstate()...
+            threw = true;
+        }
+
+        // the rethrow must happen outside the inner handler: there,
+        // a plain throw would rethrow the failure object instead
+        if (threw) {
+            // ...and rethrow the original exception
+            throw;
+        }
+    }
+
+    return strm;
+}
+
+
+// examples of wide character strings used as input by main(); each
+// string with non-ASCII characters is followed by a comment showing
+// the UTF-8 multibyte sequence it is expected to be converted to
+static const wchar_t* const wcs [] = {
+ L"a", L"abc",
+ // Greek letter Alpha:
+ L"\x0391", // "\xce\x91"
+ // Greek letters Alpha Beta:
+ L"\x0391\x0392", // "\xce\x91\xce\x92"
+ // Greek letters Alpha Beta Gamma:
+ L"\x0391\x0392\x0393", // "\xce\x91\xce\x92\xce\x93"
+ // Tibetan digit zero:
+ L"\x0f20", // "\xe0\xbc\xa0"
+ // Tibetan digits one, zero:
+ L"\x0f21\x0f20", // "\xe0\xbc\xa1\xe0\xbc\xa0"
+ // Tibetan digits two, one, zero:
+ L"\x0f22\x0f21\x0f20" // "\xe0\xbc\xa2\xe0\xbc\xa1\xe0\xbc\xa0"
+};
+
+
+// Converts each of the example wide character strings in wcs to a
+// multibyte character sequence by inserting it into a narrow string
+// stream imbued with a locale containing a codecvt_byname facet, and
+// writes to standard output the code points of the wide string
+// followed by the bytes of the converted sequence.
+int main ()
+{
+    typedef std::codecvt_byname<wchar_t, char, std::mbstate_t> Codecvt;
+
+    // create a UCS/UTF-8 codecvt facet and install it in a locale
+    // NOTE(review): the mail archive this file came from had replaced
+    // the facet name with the placeholder "[EMAIL PROTECTED]" and split
+    // the string literal across two lines; "UTF-8@UCS" is presumed to
+    // be the original name -- confirm against the repository
+    const std::locale utf (std::cout.getloc (), new Codecvt ("UTF-8@UCS"));
+
+    for (std::size_t i = 0; i != sizeof wcs / sizeof *wcs; ++i) {
+
+        std::ostringstream strm;
+
+        // imbue the UTF-8/UCS capable locale in a stringstream
+        strm.imbue (utf);
+
+        // insert each wide character string into the narrow stream
+        // object relying on the inserter to convert each wide string
+        // into the corresponding multibyte character string
+        strm << wcs [i];
+
+        // write out the wide character string in Unicode notation
+        // (the hex format flag stays set in cout and also applies
+        // to the byte values written out below)
+        std::cout << "UCS-2: " << std::hex;
+
+        for (const wchar_t *pwc = wcs [i]; *pwc != L'\0'; ++pwc)
+            std::cout << "U+" << unsigned (*pwc) << ' ';
+
+        const std::string str = strm.str ();
+
+        std::cout << " ==> UTF-8: \"";
+
+        // bytes are converted to unsigned char first so that values
+        // above 0x7f aren't sign-extended when formatted as int
+        typedef unsigned char UChar;
+
+        // write out the multibyte character sequence using
+        // ordinary alphanumeric symbols or hex escape sequences
+        // where necessary
+        for (const char *pc = str.c_str (); *pc != '\0'; ++pc) {
+
+            if (std::isalnum (*pc, std::cout.getloc ()))
+                std::cout << *pc;
+            else
+                std::cout << "\\x" << int (UChar (*pc));
+        }
+
+        std::cout << "\"\n";
+    }
+}
Propchange: incubator/stdcxx/trunk/examples/manual/insert_wchar.cpp
------------------------------------------------------------------------------
svn:keywords = Id
Added: incubator/stdcxx/trunk/examples/manual/out/insert_wchar.out
URL:
http://svn.apache.org/viewvc/incubator/stdcxx/trunk/examples/manual/out/insert_wchar.out?view=auto&rev=540193
==============================================================================
--- incubator/stdcxx/trunk/examples/manual/out/insert_wchar.out (added)
+++ incubator/stdcxx/trunk/examples/manual/out/insert_wchar.out Mon May 21
09:23:16 2007
@@ -0,0 +1,8 @@
+UCS-2: U+61 ==> UTF-8: "a"
+UCS-2: U+61 U+62 U+63 ==> UTF-8: "abc"
+UCS-2: U+391 ==> UTF-8: "\xce\x91"
+UCS-2: U+391 U+392 ==> UTF-8: "\xce\x91\xce\x92"
+UCS-2: U+391 U+392 U+393 ==> UTF-8: "\xce\x91\xce\x92\xce\x93"
+UCS-2: U+f20 ==> UTF-8: "\xe0\xbc\xa0"
+UCS-2: U+f21 U+f20 ==> UTF-8: "\xe0\xbc\xa1\xe0\xbc\xa0"
+UCS-2: U+f22 U+f21 U+f20 ==> UTF-8: "\xe0\xbc\xa2\xe0\xbc\xa1\xe0\xbc\xa0"