Author: sebor
Date: Sun Jun 17 20:03:00 2007
New Revision: 548179
URL: http://svn.apache.org/viewvc?view=rev&rev=548179
Log:
2007-06-17 Martin Sebor <[EMAIL PROTECTED]>
* rw_locale.h (rw_mbchar_array_t, rw_get_mb_chars, rw_find_mb_locale):
New type and helper functions to retrieve locales that use multibyte
encodings (where MB_CUR_MAX > 1).
Modified:
incubator/stdcxx/trunk/tests/include/rw_locale.h
incubator/stdcxx/trunk/tests/src/locale.cpp
Modified: incubator/stdcxx/trunk/tests/include/rw_locale.h
URL:
http://svn.apache.org/viewvc/incubator/stdcxx/trunk/tests/include/rw_locale.h?view=diff&rev=548179&r1=548178&r2=548179
==============================================================================
--- incubator/stdcxx/trunk/tests/include/rw_locale.h (original)
+++ incubator/stdcxx/trunk/tests/include/rw_locale.h Sun Jun 17 20:03:00 2007
@@ -2,7 +2,7 @@
*
* localedef.h - declarations of locale testsuite helpers
*
- * $Id:$
+ * $Id$
*
************************************************************************
*
@@ -62,6 +62,26 @@
// returned from prior calls
_TEST_EXPORT const char*
rw_localedef (const char*, const char*, const char*, const char*);
+
+
+// an array of multibyte characters 1 to MB_LEN_MAX bytes in length
+typedef char
+rw_mbchar_array_t [_RWSTD_MB_LEN_MAX][_RWSTD_MB_LEN_MAX];
+
+// fills consecutive elements of the `mb_chars' array with multibyte
+// characters between 1 and MB_CUR_MAX bytes long for the currently
+// set locale
+// returns the number of elements populated (normally, MB_CUR_MAX)
+_TEST_EXPORT _RWSTD_SIZE_T
+rw_get_mb_chars (rw_mbchar_array_t /* mb_chars */);
+
+
+// finds the multibyte locale with the largest MB_CUR_MAX value and
+// fills consecutive elements of the `mb_chars' array with multibyte
+// characters between 1 and MB_CUR_MAX bytes long for such a locale
+_TEST_EXPORT const char*
+rw_find_mb_locale (_RWSTD_SIZE_T* /* mb_cur_max */,
+ rw_mbchar_array_t /* mb_chars */);
#endif // RW_LOCALE_H_INCLUDED
Modified: incubator/stdcxx/trunk/tests/src/locale.cpp
URL:
http://svn.apache.org/viewvc/incubator/stdcxx/trunk/tests/src/locale.cpp?view=diff&rev=548179&r1=548178&r2=548179
==============================================================================
--- incubator/stdcxx/trunk/tests/src/locale.cpp (original)
+++ incubator/stdcxx/trunk/tests/src/locale.cpp Sun Jun 17 20:03:00 2007
@@ -34,6 +34,7 @@
#include <environ.h> // for rw_putenv()
#include <file.h> // for SHELL_RM_RF, rw_tmpnam
+#include <rw_printf.h> // for rw_fprintf()
#include <rw_process.h> // for rw_system()
@@ -62,7 +63,7 @@
#include <assert.h> // for assert
#include <errno.h> // for EBADF
#include <float.h> // for {FLT,DBL,LDBL}_DIG
-#include <limits.h> // for CHAR_BIT, PATH_MAX
+#include <limits.h> // for CHAR_BIT, MB_LEN_MAX, PATH_MAX
#include <locale.h> // for LC_XXX macros, setlocale
#include <stdarg.h> // for va_copy, va_list, ...
#include <stdio.h> // for fgets, remove, sprintf, ...
@@ -470,3 +471,181 @@
return slocname;
}
+
+/**************************************************************************/
+
+// finds a multibyte character that is `bytes' long if `bytes' is less
+// than or equal to MB_CUR_MAX, or the longest multibyte sequence in
+// the current locale
+static const char*
+_get_mb_char (char *buf, size_t bytes)
+{
+ _RWSTD_ASSERT (0 != buf);
+
+ *buf = '\0';
+
+ if (0 == bytes)
+ return buf;
+
+ const bool exact = bytes <= size_t (MB_CUR_MAX);
+
+ if (!exact)
+ bytes = MB_CUR_MAX;
+
+ wchar_t wc;
+
+ // search the first 64K characters sequentially
+ for (wc = wchar_t (1); wc != wchar_t (0xffff); ++wc) {
+
+ if ( int (bytes) == wctomb (buf, wc)
+ && int (bytes) == mblen (buf, bytes)) {
+ // NUL-terminate the multibyte character of the requested length
+ buf [bytes] = '\0';
+ break;
+ }
+
+ *buf = '\0';
+ }
+
+#if 2 < _RWSTD_WCHAR_T_SIZE
+
+ // if a multibyte character of the requested size is not found
+ // in the low 64K range, try to find one using a random search
+ if (wchar_t (0xffff) == wc) {
+
+ // iterate only so many times to prevent an infinite loop
+ // in case when MB_CUR_MAX is greater than the longest
+ // multibyte character
+ for (int i = 0; i != 0x100000; ++i) {
+
+ wc = wchar_t (rand ());
+
+ if (RAND_MAX < 0x10000) {
+ wc <<= 16;
+ wc |= wchar_t (rand ());
+ }
+
+ if ( int (bytes) == wctomb (buf, wc)
+ && int (bytes) == mblen (buf, bytes)) {
+ // NUL-terminate the multibyte character
+ buf [bytes] = '\0';
+ break;
+ }
+
+ *buf = '\0';
+ }
+ }
+
+#endif // 2 < _RWSTD_WCHAR_SIZE
+
+ // return 0 on failure to find a sequence exactly `bytes' long
+ return !exact || bytes == strlen (buf) ? buf : 0;
+}
+
+
+_TEST_EXPORT size_t
+rw_get_mb_chars (rw_mbchar_array_t mb_chars)
+{
+ _RWSTD_ASSERT (0 != mb_chars);
+
+ const char* mbc = _get_mb_char (mb_chars [0], size_t (-1));
+
+ if (!mbc) {
+ rw_fprintf (rw_stderr, "*** failed to find any multibyte characters "
+ "in locale \"%s\" with MB_CUR_MAX = %u\n",
+ setlocale (LC_CTYPE, 0), MB_CUR_MAX);
+ return 0;
+ }
+
+ size_t mb_cur_max = strlen (mbc);
+
+ if (_RWSTD_MB_LEN_MAX < mb_cur_max)
+ mb_cur_max = _RWSTD_MB_LEN_MAX;
+
+ // fill each element of `mb_chars' with a multibyte character
+ // of the corresponding length
+ for (size_t i = mb_cur_max; i; --i) {
+
+ // try to generate a multibyte character `i' bytes long
+ mbc = _get_mb_char (mb_chars [i - 1], i);
+
+ if (0 == mbc) {
+ if (i < mb_cur_max) {
+ rw_fprintf (rw_stderr, "*** failed to find %u-byte characters "
+ "in locale \"%s\" with MB_CUR_MAX = %u\n",
+ i + 1, setlocale (LC_CTYPE, 0), MB_CUR_MAX);
+ mb_cur_max = 0;
+ break;
+ }
+ --mb_cur_max;
+ }
+ }
+
+ return mb_cur_max;
+}
+
+
+_TEST_EXPORT const char*
+rw_find_mb_locale (size_t *mb_cur_max,
+ rw_mbchar_array_t mb_chars)
+{
+ _RWSTD_ASSERT (0 != mb_cur_max);
+ _RWSTD_ASSERT (0 != mb_chars);
+
+ if (2 > _RWSTD_MB_LEN_MAX) {
+ rw_fprintf (rw_stderr, "MB_LEN_MAX = %d, giving up\n",
+ _RWSTD_MB_LEN_MAX);
+ return 0;
+ }
+
+ static const char *mb_locale_name;
+
+ char saved_locale_name [1024];
+ strcpy (saved_locale_name, setlocale (LC_CTYPE, 0));
+
+ _RWSTD_ASSERT (strlen (saved_locale_name) < sizeof saved_locale_name);
+
+ *mb_cur_max = 0;
+
+ // iterate over all installed locales
+ for (const char *name = rw_locales (_RWSTD_LC_CTYPE, 0); name && *name;
+ name += strlen (name) + 1) {
+
+ if (setlocale (LC_CTYPE, name)) {
+
+ // try to generate a set of multibyte characters
+ // with lengths from 1 and MB_CUR_MAX (or less)
+ const size_t cur_max = rw_get_mb_chars (mb_chars);
+
+ if (*mb_cur_max < cur_max) {
+ *mb_cur_max = cur_max;
+ mb_locale_name = name;
+
+ // break when we've found a multibyte locale
+ // with the longest possible encoding
+ if (_RWSTD_MB_LEN_MAX == *mb_cur_max)
+ break;
+ }
+ }
+ }
+
+ if (*mb_cur_max < 2) {
+ rw_fprintf (rw_stderr, "*** failed to find a full set of multibyte "
+ "characters in locale \"%s\" with MB_CUR_MAX = %u "
+ "(computed)", mb_locale_name, *mb_cur_max);
+ mb_locale_name = 0;
+ }
+ else {
+ // (re)generate the multibyte characters for the saved locale
+ // as they may have been overwritten in subsequent iterations
+ // of the loop above (while searching for a locale with greater
+ // value of MB_CUR_MAX)
+ setlocale (LC_CTYPE, mb_locale_name);
+ rw_get_mb_chars (mb_chars);
+ }
+
+ setlocale (LC_CTYPE, saved_locale_name);
+
+ return mb_locale_name;
+}
+