I have enhanced the collation test, 22.locale.collate.cpp with a bunch
of cases that deal with embedded strings, inside the input strings as
well as at the edges. More defects became apparent, and they have been
fixed.
I have re-worked the src/collate.cpp patch and I am attaching it here.
All collation tests (old and new) pass. If there are no objections, I
will check it in later in the week.
Thanks,
Liviu
On 10/13/12 11:16, Liviu Nicoara wrote:
The initial patch does not pass the following test case. Have re-worked
the patch and attached it to the incident, and I am also attaching it
here. It passes all collate tests.
Index: tests/localization/22.locale.collate.cpp
===================================================================
--- tests/localization/22.locale.collate.cpp (revision 1397847)
+++ tests/localization/22.locale.collate.cpp (working copy)
@@ -1029,7 +1029,105 @@ check_hash_eff (const char* charTname)
template <class charT>
void
-check_NUL_locale (const char* charTname, const char* locname)
+check_NUL_transform (const char* charTname, const char* locname,
+ const charT* src, size_t src_len,
+ const char* pat, size_t pat_len)
+{
+ std::locale loc (locname);
+
+ typedef typename std::collate<charT> Collate;
+ typedef typename Collate::string_type String;
+
+ const Collate &col = std::use_facet<Collate> (loc);
+
+ String s = col.transform (src, src + src_len);
+
+ for (size_t i = 0, j = 0; i < pat_len; ++i) {
+
+ if (j >= s.size ()) {
+ rw_assert (false, __FILE__, __LINE__,
+ "collate<%s>::transform (%{*.*Ac}) -> %{*.*Ac}"
+ ", incomplete transformation (locale \"%s\")",
+ charTname, sizeof (charT), src_len, src,
+ sizeof (charT), s.size (), s.c_str (), locname);
+ break;
+ }
+
+ switch (pat [i]) {
+ case 0:
+ rw_assert (j < s.size () && 0 == s [j], __FILE__, __LINE__,
+ "collate<%s>::transform (\"%{*.*Ac}\") -> \"%{*.*Ac}\""
+ ", expected \\0, got %d in position %lu (locale
\"%s\")",
+ charTname, sizeof (charT), src_len, src, sizeof
(charT),
+ s.size (), s.c_str (), s [j], j, locname);
+ ++j;
+ break;
+
+ case '*':
+ rw_assert (j < s.size () && s [j], __FILE__, __LINE__,
+ "collate<%s>::transform (\"%{*.*Ac}\") -> \"%{*.*Ac}\""
+ ", got NUL in position %lu (locale \"%s\")", charTname,
+ sizeof (charT), src_len, src, sizeof (charT), s.size
(),
+ s.c_str (), j, locname);
+ for (; j < s.size () && s [j]; ++j) ;
+ break;
+
+ default:
+ break;
+ }
+ }
+}
+
+static void
+check_NUL_transform_narrow (const char* charTname, const char* locname)
+{
+#define T(src, pat) \
+ check_NUL_transform (charTname, locname, \
+ src, sizeof src / sizeof *src - 1, \
+ pat, sizeof pat / sizeof *pat - 1)
+
+ T ("\0", "\0" );
+ T ("\0\0", "\0\0" );
+ T ("\0\0\0", "\0\0\0" );
+ T ("a\0", "*\0" );
+ T ("\0a\0", "\0*\0" );
+ T ("\0a\0", "\0*\0" );
+ T ("a\0\0", "*\0\0" );
+ T ("a\0\0\0", "*\0\0\0" );
+ T ("a\0b", "*\0*" );
+ T ("a\0b\0", "*\0*\0" );
+ T ("a\0b\0\0", "*\0*\0\0" );
+}
+
+static void
+check_NUL_transform_wide (const char* charTname, const char* locname)
+{
+ T (L"\0", "\0" );
+ T (L"\0\0", "\0\0" );
+ T (L"\0\0\0", "\0\0\0" );
+ T (L"a\0", "*\0" );
+ T (L"\0a\0", "\0*\0" );
+ T (L"\0a\0", "\0*\0" );
+ T (L"a\0\0", "*\0\0" );
+ T (L"a\0\0\0", "*\0\0\0" );
+ T (L"a\0b", "*\0*" );
+ T (L"a\0b\0", "*\0*\0" );
+ T (L"a\0b\0\0", "*\0*\0\0" );
+
+#undef T
+}
+
+template <class charT>
+void
+check_NUL_transform (const char* charTname, const char* locname)
+{
+ check_NUL_transform_narrow (charTname, locname);
+ check_NUL_transform_wide (charTname, locname);
+}
+
+template <class charT>
+void
+check_NUL_collate (const char* charTname, const char* locname)
{
std::locale loc (locname);
@@ -1090,6 +1188,14 @@ check_NUL_locale (const char* charTname,
template <class charT>
void
+check_NUL (const char* charTname, const char* locname)
+{
+ check_NUL_transform< charT > (charTname, locname);
+ check_NUL_collate< charT > (charTname, locname);
+}
+
+template <class charT>
+void
check_NUL (const char* charTname)
{
// Verify that the collate facet correctly handles character
@@ -1103,7 +1209,7 @@ check_NUL (const char* charTname)
for (const char* locname = rw_locales (LC_COLLATE);
*locname; locname += std::strlen (locname) + 1, ++i) {
try {
- check_NUL_locale<charT> (charTname, locname);
+ check_NUL<charT> (charTname, locname);
}
catch (...) {
}
@@ -1135,7 +1241,6 @@ run_test (int /*argc*/, char* /*argv*/ [
return 0;
}
-
int
main (int argc, char* argv [])
{
Index: src/collate.cpp
===================================================================
--- src/collate.cpp (revision 1397825)
+++ src/collate.cpp (working copy)
@@ -488,6 +488,8 @@ __rw_strnxfrm (const char *src, size_t n
while (nchars) {
+ if (src [0]) {
+
// using a C-style cast instead of static_cast to avoid
// a gcc 2.95.2 bug causing an error on some platforms:
// static_cast from `void *' to `const char *'
@@ -515,7 +517,6 @@ __rw_strnxfrm (const char *src, size_t n
nchars = 0;
}
else {
-
// terminating NUL found in the source buffer
nchars -= (last - src) + 1;
psrc = _RWSTD_CONST_CAST (char*, src);
@@ -531,7 +532,7 @@ __rw_strnxfrm (const char *src, size_t n
// it's buggy (such as MSVC's) and tries to write to
// the buffer even if it's 0
char just_in_case_buf [8];
-#endif
+#endif // _RWSTD_OS_SUNOS
const size_t dst_size = strxfrm (just_in_case_buf, psrc, 0);
@@ -547,7 +548,7 @@ __rw_strnxfrm (const char *src, size_t n
_TRY {
// resize the result string to fit itself plus the result
- // of the transformation including the terminatin NUL
+ // of the transformation including the terminating NUL
// appended by strxfrm()
res.resize (res_size + dst_size + 1);
}
@@ -557,31 +558,32 @@ __rw_strnxfrm (const char *src, size_t n
_RETHROW;
}
- // transfor the source string up to the terminating NUL
- size_t xfrm_size =
strxfrm (&res [0] + res_size, psrc, dst_size + 1);
+ res.resize (res.size () - !last);
+ }
+ else {
+
+ // count and append the consecutive NULs embedded in the
+ // input string
-#if defined _MSC_VER && _MSC_VER < 1400
- // compute the correct value that should have been returned from
- // strxfrm() after the transformation has completed (MSVC strxfrm()
- // returns a bogus result; see PR #29935)
- xfrm_size = strlen (&res [0] + res_size);
-#endif // MSVC < 8.0
-
- // increment the size of the result string by the number
- // of transformed characters excluding the terminating NUL
- // if strxfrm() transforms the empty string into the empty
- // string, keep the terminating NUL, otherwise drop it
- res_size += xfrm_size + (last && !*psrc && !xfrm_size);
+ size_t i = 0;
+ for (; i < nchars && 0 == src [i]; ++i) ;
_TRY {
- res.resize (res_size);
+ // resize the result string to fit itself plus the result
+ // of the transformation including the terminating NUL
+ // appended by strxfrm()
+ res.resize (res.size () + i);
}
_CATCH (...) {
if (pbuf != buf)
delete[] pbuf;
_RETHROW;
}
+
+ nchars -= i;
+ src += i;
+ }
}
if (pbuf != buf)
@@ -702,6 +704,8 @@ __rw_wcsnxfrm (const wchar_t *src, size_
while (nchars) {
+ if (src [0]) {
+
typedef _STD::char_traits<wchar_t> Traits;
const wchar_t* const last = Traits::find (src, nchars, L'\0');
@@ -771,31 +775,33 @@ __rw_wcsnxfrm (const wchar_t *src, size_
_RETHROW;
}
- // transfor the source string up to the terminating NUL
- size_t xfrm_size =
+ // transform the source string up to the terminating NUL
_RWSTD_WCSXFRM (&res [0] + res_size, psrc, dst_size + 1);
+ res.resize (res.size () - !last);
+ }
+ else {
+
+ // count and append the consecutive NULs embedded in the
+ // input string
-# if defined _MSC_VER && _MSC_VER < 1400
- // compute the correct value that should have been returned from
- // strxfrm() after the transformation has completed (MSVC strxfrm()
- // returns a bogus result; see PR #29935)
- xfrm_size = Traits::length (&res [0] + res_size);
-# endif // MSVC < 8.0
-
- // increment the size of the result string by the number
- // of transformed characters excluding the terminating NUL
- // if strxfrm() transforms the empty string into the empty
- // string, keep the terminating NUL, otherwise drop it
- res_size += xfrm_size + (last && !*psrc && !xfrm_size);
+ size_t i = 0;
+ for (; i < nchars && 0 == src [i]; ++i) ;
_TRY {
- res.resize (res_size);
+ // resize the result string to fit itself plus the result
+ // of the transformation including the terminating NUL
+ // appended by strxfrm()
+ res.resize (res.size () + i);
}
_CATCH (...) {
if (pbuf != buf)
delete[] pbuf;
_RETHROW;
}
+
+ nchars -= i;
+ src += i;
+ }
}
if (pbuf != buf)