Author: lnicoara Date: Sun Nov 4 14:17:44 2012 New Revision: 1405545 URL: http://svn.apache.org/viewvc?rev=1405545&view=rev Log: 2012-11-4 Liviu Nicoara <lnico...@apache.org>
Fixes to collate facet and test enhancements: * src/collate.cpp (__rw_strnxfrm): corrected processing of embedded NULs. (__rw_wcsnxfrm) same (duplicated code). (collate_byname<wchar_t>::do_compare): fixed string comparison return values, re-implemented the wcscoll-based comparison. * tests/localization/22.locale.collate.cpp: implemented a simpler collation test for strings with embedded NULs. Modified: stdcxx/branches/4.2.x/src/collate.cpp stdcxx/branches/4.2.x/tests/localization/22.locale.collate.cpp Modified: stdcxx/branches/4.2.x/src/collate.cpp URL: http://svn.apache.org/viewvc/stdcxx/branches/4.2.x/src/collate.cpp?rev=1405545&r1=1405544&r2=1405545&view=diff ============================================================================== --- stdcxx/branches/4.2.x/src/collate.cpp (original) +++ stdcxx/branches/4.2.x/src/collate.cpp Sun Nov 4 14:17:44 2012 @@ -488,99 +488,100 @@ __rw_strnxfrm (const char *src, size_t n while (nchars) { - // using a C-style cast instead of static_cast to avoid - // a gcc 2.95.2 bug causing an error on some platforms: - // static_cast from `void *' to `const char *' - const char* const last = (const char*)memchr (src, '\0', nchars); - - if (0 == last) { - - // no NUL found in the initial portion of the source string - // that fits into the local temporary buffer; copy as many - // characters as fit into the buffer + if (src [0]) { - if (bufsize <= nchars) { - if (pbuf != buf) - delete[] pbuf; - pbuf = new char [nchars + 1]; - } - - psrc = pbuf; - memcpy (psrc, src, nchars); + // using a C-style cast instead of static_cast to avoid + // a gcc 2.95.2 bug causing an error on some platforms: + // static_cast from `void *' to `const char *' + const char* const last = (const char*)memchr (src, '\0', nchars); + + if (0 == last) { + + // no NUL found in the initial portion of the source string + // that fits into the local temporary buffer; copy as many + // characters as fit into the buffer + + if (bufsize <= nchars) { + if (pbuf != buf) + delete[] pbuf; + pbuf = new char [nchars + 1]; + } - // append a terminating NUL and decrement the number - // of characters that remain to be processed - psrc [nchars] = '\0'; - src += nchars; - nchars = 0; - } - else { + psrc = pbuf; + memcpy (psrc, src, nchars); - // terminating NUL found in the source buffer - nchars -= (last - src) + 1; - psrc = _RWSTD_CONST_CAST (char*, src); - src += (last - src) + 1; - } + // append a terminating NUL and decrement the number + // of characters that remain to be processed + psrc [nchars] = '\0'; + src += nchars; + nchars = 0; + } + else { + // terminating NUL found in the source buffer + nchars -= (last - src) + 1; + psrc = _RWSTD_CONST_CAST (char*, src); + src += (last - src) + 1; + } #ifdef _RWSTD_OS_SUNOS - // Solaris 10u5 on AMD64 overwrites memory past the end of - // just_in_case_buf[8], to avoid this, pass a null pointer - char* const just_in_case_buf = 0; + // Solaris 10u5 on AMD64 overwrites memory past the end of + // just_in_case_buf[8], to avoid this, pass a null pointer + char* const just_in_case_buf = 0; #else - // provide a destination buffer to strxfrm() in case - // it's buggy (such as MSVC's) and tries to write to - // the buffer even if it's 0 - char just_in_case_buf [8]; -#endif + // provide a destination buffer to strxfrm() in case + // it's buggy (such as MSVC's) and tries to write to + // the buffer even if it's 0 + char just_in_case_buf [8]; +#endif // _RWSTD_OS_SUNOS - const size_t dst_size = strxfrm (just_in_case_buf, psrc, 0); + const size_t dst_size = strxfrm (just_in_case_buf, psrc, 0); - // check for strxfrm() errors - if (0 == (dst_size << 1)) { - if (pbuf != buf) - delete[] pbuf; + // check for strxfrm() errors + if (0 == (dst_size << 1)) { + if (pbuf != buf) + delete[] pbuf; - return _STD::string (); - } + return _STD::string (); + } - size_t res_size = res.size (); + size_t res_size = res.size (); - _TRY { - // resize the result string to fit itself plus the result - // of the transformation including the terminatin NUL - // appended by strxfrm() - res.resize (res_size + dst_size + 1); - } - _CATCH (...) { - if (pbuf != buf) - delete[] pbuf; - _RETHROW; - } + _TRY { + // resize the result string to fit itself plus the result + // of the transformation including the terminating NUL + // appended by strxfrm() + res.resize (res_size + dst_size + 1); + } + _CATCH (...) { + if (pbuf != buf) + delete[] pbuf; + _RETHROW; + } - // transfor the source string up to the terminating NUL - size_t xfrm_size = strxfrm (&res [0] + res_size, psrc, dst_size + 1); + res.resize (res.size () - !last); + } + else { -#if defined _MSC_VER && _MSC_VER < 1400 - // compute the correct value that should have been returned from - // strxfrm() after the transformation has completed (MSVC strxfrm() - // returns a bogus result; see PR #29935) - xfrm_size = strlen (&res [0] + res_size); -#endif // MSVC < 8.0 - - // increment the size of the result string by the number - // of transformed characters excluding the terminating NUL - // if strxfrm() transforms the empty string into the empty - // string, keep the terminating NUL, otherwise drop it - res_size += xfrm_size + (last && !*psrc && !xfrm_size); + // count and append the consecutive NULs embedded in the + // input string - _TRY { - res.resize (res_size); - } - _CATCH (...) { - if (pbuf != buf) - delete[] pbuf; - _RETHROW; + size_t i = 0; + for (; i < nchars && 0 == src [i]; ++i) ; + + _TRY { + // resize the result string to fit itself plus the + // embedded NULs + res.resize (res.size () + i); + } + _CATCH (...) { + if (pbuf != buf) + delete[] pbuf; + _RETHROW; + } + + nchars -= i; + src += i; } } @@ -702,99 +703,102 @@ __rw_wcsnxfrm (const wchar_t *src, size_ while (nchars) { - typedef _STD::char_traits<wchar_t> Traits; + if (src [0]) { - const wchar_t* const last = Traits::find (src, nchars, L'\0'); + typedef _STD::char_traits<wchar_t> Traits; - if (0 == last) { + const wchar_t* const last = Traits::find (src, nchars, L'\0'); - // no NUL found in the initial portion of the source string - // that fits into the local temporary buffer; copy as many - // characters as fit into the buffer + if (0 == last) { - if (bufsize <= nchars) { - if (pbuf != buf) - delete[] pbuf; - pbuf = new wchar_t [nchars + 1]; - } + // no NUL found in the initial portion of the source string + // that fits into the local temporary buffer; copy as many + // characters as fit into the buffer - psrc = pbuf; - memcpy (psrc, src, nchars * sizeof *psrc); + if (bufsize <= nchars) { + if (pbuf != buf) + delete[] pbuf; + pbuf = new wchar_t [nchars + 1]; + } - // append a terminating NUL and decrement the number - // of characters that remain to be processed - psrc [nchars] = 0; - src += nchars; - nchars = 0; - } - else { + psrc = pbuf; + memcpy (psrc, src, nchars * sizeof *psrc); - // terminating NUL found in the source buffer - nchars -= (last - src) + 1; - psrc = _RWSTD_CONST_CAST (wchar_t*, src); - src += (last - src) + 1; - } + // append a terminating NUL and decrement the number + // of characters that remain to be processed + psrc [nchars] = 0; + src += nchars; + nchars = 0; + } + else { + + // terminating NUL found in the source buffer + nchars -= (last - src) + 1; + psrc = _RWSTD_CONST_CAST (wchar_t*, src); + src += (last - src) + 1; + } #ifdef _RWSTD_OS_SUNOS - // just in case Solaris wcsxfrm() has the same bug - // as its strxfrm() (see above) - wchar_t* const just_in_case_buf = 0; + // just in case Solaris wcsxfrm() has the same bug + // as its strxfrm() (see above) + wchar_t* const just_in_case_buf = 0; #else - // provide a destination buffer to strxfrm() in case - // it's buggy (such as MSVC's) and tries to write to - // the buffer even if it's 0 - wchar_t just_in_case_buf [8]; + // provide a destination buffer to strxfrm() in case + // it's buggy (such as MSVC's) and tries to write to + // the buffer even if it's 0 + wchar_t just_in_case_buf [8]; #endif - const size_t dst_size = - _RWSTD_WCSXFRM (just_in_case_buf, psrc, 0); + const size_t dst_size = + _RWSTD_WCSXFRM (just_in_case_buf, psrc, 0); - // check for wcsxfrm() errors - if (_RWSTD_SIZE_MAX == dst_size) { - if (pbuf != buf) - delete[] pbuf; + // check for wcsxfrm() errors + if (_RWSTD_SIZE_MAX == dst_size) { + if (pbuf != buf) + delete[] pbuf; - return _STD::wstring (); - } + return _STD::wstring (); + } - size_t res_size = res.size (); + size_t res_size = res.size (); - _TRY { - // resize the result string to fit itself plus the result - // of the transformation including the terminatin NUL - // appended by strxfrm() - res.resize (res_size + dst_size + 1); - } - _CATCH (...) { - if (pbuf != buf) - delete[] pbuf; - _RETHROW; - } + _TRY { + // resize the result string to fit itself plus the result + // of the transformation including the terminatin NUL + // appended by strxfrm() + res.resize (res_size + dst_size + 1); + } + _CATCH (...) { + if (pbuf != buf) + delete[] pbuf; + _RETHROW; + } - // transfor the source string up to the terminating NUL - size_t xfrm_size = + // transform the source string up to the terminating NUL _RWSTD_WCSXFRM (&res [0] + res_size, psrc, dst_size + 1); + res.resize (res.size () - !last); + } + else { -# if defined _MSC_VER && _MSC_VER < 1400 - // compute the correct value that should have been returned from - // strxfrm() after the transformation has completed (MSVC strxfrm() - // returns a bogus result; see PR #29935) - xfrm_size = Traits::length (&res [0] + res_size); -# endif // MSVC < 8.0 - - // increment the size of the result string by the number - // of transformed characters excluding the terminating NUL - // if strxfrm() transforms the empty string into the empty - // string, keep the terminating NUL, otherwise drop it - res_size += xfrm_size + (last && !*psrc && !xfrm_size); + // count and append the consecutive NULs embedded in the + // input string - _TRY { - res.resize (res_size); - } - _CATCH (...) { - if (pbuf != buf) - delete[] pbuf; - _RETHROW; + size_t i = 0; + for (; i < nchars && 0 == src [i]; ++i) ; + + _TRY { + // resize the result string to fit itself plus the + // embedded NULs + res.resize (res.size () + i); + } + _CATCH (...) { + if (pbuf != buf) + delete[] pbuf; + _RETHROW; + } + + nchars -= i; + src += i; } } @@ -1136,43 +1140,94 @@ do_compare (const wchar_t* low1, const w const string_type s2 = do_transform (low2, high2); // FIXME: optimize - return s1.compare (s2); + const int cmp = s1.compare (s2); + + // adjust return value + return cmp < 0 ? -1 : cmp ? 1 : 0; } #ifndef _RWSTD_NO_WCSCOLL // use the system C library to compare the strings - _RW::__rw_setlocale clocale (this->_C_name, _RWSTD_LC_COLLATE); - const size_t len1 = high1 - low1; - const size_t len2 = high2 - low2; - const size_t len = len1 + len2; - - // small local buffer - wchar_t local_buffer [256]; - const size_t bufsize = sizeof local_buffer / sizeof *local_buffer; - - // allocate only if local buffer is too small - wchar_t* const wbuf = - len + 2 >= bufsize ? new wchar_t [len + 2] : local_buffer; - - // copy and null-terminate first sequence - char_traits<wchar_t>::copy (wbuf, low1, len1); - wbuf [len1] = '\0'; - - // append and null-terminate first sequence - char_traits<wchar_t>::copy (wbuf + len1 + 1, low2, len2); - wbuf [len1 + 1 + len2] = '\0'; - - // compare sequences using wcscoll() - const int result = wcscoll (wbuf, wbuf + len1 + 1); - - // deallocate only if allocated - if (wbuf != local_buffer) - delete[] wbuf; + size_t len1 = high1 - low1; + size_t len2 = high2 - low2; + + if (0 == len1 || 0 == len2) + return len1 ? 1 : len2 ? -1 : 0; + + // attempt to use a small buffer + wchar_t wbuf [256], *pwbuf = wbuf; + const size_t bufsize = sizeof wbuf / sizeof *wbuf; + + wchar_t* pwbuf1 = high1 [-1] ? wbuf : const_cast< wchar_t* > (low1); + wchar_t* pwbuf2 = high2 [-1] ? wbuf : const_cast< wchar_t* > (low2); + + size_t len = + (pwbuf1 == wbuf ? (len1 + 1) : 0) + + (pwbuf2 == wbuf ? (len2 + 1) : 0); + + if (len >= bufsize) + pwbuf = new wchar_t [len]; + + wchar_t* ptmp = pwbuf; - return result ? result > 0 ? 1 : -1 : 0; + // only copy non NUL-terminated buffers + if (pwbuf1 == wbuf) { + pwbuf1 = pwbuf; + + // append and null-terminate first sequence + char_traits<wchar_t>::copy (pwbuf1, low1, len1); + pwbuf1 [len1] = '\0'; + + ptmp = pwbuf + len1 + 1; + } + + if (pwbuf2 == wbuf) { + pwbuf2 = ptmp; + + // append and null-terminate second sequence + char_traits<wchar_t>::copy (pwbuf2, low2, len2); + pwbuf2 [len2] = '\0'; + } + + int cmp = 0; + + for (; len1 && len2;) { + + for (; len1 && len2 && 0 == pwbuf1 [0] && 0 == pwbuf2 [0]; + ++pwbuf1, ++pwbuf2, --len1, --len2) ; + + // compare sequences using wcscoll, stopping at first NUL + cmp = wcscoll (pwbuf1, pwbuf2); + + if (cmp) { + if (pwbuf != wbuf) + delete [] pwbuf; + return cmp > 0 ? 1 : -1; + } + + // if they compared equal, they may have embedded NULs + size_t n = _RWSTD_WCSLEN (pwbuf1); + + len1 -= n; + pwbuf1 += n; + + n = _RWSTD_WCSLEN (pwbuf2); + + len2 -= n; + pwbuf2 += n; + } + + // adjust return value + if (0 == cmp) + cmp = len1 ? 1 : len2 ? -1 : 0; + + if (pwbuf != wbuf) + delete [] pwbuf; + + return cmp; #else // if defined (_RWSTD_NO_WCSCOLL) @@ -1180,7 +1235,10 @@ do_compare (const wchar_t* low1, const w const string_type s1 = do_transform (low1, high1); const string_type s2 = do_transform (low2, high2); - return s1.compare (s2); + const int cmp = s1.compare (s2); + + // adjust return value + return cmp < 0 ? -1 : cmp ? 1 : 0; #endif // _RWSTD_NO_WCSCOLL Modified: stdcxx/branches/4.2.x/tests/localization/22.locale.collate.cpp URL: http://svn.apache.org/viewvc/stdcxx/branches/4.2.x/tests/localization/22.locale.collate.cpp?rev=1405545&r1=1405544&r2=1405545&view=diff ============================================================================== --- stdcxx/branches/4.2.x/tests/localization/22.locale.collate.cpp (original) +++ stdcxx/branches/4.2.x/tests/localization/22.locale.collate.cpp Sun Nov 4 14:17:44 2012 @@ -116,7 +116,7 @@ const char* widen (char* dst, const char return dst; } -#ifndef _RWSTD_NO_WCHAR_T +#if !defined (_RWSTD_NO_WCHAR_T) int c_strcoll (const wchar_t* s1, const wchar_t* s2) { @@ -1029,65 +1029,119 @@ check_hash_eff (const char* charTname) template <class charT> void -check_NUL_locale (const char* charTname, const char* locname) +check_NUL_collate (const char* charTname, const char* locname, + const charT* s1, size_t s1_len, + const charT* s2, size_t s2_len) { std::locale loc (locname); - charT s [STR_SIZE]; - gen_str (s, STR_SIZE); + typedef typename std::collate<charT> Collate; + typedef typename Collate::string_type String; - charT buf [2][STR_SIZE]; + const Collate &col = std::use_facet<Collate> (loc); - std::memcpy (buf [0], s, sizeof s); - std::memcpy (buf [1], s, sizeof s); + const String x1 = col.transform (s1, s1 + s1_len); + const String x2 = col.transform (s2, s2 + s2_len); - // - // Verify that first buffer compares more: - // |--------0----| = buf [0] - // |----0--------| = buf [1] - // - buf [0][4] = charT (); - buf [1][3] = charT (); + const int colcmp = col.compare (s1, s1 + s1_len, s2, s2 + s2_len); - typedef std::collate<charT> Collate; + int lexcmp = x1.compare (x2); + lexcmp = lexcmp < -1 ? -1 : 1 < lexcmp ? 1 : lexcmp; + + rw_assert (colcmp == lexcmp, __FILE__, __LINE__, + "collate<%s>::compare (%{*.*Ac}, %{*.*Ac}) = %d, " + "lexicographical comparison of transformed strings = %d, " + "mismatch in locale (\"%s\")", charTname, + sizeof (charT), s1_len, s1, + sizeof (charT), s2_len, s2, + colcmp, lexcmp, locname); + + const bool eq = + std::string (s1, s1 + s1_len) == + std::string (s2, s2 + s2_len); + + rw_assert (bool (colcmp) != eq, __FILE__, __LINE__, + "collate<%s>::compare (%{*.*Ac}, %{*.*Ac}) = %d, " + "lexicographical compare = %s, mismatch in locale (\"%s\")", + charTname, + sizeof (charT), s1_len, s1, + sizeof (charT), s2_len, s2, colcmp, + (eq ? "true" : "false"), locname); +} - const Collate &col = std::use_facet<Collate> (loc); +static void +check_NUL_collate (const char* charTname, const char* locname, char) +{ +#define T(s, t) \ + check_NUL_collate (charTname, locname, \ + s, sizeof s / sizeof *s - 1, \ + t, sizeof t / sizeof *t - 1) + + T ("", ""); + T ("", "\0"); + T ("", "\0\0"); + T ("\0", ""); + T ("\0", "\0"); + T ("\0", "\0\0"); + T ("a", "\0"); + T ("a", "\0a"); + T ("a", "a\0"); + T ("a", "a\0\0"); + T ("a\0", "a"); + T ("a\0", "a\0"); + T ("a\0", "a\0\0"); + T ("\0a", ""); + T ("\0a", "\0"); + T ("\0a", "\0a"); + T ("\0a", "\0a\0"); + T ("a\0\0b", ""); + T ("a\0\0b", "a"); + T ("a\0\0b", "ab"); + T ("a\0\0b", "a\0"); + T ("a\0\0b", "a\0\0"); + T ("a\0\0b", "a\0b"); + T ("a\0\0b", "a\0\0b"); +} - int cmp = col.compare ( - buf [0], buf [0] + sizeof buf [0] / sizeof *buf [0], - buf [1], buf [1] + sizeof buf [1] / sizeof *buf [1]); - - rw_assert (cmp > 0, __FILE__, __LINE__, - "collate<%s>::compare (%{*.*Ac}, %{*.*Ac}) " - " > 0, failed in locale (\"%s\")", charTname, - sizeof (charT), sizeof buf [0] / sizeof *buf [0], buf [0], - sizeof (charT), sizeof buf [1] / sizeof *buf [1], buf [1], - locname); - - std::memcpy (buf [0], s, sizeof s); - std::memcpy (buf [1], s, sizeof s); - - // - // Verify that first compare less: - // |----0---0----| = buf [0] - // |----0--------| = buf [1] - // - buf [0][3] = charT (); - buf [0][5] = charT (); - buf [1][3] = charT (); - - cmp = col.compare ( - buf [0], buf [0] + sizeof buf [0] / sizeof *buf [0], - buf [1], buf [1] + sizeof buf [1] / sizeof *buf [1]); - - rw_assert (cmp < 0, __FILE__, __LINE__, - "collate<%s>::compare (%{*.*Ac}, ..., %{*.*Ac}, ...) " - " < 0, failed in locale (\"%s\")", charTname, - sizeof (charT), sizeof buf [0] / sizeof *buf [0], buf [0], - sizeof (charT), sizeof buf [1] / sizeof *buf [1], buf [1], - locname); +#if !defined (_RWSTD_NO_WCHAR_T) + +static void +check_NUL_collate (const char* charTname, const char* locname, wchar_t) +{ + T (L"", L""); + T (L"", L"\0"); + T (L"", L"\0\0"); + T (L"\0", L""); + T (L"\0", L"\0"); + T (L"\0", L"\0\0"); + T (L"a", L"\0"); + T (L"a", L"\0a"); + T (L"a", L"a\0"); + T (L"a", L"a\0\0"); + T (L"a\0", L"a"); + T (L"a\0", L"a\0"); + T (L"a\0", L"a\0\0"); + T (L"\0a", L""); + T (L"\0a", L"\0"); + T (L"\0a", L"\0a"); + T (L"\0a", L"\0a\0"); + T (L"a\0\0b", L""); + T (L"a\0\0b", L"a"); + T (L"a\0\0b", L"ab"); + T (L"a\0\0b", L"a\0"); + T (L"a\0\0b", L"a\0\0"); + T (L"a\0\0b", L"a\0b"); + T (L"a\0\0b", L"a\0\0b"); + T (L"a\0\0b\0", L"a\0\0b"); + T (L"a\0\0b\0\0", L"a\0\0b"); + T (L"a\0\0b\0\0", L"a\0\0b\0"); + T (L"a\0\0b\0\0", L"a\0\0bc"); + +#undef T } +#endif // _RWSTD_NO_WCHAR_T + template <class charT> void check_NUL (const char* charTname) @@ -1101,9 +1155,9 @@ check_NUL (const char* charTname) size_t i = 0; for (const char* locname = rw_locales (LC_COLLATE); - *locname; locname += std::strlen (locname) + 1, ++i) { + *locname; locname += std::strlen (locname) + 1) { try { - check_NUL_locale<charT> (charTname, locname); + check_NUL_collate (charTname, locname, charT ()); } catch (...) { } @@ -1128,14 +1182,13 @@ run_test (int /*argc*/, char* /*argv*/ [ { do_test<char> ("char"); -#if defined (_RWSTD_NO_WCHAR_T) +#if !defined (_RWSTD_NO_WCHAR_T) do_test<wchar_t> ("wchar_t"); #endif // _RWSTD_NO_WCHAR_T return 0; } - int main (int argc, char* argv []) {