https://sourceware.org/git/gitweb.cgi?p=newlib-cygwin.git;h=eac830e0feac1e5f4fbb9637506bd071e7530a1f
commit eac830e0feac1e5f4fbb9637506bd071e7530a1f
Author: Corinna Vinschen <[email protected]>
AuthorDate: Tue Feb 14 12:22:36 2023 +0100
Commit: Corinna Vinschen <[email protected]>
CommitDate: Tue Feb 14 12:48:26 2023 +0100

    Cygwin: __collate_range_cmp: handle Unicode values >= 0x10000

    So far the input to __collate_range_cmp was handled as a wchar_t.
    Change that to handle it as wint_t holding a UTF-32 value and
    add creating surrogate pairs for the call to wcscoll.

    Signed-off-by: Corinna Vinschen <[email protected]>

Diff:
---
 winsup/cygwin/nlsfuncs.cc | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/winsup/cygwin/nlsfuncs.cc b/winsup/cygwin/nlsfuncs.cc
index ddd85bea1647..0d204929d24c 100644
--- a/winsup/cygwin/nlsfuncs.cc
+++ b/winsup/cygwin/nlsfuncs.cc
@@ -1176,8 +1176,20 @@ strcoll (const char *__restrict s1, const char *__restrict s2)
 extern "C" int
 __collate_range_cmp (int c1, int c2)
 {
-  wchar_t s1[2] = { (wchar_t) c1, L'\0' };
-  wchar_t s2[2] = { (wchar_t) c2, L'\0' };
+  wchar_t s1[3] = { (wchar_t) c1, L'\0', L'\0' };
+  wchar_t s2[3] = { (wchar_t) c2, L'\0', L'\0' };
+
+  /* Handle Unicode values >= 0x10000, convert to surrogate pair */
+  if (c1 > 0xffff)
+    {
+      s1[0] = ((c1 - 0x10000) >> 10) + 0xd800;
+      s1[1] = ((c1 - 0x10000) & 0x3ff) + 0xdc00;
+    }
+  if (c2 > 0xffff)
+    {
+      s2[0] = ((c2 - 0x10000) >> 10) + 0xd800;
+      s2[1] = ((c2 - 0x10000) & 0x3ff) + 0xdc00;
+    }
 
   return wcscoll (s1, s2);
 }
