These two patches work around an ISO C compliance bug in wmemcmp()
on several platforms.

The point is that in ISO C, a "wide character" is any wchar_t value.
A "wide character" is not constrained to the range 0..INT_MAX. For
the precise reasoning, see
<https://www.openwall.com/lists/musl/2023/04/18/5>.


2023-04-18  Bruno Haible  <[email protected]>

        wmemcmp: Add tests.
        * tests/test-wmemcmp.c: New file, based on tests/unistr/test-cmp.h.
        * modules/wmemcmp-tests: New file.

        wmemcmp: Work around ISO C compliance bug on several platforms.
        * lib/wchar.in.h (wmemcmp): Consider REPLACE_WMEMCMP.
        * lib/wmemcmp-impl.h (wmemcmp): Don't assume that the two wide
        characters are in the range 0..INT_MAX.
        * m4/wmemcmp.m4 (gl_FUNC_WMEMCMP): Test whether wmemcmp works for all
        wide characters. Set REPLACE_WMEMCMP.
        * m4/wchar_h.m4 (gl_WCHAR_H_DEFAULTS): Initialize REPLACE_WMEMCMP.
        * modules/wchar (Makefile.am): Substitute REPLACE_WMEMCMP.
        * modules/wmemcmp (configure.ac): Consider REPLACE_WMEMCMP.
        * doc/posix-functions/wmemcmp.texi: Mention the bug.

From 6c28538c9d6bbf692ab12972de6cc035e54b0c67 Mon Sep 17 00:00:00 2001
From: Bruno Haible <[email protected]>
Date: Wed, 19 Apr 2023 01:01:56 +0200
Subject: [PATCH 1/2] wmemcmp: Work around ISO C compliance bug on several
 platforms.

* lib/wchar.in.h (wmemcmp): Consider REPLACE_WMEMCMP.
* lib/wmemcmp-impl.h (wmemcmp): Don't assume that the two wide
characters are in the range 0..INT_MAX.
* m4/wmemcmp.m4 (gl_FUNC_WMEMCMP): Test whether wmemcmp works for all
wide characters. Set REPLACE_WMEMCMP.
* m4/wchar_h.m4 (gl_WCHAR_H_DEFAULTS): Initialize REPLACE_WMEMCMP.
* modules/wchar (Makefile.am): Substitute REPLACE_WMEMCMP.
* modules/wmemcmp (configure.ac): Consider REPLACE_WMEMCMP.
* doc/posix-functions/wmemcmp.texi: Mention the bug.
---
 ChangeLog                        | 13 +++++++++
 doc/posix-functions/wmemcmp.texi |  4 +++
 lib/wchar.in.h                   | 16 +++++++++--
 lib/wmemcmp-impl.h               |  5 ++--
 m4/wchar_h.m4                    |  3 +-
 m4/wmemcmp.m4                    | 49 +++++++++++++++++++++++++++++++-
 modules/wchar                    |  1 +
 modules/wmemcmp                  |  3 +-
 8 files changed, 87 insertions(+), 7 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 4b8c72490b..6e885d865c 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,16 @@
+2023-04-18  Bruno Haible  <[email protected]>
+
+	wmemcmp: Work around ISO C compliance bug on several platforms.
+	* lib/wchar.in.h (wmemcmp): Consider REPLACE_WMEMCMP.
+	* lib/wmemcmp-impl.h (wmemcmp): Don't assume that the two wide
+	characters are in the range 0..INT_MAX.
+	* m4/wmemcmp.m4 (gl_FUNC_WMEMCMP): Test whether wmemcmp works for all
+	wide characters. Set REPLACE_WMEMCMP.
+	* m4/wchar_h.m4 (gl_WCHAR_H_DEFAULTS): Initialize REPLACE_WMEMCMP.
+	* modules/wchar (Makefile.am): Substitute REPLACE_WMEMCMP.
+	* modules/wmemcmp (configure.ac): Consider REPLACE_WMEMCMP.
+	* doc/posix-functions/wmemcmp.texi: Mention the bug.
+
 2023-04-18  Bruno Haible  <[email protected]>
 
 	doc: Update platform list for posix_spawnp.
diff --git a/doc/posix-functions/wmemcmp.texi b/doc/posix-functions/wmemcmp.texi
index 8d3262c5cb..cebacfb664 100644
--- a/doc/posix-functions/wmemcmp.texi
+++ b/doc/posix-functions/wmemcmp.texi
@@ -11,6 +11,10 @@
 @item
 This function is missing on some platforms:
 HP-UX 11.00, IRIX 6.5, MSVC 14.
+@item
+This function compares the wide characters as if they were unsigned, although
+@code{wchar_t} is signed, on some platforms:
+glibc 2.14.1 on x86 or x86_64, musl libc 1.2.3, NetBSD 9.0, OpenBSD 7.2, Solaris 11.4.
 @end itemize
 
 Portability problems not fixed by Gnulib:
diff --git a/lib/wchar.in.h b/lib/wchar.in.h
index 80b6652e95..6a5b18d39d 100644
--- a/lib/wchar.in.h
+++ b/lib/wchar.in.h
@@ -641,13 +641,25 @@ _GL_WARN_ON_USE (wmemchr, "wmemchr is unportable - "
 
 /* Compare N wide characters of S1 and S2.  */
 #if @GNULIB_WMEMCMP@
-# if !@HAVE_WMEMCMP@
+# if @REPLACE_WMEMCMP@
+#  if !(defined __cplusplus && defined GNULIB_NAMESPACE)
+#   undef wmemcmp
+#   define wmemcmp rpl_wmemcmp
+#  endif
+_GL_FUNCDECL_RPL (wmemcmp, int,
+                  (const wchar_t *s1, const wchar_t *s2, size_t n)
+                  _GL_ATTRIBUTE_PURE);
+_GL_CXXALIAS_RPL (wmemcmp, int,
+                  (const wchar_t *s1, const wchar_t *s2, size_t n));
+# else
+#  if !@HAVE_WMEMCMP@
 _GL_FUNCDECL_SYS (wmemcmp, int,
                   (const wchar_t *s1, const wchar_t *s2, size_t n)
                   _GL_ATTRIBUTE_PURE);
-# endif
+#  endif
 _GL_CXXALIAS_SYS (wmemcmp, int,
                   (const wchar_t *s1, const wchar_t *s2, size_t n));
+# endif
 # if __GLIBC__ >= 2
 _GL_CXXALIASWARN (wmemcmp);
 # endif
diff --git a/lib/wmemcmp-impl.h b/lib/wmemcmp-impl.h
index 2b8125fe26..6148220de7 100644
--- a/lib/wmemcmp-impl.h
+++ b/lib/wmemcmp-impl.h
@@ -27,8 +27,9 @@ wmemcmp (const wchar_t *s1, const wchar_t *s2, size_t n)
           n--;
           continue;
         }
-      /* Note that wc1 and wc2 each have at most 31 bits.  */
-      return (int)wc1 - (int)wc2;
+      /* ISO C requires wmemcmp to work with all wchar_t values.
+         We cannot assume that wc1 and wc2 are in the range 0..INT_MAX.  */
+      return _GL_CMP (wc1, wc2);
              /* > 0 if wc1 > wc2, < 0 if wc1 < wc2.  */
     }
   return 0;
diff --git a/m4/wchar_h.m4 b/m4/wchar_h.m4
index 8cc38ef804..dfd154f8e9 100644
--- a/m4/wchar_h.m4
+++ b/m4/wchar_h.m4
@@ -7,7 +7,7 @@
 
 dnl Written by Eric Blake.
 
-# wchar_h.m4 serial 57
+# wchar_h.m4 serial 58
 
 AC_DEFUN_ONCE([gl_WCHAR_H],
 [
@@ -255,5 +255,6 @@ AC_DEFUN([gl_WCHAR_H_DEFAULTS]
   REPLACE_WCSFTIME=0;   AC_SUBST([REPLACE_WCSFTIME])
   REPLACE_WCSSTR=0;     AC_SUBST([REPLACE_WCSSTR])
   REPLACE_WCSTOK=0;     AC_SUBST([REPLACE_WCSTOK])
+  REPLACE_WMEMCMP=0;    AC_SUBST([REPLACE_WMEMCMP])
   REPLACE_WMEMPCPY=0;   AC_SUBST([REPLACE_WMEMPCPY])
 ])
diff --git a/m4/wmemcmp.m4 b/m4/wmemcmp.m4
index 534dde10cc..c804dc8a4a 100644
--- a/m4/wmemcmp.m4
+++ b/m4/wmemcmp.m4
@@ -1,4 +1,4 @@
-# wmemcmp.m4 serial 5
+# wmemcmp.m4 serial 6
 dnl Copyright (C) 2011-2023 Free Software Foundation, Inc.
 dnl This file is free software; the Free Software Foundation
 dnl gives unlimited permission to copy and/or distribute it,
@@ -7,6 +7,7 @@
 AC_DEFUN([gl_FUNC_WMEMCMP],
 [
   AC_REQUIRE([gl_WCHAR_H_DEFAULTS])
+  AC_REQUIRE([AC_CANONICAL_HOST])
   dnl We cannot use AC_CHECK_FUNCS here, because the MSVC 9 header files
   dnl provide this function as an inline function definition.
   AC_CACHE_CHECK([for wmemcmp], [gl_cv_func_wmemcmp],
@@ -21,5 +22,51 @@ AC_DEFUN([gl_FUNC_WMEMCMP]
     ])
   if test $gl_cv_func_wmemcmp = no; then
     HAVE_WMEMCMP=0
+  else
+    AC_CACHE_CHECK([whether wmemcmp works for all wide characters],
+      [gl_cv_func_wmemcmp_works],
+      [AC_RUN_IFELSE(
+         [AC_LANG_SOURCE([[
+            #include <wchar.h>
+            int main ()
+            {
+              wchar_t a = (wchar_t) 0x76547654;
+              wchar_t b = (wchar_t) 0x9abc9abc;
+              int cmp = wmemcmp (&a, &b, 1);
+              if ((wchar_t)-1 < 0)
+                return !(cmp > 0);
+              else
+                return !(cmp < 0);
+            }
+            ]])
+         ],
+         [gl_cv_func_wmemcmp_works=yes],
+         [gl_cv_func_wmemcmp_works=no],
+         [case "$host_os" in
+            # Guess no on glibc versions < 2.15.
+            *-gnu* | gnu*)
+              AC_EGREP_CPP([Unlucky],
+                [
+#include <features.h>
+#ifdef __GNU_LIBRARY__
+ #if (__GLIBC__ == 2 && __GLIBC_MINOR__ < 15)
+  Unlucky GNU user
+ #endif
+#endif
+                ],
+                [gl_cv_func_wmemcmp_works="guessing no"],
+                [gl_cv_func_wmemcmp_works="guessing yes"])
+              ;;
+            # Guess no on musl systems.
+            *-musl* | midipix*) gl_cv_func_wmemcmp_works="guessing no" ;;
+            # If we don't know, obey --enable-cross-guesses.
+            *) gl_cv_func_wmemcmp_works="$gl_cross_guess_normal" ;;
+          esac
+         ])
+      ])
+    case "$gl_cv_func_wmemcmp_works" in
+      *yes) ;;
+      *) REPLACE_WMEMCMP=1 ;;
+    esac
   fi
 ])
diff --git a/modules/wchar b/modules/wchar
index feacaffff1..180c94309c 100644
--- a/modules/wchar
+++ b/modules/wchar
@@ -144,6 +144,7 @@ wchar.h: wchar.in.h $(top_builddir)/config.status $(CXXDEFS_H) $(ARG_NONNULL_H)
 	      -e 's|@''REPLACE_WCSFTIME''@|$(REPLACE_WCSFTIME)|g' \
 	      -e 's|@''REPLACE_WCSSTR''@|$(REPLACE_WCSSTR)|g' \
 	      -e 's|@''REPLACE_WCSTOK''@|$(REPLACE_WCSTOK)|g' \
+	      -e 's|@''REPLACE_WMEMCMP''@|$(REPLACE_WMEMCMP)|g' \
 	      -e 's|@''REPLACE_WMEMPCPY''@|$(REPLACE_WMEMPCPY)|g' \
 	      -e '/definitions of _GL_FUNCDECL_RPL/r $(CXXDEFS_H)' \
 	      -e '/definition of _GL_ARG_NONNULL/r $(ARG_NONNULL_H)' \
diff --git a/modules/wmemcmp b/modules/wmemcmp
index 219f3c3754..c0e19a5184 100644
--- a/modules/wmemcmp
+++ b/modules/wmemcmp
@@ -11,7 +11,8 @@ wchar
 
 configure.ac:
 gl_FUNC_WMEMCMP
-gl_CONDITIONAL([GL_COND_OBJ_WMEMCMP], [test $HAVE_WMEMCMP = 0])
+gl_CONDITIONAL([GL_COND_OBJ_WMEMCMP],
+               [test $HAVE_WMEMCMP = 0 || test $REPLACE_WMEMCMP = 1])
 gl_WCHAR_MODULE_INDICATOR([wmemcmp])
 
 Makefile.am:
-- 
2.34.1

From 312f211ba66f4b3f58fdb4399c61c58c0dcf6e75 Mon Sep 17 00:00:00 2001
From: Bruno Haible <[email protected]>
Date: Wed, 19 Apr 2023 01:10:32 +0200
Subject: [PATCH 2/2] wmemcmp: Add tests.

* tests/test-wmemcmp.c: New file, based on tests/unistr/test-cmp.h.
* modules/wmemcmp-tests: New file.
---
 ChangeLog             |  4 ++
 modules/wmemcmp-tests | 12 ++++++
 tests/test-wmemcmp.c  | 91 +++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 107 insertions(+)
 create mode 100644 modules/wmemcmp-tests
 create mode 100644 tests/test-wmemcmp.c

diff --git a/ChangeLog b/ChangeLog
index 6e885d865c..17596c4b23 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,9 @@
 2023-04-18  Bruno Haible  <[email protected]>
 
+	wmemcmp: Add tests.
+	* tests/test-wmemcmp.c: New file, based on tests/unistr/test-cmp.h.
+	* modules/wmemcmp-tests: New file.
+
 	wmemcmp: Work around ISO C compliance bug on several platforms.
 	* lib/wchar.in.h (wmemcmp): Consider REPLACE_WMEMCMP.
 	* lib/wmemcmp-impl.h (wmemcmp): Don't assume that the two wide
diff --git a/modules/wmemcmp-tests b/modules/wmemcmp-tests
new file mode 100644
index 0000000000..c42df6ec27
--- /dev/null
+++ b/modules/wmemcmp-tests
@@ -0,0 +1,12 @@
+Files:
+tests/test-wmemcmp.c
+tests/signature.h
+tests/macros.h
+
+Depends-on:
+
+configure.ac:
+
+Makefile.am:
+TESTS += test-wmemcmp
+check_PROGRAMS += test-wmemcmp
diff --git a/tests/test-wmemcmp.c b/tests/test-wmemcmp.c
new file mode 100644
index 0000000000..998dff2653
--- /dev/null
+++ b/tests/test-wmemcmp.c
@@ -0,0 +1,91 @@
+/* Test of wmemcmp() function.
+   Copyright (C) 2008-2023 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation, either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program.  If not, see <https://www.gnu.org/licenses/>.  */
+
+/* Written by Bruno Haible <[email protected]>, 2023.  */
+
+#include <config.h>
+
+#include <wchar.h>
+
+#include "signature.h"
+SIGNATURE_CHECK (wmemcmp, int, (const wchar_t *, const wchar_t *, size_t));
+
+#include "macros.h"
+
+int
+main (int argc, char *argv[])
+{
+  /* Test equal / not equal distinction.  */
+  {
+    static const wchar_t input1[] = { 'f', 'o', 'o', 0 };
+    static const wchar_t input2[] = { 'f', 'o', 'o', 'b', 'a', 'r', 0 };
+    ASSERT (wmemcmp (input1, input2, 2) == 0);
+    ASSERT (wmemcmp (input1, input2, 3) == 0);
+    ASSERT (wmemcmp (input1, input2, 4) != 0);
+  }
+  {
+    static const wchar_t input1[] = { 'f', 'o', 'o', 0 };
+    static const wchar_t input2[] = { 'b', 'a', 'r', 0 };
+    ASSERT (wmemcmp (input1, input2, 1) != 0);
+    ASSERT (wmemcmp (input1, input2, 3) != 0);
+  }
+
+  /* Test less / equal / greater distinction.  */
+  {
+    static const wchar_t input1[] = { 'f', 'o', 'o', 0 };
+    static const wchar_t input2[] = { 'm', 'o', 'o', 0 };
+    ASSERT (wmemcmp (input1, input2, 4) < 0);
+    ASSERT (wmemcmp (input2, input1, 4) > 0);
+  }
+  {
+    static const wchar_t input1[] = { 'o', 'o', 'm', 'p', 'h', 0 };
+    static const wchar_t input2[] = { 'o', 'o', 'p', 's', 0 };
+    ASSERT (wmemcmp (input1, input2, 3) < 0);
+    ASSERT (wmemcmp (input2, input1, 3) > 0);
+  }
+  {
+    static const wchar_t input1[] = { 'f', 'o', 'o', 0 };
+    static const wchar_t input2[] = { 'f', 'o', 'o', 'b', 'a', 'r', 0 };
+    ASSERT (wmemcmp (input1, input2, 4) < 0);
+    ASSERT (wmemcmp (input2, input1, 4) > 0);
+  }
+
+  /* ISO C requires wmemcmp to work with all wchar_t values.  */
+  {
+    static const wchar_t input1[] = { (wchar_t) 0x76547654 };
+    static const wchar_t input2[] = { (wchar_t) 0x9abc9abc };
+    if ((wchar_t)-1 < 0)
+      {
+        /* wchar_t is signed.  */
+        ASSERT (wmemcmp (input1, input2, 1) > 0);
+        ASSERT (wmemcmp (input2, input1, 1) < 0);
+      }
+    else
+      {
+        /* wchar_t is unsigned.  */
+        ASSERT (wmemcmp (input1, input2, 1) < 0);
+        ASSERT (wmemcmp (input2, input1, 1) > 0);
+      }
+  }
+  {
+    static const wchar_t input1[] = { (wchar_t) 0x9abc9abc };
+    static const wchar_t input2[] = { (wchar_t) 0x9bdf9bdf };
+    ASSERT (wmemcmp (input1, input2, 1) < 0);
+    ASSERT (wmemcmp (input2, input1, 1) > 0);
+  }
+
+  return 0;
+}
-- 
2.34.1

Reply via email to