On 2026-05-25 15:15, Bruno Haible wrote:
> Paul Eggert wrote:
>> -  bool unibyte_locale = MB_CUR_MAX == 1;
>> +  bool unibyte_locale = USE_C_LOCALE || MB_CUR_MAX == 1;
>
> I think this part causes a regression on platforms where the "C"
> locale uses UTF-8 encoding (macOS, Haiku):

Thanks for mentioning that. I installed the attached to try to fix this.
From 53d4558960659ba7c4e9e2757bfb0977a5027fae Mon Sep 17 00:00:00 2001
From: Paul Eggert <[email protected]>
Date: Mon, 25 May 2026 17:08:48 -0700
Subject: [PATCH] quotearg: be nicer on macOS etc

Problem reported by Bruno Haible in:
https://lists.gnu.org/r/bug-gnulib/2026-05/msg00150.html
* lib/quotearg.c (C_LOCALE_MIGHT_BE_MULTIBYTE): New macro.
(wch, mbstate, mbs_clear, mbrtowch, chisprint, wchisprint)
(quotearg_buffer_restyled): Fall back on <wchar.h> and <wctype.h>
functions if USE_C_LOCALE and C_LOCALE_MIGHT_BE_MULTIBYTE.
---
 ChangeLog      | 10 +++++++++
 lib/quotearg.c | 56 ++++++++++++++++++++++++++++++++++----------------
 2 files changed, 48 insertions(+), 18 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index b486911abe..ea72f7062e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2026-05-25  Paul Eggert  <[email protected]>
+
+	quotearg: be nicer on macOS etc
+	Problem reported by Bruno Haible in:
+	https://lists.gnu.org/r/bug-gnulib/2026-05/msg00150.html
+	* lib/quotearg.c (C_LOCALE_MIGHT_BE_MULTIBYTE): New macro.
+	(wch, mbstate, mbs_clear, mbrtowch, chisprint, wchisprint)
+	(quotearg_buffer_restyled): Fall back on <wchar.h> and <wctype.h>
+	functions if USE_C_LOCALE and C_LOCALE_MIGHT_BE_MULTIBYTE.
+
 2026-05-25  Bruno Haible  <[email protected]>
 
 	mbuiterf: Implement multi-byte per encoding error (MEE) consistently.
diff --git a/lib/quotearg.c b/lib/quotearg.c
index 21014d4782..3c2bf946ed 100644
--- a/lib/quotearg.c
+++ b/lib/quotearg.c
@@ -39,16 +39,26 @@
 #include <stdlib.h>
 #include <string.h>
 
-/* If USE_C_LOCALE is set to 1, this file defines a function that uses the
+/* If USE_C_LOCALE is nonzero, this file defines a function that uses the
    "C" locale, regardless of the current locale.  Applications
    defining this macro might avoid the need for Gnulib's c32isprint,
-   gettext-h, mbrtoc32, mbsinit, mbszero, wchar-h, and uchar-h modules,
-   but they also need the c-ctype module.  */
+   gettext-h, mbrtoc32, mbsinit, wchar-h, and uchar-h modules,
+   but they also need the c-ctype module, and they rely on
+   the mbszero module defining MUSL_LIBC as needed.  */
 #ifndef USE_C_LOCALE
 # define USE_C_LOCALE 0
 #endif
 
-#if USE_C_LOCALE
+/* On recent-enough Android, Darwin/iOS/macOS and musl,
+   the "C" locale uses UTF-8, contrary to POSIX.  */
+#if (defined __ANDROID__ || (defined __APPLE__ && defined __MACH__) \
+     || defined MUSL_LIBC)
+# define C_LOCALE_MIGHT_BE_MULTIBYTE true
+#else
+# define C_LOCALE_MIGHT_BE_MULTIBYTE false
+#endif
+
+#if USE_C_LOCALE && !C_LOCALE_MIGHT_BE_MULTIBYTE
 # include <c-ctype.h>
 typedef unsigned char wch;
 typedef struct incomplete_mbstate *mbstate;
@@ -58,15 +68,20 @@ typedef struct incomplete_mbstate *mbstate;
 #else
 # include <ctype.h>
 # include <wchar.h>
-# include <uchar.h>
-typedef char32_t wch;
 typedef mbstate_t mbstate;
+# if USE_C_LOCALE
+# include <wctype.h>
+typedef wchar_t wch;
+# else
+#  include <uchar.h>
+typedef char32_t wch;
+# endif
 #endif
 
 static void
 mbs_clear (MAYBE_UNUSED mbstate *ps)
 {
-#if !USE_C_LOCALE
+#if !USE_C_LOCALE || C_LOCALE_MIGHT_BE_MULTIBYTE
   mbszero (ps);
 #endif
 }
@@ -74,30 +89,34 @@ mbs_clear (MAYBE_UNUSED mbstate *ps)
 static size_t
 mbrtowch (wch *pwc, char const *s, size_t n, MAYBE_UNUSED mbstate *ps)
 {
-#if USE_C_LOCALE
-  return n && (*pwc = *s);
-#else
+#if !USE_C_LOCALE
   return mbrtoc32 (pwc, s, n, ps);
+#elif C_LOCALE_MIGHT_BE_MULTIBYTE
+  return mbrtowc (pwc, s, n, ps);
+#else
+  return n && (*pwc = *s);
 #endif
 }
 
 static bool
-wchisprint (wch w)
+chisprint (unsigned char c)
 {
 #if USE_C_LOCALE
-  return c_isprint (w);
+  return c_isprint (c);
 #else
-  return c32isprint (w);
+  return isprint (c) != 0;
 #endif
 }
 
 static bool
-chisprint (unsigned char c)
+wchisprint (wch w)
 {
-#if USE_C_LOCALE
-  return c_isprint (c);
+#if !USE_C_LOCALE
+  return c32isprint (w);
+#elif C_LOCALE_MIGHT_BE_MULTIBYTE
+  return iswprint (w);
 #else
-  return isprint (c) != 0;
+  return chisprint (w);
 #endif
 }
 
@@ -317,7 +336,8 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize,
                           char const *left_quote,
                           char const *right_quote)
 {
-  bool unibyte_locale = USE_C_LOCALE || MB_CUR_MAX == 1;
+  bool unibyte_locale = ((USE_C_LOCALE && !C_LOCALE_MIGHT_BE_MULTIBYTE)
+                         || MB_CUR_MAX == 1);
 
   size_t len = 0;
   size_t orig_buffersize = 0;
-- 
2.53.0

Reply via email to