On 2026-05-25 15:15, Bruno Haible wrote:
> Paul Eggert wrote:
>> - bool unibyte_locale = MB_CUR_MAX == 1;
>> + bool unibyte_locale = USE_C_LOCALE || MB_CUR_MAX == 1;
> I think this part causes a regression on platforms where the "C"
> locale uses UTF-8 encoding (macOS, Haiku):
Thanks for mentioning that. I installed the attached to try to fix this.
From 53d4558960659ba7c4e9e2757bfb0977a5027fae Mon Sep 17 00:00:00 2001
From: Paul Eggert <[email protected]>
Date: Mon, 25 May 2026 17:08:48 -0700
Subject: [PATCH] quotearg: be nicer on macOS etc
Problem reported by Bruno Haible in:
https://lists.gnu.org/r/bug-gnulib/2026-05/msg00150.html
* lib/quotearg.c (C_LOCALE_MIGHT_BE_MULTIBYTE): New macro.
(wch, mbstate, mbs_clear, mbrtowch, chisprint, wchisprint)
(quotearg_buffer_restyled): Fall back on <wchar.h> and <wctype.h>
functions if USE_C_LOCALE and C_LOCALE_MIGHT_BE_MULTIBYTE.
---
ChangeLog | 10 +++++++++
lib/quotearg.c | 56 ++++++++++++++++++++++++++++++++++----------------
2 files changed, 48 insertions(+), 18 deletions(-)
diff --git a/ChangeLog b/ChangeLog
index b486911abe..ea72f7062e 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,3 +1,13 @@
+2026-05-25 Paul Eggert <[email protected]>
+
+ quotearg: be nicer on macOS etc
+ Problem reported by Bruno Haible in:
+ https://lists.gnu.org/r/bug-gnulib/2026-05/msg00150.html
+ * lib/quotearg.c (C_LOCALE_MIGHT_BE_MULTIBYTE): New macro.
+ (wch, mbstate, mbs_clear, mbrtowch, chisprint, wchisprint)
+ (quotearg_buffer_restyled): Fall back on <wchar.h> and <wctype.h>
+ functions if USE_C_LOCALE and C_LOCALE_MIGHT_BE_MULTIBYTE.
+
2026-05-25 Bruno Haible <[email protected]>
mbuiterf: Implement multi-byte per encoding error (MEE) consistently.
diff --git a/lib/quotearg.c b/lib/quotearg.c
index 21014d4782..3c2bf946ed 100644
--- a/lib/quotearg.c
+++ b/lib/quotearg.c
@@ -39,16 +39,26 @@
#include <stdlib.h>
#include <string.h>
-/* If USE_C_LOCALE is set to 1, this file defines a function that uses the
+/* If USE_C_LOCALE is nonzero, this file defines a function that uses the
"C" locale, regardless of the current locale. Applications
defining this macro might avoid the need for Gnulib's c32isprint,
- gettext-h, mbrtoc32, mbsinit, mbszero, wchar-h, and uchar-h modules,
- but they also need the c-ctype module. */
+ gettext-h, mbrtoc32, mbsinit, wchar-h, and uchar-h modules,
+ but they also need the c-ctype module, and they rely on
+ the mbszero module defining MUSL_LIBC as needed. */
#ifndef USE_C_LOCALE
# define USE_C_LOCALE 0
#endif
-#if USE_C_LOCALE
+/* On recent-enough Android, Darwin/iOS/macOS and musl,
+ the "C" locale uses UTF-8, contrary to POSIX. */
+#if (defined __ANDROID__ || (defined __APPLE__ && defined __MACH__) \
+ || defined MUSL_LIBC)
+# define C_LOCALE_MIGHT_BE_MULTIBYTE true
+#else
+# define C_LOCALE_MIGHT_BE_MULTIBYTE false
+#endif
+
+#if USE_C_LOCALE && !C_LOCALE_MIGHT_BE_MULTIBYTE
# include <c-ctype.h>
typedef unsigned char wch;
typedef struct incomplete_mbstate *mbstate;
@@ -58,15 +68,20 @@ typedef struct incomplete_mbstate *mbstate;
#else
# include <ctype.h>
# include <wchar.h>
-# include <uchar.h>
-typedef char32_t wch;
typedef mbstate_t mbstate;
+# if USE_C_LOCALE
+# include <wctype.h>
+typedef wchar_t wch;
+# else
+# include <uchar.h>
+typedef char32_t wch;
+# endif
#endif
static void
mbs_clear (MAYBE_UNUSED mbstate *ps)
{
-#if !USE_C_LOCALE
+#if !USE_C_LOCALE || C_LOCALE_MIGHT_BE_MULTIBYTE
mbszero (ps);
#endif
}
@@ -74,30 +89,34 @@ mbs_clear (MAYBE_UNUSED mbstate *ps)
static size_t
mbrtowch (wch *pwc, char const *s, size_t n, MAYBE_UNUSED mbstate *ps)
{
-#if USE_C_LOCALE
- return n && (*pwc = *s);
-#else
+#if !USE_C_LOCALE
return mbrtoc32 (pwc, s, n, ps);
+#elif C_LOCALE_MIGHT_BE_MULTIBYTE
+ return mbrtowc (pwc, s, n, ps);
+#else
+ return n && (*pwc = *s);
#endif
}
static bool
-wchisprint (wch w)
+chisprint (unsigned char c)
{
#if USE_C_LOCALE
- return c_isprint (w);
+ return c_isprint (c);
#else
- return c32isprint (w);
+ return isprint (c) != 0;
#endif
}
static bool
-chisprint (unsigned char c)
+wchisprint (wch w)
{
-#if USE_C_LOCALE
- return c_isprint (c);
+#if !USE_C_LOCALE
+ return c32isprint (w);
+#elif C_LOCALE_MIGHT_BE_MULTIBYTE
+ return iswprint (w);
#else
- return isprint (c) != 0;
+ return chisprint (w);
#endif
}
@@ -317,7 +336,8 @@ quotearg_buffer_restyled (char *buffer, size_t buffersize,
char const *left_quote,
char const *right_quote)
{
- bool unibyte_locale = USE_C_LOCALE || MB_CUR_MAX == 1;
+ bool unibyte_locale = ((USE_C_LOCALE && !C_LOCALE_MIGHT_BE_MULTIBYTE)
+ || MB_CUR_MAX == 1);
size_t len = 0;
size_t orig_buffersize = 0;
--
2.53.0