* lib/localeinfo.c (CHAR32_T_IS_UNICODE): New macro.
(case_folded_counterparts): In single-byte locales where
char32_t is not known to be Unicode, check all 255 possibilities
instead of hoping that char32_t is Unicode.
* lib/localeinfo.h (CASE_FOLDED_BUFSIZE): Increase to 255.
* modules/localeinfo (Depends-on): Add btoc32.
---
 ChangeLog          |  8 ++++++++
 lib/localeinfo.c   | 50 +++++++++++++++++++++++++++++++++++++---------
 lib/localeinfo.h   |  6 +++---
 modules/localeinfo |  1 +
 4 files changed, 53 insertions(+), 12 deletions(-)

diff --git a/ChangeLog b/ChangeLog
index 3f1ddaa6f0..8853ded780 100644
--- a/ChangeLog
+++ b/ChangeLog
@@ -1,5 +1,13 @@
 2026-05-05  Paul Eggert  <[email protected]>
 
+       localeinfo: port to single-byte non-__STDC_ISO_10646__
+       * lib/localeinfo.c (CHAR32_T_IS_UNICODE): New macro.
+       (case_folded_counterparts): In single-byte locales where
+       char32_t is not known to be Unicode, check all 255 possibilities
+       instead of hoping that char32_t is Unicode.
+       * lib/localeinfo.h (CASE_FOLDED_BUFSIZE): Increase to 255.
+       * modules/localeinfo (Depends-on): Add btoc32.
+
        localeinfo: don’t check U+03F2 in newer glibc
        * lib/localeinfo.c (lonesome_lower): Omit U+03F2 in recent Unicode.
 
diff --git a/lib/localeinfo.c b/lib/localeinfo.c
index 5a51d6105c..9e9bacad9f 100644
--- a/lib/localeinfo.c
+++ b/lib/localeinfo.c
@@ -127,6 +127,18 @@ static unsigned short int const lonesome_lower[] =
    c32tolower, and 1 for each entry in LONESOME_LOWER.  */
 verify (1 + 1 + countof (lonesome_lower) <= CASE_FOLDED_BUFSIZE);
 
+/* Whether char32_t values are Unicode code points.
+   It is OK if only UTF-16 is supported,
+   since this file converts only single-byte encodings to char32_t
+   and in practice these encodings convert to characters in the BMP.  */
+#ifdef GL_CHAR32_T_IS_UNICODE
+# define CHAR32_T_IS_UNICODE GL_CHAR32_T_IS_UNICODE /* uchar-h-c23 */
+#elif defined __STDC_ISO_10646__
+# define CHAR32_T_IS_UNICODE 1 /* glibc, musl libc, Cygwin */
+#else
+# define CHAR32_T_IS_UNICODE 0
+#endif
+
 /* Find the characters equal to C after case-folding, other than C
    itself, and store them into FOLDED.  Return the number of characters
    stored; this is zero if C is WEOF.  */
@@ -136,16 +148,36 @@ case_folded_counterparts (wint_t c, char32_t folded[CASE_FOLDED_BUFSIZE])
 {
   int n = 0;
   wint_t uc = c32toupper (c);
-  wint_t lc = c32tolower (uc);
-  if (uc != c)
-    folded[n++] = uc;
-  if (lc != uc && lc != c && c32toupper (lc) == uc)
-    folded[n++] = lc;
-  for (int i = 0; i < countof (lonesome_lower); i++)
+
+  if (CHAR32_T_IS_UNICODE || 1 < MB_CUR_MAX)
     {
-      wint_t li = lonesome_lower[i];
-      if (li != lc && li != uc && li != c && c32toupper (li) == uc)
-        folded[n++] = li;
+      /* char32_t is Unicode, or this is a multibyte locale where
+         it is impractical to look for all case-folded counterparts
+         and where guessing Unicode will not produce false positives
+         though it may miss some case-folded counterparts.  */
+      wint_t lc = c32tolower (uc);
+      if (uc != c)
+        folded[n++] = uc;
+      if (lc != uc && lc != c && c32toupper (lc) == uc)
+        folded[n++] = lc;
+      for (int i = 0; i < countof (lonesome_lower); i++)
+        {
+          wint_t li = lonesome_lower[i];
+          if (li != lc && li != uc && li != c && c32toupper (li) == uc)
+            folded[n++] = li;
+        }
     }
+  else if (c != WEOF)
+    {
+      /* A single-byte locale where it is not known that char32_t is Unicode,
+         and C is not WEOF.  Check all 255 possibilities for counterparts.  */
+        for (int i = 1; i <= UCHAR_MAX; i++)
+          {
+            wint_t li = btoc32 (i);
+            if (li != c && c32toupper (li) == uc)
+              folded[n++] = li;
+          }
+    }
+
   return n;
 }
diff --git a/lib/localeinfo.h b/lib/localeinfo.h
index f8b3c970d9..6f8e0addfd 100644
--- a/lib/localeinfo.h
+++ b/lib/localeinfo.h
@@ -54,9 +54,9 @@ struct localeinfo
 extern void init_localeinfo (struct localeinfo *);
 
 /* Maximum number of characters that can be the case-folded
-   counterparts of a single character, not counting the character
-   itself.  This is a generous upper bound.  */
-enum { CASE_FOLDED_BUFSIZE = 32 };
+   counterparts of a single character, not counting the character itself.
+   It is 256 byte values minus one for U+0000, a generous upper bound.  */
+enum { CASE_FOLDED_BUFSIZE = (unsigned char) -1 };
 
 extern int case_folded_counterparts (wint_t, char32_t[CASE_FOLDED_BUFSIZE]);
 
diff --git a/modules/localeinfo b/modules/localeinfo
index 2fc871ee10..536a2a715b 100644
--- a/modules/localeinfo
+++ b/modules/localeinfo
@@ -7,6 +7,7 @@ lib/localeinfo.h
 
 Depends-on:
 bool
+btoc32
 c32tolower
 c32toupper
 c99
-- 
2.54.0


Reply via email to