https://sourceware.org/git/gitweb.cgi?p=newlib-cygwin.git;h=c49bc478b4a7e7d00a0c8540b8d6b6e99453443e

commit c49bc478b4a7e7d00a0c8540b8d6b6e99453443e
Author:     Corinna Vinschen <[email protected]>
AuthorDate: Wed Aug 2 16:55:52 2023 +0200
Commit:     Corinna Vinschen <[email protected]>
CommitDate: Wed Aug 2 16:56:24 2023 +0200

    Cygwin: Add ISO C2X functions c8rtomb, mbrtoc8
    
    Signed-off-by: Corinna Vinschen <[email protected]>

Diff:
---
 winsup/cygwin/include/cygwin/version.h |   3 +-
 winsup/cygwin/include/uchar.h          |  14 ++-
 winsup/cygwin/release/3.5.0            |   2 +-
 winsup/cygwin/strfuncs.cc              | 166 +++++++++++++++++++++++++++++++++
 winsup/doc/new-features.xml            |   6 +-
 5 files changed, 184 insertions(+), 7 deletions(-)

diff --git a/winsup/cygwin/include/cygwin/version.h 
b/winsup/cygwin/include/cygwin/version.h
index 7bc3e5ec3b25..833de646c563 100644
--- a/winsup/cygwin/include/cygwin/version.h
+++ b/winsup/cygwin/include/cygwin/version.h
@@ -482,12 +482,13 @@ details. */
   346: (Belatedly) add posix_spawn_file_actions_addchdir_np,
        posix_spawn_file_actions_addfchdir_np.
   347: Add c16rtomb, c32rtomb, mbrtoc16, mbrtoc32.
+  348: Add c8rtomb, mbrtoc8.
 
   Note that we forgot to bump the api for ualarm, strtoll, strtoull,
   sigaltstack, sethostname. */
 
 #define CYGWIN_VERSION_API_MAJOR 0
-#define CYGWIN_VERSION_API_MINOR 346
+#define CYGWIN_VERSION_API_MINOR 348
 
 /* There is also a compatibity version number associated with the shared memory
    regions.  It is incremented when incompatible changes are made to the shared
diff --git a/winsup/cygwin/include/uchar.h b/winsup/cygwin/include/uchar.h
index bf865ff16e7f..ed548ac7360e 100644
--- a/winsup/cygwin/include/uchar.h
+++ b/winsup/cygwin/include/uchar.h
@@ -4,8 +4,11 @@
 #include <sys/cdefs.h>
 #include <wchar.h>
 
-typedef        __uint16_t      char16_t;
-typedef        __uint32_t      char32_t;
+/* Either C2x or if C++ doesn't already define char8_t */
+#if __ISO_C_VISIBLE >= 2020 && !defined (__cpp_char8_t)
+typedef unsigned char          char8_t;
+#endif
+
 /* C++11 already defines those types. */
 #if !defined (__cplusplus) || (__cplusplus - 0 < 201103L)
 typedef        __uint_least16_t        char16_t;
@@ -14,6 +17,13 @@ typedef      __uint_least32_t        char32_t;
 
 __BEGIN_DECLS
 
+/* Either C2x or if C++ defines char8_t */
+#if __ISO_C_VISIBLE >= 2020 || defined (__cpp_char8_t)
+size_t  c8rtomb(char * __restrict, char8_t, mbstate_t * __restrict);
+size_t mbrtoc8(char8_t * __restrict, const char * __restrict, size_t,
+               mbstate_t * __restrict);
+#endif
+
 size_t c16rtomb(char * __restrict, char16_t, mbstate_t * __restrict);
 size_t mbrtoc16(char16_t * __restrict, const char * __restrict, size_t,
                 mbstate_t * __restrict);
diff --git a/winsup/cygwin/release/3.5.0 b/winsup/cygwin/release/3.5.0
index 7c27e1bb8ca7..d71de50de536 100644
--- a/winsup/cygwin/release/3.5.0
+++ b/winsup/cygwin/release/3.5.0
@@ -27,7 +27,7 @@ What's new:
 - New API calls: posix_spawn_file_actions_addchdir_np,
   posix_spawn_file_actions_addfchdir_np.
 
-- New API calls: c16rtomb, c32rtomb, mbrtoc16, mbrtoc32.
+- New API calls: c8rtomb, c16rtomb, c32rtomb, mbrtoc8, mbrtoc16, mbrtoc32.
 
 What changed:
 -------------
diff --git a/winsup/cygwin/strfuncs.cc b/winsup/cygwin/strfuncs.cc
index 770d40ed8f2f..cbcd0ec464ee 100644
--- a/winsup/cygwin/strfuncs.cc
+++ b/winsup/cygwin/strfuncs.cc
@@ -155,6 +155,103 @@ c16rtomb (char *s, char16_t wc, mbstate_t *ps)
   return wcrtomb (s, (wchar_t) wc, ps);
 }
 
+extern "C" size_t
+c8rtomb (char *s, char8_t c8, mbstate_t *ps)
+{
+  struct _reent *reent = _REENT;
+  char32_t wc;
+  /* Collects UTF-8 octets in *ps; returns 0 until a character is complete. */
+  if (ps == NULL)      /* no state given: use the one stored in reent */
+    {
+      _REENT_CHECK_MISC(reent);
+      ps = &(_REENT_MBRTOWC_STATE(reent));
+    }
+
+  if (s == NULL)       /* no output buffer: only reset the state */
+    {
+      ps->__count = 0;
+      return 1;
+    }
+  if ((ps->__count & 0xff00) != 0xc800)        /* no 0xc8 marker: nothing collected yet */
+    {
+      switch (c8)
+       {
+       case 0 ... 0x7f:        /* single octet */
+         ps->__count = 0;
+         wc = c8;
+         break;
+       case 0xc2 ... 0xf4:     /* valid lead byte */
+         ps->__count = 0xc801;
+         ps->__value.__wchb[0] = c8;
+         return 0;
+       default:
+         goto ilseq;
+       }
+    }
+  else
+    {
+      /* We already collected something... */
+      int idx = ps->__count & 0x3;     /* number of octets stored so far */
+      char8_t &c1 = ps->__value.__wchb[0];
+      char8_t &c2 = ps->__value.__wchb[1];
+      char8_t &c3 = ps->__value.__wchb[2];
+
+      switch (idx)
+       {
+         case 1:
+           /* Annoyingly complex check for validity for 2nd octet. */
+           if (c8 <= 0x7f || c8 >= 0xc0)
+             goto ilseq;
+           if (c1 == 0xe0 && c8 <= 0x9f)
+             goto ilseq;
+           if (c1 == 0xed && c8 >= 0xa0)
+             goto ilseq;
+           if (c1 == 0xf0 && c8 <= 0x8f)
+             goto ilseq;
+           if (c1 == 0xf4 && c8 >= 0x90)
+             goto ilseq;
+           if (c1 >= 0xe0)     /* 3 or 4 octet sequence: store and wait */
+             {
+               ps->__count = 0xc802;
+               c2 = c8;
+               return 0;
+             }
+           wc =   ((c1 & 0x1f) << 6)
+                |  (c8 & 0x3f);
+           break;
+         case 2:
+           if (c8 <= 0x7f || c8 >= 0xc0)
+             goto ilseq;
+           if (c1 >= 0xf0)     /* 4 octet sequence: store and wait */
+             {
+               ps->__count = 0xc803;
+               c3 = c8;
+               return 0;
+             }
+           wc =   ((c1 & 0x0f) << 12)
+                | ((c2 & 0x3f) <<  6)
+                |  (c8 & 0x3f);
+           break;
+         case 3:
+           if (c8 <= 0x7f || c8 >= 0xc0)
+             goto ilseq;
+           wc =   ((c1 & 0x07) << 18)
+                | ((c2 & 0x3f) << 12)
+                | ((c3 & 0x3f) <<  6)
+                |  (c8 & 0x3f);
+           break;
+         default: /* Shouldn't happen */
+           goto ilseq;
+       }
+    }
+  ps->__count = 0;     /* character complete: clear the collection state */
+  return c32rtomb (s, wc, ps);
+ilseq:
+  ps->__count = 0;
+  _REENT_ERRNO(reent) = EILSEQ;
+  return (size_t)(-1);
+}
+
 extern "C" size_t
 mbrtoc32 (char32_t *pwc, const char *s, size_t n, mbstate_t *ps)
 {
@@ -245,6 +342,75 @@ ilseq:
   return (size_t)(-1);
 }
 
+extern "C" size_t
+mbrtoc8 (char8_t *pc8, const char *s, size_t n, mbstate_t *ps)
+{
+  struct _reent *reent = _REENT;
+  size_t len;
+  char32_t wc;
+  /* Converts via mbrtoc32, then hands out the UTF-8 octets one per call. */
+  if (ps == NULL)
+    {
+      _REENT_CHECK_MISC(reent);
+      ps = &(_REENT_MBRTOWC_STATE(reent));
+    }
+
+  if (s == NULL)
+    {
+      /* ps cannot be NULL at this point, so no check is needed. */
+      ps->__count = 0;
+      return 1;
+    }
+  else if ((ps->__count & 0xff00) == 0xc800)
+    {
+      /* Return next utf-8 octet in line.  The remaining count must be
+         written back (also when pc8 is NULL) or the state never drains. */
+      int idx = (ps->__count & 0x3) - 1;
+      if (pc8)
+       *pc8 = ps->__value.__wchb[idx];
+      ps->__count = idx > 0 ? (0xc800 | idx) : 0;
+      return -3;
+    }
+  len = mbrtoc32 (&wc, s, n, ps);
+  /* wc is unset on (size_t)-1/-2; a 0 return (NUL) must still set *pc8. */
+  if (len != (size_t)(-1) && len != (size_t)(-2))
+    {
+      /* octets stored back to front for easier indexing */
+      switch (wc)
+       {
+       case 0 ... 0x7f:
+         ps->__value.__wchb[0] = wc;
+         ps->__count = 0;
+         break;
+       case 0x80 ... 0x7ff:
+         ps->__value.__wchb[1] = 0xc0 | ((wc & 0x7c0) >> 6);
+         ps->__value.__wchb[0] = 0x80 |  (wc &  0x3f);
+         ps->__count = 0xc800 | 1;
+         break;
+       case 0x800 ... 0xffff:
+         ps->__value.__wchb[2] = 0xe0 | ((wc & 0xf000) >> 12);
+         ps->__value.__wchb[1] = 0x80 | ((wc &  0xfc0) >> 6);
+         ps->__value.__wchb[0] = 0x80 |  (wc &   0x3f);
+         ps->__count = 0xc800 | 2;
+         break;
+       case 0x10000 ... 0x10ffff:
+         ps->__value.__wchb[3] = 0xf0 | ((wc & 0x1c0000) >> 18);
+         ps->__value.__wchb[2] = 0x80 | ((wc &  0x3f000) >> 12);
+         ps->__value.__wchb[1] = 0x80 | ((wc &    0xfc0) >> 6);
+         ps->__value.__wchb[0] = 0x80 |  (wc &     0x3f);
+         ps->__count = 0xc800 | 3;
+         break;
+       default:
+         ps->__count = 0;
+         _REENT_ERRNO(reent) = EILSEQ;
+         return (size_t)(-1);
+       }
+      if (pc8)
+       *pc8 = ps->__value.__wchb[ps->__count & 0x3];
+    }
+  return len;
+}
+
 extern "C" size_t
 mbsnrtowci(wint_t *dst, const char **src, size_t nms, size_t len, mbstate_t 
*ps)
 {
diff --git a/winsup/doc/new-features.xml b/winsup/doc/new-features.xml
index b6d3e3a30a36..14644aa85ebe 100644
--- a/winsup/doc/new-features.xml
+++ b/winsup/doc/new-features.xml
@@ -46,12 +46,12 @@ Add support for GB18030 codeset.
 </para></listitem>
 
 <listitem><para>
-- New API calls: posix_spawn_file_actions_addchdir_np,
-  posix_spawn_file_actions_addfchdir_np.
+New API calls: posix_spawn_file_actions_addchdir_np,
+posix_spawn_file_actions_addfchdir_np.
 </para></listitem>
 
 <listitem><para>
-- New API calls: c16rtomb, c32rtomb, mbrtoc16, mbrtoc32.
+New API calls: c8rtomb, c16rtomb, c32rtomb, mbrtoc8, mbrtoc16, mbrtoc32.
 </para></listitem>
 
 </itemizedlist>

Reply via email to