This is an automated email from the ASF dual-hosted git repository.

xiaoxiang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nuttx.git

commit 673a2e0136c9d7f3d6137a125769b1dbea9796f0
Author: Jiuzhu Dong <dongjiuz...@xiaomi.com>
AuthorDate: Sun May 29 16:27:52 2022 +0800

    libc/wchar: support wchar
    
    Implemented according to https://en.wikipedia.org/wiki/UTF-8
    
    Signed-off-by: Jiuzhu Dong <dongjiuz...@xiaomi.com>
---
 include/limits.h                 |   2 +-
 include/stdlib.h                 |   2 +-
 libs/libc/stdlib/lib_mbtowc.c    |   9 +--
 libs/libc/stdlib/lib_wctomb.c    |  18 +----
 libs/libc/wchar/Make.defs        |   2 +-
 libs/libc/wchar/lib_mbrtowc.c    | 140 +++++++++++++++++++++++++++++++++++----
 libs/libc/wchar/lib_mbsinit.c    |  42 ++++++++++++
 libs/libc/wchar/lib_mbsnrtowcs.c |  56 +++++++++++++---
 libs/libc/wchar/lib_wcrtomb.c    |  37 ++++++++---
 libs/libc/wchar/lib_wcsnrtombs.c |  74 +++++++++++----------
 10 files changed, 288 insertions(+), 94 deletions(-)

diff --git a/include/limits.h b/include/limits.h
index df8355b787..5b7a8adf24 100644
--- a/include/limits.h
+++ b/include/limits.h
@@ -60,7 +60,7 @@
  * the correct value.
  */
 
-#define MB_LEN_MAX            1
+#define MB_LEN_MAX            4
 
 /* Configurable limits required by POSIX ****************************************
  *
diff --git a/include/stdlib.h b/include/stdlib.h
index 255341f997..72724061b1 100644
--- a/include/stdlib.h
+++ b/include/stdlib.h
@@ -56,7 +56,7 @@
  * character specified by the current locale.
  */
 
-#define MB_CUR_MAX 1
+#define MB_CUR_MAX 4
 
 /* The environ variable, normally 'char **environ;' is not implemented as a
  * function call.  However, get_environ_ptr() can be used in its place.
diff --git a/libs/libc/stdlib/lib_mbtowc.c b/libs/libc/stdlib/lib_mbtowc.c
index ff3ca71e01..3ea8de49c9 100644
--- a/libs/libc/stdlib/lib_mbtowc.c
+++ b/libs/libc/stdlib/lib_mbtowc.c
@@ -40,7 +40,7 @@
  ****************************************************************************/
 
 /****************************************************************************
- * Name: mbtowc.c
+ * Name: mbtowc
  *
  * Description:
  *   Minimal multibyte to wide char converter
@@ -59,10 +59,5 @@ int mbtowc(FAR wchar_t *pwc, FAR const char *s, size_t n)
       return -1;
     }
 
-  if (pwc)
-    {
-      *pwc = (wchar_t)*s;
-    }
-
-  return (*s != '\0');
+  return mbrtowc(pwc, s, n, NULL);
 }
diff --git a/libs/libc/stdlib/lib_wctomb.c b/libs/libc/stdlib/lib_wctomb.c
index 663c372e46..855ed1c904 100644
--- a/libs/libc/stdlib/lib_wctomb.c
+++ b/libs/libc/stdlib/lib_wctomb.c
@@ -34,10 +34,8 @@
  * Included Files
  ****************************************************************************/
 
-#include <string.h>
 #include <stdlib.h>
 #include <wchar.h>
-#include <errno.h>
 
 /****************************************************************************
  * Public Functions
@@ -53,19 +51,9 @@
 
 int wctomb(FAR char *s, wchar_t wc)
 {
-  if (s == NULL)
-    {
-      return 0;
-    }
-
-  /* Verify that wchar is a valid single-byte character.  */
-
-  if ((size_t) wc >= 0x100)
-    {
-      set_errno(EILSEQ);
-      return -1;
-    }
-
-  *s = (char)wc;
-  return 1;
+  /* All work is delegated to wcrtomb(), which returns 0 when s is NULL
+   * and reports unencodable values by returning -1 with errno EILSEQ.
+   */
+
+  return wcrtomb(s, wc, NULL);
 }
diff --git a/libs/libc/wchar/Make.defs b/libs/libc/wchar/Make.defs
index 6673033513..ece1358b87 100644
--- a/libs/libc/wchar/Make.defs
+++ b/libs/libc/wchar/Make.defs
@@ -25,7 +25,7 @@ CSRCS += lib_wmemmove.c lib_wmemset.c lib_btowc.c lib_mbrtowc.c lib_wctob.c
 CSRCS += lib_wcslcpy.c lib_wcsxfrm.c lib_wcrtomb.c lib_wcsftime.c
 CSRCS += lib_wcscoll.c lib_wcstol.c lib_wcstoll.c lib_wcstoul.c
 CSRCS += lib_wcstoull.c lib_wcstold.c lib_wcstof.c lib_wcstod.c
-CSRCS += lib_swprintf.c lib_mbsnrtowcs.c lib_wcsnrtombs.c
+CSRCS += lib_swprintf.c lib_mbsnrtowcs.c lib_wcsnrtombs.c lib_mbsinit.c
 CSRCS += lib_mbrlen.c lib_mbsrtowcs.c lib_wcsrtombs.c
 
 # Add the wchar directory to the build
diff --git a/libs/libc/wchar/lib_mbrtowc.c b/libs/libc/wchar/lib_mbrtowc.c
index c8eb92303a..2f2f79ade8 100644
--- a/libs/libc/wchar/lib_mbrtowc.c
+++ b/libs/libc/wchar/lib_mbrtowc.c
@@ -32,12 +32,62 @@
  * Included Files
  ****************************************************************************/
 
-#include <stdlib.h>
-#include <stdio.h>
 #include <errno.h>
-#include <string.h>
 #include <wchar.h>
 
+/****************************************************************************
+ * Pre-processor Definitions
+ ****************************************************************************/
+
+/* Implemented according to https://en.wikipedia.org/wiki/UTF-8 */
+
+#define SA        0xc2u  /* Lowest lead byte accepted by mbrtowc() */
+#define SB        0xf4u  /* Highest lead byte accepted by mbrtowc() */
+
+/* Upper 6 state bits hold a negative offset used to bounds-check the next
+ * byte.  OOB is equivalent to: (((b) - 0x80) | ((b) + offset)) & ~0x3f
+ */
+
+#define OOB(c, b) (((((b) >> 3) - 0x10) | \
+                   (((b) >> 3) + ((int32_t)(c) >> 26))) & ~7)
+
+/* Interval [a,b): a must be 0x80 or b must be 0xc0; lower 3 bits clear. */
+
+#define R(a, b)   ((uint32_t)((uint32_t)((a) == 0x80 ? 0x40u - (b) : \
+                                                       0u - (a)) << 23))
+
+#define C(x)      ((x) < 2 ? -1 : (R(0x80, 0xc0) | (x)))
+#define D(x)      C((x) + 16)
+#define E(x)      (((x) == 0 ? R(0xa0, 0xc0) : \
+                    (x) == 0xd ? R(0x80, 0xa0) : R(0x80, 0xc0)) \
+                   | (R(0x80, 0xc0) >> 6) \
+                   | (x))
+#define F(x)      (((x) >= 5 ? 0 : \
+                    (x) == 0 ? R(0x90, 0xc0) : \
+                    (x) == 4 ? R(0x80, 0x90) : R(0x80, 0xc0)) \
+                   | (R(0x80, 0xc0) >> 6) \
+                   | (R(0x80, 0xc0) >> 12) \
+                   | (x))
+
+/****************************************************************************
+ * Private Data
+ ****************************************************************************/
+
+/* g_bittab holds one initial decoder state per lead byte SA..SB.  See the
+ * "Codepage layout" section of https://en.wikipedia.org/wiki/UTF-8.
+ */
+
+static const uint32_t g_bittab[] =
+{
+                  C(0x2), C(0x3), C(0x4), C(0x5), C(0x6), C(0x7),
+  C(0x8), C(0x9), C(0xa), C(0xb), C(0xc), C(0xd), C(0xe), C(0xf),
+  D(0x0), D(0x1), D(0x2), D(0x3), D(0x4), D(0x5), D(0x6), D(0x7),
+  D(0x8), D(0x9), D(0xa), D(0xb), D(0xc), D(0xd), D(0xe), D(0xf),
+  E(0x0), E(0x1), E(0x2), E(0x3), E(0x4), E(0x5), E(0x6), E(0x7),
+  E(0x8), E(0x9), E(0xa), E(0xb), E(0xc), E(0xd), E(0xe), E(0xf),
+  F(0x0), F(0x1), F(0x2), F(0x3), F(0x4)
+};
+
 /****************************************************************************
  * Public Functions
  ****************************************************************************/
@@ -53,20 +103,118 @@
 size_t mbrtowc(FAR wchar_t *pwc, FAR const char *s,
                size_t n, FAR mbstate_t *ps)
 {
-  FAR const char *e = s;
-  size_t retval = 0;
+  FAR const unsigned char *src = (FAR const void *)s;
+  static mbstate_t state;
+  size_t num = n;
+  wchar_t dummy;
+  uint32_t c;
+
+  /* With no caller-supplied state, use the function's own static one. */
+
+  if (ps == NULL)
+    {
+      ps = &state;
+    }
 
-  if (s == NULL)
+  /* The first 32-bit word of the state holds the partially decoded
+   * character; zero means no character is in progress.
+   */
+
+  c = *(FAR uint32_t *)ps;
+  if (src == NULL)
+    {
+      /* No input: fail if a character is still pending, else return 0. */
+
+      if (c != 0)
+        {
+          goto ilseq;
+        }
+
+      return 0;
+    }
+  else if (pwc == NULL)
     {
-      s = e = "";
-      n = 1;
+      /* The caller does not want the result; decode into a scratch. */
+
+      pwc = &dummy;
     }
 
-  retval = mbsnrtowcs(pwc, &e, 1, n, ps);
-  if (retval == 1)
+  /* Nothing to examine: return (size_t)-2 without touching the state. */
+
+  if (n == 0)
     {
-      retval = e - s;
+      return -2;
     }
 
-  return retval;
+  if (c == 0)
+    {
+      /* ASCII fast path: yields 0 for the null character, otherwise 1. */
+
+      if (*src < 0x80)
+        {
+          return !!(*pwc = *src);
+        }
+
+      /* Bytes outside the lead-byte range SA..SB are rejected. */
+
+      if (*src - SA > SB - SA)
+        {
+          goto ilseq;
+        }
+
+      /* Fetch the initial decoder state for this lead byte. */
+
+      c = g_bittab[*src++ - SA];
+      n--;
+    }
+
+  if (n != 0)
+    {
+      /* Check the next byte against the bounds carried in the upper
+       * bits of the state (see OOB).
+       */
+
+      if (OOB(c, *src) != 0)
+        {
+          goto ilseq;
+        }
+
+loop:
+      /* Shift in six payload bits.  Once bit 31 is clear the character
+       * is complete: the state is reset and c is stored through pwc.
+       */
+
+      c = (c << 6) | (*src++ - 0x80);
+      n--;
+      if ((c >> 31) == 0)
+        {
+          *(FAR uint32_t *)ps = 0;
+          *pwc = c;
+          return num - n;
+        }
+
+      if (n != 0)
+        {
+          /* Later bytes must lie in the range 0x80..0xbf. */
+
+          if (*src - 0x80u >= 0x40)
+            {
+              goto ilseq;
+            }
+
+          goto loop;
+        }
+    }
+
+  /* Input ran out mid-character: save the progress for the next call. */
+
+  *(FAR uint32_t *)ps = c;
+  return -2;
+
+ilseq:
+  /* Invalid input: clear the state and fail with EILSEQ. */
+
+  *(FAR uint32_t *)ps = 0;
+  set_errno(EILSEQ);
+  return -1;
 }
diff --git a/libs/libc/wchar/lib_mbsinit.c b/libs/libc/wchar/lib_mbsinit.c
new file mode 100644
index 0000000000..39288edb9c
--- /dev/null
+++ b/libs/libc/wchar/lib_mbsinit.c
@@ -0,0 +1,42 @@
+/****************************************************************************
+ * libs/libc/wchar/lib_mbsinit.c
+ *
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.  The
+ * ASF licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the
+ * License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.  See the
+ * License for the specific language governing permissions and limitations
+ * under the License.
+ *
+ ****************************************************************************/
+
+/****************************************************************************
+ * Included Files
+ ****************************************************************************/
+
+#include <wchar.h>
+
+/****************************************************************************
+ * Public Functions
+ ****************************************************************************/
+
+/****************************************************************************
+ * Name: mbsinit
+ *
+ * Description:
+ *   Return nonzero if st is NULL or its first 32-bit word is zero.
+ *
+ ****************************************************************************/
+
+int mbsinit(FAR const mbstate_t *st)
+{
+  return st == NULL || !*(FAR uint32_t *)st;
+}
diff --git a/libs/libc/wchar/lib_mbsnrtowcs.c b/libs/libc/wchar/lib_mbsnrtowcs.c
index 3899cade3f..c81356f261 100644
--- a/libs/libc/wchar/lib_mbsnrtowcs.c
+++ b/libs/libc/wchar/lib_mbsnrtowcs.c
@@ -22,8 +22,6 @@
  * Included Files
  ****************************************************************************/
 
-#include <sys/types.h>
-#include <string.h>
 #include <wchar.h>
 
 /****************************************************************************
@@ -68,23 +66,70 @@
 size_t mbsnrtowcs(FAR wchar_t *dst, FAR const char **src, size_t nms,
                   size_t len, FAR mbstate_t *ps)
 {
-  size_t i;
+  FAR const char *s = *src;
+  FAR wchar_t *ws = dst;
+  size_t cnt = 0;
+  size_t l;
 
   if (dst == NULL)
     {
-      return strnlen(*src, nms);
+      /* Count-only mode: no output buffer, so lift the output limit. */
+
+      len = SIZE_MAX;
     }
 
-  for (i = 0; i < nms && i < len; i++)
+  if (s != NULL)
     {
-      dst[i] = (wchar_t)(*src)[i];
-      if (dst[i] == L'\0')
+      while (len > 0 && nms > 0)
         {
-          *src = NULL;
-          return i;
+          l = mbrtowc(ws, s, nms, ps);
+
+          /* 0, (size_t)-1 and (size_t)-2 all end the conversion. */
+
+          if ((ssize_t)l <= 0)
+            {
+              if ((ssize_t)l == -2)
+                {
+                  /* The input buffer ends with an incomplete character:
+                   * advance s past the remaining bytes and stop.
+                   */
+
+                  s += nms;
+                }
+              else if (l == 0)
+                {
+                  /* Null character converted: the source is exhausted. */
+
+                  s = NULL;
+                }
+              else
+                {
+                  /* Invalid sequence: hand back mbrtowc()'s error value. */
+
+                  cnt = l;
+                }
+
+              break;
+            }
+
+          s += l;
+          nms -= l;
+          if (ws != NULL)
+            {
+              ws++;
+            }
+
+          len--;
+          cnt++;
         }
     }
 
-  *src += i;
-  return i;
+  /* *src is only updated when a destination buffer was supplied. */
+
+  if (dst != NULL)
+    {
+      *src = s;
+    }
+
+  return cnt;
 }
diff --git a/libs/libc/wchar/lib_wcrtomb.c b/libs/libc/wchar/lib_wcrtomb.c
index 061b80150b..e2a0a575e6 100644
--- a/libs/libc/wchar/lib_wcrtomb.c
+++ b/libs/libc/wchar/lib_wcrtomb.c
@@ -34,10 +34,7 @@
  * Included Files
  ****************************************************************************/
 
-#include <string.h>
 #include <wchar.h>
-#include <stdlib.h>
-#include <stdio.h>
 #include <errno.h>
 
 /****************************************************************************
@@ -54,17 +51,53 @@
 
 size_t wcrtomb(FAR char *s, wchar_t wc, FAR mbstate_t *ps)
 {
-  int retval = 0;
-  char buf[MB_LEN_MAX];
-
+  /* The encoding is stateless, so ps is accepted but never consulted. */
+
   if (s == NULL)
     {
-      retval = wctomb(buf, wc);
+      return 0;
+    }
+  else if ((unsigned)wc < 0x80)
+    {
+      /* U+0000..U+007f: one byte, 0xxxxxxx */
+
+      *s = wc;
+      return 1;
+    }
+  else if ((unsigned)wc < 0x800)
+    {
+      /* U+0080..U+07ff: two bytes, 110xxxxx 10xxxxxx */
+
+      *s++ = 0xc0 | (wc >> 6);
+      *s = 0x80 | (wc & 0x3f);
+      return 2;
+    }
+  else if ((unsigned)wc < 0xd800 || (unsigned)wc - 0xe000 < 0x2000)
+    {
+      /* U+0800..U+ffff: three bytes, 1110xxxx 10xxxxxx 10xxxxxx.  The
+       * surrogate range 0xd800..0xdfff is deliberately skipped: it is
+       * not valid UTF-8, and mbrtowc() rejects lead byte 0xed followed
+       * by 0xa0..0xbf, so encoding it here could not be decoded again.
+       */
+
+      *s++ = 0xe0 | (wc >> 12);
+      *s++ = 0x80 | ((wc >> 6) & 0x3f);
+      *s = 0x80 | (wc & 0x3f);
+      return 3;
     }
-  else
+  else if ((unsigned long)wc - 0x10000 < 0x100000)
     {
-      retval = wctomb(s, wc);
+      /* U+10000..U+10ffff: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx */
+
+      *s++ = 0xf0 | ((unsigned long)wc >> 18);
+      *s++ = 0x80 | ((wc >> 12) & 0x3f);
+      *s++ = 0x80 | ((wc >> 6) & 0x3f);
+      *s = 0x80 | (wc & 0x3f);
+      return 4;
     }
 
-  return retval;
+  /* Surrogates and values above U+10ffff cannot be encoded. */
+
+  set_errno(EILSEQ);
+  return -1;
 }
diff --git a/libs/libc/wchar/lib_wcsnrtombs.c b/libs/libc/wchar/lib_wcsnrtombs.c
index 1b959767c8..a0f8c4aafa 100644
--- a/libs/libc/wchar/lib_wcsnrtombs.c
+++ b/libs/libc/wchar/lib_wcsnrtombs.c
@@ -22,12 +22,9 @@
  * Included Files
  ****************************************************************************/
 
-#include <sys/types.h>
 #include <wchar.h>
-
-#include <stdlib.h>
-#include <stdio.h>
-#include <errno.h>
+#include <string.h>
+#include <limits.h>
 
 /****************************************************************************
  * Public Functions
@@ -70,48 +67,72 @@
 size_t wcsnrtombs(FAR char *dst, FAR const wchar_t **src, size_t nwc,
                   size_t len, FAR mbstate_t *ps)
 {
-  size_t i;
+  FAR const wchar_t *ws = *src;
+  size_t cnt = 0;
 
   if (dst == NULL)
     {
-      for (i = 0; i < nwc; i++)
+      /* Count-only mode: nothing is stored, so no space is available. */
+
+      len = 0;
+    }
+
+  while (ws != NULL && nwc != 0)
+    {
+      char tmp[MB_LEN_MAX];
+      size_t res;
+
+      /* Encode into a scratch buffer whenever fewer than MB_LEN_MAX
+       * bytes remain, so wcrtomb() can never write past the end of dst.
+       */
+
+      res = wcrtomb(len < MB_LEN_MAX ? tmp : dst, *ws, ps);
+      if ((ssize_t)res < 0)
         {
-          wchar_t wc = (*src)[i];
+          cnt = res;
+          break;
+        }
 
-          if (wc < 0 || wc > 0xff)
-            {
-              set_errno(EILSEQ);
-              return -1;
-            }
-
-          if (wc == L'\0')
+      if (dst != NULL)
+        {
+          if (len < MB_LEN_MAX)
             {
-              return i;
+              if (res > len)
+                {
+                  /* Character does not fit; leave it unconverted. */
+
+                  break;
+                }
+
+              memcpy(dst, tmp, res);
             }
+
+          dst += res;
+          len -= res;
         }
 
-      return i;
+      /* Test for the terminating null only after it has been stored
+       * above, so that dst receives the terminator when it fits.  The
+       * terminator is not included in the returned count.
+       */
+
+      if (*ws == 0)
+        {
+          ws = NULL;
+          break;
+        }
+
+      ws++;
+      nwc--;
+      cnt += res;
     }
 
-  for (i = 0; i < nwc && i < len; i++)
+  /* In count-only mode *src is left untouched. */
+
+  if (dst != NULL)
     {
-      wchar_t wc = (*src)[i];
-
-      if (wc < 0 || wc > 0xff)
-        {
-          *src += i;
-          set_errno(EILSEQ);
-          return -1;
-        }
-
-      dst[i] = wc;
-      if (wc == L'\0')
-        {
-          *src = NULL;
-          return i;
-        }
+      *src = ws;
     }
 
-  *src += i;
-  return i;
+  return cnt;
 }

Reply via email to