OK, this sounds good to me; __mingw_mbrtowc_cp should not really be visible to 
users.

These are the same patches, except that they declare __mingw_mbrtowc_cp in a new 
internal header, mingw-wchar.h.

Maybe it would be good to check for places where we call any of mbrlen, 
mbrtowc or wcrtomb repeatedly and replace them with internal versions?

- Kirill Makurin
________________________________
From: Pali Rohár <[email protected]>
Sent: Saturday, October 4, 2025 9:56 PM
To: Kirill Makurin <[email protected]>
Cc: mingw-w64-public <[email protected]>
Subject: Re: crt: add internal function __mingw_mbrtowc_cp

Hello, thanks for looking at this. For me it looks good. Just one note.

I would propose to not export that __mingw_mbrtowc_cp function from
public mingw-w64-headers headers at least for now, and rather have it in
some private mingw-w64-crt header only for mingw-w64 CRT code.

As we are still improving the mb* functions, it is better not to
make that function visible to regular applications until the API is
stable; this prevents API breakage in case we need to modify
this code again.

On Saturday 04 October 2025 12:47:33 Kirill Makurin wrote:
> This internal function is used to implement C95 functions mbrlen, mbrtowc and 
> mbsrtowcs. This makes a difference for crtdll.dll, where `___lc_codepage_func` 
> parses the return value of setlocale(LC_ALL, NULL) and converts the code page 
> part to an `int`. With this change `mbsrtowcs` only needs to call both 
> `___lc_codepage_func` and `___mb_cur_max_func` once.
>
> I wasn't sure where to declare __mingw_mbrtowc_cp; I don't feel like creating 
> an internal header file just for one function, so I declared it in wchar.h.
>
> If we're good with these patches, I'll send similar patches for `wcrtomb` and 
> `wcsrtombs`.
>
> I just pushed them to my GitHub fork to run CI tests[1].
>
> - Kirill Makurin
>
> [1] https://github.com/maiddaisuki/mingw-w64/actions/runs/18244326038
From fa081a7a434aaa5d20531f31b1fbbf974dd05879 Mon Sep 17 00:00:00 2001
From: Kirill Makurin <[email protected]>
Date: Sat, 4 Oct 2025 22:15:56 +0900
Subject: [PATCH 1/3] crt: add internal function __mingw_mbrtowc_cp

This function is internally called from mbrlen, mbrtowc and mbsrtowcs
functions.

Previous implementation of mbsrtowcs was calling mbrtowc, which internally
was calling ___lc_codepage_func. Implementation of ___lc_codepage_func for
crtdll.dll is quite expensive as it parses return value of setlocale(LC_ALL, 
NULL).

Using internal __mingw_mbrtowc_cp instead of mbrtowc allows mbsrtowcs to call
both ___lc_codepage_func and ___mb_cur_max_func only once.

Signed-off-by: Kirill Makurin <[email protected]>
---
 mingw-w64-crt/include/mingw-wchar.h | 12 ++++++
 mingw-w64-crt/misc/mbrlen.c         | 13 +++++-
 mingw-w64-crt/misc/mbrtowc.c        | 63 ++++++++++++++++++++++-------
 mingw-w64-crt/misc/mbsrtowcs.c      | 11 +++--
 4 files changed, 81 insertions(+), 18 deletions(-)
 create mode 100644 mingw-w64-crt/include/mingw-wchar.h

diff --git a/mingw-w64-crt/include/mingw-wchar.h 
b/mingw-w64-crt/include/mingw-wchar.h
new file mode 100644
index 000000000..902221083
--- /dev/null
+++ b/mingw-w64-crt/include/mingw-wchar.h
@@ -0,0 +1,12 @@
+/**
+ * This file has no copyright assigned and is placed in the Public Domain.
+ * This file is part of the mingw-w64 runtime package.
+ * No warranty is given; refer to the file DISCLAIMER.PD within this package.
+ */
+
+#ifndef __MINGW_WCHAR_INTERNAL_H
+#define __MINGW_WCHAR_INTERNAL_H
+#include <wchar.h>
+
+size_t __cdecl __mingw_mbrtowc_cp(wchar_t * __restrict__ _DstCh,const char * 
__restrict__ _SrcCh,size_t _SizeInBytes,mbstate_t * __restrict__ _State, 
unsigned _Cp, int _MbCurMax);
+#endif
diff --git a/mingw-w64-crt/misc/mbrlen.c b/mingw-w64-crt/misc/mbrlen.c
index 7b57f5753..73111cf1d 100644
--- a/mingw-w64-crt/misc/mbrlen.c
+++ b/mingw-w64-crt/misc/mbrlen.c
@@ -3,8 +3,12 @@
  * This file is part of the mingw-w64 runtime package.
  * No warranty is given; refer to the file DISCLAIMER.PD within this package.
  */
+#include <locale.h>
+#include <stdlib.h>
 #include <wchar.h>
 
+#include "mingw-wchar.h"
+
 size_t mbrlen (
   const char *__restrict__ mbs,
   size_t count,
@@ -15,5 +19,12 @@ size_t mbrlen (
     static mbstate_t state_mbrlen = {0};
     state = &state_mbrlen;
   }
-  return mbrtowc (NULL, mbs, count, state);
+
+  /* Code page used by current locale */
+  unsigned cp = ___lc_codepage_func ();
+
+  /* Maximum character length used by current locale */
+  int mb_cur_max = ___mb_cur_max_func ();
+
+  return __mingw_mbrtowc_cp (NULL, mbs, count, state, cp, mb_cur_max);
 }
diff --git a/mingw-w64-crt/misc/mbrtowc.c b/mingw-w64-crt/misc/mbrtowc.c
index a6933a0cb..b9324fc95 100644
--- a/mingw-w64-crt/misc/mbrtowc.c
+++ b/mingw-w64-crt/misc/mbrtowc.c
@@ -11,18 +11,38 @@
 #define WIN32_LEAN_AND_MEAN
 #include <windows.h>
 
-size_t mbrtowc (
+#include "mingw-wchar.h"
+
+/**
+ * __mingw_mbrtowc_cp is internal implementation for C95 functions mbrlen,
+ * mbrtowc and mbsrtowcs.
+ *
+ * In order to perform conversion we need the following information:
+ *
+ *  - code page used by active locale (which can be a thread locale for
+ *    msvcr80.dll and later); obtained by calling ___lc_codepage_func
+ *
+ *  - maximum character length in used code page; obtained by calling
+ *    ___mb_cur_max_func
+ *
+ *  - for double-byte code pages, we need to recognize leading bytes in order
+ *    to correctly convert multibyte characters; this can be done with Win32
+ *    function IsDBCSLeadByteEx or CRT function isleadbyte
+ *
+ * crtdll.dll's ___lc_codepage_func is quite expensive as it obtains this
+ * information by parsing return value of setlocale(LC_CTYPE, NULL). Using
+ * __mingw_mbrtowc_cp allows mbsrtowcs call both ___lc_codepage_func and
+ * ___mb_cur_max_func only once.
+ */
+
+size_t __mingw_mbrtowc_cp (
   wchar_t *__restrict__ wc,
   const char *__restrict__ mbs,
   size_t count,
-  mbstate_t *__restrict__ state
+  mbstate_t *__restrict__ state,
+  unsigned cp,
+  int mb_cur_max
 ) {
-  /* Use private `mbstate_t` if caller did not supply one */
-  if (state == NULL) {
-    static mbstate_t state_mbrtowc = {0};
-    state = &state_mbrtowc;
-  }
-
   /**
    * Calling mbrtowc (..., NULL, ..., state) is equivalent to
    *
@@ -44,12 +64,6 @@ size_t mbrtowc (
     return (size_t) -2;
   }
 
-  /* Code page used by current locale */
-  unsigned cp = ___lc_codepage_func ();
-
-  /* Maximum character length used by current locale */
-  int mb_cur_max = ___mb_cur_max_func ();
-
   /* Treat `state` as an array of bytes */
   union {
     mbstate_t state;
@@ -141,3 +155,24 @@ einval:
   errno = EINVAL;
   return (size_t) -1;
 }
+
+size_t mbrtowc (
+  wchar_t *__restrict__ wc,
+  const char *__restrict__ mbs,
+  size_t count,
+  mbstate_t *__restrict__ state
+) {
+  /* Use private `mbstate_t` if caller did not supply one */
+  if (state == NULL) {
+    static mbstate_t state_mbrtowc = {0};
+    state = &state_mbrtowc;
+  }
+
+  /* Code page used by current locale */
+  unsigned cp = ___lc_codepage_func ();
+
+  /* Maximum character length used by current locale */
+  int mb_cur_max = ___mb_cur_max_func ();
+
+  return __mingw_mbrtowc_cp (wc, mbs, count, state, cp, mb_cur_max);
+}
diff --git a/mingw-w64-crt/misc/mbsrtowcs.c b/mingw-w64-crt/misc/mbsrtowcs.c
index e7e4105bb..612c29b03 100644
--- a/mingw-w64-crt/misc/mbsrtowcs.c
+++ b/mingw-w64-crt/misc/mbsrtowcs.c
@@ -4,8 +4,10 @@
  * No warranty is given; refer to the file DISCLAIMER.PD within this package.
  */
 #include <locale.h>
-#include <wchar.h>
 #include <stdlib.h>
+#include <wchar.h>
+
+#include "mingw-wchar.h"
 
 size_t mbsrtowcs (
   wchar_t *wcs,
@@ -29,12 +31,15 @@ size_t mbsrtowcs (
   /* Next multibyte character to convert */
   const char *mbc = *mbs;
 
+  /* Code page used by current locale */
+  unsigned cp = ___lc_codepage_func ();
+
   /* Maximum character length in `cp` */
   int mb_cur_max = ___mb_cur_max_func();
 
   while (1) {
-    const size_t length = mbrtowc (
-      &wc, mbc, mb_cur_max, &conversion_state
+    const size_t length = __mingw_mbrtowc_cp (
+      &wc, mbc, mb_cur_max, &conversion_state, cp, mb_cur_max
     );
 
     /* Conversion failed */
-- 
2.51.0.windows.1

From 617b5505e772c0072cc1df60b7ef35448546b349 Mon Sep 17 00:00:00 2001
From: Kirill Makurin <[email protected]>
Date: Sat, 4 Oct 2025 22:29:10 +0900
Subject: [PATCH 2/3] crt: move definition of __mingw_mbrtowc_cp to a separate
 file

Signed-off-by: Kirill Makurin <[email protected]>
---
 mingw-w64-crt/Makefile.am               |   1 +
 mingw-w64-crt/misc/__mingw_mbrtowc_cp.c | 157 ++++++++++++++++++++++++
 mingw-w64-crt/misc/mbrtowc.c            | 147 ----------------------
 3 files changed, 158 insertions(+), 147 deletions(-)
 create mode 100644 mingw-w64-crt/misc/__mingw_mbrtowc_cp.c

diff --git a/mingw-w64-crt/Makefile.am b/mingw-w64-crt/Makefile.am
index f7ada9062..cae1bf309 100644
--- a/mingw-w64-crt/Makefile.am
+++ b/mingw-w64-crt/Makefile.am
@@ -167,6 +167,7 @@ src_libws2_32=libsrc/ws2_32.c \
 
 # Files included in all libmsvcr*.a
 src_msvcrt_common=\
+  misc/__mingw_mbrtowc_cp.c \
   misc/_onexit.c \
   misc/mbrlen.c \
   misc/mbrtowc.c \
diff --git a/mingw-w64-crt/misc/__mingw_mbrtowc_cp.c 
b/mingw-w64-crt/misc/__mingw_mbrtowc_cp.c
new file mode 100644
index 000000000..d079465c6
--- /dev/null
+++ b/mingw-w64-crt/misc/__mingw_mbrtowc_cp.c
@@ -0,0 +1,157 @@
+/**
+ * This file has no copyright assigned and is placed in the Public Domain.
+ * This file is part of the mingw-w64 runtime package.
+ * No warranty is given; refer to the file DISCLAIMER.PD within this package.
+ */
+#include <errno.h>
+#include <locale.h>
+#include <stdlib.h>
+#include <wchar.h>
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+#include "mingw-wchar.h"
+
+/**
+ * __mingw_mbrtowc_cp is internal implementation for C95 functions mbrlen,
+ * mbrtowc and mbsrtowcs.
+ *
+ * In order to perform conversion we need the following information:
+ *
+ *  - code page used by active locale (which can be a thread locale for
+ *    msvcr80.dll and later); obtained by calling ___lc_codepage_func
+ *
+ *  - maximum character length in used code page; obtained by calling
+ *    ___mb_cur_max_func
+ *
+ *  - for double-byte code pages, we need to recognize leading bytes in order
+ *    to correctly convert multibyte characters; this can be done with Win32
+ *    function IsDBCSLeadByteEx or CRT function isleadbyte
+ *
+ * crtdll.dll's ___lc_codepage_func is quite expensive as it obtains this
+ * information by parsing return value of setlocale(LC_CTYPE, NULL). Using
+ * __mingw_mbrtowc_cp allows mbsrtowcs call both ___lc_codepage_func and
+ * ___mb_cur_max_func only once.
+ */
+
+size_t __mingw_mbrtowc_cp (
+  wchar_t *__restrict__ wc,
+  const char *__restrict__ mbs,
+  size_t count,
+  mbstate_t *__restrict__ state,
+  unsigned cp,
+  int mb_cur_max
+) {
+  /**
+   * Calling mbrtowc (..., NULL, ..., state) is equivalent to
+   *
+   *  mbrtowc (NULL, "", 1, state)
+   */
+  if (mbs == NULL) {
+    wc = NULL;
+    mbs = "";
+    count = 1;
+  }
+
+  /* Detect invalid conversion state */
+  if ((unsigned) *state > 0xFF) {
+    goto einval;
+  }
+
+  /* Both ISO C and POSIX do not mention this case */
+  if (count == 0) {
+    return (size_t) -2;
+  }
+
+  /* Treat `state` as an array of bytes */
+  union {
+    mbstate_t state;
+    char bytes[4];
+  } conversion_state = {.state = *state};
+
+  /* For SBCS code pages `state` must always be in initial state */
+  if (mb_cur_max == 1 && conversion_state.bytes[0]) {
+    goto einval;
+  }
+
+  /* Handle "C" locale */
+  if (cp == 0) {
+    if (wc != NULL) {
+      *wc = (unsigned char) mbs[0];
+    }
+    return !!mbs[0];
+  }
+
+  /* Length of potential multibyte character */
+  int length = 1;
+
+  /* Number of bytes consumed from `mbs` */
+  int bytes_consumed = 0;
+
+  if (conversion_state.bytes[0]) {
+    conversion_state.bytes[1] = mbs[0];
+    bytes_consumed = 1;
+    length = 2;
+  } else if (mb_cur_max == 2 && isleadbyte ((unsigned char) mbs[0])) {
+    conversion_state.bytes[0] = mbs[0];
+
+    /* We need to examine mbs[1] */
+    if (count < 2) {
+      *state = conversion_state.state;
+      return (size_t) -2;
+    }
+
+    conversion_state.bytes[1] = mbs[1];
+    bytes_consumed = 2;
+    length = 2;
+  } else {
+    conversion_state.bytes[0] = mbs[0];
+    bytes_consumed = 1;
+  }
+
+  /* Store terminating '\0' */
+  if (conversion_state.bytes[0] == '\0') {
+    if (wc != NULL) {
+      *wc = L'\0';
+    }
+
+    /* Set `state` to initial conversion state */
+    *state = 0;
+
+    return 0;
+  }
+
+  /* Truncated multibyte character */
+  if (length == 2 && conversion_state.bytes[1] == '\0') {
+    goto eilseq;
+  }
+
+  /* Converted wide character */
+  wchar_t wcOut = WEOF;
+
+  int ret = MultiByteToWideChar (
+    cp, MB_ERR_INVALID_CHARS, conversion_state.bytes, length, &wcOut, 1
+  );
+
+  if (ret != 1) {
+    goto eilseq;
+  }
+
+  if (wc != NULL) {
+    *wc = wcOut;
+  }
+
+  /* Set `state` to initial conversion state */
+  *state = 0;
+
+  return bytes_consumed;
+
+eilseq:
+  errno = EILSEQ;
+  return (size_t) -1;
+
+einval:
+  errno = EINVAL;
+  return (size_t) -1;
+}
diff --git a/mingw-w64-crt/misc/mbrtowc.c b/mingw-w64-crt/misc/mbrtowc.c
index b9324fc95..8e0f0c2b9 100644
--- a/mingw-w64-crt/misc/mbrtowc.c
+++ b/mingw-w64-crt/misc/mbrtowc.c
@@ -3,159 +3,12 @@
  * This file is part of the mingw-w64 runtime package.
  * No warranty is given; refer to the file DISCLAIMER.PD within this package.
  */
-#include <errno.h>
 #include <locale.h>
 #include <stdlib.h>
 #include <wchar.h>
 
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
-
 #include "mingw-wchar.h"
 
-/**
- * __mingw_mbrtowc_cp is internal implementation for C95 functions mbrlen,
- * mbrtowc and mbsrtowcs.
- *
- * In order to perform conversion we need the following information:
- *
- *  - code page used by active locale (which can be a thread locale for
- *    msvcr80.dll and later); obtained by calling ___lc_codepage_func
- *
- *  - maximum character length in used code page; obtained by calling
- *    ___mb_cur_max_func
- *
- *  - for double-byte code pages, we need to recognize leading bytes in order
- *    to correctly convert multibyte characters; this can be done with Win32
- *    function IsDBCSLeadByteEx or CRT function isleadbyte
- *
- * crtdll.dll's ___lc_codepage_func is quite expensive as it obtains this
- * information by parsing return value of setlocale(LC_CTYPE, NULL). Using
- * __mingw_mbrtowc_cp allows mbsrtowcs call both ___lc_codepage_func and
- * ___mb_cur_max_func only once.
- */
-
-size_t __mingw_mbrtowc_cp (
-  wchar_t *__restrict__ wc,
-  const char *__restrict__ mbs,
-  size_t count,
-  mbstate_t *__restrict__ state,
-  unsigned cp,
-  int mb_cur_max
-) {
-  /**
-   * Calling mbrtowc (..., NULL, ..., state) is equivalent to
-   *
-   *  mbrtowc (NULL, "", 1, state)
-   */
-  if (mbs == NULL) {
-    wc = NULL;
-    mbs = "";
-    count = 1;
-  }
-
-  /* Detect invalid conversion state */
-  if ((unsigned) *state > 0xFF) {
-    goto einval;
-  }
-
-  /* Both ISO C and POSIX do not mention this case */
-  if (count == 0) {
-    return (size_t) -2;
-  }
-
-  /* Treat `state` as an array of bytes */
-  union {
-    mbstate_t state;
-    char bytes[4];
-  } conversion_state = {.state = *state};
-
-  /* For SBCS code pages `state` must always be in initial state */
-  if (mb_cur_max == 1 && conversion_state.bytes[0]) {
-    goto einval;
-  }
-
-  /* Handle "C" locale */
-  if (cp == 0) {
-    if (wc != NULL) {
-      *wc = (unsigned char) mbs[0];
-    }
-    return !!mbs[0];
-  }
-
-  /* Length of potential multibyte character */
-  int length = 1;
-
-  /* Number of bytes consumed from `mbs` */
-  int bytes_consumed = 0;
-
-  if (conversion_state.bytes[0]) {
-    conversion_state.bytes[1] = mbs[0];
-    bytes_consumed = 1;
-    length = 2;
-  } else if (mb_cur_max == 2 && isleadbyte ((unsigned char) mbs[0])) {
-    conversion_state.bytes[0] = mbs[0];
-
-    /* We need to examine mbs[1] */
-    if (count < 2) {
-      *state = conversion_state.state;
-      return (size_t) -2;
-    }
-
-    conversion_state.bytes[1] = mbs[1];
-    bytes_consumed = 2;
-    length = 2;
-  } else {
-    conversion_state.bytes[0] = mbs[0];
-    bytes_consumed = 1;
-  }
-
-  /* Store terminating '\0' */
-  if (conversion_state.bytes[0] == '\0') {
-    if (wc != NULL) {
-      *wc = L'\0';
-    }
-
-    /* Set `state` to initial conversion state */
-    *state = 0;
-
-    return 0;
-  }
-
-  /* Truncated multibyte character */
-  if (length == 2 && conversion_state.bytes[1] == '\0') {
-    goto eilseq;
-  }
-
-  /* Converted wide character */
-  wchar_t wcOut = WEOF;
-
-  int ret = MultiByteToWideChar (
-    cp, MB_ERR_INVALID_CHARS, conversion_state.bytes, length, &wcOut, 1
-  );
-
-  if (ret != 1) {
-    goto eilseq;
-  }
-
-  if (wc != NULL) {
-    *wc = wcOut;
-  }
-
-  /* Set `state` to initial conversion state */
-  *state = 0;
-
-  return bytes_consumed;
-
-eilseq:
-  errno = EILSEQ;
-  return (size_t) -1;
-
-einval:
-  errno = EINVAL;
-  return (size_t) -1;
-}
-
 size_t mbrtowc (
   wchar_t *__restrict__ wc,
   const char *__restrict__ mbs,
-- 
2.51.0.windows.1

_______________________________________________
Mingw-w64-public mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/mingw-w64-public

Reply via email to