The internal function `__mingw_mbrtowc_cp` is used to implement the C95
functions mbrlen, mbrtowc and mbsrtowcs. This makes a difference for crtdll.dll,
where `___lc_codepage_func` parses the return value of setlocale(LC_ALL, NULL)
and converts the code page part to an `int`. With this change, `mbsrtowcs` needs
to call `___lc_codepage_func` and `___mb_cur_max_func` only once each.

I wasn't sure where to declare __mingw_mbrtowc_cp; I didn't feel like creating
an internal header file just for one function, so I declared it in wchar.h.

If we're good with these patches, I'll send similar patches for `wcrtomb` and 
`wcsrtombs`.

I just pushed them to my GitHub fork to run CI tests[1].

- Kirill Makurin

[1] https://github.com/maiddaisuki/mingw-w64/actions/runs/18244326038
From 6faece58c5c89255f803387e2d9ebe322fade097 Mon Sep 17 00:00:00 2001
From: Kirill Makurin <[email protected]>
Date: Sat, 4 Oct 2025 21:18:50 +0900
Subject: [PATCH 1/3] crt: add internal function __mingw_mbrtowc_cp

This function is internally called from mbrlen, mbrtowc and mbsrtowcs
functions.

The previous implementation of mbsrtowcs called mbrtowc, which in turn
called ___lc_codepage_func. The implementation of ___lc_codepage_func for
crtdll.dll is quite expensive, as it parses the return value of
setlocale(LC_ALL, NULL).

Using the internal __mingw_mbrtowc_cp instead of mbrtowc allows mbsrtowcs to
call both ___lc_codepage_func and ___mb_cur_max_func only once.

Signed-off-by: Kirill Makurin <[email protected]>
---
 mingw-w64-crt/misc/mbrlen.c    | 11 +++++-
 mingw-w64-crt/misc/mbrtowc.c   | 61 ++++++++++++++++++++++++++--------
 mingw-w64-crt/misc/mbsrtowcs.c |  9 +++--
 mingw-w64-headers/crt/wchar.h  |  3 ++
 4 files changed, 66 insertions(+), 18 deletions(-)

diff --git a/mingw-w64-crt/misc/mbrlen.c b/mingw-w64-crt/misc/mbrlen.c
index 7b57f5753..5bddaf53f 100644
--- a/mingw-w64-crt/misc/mbrlen.c
+++ b/mingw-w64-crt/misc/mbrlen.c
@@ -3,6 +3,8 @@
  * This file is part of the mingw-w64 runtime package.
  * No warranty is given; refer to the file DISCLAIMER.PD within this package.
  */
+#include <locale.h>
+#include <stdlib.h>
 #include <wchar.h>
 
 size_t mbrlen (
@@ -15,5 +17,12 @@ size_t mbrlen (
     static mbstate_t state_mbrlen = {0};
     state = &state_mbrlen;
   }
-  return mbrtowc (NULL, mbs, count, state);
+
+  /* Code page used by current locale */
+  unsigned cp = ___lc_codepage_func ();
+
+  /* Maximum character length used by current locale */
+  int mb_cur_max = ___mb_cur_max_func ();
+
+  return __mingw_mbrtowc_cp (NULL, mbs, count, state, cp, mb_cur_max);
 }
diff --git a/mingw-w64-crt/misc/mbrtowc.c b/mingw-w64-crt/misc/mbrtowc.c
index a6933a0cb..c4f7556cb 100644
--- a/mingw-w64-crt/misc/mbrtowc.c
+++ b/mingw-w64-crt/misc/mbrtowc.c
@@ -11,18 +11,36 @@
 #define WIN32_LEAN_AND_MEAN
 #include <windows.h>
 
-size_t mbrtowc (
+/**
+ * __mingw_mbrtowc_cp is internal implementation for C95 functions mbrlen,
+ * mbrtowc and mbsrtowcs.
+ *
+ * In order to perform conversion we need the following information:
+ *
+ *  - code page used by active locale (which can be a thread locale for
+ *    msvcr80.dll and later); obtained by calling ___lc_codepage_func
+ *
+ *  - maximum character length in used code page; obtained by calling
+ *    ___mb_cur_max_func
+ *
+ *  - for double-byte code pages, we need to recognize leading bytes in order
+ *    to correctly convert multibyte characters; this can be done with Win32
+ *    function IsDBCSLeadByteEx or CRT function isleadbyte
+ *
+ * crtdll.dll's ___lc_codepage_func is quite expensive as it obtains this
+ * information by parsing return value of setlocale(LC_CTYPE, NULL). Using
+ * __mingw_mbrtowc_cp allows mbsrtowcs call both ___lc_codepage_func and
+ * ___mb_cur_max_func only once.
+ */
+
+size_t __mingw_mbrtowc_cp (
   wchar_t *__restrict__ wc,
   const char *__restrict__ mbs,
   size_t count,
-  mbstate_t *__restrict__ state
+  mbstate_t *__restrict__ state,
+  unsigned cp,
+  int mb_cur_max
 ) {
-  /* Use private `mbstate_t` if caller did not supply one */
-  if (state == NULL) {
-    static mbstate_t state_mbrtowc = {0};
-    state = &state_mbrtowc;
-  }
-
   /**
    * Calling mbrtowc (..., NULL, ..., state) is equivalent to
    *
@@ -44,12 +62,6 @@ size_t mbrtowc (
     return (size_t) -2;
   }
 
-  /* Code page used by current locale */
-  unsigned cp = ___lc_codepage_func ();
-
-  /* Maximum character length used by current locale */
-  int mb_cur_max = ___mb_cur_max_func ();
-
   /* Treat `state` as an array of bytes */
   union {
     mbstate_t state;
@@ -141,3 +153,24 @@ einval:
   errno = EINVAL;
   return (size_t) -1;
 }
+
+size_t mbrtowc (
+  wchar_t *__restrict__ wc,
+  const char *__restrict__ mbs,
+  size_t count,
+  mbstate_t *__restrict__ state
+) {
+  /* Use private `mbstate_t` if caller did not supply one */
+  if (state == NULL) {
+    static mbstate_t state_mbrtowc = {0};
+    state = &state_mbrtowc;
+  }
+
+  /* Code page used by current locale */
+  unsigned cp = ___lc_codepage_func ();
+
+  /* Maximum character length used by current locale */
+  int mb_cur_max = ___mb_cur_max_func ();
+
+  return __mingw_mbrtowc_cp (wc, mbs, count, state, cp, mb_cur_max);
+}
diff --git a/mingw-w64-crt/misc/mbsrtowcs.c b/mingw-w64-crt/misc/mbsrtowcs.c
index e7e4105bb..1c3ccec6c 100644
--- a/mingw-w64-crt/misc/mbsrtowcs.c
+++ b/mingw-w64-crt/misc/mbsrtowcs.c
@@ -4,8 +4,8 @@
  * No warranty is given; refer to the file DISCLAIMER.PD within this package.
  */
 #include <locale.h>
-#include <wchar.h>
 #include <stdlib.h>
+#include <wchar.h>
 
 size_t mbsrtowcs (
   wchar_t *wcs,
@@ -29,12 +29,15 @@ size_t mbsrtowcs (
   /* Next multibyte character to convert */
   const char *mbc = *mbs;
 
+  /* Code page used by current locale */
+  unsigned cp = ___lc_codepage_func ();
+
   /* Maximum character length in `cp` */
   int mb_cur_max = ___mb_cur_max_func();
 
   while (1) {
-    const size_t length = mbrtowc (
-      &wc, mbc, mb_cur_max, &conversion_state
+    const size_t length = __mingw_mbrtowc_cp (
+      &wc, mbc, mb_cur_max, &conversion_state, cp, mb_cur_max
     );
 
     /* Conversion failed */
diff --git a/mingw-w64-headers/crt/wchar.h b/mingw-w64-headers/crt/wchar.h
index 898d0e821..1bb3abe5b 100644
--- a/mingw-w64-headers/crt/wchar.h
+++ b/mingw-w64-headers/crt/wchar.h
@@ -1206,6 +1206,9 @@ __MINGW_ASM_CALL(__mingw_vsnwprintf);
 #endif
   typedef wchar_t _Wint_t;
 
+#ifndef _UCRT
+  size_t __cdecl __mingw_mbrtowc_cp(wchar_t * __restrict__ _DstCh,const char * __restrict__ _SrcCh,size_t _SizeInBytes,mbstate_t * __restrict__ _State, unsigned _Cp, int _MbCurMax);
+#endif
   wint_t __cdecl btowc(int);
   int __cdecl mbsinit(const mbstate_t *ps);
  size_t __cdecl mbrlen(const char * __restrict__ _Ch,size_t _SizeInBytes,mbstate_t * __restrict__ _State);
-- 
2.51.0.windows.1

From 725ff47bfaa5762a214ed79194d5eb162c7fa1bf Mon Sep 17 00:00:00 2001
From: Kirill Makurin <[email protected]>
Date: Sat, 4 Oct 2025 21:28:27 +0900
Subject: [PATCH 2/3] crt: move definition of __mingw_mbrtowc_cp to a separate
 file

Signed-off-by: Kirill Makurin <[email protected]>
---
 mingw-w64-crt/Makefile.am               |   1 +
 mingw-w64-crt/misc/__mingw_mbrtowc_cp.c | 155 ++++++++++++++++++++++++
 mingw-w64-crt/misc/mbrtowc.c            | 147 ----------------------
 3 files changed, 156 insertions(+), 147 deletions(-)
 create mode 100644 mingw-w64-crt/misc/__mingw_mbrtowc_cp.c

diff --git a/mingw-w64-crt/Makefile.am b/mingw-w64-crt/Makefile.am
index 439f60ea5..389f956ec 100644
--- a/mingw-w64-crt/Makefile.am
+++ b/mingw-w64-crt/Makefile.am
@@ -167,6 +167,7 @@ src_libws2_32=libsrc/ws2_32.c \
 
 # Files included in all libmsvcr*.a
 src_msvcrt_common=\
+  misc/__mingw_mbrtowc_cp.c \
   misc/_onexit.c \
   misc/mbrlen.c \
   misc/mbrtowc.c \
diff --git a/mingw-w64-crt/misc/__mingw_mbrtowc_cp.c b/mingw-w64-crt/misc/__mingw_mbrtowc_cp.c
new file mode 100644
index 000000000..9972d2a19
--- /dev/null
+++ b/mingw-w64-crt/misc/__mingw_mbrtowc_cp.c
@@ -0,0 +1,155 @@
+/**
+ * This file has no copyright assigned and is placed in the Public Domain.
+ * This file is part of the mingw-w64 runtime package.
+ * No warranty is given; refer to the file DISCLAIMER.PD within this package.
+ */
+#include <errno.h>
+#include <locale.h>
+#include <stdlib.h>
+#include <wchar.h>
+
+#define WIN32_LEAN_AND_MEAN
+#include <windows.h>
+
+/**
+ * __mingw_mbrtowc_cp is internal implementation for C95 functions mbrlen,
+ * mbrtowc and mbsrtowcs.
+ *
+ * In order to perform conversion we need the following information:
+ *
+ *  - code page used by active locale (which can be a thread locale for
+ *    msvcr80.dll and later); obtained by calling ___lc_codepage_func
+ *
+ *  - maximum character length in used code page; obtained by calling
+ *    ___mb_cur_max_func
+ *
+ *  - for double-byte code pages, we need to recognize leading bytes in order
+ *    to correctly convert multibyte characters; this can be done with Win32
+ *    function IsDBCSLeadByteEx or CRT function isleadbyte
+ *
+ * crtdll.dll's ___lc_codepage_func is quite expensive as it obtains this
+ * information by parsing return value of setlocale(LC_CTYPE, NULL). Using
+ * __mingw_mbrtowc_cp allows mbsrtowcs call both ___lc_codepage_func and
+ * ___mb_cur_max_func only once.
+ */
+
+size_t __mingw_mbrtowc_cp (
+  wchar_t *__restrict__ wc,
+  const char *__restrict__ mbs,
+  size_t count,
+  mbstate_t *__restrict__ state,
+  unsigned cp,
+  int mb_cur_max
+) {
+  /**
+   * Calling mbrtowc (..., NULL, ..., state) is equivalent to
+   *
+   *  mbrtowc (NULL, "", 1, state)
+   */
+  if (mbs == NULL) {
+    wc = NULL;
+    mbs = "";
+    count = 1;
+  }
+
+  /* Detect invalid conversion state */
+  if ((unsigned) *state > 0xFF) {
+    goto einval;
+  }
+
+  /* Both ISO C and POSIX do not mention this case */
+  if (count == 0) {
+    return (size_t) -2;
+  }
+
+  /* Treat `state` as an array of bytes */
+  union {
+    mbstate_t state;
+    char bytes[4];
+  } conversion_state = {.state = *state};
+
+  /* For SBCS code pages `state` must always be in initial state */
+  if (mb_cur_max == 1 && conversion_state.bytes[0]) {
+    goto einval;
+  }
+
+  /* Handle "C" locale */
+  if (cp == 0) {
+    if (wc != NULL) {
+      *wc = (unsigned char) mbs[0];
+    }
+    return !!mbs[0];
+  }
+
+  /* Length of potential multibyte character */
+  int length = 1;
+
+  /* Number of bytes consumed from `mbs` */
+  int bytes_consumed = 0;
+
+  if (conversion_state.bytes[0]) {
+    conversion_state.bytes[1] = mbs[0];
+    bytes_consumed = 1;
+    length = 2;
+  } else if (mb_cur_max == 2 && isleadbyte ((unsigned char) mbs[0])) {
+    conversion_state.bytes[0] = mbs[0];
+
+    /* We need to examine mbs[1] */
+    if (count < 2) {
+      *state = conversion_state.state;
+      return (size_t) -2;
+    }
+
+    conversion_state.bytes[1] = mbs[1];
+    bytes_consumed = 2;
+    length = 2;
+  } else {
+    conversion_state.bytes[0] = mbs[0];
+    bytes_consumed = 1;
+  }
+
+  /* Store terminating '\0' */
+  if (conversion_state.bytes[0] == '\0') {
+    if (wc != NULL) {
+      *wc = L'\0';
+    }
+
+    /* Set `state` to initial conversion state */
+    *state = 0;
+
+    return 0;
+  }
+
+  /* Truncated multibyte character */
+  if (length == 2 && conversion_state.bytes[1] == '\0') {
+    goto eilseq;
+  }
+
+  /* Converted wide character */
+  wchar_t wcOut = WEOF;
+
+  int ret = MultiByteToWideChar (
+    cp, MB_ERR_INVALID_CHARS, conversion_state.bytes, length, &wcOut, 1
+  );
+
+  if (ret != 1) {
+    goto eilseq;
+  }
+
+  if (wc != NULL) {
+    *wc = wcOut;
+  }
+
+  /* Set `state` to initial conversion state */
+  *state = 0;
+
+  return bytes_consumed;
+
+eilseq:
+  errno = EILSEQ;
+  return (size_t) -1;
+
+einval:
+  errno = EINVAL;
+  return (size_t) -1;
+}
diff --git a/mingw-w64-crt/misc/mbrtowc.c b/mingw-w64-crt/misc/mbrtowc.c
index c4f7556cb..195adf63d 100644
--- a/mingw-w64-crt/misc/mbrtowc.c
+++ b/mingw-w64-crt/misc/mbrtowc.c
@@ -3,157 +3,10 @@
  * This file is part of the mingw-w64 runtime package.
  * No warranty is given; refer to the file DISCLAIMER.PD within this package.
  */
-#include <errno.h>
 #include <locale.h>
 #include <stdlib.h>
 #include <wchar.h>
 
-#define WIN32_LEAN_AND_MEAN
-#include <windows.h>
-
-/**
- * __mingw_mbrtowc_cp is internal implementation for C95 functions mbrlen,
- * mbrtowc and mbsrtowcs.
- *
- * In order to perform conversion we need the following information:
- *
- *  - code page used by active locale (which can be a thread locale for
- *    msvcr80.dll and later); obtained by calling ___lc_codepage_func
- *
- *  - maximum character length in used code page; obtained by calling
- *    ___mb_cur_max_func
- *
- *  - for double-byte code pages, we need to recognize leading bytes in order
- *    to correctly convert multibyte characters; this can be done with Win32
- *    function IsDBCSLeadByteEx or CRT function isleadbyte
- *
- * crtdll.dll's ___lc_codepage_func is quite expensive as it obtains this
- * information by parsing return value of setlocale(LC_CTYPE, NULL). Using
- * __mingw_mbrtowc_cp allows mbsrtowcs call both ___lc_codepage_func and
- * ___mb_cur_max_func only once.
- */
-
-size_t __mingw_mbrtowc_cp (
-  wchar_t *__restrict__ wc,
-  const char *__restrict__ mbs,
-  size_t count,
-  mbstate_t *__restrict__ state,
-  unsigned cp,
-  int mb_cur_max
-) {
-  /**
-   * Calling mbrtowc (..., NULL, ..., state) is equivalent to
-   *
-   *  mbrtowc (NULL, "", 1, state)
-   */
-  if (mbs == NULL) {
-    wc = NULL;
-    mbs = "";
-    count = 1;
-  }
-
-  /* Detect invalid conversion state */
-  if ((unsigned) *state > 0xFF) {
-    goto einval;
-  }
-
-  /* Both ISO C and POSIX do not mention this case */
-  if (count == 0) {
-    return (size_t) -2;
-  }
-
-  /* Treat `state` as an array of bytes */
-  union {
-    mbstate_t state;
-    char bytes[4];
-  } conversion_state = {.state = *state};
-
-  /* For SBCS code pages `state` must always be in initial state */
-  if (mb_cur_max == 1 && conversion_state.bytes[0]) {
-    goto einval;
-  }
-
-  /* Handle "C" locale */
-  if (cp == 0) {
-    if (wc != NULL) {
-      *wc = (unsigned char) mbs[0];
-    }
-    return !!mbs[0];
-  }
-
-  /* Length of potential multibyte character */
-  int length = 1;
-
-  /* Number of bytes consumed from `mbs` */
-  int bytes_consumed = 0;
-
-  if (conversion_state.bytes[0]) {
-    conversion_state.bytes[1] = mbs[0];
-    bytes_consumed = 1;
-    length = 2;
-  } else if (mb_cur_max == 2 && isleadbyte ((unsigned char) mbs[0])) {
-    conversion_state.bytes[0] = mbs[0];
-
-    /* We need to examine mbs[1] */
-    if (count < 2) {
-      *state = conversion_state.state;
-      return (size_t) -2;
-    }
-
-    conversion_state.bytes[1] = mbs[1];
-    bytes_consumed = 2;
-    length = 2;
-  } else {
-    conversion_state.bytes[0] = mbs[0];
-    bytes_consumed = 1;
-  }
-
-  /* Store terminating '\0' */
-  if (conversion_state.bytes[0] == '\0') {
-    if (wc != NULL) {
-      *wc = L'\0';
-    }
-
-    /* Set `state` to initial conversion state */
-    *state = 0;
-
-    return 0;
-  }
-
-  /* Truncated multibyte character */
-  if (length == 2 && conversion_state.bytes[1] == '\0') {
-    goto eilseq;
-  }
-
-  /* Converted wide character */
-  wchar_t wcOut = WEOF;
-
-  int ret = MultiByteToWideChar (
-    cp, MB_ERR_INVALID_CHARS, conversion_state.bytes, length, &wcOut, 1
-  );
-
-  if (ret != 1) {
-    goto eilseq;
-  }
-
-  if (wc != NULL) {
-    *wc = wcOut;
-  }
-
-  /* Set `state` to initial conversion state */
-  *state = 0;
-
-  return bytes_consumed;
-
-eilseq:
-  errno = EILSEQ;
-  return (size_t) -1;
-
-einval:
-  errno = EINVAL;
-  return (size_t) -1;
-}
-
 size_t mbrtowc (
   wchar_t *__restrict__ wc,
   const char *__restrict__ mbs,
-- 
2.51.0.windows.1

_______________________________________________
Mingw-w64-public mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/mingw-w64-public

Reply via email to