Currently the v*scanf functions are broken and crash when they are called
with more than 30 arguments in the va_list. This is because the va_list-based
v*scanf functions are redirected to the variadic *scanf functions, and this
redirect, implemented in the scanf.S file, has a fixed limit of 30 arguments.

The number of arguments for an msvcrt *scanf function can be determined from
the format string by counting the '%' characters which are not followed by
another '%' or by '*'. Every scanf parameter is a pointer and therefore has a
fixed size, which means that the required stack size can be calculated exactly.

Fix the scanf.S redirect implementation by dynamically allocating stack space
for the exact number of pointer parameters.

---

I have tested this patch on i686 and x86_64. Both ARM changes (arm32 and
aarch64) are untested, so please test whether vsscanf() still works on these
platforms.

With this patch the following code works fine without crashing.
Compile it for msvcrt with -std=c89 or -D__USE_MINGW_ANSI_STDIO=0:

  #include <stdio.h>
  #include <stdarg.h>

  /* Variadic front end for vsscanf(): packs the trailing arguments into a
     va_list, forwards them together with str and format, and returns
     whatever vsscanf() returned.  */
  int call_vsscanf(const char *str, const char *format, ...)
  {
    va_list args;
    int matched;

    va_start(args, format);
    matched = vsscanf(str, format, args);
    va_end(args);

    return matched;
  }

  /* Reproducer: scans 52 single characters through call_vsscanf(), i.e. more
     pointer arguments than the old fixed limit of 30, and prints the result.  */
  int main(void)
  {
    /* 52 scanned characters plus a terminating NUL.  "%c" never stores a
       terminator, so the buffer is zero-initialized to make the "%s"
       output below well defined.  */
    char b[53] = { 0 };
    call_vsscanf(
      "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ",
      "%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c"
      "%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c",
      &b[0],&b[1],&b[2],&b[3],&b[4],&b[5],&b[6],&b[7],&b[8],&b[9],&b[10],&b[11],
      &b[12],&b[13],&b[14],&b[15],&b[16],&b[17],&b[18],&b[19],&b[20],&b[21],
      &b[22],&b[23],&b[24],&b[25],&b[26],&b[27],&b[28],&b[29],&b[30],&b[31],
      &b[32],&b[33],&b[34],&b[35],&b[36],&b[37],&b[38],&b[39],&b[40],&b[41],
      &b[42],&b[43],&b[44],&b[45],&b[46],&b[47],&b[48],&b[49],&b[50],&b[51]
    );
    printf("b=%s\n", b);
    return 0;
  }
---
 mingw-w64-crt/Makefile.am                     |   2 +
 mingw-w64-crt/stdio/scanf.S                   | 166 ++++++++++--------
 mingw-w64-crt/stdio/scanf2-argcount-char.c    |   9 +
 .../stdio/scanf2-argcount-template.c          |  22 +++
 mingw-w64-crt/stdio/scanf2-argcount-wchar.c   |   9 +
 mingw-w64-crt/stdio/vfscanf.c                 |   6 +-
 mingw-w64-crt/stdio/vfwscanf.c                |   6 +-
 mingw-w64-crt/stdio/vsscanf.c                 |   6 +-
 mingw-w64-crt/stdio/vswscanf.c                |   6 +-
 9 files changed, 154 insertions(+), 78 deletions(-)
 create mode 100644 mingw-w64-crt/stdio/scanf2-argcount-char.c
 create mode 100644 mingw-w64-crt/stdio/scanf2-argcount-template.c
 create mode 100644 mingw-w64-crt/stdio/scanf2-argcount-wchar.c

diff --git a/mingw-w64-crt/Makefile.am b/mingw-w64-crt/Makefile.am
index 5fe84f8bafe4..6cae3b4a52d2 100644
--- a/mingw-w64-crt/Makefile.am
+++ b/mingw-w64-crt/Makefile.am
@@ -504,6 +504,7 @@ src_libmingwex=\
   misc/wmemset.c         misc/ftw.c                 misc/ftw64.c            
misc/mingw-access.c          \
   \
   stdio/mingw_pformat.h    \
+  stdio/scanf2-argcount-char.c stdio/scanf2-argcount-wchar.c \
   stdio/vfscanf2.S         stdio/vfwscanf2.S         stdio/vscanf2.S          
stdio/vsscanf2.S          stdio/vswscanf2.S \
   stdio/vwscanf2.S         stdio/strtok_r.c          stdio/scanf.S \
   stdio/_Exit.c            stdio/_findfirst64i32.c   stdio/_findnext64i32.c   
stdio/_fstat.c \
@@ -2244,6 +2245,7 @@ EXTRA_DIST += revstamp.h \
   profile/gcrt0.c \
   profile/COPYING \
   profile/CYGWIN_LICENSE \
+  stdio/scanf2-argcount-template.c \
   stdio/scanf2-template.S
 
 DISTCHECK_CONFIGURE_FLAGS = --host=$(host_triplet) $(withsys)
diff --git a/mingw-w64-crt/stdio/scanf.S b/mingw-w64-crt/stdio/scanf.S
index 1e0bed9452ac..ae8090e4cf9c 100644
--- a/mingw-w64-crt/stdio/scanf.S
+++ b/mingw-w64-crt/stdio/scanf.S
@@ -9,17 +9,14 @@
    The goal of this routine is to turn a call to v*scanf into a call to
    s*scanf.  This is needed because mingw-w64 uses msvcr100.dll, which doesn't
    support the v*scanf functions instead of msvcr120.dll which does.
-   Unfortunately, there is no defined way to know exactly how big a va_list
-   is, so we use a hard-coded buffer.
-
-   I suppose a sufficiently-motivated person could try to parse the format
-   to figure out how many tokens there are... */
+*/
 
 /* The function prototype here is (essentially):
 
-   int __ms_vsscanf_internal (void *s,
+   int __ms_v*scanf_internal (void *s,
                             void *format,
                             void *arg,
+                            size_t count,
                             void *func);
 
    I say 'essentially' because passing a function pointer as void in ISO
@@ -37,19 +34,6 @@
         */
     .def __argtos;    .scl    2;    .type    32;    .endef
 
-    /* The max number of pointers we support.  Must be an even number
-       to keep the 64bit stack 16byte aligned.  Must not be less than 4.  */
-    .equ entries, 30
-
-    /* 64bit pointers are 8 bytes.  */
-    .equ sizeof, 8
-
-    /* Size of our buffer.  */
-    .equ iBytes, entries * sizeof
-
-    /* Stack space for first 2 args to s*scanf.  */
-    .equ iOffset, (2 * sizeof)
-
     .seh_proc __argtos
 __argtos:
 
@@ -58,48 +42,57 @@ __argtos:
       - format must be in rdx.  That's where it is on entry.
       - The first pointer in arg must be in r8. arg is in r8 on entry.
       - The second pointer in arg must be in r9. arg is in r8 on entry.
-      - The ($entries - 2) other pointers in arg must be on the stack,
+      - The (count - 2) other pointers in arg must be on the stack,
        starting 32bytes into rsp.  */
 
-    /* We need enough room to shadow (s + format)
-       + (enough room for all the other args).  */
-    subq $(iOffset + iBytes), %rsp
-    .seh_stackalloc iOffset + iBytes
+    pushq %rbp
+    .seh_pushreg %rbp
+    movq %rsp, %rbp
+    .seh_setframe %rbp, 0
 
+    /* We need to always reserve space to shadow 4 parameters.  */
+    subq $32, %rsp
+    .seh_stackalloc 32
     .seh_endprologue
 
-    /* We are going to copy $entries pointers from arg to our
-       local stack.  Except the first 2, since they will be
-       loaded in registers.  */
-    movq $entries - 2, %r10 /* # of ptrs to copy.  */
+    movq 48(%rbp), %r10 /* func. */
+
+    /* We need enough room to shadow all the other args.
+       Except the first 2, since they will be loaded in registers.  */
+    cmpq $2, %r9 /* count. */
+    jbe .SKIP
+    subq $2, %r9 /* # of ptrs to copy.  */
+    leaq 8(, %r9, 8), %rax /* (# of ptrs + 1) * 8.  */
+    andq $-16, %rax /* Round down so rsp stays 16byte aligned.  */
+    subq %rax, %rsp
 
-    /* The first 32 bytes are in registers, but by spec, space
-          must still be reserved for them on the stack.  Put the
+    /* We are going to copy parameters from arg to our local stack.
+       The first 32 bytes are in registers, but by spec, space
+       must still be reserved for them on the stack.  Put the
        rest of the pointers in the stack after that.  */
     lea 32(%rsp), %r11 /* dst.  */
 
 .LOOP:
-    subq $1, %r10
+    subq $1, %r9
 
     /* Use 16 to skip over the first 2 pointers.  */
-    movq 16(%r8, %r10, 8), %rax
-    movq %rax, (%r11, %r10, 8)
+    movq 16(%r8, %r9, 8), %rax
+    movq %rax, (%r11, %r9, 8)
     jnz .LOOP
 
-    /* r9 contains the routine we are going to call.  Since we are about to
-       overwrite it, move it somewhere safe.  */
-    movq %r9, %r10
-
+.SKIP:
     /* The stack is now correctly populated, and so are rcx and rdx.
        But we need to load the last 2 regs before making the call.  */
     movq 0x8(%r8), %r9 /* 2nd dest location (may be garbage if only 1 arg).  */
-    movq (%r8), %r8 /* 1st dest location.  */
+    movq (%r8), %r8 /* 1st dest location (may be garbage if no arg).  */
 
     /* Make the call.  */
     callq *%r10
 
-    addq $(iOffset + iBytes), %rsp
+    /* Restore stack.  */
+    movq %rbp, %rsp
 
+    popq %rbp
     retq
     .seh_endproc
 
 
@@ -113,31 +106,23 @@ __argtos:
         */
     .def __argtos;    .scl    2;    .type    32;    .endef
 
-    /* The max number of pointers we support.  Must not be less than 1.  */
-    .equ entries, 30
-
-    /* 64bit pointers are 8 bytes.  */
-    .equ sizeof, 4
-
-    /* Size of our buffer.  */
-    .set iBytes, entries * sizeof
-
-    /* Stack space for first 2 args to s*scanf.  */
-    .equ iOffset, (2 * sizeof)
-
 __argtos:
     pushl %ebp
     movl %esp, %ebp
     pushl %edi
+    pushl %ebx
 
     /* Reserve enough stack space for everything.
 
        Stack usage will look like:
        4 bytes - s
        4 bytes - format
-       (iBytes) bytes - variable # of parameters for sscanf (all ptrs).  */
+       4*count bytes - variable # of parameters for sscanf (all ptrs).  */
 
-    subl $(iOffset + iBytes), %esp
+    movl 20(%ebp), %ebx  /* count.  */
+    addl $2, %ebx  /* s + format.  */
+    sall $2, %ebx  /* (count + 2) * 4.  */
+    subl %ebx, %esp
 
     /* Write out s and format where they need to be for the sscanf call.  */
     movl 8(%ebp), %eax
@@ -145,10 +130,12 @@ __argtos:
     movl 12(%ebp), %edx
     movl %edx, 0x4(%esp)  /* format.  */
 
-    /* We are going to copy $entries pointers from arg to our
+    /* We are going to copy _count_ pointers from arg to our
        local stack.  */
-    movl $entries, %ecx /* # of ptrs to copy.  */
-    lea iOffset(%esp), %edi /* dst.  */
+    movl 20(%ebp), %ecx /* # of ptrs to copy.  */
+    testl %ecx, %ecx
+    jz .SKIP
+    lea 8(%esp), %edi /* dst.  */
     movl 16(%ebp), %edx /* src.  */
 
 .LOOP:
@@ -158,13 +145,16 @@ __argtos:
     movl %eax, (%edi, %ecx, 4)
     jnz .LOOP
 
+.SKIP:
     /* The stack is now correctly populated.  */
 
     /* Make the call.  */
-    call *20(%ebp)
+    call *24(%ebp)
 
     /* Restore stack.  */
-    addl $(iOffset + iBytes), %esp
+    addl %ebx, %esp
+
+    popl %ebx
     popl %edi
     leave
 
@@ -178,25 +168,39 @@ __argtos:
     .globl __argtos
 
 __argtos:
-    push    {r4-r7, lr}
-    sub     sp, sp, #128
-    mov     r12, r3
+    push    {r4-r8, lr}
+    /* func is the 5th argument: first stack slot above the 6 saved regs.  */
+    ldr     r12, [sp, #24]
+    mov     r8, #0
+
+    /* Load up to 2 pointers destined for r2/r3; stop when count runs out.  */
+    cmp     r3, #0
+    beq     2f
+    ldr     r5, [r2], #4
+    subs    r3, r3, #1
+    beq     2f
+    ldr     r6, [r2], #4
+    subs    r3, r3, #1
+    beq     2f
+
+    /* Reserve 4 bytes per remaining pointer, rounded up to an even number
+       of slots so that sp stays 8-byte aligned.  */
+    add     r8, r3, #1
+    bic     r8, r8, #1
+    sub     sp, sp, r8, lsl #2
     mov     r4, sp
-
-    ldr     r5, [r2], #4
-    ldr     r6, [r2], #4
-
-    mov     r3, #116
 1:  ldr     r7, [r2], #4
     str     r7, [r4], #4
-    subs    r3, r3, #4
+    subs    r3, r3, #1
     bne     1b
 
+2:
     mov     r2, r5
     mov     r3, r6
     blx     r12
-    add     sp, sp, #128
-    pop     {r4-r7, pc}
+
+    add     sp, sp, r8, lsl #2
+    pop     {r4-r8, pc}
 
 #elif defined (__aarch64__)
 
@@ -207,25 +204,50 @@ __argtos:
 __argtos:
     stp     x29, x30, [sp, #-16]!
     mov     x29, sp
-    sub     sp, sp, #256
-    mov     x9, sp
     mov     x10, x2
     mov     x11, x3
+    mov     x12, x4
+
+    cmp     x11, #0
+    b.eq    2f
 
     ldr     x2, [x10], #8
+    subs    x11, x11, #1
+    b.eq    2f
+
     ldr     x3, [x10], #8
+    subs    x11, x11, #1
+    b.eq    2f
+
     ldr     x4, [x10], #8
+    subs    x11, x11, #1
+    b.eq    2f
+
     ldr     x5, [x10], #8
+    subs    x11, x11, #1
+    b.eq    2f
+
     ldr     x6, [x10], #8
+    subs    x11, x11, #1
+    b.eq    2f
+
     ldr     x7, [x10], #8
+    subs    x11, x11, #1
+    b.eq    2f
 
-    mov     x12, #240
+    /* Reserve 8 bytes per remaining pointer, rounded up to an even number
+       of slots so that sp stays 16-byte aligned.  */
+    add     x9, x11, #1
+    lsr     x9, x9, #1
+    sub     sp, sp, x9, lsl #4
+    mov     x9, sp
 1:  ldr     x13, [x10], #8
     str     x13, [x9], #8
-    subs    x12, x12, #8
+    subs    x11, x11, #1
     b.ne    1b
 
-    blr     x11
+2:
+    blr     x12
     mov     sp, x29
     ldp     x29, x30, [sp], #16
     ret
diff --git a/mingw-w64-crt/stdio/scanf2-argcount-char.c 
b/mingw-w64-crt/stdio/scanf2-argcount-char.c
new file mode 100644
index 000000000000..ee0ee2fdedb9
--- /dev/null
+++ b/mingw-w64-crt/stdio/scanf2-argcount-char.c
@@ -0,0 +1,9 @@
+/**
+ * This file has no copyright assigned and is placed in the Public Domain.
+ * This file is part of the mingw-w64 runtime package.
+ * No warranty is given; refer to the file DISCLAIMER.PD within this package.
+ */
+
+#define FUNC __ms_scanf_arg_count_internal
+#define TYPE char
+#include "scanf2-argcount-template.c"
diff --git a/mingw-w64-crt/stdio/scanf2-argcount-template.c 
b/mingw-w64-crt/stdio/scanf2-argcount-template.c
new file mode 100644
index 000000000000..c07e11797c39
--- /dev/null
+++ b/mingw-w64-crt/stdio/scanf2-argcount-template.c
@@ -0,0 +1,32 @@
+/**
+ * This file has no copyright assigned and is placed in the Public Domain.
+ * This file is part of the mingw-w64 runtime package.
+ * No warranty is given; refer to the file DISCLAIMER.PD within this package.
+ */
+
+#include <stddef.h>
+
+/* Count the pointer arguments consumed by a msvcrt *scanf format string.
+
+   FUNC and TYPE are supplied by the file including this template.  Every
+   '%' starts a conversion that stores through one pointer, except "%%"
+   (literal percent sign) and "%*..." (assignment suppressed).
+
+   Returns the number of storing conversions found in format.  */
+size_t FUNC(const TYPE *format);
+size_t FUNC(const TYPE *format)
+{
+  size_t count = 0;
+  for (; *format; format++) {
+    if (*format != (TYPE)'%')
+      continue;
+    /* Step onto the character after '%'.  A lone '%' at the very end of
+       the string stores nothing, and the loop increment must not walk
+       past the terminator.  */
+    if (*++format == 0)
+      break;
+    if (*format != (TYPE)'%' && *format != (TYPE)'*')
+      count++;
+  }
+  return count;
+}
diff --git a/mingw-w64-crt/stdio/scanf2-argcount-wchar.c 
b/mingw-w64-crt/stdio/scanf2-argcount-wchar.c
new file mode 100644
index 000000000000..d572e9be3e8a
--- /dev/null
+++ b/mingw-w64-crt/stdio/scanf2-argcount-wchar.c
@@ -0,0 +1,9 @@
+/**
+ * This file has no copyright assigned and is placed in the Public Domain.
+ * This file is part of the mingw-w64 runtime package.
+ * No warranty is given; refer to the file DISCLAIMER.PD within this package.
+ */
+
+#define FUNC __ms_wscanf_arg_count_internal
+#define TYPE wchar_t
+#include "scanf2-argcount-template.c"
diff --git a/mingw-w64-crt/stdio/vfscanf.c b/mingw-w64-crt/stdio/vfscanf.c
index dab72fe4a640..77db3333eef6 100644
--- a/mingw-w64-crt/stdio/vfscanf.c
+++ b/mingw-w64-crt/stdio/vfscanf.c
@@ -11,18 +11,22 @@ extern int __ms_vfscanf_internal (
   FILE * s,
   const char * format,
   va_list arg,
+  size_t count,
   int (*func)(FILE * __restrict__,  const char * __restrict__, ...))
   asm("__argtos");
 
+extern size_t __ms_scanf_arg_count_internal (const char * format);
+
 int __ms_vfscanf (FILE * __restrict__ stream, const char * __restrict__ 
format, va_list arg)
 {
+  size_t count = __ms_scanf_arg_count_internal (format);
   int ret;
 
 #if defined(_AMD64_) || defined(__x86_64__) || \
   defined(_X86_) || defined(__i386__) || \
   defined(_ARM_) || defined(__arm__) || \
   defined(_ARM64_) || defined(__aarch64__)
-  ret = __ms_vfscanf_internal (stream, format, arg, fscanf);
+  ret = __ms_vfscanf_internal (stream, format, arg, count, fscanf);
 #else
 #error "unknown platform"
 #endif
diff --git a/mingw-w64-crt/stdio/vfwscanf.c b/mingw-w64-crt/stdio/vfwscanf.c
index 52cf9283547b..6cb7c486300e 100644
--- a/mingw-w64-crt/stdio/vfwscanf.c
+++ b/mingw-w64-crt/stdio/vfwscanf.c
@@ -11,19 +11,23 @@ extern int __ms_vfwscanf_internal (
   FILE * s,
   const wchar_t * format,
   va_list arg,
+  size_t count,
   int (*func)(FILE * __restrict__,  const wchar_t * __restrict__, ...))
   asm("__argtos");
 
+extern size_t __ms_wscanf_arg_count_internal (const wchar_t * format);
+
 int __ms_vfwscanf (FILE * __restrict__ stream,
   const wchar_t * __restrict__ format, va_list arg)
 {
+  size_t count = __ms_wscanf_arg_count_internal (format);
   int ret;
 
 #if defined(_AMD64_) || defined(__x86_64__) || \
   defined(_X86_) || defined(__i386__) || \
   defined(_ARM_) || defined(__arm__) || \
   defined (_ARM64_) || defined (__aarch64__)
-  ret = __ms_vfwscanf_internal (stream, format, arg, fwscanf);
+  ret = __ms_vfwscanf_internal (stream, format, arg, count, fwscanf);
 #else
 #error "unknown platform"
 #endif
diff --git a/mingw-w64-crt/stdio/vsscanf.c b/mingw-w64-crt/stdio/vsscanf.c
index 6c8fe5a56f40..d0d2dde36e3e 100644
--- a/mingw-w64-crt/stdio/vsscanf.c
+++ b/mingw-w64-crt/stdio/vsscanf.c
@@ -11,19 +11,23 @@ extern int __ms_vsscanf_internal (
   const char * s,
   const char * format,
   va_list arg,
+  size_t count,
   int (*func)(const char * __restrict__,  const char * __restrict__, ...))
   asm("__argtos");
 
+extern size_t __ms_scanf_arg_count_internal (const char * format);
+
 int __ms_vsscanf (const char * __restrict__ s,
   const char * __restrict__ format, va_list arg)
 {
+  size_t count = __ms_scanf_arg_count_internal (format);
   int ret;
 
 #if defined(_AMD64_) || defined(__x86_64__) || \
   defined(_X86_) || defined(__i386__) || \
   defined(_ARM_) || defined(__arm__) || \
   defined(_ARM64_) || defined(__aarch64__)
-  ret = __ms_vsscanf_internal (s, format, arg, sscanf);
+  ret = __ms_vsscanf_internal (s, format, arg, count, sscanf);
 #else
 #error "unknown platform"
 #endif
diff --git a/mingw-w64-crt/stdio/vswscanf.c b/mingw-w64-crt/stdio/vswscanf.c
index 941ed1205772..62d4274c4b37 100644
--- a/mingw-w64-crt/stdio/vswscanf.c
+++ b/mingw-w64-crt/stdio/vswscanf.c
@@ -11,19 +11,23 @@ extern int __ms_vswscanf_internal (
   const wchar_t * s,
   const wchar_t * format,
   va_list arg,
+  size_t count,
   int (*func)(const wchar_t * __restrict__,  const wchar_t * __restrict__, 
...))
   asm("__argtos");
 
+extern size_t __ms_wscanf_arg_count_internal (const wchar_t * format);
+
 int __ms_vswscanf(const wchar_t * __restrict__ s, const wchar_t * __restrict__ 
format,
   va_list arg)
 {
+  size_t count = __ms_wscanf_arg_count_internal (format);
   int ret;
 
 #if defined(_AMD64_) || defined(__x86_64__) || \
   defined(_X86_) || defined(__i386__) || \
   defined(_ARM_) || defined(__arm__) || \
   defined(_ARM64_) || defined(__aarch64__)
-  ret = __ms_vswscanf_internal (s, format, arg, swscanf);
+  ret = __ms_vswscanf_internal (s, format, arg, count, swscanf);
 #else
 #error "unknown platform"
 #endif
-- 
2.20.1



_______________________________________________
Mingw-w64-public mailing list
Mingw-w64-public@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/mingw-w64-public

Reply via email to