Currently v*scanf functions are broken and crash when are called with more than 30 arguments in va_list. This is because va_list v*scanf functions are redirected to variadic *scanf functions and this redirect implemented in scanf.S file has fixed limit for 30 arguments.
Number of arguments for msvcrt *scanf function can be determined from format string by counting number of '%' characters which are not followed by another '%' or '*'. Every scanf parameter is pointer and therefore has fixed size which means that required stack size can be exactly calculated. Fix this scanf.S redirect implementation by dynamically allocating stack for exact number of pointer parameters. --- I have tested this patch for i686 and x86_64. Both ARM (arm32 and aarch64) changes are untested, so please test it if vsscanf() on these platforms still works. With this patch following code works fine without any crashing. Compile for msvcrt with -std=c89 or -D__USE_MINGW_ANSI_STDIO=0 #include <stdio.h> #include <stdarg.h> int call_vsscanf(const char *str, const char *format, ...) { int ret; va_list ap; va_start(ap, format); ret = vsscanf(str, format, ap); va_end(ap); return ret; } int main() { char b[53]; call_vsscanf( "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ", "%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c" "%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c%c", &b[0],&b[1],&b[2],&b[3],&b[4],&b[5],&b[6],&b[7],&b[8],&b[9],&b[10],&b[11], &b[12],&b[13],&b[14],&b[15],&b[16],&b[17],&b[18],&b[19],&b[20],&b[21], &b[22],&b[23],&b[24],&b[25],&b[26],&b[27],&b[28],&b[29],&b[30],&b[31], &b[32],&b[33],&b[34],&b[35],&b[36],&b[37],&b[38],&b[39],&b[40],&b[41], &b[42],&b[43],&b[44],&b[45],&b[46],&b[47],&b[48],&b[49],&b[50],&b[51] ); printf("b=%s\n", b); return 0; } --- mingw-w64-crt/Makefile.am | 2 + mingw-w64-crt/stdio/scanf.S | 166 ++++++++++-------- mingw-w64-crt/stdio/scanf2-argcount-char.c | 9 + .../stdio/scanf2-argcount-template.c | 22 +++ mingw-w64-crt/stdio/scanf2-argcount-wchar.c | 9 + mingw-w64-crt/stdio/vfscanf.c | 6 +- mingw-w64-crt/stdio/vfwscanf.c | 6 +- mingw-w64-crt/stdio/vsscanf.c | 6 +- mingw-w64-crt/stdio/vswscanf.c | 6 +- 9 files changed, 154 insertions(+), 78 deletions(-) create mode 100644 mingw-w64-crt/stdio/scanf2-argcount-char.c create mode 100644 mingw-w64-crt/stdio/scanf2-argcount-template.c create mode 100644 mingw-w64-crt/stdio/scanf2-argcount-wchar.c diff --git a/mingw-w64-crt/Makefile.am b/mingw-w64-crt/Makefile.am index 5fe84f8bafe4..6cae3b4a52d2 100644 --- a/mingw-w64-crt/Makefile.am +++ b/mingw-w64-crt/Makefile.am @@ -504,6 +504,7 @@ src_libmingwex=\ misc/wmemset.c misc/ftw.c misc/ftw64.c misc/mingw-access.c \ \ stdio/mingw_pformat.h \ + stdio/scanf2-argcount-char.c stdio/scanf2-argcount-wchar.c \ stdio/vfscanf2.S stdio/vfwscanf2.S stdio/vscanf2.S stdio/vsscanf2.S stdio/vswscanf2.S \ stdio/vwscanf2.S stdio/strtok_r.c stdio/scanf.S \ stdio/_Exit.c stdio/_findfirst64i32.c stdio/_findnext64i32.c stdio/_fstat.c \ @@ -2244,6 +2245,7 @@ EXTRA_DIST += revstamp.h \ profile/gcrt0.c \ profile/COPYING \ profile/CYGWIN_LICENSE \ + stdio/scanf2-argcount-template.c \ stdio/scanf2-template.S DISTCHECK_CONFIGURE_FLAGS = --host=$(host_triplet) $(withsys) diff --git a/mingw-w64-crt/stdio/scanf.S b/mingw-w64-crt/stdio/scanf.S index 1e0bed9452ac..ae8090e4cf9c 100644 --- a/mingw-w64-crt/stdio/scanf.S +++ b/mingw-w64-crt/stdio/scanf.S @@ -9,17 +9,14 @@ The goal of this routine is to turn a call to v*scanf into a call to s*scanf. This is needed because mingw-w64 uses msvcr100.dll, which doesn't support the v*scanf functions instead of msvcr120.dll which does. - Unfortunately, there is no defined way to know exactly how big a va_list - is, so we use a hard-coded buffer. - - I suppose a sufficiently-motivated person could try to parse the format - to figure out how many tokens there are... */ +*/ /* The function prototype here is (essentially): - int __ms_vsscanf_internal (void *s, + int __ms_v*scanf_internal (void *s, void *format, void *arg, + size_t count, void *func); I say 'essentially' because passing a function pointer as void in ISO @@ -37,19 +34,6 @@ */ .def __argtos; .scl 2; .type 32; .endef - /* The max number of pointers we support. Must be an even number - to keep the 64bit stack 16byte aligned. Must not be less than 4. */ - .equ entries, 30 - - /* 64bit pointers are 8 bytes. */ - .equ sizeof, 8 - - /* Size of our buffer. */ - .equ iBytes, entries * sizeof - - /* Stack space for first 2 args to s*scanf. */ - .equ iOffset, (2 * sizeof) - .seh_proc __argtos __argtos: @@ -58,48 +42,57 @@ __argtos: - format must be in rdx. That's where it is on entry. - The first pointer in arg must be in r8. arg is in r8 on entry. - The second pointer in arg must be in r9. arg is in r8 on entry. - - The ($entries - 2) other pointers in arg must be on the stack, + - The (count - 2) other pointers in arg must be on the stack, starting 32bytes into rsp. */ - /* We need enough room to shadow (s + format) - + (enough room for all the other args). */ - subq $(iOffset + iBytes), %rsp - .seh_stackalloc iOffset + iBytes + pushq %rbp + .seh_pushreg %rbp + movq %rsp, %rbp + .seh_setframe %rbp, 0 + /* We need to always reserve space to shadow 4 parameters. */ + subq $32, %rsp + .seh_stackalloc 32 .seh_endprologue - /* We are going to copy $entries pointers from arg to our - local stack. Except the first 2, since they will be - loaded in registers. */ - movq $entries - 2, %r10 /* # of ptrs to copy. */ + movq 48(%rbp), %r10 /* func. */ + + /* We need enough room to shadow all the other args. + Except the first 2, since they will be loaded in registers. */ + cmpq $2, %r9 /* count. */ + jbe .SKIP + subq $2, %r9 /* # of ptrs to copy. */ + movq %r9, %rax + salq $3, %rax /* (count - 2) * 8. */ + subq %rax, %rsp - /* The first 32 bytes are in registers, but by spec, space - must still be reserved for them on the stack. Put the + /* We are going to copy parameters from arg to our local stack. + The first 32 bytes are in registers, but by spec, space + must still be reserved for them on the stack. Put the rest of the pointers in the stack after that. */ lea 32(%rsp), %r11 /* dst. */ .LOOP: - subq $1, %r10 + subq $1, %r9 /* Use 16 to skip over the first 2 pointers. */ - movq 16(%r8, %r10, 8), %rax - movq %rax, (%r11, %r10, 8) + movq 16(%r8, %r9, 8), %rax + movq %rax, (%r11, %r9, 8) jnz .LOOP - /* r9 contains the routine we are going to call. Since we are about to - overwrite it, move it somewhere safe. */ - movq %r9, %r10 - +.SKIP: /* The stack is now correctly populated, and so are rcx and rdx. But we need to load the last 2 regs before making the call. */ movq 0x8(%r8), %r9 /* 2nd dest location (may be garbage if only 1 arg). */ - movq (%r8), %r8 /* 1st dest location. */ + movq (%r8), %r8 /* 1st dest location (may be garbage if no arg). */ /* Make the call. */ callq *%r10 - addq $(iOffset + iBytes), %rsp + /* Restore stack. */ + movq %rbp, %rsp + popq %rbp retq .seh_endproc @@ -113,31 +106,23 @@ __argtos: */ .def __argtos; .scl 2; .type 32; .endef - /* The max number of pointers we support. Must not be less than 1. */ - .equ entries, 30 - - /* 64bit pointers are 8 bytes. */ - .equ sizeof, 4 - - /* Size of our buffer. */ - .set iBytes, entries * sizeof - - /* Stack space for first 2 args to s*scanf. */ - .equ iOffset, (2 * sizeof) - __argtos: pushl %ebp movl %esp, %ebp pushl %edi + pushl %ebx /* Reserve enough stack space for everything. Stack usage will look like: 4 bytes - s 4 bytes - format - (iBytes) bytes - variable # of parameters for sscanf (all ptrs). */ + 4*count bytes - variable # of parameters for sscanf (all ptrs). */ - subl $(iOffset + iBytes), %esp + movl 20(%ebp), %ebx /* count. */ + addl $2, %ebx /* s + format. */ + sall $2, %ebx /* (count + 2) * 4. */ + subl %ebx, %esp /* Write out s and format where they need to be for the sscanf call. */ movl 8(%ebp), %eax @@ -145,10 +130,12 @@ __argtos: movl 12(%ebp), %edx movl %edx, 0x4(%esp) /* format. */ - /* We are going to copy $entries pointers from arg to our + /* We are going to copy _count_ pointers from arg to our local stack. */ - movl $entries, %ecx /* # of ptrs to copy. */ - lea iOffset(%esp), %edi /* dst. */ + movl 20(%ebp), %ecx /* # of ptrs to copy. */ + testl %ecx, %ecx + jz .SKIP + lea 8(%esp), %edi /* dst. */ movl 16(%ebp), %edx /* src. */ .LOOP: @@ -158,13 +145,16 @@ __argtos: movl %eax, (%edi, %ecx, 4) jnz .LOOP +.SKIP: /* The stack is now correctly populated. */ /* Make the call. */ - call *20(%ebp) + call *24(%ebp) /* Restore stack. */ - addl $(iOffset + iBytes), %esp + addl %ebx, %esp + + popl %ebx popl %edi leave @@ -178,25 +168,32 @@ __argtos: .globl __argtos __argtos: - push {r4-r7, lr} - sub sp, sp, #128 - mov r12, r3 + push {r4-r8, lr} + ldr r12, [sp, #0] + + cmp r3, #0 + ldrne r5, [r2], #4 + subsne r3, r3, #1 + ldrne r6, [r2], #4 + subsne r3, r3, #1 + moveq r8, #0 + beq 2b + + mov r8, r3 + sub sp, sp, r8 mov r4, sp - - ldr r5, [r2], #4 - ldr r6, [r2], #4 - - mov r3, #116 1: ldr r7, [r2], #4 str r7, [r4], #4 - subs r3, r3, #4 + subs r3, r3, #1 bne 1b +2: mov r2, r5 mov r3, r6 blx r12 - add sp, sp, #128 - pop {r4-r7, pc} + + add sp, sp, r8 + pop {r4-r8, pc} #elif defined (__aarch64__) @@ -207,25 +204,46 @@ __argtos: __argtos: stp x29, x30, [sp, #-16]! mov x29, sp - sub sp, sp, #256 - mov x9, sp mov x10, x2 mov x11, x3 + mov x12, x4 + + cmp r11, #0 + b.eq 2b ldr x2, [x10], #8 + subs x11, x11, #1 + b.eq 2b + ldr x3, [x10], #8 + subs x11, x11, #1 + b.eq 2b + ldr x4, [x10], #8 + subs x11, x11, #1 + b.eq 2b + ldr x5, [x10], #8 + subs x11, x11, #1 + b.eq 2b + ldr x6, [x10], #8 + subs x11, x11, #1 + b.eq 2b + ldr x7, [x10], #8 + subs x11, x11, #1 + b.eq 2b - mov x12, #240 + sub sp, sp, x11 + mov x9, sp 1: ldr x13, [x10], #8 str x13, [x9], #8 - subs x12, x12, #8 + subs x11, x11, #1 b.ne 1b - blr x11 +2: + blr x12 mov sp, x29 ldp x29, x30, [sp], #16 ret diff --git a/mingw-w64-crt/stdio/scanf2-argcount-char.c b/mingw-w64-crt/stdio/scanf2-argcount-char.c new file mode 100644 index 000000000000..ee0ee2fdedb9 --- /dev/null +++ b/mingw-w64-crt/stdio/scanf2-argcount-char.c @@ -0,0 +1,9 @@ +/** + * This file has no copyright assigned and is placed in the Public Domain. + * This file is part of the mingw-w64 runtime package. + * No warranty is given; refer to the file DISCLAIMER.PD within this package. + */ + +#define FUNC __ms_scanf_arg_count_internal +#define TYPE char +#include "scanf2-argcount-template.c" diff --git a/mingw-w64-crt/stdio/scanf2-argcount-template.c b/mingw-w64-crt/stdio/scanf2-argcount-template.c new file mode 100644 index 000000000000..c07e11797c39 --- /dev/null +++ b/mingw-w64-crt/stdio/scanf2-argcount-template.c @@ -0,0 +1,22 @@ +/** + * This file has no copyright assigned and is placed in the Public Domain. + * This file is part of the mingw-w64 runtime package. + * No warranty is given; refer to the file DISCLAIMER.PD within this package. + */ + +#include <stddef.h> + +size_t FUNC(const TYPE *format); +size_t FUNC(const TYPE *format) +{ + size_t count = 0; + for (; *format; format++) { + if (*format != (TYPE)'%') + continue; + format++; + if (*format == (TYPE)'%' || *format == (TYPE)'*') + continue; + count++; + } + return count; +} diff --git a/mingw-w64-crt/stdio/scanf2-argcount-wchar.c b/mingw-w64-crt/stdio/scanf2-argcount-wchar.c new file mode 100644 index 000000000000..d572e9be3e8a --- /dev/null +++ b/mingw-w64-crt/stdio/scanf2-argcount-wchar.c @@ -0,0 +1,9 @@ +/** + * This file has no copyright assigned and is placed in the Public Domain. + * This file is part of the mingw-w64 runtime package. + * No warranty is given; refer to the file DISCLAIMER.PD within this package. + */ + +#define FUNC __ms_wscanf_arg_count_internal +#define TYPE wchar_t +#include "scanf2-argcount-template.c" diff --git a/mingw-w64-crt/stdio/vfscanf.c b/mingw-w64-crt/stdio/vfscanf.c index dab72fe4a640..77db3333eef6 100644 --- a/mingw-w64-crt/stdio/vfscanf.c +++ b/mingw-w64-crt/stdio/vfscanf.c @@ -11,18 +11,22 @@ extern int __ms_vfscanf_internal ( FILE * s, const char * format, va_list arg, + size_t count, int (*func)(FILE * __restrict__, const char * __restrict__, ...)) asm("__argtos"); +extern size_t __ms_scanf_arg_count_internal (const char * format); + int __ms_vfscanf (FILE * __restrict__ stream, const char * __restrict__ format, va_list arg) { + size_t count = __ms_scanf_arg_count_internal (format); int ret; #if defined(_AMD64_) || defined(__x86_64__) || \ defined(_X86_) || defined(__i386__) || \ defined(_ARM_) || defined(__arm__) || \ defined(_ARM64_) || defined(__aarch64__) - ret = __ms_vfscanf_internal (stream, format, arg, fscanf); + ret = __ms_vfscanf_internal (stream, format, arg, count, fscanf); #else #error "unknown platform" #endif diff --git a/mingw-w64-crt/stdio/vfwscanf.c b/mingw-w64-crt/stdio/vfwscanf.c index 52cf9283547b..6cb7c486300e 100644 --- a/mingw-w64-crt/stdio/vfwscanf.c +++ b/mingw-w64-crt/stdio/vfwscanf.c @@ -11,19 +11,23 @@ extern int __ms_vfwscanf_internal ( FILE * s, const wchar_t * format, va_list arg, + size_t count, int (*func)(FILE * __restrict__, const wchar_t * __restrict__, ...)) asm("__argtos"); +extern size_t __ms_wscanf_arg_count_internal (const wchar_t * format); + int __ms_vfwscanf (FILE * __restrict__ stream, const wchar_t * __restrict__ format, va_list arg) { + size_t count = __ms_wscanf_arg_count_internal (format); int ret; #if defined(_AMD64_) || defined(__x86_64__) || \ defined(_X86_) || defined(__i386__) || \ defined(_ARM_) || defined(__arm__) || \ defined (_ARM64_) || defined (__aarch64__) - ret = __ms_vfwscanf_internal (stream, format, arg, fwscanf); + ret = __ms_vfwscanf_internal (stream, format, arg, count, fwscanf); #else #error "unknown platform" #endif diff --git a/mingw-w64-crt/stdio/vsscanf.c b/mingw-w64-crt/stdio/vsscanf.c index 6c8fe5a56f40..d0d2dde36e3e 100644 --- a/mingw-w64-crt/stdio/vsscanf.c +++ b/mingw-w64-crt/stdio/vsscanf.c @@ -11,19 +11,23 @@ extern int __ms_vsscanf_internal ( const char * s, const char * format, va_list arg, + size_t count, int (*func)(const char * __restrict__, const char * __restrict__, ...)) asm("__argtos"); +extern size_t __ms_scanf_arg_count_internal (const char * format); + int __ms_vsscanf (const char * __restrict__ s, const char * __restrict__ format, va_list arg) { + size_t count = __ms_scanf_arg_count_internal (format); int ret; #if defined(_AMD64_) || defined(__x86_64__) || \ defined(_X86_) || defined(__i386__) || \ defined(_ARM_) || defined(__arm__) || \ defined(_ARM64_) || defined(__aarch64__) - ret = __ms_vsscanf_internal (s, format, arg, sscanf); + ret = __ms_vsscanf_internal (s, format, arg, count, sscanf); #else #error "unknown platform" #endif diff --git a/mingw-w64-crt/stdio/vswscanf.c b/mingw-w64-crt/stdio/vswscanf.c index 941ed1205772..62d4274c4b37 100644 --- a/mingw-w64-crt/stdio/vswscanf.c +++ b/mingw-w64-crt/stdio/vswscanf.c @@ -11,19 +11,23 @@ extern int __ms_vswscanf_internal ( const wchar_t * s, const wchar_t * format, va_list arg, + size_t count, int (*func)(const wchar_t * __restrict__, const wchar_t * __restrict__, ...)) asm("__argtos"); +extern size_t __ms_wscanf_arg_count_internal (const wchar_t * format); + int __ms_vswscanf(const wchar_t * __restrict__ s, const wchar_t * __restrict__ format, va_list arg) { + size_t count = __ms_wscanf_arg_count_internal (format); int ret; #if defined(_AMD64_) || defined(__x86_64__) || \ defined(_X86_) || defined(__i386__) || \ defined(_ARM_) || defined(__arm__) || \ defined(_ARM64_) || defined(__aarch64__) - ret = __ms_vswscanf_internal (s, format, arg, swscanf); + ret = __ms_vswscanf_internal (s, format, arg, count, swscanf); #else #error "unknown platform" #endif -- 2.20.1 _______________________________________________ Mingw-w64-public mailing list Mingw-w64-public@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/mingw-w64-public