Hello,

Attached is a sample program which deliberately induces a segmentation
fault in some of GMP's assembly code; in this case mpn_divrem_1 since
that happens to be where I first discovered the problem, though it
could affect most any of them.  I am working on 64-bit Cygwin, but
this problem can affect any code on Windows (including Cygwin, MinGW,
native Windows) that uses GMP.  The problem is particularly
severe on Cygwin, which relies on structured exception handling to
catch exceptions and convert them to POSIX signals and/or to return
proper error codes.

When working correctly, compiling and running the attached program
should work as follows:

$ gcc test.c -lgmp
$ ./a.exe; echo $?
Segmentation fault (core dumped)
139

Instead this is what happens currently:

$ ./a.exe; echo $?
0

No output, and the process exit code is zero (the latter is an
unfortunate quirk of Cygwin: if a process terminates due to an
exception that Cygwin's exception handler was unable to handle, the
exit code defaults to zero).

This is because the assembly routines do not include the metadata that
is necessary on 64-bit Windows [1] for stack unwinding to work
properly during exception handling.  This was brought up once before
on this list many years ago [2] but AFAICT nothing has ever been done
about it.

I was able to confirm that this was the issue by manually editing the
assembly for mpn_divrem_1 and recompiling/linking.  I modified the
function prologue to look like:

        .seh_proc __gmpn_divrem_1_x86_64
__gmpn_divrem_1_x86_64:

        push    %rdi
        .seh_pushreg    %rdi
        push    %rsi
        .seh_pushreg    %rsi
        mov     %rcx, %rdi
        mov     %rdx, %rsi
        mov     %r8, %rdx
        mov     %r9, %rcx

        mov     56(%rsp), %r8
        xor     %eax, %eax
        push    %r13
        .seh_pushreg    %r13
        push    %r12
        .seh_pushreg    %r12
        push    %rbp
        .seh_pushreg    %rbp
        push    %rbx
        .seh_pushreg    %rbx
        .seh_endprologue

At the end of the function just a .seh_endproc is needed as well.

Technically this is still not quite correct, because the procedure
later modifies RSP in preparation for a `call    __gmpn_invert_limb`.
Handling that correctly requires establishing a frame pointer in the
prologue; otherwise, if an exception occurred inside
__gmpn_invert_limb, stack unwinding would still fail.  This does not
happen in my test case, so it works anyway--but the subtlety is worth
noting.

I believe this is possible to fix in general, and would be happy to
work on a patch if it would be accepted in principle.

Many of the existing m4 macros in the assembly routines (such as
PROLOGUE and EPILOGUE) can be modified for x64 to add the necessary
bits.  It would probably be good also to replace some of the explicit
`push <reg>` instructions in the prologues with some new macros
similar to those provided by MASM [3].  For example a
`push_reg(<reg>)` macro would emit (on Windows 64):

    push <reg>
    .seh_pushreg <reg>

whereas on all other platforms it would just emit the plain `push
<reg>` instruction.  This part I believe is easy.

The trickiest part is just ensuring that some register is available to
establish a frame pointer, when necessary (it doesn't necessarily have
to be RBP; any nonvolatile register will do).  In the case of
mpn_divrem_1 I can see that %r14 is available, but I will have to go
through all the routines one by one and work this out.

So, should I work on this?

Thanks,
Madison


[1] 
https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64?view=vs-2019
[2] https://gmplib.org/list-archives/gmp-bugs/2008-March/000951.html
[3] 
https://docs.microsoft.com/en-us/cpp/build/exception-handling-x64?view=vs-2019#masm-macros
#include <gmp.h>
#include <stdlib.h>


/* Deliberately crash inside GMP's assembly mpn_divrem_1 so the state
 * of Windows SEH unwind metadata in that routine can be observed.
 * The choice of function is arbitrary; it is simply one whose
 * assembly implementation is known to exhibit the problem on the
 * author's system. */
int main(void) {
    mp_limb_t source[1] = {0};       /* one source limb, value 0 */
    mp_size_t source_limbs = 1;
    mp_size_t fraction_limbs = 0;    /* qxn */
    mp_limb_t divisor = 1;

    /* Bogus destination pointer: storing the quotient limb here is
     * expected to raise an access violation (segfault). */
    mp_limb_t *bad_dest = (mp_limb_t *)0x1234;

    mpn_divrem_1(bad_dest, fraction_limbs, source, source_limbs, divisor);
    return 1;                        /* unreachable if the fault fires */
}
















































































                


















        .text
        .align  16, 0x90
        .globl  __gmpn_preinv_divrem_1_x86_64

# __gmpn_preinv_divrem_1_x86_64 -- divrem_1 with a precomputed inverse.
# Win64 entry: shuffles the Microsoft-ABI arguments
#   rcx=qp  rdx=qxn  r8=up  r9=un  stack: d, dinv, shift
# into the register layout used by the shared body in
# __gmpn_divrem_1_x86_64 and jumps into it at Lnent/Luent.  Both
# procedures push the same six nonvolatile registers, so the stack
# layout matches at the join points.
#
# Fix: the pushes of %r13/%r12/%rbp/%rbx were missing their
# .seh_pushreg records and the prologue had no .seh_endprologue, so
# the Win64 unwinder could not restore nonvolatile registers for this
# function (the defect this file exists to demonstrate).
        .seh_proc __gmpn_preinv_divrem_1_x86_64
__gmpn_preinv_divrem_1_x86_64:

        push    %rdi                    # Win64 callee-saved registers;
        .seh_pushreg    %rdi            #  each push is recorded so the SEH
        push    %rsi                    #  unwinder can restore them
        .seh_pushreg    %rsi
        mov     %rcx, %rdi              # qp
        mov     %rdx, %rsi              # qxn
        mov     %r8, %rdx               # up
        mov     %r9, %rcx               # un

        mov     56(%rsp), %r8           # d    (ret + 2 pushes + 32B shadow)
        mov     64(%rsp), %r9           # dinv (precomputed inverse of d)
        xor     %eax, %eax              # remainder accumulator = 0
        push    %r13
        .seh_pushreg    %r13
        push    %r12
        .seh_pushreg    %r12
        push    %rbp
        .seh_pushreg    %rbp
        push    %rbx
        .seh_pushreg    %rbx
        .seh_endprologue                # prologue unwind metadata complete

        mov     %rsi, %r12              # r12 = qxn
        mov     %rcx, %rbx              # rbx = un
        add     %rsi, %rcx              # rcx = un + qxn
        mov     %rdx, %rsi              # rsi = up

        lea     -8(%rdi,%rcx,8), %rdi   # rdi -> highest quotient limb slot

        test    %r8, %r8
        js      Lnent                   # top bit of d set: normalized path

        mov     104(%rsp), %cl          # shift (7th arg, after 6 pushes)
        shl     %cl, %r8                # pre-normalize d
        jmp     Luent                   # join shared unnormalized body
.seh_endproc
        

        .align  16, 0x90
        .globl  __gmpn_divrem_1_x86_64

# __gmpn_divrem_1_x86_64 -- divide {up,un} (plus qxn zero "fraction"
# limbs) by a single limb d, storing quotient limbs high-to-low and
# returning the remainder in %rax.
#
# Win64 entry: the Microsoft-ABI arguments
#   rcx=qp  rdx=qxn  r8=up  r9=un  56(%rsp)=d
# are shuffled into the SysV-style registers the body was written for.
#
# Register roles in the body:
#   rdi   descending store pointer for quotient limbs
#   rsi   source limb pointer (up)
#   rbx   integer limbs remaining
#   r12   fraction limbs remaining (qxn)
#   r8    divisor d (left-shifted on the unnormalized path)
#   r9    inverse of d returned by __gmpn_invert_limb
#   cl    normalization shift count (unnormalized path; 0 otherwise)
#   rax   running partial remainder; final (unshifted) remainder
#   rbp/r10/r11/r13  loop temporaries; r13 holds each quotient limb
        .seh_proc __gmpn_divrem_1_x86_64
__gmpn_divrem_1_x86_64:

        push    %rdi                    # Win64 callee-saved; recorded
        .seh_pushreg    %rdi            #  for the SEH unwinder
        push    %rsi
        .seh_pushreg    %rsi
        mov     %rcx, %rdi              # qp
        mov     %rdx, %rsi              # qxn (moved again below)
        mov     %r8, %rdx               # up
        mov     %r9, %rcx               # un

        mov     56(%rsp), %r8           # d (ret + 2 pushes + 32B shadow)
        xor     %eax, %eax              # remainder accumulator = 0
        push    %r13
        .seh_pushreg    %r13
        push    %r12
        .seh_pushreg    %r12
        push    %rbp
        .seh_pushreg    %rbp
        push    %rbx
        .seh_pushreg    %rbx
        .seh_endprologue

        mov     %rsi, %r12              # r12 = qxn
        mov     %rcx, %rbx              # rbx = un
        add     %rsi, %rcx              # rcx = un + qxn (sets ZF)
        mov     %rdx, %rsi              # rsi = up (mov leaves flags alone)
        je      Lret                    # nothing to do if un + qxn == 0

        lea     -8(%rdi,%rcx,8), %rdi   # rdi -> highest quotient limb slot
        xor     %ebp, %ebp

        test    %r8, %r8
        jns     Lunnormalized           # top bit of d clear -> shift path

Lnormalized:
        # d already has its top bit set.  Peel the most significant
        # source limb: the top quotient limb is 1 iff u[un-1] >= d.
        test    %rbx, %rbx
        je      L8                      
        mov     -8(%rsi,%rbx,8), %rbp   # rbp = top source limb
        dec     %rbx
        mov     %rbp, %rax
        sub     %r8, %rbp               # tentative rbp = u_top - d
        cmovc   %rax, %rbp              # undo if it borrowed
        sbb     %eax, %eax              # eax = -1 on borrow, else 0
        inc     %eax                    # -> quotient limb 0 or 1
        mov     %rax, (%rdi)
        lea     -8(%rdi), %rdi
L8:

        # Call __gmpn_invert_limb(d); r8 is volatile and must survive.
        push    %r8

        sub     $32, %rsp               # Win64 32-byte shadow space
        mov     %r8, %rcx               
        
        call    __gmpn_invert_limb
        add     $32, %rsp       
        pop     %r8

        # NOTE(review): rsp moves after .seh_endprologue; unwinding
        # from inside __gmpn_invert_limb would need a frame pointer.

        mov     %rax, %r9               # r9 = inverse of d
        mov     %rbp, %rax              # rax = partial remainder
        jmp     Lnent

        # Main loop, normalized d: one quotient limb per pass,
        # estimated via the inverse then adjusted by at most one.
        .align  16, 0x90
Lntop:mov       (%rsi,%rbx,8), %r10     # next source limb
        mul     %r9                     # rdx:rax = rax * inverse
        add     %r10, %rax              
        adc     %rbp, %rdx              
        mov     %rax, %rbp              
        mov     %rdx, %r13              # candidate quotient limb
        imul    %r8, %rdx                       
        sub     %rdx, %r10              # candidate remainder
        mov     %r8, %rax                       
        add     %r10, %rax              
        cmp     %rbp, %r10              
        cmovc   %r10, %rax              
        adc     $-1, %r13               
        cmp     %r8, %rax                       
        jae     Lnfx                    # rare: estimate off by one
Lnok:   mov     %r13, (%rdi)            # store limb, move store ptr down
        sub     $8, %rdi                        
Lnent:lea       1(%rax), %rbp           
        dec     %rbx                    
        jns     Lntop                   

        xor     %ecx, %ecx              # no normalization shift to undo
        jmp     Lfrac

Lnfx:   sub     %r8, %rax               # remainder was >= d: reduce it
        inc     %r13                    #  and bump the quotient limb
        jmp     Lnok

Lunnormalized:
        # Top bit of d clear: shift d (and numerator) left so the
        # loop invariants hold; the shift is undone before Lret.
        test    %rbx, %rbx
        je      L44
        mov     -8(%rsi,%rbx,8), %rax
        cmp     %r8, %rax
        jae     L44
        mov     %rbp, (%rdi)            # u_top < d: top quotient limb 0
        mov     %rax, %rbp
        lea     -8(%rdi), %rdi
        je      Lret                    # NOTE(review): ZF here is from the
                                        #  cmp above and cannot be set on
                                        #  this path; looks vestigial --
                                        #  confirm against GMP sources
        dec     %rbx
L44:
        bsr     %r8, %rcx               # index of d's highest set bit
        not     %ecx                    # cl = leading-zero count (mod 64)
        shl     %cl, %r8                # normalize d
        shl     %cl, %rbp

        # Call __gmpn_invert_limb(d << cl); preserve cl and r8.
        push    %rcx


        push    %r8


        sub     $40, %rsp               # shadow space + alignment padding
        mov     %r8, %rcx               
        
        call    __gmpn_invert_limb

        add     $40, %rsp       
        pop     %r8


        pop     %rcx

        mov     %rax, %r9               # r9 = inverse of normalized d
        mov     %rbp, %rax
        test    %rbx, %rbx
        je      Lfrac

Luent:dec       %rbx                    # shared entry from preinv wrapper
        mov     (%rsi,%rbx,8), %rbp
        neg     %ecx
        shr     %cl, %rbp               # high bits of next limb
        neg     %ecx
        or      %rbp, %rax
        jmp     Lent

        # Main loop, unnormalized d: like Lntop, but each numerator
        # limb is assembled from two adjacent limbs shifted by cl.
        .align  16, 0x90
Lutop:mov       (%rsi,%rbx,8), %r10
        shl     %cl, %rbp
        neg     %ecx
        shr     %cl, %r10
        neg     %ecx
        or      %r10, %rbp              # next shifted numerator limb
        mul     %r9
        add     %rbp, %rax
        adc     %r11, %rdx
        mov     %rax, %r11
        mov     %rdx, %r13              # candidate quotient limb
        imul    %r8, %rdx
        sub     %rdx, %rbp
        mov     %r8, %rax
        add     %rbp, %rax
        cmp     %r11, %rbp
        cmovc   %rbp, %rax
        adc     $-1, %r13
        cmp     %r8, %rax
        jae     Lufx                    # rare fix-up
Luok:   mov     %r13, (%rdi)
        sub     $8, %rdi
Lent:   mov     (%rsi,%rbx,8), %rbp
        dec     %rbx
        lea     1(%rax), %r11
        jns     Lutop

        # Lowest limb: only its shifted-in low bits remain in rbp.
Luend:shl       %cl, %rbp
        mul     %r9
        add     %rbp, %rax
        adc     %r11, %rdx
        mov     %rax, %r11
        mov     %rdx, %r13
        imul    %r8, %rdx
        sub     %rdx, %rbp
        mov     %r8, %rax
        add     %rbp, %rax
        cmp     %r11, %rbp
        cmovc   %rbp, %rax
        adc     $-1, %r13
        cmp     %r8, %rax
        jae     Lefx
Leok:   mov     %r13, (%rdi)
        sub     $8, %rdi
        jmp     Lfrac

Lufx:   sub     %r8, %rax               # off-by-one fix-up (main loop)
        inc     %r13
        jmp     Luok
Lefx:   sub     %r8, %rax               # off-by-one fix-up (last limb)
        inc     %r13
        jmp     Leok

        # Fraction loop: emit the qxn low quotient limbs; no memory
        # loads here (numerator limbs are implicitly zero), r12 counts.
Lfrac:mov       %r8, %rbp
        neg     %rbp                    # rbp = -d
        jmp     Lfent

        .align  16, 0x90                        
Lftop:mul       %r9                     
        add     %r11, %rdx              
        mov     %rax, %r11              
        mov     %rdx, %r13              # quotient limb
        imul    %rbp, %rdx              
        mov     %r8, %rax                       
        add     %rdx, %rax              
        cmp     %r11, %rdx              
        cmovc   %rdx, %rax              
        adc     $-1, %r13               
        mov     %r13, (%rdi)            
        sub     $8, %rdi                        
Lfent:lea       1(%rax), %r11           
        dec     %r12                    
        jns     Lftop                   

        shr     %cl, %rax               # undo normalization (cl=0 if none)
Lret:   pop     %rbx                    # restore in reverse push order
        pop     %rbp
        pop     %r12
        pop     %r13
        pop     %rsi
        pop     %rdi
        ret
.seh_endproc
        


_______________________________________________
gmp-bugs mailing list
[email protected]
https://gmplib.org/mailman/listinfo/gmp-bugs

Reply via email to