Using RBP as a temporary register breaks frame pointer convention and
breaks stack traces when unwinding from an interrupt in the crypto code.

Use R12 instead of RBP for the TBL register.  Since R12 is also used as
another temporary register (T1), it gets clobbered in each round of
computation.  So the table address needs to be freshly reloaded into R12
each time it's used.
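
For illustration, each constant-table access in the main loops now follows
the reload-before-use pattern below (excerpted from the loop1 hunk in the
diff; the round macros use T1, i.e. R12, which is why TBL must be reloaded
first):

	lea	K256(%rip), TBL			# reload table pointer into R12
	vpaddd	0*32(TBL, SRND), X0, XFER	# add round constants for this group
	vmovdqa	XFER, 0*32+_XFER(%rsp, SRND)	# stash XFER for the round macro
	FOUR_ROUNDS_AND_SCHED	_XFER + 0*32	# uses T1 (R12), clobbering TBL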

Reported-by: Eric Biggers <ebigg...@google.com>
Reported-by: Peter Zijlstra <pet...@infradead.org>
Signed-off-by: Josh Poimboeuf <jpoim...@redhat.com>
---
 arch/x86/crypto/sha256-avx2-asm.S | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S
index 89c8f09787d2..cdd647231fa9 100644
--- a/arch/x86/crypto/sha256-avx2-asm.S
+++ b/arch/x86/crypto/sha256-avx2-asm.S
@@ -99,7 +99,7 @@ e       = %edx        # clobbers NUM_BLKS
 y3     = %esi  # clobbers INP
 
 
-TBL    = %rbp
+TBL    = %r12  # clobbered by T1
 SRND   = CTX   # SRND is same register as CTX
 
 a = %eax
@@ -531,7 +531,6 @@ STACK_SIZE  = _RSP      + _RSP_SIZE
 ENTRY(sha256_transform_rorx)
 .align 32
        pushq   %rbx
-       pushq   %rbp
        pushq   %r12
        pushq   %r13
        pushq   %r14
@@ -568,8 +567,6 @@ ENTRY(sha256_transform_rorx)
        mov     CTX, _CTX(%rsp)
 
 loop0:
-       lea     K256(%rip), TBL
-
        ## Load first 16 dwords from two blocks
        VMOVDQ  0*32(INP),XTMP0
        VMOVDQ  1*32(INP),XTMP1
@@ -597,18 +594,22 @@ last_block_enter:
 
 .align 16
 loop1:
+       lea     K256(%rip), TBL
        vpaddd  0*32(TBL, SRND), X0, XFER
        vmovdqa XFER, 0*32+_XFER(%rsp, SRND)
        FOUR_ROUNDS_AND_SCHED   _XFER + 0*32
 
+       lea     K256(%rip), TBL
        vpaddd  1*32(TBL, SRND), X0, XFER
        vmovdqa XFER, 1*32+_XFER(%rsp, SRND)
        FOUR_ROUNDS_AND_SCHED   _XFER + 1*32
 
+       lea     K256(%rip), TBL
        vpaddd  2*32(TBL, SRND), X0, XFER
        vmovdqa XFER, 2*32+_XFER(%rsp, SRND)
        FOUR_ROUNDS_AND_SCHED   _XFER + 2*32
 
+       lea     K256(%rip), TBL
        vpaddd  3*32(TBL, SRND), X0, XFER
        vmovdqa XFER, 3*32+_XFER(%rsp, SRND)
        FOUR_ROUNDS_AND_SCHED   _XFER + 3*32
@@ -619,9 +620,12 @@ loop1:
 
 loop2:
        ## Do last 16 rounds with no scheduling
+       lea     K256(%rip), TBL
        vpaddd  0*32(TBL, SRND), X0, XFER
        vmovdqa XFER, 0*32+_XFER(%rsp, SRND)
        DO_4ROUNDS      _XFER + 0*32
+
+       lea     K256(%rip), TBL
        vpaddd  1*32(TBL, SRND), X1, XFER
        vmovdqa XFER, 1*32+_XFER(%rsp, SRND)
        DO_4ROUNDS      _XFER + 1*32
@@ -676,9 +680,6 @@ loop3:
        ja      done_hash
 
 do_last_block:
-       #### do last block
-       lea     K256(%rip), TBL
-
        VMOVDQ  0*16(INP),XWORD0
        VMOVDQ  1*16(INP),XWORD1
        VMOVDQ  2*16(INP),XWORD2
@@ -718,7 +719,6 @@ done_hash:
        popq    %r14
        popq    %r13
        popq    %r12
-       popq    %rbp
        popq    %rbx
        ret
 ENDPROC(sha256_transform_rorx)
-- 
2.13.5
