On Fri, Jan 26, 2018 at 12:33:31PM +0000, David Woodhouse wrote:
> On Fri, 2018-01-26 at 13:11 +0100, Borislav Petkov wrote:
> > 
> > +ENTRY(__fill_rsb_clobber_ax)
> > +       ___FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, %_ASM_SP
> > +END(__fill_rsb_clobber_ax)
> > +EXPORT_SYMBOL_GPL(__fill_rsb_clobber_ax)
> 
> You still have clear vs. fill confusion there.

I just took what was there originally:

-                                 __stringify(__FILL_RETURN_BUFFER(%0, RSB_CLEAR_LOOPS, %1)),

RSB_CLEAR_LOOPS - i.e., the "fill" macro was already being invoked with the "clear" loop count.

> 
> How about making it take the loop count in %eax? That would allow us to
> drop the ___FILL_RETURN_BUFFER macro entirely.
> 
> Or does that make us depend on your other fixes to accept jumps in
> places other than the first instruction of altinstr? 
> 
> Even if you give us separate __clear_rsb_clobber_ax vs.
> __fill_rsb_clobber_ax functions, we could still kill the macro in
> nospec-branch.h and use a .macro in retpoline.S for the actual
> implementation, couldn't we?

All good ideas. So how about the below diff on top? It adds separate __fill_rsb_clobber_ax and __clr_rsb_clobber_ax entry points, does the actual stuffing in a .macro in retpoline.S and kills the CPP macro in nospec-branch.h.

It builds and boots in a VM here. I need to go to the store but will
play with it more when I get back.

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 60c4c342316c..f7823a5a8714 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -252,7 +252,7 @@ ENTRY(__switch_to_asm)
         * exist, overwrite the RSB with entries which capture
         * speculative execution to prevent attack.
         */
-       FILL_RETURN_BUFFER %ebx, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+       FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
 #endif
 
        /* restore callee-saved registers */
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index ff6f8022612c..7a190ff524e2 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -499,7 +499,7 @@ ENTRY(__switch_to_asm)
         * exist, overwrite the RSB with entries which capture
         * speculative execution to prevent attack.
         */
-       FILL_RETURN_BUFFER %r12, RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
+       FILL_RETURN_BUFFER RSB_CLEAR_LOOPS, X86_FEATURE_RSB_CTXSW
 #endif
 
        /* restore callee-saved registers */
diff --git a/arch/x86/include/asm/asm-prototypes.h b/arch/x86/include/asm/asm-prototypes.h
index 1908214b9125..b889705f995a 100644
--- a/arch/x86/include/asm/asm-prototypes.h
+++ b/arch/x86/include/asm/asm-prototypes.h
@@ -38,4 +38,7 @@ INDIRECT_THUNK(dx)
 INDIRECT_THUNK(si)
 INDIRECT_THUNK(di)
 INDIRECT_THUNK(bp)
+asmlinkage void __fill_rsb_clobber_ax(void);
+asmlinkage void __clr_rsb_clobber_ax(void);
+
 #endif /* CONFIG_RETPOLINE */
diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h
index 61d4d7033758..3049433687c8 100644
--- a/arch/x86/include/asm/nospec-branch.h
+++ b/arch/x86/include/asm/nospec-branch.h
@@ -27,34 +27,8 @@
 #define RSB_CLEAR_LOOPS                32      /* To forcibly overwrite all entries */
 #define RSB_FILL_LOOPS         16      /* To avoid underflow */
 
-/*
- * Google experimented with loop-unrolling and this turned out to be
- * the optimal version — two calls, each with their own speculation
- * trap should their return address end up getting used, in a loop.
- */
-#define __FILL_RETURN_BUFFER(reg, nr, sp)      \
-       mov     $(nr/2), reg;                   \
-771:                                           \
-       call    772f;                           \
-773:   /* speculation trap */                  \
-       pause;                                  \
-       lfence;                                 \
-       jmp     773b;                           \
-772:                                           \
-       call    774f;                           \
-775:   /* speculation trap */                  \
-       pause;                                  \
-       lfence;                                 \
-       jmp     775b;                           \
-774:                                           \
-       dec     reg;                            \
-       jnz     771b;                           \
-       add     $(BITS_PER_LONG/8) * nr, sp;
-
 #ifdef __ASSEMBLY__
 
-#include <asm/bitsperlong.h>
-
 /*
  * This should be used immediately before a retpoline alternative.  It tells
  * objtool where the retpolines are so that it can make sense of the control
@@ -123,40 +97,9 @@
 #endif
 .endm
 
-/* Same as above but with alignment additionally */
-.macro  ___FILL_RETURN_BUFFER reg:req nr:req sp:req
-       mov     (\nr / 2), \reg
-       .align 16
-771:
-       call    772f
-773:                                           /* speculation trap */
-       pause
-       lfence
-       jmp     773b
-       .align 16
-772:
-       call    774f
-775:                                           /* speculation trap */
-       pause
-       lfence
-       jmp     775b
-       .align 16
-774:
-       dec     \reg
-       jnz     771b
-       add     (BITS_PER_LONG/8) * \nr, \sp
-.endm
-
- /*
-  * A simpler FILL_RETURN_BUFFER macro. Don't make people use the CPP
-  * monstrosity above, manually.
-  */
-.macro FILL_RETURN_BUFFER reg:req nr:req ftr:req
+.macro FILL_RETURN_BUFFER nr:req ftr:req
 #ifdef CONFIG_RETPOLINE
-       ANNOTATE_NOSPEC_ALTERNATIVE
-       ALTERNATIVE "jmp .Lskip_rsb_\@",                                \
-               __stringify(__FILL_RETURN_BUFFER(\reg,\nr,%_ASM_SP))    \
-               \ftr
+       ALTERNATIVE "jmp .Lskip_rsb_\@", "call __clr_rsb_clobber_ax", \ftr
 .Lskip_rsb_\@:
 #endif
 .endm
diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h
index 491f6e0be66e..d3a67fba200a 100644
--- a/arch/x86/include/asm/processor.h
+++ b/arch/x86/include/asm/processor.h
@@ -971,9 +971,4 @@ bool xen_set_default_idle(void);
 
 void stop_this_cpu(void *dummy);
 void df_debug(struct pt_regs *regs, long error_code);
-
-#ifdef CONFIG_RETPOLINE
-asmlinkage void __fill_rsb_clobber_ax(void);
-#endif
-
 #endif /* _ASM_X86_PROCESSOR_H */
diff --git a/arch/x86/lib/retpoline.S b/arch/x86/lib/retpoline.S
index 297b0fd2ad10..522d92bd3176 100644
--- a/arch/x86/lib/retpoline.S
+++ b/arch/x86/lib/retpoline.S
@@ -7,6 +7,7 @@
 #include <asm/alternative-asm.h>
 #include <asm/export.h>
 #include <asm/nospec-branch.h>
+#include <asm/bitsperlong.h>
 
 .macro THUNK reg
        .section .text.__x86.indirect_thunk
@@ -18,6 +19,32 @@ ENTRY(__x86_indirect_thunk_\reg)
 ENDPROC(__x86_indirect_thunk_\reg)
 .endm
 
+.macro BOINK_RSB nr:req sp:req
+       push %_ASM_AX
+       mov     $(\nr / 2), %_ASM_AX
+       .align 16
+771:
+       call    772f
+773:                                           /* speculation trap */
+       pause
+       lfence
+       jmp     773b
+       .align 16
+772:
+       call    774f
+775:                                           /* speculation trap */
+       pause
+       lfence
+       jmp     775b
+       .align 16
+774:
+       dec     %_ASM_AX
+       jnz     771b
+       add     $((BITS_PER_LONG/8) * \nr), \sp
+       pop %_ASM_AX
+.endm
+
+
 /*
  * Despite being an assembler file we can't just use .irp here
  * because __KSYM_DEPS__ only uses the C preprocessor and would
@@ -48,6 +75,13 @@ GENERATE_THUNK(r15)
 #endif
 
 ENTRY(__fill_rsb_clobber_ax)
-       ___FILL_RETURN_BUFFER %_ASM_AX, RSB_CLEAR_LOOPS, %_ASM_SP
+       BOINK_RSB RSB_FILL_LOOPS, %_ASM_SP
+       ret
 END(__fill_rsb_clobber_ax)
 EXPORT_SYMBOL_GPL(__fill_rsb_clobber_ax)
+
+ENTRY(__clr_rsb_clobber_ax)
+       BOINK_RSB RSB_CLEAR_LOOPS, %_ASM_SP
+       ret
+END(__clr_rsb_clobber_ax)
+EXPORT_SYMBOL_GPL(__clr_rsb_clobber_ax)
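
Btw, once those two are exported, the remaining C-side user -
vmexit_fill_RSB() in nospec-branch.h - could lose its __stringify()
games too and become a single alternative CALL. Completely untested
sketch, written from memory, clobber spelling not verified:

static inline void vmexit_fill_RSB(void)
{
#ifdef CONFIG_RETPOLINE
	/*
	 * Forcibly overwrite all RSB entries on VMEXIT. Declaring the
	 * "ax" clobber is belt-and-braces - the BOINK_RSB version above
	 * push/pops rAX anyway.
	 */
	asm volatile (ALTERNATIVE("",
				  "call __clr_rsb_clobber_ax",
				  X86_FEATURE_RETPOLINE)
		      : ASM_CALL_CONSTRAINT : : "memory", "ax");
#endif
}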

-- 
Regards/Gruss,
    Boris.

Good mailing practices for 400: avoid top-posting and trim the reply.
