Re: [PATCH bpf] bpf, x64: implement retpoline for tail call

2018-02-22 Thread Daniel Borkmann
On 02/22/2018 04:53 AM, Eric Dumazet wrote:
> On Wed, 2018-02-21 at 19:43 -0800, Alexei Starovoitov wrote:
>> On Wed, Feb 21, 2018 at 07:04:02PM -0800, Eric Dumazet wrote:
>>> On Thu, 2018-02-22 at 01:05 +0100, Daniel Borkmann wrote:
>>>
>>> ...
>>>
 +/* Instead of plain jmp %rax, we emit a retpoline to control
 + * speculative execution for the indirect branch.
 + */
 +static void emit_retpoline_rax_trampoline(u8 **pprog)
 +{
 +  u8 *prog = *pprog;
 +  int cnt = 0;
 +
 +  EMIT1_off32(0xE8, 7);/* callq <set_up_target> */
 +  /* capture_spec: */
 +  EMIT2(0xF3, 0x90);   /* pause */
 +  EMIT3(0x0F, 0xAE, 0xE8); /* lfence */
 +  EMIT2(0xEB, 0xF9);   /* jmp <capture_spec> */
 +  /* set_up_target: */
 +  EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */
 +  EMIT1(0xC3); /* retq */
 +
 +  BUILD_BUG_ON(cnt != RETPOLINE_SIZE);
 +  *pprog = prog;
>>>
>>> You might define the actual code sequence (and length) in 
>>> arch/x86/include/asm/nospec-branch.h
>>>
>>> If we need to adjust code sequences for RETPOLINE, then we won't
>>> forget/miss that arch/x86/net/bpf_jit_comp.c had it hard-coded.
>>
>> like adding a comment to asm/nospec-branch.h that says
>> "dont forget to adjust bpf_jit_comp.c" ?
>> but clang/gcc generate slightly different sequences for
>> retpoline anyway, so even if '.macro RETPOLINE_JMP' in
>> nospec-branch.h changes it doesn't mean that x64 jit has to change.
>> So what kinda comment there would make sense?
> 
> I was thinking of something very explicit :
> 
> /* byte sequence for following assembly code used by eBPF
>call ...
>...
>retq
> */
> #define RETPOLINE_RAX_DIRECT_FOR_EBPF \
>    EMIT1_off32(0xE8, 7);/* callq <set_up_target> */   \
>    /* capture_spec: */\
>    EMIT2(0xF3, 0x90);   /* pause */   \
>    EMIT3(0x0F, 0xAE, 0xE8); /* lfence */  \
>    EMIT2(0xEB, 0xF9);   /* jmp <capture_spec> */  \
>    /* set_up_target: */   \
>    EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */   \
>    EMIT1(0xC3); /* retq */\
> 
> Might be simply byte encoded, (array of 17 bytes)
> 
> Well, something like that anyway...

Okay, sounds fine. Will respin, thanks Eric!


Re: [PATCH bpf] bpf, x64: implement retpoline for tail call

2018-02-21 Thread Alexei Starovoitov
On Wed, Feb 21, 2018 at 07:53:22PM -0800, Eric Dumazet wrote:
> > So what kinda comment there would make sense?
> 
> I was thinking of something very explicit :
> 
> /* byte sequence for following assembly code used by eBPF
>call ...
>...
>retq
> */
> #define RETPOLINE_RAX_DIRECT_FOR_EBPF \
>    EMIT1_off32(0xE8, 7);/* callq <set_up_target> */   \
>    /* capture_spec: */\
>    EMIT2(0xF3, 0x90);   /* pause */   \
>    EMIT3(0x0F, 0xAE, 0xE8); /* lfence */  \
>    EMIT2(0xEB, 0xF9);   /* jmp <capture_spec> */  \
>    /* set_up_target: */   \
>    EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */   \
>    EMIT1(0xC3); /* retq */\

got it. yeah. makes sense to me.




Re: [PATCH bpf] bpf, x64: implement retpoline for tail call

2018-02-21 Thread Eric Dumazet
On Wed, 2018-02-21 at 19:43 -0800, Alexei Starovoitov wrote:
> On Wed, Feb 21, 2018 at 07:04:02PM -0800, Eric Dumazet wrote:
> > On Thu, 2018-02-22 at 01:05 +0100, Daniel Borkmann wrote:
> > 
> > ...
> > 
> > > +/* Instead of plain jmp %rax, we emit a retpoline to control
> > > + * speculative execution for the indirect branch.
> > > + */
> > > +static void emit_retpoline_rax_trampoline(u8 **pprog)
> > > +{
> > > + u8 *prog = *pprog;
> > > + int cnt = 0;
> > > +
> > > + EMIT1_off32(0xE8, 7);/* callq <set_up_target> */
> > > + /* capture_spec: */
> > > + EMIT2(0xF3, 0x90);   /* pause */
> > > + EMIT3(0x0F, 0xAE, 0xE8); /* lfence */
> > > + EMIT2(0xEB, 0xF9);   /* jmp <capture_spec> */
> > > + /* set_up_target: */
> > > + EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */
> > > + EMIT1(0xC3); /* retq */
> > > +
> > > + BUILD_BUG_ON(cnt != RETPOLINE_SIZE);
> > > + *pprog = prog;
> > 
> > You might define the actual code sequence (and length) in 
> > arch/x86/include/asm/nospec-branch.h
> > 
> > If we need to adjust code sequences for RETPOLINE, then we won't
> > forget/miss that arch/x86/net/bpf_jit_comp.c had it hard-coded.
> 
> like adding a comment to asm/nospec-branch.h that says
> "dont forget to adjust bpf_jit_comp.c" ?
> but clang/gcc generate slightly different sequences for
> retpoline anyway, so even if '.macro RETPOLINE_JMP' in
> nospec-branch.h changes it doesn't mean that x64 jit has to change.
> So what kinda comment there would make sense?

I was thinking of something very explicit :

/* byte sequence for following assembly code used by eBPF
   call ...
   ...
   retq
*/
#define RETPOLINE_RAX_DIRECT_FOR_EBPF \
   EMIT1_off32(0xE8, 7);/* callq <set_up_target> */   \
   /* capture_spec: */\
   EMIT2(0xF3, 0x90);   /* pause */   \
   EMIT3(0x0F, 0xAE, 0xE8); /* lfence */  \
   EMIT2(0xEB, 0xF9);   /* jmp <capture_spec> */  \
   /* set_up_target: */   \
   EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */   \
   EMIT1(0xC3); /* retq */\

Might be simply byte encoded, (array of 17 bytes)

Well, something like that anyway...



Re: [PATCH bpf] bpf, x64: implement retpoline for tail call

2018-02-21 Thread Alexei Starovoitov
On Wed, Feb 21, 2018 at 07:04:02PM -0800, Eric Dumazet wrote:
> On Thu, 2018-02-22 at 01:05 +0100, Daniel Borkmann wrote:
> 
> ...
> 
> > +/* Instead of plain jmp %rax, we emit a retpoline to control
> > + * speculative execution for the indirect branch.
> > + */
> > +static void emit_retpoline_rax_trampoline(u8 **pprog)
> > +{
> > +   u8 *prog = *pprog;
> > +   int cnt = 0;
> > +
> > +   EMIT1_off32(0xE8, 7);/* callq <set_up_target> */
> > +   /* capture_spec: */
> > +   EMIT2(0xF3, 0x90);   /* pause */
> > +   EMIT3(0x0F, 0xAE, 0xE8); /* lfence */
> > +   EMIT2(0xEB, 0xF9);   /* jmp <capture_spec> */
> > +   /* set_up_target: */
> > +   EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */
> > +   EMIT1(0xC3); /* retq */
> > +
> > +   BUILD_BUG_ON(cnt != RETPOLINE_SIZE);
> > +   *pprog = prog;
> 
> You might define the actual code sequence (and length) in 
> arch/x86/include/asm/nospec-branch.h
> 
> If we need to adjust code sequences for RETPOLINE, then we won't
> forget/miss that arch/x86/net/bpf_jit_comp.c had it hard-coded.

like adding a comment to asm/nospec-branch.h that says
"dont forget to adjust bpf_jit_comp.c" ?
but clang/gcc generate slightly different sequences for
retpoline anyway, so even if '.macro RETPOLINE_JMP' in
nospec-branch.h changes it doesn't mean that x64 jit has to change.
So what kinda comment there would make sense?



Re: [PATCH bpf] bpf, x64: implement retpoline for tail call

2018-02-21 Thread Eric Dumazet
On Thu, 2018-02-22 at 01:05 +0100, Daniel Borkmann wrote:

...

> +/* Instead of plain jmp %rax, we emit a retpoline to control
> + * speculative execution for the indirect branch.
> + */
> +static void emit_retpoline_rax_trampoline(u8 **pprog)
> +{
> + u8 *prog = *pprog;
> + int cnt = 0;
> +
> + EMIT1_off32(0xE8, 7);/* callq <set_up_target> */
> + /* capture_spec: */
> + EMIT2(0xF3, 0x90);   /* pause */
> + EMIT3(0x0F, 0xAE, 0xE8); /* lfence */
> + EMIT2(0xEB, 0xF9);   /* jmp <capture_spec> */
> + /* set_up_target: */
> + EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */
> + EMIT1(0xC3); /* retq */
> +
> + BUILD_BUG_ON(cnt != RETPOLINE_SIZE);
> + *pprog = prog;

You might define the actual code sequence (and length) in 
arch/x86/include/asm/nospec-branch.h

If we need to adjust code sequences for RETPOLINE, then we won't
forget/miss that arch/x86/net/bpf_jit_comp.c had it hard-coded.

Thanks Daniel.



[PATCH bpf] bpf, x64: implement retpoline for tail call

2018-02-21 Thread Daniel Borkmann
Implement a retpoline [0] for the BPF tail call JIT'ing that converts
the indirect jump via jmp %rax that is used to make the long jump into
another JITed BPF image. Since this is subject to speculative execution,
we need to control the transient instruction sequence here as well
when CONFIG_RETPOLINE is set, and direct it into a pause + lfence loop.
The latter aligns also with what gcc / clang emits (e.g. [1]).

JIT dump after patch:

  # bpftool p d x i 1
   0: (18) r2 = map[id:1]
   2: (b7) r3 = 0
   3: (85) call bpf_tail_call#12
   4: (b7) r0 = 2
   5: (95) exit

With CONFIG_RETPOLINE:

  # bpftool p d j i 1
  [...]
  33:   cmp%edx,0x24(%rsi)
  36:   jbe0x0072  |*
  38:   mov0x24(%rbp),%eax
  3e:   cmp$0x20,%eax
  41:   ja 0x0072  |
  43:   add$0x1,%eax
  46:   mov%eax,0x24(%rbp)
  4c:   mov0x90(%rsi,%rdx,8),%rax
  54:   test   %rax,%rax
  57:   je 0x0072  |
  59:   mov0x28(%rax),%rax
  5d:   add$0x25,%rax
  61:   callq  0x006d  |+
  66:   pause  |
  68:   lfence |
  6b:   jmp0x0066  |
  6d:   mov%rax,(%rsp) |
  71:   retq   |
  72:   mov$0x2,%eax
  [...]

  * relative fall-through jumps in error case
  + retpoline for indirect jump

Without CONFIG_RETPOLINE:

  # bpftool p d j i 1
  [...]
  33:   cmp%edx,0x24(%rsi)
  36:   jbe0x0063  |*
  38:   mov0x24(%rbp),%eax
  3e:   cmp$0x20,%eax
  41:   ja 0x0063  |
  43:   add$0x1,%eax
  46:   mov%eax,0x24(%rbp)
  4c:   mov0x90(%rsi,%rdx,8),%rax
  54:   test   %rax,%rax
  57:   je 0x0063  |
  59:   mov0x28(%rax),%rax
  5d:   add$0x25,%rax
  61:   jmpq   *%rax   |-
  63:   mov$0x2,%eax
  [...]

  * relative fall-through jumps in error case
  - plain indirect jump as before

  [0] https://support.google.com/faqs/answer/7625886
  [1] 
https://github.com/gcc-mirror/gcc/commit/a31e654fa107be968b802786d747e962c2fcdb2b

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
---
 arch/x86/net/bpf_jit_comp.c | 52 -
 1 file changed, 42 insertions(+), 10 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index 4923d92..7e8d562 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -261,6 +261,35 @@ static void emit_prologue(u8 **pprog, u32 stack_depth)
*pprog = prog;
 }
 
+#ifdef CONFIG_RETPOLINE
+# define RETPOLINE_SIZE17
+# define OFFSET_ADJRETPOLINE_SIZE
+
+/* Instead of plain jmp %rax, we emit a retpoline to control
+ * speculative execution for the indirect branch.
+ */
+static void emit_retpoline_rax_trampoline(u8 **pprog)
+{
+   u8 *prog = *pprog;
+   int cnt = 0;
+
+   EMIT1_off32(0xE8, 7);/* callq <set_up_target> */
+   /* capture_spec: */
+   EMIT2(0xF3, 0x90);   /* pause */
+   EMIT3(0x0F, 0xAE, 0xE8); /* lfence */
+   EMIT2(0xEB, 0xF9);   /* jmp <capture_spec> */
+   /* set_up_target: */
+   EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */
+   EMIT1(0xC3); /* retq */
+
+   BUILD_BUG_ON(cnt != RETPOLINE_SIZE);
+   *pprog = prog;
+}
+#else
+/* Plain jmp %rax version used. */
+# define OFFSET_ADJ2
+#endif
+
 /* generate the following code:
  * ... bpf_tail_call(void *ctx, struct bpf_array *array, u64 index) ...
  *   if (index >= array->map.max_entries)
@@ -290,7 +319,7 @@ static void emit_bpf_tail_call(u8 **pprog)
EMIT2(0x89, 0xD2);/* mov edx, edx */
EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], 
edx */
  offsetof(struct bpf_array, map.max_entries));
-#define OFFSET1 43 /* number of bytes to jump */
+#define OFFSET1 (41 + OFFSET_ADJ) /* number of bytes to jump */
EMIT2(X86_JBE, OFFSET1);  /* jbe out */
label1 = cnt;
 
@@ -299,7 +328,7 @@ static void emit_bpf_tail_call(u8 **pprog)
 */
EMIT2_off32(0x8B, 0x85, 36);  /* mov eax, dword ptr [rbp + 
36] */
EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT 
*/
-#define OFFSET2 32
+#define OFFSET2 (30 + OFFSET_ADJ)
EMIT2(X86_JA, OFFSET2);   /* ja out */
label2 = cnt;
EMIT3(0x83, 0xC0, 0x01);  /* add eax, 1 */
@@ -313,7 +342,7 @@ static void emit_bpf_tail_call(u8 **pprog)
 *   goto out;
 */
EMIT3(0x48, 0x85, 0xC0);  /* test rax,rax */
-#define OFFSET3 10
+#define OFFSET3 (8 + OFFSET_ADJ)
EMIT2(X86_JE, OFFSET3);   /* je out */
label3 = cnt;
 
@@ -326,16 +355,19 @@ static void emit_bpf_tail_call(u8 **pprog)
 * rdi == ctx (1st arg)
 * rax == prog->bpf_func + prologue_size
 */
-   EMIT2(0xFF, 0xE0);/* jmp rax */
-
-   /* out: */
-