Simplify the BPF JIT prologue so that it more closely resembles a
typical compiler-generated prologue.  This also reduces PROLOGUE_SIZE
from 37 to 20 bytes.

The frame pointer setup instructions at the beginning don't actually
accomplish anything because RBP gets clobbered anyway later in the
prologue.  So remove those instructions for now.

Signed-off-by: Josh Poimboeuf <jpoim...@redhat.com>
---
 arch/x86/net/bpf_jit_comp.c | 100 +++++++++++++++++-------------------
 1 file changed, 47 insertions(+), 53 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index da8c988b0f0f..485692d4b163 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -186,56 +186,48 @@ struct jit_context {
 #define BPF_MAX_INSN_SIZE      128
 #define BPF_INSN_SAFETY                64
 
-#define AUX_STACK_SPACE                40 /* Space for RBX, R13, R14, R15, tailcnt */
-
-#define PROLOGUE_SIZE          37
+#define PROLOGUE_SIZE          20
 
 /*
  * Emit x86-64 prologue code for BPF program and check its size.
  * bpf_tail_call helper will skip it while jumping into another program
  */
-static void emit_prologue(u8 **pprog, u32 stack_depth, bool ebpf_from_cbpf)
+static void emit_prologue(u8 **pprog, u32 stack_depth)
 {
        u8 *prog = *pprog;
        int cnt = 0;
 
+       /* push r15 */
+       EMIT2(0x41, 0x57);
+       /* push r14 */
+       EMIT2(0x41, 0x56);
+       /* push r13 */
+       EMIT2(0x41, 0x55);
        /* push rbp */
        EMIT1(0x55);
+       /* push rbx */
+       EMIT1(0x53);
 
-       /* mov rbp,rsp */
-       EMIT3(0x48, 0x89, 0xE5);
-
-       /* sub rsp, rounded_stack_depth + AUX_STACK_SPACE */
-       EMIT3_off32(0x48, 0x81, 0xEC,
-                   round_up(stack_depth, 8) + AUX_STACK_SPACE);
-
-       /* sub rbp, AUX_STACK_SPACE */
-       EMIT4(0x48, 0x83, 0xED, AUX_STACK_SPACE);
-
-       /* mov qword ptr [rbp+0],rbx */
-       EMIT4(0x48, 0x89, 0x5D, 0);
-       /* mov qword ptr [rbp+8],r13 */
-       EMIT4(0x4C, 0x89, 0x6D, 8);
-       /* mov qword ptr [rbp+16],r14 */
-       EMIT4(0x4C, 0x89, 0x75, 16);
-       /* mov qword ptr [rbp+24],r15 */
-       EMIT4(0x4C, 0x89, 0x7D, 24);
+       /*
+        * Push the tail call counter (tail_call_cnt) for eBPF tail calls.
+        * Initialized to zero.
+        *
+        * push $0
+        */
+       EMIT2(0x6a, 0x00);
 
-       if (!ebpf_from_cbpf) {
-               /*
-                * Clear the tail call counter (tail_call_cnt): for eBPF tail
-                * calls we need to reset the counter to 0. It's done in two
-                * instructions, resetting RAX register to 0, and moving it
-                * to the counter location.
-                */
+       /*
+        * RBP is used for the BPF program's FP register.  It points to the end
+        * of the program's stack area.
+        *
+        * mov rbp, rsp
+        */
+       EMIT3(0x48, 0x89, 0xE5);
 
-               /* xor eax, eax */
-               EMIT2(0x31, 0xc0);
-               /* mov qword ptr [rbp+32], rax */
-               EMIT4(0x48, 0x89, 0x45, 32);
+       /* sub rsp, rounded_stack_depth */
+       EMIT3_off32(0x48, 0x81, 0xEC, round_up(stack_depth, 8));
 
-               BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
-       }
+       BUILD_BUG_ON(cnt != PROLOGUE_SIZE);
 
        *pprog = prog;
 }
@@ -245,19 +237,22 @@ static void emit_epilogue(u8 **pprog)
        u8 *prog = *pprog;
        int cnt = 0;
 
-       /* mov rbx, qword ptr [rbp+0] */
-       EMIT4(0x48, 0x8B, 0x5D, 0);
-       /* mov r13, qword ptr [rbp+8] */
-       EMIT4(0x4C, 0x8B, 0x6D, 8);
-       /* mov r14, qword ptr [rbp+16] */
-       EMIT4(0x4C, 0x8B, 0x75, 16);
-       /* mov r15, qword ptr [rbp+24] */
-       EMIT4(0x4C, 0x8B, 0x7D, 24);
+       /* lea rsp, [rbp+0x8] */
+       EMIT4(0x48, 0x8D, 0x65, 0x08);
+
+       /* pop rbx */
+       EMIT1(0x5B);
+       /* pop rbp */
+       EMIT1(0x5D);
+       /* pop r13 */
+       EMIT2(0x41, 0x5D);
+       /* pop r14 */
+       EMIT2(0x41, 0x5E);
+       /* pop r15 */
+       EMIT2(0x41, 0x5F);
 
-       /* add rbp, AUX_STACK_SPACE */
-       EMIT4(0x48, 0x83, 0xC5, AUX_STACK_SPACE);
-       EMIT1(0xC9); /* leave */
-       EMIT1(0xC3); /* ret */
+       /* ret */
+       EMIT1(0xC3);
 
        *pprog = prog;
 }
@@ -295,7 +290,7 @@ static void emit_bpf_tail_call(u8 **pprog)
        EMIT2(0x89, 0xD2);                        /* mov edx, edx */
        EMIT3(0x39, 0x56,                         /* cmp dword ptr [rsi + 16], edx */
              offsetof(struct bpf_array, map.max_entries));
-#define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* Number of bytes to jump */
+#define OFFSET1 (35 + RETPOLINE_RAX_BPF_JIT_SIZE) /* Number of bytes to jump */
        EMIT2(X86_JBE, OFFSET1);                  /* jbe out */
        label1 = cnt;
 
@@ -303,13 +298,13 @@ static void emit_bpf_tail_call(u8 **pprog)
         * if (tail_call_cnt > MAX_TAIL_CALL_CNT)
         *      goto out;
         */
-       EMIT2_off32(0x8B, 0x85, 36);              /* mov eax, dword ptr [rbp + 36] */
+       EMIT3(0x8B, 0x45, 0x04);                  /* mov eax, dword ptr [rbp + 4] */
        EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT);     /* cmp eax, MAX_TAIL_CALL_CNT */
-#define OFFSET2 (30 + RETPOLINE_RAX_BPF_JIT_SIZE)
+#define OFFSET2 (27 + RETPOLINE_RAX_BPF_JIT_SIZE)
        EMIT2(X86_JA, OFFSET2);                   /* ja out */
        label2 = cnt;
        EMIT3(0x83, 0xC0, 0x01);                  /* add eax, 1 */
-       EMIT2_off32(0x89, 0x85, 36);              /* mov dword ptr [rbp + 36], eax */
+       EMIT3(0x89, 0x45, 0x04);                  /* mov dword ptr [rbp + 4], eax */
 
        /* prog = array->ptrs[index]; */
        EMIT4_off32(0x48, 0x8B, 0x84, 0xD6,       /* mov rax, [rsi + rdx * 8 + offsetof(...)] */
@@ -437,8 +432,7 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
        int proglen = 0;
        u8 *prog = temp;
 
-       emit_prologue(&prog, bpf_prog->aux->stack_depth,
-                     bpf_prog_was_classic(bpf_prog));
+       emit_prologue(&prog, bpf_prog->aux->stack_depth);
 
        for (i = 0; i < insn_cnt; i++, insn++) {
                const s32 imm32 = insn->imm;
-- 
2.20.1

Reply via email to