On Tue, Jun 11, 2019 at 06:21:28PM +0200, Peter Zijlstra wrote: > although at this point I'm > thinking we should just used the instruction decode we have instead of > playing iffy games with packed structures.
How's something like this? It accepts jmp/32 jmp/8 call and nop5_atomic. --- Subject: x86/alternatives: Teach text_poke_bp() to emulate instructions From: Peter Zijlstra <pet...@infradead.org> Date: Wed Jun 5 10:48:37 CEST 2019 In preparation for static_call support, teach text_poke_bp() to emulate instructions, including CALL. The current text_poke_bp() takes a @handler argument which is used as a jump target when the temporary INT3 is hit by a different CPU. When patching CALL instructions, this doesn't work because we'd miss the PUSH of the return address. Instead, teach poke_int3_handler() to emulate an instruction, typically the instruction we're patching in. This fits almost all text_poke_bp() users, except arch_unoptimize_kprobe() which restores random text, and for that site we have to build an explicit emulate instruction. Signed-off-by: Peter Zijlstra (Intel) <pet...@infradead.org> --- arch/x86/include/asm/text-patching.h | 15 +++++-- arch/x86/kernel/alternative.c | 73 +++++++++++++++++++++++++---------- arch/x86/kernel/jump_label.c | 3 - arch/x86/kernel/kprobes/opt.c | 11 +++-- 4 files changed, 75 insertions(+), 27 deletions(-) --- a/arch/x86/include/asm/text-patching.h +++ b/arch/x86/include/asm/text-patching.h @@ -37,7 +37,7 @@ extern void text_poke_early(void *addr, extern void *text_poke(void *addr, const void *opcode, size_t len); extern void *text_poke_kgdb(void *addr, const void *opcode, size_t len); extern int poke_int3_handler(struct pt_regs *regs); -extern void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler); +extern void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate); extern int after_bootmem; extern __ro_after_init struct mm_struct *poking_mm; extern __ro_after_init unsigned long poking_addr; @@ -48,8 +48,17 @@ static inline void int3_emulate_jmp(stru regs->ip = ip; } -#define INT3_INSN_SIZE 1 -#define CALL_INSN_SIZE 5 +#define INT3_INSN_SIZE 1 +#define INT3_INSN_OPCODE 0xCC + +#define 
CALL_INSN_SIZE 5 +#define CALL_INSN_OPCODE 0xE8 + +#define JMP_INSN_SIZE 5 +#define JMP_INSN_OPCODE 0xE9 + +#define JMP8_INSN_SIZE 2 +#define JMP8_INSN_OPCODE 0xEB static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val) { --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -920,31 +920,45 @@ static void do_sync_core(void *info) sync_core(); } -static bool bp_patching_in_progress; -static void *bp_int3_handler, *bp_int3_addr; +static const void *bp_int3_addr; +static const struct insn *bp_int3_insn; int poke_int3_handler(struct pt_regs *regs) { + long ip; + /* * Having observed our INT3 instruction, we now must observe - * bp_patching_in_progress. - * - * in_progress = TRUE INT3 - * WMB RMB - * write INT3 if (in_progress) + * bp_int3_addr and bp_int3_insn: * - * Idem for bp_int3_handler. + * bp_int3_{addr,insn} = .. INT3 + * WMB RMB + * write INT3 if (insn) */ smp_rmb(); - if (likely(!bp_patching_in_progress)) + if (likely(!bp_int3_insn)) return 0; if (user_mode(regs) || regs->ip != (unsigned long)bp_int3_addr) return 0; - /* set up the specified breakpoint handler */ - regs->ip = (unsigned long) bp_int3_handler; + ip = regs->ip - INT3_INSN_SIZE + bp_int3_insn->length; + + switch (bp_int3_insn->opcode.bytes[0]) { + case CALL_INSN_OPCODE: + int3_emulate_call(regs, ip + bp_int3_insn->immediate.value); + break; + + case JMP_INSN_OPCODE: + case JMP8_INSN_OPCODE: + int3_emulate_jmp(regs, ip + bp_int3_insn->immediate.value); + break; + + default: /* assume NOP */ + int3_emulate_jmp(regs, ip); + break; + } return 1; } @@ -955,7 +969,7 @@ NOKPROBE_SYMBOL(poke_int3_handler); * @addr: address to patch * @opcode: opcode of new instruction * @len: length to copy - * @handler: address to jump to when the temporary breakpoint is hit + * @emulate: opcode to emulate, when NULL use @opcode * * Modify multi-byte instruction by using int3 breakpoint on SMP. 
* We completely avoid stop_machine() here, and achieve the @@ -970,19 +984,40 @@ NOKPROBE_SYMBOL(poke_int3_handler); * replacing opcode * - sync cores */ -void text_poke_bp(void *addr, const void *opcode, size_t len, void *handler) +void text_poke_bp(void *addr, const void *opcode, size_t len, const void *emulate) { - unsigned char int3 = 0xcc; + unsigned char int3 = INT3_INSN_OPCODE; + struct insn insn; - bp_int3_handler = handler; - bp_int3_addr = (u8 *)addr + sizeof(int3); - bp_patching_in_progress = true; + bp_int3_addr = addr + INT3_INSN_SIZE; lockdep_assert_held(&text_mutex); + if (!emulate) + emulate = opcode; + + kernel_insn_init(&insn, emulate, MAX_INSN_SIZE); + insn_get_length(&insn); + + BUG_ON(!insn_complete(&insn)); + BUG_ON(insn.length != len); + + switch (insn.opcode.bytes[0]) { + case CALL_INSN_OPCODE: + case JMP_INSN_OPCODE: + case JMP8_INSN_OPCODE: + break; + + default: + BUG_ON(len != 5); + BUG_ON(memcmp(emulate, ideal_nops[NOP_ATOMIC5], 5)); + } + + bp_int3_insn = &insn; + /* * Corresponding read barrier in int3 notifier for making sure the - * in_progress and handler are correctly ordered wrt. patching. + * bp_int3_addr and bp_int3_insn are correctly ordered wrt. patching. */ smp_wmb(); @@ -1011,6 +1046,6 @@ void text_poke_bp(void *addr, const void * sync_core() implies an smp_mb() and orders this store against * the writing of the new instruction. 
*/ - bp_patching_in_progress = false; + bp_int3_insn = NULL; } --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -87,8 +87,7 @@ static void __ref __jump_label_transform return; } - text_poke_bp((void *)jump_entry_code(entry), code, JUMP_LABEL_NOP_SIZE, - (void *)jump_entry_code(entry) + JUMP_LABEL_NOP_SIZE); + text_poke_bp((void *)jump_entry_code(entry), code, JUMP_LABEL_NOP_SIZE, NULL); } void arch_jump_label_transform(struct jump_entry *entry, --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -437,8 +437,7 @@ void arch_optimize_kprobes(struct list_h insn_buff[0] = RELATIVEJUMP_OPCODE; *(s32 *)(&insn_buff[1]) = rel; - text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE, - op->optinsn.insn); + text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE, NULL); list_del_init(&op->list); } @@ -448,12 +447,18 @@ void arch_optimize_kprobes(struct list_h void arch_unoptimize_kprobe(struct optimized_kprobe *op) { u8 insn_buff[RELATIVEJUMP_SIZE]; + u8 emulate_buff[RELATIVEJUMP_SIZE]; /* Set int3 to first byte for kprobes */ insn_buff[0] = BREAKPOINT_INSTRUCTION; memcpy(insn_buff + 1, op->optinsn.copied_insn, RELATIVE_ADDR_SIZE); + + emulate_buff[0] = RELATIVEJUMP_OPCODE; + *(s32 *)(&emulate_buff[1]) = (s32)((long)op->optinsn.insn - + ((long)op->kp.addr + RELATIVEJUMP_SIZE)); + text_poke_bp(op->kp.addr, insn_buff, RELATIVEJUMP_SIZE, - op->optinsn.insn); + emulate_buff); } /*