On Tue, Apr 30, 2019 at 11:33:21AM -0700, Linus Torvalds wrote:
> Anyway, since Andy really likes the entry code change, can we have
> that patch in parallel and judge the difference that way? Iirc, that
> was x86-64 specific too.

Here goes, compile tested only...

It obviously needs a self-test, but that shoulnd't be too hard to
arrange.

---
 arch/x86/entry/entry_32.S            |  7 +++++++
 arch/x86/entry/entry_64.S            | 14 ++++++++++++--
 arch/x86/include/asm/text-patching.h | 20 ++++++++++++++++++++
 arch/x86/kernel/ftrace.c             | 24 +++++++++++++++++++-----
 4 files changed, 58 insertions(+), 7 deletions(-)

diff --git a/arch/x86/entry/entry_32.S b/arch/x86/entry/entry_32.S
index 7b23431be5cb..d246302085a3 100644
--- a/arch/x86/entry/entry_32.S
+++ b/arch/x86/entry/entry_32.S
@@ -1479,6 +1479,13 @@ ENTRY(int3)
        ASM_CLAC
        pushl   $-1                             # mark this as an int
 
+       testl   $SEGMENT_RPL_MASK, PT_CS(%esp)
+       jnz     .Lfrom_usermode_no_gap
+       .rept 6
+       pushl   5*4(%esp)
+       .endr
+.Lfrom_usermode_no_gap:
+
        SAVE_ALL switch_stacks=1
        ENCODE_FRAME_POINTER
        TRACE_IRQS_OFF
diff --git a/arch/x86/entry/entry_64.S b/arch/x86/entry/entry_64.S
index 20e45d9b4e15..268cd9affe04 100644
--- a/arch/x86/entry/entry_64.S
+++ b/arch/x86/entry/entry_64.S
@@ -878,7 +878,7 @@ apicinterrupt IRQ_WORK_VECTOR                       
irq_work_interrupt              smp_irq_work_interrupt
  * @paranoid == 2 is special: the stub will never switch stacks.  This is for
  * #DF: if the thread stack is somehow unusable, we'll still get a useful OOPS.
  */
-.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 
ist_offset=0
+.macro idtentry sym do_sym has_error_code:req paranoid=0 shift_ist=-1 
ist_offset=0 create_gap=0
 ENTRY(\sym)
        UNWIND_HINT_IRET_REGS offset=\has_error_code*8
 
@@ -898,6 +898,16 @@ ENTRY(\sym)
        jnz     .Lfrom_usermode_switch_stack_\@
        .endif
 
+       .if \create_gap == 1
+       testb   $3, CS-ORIG_RAX(%rsp)
+       jnz     .Lfrom_usermode_no_gap_\@
+       .rept 6
+       pushq   5*8(%rsp)
+       .endr
+       UNWIND_HINT_IRET_REGS offset=8
+.Lfrom_usermode_no_gap_\@:
+       .endif
+
        .if \paranoid
        call    paranoid_entry
        .else
@@ -1129,7 +1139,7 @@ apicinterrupt3 HYPERV_STIMER0_VECTOR \
 #endif /* CONFIG_HYPERV */
 
 idtentry debug                 do_debug                has_error_code=0        
paranoid=1 shift_ist=IST_INDEX_DB ist_offset=DB_STACK_OFFSET
-idtentry int3                  do_int3                 has_error_code=0
+idtentry int3                  do_int3                 has_error_code=0        
create_gap=1
 idtentry stack_segment         do_stack_segment        has_error_code=1
 
 #ifdef CONFIG_XEN_PV
diff --git a/arch/x86/include/asm/text-patching.h 
b/arch/x86/include/asm/text-patching.h
index e85ff65c43c3..ba275b6292db 100644
--- a/arch/x86/include/asm/text-patching.h
+++ b/arch/x86/include/asm/text-patching.h
@@ -39,4 +39,24 @@ extern int poke_int3_handler(struct pt_regs *regs);
 extern void *text_poke_bp(void *addr, const void *opcode, size_t len, void 
*handler);
 extern int after_bootmem;
 
+static inline void int3_emulate_push(struct pt_regs *regs, unsigned long val)
+{
+       regs->sp -= sizeof(unsigned long);
+       *(unsigned long *)regs->sp = val;
+}
+
+static inline void int3_emulate_jmp(struct pt_regs *regs, unsigned long ip)
+{
+       regs->ip = ip;
+}
+
+#define INT3_INSN_SIZE 1
+#define CALL_INSN_SIZE 5
+
+static inline void int3_emulate_call(struct pt_regs *regs, unsigned long func)
+{
+       int3_emulate_push(regs, regs->ip - INT3_INSN_SIZE + CALL_INSN_SIZE);
+       int3_emulate_jmp(regs, func);
+}
+
 #endif /* _ASM_X86_TEXT_PATCHING_H */
diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c
index ef49517f6bb2..90d319687d7e 100644
--- a/arch/x86/kernel/ftrace.c
+++ b/arch/x86/kernel/ftrace.c
@@ -29,6 +29,7 @@
 #include <asm/kprobes.h>
 #include <asm/ftrace.h>
 #include <asm/nops.h>
+#include <asm/text-patching.h>
 
 #ifdef CONFIG_DYNAMIC_FTRACE
 
@@ -231,6 +232,7 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned 
long old_addr,
 }
 
 static unsigned long ftrace_update_func;
+static unsigned long ftrace_update_func_call;
 
 static int update_ftrace_func(unsigned long ip, void *new)
 {
@@ -259,6 +261,8 @@ int ftrace_update_ftrace_func(ftrace_func_t func)
        unsigned char *new;
        int ret;
 
+       ftrace_update_func_call = (unsigned long)func;
+
        new = ftrace_call_replace(ip, (unsigned long)func);
        ret = update_ftrace_func(ip, new);
 
@@ -295,12 +299,19 @@ int ftrace_int3_handler(struct pt_regs *regs)
                return 0;
 
        ip = regs->ip - 1;
-       if (!ftrace_location(ip) && !is_ftrace_caller(ip))
-               return 0;
-
-       regs->ip += MCOUNT_INSN_SIZE - 1;
+       if (ftrace_location(ip)) {
+               int3_emulate_call(regs, ftrace_update_func_call);
+               return 1;
+       } else if (is_ftrace_caller(ip)) {
+               if (!ftrace_update_func_call) {
+                       int3_emulate_jmp(regs, regs->ip - INT3_INSN_SIZE + 
CALL_INSN_SIZE);
+                       return 1;
+               }
+               int3_emulate_call(regs, ftrace_update_func_call);
+               return 1;
+       }
 
-       return 1;
+       return 0;
 }
 NOKPROBE_SYMBOL(ftrace_int3_handler);
 
@@ -859,6 +870,8 @@ void arch_ftrace_update_trampoline(struct ftrace_ops *ops)
 
        func = ftrace_ops_get_func(ops);
 
+       ftrace_update_func_call = (unsigned long)func;
+
        /* Do a safe modify in case the trampoline is executing */
        new = ftrace_call_replace(ip, (unsigned long)func);
        ret = update_ftrace_func(ip, new);
@@ -960,6 +973,7 @@ static int ftrace_mod_jmp(unsigned long ip, void *func)
 {
        unsigned char *new;
 
+       ftrace_update_func_call = 0UL;
        new = ftrace_jmp_replace(ip, (unsigned long)func);
 
        return update_ftrace_func(ip, new);

Reply via email to