On Wed, 29 Oct 2025 17:20:04 +0900
Masami Hiramatsu (Google) <[email protected]> wrote:

> On Wed, 29 Oct 2025 11:43:17 +0900
> Masami Hiramatsu (Google) <[email protected]> wrote:
> 
> > > Hmm, it seems that jiffies related things are updated frequently
> > > and it may cause an interrupt storm or infinite recursive calls.
> > 
> > I added another trace_printk() in el1_watchpt(). It seems el1_watchpt()
> > takes too long and there is no time to do any other things.
> > (Note the interval shown below is only within the el1_watchpt function,
> >  and in reality various processes (save/restore registers etc) for
> >  exception handling will be inserted before and after.)
> 
> Forget about this. I found the root cause. The x86 watchpoint exception
> happens after the instruction is executed, the arm64 happens before.
> 
> Thus what we need is to emulate or do single step the hooked instruction
> and restart from the next instruction from the watchpoint exception on
> arm64. I thought hw_breakpoint does that, but doesn't. Should we do this
> in do_watchpoint() or in user_handler?

There is single-step execution code, but only for the default overflow_handlers.
This is a bit strange because other users cannot set it up outside of
the arch-dependent code. Even if they could, it would simply be redundant.

So I made the changes below, which allow users to mark their own custom
handler as compatible with the perf default overflow handlers.
I confirmed this works on both arm and arm64.

diff --git a/arch/arm64/kernel/hw_breakpoint.c 
b/arch/arm64/kernel/hw_breakpoint.c
index ab76b36dce82..e12ec95b471e 100644
--- a/arch/arm64/kernel/hw_breakpoint.c
+++ b/arch/arm64/kernel/hw_breakpoint.c
@@ -632,6 +632,7 @@ void do_breakpoint(unsigned long esr, struct pt_regs *regs)
        addr = instruction_pointer(regs);
        debug_info = &current->thread.debug;
 
+       trace_printk("breakpoint exception at address: 0x%lx\n", addr);
        for (i = 0; i < core_num_brps; ++i) {
                rcu_read_lock();
 
@@ -661,6 +662,7 @@ void do_breakpoint(unsigned long esr, struct pt_regs *regs)
                rcu_read_unlock();
        }
 
+       trace_printk("breakpoint handling complete, step=%d\n", step);
        if (!step)
                return;
 
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index fd1d91017b99..40dd897e26b0 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -902,6 +902,7 @@ struct perf_event {
        u64                             (*clock)(void);
        perf_overflow_handler_t         overflow_handler;
        void                            *overflow_handler_context;
+       bool                            default_overflow_compatible;
        struct bpf_prog                 *prog;
        u64                             bpf_cookie;
 
@@ -1505,13 +1506,7 @@ extern int perf_event_output(struct perf_event *event,
 static inline bool
 is_default_overflow_handler(struct perf_event *event)
 {
-       perf_overflow_handler_t overflow_handler = event->overflow_handler;
-
-       if (likely(overflow_handler == perf_event_output_forward))
-               return true;
-       if (unlikely(overflow_handler == perf_event_output_backward))
-               return true;
-       return false;
+       return event->default_overflow_compatible;
 }
 
 extern void
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 177e57c1a362..6bbbde82cb21 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -12946,9 +12946,11 @@ perf_event_alloc(struct perf_event_attr *attr, int cpu,
        } else if (is_write_backward(event)){
                event->overflow_handler = perf_event_output_backward;
                event->overflow_handler_context = NULL;
+               event->default_overflow_compatible = true;
        } else {
                event->overflow_handler = perf_event_output_forward;
                event->overflow_handler_context = NULL;
+               event->default_overflow_compatible = true;
        }
 
        perf_event__state_init(event);
diff --git a/kernel/trace/trace_wprobe.c b/kernel/trace/trace_wprobe.c
index 98605b207f43..f2c2f26fd668 100644
--- a/kernel/trace/trace_wprobe.c
+++ b/kernel/trace/trace_wprobe.c
@@ -163,6 +163,8 @@ static void wprobe_perf_handler(struct perf_event *bp,
 static int __register_trace_wprobe(struct trace_wprobe *tw)
 {
        struct perf_event_attr attr;
+       struct perf_event *bp;
+       int cpu;
 
        if (tw->bp_event)
                return -EINVAL;
@@ -179,6 +181,11 @@ static int __register_trace_wprobe(struct trace_wprobe *tw)
                tw->bp_event = NULL;
                return ret;
        }
+       /* Mark wprobe_perf_handler is compatible with default one. */
+       for_each_online_cpu(cpu) {
+               bp = per_cpu(*tw->bp_event, cpu);
+               bp->default_overflow_compatible = true;
+       }
 
        return 0;
 }

-- 
Masami Hiramatsu (Google) <[email protected]>

Reply via email to