On Tue, Sep 10, 2024 at 11:10:23PM +0900, Masami Hiramatsu wrote:
> On Mon,  9 Sep 2024 09:45:48 +0200
> Jiri Olsa <[email protected]> wrote:
> 
> > Adding support for uprobe consumer to be defined as session and have
> > new behaviour for consumer's 'handler' and 'ret_handler' callbacks.
> > 
> > The session means that 'handler' and 'ret_handler' callbacks are
> > connected in a way that allows to:
> > 
> >   - control execution of 'ret_handler' from 'handler' callback
> >   - share data between 'handler' and 'ret_handler' callbacks
> > 
> > The session is enabled by setting new 'session' bool field to true
> > in uprobe_consumer object.
> > 
> > We use return_consumer object to keep track of consumers with
> > 'ret_handler'. This object also carries the shared data between
> > 'handler' and and 'ret_handler' callbacks.
> > 
> > The control of 'ret_handler' callback execution is done via return
> > value of the 'handler' callback. This patch adds new 'ret_handler'
> > return value (2) which means to ignore ret_handler callback.
> > 
> > Actions on 'handler' callback return values are now:
> > 
> >   0 - execute ret_handler (if it's defined)
> >   1 - remove uprobe
> >   2 - do nothing (ignore ret_handler)
> > 
> > The session concept fits to our common use case where we do filtering
> > on entry uprobe and based on the result we decide to run the return
> > uprobe (or not).
> 
> OK, so this allows uprobes handler to record any input parameter and
> access it from ret_handler as fprobe's private entry data, right?

at the moment the shared data is 8 bytes.. I explored the way of having
the generic (configured) sized data and it complicates the code a bit
and we have no use case for that at the moment, so I decided to keep it
simple for now

I mentioned that in the cover email, I think it can be done as follow if
it's needed in future:

  - I kept the session cookie instead of introducing generic session
    data, because it makes the change much more complicated, I think
    it can be added as a followup if it's needed

> 
> The code looks good to me.
> 
> Acked-by: Masami Hiramatsu (Google) <[email protected]>
> 
> trace_uprobe should also support $argN in retuprobe.

could you please share more details on the use case?

thanks,
jirka

> 
> https://lore.kernel.org/all/170952365552.229804.224112990211602895.stgit@devnote2/
> 
> Thank you,
> 
> > 
> > It's also convenient to share the data between session callbacks.
> > 
> > Signed-off-by: Jiri Olsa <[email protected]>
> > ---
> >  include/linux/uprobes.h                       |  17 ++-
> >  kernel/events/uprobes.c                       | 132 ++++++++++++++----
> >  kernel/trace/bpf_trace.c                      |   6 +-
> >  kernel/trace/trace_uprobe.c                   |  12 +-
> >  .../selftests/bpf/bpf_testmod/bpf_testmod.c   |   2 +-
> >  5 files changed, 133 insertions(+), 36 deletions(-)
> > 
> > diff --git a/include/linux/uprobes.h b/include/linux/uprobes.h
> > index 2b294bf1881f..557901e04624 100644
> > --- a/include/linux/uprobes.h
> > +++ b/include/linux/uprobes.h
> > @@ -24,7 +24,7 @@ struct notifier_block;
> >  struct page;
> >  
> >  #define UPROBE_HANDLER_REMOVE              1
> > -#define UPROBE_HANDLER_MASK                1
> > +#define UPROBE_HANDLER_IGNORE              2
> >  
> >  #define MAX_URETPROBE_DEPTH                64
> >  
> > @@ -37,13 +37,16 @@ struct uprobe_consumer {
> >      * for the current process. If filter() is omitted or returns true,
> >      * UPROBE_HANDLER_REMOVE is effectively ignored.
> >      */
> > -   int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs);
> > +   int (*handler)(struct uprobe_consumer *self, struct pt_regs *regs, 
> > __u64 *data);
> >     int (*ret_handler)(struct uprobe_consumer *self,
> >                             unsigned long func,
> > -                           struct pt_regs *regs);
> > +                           struct pt_regs *regs, __u64 *data);
> >     bool (*filter)(struct uprobe_consumer *self, struct mm_struct *mm);
> >  
> >     struct list_head cons_node;
> > +   bool session;
> > +
> > +   __u64 id;       /* set when uprobe_consumer is registered */
> >  };
> >  
> >  #ifdef CONFIG_UPROBES
> > @@ -83,14 +86,22 @@ struct uprobe_task {
> >     unsigned int                    depth;
> >  };
> >  
> > +struct return_consumer {
> > +   __u64   cookie;
> > +   __u64   id;
> > +};
> > +
> >  struct return_instance {
> >     struct uprobe           *uprobe;
> >     unsigned long           func;
> >     unsigned long           stack;          /* stack pointer */
> >     unsigned long           orig_ret_vaddr; /* original return address */
> >     bool                    chained;        /* true, if instance is nested 
> > */
> > +   int                     consumers_cnt;
> >  
> >     struct return_instance  *next;          /* keep as stack */
> > +
> > +   struct return_consumer  consumers[] __counted_by(consumers_cnt);
> >  };
> >  
> >  enum rp_check {
> > diff --git a/kernel/events/uprobes.c b/kernel/events/uprobes.c
> > index 4b7e590dc428..9e971f86afdf 100644
> > --- a/kernel/events/uprobes.c
> > +++ b/kernel/events/uprobes.c
> > @@ -67,6 +67,8 @@ struct uprobe {
> >     loff_t                  ref_ctr_offset;
> >     unsigned long           flags;
> >  
> > +   unsigned int            consumers_cnt;
> > +
> >     /*
> >      * The generic code assumes that it has two members of unknown type
> >      * owned by the arch-specific code:
> > @@ -826,8 +828,12 @@ static struct uprobe *alloc_uprobe(struct inode 
> > *inode, loff_t offset,
> >  
> >  static void consumer_add(struct uprobe *uprobe, struct uprobe_consumer *uc)
> >  {
> > +   static atomic64_t id;
> > +
> >     down_write(&uprobe->consumer_rwsem);
> >     list_add_rcu(&uc->cons_node, &uprobe->consumers);
> > +   uc->id = (__u64) atomic64_inc_return(&id);
> > +   uprobe->consumers_cnt++;
> >     up_write(&uprobe->consumer_rwsem);
> >  }
> >  
> > @@ -839,6 +845,7 @@ static void consumer_del(struct uprobe *uprobe, struct 
> > uprobe_consumer *uc)
> >  {
> >     down_write(&uprobe->consumer_rwsem);
> >     list_del_rcu(&uc->cons_node);
> > +   uprobe->consumers_cnt--;
> >     up_write(&uprobe->consumer_rwsem);
> >  }
> >  
> > @@ -1786,6 +1793,30 @@ static struct uprobe_task *get_utask(void)
> >     return current->utask;
> >  }
> >  
> > +static size_t ri_size(int consumers_cnt)
> > +{
> > +   struct return_instance *ri;
> > +
> > +   return sizeof(*ri) + sizeof(ri->consumers[0]) * consumers_cnt;
> > +}
> > +
> > +static struct return_instance *alloc_return_instance(int consumers_cnt)
> > +{
> > +   struct return_instance *ri;
> > +
> > +   ri = kzalloc(ri_size(consumers_cnt), GFP_KERNEL);
> > +   if (ri)
> > +           ri->consumers_cnt = consumers_cnt;
> > +   return ri;
> > +}
> > +
> > +static struct return_instance *dup_return_instance(struct return_instance 
> > *old)
> > +{
> > +   size_t size = ri_size(old->consumers_cnt);
> > +
> > +   return kmemdup(old, size, GFP_KERNEL);
> > +}
> > +
> >  static int dup_utask(struct task_struct *t, struct uprobe_task *o_utask)
> >  {
> >     struct uprobe_task *n_utask;
> > @@ -1798,11 +1829,10 @@ static int dup_utask(struct task_struct *t, struct 
> > uprobe_task *o_utask)
> >  
> >     p = &n_utask->return_instances;
> >     for (o = o_utask->return_instances; o; o = o->next) {
> > -           n = kmalloc(sizeof(struct return_instance), GFP_KERNEL);
> > +           n = dup_return_instance(o);
> >             if (!n)
> >                     return -ENOMEM;
> >  
> > -           *n = *o;
> >             /*
> >              * uprobe's refcnt has to be positive at this point, kept by
> >              * utask->return_instances items; return_instances can't be
> > @@ -1895,39 +1925,35 @@ static void cleanup_return_instances(struct 
> > uprobe_task *utask, bool chained,
> >     utask->return_instances = ri;
> >  }
> >  
> > -static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs)
> > +static void prepare_uretprobe(struct uprobe *uprobe, struct pt_regs *regs,
> > +                         struct return_instance *ri)
> >  {
> > -   struct return_instance *ri;
> >     struct uprobe_task *utask;
> >     unsigned long orig_ret_vaddr, trampoline_vaddr;
> >     bool chained;
> >  
> >     if (!get_xol_area())
> > -           return;
> > +           goto free;
> >  
> >     utask = get_utask();
> >     if (!utask)
> > -           return;
> > +           goto free;
> >  
> >     if (utask->depth >= MAX_URETPROBE_DEPTH) {
> >             printk_ratelimited(KERN_INFO "uprobe: omit uretprobe due to"
> >                             " nestedness limit pid/tgid=%d/%d\n",
> >                             current->pid, current->tgid);
> > -           return;
> > +           goto free;
> >     }
> >  
> >     /* we need to bump refcount to store uprobe in utask */
> >     if (!try_get_uprobe(uprobe))
> > -           return;
> > -
> > -   ri = kmalloc(sizeof(struct return_instance), GFP_KERNEL);
> > -   if (!ri)
> > -           goto fail;
> > +           goto free;
> >  
> >     trampoline_vaddr = uprobe_get_trampoline_vaddr();
> >     orig_ret_vaddr = arch_uretprobe_hijack_return_addr(trampoline_vaddr, 
> > regs);
> >     if (orig_ret_vaddr == -1)
> > -           goto fail;
> > +           goto put;
> >  
> >     /* drop the entries invalidated by longjmp() */
> >     chained = (orig_ret_vaddr == trampoline_vaddr);
> > @@ -1945,7 +1971,7 @@ static void prepare_uretprobe(struct uprobe *uprobe, 
> > struct pt_regs *regs)
> >                      * attack from user-space.
> >                      */
> >                     uprobe_warn(current, "handle tail call");
> > -                   goto fail;
> > +                   goto put;
> >             }
> >             orig_ret_vaddr = utask->return_instances->orig_ret_vaddr;
> >     }
> > @@ -1960,9 +1986,10 @@ static void prepare_uretprobe(struct uprobe *uprobe, 
> > struct pt_regs *regs)
> >     utask->return_instances = ri;
> >  
> >     return;
> > -fail:
> > -   kfree(ri);
> > +put:
> >     put_uprobe(uprobe);
> > +free:
> > +   kfree(ri);
> >  }
> >  
> >  /* Prepare to single-step probed instruction out of line. */
> > @@ -2114,35 +2141,78 @@ static struct uprobe 
> > *find_active_uprobe_rcu(unsigned long bp_vaddr, int *is_swb
> >     return uprobe;
> >  }
> >  
> > +static struct return_consumer *
> > +return_consumer_next(struct return_instance *ri, struct return_consumer 
> > *ric)
> > +{
> > +   return ric ? ric + 1 : &ri->consumers[0];
> > +}
> > +
> > +static struct return_consumer *
> > +return_consumer_find(struct return_instance *ri, int *iter, int id)
> > +{
> > +   struct return_consumer *ric;
> > +   int idx = *iter;
> > +
> > +   for (ric = &ri->consumers[idx]; idx < ri->consumers_cnt; idx++, ric++) {
> > +           if (ric->id == id) {
> > +                   *iter = idx + 1;
> > +                   return ric;
> > +           }
> > +   }
> > +   return NULL;
> > +}
> > +
> >  static void handler_chain(struct uprobe *uprobe, struct pt_regs *regs)
> >  {
> >     struct uprobe_consumer *uc;
> >     int remove = UPROBE_HANDLER_REMOVE;
> > -   bool need_prep = false; /* prepare return uprobe, when needed */
> > +   struct return_consumer *ric = NULL;
> > +   struct return_instance *ri = NULL;
> >     bool has_consumers = false;
> >  
> >     current->utask->auprobe = &uprobe->arch;
> >  
> >     list_for_each_entry_srcu(uc, &uprobe->consumers, cons_node,
> >                              srcu_read_lock_held(&uprobes_srcu)) {
> > +           __u64 cookie = 0;
> >             int rc = 0;
> >  
> >             if (uc->handler) {
> > -                   rc = uc->handler(uc, regs);
> > -                   WARN(rc & ~UPROBE_HANDLER_MASK,
> > +                   rc = uc->handler(uc, regs, &cookie);
> > +                   WARN(rc < 0 || rc > 2,
> >                             "bad rc=0x%x from %ps()\n", rc, uc->handler);
> >             }
> >  
> > -           if (uc->ret_handler)
> > -                   need_prep = true;
> > -
> > +           /*
> > +            * The handler can return following values:
> > +            * 0 - execute ret_handler (if it's defined)
> > +            * 1 - remove uprobe
> > +            * 2 - do nothing (ignore ret_handler)
> > +            */
> >             remove &= rc;
> >             has_consumers = true;
> > +
> > +           if (rc == 0 && uc->ret_handler) {
> > +                   /*
> > +                    * Preallocate return_instance object optimistically 
> > with
> > +                    * all possible consumers, so we allocate just once.
> > +                    */
> > +                   if (!ri) {
> > +                           ri = 
> > alloc_return_instance(uprobe->consumers_cnt);
> > +                           if (!ri)
> > +                                   return;
> > +                   }
> > +                   ric = return_consumer_next(ri, ric);
> > +                   ric->cookie = cookie;
> > +                   ric->id = uc->id;
> > +           }
> >     }
> >     current->utask->auprobe = NULL;
> >  
> > -   if (need_prep && !remove)
> > -           prepare_uretprobe(uprobe, regs); /* put bp at return */
> > +   if (ri && !remove)
> > +           prepare_uretprobe(uprobe, regs, ri); /* put bp at return */
> > +   else
> > +           kfree(ri);
> >  
> >     if (remove && has_consumers) {
> >             down_read(&uprobe->register_rwsem);
> > @@ -2160,15 +2230,25 @@ static void handler_chain(struct uprobe *uprobe, 
> > struct pt_regs *regs)
> >  static void
> >  handle_uretprobe_chain(struct return_instance *ri, struct pt_regs *regs)
> >  {
> > +   struct return_consumer *ric = NULL;
> >     struct uprobe *uprobe = ri->uprobe;
> >     struct uprobe_consumer *uc;
> > -   int srcu_idx;
> > +   int srcu_idx, iter = 0;
> >  
> >     srcu_idx = srcu_read_lock(&uprobes_srcu);
> >     list_for_each_entry_srcu(uc, &uprobe->consumers, cons_node,
> >                              srcu_read_lock_held(&uprobes_srcu)) {
> > +           /*
> > +            * If we don't find return consumer, it means uprobe consumer
> > +            * was added after we hit uprobe and return consumer did not
> > +            * get registered in which case we call the ret_handler only
> > +            * if it's not session consumer.
> > +            */
> > +           ric = return_consumer_find(ri, &iter, uc->id);
> > +           if (!ric && uc->session)
> > +                   continue;
> >             if (uc->ret_handler)
> > -                   uc->ret_handler(uc, ri->func, regs);
> > +                   uc->ret_handler(uc, ri->func, regs, ric ? &ric->cookie 
> > : NULL);
> >     }
> >     srcu_read_unlock(&uprobes_srcu, srcu_idx);
> >  }
> > diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
> > index ac0a01cc8634..de241503c8fb 100644
> > --- a/kernel/trace/bpf_trace.c
> > +++ b/kernel/trace/bpf_trace.c
> > @@ -3332,7 +3332,8 @@ uprobe_multi_link_filter(struct uprobe_consumer *con, 
> > struct mm_struct *mm)
> >  }
> >  
> >  static int
> > -uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs 
> > *regs)
> > +uprobe_multi_link_handler(struct uprobe_consumer *con, struct pt_regs 
> > *regs,
> > +                     __u64 *data)
> >  {
> >     struct bpf_uprobe *uprobe;
> >  
> > @@ -3341,7 +3342,8 @@ uprobe_multi_link_handler(struct uprobe_consumer 
> > *con, struct pt_regs *regs)
> >  }
> >  
> >  static int
> > -uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long 
> > func, struct pt_regs *regs)
> > +uprobe_multi_link_ret_handler(struct uprobe_consumer *con, unsigned long 
> > func, struct pt_regs *regs,
> > +                         __u64 *data)
> >  {
> >     struct bpf_uprobe *uprobe;
> >  
> > diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c
> > index f7443e996b1b..11103dde897b 100644
> > --- a/kernel/trace/trace_uprobe.c
> > +++ b/kernel/trace/trace_uprobe.c
> > @@ -88,9 +88,11 @@ static struct trace_uprobe *to_trace_uprobe(struct 
> > dyn_event *ev)
> >  static int register_uprobe_event(struct trace_uprobe *tu);
> >  static int unregister_uprobe_event(struct trace_uprobe *tu);
> >  
> > -static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs 
> > *regs);
> > +static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs 
> > *regs,
> > +                        __u64 *data);
> >  static int uretprobe_dispatcher(struct uprobe_consumer *con,
> > -                           unsigned long func, struct pt_regs *regs);
> > +                           unsigned long func, struct pt_regs *regs,
> > +                           __u64 *data);
> >  
> >  #ifdef CONFIG_STACK_GROWSUP
> >  static unsigned long adjust_stack_addr(unsigned long addr, unsigned int n)
> > @@ -1500,7 +1502,8 @@ trace_uprobe_register(struct trace_event_call *event, 
> > enum trace_reg type,
> >     }
> >  }
> >  
> > -static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs 
> > *regs)
> > +static int uprobe_dispatcher(struct uprobe_consumer *con, struct pt_regs 
> > *regs,
> > +                        __u64 *data)
> >  {
> >     struct trace_uprobe *tu;
> >     struct uprobe_dispatch_data udd;
> > @@ -1530,7 +1533,8 @@ static int uprobe_dispatcher(struct uprobe_consumer 
> > *con, struct pt_regs *regs)
> >  }
> >  
> >  static int uretprobe_dispatcher(struct uprobe_consumer *con,
> > -                           unsigned long func, struct pt_regs *regs)
> > +                           unsigned long func, struct pt_regs *regs,
> > +                           __u64 *data)
> >  {
> >     struct trace_uprobe *tu;
> >     struct uprobe_dispatch_data udd;
> > diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c 
> > b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
> > index 1fc16657cf42..e91ff5b285f0 100644
> > --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
> > +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c
> > @@ -421,7 +421,7 @@ static struct bin_attribute bin_attr_bpf_testmod_file 
> > __ro_after_init = {
> >  
> >  static int
> >  uprobe_ret_handler(struct uprobe_consumer *self, unsigned long func,
> > -              struct pt_regs *regs)
> > +              struct pt_regs *regs, __u64 *data)
> >  
> >  {
> >     regs->ax  = 0x12345678deadbeef;
> > -- 
> > 2.46.0
> > 
> 
> 
> -- 
> Masami Hiramatsu (Google) <[email protected]>

Reply via email to