On Mon, Jan 25, 2016 at 10:04:10PM +0100, Peter Zijlstra wrote:
> On Mon, Jan 25, 2016 at 03:54:14PM +0100, Peter Zijlstra wrote:
> > Alexander, Alexei,
> > 
> > How about the below? That uses event->state == PERF_EVENT_STATE_EXIT to
> > indicate the event has been given up by its 'owner' and decouples us
> > from the actual event->owner logic.
> > 
> > This retains the event->owner and event->owner_list thing purely for the
> > prclt(.option = PR_TASK_PERF_EVENTS_{EN,DIS}ABLE) calls, but does give
> > us strict 'owner' semantics in that:
> > 
> >   struct perf_event *my_event = perf_event_create_kernel_counter();
> > 
> >   /* ... */
> > 
> >   perf_event_release_kernel(my_event);
> > 
> > Or
> > 
> >   int fd = sys_perf_event_open(...);
> > 
> >   close(fd); /* last, calls fops::release */
> > 
> > Will destroy the event dead. event::refcount will 'retain' the object
> > but it will become non functional and is strictly meant as a temporal
> > existence guarantee (for when RCU isn't good enough).
> > 
> > So this should restore the scm_rights case, which preserves the fd but
> > could result in not having event->owner (and therefore being removed
> > from its owner_list), which is fine.
> > 
> > BPF still needs to get fixed to use filedesc references instead.
> 
> Still no BPF, but this one actually 'works', as in it doesn't have the
> blatant exit races and has survived a few hours of runtime.
> 
> ---
>  include/linux/perf_event.h |    3 
>  kernel/events/core.c       |  304 
> ++++++++++++++++++++++-----------------------
>  2 files changed, 150 insertions(+), 157 deletions(-)

I think I understand what you're trying to do and
the patch looks good to me.
As far as BPF side I did the following...
does it match the model you outlined above?
I did basic testing and it looks fine.

Subject: [PATCH ] perf,bpf: convert perf_event_array to use struct file

Signed-off-by: Alexei Starovoitov <a...@kernel.org>
---
 include/linux/perf_event.h |  4 ++--
 kernel/bpf/arraymap.c      | 21 +++++++++++----------
 kernel/events/core.c       | 20 ++++++++------------
 kernel/trace/bpf_trace.c   | 14 ++++++++++----
 4 files changed, 31 insertions(+), 28 deletions(-)

diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index f9828a48f16a..df275020fde9 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -729,7 +729,7 @@ extern int perf_event_init_task(struct task_struct *child);
 extern void perf_event_exit_task(struct task_struct *child);
 extern void perf_event_free_task(struct task_struct *task);
 extern void perf_event_delayed_put(struct task_struct *task);
-extern struct perf_event *perf_event_get(unsigned int fd);
+extern struct file *perf_event_get(unsigned int fd);
 extern const struct perf_event_attr *perf_event_attrs(struct perf_event 
*event);
 extern void perf_event_print_debug(void);
 extern void perf_pmu_disable(struct pmu *pmu);
@@ -1070,7 +1070,7 @@ static inline int perf_event_init_task(struct task_struct 
*child) { return 0; }
 static inline void perf_event_exit_task(struct task_struct *child)     { }
 static inline void perf_event_free_task(struct task_struct *task)      { }
 static inline void perf_event_delayed_put(struct task_struct *task)    { }
-static inline struct perf_event *perf_event_get(unsigned int fd)       { 
return ERR_PTR(-EINVAL); }
+static inline struct file *perf_event_get(unsigned int fd)     { return 
ERR_PTR(-EINVAL); }
 static inline const struct perf_event_attr *perf_event_attrs(struct perf_event 
*event)
 {
        return ERR_PTR(-EINVAL);
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index b0799bced518..89ebbc4d1164 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -291,10 +291,13 @@ static void *perf_event_fd_array_get_ptr(struct bpf_map 
*map, int fd)
 {
        struct perf_event *event;
        const struct perf_event_attr *attr;
+       struct file *file;
 
-       event = perf_event_get(fd);
-       if (IS_ERR(event))
-               return event;
+       file = perf_event_get(fd);
+       if (IS_ERR(file))
+               return file;
+
+       event = file->private_data;
 
        attr = perf_event_attrs(event);
        if (IS_ERR(attr))
@@ -304,24 +307,22 @@ static void *perf_event_fd_array_get_ptr(struct bpf_map 
*map, int fd)
                goto err;
 
        if (attr->type == PERF_TYPE_RAW)
-               return event;
+               return file;
 
        if (attr->type == PERF_TYPE_HARDWARE)
-               return event;
+               return file;
 
        if (attr->type == PERF_TYPE_SOFTWARE &&
            attr->config == PERF_COUNT_SW_BPF_OUTPUT)
-               return event;
+               return file;
 err:
-       perf_event_release_kernel(event);
+       fput(file);
        return ERR_PTR(-EINVAL);
 }
 
 static void perf_event_fd_array_put_ptr(void *ptr)
 {
-       struct perf_event *event = ptr;
-
-       perf_event_release_kernel(event);
+       fput((struct file *)ptr);
 }
 
 static const struct bpf_map_ops perf_event_array_ops = {
diff --git a/kernel/events/core.c b/kernel/events/core.c
index 06ae52e99ac2..2a95e0d2370f 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -8896,21 +8896,17 @@ void perf_event_delayed_put(struct task_struct *task)
                WARN_ON_ONCE(task->perf_event_ctxp[ctxn]);
 }
 
-struct perf_event *perf_event_get(unsigned int fd)
+struct file *perf_event_get(unsigned int fd)
 {
-       int err;
-       struct fd f;
-       struct perf_event *event;
-
-       err = perf_fget_light(fd, &f);
-       if (err)
-               return ERR_PTR(err);
+       struct file *file;
 
-       event = f.file->private_data;
-       atomic_long_inc(&event->refcount);
-       fdput(f);
+       file = fget_raw(fd);
+       if (file->f_op != &perf_fops) {
+               fput(file);
+               return ERR_PTR(-EBADF);
+       }
 
-       return event;
+       return file;
 }
 
 const struct perf_event_attr *perf_event_attrs(struct perf_event *event)
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 45dd798bcd37..326a75e884db 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -191,14 +191,17 @@ static u64 bpf_perf_event_read(u64 r1, u64 index, u64 r3, 
u64 r4, u64 r5)
        struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
        struct bpf_array *array = container_of(map, struct bpf_array, map);
        struct perf_event *event;
+       struct file *file;
 
        if (unlikely(index >= array->map.max_entries))
                return -E2BIG;
 
-       event = (struct perf_event *)array->ptrs[index];
-       if (!event)
+       file = (struct file *)array->ptrs[index];
+       if (unlikely(!file))
                return -ENOENT;
 
+       event = file->private_data;
+
        /* make sure event is local and doesn't have pmu::count */
        if (event->oncpu != smp_processor_id() ||
            event->pmu->count)
@@ -228,6 +231,7 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 index, 
u64 r4, u64 size)
        void *data = (void *) (long) r4;
        struct perf_sample_data sample_data;
        struct perf_event *event;
+       struct file *file;
        struct perf_raw_record raw = {
                .size = size,
                .data = data,
@@ -236,10 +240,12 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 
index, u64 r4, u64 size)
        if (unlikely(index >= array->map.max_entries))
                return -E2BIG;
 
-       event = (struct perf_event *)array->ptrs[index];
-       if (unlikely(!event))
+       file = (struct file *)array->ptrs[index];
+       if (unlikely(!file))
                return -ENOENT;
 
+       event = file->private_data;
+
        if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE ||
                     event->attr.config != PERF_COUNT_SW_BPF_OUTPUT))
                return -EINVAL;
-- 
2.4.6

Reply via email to