On Thu, Sep 01, 2022 at 04:05:53PM +0530, Ravi Bangoria wrote:
> On 29-Aug-22 8:10 PM, Peter Zijlstra wrote:
> > On Mon, Aug 29, 2022 at 02:04:33PM +0200, Peter Zijlstra wrote:
> >> On Mon, Aug 29, 2022 at 05:03:47PM +0530, Ravi Bangoria wrote:
> >>> @@ -12598,6 +12590,7 @@ EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
> >>>  
> >>>  void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
> >>>  {
> >>> +#if 0 // XXX buggered - cpu hotplug, who cares
> >>>   struct perf_event_context *src_ctx;
> >>>   struct perf_event_context *dst_ctx;
> >>>   struct perf_event *event, *tmp;
> >>> @@ -12658,6 +12651,7 @@ void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
> >>>   }
> >>>   mutex_unlock(&dst_ctx->mutex);
> >>>   mutex_unlock(&src_ctx->mutex);
> >>> +#endif
> >>>  }
> >>>  EXPORT_SYMBOL_GPL(perf_pmu_migrate_context);
> >>>  
> >>
> >> Note to self; fix this :-) I'll see if I have time for that later today.
> > 
> > Urgh, while going through that it appears the whole refcounting thing
> > isn't fully done either.
> 
> Not sure I follow. Can you please elaborate?

Sure, and sorry, I've not been feeling too well the past few days; I
meant to post something, but I just couldn't find it in me to finish it,
let alone make it work.

So the basic issue I mentioned is that:


/*
 *           ,------------------------[1:n]---------------------.
 *           V                                                  V
 * perf_event_context <-[1:n]-> perf_event_pmu_context <--- perf_event
 *           ^                      ^     |                     |
 *           `--------[1:n]---------'     `-[n:1]-> pmu <-[1:n]-'
 *
 *
 * XXX destroy epc when empty
 *   refcount, !rcu
 *
 * XXX epc locking
 *
 *   event->pmu_ctx            ctx->mutex && inactive
 *   ctx->pmu_ctx_list         ctx->mutex && ctx->lock
 *
 */
struct perf_event_pmu_context {
        ...
        atomic_t                        refcount; /* event <-> epc */
        ...
}
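
That is, the epc lives for as long as it has events: each event takes a
reference through find_get_pmu_context() and drops it again through
put_pmu_ctx(), which destroys the epc once the count hits zero.
Simplified (sketch only; the locking and RCU details are exactly the
unfinished bits, and the kfree() is assumed, it isn't in the hunks
below):

	/* event side; simplified from find_get_pmu_context() below */
	epc = find_get_pmu_context(pmu, ctx, event);	/* epc->refcount++ */
	event->pmu_ctx = epc;

	/* teardown side; simplified from put_pmu_ctx() below */
	if (atomic_dec_and_test(&epc->refcount)) {
		list_del_init(&epc->pmu_ctx_entry);	/* under ctx->lock */
		epc->ctx = NULL;
		kfree(epc);				/* XXX !rcu */
	}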

But that then also suggests that:

struct perf_event_context {
        ...
        refcount_t                      refcount;
        ...
}

should be: ctx <-> epc, but that is not so; the ctx::refcount still
counts the number of events.

Now this might not be bad, but it is confusing.
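
Concretely, if ctx::refcount is to mean ctx <-> epc, the get/put must
move to where an epc is linked into / unlinked from the context, which
is what the XXX markers in the patch below are about; something like
(sketch):

	/* find_get_pmu_context(): a new epc pins the ctx */
	list_add(&epc->pmu_ctx_entry, &ctx->pmu_ctx_list);
	epc->ctx = ctx;
	get_ctx(ctx);

	/* put_pmu_ctx(): the dying epc unpins it again */
	list_del_init(&epc->pmu_ctx_entry);
	epc->ctx = NULL;
	put_ctx(ctx);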

Anyway; below is what I have, but it is entirely unfinished, untested,
might eat your granny, etc.


---
Index: linux-2.6/include/linux/perf_event.h
===================================================================
--- linux-2.6.orig/include/linux/perf_event.h
+++ linux-2.6/include/linux/perf_event.h
@@ -883,7 +883,7 @@ struct perf_event_context {
        int                             nr_freq;
        int                             rotate_disable;
 
-       refcount_t                      refcount;
+       refcount_t                      refcount; /* ctx <-> epc */
        struct task_struct              *task;
 
        /*
Index: linux-2.6/kernel/events/core.c
===================================================================
--- linux-2.6.orig/kernel/events/core.c
+++ linux-2.6/kernel/events/core.c
@@ -2750,7 +2750,7 @@ perf_install_in_context(struct perf_even
        WARN_ON_ONCE(!exclusive_event_installable(event, ctx));
 
        if (event->cpu != -1)
-               event->cpu = cpu;
+               WARN_ON_ONCE(event->cpu != cpu);
 
        /*
         * Ensures that if we can observe event->ctx, both the event and ctx
@@ -4764,6 +4764,7 @@ find_get_pmu_context(struct pmu *pmu, st
                        list_add(&epc->pmu_ctx_entry, &ctx->pmu_ctx_list);
                        epc->ctx = ctx;
                        raw_spin_unlock_irq(&ctx->lock);
+                       // XXX get_ctx(ctx);
                } else {
                        WARN_ON_ONCE(epc->ctx != ctx);
                        atomic_inc(&epc->refcount);
@@ -4800,6 +4801,7 @@ find_get_pmu_context(struct pmu *pmu, st
 
        list_add(&epc->pmu_ctx_entry, &ctx->pmu_ctx_list);
        epc->ctx = ctx;
+       // XXX get_ctx(ctx)
 
 found_epc:
        if (task_ctx_data && !epc->task_ctx_data) {
@@ -4837,6 +4839,8 @@ static void put_pmu_ctx(struct perf_even
                list_del_init(&epc->pmu_ctx_entry);
                epc->ctx = NULL;
                raw_spin_unlock_irqrestore(&ctx->lock, flags);
+
+               // XXX put_ctx(ctx)
        }
 
        WARN_ON_ONCE(!list_empty(&epc->pinned_active));
@@ -12444,6 +12448,8 @@ SYSCALL_DEFINE5(perf_event_open,
        perf_unpin_context(ctx);
 
        mutex_unlock(&ctx->mutex);
+
+       // XXX put_ctx(ctx);
 
        if (task) {
                up_read(&task->signal->exec_update_lock);
@@ -12588,34 +12594,44 @@ err:
 }
 EXPORT_SYMBOL_GPL(perf_event_create_kernel_counter);
 
-void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
+static void __perf_pmu_remove(struct perf_event_context *ctx,
+                             int cpu, struct pmu *pmu,
+                             struct perf_event_groups *groups,
+                             struct list_head *events)
 {
-#if 0 // XXX buggered - cpu hotplug, who cares
-       struct perf_event_context *src_ctx;
-       struct perf_event_context *dst_ctx;
-       struct perf_event *event, *tmp;
-       LIST_HEAD(events);
+       struct perf_event *event;
 
-       src_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, src_cpu)->ctx;
-       dst_ctx = &per_cpu_ptr(pmu->pmu_cpu_context, dst_cpu)->ctx;
+       for (event = perf_event_groups_first(groups, cpu, pmu, NULL);
+            event;
+            event = perf_event_groups_next(event, pmu)) {
 
-       /*
-        * See perf_event_ctx_lock() for comments on the details
-        * of swizzling perf_event::ctx.
-        */
-       mutex_lock_double(&src_ctx->mutex, &dst_ctx->mutex);
-       list_for_each_entry_safe(event, tmp, &src_ctx->event_list,
-                                event_entry) {
                perf_remove_from_context(event, 0);
-               unaccount_event_cpu(event, src_cpu);
-               put_ctx(src_ctx);
-               list_add(&event->migrate_entry, &events);
+               unaccount_event_cpu(event, cpu);
+               put_pmu_ctx(event->pmu_ctx);
+               list_add(&event->migrate_entry, events);
        }
+}
 
-       /*
-        * Wait for the events to quiesce before re-instating them.
-        */
-       synchronize_rcu();
+static void __perf_pmu_install_event(struct pmu *pmu,
+                                    struct perf_event_context *ctx,
+                                    int cpu, struct perf_event *event)
+{
+       struct perf_event_pmu_context *epc;
+
+       event->cpu = cpu;
+       epc = find_get_pmu_context(pmu, ctx, event);
+       event->pmu_ctx = epc;
+
+       if (event->state >= PERF_EVENT_STATE_OFF)
+               event->state = PERF_EVENT_STATE_INACTIVE;
+       account_event_cpu(event, cpu);
+       perf_install_in_context(ctx, event, cpu);
+}
+
+static void __perf_pmu_install(struct perf_event_context *ctx,
+                              int cpu, struct pmu *pmu, struct list_head *events)
+{
+       struct perf_event *event, *tmp;
 
        /*
         * Re-instate events in 2 passes.
@@ -12625,33 +12641,50 @@ void perf_pmu_migrate_context(struct pmu
         * leader will enable its siblings, even if those are still on the old
         * context.
         */
-       list_for_each_entry_safe(event, tmp, &events, migrate_entry) {
+       list_for_each_entry_safe(event, tmp, events, migrate_entry) {
                if (event->group_leader == event)
                        continue;
 
                list_del(&event->migrate_entry);
-               if (event->state >= PERF_EVENT_STATE_OFF)
-                       event->state = PERF_EVENT_STATE_INACTIVE;
-               account_event_cpu(event, dst_cpu);
-               perf_install_in_context(dst_ctx, event, dst_cpu);
-               get_ctx(dst_ctx);
+               __perf_pmu_install_event(pmu, ctx, cpu, event);
        }
 
        /*
         * Once all the siblings are setup properly, install the group leaders
         * to make it go.
         */
-       list_for_each_entry_safe(event, tmp, &events, migrate_entry) {
+       list_for_each_entry_safe(event, tmp, events, migrate_entry) {
                list_del(&event->migrate_entry);
-               if (event->state >= PERF_EVENT_STATE_OFF)
-                       event->state = PERF_EVENT_STATE_INACTIVE;
-               account_event_cpu(event, dst_cpu);
-               perf_install_in_context(dst_ctx, event, dst_cpu);
-               get_ctx(dst_ctx);
+               __perf_pmu_install_event(pmu, ctx, cpu, event);
        }
+}
+
+void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu)
+{
+       struct perf_event_context *src_ctx, *dst_ctx;
+       LIST_HEAD(events);
+
+       src_ctx = &per_cpu_ptr(&cpu_context, src_cpu)->ctx;
+       dst_ctx = &per_cpu_ptr(&cpu_context, dst_cpu)->ctx;
+
+       /*
+        * See perf_event_ctx_lock() for comments on the details
+        * of swizzling perf_event::ctx.
+        */
+       mutex_lock_double(&src_ctx->mutex, &dst_ctx->mutex);
+
+       __perf_pmu_remove(src_ctx, src_cpu, pmu, &src_ctx->pinned_groups, &events);
+       __perf_pmu_remove(src_ctx, src_cpu, pmu, &src_ctx->flexible_groups, &events);
+
+       /*
+        * Wait for the events to quiesce before re-instating them.
+        */
+       synchronize_rcu();
+
+       __perf_pmu_install(dst_ctx, dst_cpu, pmu, &events);
+
        mutex_unlock(&dst_ctx->mutex);
        mutex_unlock(&src_ctx->mutex);
-#endif
 }
 EXPORT_SYMBOL_GPL(perf_pmu_migrate_context);
 
