On Tue, Mar 07, 2017 at 05:51:32PM +0100, Oleg Nesterov wrote:

> I am looking at perf_event_init_task() too and I can't understand the
> error handling...
> 
> perf_event_init_context() can return success even if inherit_task_group() in
> the first list_for_each_entry(pinned_groups) fails, "ret" will be overwritten
> by the 2nd list_for_each_entry(flexible_groups) loop. "inherited_all" should
> be cleared, still this looks confusing at least.

Yes, this looks buggy. But I cannot explain how that would result in the
observed use-after-free. If we 'loose' an error this way,
perf_event_init_context() will not fail, and hence we'll not hit that
perf_event_free_task() path.

The task would continue living with an incorrectly copied context, and
counter behaviour would be affected.

> inherit_event() returns NULL under is_orphaned_event() check, not ERR_PTR().
> Is it correct?

Yes. This is all a tad tricky, but it seems to be correct.

By returning NULL, not an error, we affect the silent discard of
orphaned events. This is correct, because otherwise
perf_event_release_kernel() would have come by and explicitly discarded
those events for us anyway.


My current patch is below; won't do much good I suspect..

---
 kernel/events/core.c | 55 +++++++++++++++++++++++++++++++++++-----------------
 1 file changed, 37 insertions(+), 18 deletions(-)

diff --git a/kernel/events/core.c b/kernel/events/core.c
index a17ed56c8ce1..3cd907e00377 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -4256,7 +4256,7 @@ int perf_event_release_kernel(struct perf_event *event)
 
        raw_spin_lock_irq(&ctx->lock);
        /*
-        * Mark this even as STATE_DEAD, there is no external reference to it
+        * Mark this event as STATE_DEAD, there is no external reference to it
         * anymore.
         *
         * Anybody acquiring event->child_mutex after the below loop _must_
@@ -10417,21 +10417,9 @@ void perf_event_free_task(struct task_struct *task)
                        continue;
 
                mutex_lock(&ctx->mutex);
-again:
-               list_for_each_entry_safe(event, tmp, &ctx->pinned_groups,
-                               group_entry)
-                       perf_free_event(event, ctx);
-
-               list_for_each_entry_safe(event, tmp, &ctx->flexible_groups,
-                               group_entry)
+               list_for_each_entry_safe(event, tmp, &ctx->event_list, 
event_entry)
                        perf_free_event(event, ctx);
-
-               if (!list_empty(&ctx->pinned_groups) ||
-                               !list_empty(&ctx->flexible_groups))
-                       goto again;
-
                mutex_unlock(&ctx->mutex);
-
                put_ctx(ctx);
        }
 }
@@ -10469,7 +10457,12 @@ const struct perf_event_attr *perf_event_attrs(struct 
perf_event *event)
 }
 
 /*
- * inherit a event from parent task to child task:
+ * Inherit a event from parent task to child task.
+ *
+ * Returns:
+ *  - valid pointer on success
+ *  - NULL for orphaned events
+ *  - IS_ERR() on error
  */
 static struct perf_event *
 inherit_event(struct perf_event *parent_event,
@@ -10563,6 +10556,16 @@ inherit_event(struct perf_event *parent_event,
        return child_event;
 }
 
+/*
+ * Inherits an event group.
+ *
+ * This will quietly suppress orphaned events; !inherit_event() is not an 
error.
+ * This matches with perf_event_release_kernel() removing all child events.
+ *
+ * Returns:
+ *  - 0 on success
+ *  - <0 on error
+ */
 static int inherit_group(struct perf_event *parent_event,
              struct task_struct *parent,
              struct perf_event_context *parent_ctx,
@@ -10577,6 +10580,11 @@ static int inherit_group(struct perf_event 
*parent_event,
                                 child, NULL, child_ctx);
        if (IS_ERR(leader))
                return PTR_ERR(leader);
+       /*
+        * @leader can be NULL here because of is_orphaned_event(). In this
+        * case inherit_event() will create individual events, similar to what
+        * perf_group_detach() would do anyway.
+        */
        list_for_each_entry(sub, &parent_event->sibling_list, group_entry) {
                child_ctr = inherit_event(sub, parent, parent_ctx,
                                            child, leader, child_ctx);
@@ -10586,6 +10594,17 @@ static int inherit_group(struct perf_event 
*parent_event,
        return 0;
 }
 
+/*
+ * Creates the child task context and tries to inherit the event-group.
+ *
+ * Clears @inherited_all on !attr.inherited or error. Note that we'll leave
+ * inherited_all set when we 'fail' to inherit an orphaned event; this is
+ * consistent with perf_event_release_kernel() removing all child events.
+ *
+ * Returns:
+ *  - 0 on success
+ *  - <0 on error
+ */
 static int
 inherit_task_group(struct perf_event *event, struct task_struct *parent,
                   struct perf_event_context *parent_ctx,
@@ -10608,7 +10627,6 @@ inherit_task_group(struct perf_event *event, struct 
task_struct *parent,
                 * First allocate and initialize a context for the
                 * child.
                 */
-
                child_ctx = alloc_perf_context(parent_ctx->pmu, child);
                if (!child_ctx)
                        return -ENOMEM;
@@ -10670,7 +10688,7 @@ static int perf_event_init_context(struct task_struct 
*child, int ctxn)
                ret = inherit_task_group(event, parent, parent_ctx,
                                         child, ctxn, &inherited_all);
                if (ret)
-                       break;
+                       goto out_unlock;
        }
 
        /*
@@ -10686,7 +10704,7 @@ static int perf_event_init_context(struct task_struct 
*child, int ctxn)
                ret = inherit_task_group(event, parent, parent_ctx,
                                         child, ctxn, &inherited_all);
                if (ret)
-                       break;
+                       goto out_unlock;
        }
 
        raw_spin_lock_irqsave(&parent_ctx->lock, flags);
@@ -10714,6 +10732,7 @@ static int perf_event_init_context(struct task_struct 
*child, int ctxn)
        }
 
        raw_spin_unlock_irqrestore(&parent_ctx->lock, flags);
+out_unlock:
        mutex_unlock(&parent_ctx->mutex);
 
        perf_unpin_context(parent_ctx);

Reply via email to