First part of cgroup support for CMT.

A monr's position in the monr hierarchy depends on the position of its
target cgroup or thread in the cgroup hierarchy.
(See code comments for details).

A monr that monitors a cgroup keeps a reference to it in monr->mon_cgrp,
which is used in future patches to add support for cgroup monitoring
without requiring an active perf_event at all times.

Signed-off-by: David Carrillo-Cisneros <davi...@google.com>
---
 arch/x86/events/intel/cmt.c | 293 ++++++++++++++++++++++++++++++++++++++++++++
 arch/x86/events/intel/cmt.h |   2 +
 2 files changed, 295 insertions(+)
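
Review aid (not part of the patch; this note sits below the "---" marker and
is dropped by git-am): a minimal, hypothetical userspace sketch of the
lowest-monitored-ancestor rule the commit message describes. The names
toy_cgroup, toy_monr and toy_resolve_monr are made up for illustration only;
they just mirror the intent of perf_cgroup_find_lma() and
monr_from_perf_cgroup() in the diff below.

#include <stdio.h>

/* Toy model: a cgroup without its own monr uses the monr of its
 * lowest monitored ancestor. */
struct toy_monr {
	const char *name;
};

struct toy_cgroup {
	const char *name;
	struct toy_cgroup *parent;
	struct toy_monr *monr;	/* non-NULL when this cgroup is monitored */
};

/* Resolve the monr a cgroup uses: its own, else its lowest monitored
 * ancestor's, else NULL. */
static struct toy_monr *toy_resolve_monr(struct toy_cgroup *cgrp)
{
	while (cgrp && !cgrp->monr)
		cgrp = cgrp->parent;
	return cgrp ? cgrp->monr : NULL;
}

int main(void)
{
	struct toy_monr root_monr = { "root_monr" }, a_monr = { "a_monr" };
	struct toy_cgroup root = { "/",    NULL,  &root_monr };
	struct toy_cgroup a    = { "/a",   &root, &a_monr };
	struct toy_cgroup ab   = { "/a/b", &a,    NULL };	/* not monitored */

	/* /a/b has no monr of its own, so it reports through /a's monr. */
	printf("%s uses %s\n", ab.name, toy_resolve_monr(&ab)->name);
	return 0;
}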

diff --git a/arch/x86/events/intel/cmt.c b/arch/x86/events/intel/cmt.c
index 3883cb4..a5b7d2d 100644
--- a/arch/x86/events/intel/cmt.c
+++ b/arch/x86/events/intel/cmt.c
@@ -125,6 +125,14 @@ static inline struct pmonr *pkgd_pmonr(struct pkg_data *pkgd, struct monr *monr)
        return rcu_dereference_check(monr->pmonrs[pkgd->pkgid], safe);
 }
 
+#ifdef CONFIG_CGROUP_PERF
+static inline struct cgroup_subsys_state *get_root_perf_css(void)
+{
+       /* Get css for root cgroup */
+       return  init_css_set.subsys[perf_event_cgrp_id];
+}
+#endif
+
 static inline void pmonr_set_rmids(struct pmonr *pmonr,
                                   u32 sched_rmid, u32 read_rmid)
 {
@@ -416,6 +424,7 @@ static void monr_dealloc(struct monr *monr)
 
        if (WARN_ON_ONCE(monr->nr_has_user) ||
            WARN_ON_ONCE(monr->nr_nolazy_rmid) ||
+           WARN_ON_ONCE(monr->mon_cgrp) ||
            WARN_ON_ONCE(monr->mon_events))
                return;
 
@@ -639,6 +648,7 @@ static int monr_apply_uflags(struct monr *monr, enum cmt_user_flags *puflags)
        goto exit;
 }
 
+/* Can return NULL if the monr was for a cgroup that has gone offline. */
 static inline struct monr *monr_from_event(struct perf_event *event)
 {
        return (struct monr *) READ_ONCE(event->hw.cmt_monr);
@@ -727,6 +737,75 @@ static int monr_append_event(struct monr *monr, struct perf_event *event)
        return err;
 }
 
+#ifdef CONFIG_CGROUP_PERF
+static inline struct monr *monr_from_perf_cgroup(struct perf_cgroup *cgrp)
+{
+       return (struct monr *)READ_ONCE(cgrp->arch_info);
+}
+
+static inline void perf_cgroup_set_monr(struct perf_cgroup *cgrp,
+                                       struct monr *monr)
+{
+       WRITE_ONCE(cgrp->arch_info, monr);
+}
+
+/* Get cgroup for both task and cgroup event. */
+static struct perf_cgroup *perf_cgroup_from_task_event(struct perf_event *event)
+{
+#ifdef CONFIG_LOCKDEP
+       bool rcu_safe = lockdep_is_held(&cmt_mutex);
+#endif
+
+       return container_of(
+               task_css_check(event->hw.target, perf_event_cgrp_id, rcu_safe),
+               struct perf_cgroup, css);
+}
+
+static struct perf_cgroup *perf_cgroup_from_css(struct cgroup_subsys_state *css)
+{
+       return container_of(css, struct perf_cgroup, css);
+}
+
+/**
+ * perf_cgroup_mon_started() - Tell if cgroup is monitored by its own monr.
+ *
+ * A perf_cgroup is being monitored when it is referenced back by its
+ * monr's mon_cgrp. Otherwise, the cgroup only uses the monr of another
+ * monitored cgroup (the one referenced back by that monr's mon_cgrp).
+ */
+static inline bool perf_cgroup_mon_started(struct perf_cgroup *cgrp)
+{
+       struct monr *monr;
+
+       /*
+        * monr can be referenced by a cgroup other than the one in its
+        * mon_cgrp, be careful.
+        */
+       monr = monr_from_perf_cgroup(cgrp);
+
+       /* Root monr has no associated cgroup before initialization. */
+       return monr->mon_cgrp == cgrp;
+}
+
+/**
+ * perf_cgroup_find_lma() - Find @cgrp's lowest monitored ancestor.
+ *
+ * Find the lowest monitored ancestor of @cgrp, not including @cgrp itself.
+ * Return: the lma, or NULL if no ancestor is monitored.
+ */
+struct perf_cgroup *perf_cgroup_find_lma(struct perf_cgroup *cgrp)
+{
+       struct cgroup_subsys_state *parent_css;
+
+       do {
+               parent_css = cgrp->css.parent;
+               cgrp = parent_css ? perf_cgroup_from_css(parent_css) : NULL;
+       } while (cgrp && !perf_cgroup_mon_started(cgrp));
+       return cgrp;
+}
+
+#endif
+
 /**
  * pmonr_update_sched_rmid() - Update sched_rmid for @pmonr in current package.
  *
@@ -815,6 +894,214 @@ static void monr_hrchy_remove_leaf(struct monr *monr)
        monr_hrchy_release_locks(&flags);
 }
 
+#ifdef CONFIG_CGROUP_PERF
+
+/* Similar to css_next_descendant_pre but skips the subtree rooted by pos. */
+struct cgroup_subsys_state *
+css_skip_subtree_pre(struct cgroup_subsys_state *pos,
+                    struct cgroup_subsys_state *root)
+{
+       struct cgroup_subsys_state *next;
+
+       while (pos != root) {
+               next = css_next_child(pos, pos->parent);
+               if (next)
+                       return next;
+               pos = pos->parent;
+       }
+       return NULL;
+}
+
+/* Make the monrs of all css descendants of @css depend on @new_monr. */
+inline void css_subtree_update_monr_dependants(struct cgroup_subsys_state *css,
+                                              struct monr *new_monr)
+{
+       struct cgroup_subsys_state *pos_css;
+       struct perf_cgroup *pos_cgrp;
+       struct monr *pos_monr;
+       unsigned long flags;
+
+       lockdep_assert_held(&cmt_mutex);
+
+       rcu_read_lock();
+
+       pos_css = css_next_descendant_pre(css, css);
+       while (pos_css) {
+               pos_cgrp = perf_cgroup_from_css(pos_css);
+               pos_monr = monr_from_perf_cgroup(pos_cgrp);
+
+               /* Skip css that are not online, sync'ed with cmt_mutex. */
+               if (!(pos_css->flags & CSS_ONLINE)) {
+                       pos_css = css_next_descendant_pre(pos_css, css);
+                       continue;
+               }
+               if (!perf_cgroup_mon_started(pos_cgrp)) {
+                       perf_cgroup_set_monr(pos_cgrp, new_monr);
+                       pos_css = css_next_descendant_pre(pos_css, css);
+                       continue;
+               }
+               rcu_read_unlock();
+
+               monr_hrchy_acquire_locks(&flags);
+               pos_monr->parent = new_monr;
+               list_move_tail(&pos_monr->parent_entry, &new_monr->children);
+               monr_hrchy_release_locks(&flags);
+
+               rcu_read_lock();
+               /*
+                * Skip subtrees rooted by a css that owns a monr, since the
+                * css in those subtrees use the monr at their subtree root.
+                */
+               pos_css = css_skip_subtree_pre(pos_css, css);
+       }
+       rcu_read_unlock();
+}
+
+static inline int __css_start_monitoring(struct cgroup_subsys_state *css)
+{
+       struct perf_cgroup *cgrp, *cgrp_lma, *pos_cgrp;
+       struct monr *monr, *monr_parent, *pos_monr, *tmp_monr;
+       unsigned long flags;
+
+       lockdep_assert_held(&cmt_mutex);
+
+       cgrp = perf_cgroup_from_css(css);
+
+       cgrp_lma = perf_cgroup_find_lma(cgrp);
+       if (!cgrp_lma) {
+               perf_cgroup_set_monr(cgrp, monr_hrchy_root);
+               monr_hrchy_root->mon_cgrp = cgrp;
+               return 0;
+       }
+       /*
+        * The monr of the lowest monitored ancestor is the direct parent
+        * of the new monr in the monr hierarchy.
+        */
+       monr_parent = monr_from_perf_cgroup(cgrp_lma);
+
+       monr = monr_alloc();
+       if (IS_ERR(monr))
+               return PTR_ERR(monr);
+       /*
+        * The new monr has no children yet, so it can be inserted in the
+        * hierarchy as a leaf. Since all of the monr's pmonrs are in Off
+        * state, there is no risk of pmonr state transitions in the
+        * scheduler path.
+        */
+       monr_hrchy_acquire_locks(&flags);
+       monr_hrchy_insert_leaf(monr, monr_parent);
+       monr_hrchy_release_locks(&flags);
+
+       /*
+        * The locks above also act as a barrier that prevents attaching
+        * the monr to cgrp before the monr is in the monr hierarchy.
+        */
+       perf_cgroup_set_monr(cgrp, monr);
+       monr->mon_cgrp = cgrp;
+       css_subtree_update_monr_dependants(css, monr);
+
+       monr_hrchy_acquire_locks(&flags);
+       /* Move task-event monrs that are descendants of css's cgroup. */
+       list_for_each_entry_safe(pos_monr, tmp_monr,
+                                &monr_parent->children, parent_entry) {
+               if (pos_monr->mon_cgrp)
+                       continue;
+               /*
+                * All events in an event group have the same cgroup.
+                * No RCU read lock is necessary for task_css_check() since
+                * cmt_mutex is held here.
+                */
+               pos_cgrp = perf_cgroup_from_task_event(pos_monr->mon_events);
+               if (!cgroup_is_descendant(pos_cgrp->css.cgroup,
+                                         cgrp->css.cgroup))
+                       continue;
+               pos_monr->parent = monr;
+               list_move_tail(&pos_monr->parent_entry, &monr->children);
+       }
+       monr_hrchy_release_locks(&flags);
+
+       return 0;
+}
+
+static inline void __css_stop_monitoring(struct cgroup_subsys_state *css)
+{
+       struct perf_cgroup *cgrp, *cgrp_lma;
+       struct monr *monr, *monr_parent, *pos_monr;
+       unsigned long flags;
+
+       lockdep_assert_held(&cmt_mutex);
+
+       cgrp = perf_cgroup_from_css(css);
+       monr = monr_from_perf_cgroup(cgrp);
+       /*
+        * When css is the root cgroup's css, detach the cgroup but do not
+        * destroy the monr.
+        */
+       cgrp_lma = perf_cgroup_find_lma(cgrp);
+       if (!cgrp_lma) {
+               /* monr of root cgrp must be monr_hrchy_root. */
+               monr->mon_cgrp = NULL;
+               return;
+       }
+
+       monr_parent = monr_from_perf_cgroup(cgrp_lma);
+       css_subtree_update_monr_dependants(css, monr_parent);
+
+       monr_hrchy_acquire_locks(&flags);
+
+       /* Move the remaining children monrs; none of them monitor cgroups. */
+       list_for_each_entry(pos_monr, &monr->children, parent_entry)
+               pos_monr->parent = monr_parent;
+       list_splice_tail_init(&monr->children, &monr_parent->children);
+
+       perf_cgroup_set_monr(cgrp, monr_from_perf_cgroup(cgrp_lma));
+       monr->mon_cgrp = NULL;
+       monr_hrchy_remove_leaf(monr);
+
+       monr_hrchy_release_locks(&flags);
+}
+
+static bool is_cgroup_event(struct perf_event *event)
+{
+       return event->cgrp;
+}
+
+static int monr_hrchy_attach_cgroup_event(struct perf_event *event)
+{
+       struct monr *monr;
+       struct perf_cgroup *cgrp = event->cgrp;
+       int err;
+       bool started = false;
+
+       if (!perf_cgroup_mon_started(cgrp)) {
+               css_get(&cgrp->css);
+               err = __css_start_monitoring(&cgrp->css);
+               css_put(&cgrp->css);
+               if (err)
+                       return err;
+               started = true;
+       }
+
+       monr = monr_from_perf_cgroup(cgrp);
+       err = monr_append_event(monr, event);
+       if (err && started) {
+               css_get(&cgrp->css);
+               __css_stop_monitoring(&cgrp->css);
+               css_put(&cgrp->css);
+       }
+
+       return err;
+}
+
+/* Return the monr of the cgroup that contains the task to monitor. */
+static struct monr *monr_hrchy_get_monr_parent(struct perf_event *event)
+{
+       struct perf_cgroup *cgrp = perf_cgroup_from_task_event(event);
+
+       return monr_from_perf_cgroup(cgrp);
+}
+
+#else /* CONFIG_CGROUP_PERF */
+
 static bool is_cgroup_event(struct perf_event *event)
 {
        return false;
@@ -834,6 +1121,8 @@ static struct monr *monr_hrchy_get_monr_parent(struct perf_event *event)
        return monr_hrchy_root;
 }
 
+#endif
+
 static int monr_hrchy_attach_cpu_event(struct perf_event *event)
 {
        return monr_append_event(monr_hrchy_root, event);
@@ -883,6 +1172,10 @@ static int monr_hrchy_attach_event(struct perf_event *event)
 
 static void monr_destroy(struct monr *monr)
 {
+#ifdef CONFIG_CGROUP_PERF
+       if (monr->mon_cgrp)
+               __css_stop_monitoring(&monr->mon_cgrp->css);
+#endif
        monr_hrchy_remove_leaf(monr);
        monr_dealloc(monr);
 }
diff --git a/arch/x86/events/intel/cmt.h b/arch/x86/events/intel/cmt.h
index 754a9c8..dc52641 100644
--- a/arch/x86/events/intel/cmt.h
+++ b/arch/x86/events/intel/cmt.h
@@ -252,6 +252,7 @@ enum cmt_user_flags {
 
 /**
  * struct monr - MONitored Resource.
+ * @mon_cgrp:          The cgroup associated with this monr, if any.
 * @mon_events:                The head of event's group that use this monr, if any.
  * @entry:             List entry into cmt_event_monrs.
  * @pmonrs:            Per-package pmonrs.
@@ -271,6 +272,7 @@ enum cmt_user_flags {
  * On initialization, all monr's pmonrs start in Off state.
  */
 struct monr {
+       struct perf_cgroup              *mon_cgrp;
        struct perf_event               *mon_events;
        struct list_head                entry;
        struct pmonr                    **pmonrs;
-- 
2.8.0.rc3.226.g39d4020
