From: "Paul E. McKenney" <[email protected]>

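Currently, the rcu_node tree ->expmask bitmasks are initially set to
reflect the online CPUs.  This is pointless, because only the CPUs
preempted within RCU read-side critical sections by the preceding
synchronize_sched_expedited() need to be tracked.  This commit therefore
instead sets up these bitmasks based on the state of the ->blkd_tasks
lists.  Because sync_rcu_preempt_exp_init() no longer invokes
rcu_report_exp_rnp(), the only remaining caller is one that always
requested a wakeup, so the "wake" argument is removed as well.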

Signed-off-by: Paul E. McKenney <[email protected]>
---
 kernel/rcu/tree_plugin.h | 50 ++++++++++++++++++++++++++----------------------
 1 file changed, 27 insertions(+), 23 deletions(-)

diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h
index 79376e2461c9..ce8fb810770c 100644
--- a/kernel/rcu/tree_plugin.h
+++ b/kernel/rcu/tree_plugin.h
@@ -98,8 +98,7 @@ RCU_STATE_INITIALIZER(rcu_preempt, 'p', call_rcu);
 static struct rcu_state *rcu_state_p = &rcu_preempt_state;
 
 static int rcu_preempted_readers_exp(struct rcu_node *rnp);
-static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
-                              bool wake);
+static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp);
 
 /*
  * Tell them what RCU they are running.
@@ -415,7 +414,7 @@ void rcu_read_unlock_special(struct task_struct *t)
                 * then we need to report up the rcu_node hierarchy.
                 */
                if (!empty_exp && empty_exp_now)
-                       rcu_report_exp_rnp(&rcu_preempt_state, rnp, true);
+                       rcu_report_exp_rnp(&rcu_preempt_state, rnp);
        } else {
                local_irq_restore(flags);
        }
@@ -626,13 +625,9 @@ static int sync_rcu_preempt_exp_done(struct rcu_node *rnp)
  * recursively up the tree.  (Calm down, calm down, we do the recursion
  * iteratively!)
  *
- * Most callers will set the "wake" flag, but the task initiating the
- * expedited grace period need not wake itself.
- *
  * Caller must hold sync_rcu_preempt_exp_mutex.
  */
-static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
-                              bool wake)
+static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp)
 {
        unsigned long flags;
        unsigned long mask;
@@ -646,10 +641,8 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
                }
                if (rnp->parent == NULL) {
                        raw_spin_unlock_irqrestore(&rnp->lock, flags);
-                       if (wake) {
-                               smp_mb(); /* EGP done before wake_up(). */
-                               wake_up(&sync_rcu_preempt_exp_wq);
-                       }
+                       smp_mb(); /* EGP done before wake_up(). */
+                       wake_up(&sync_rcu_preempt_exp_wq);
                        break;
                }
                mask = rnp->grpmask;
@@ -663,8 +656,11 @@ static void rcu_report_exp_rnp(struct rcu_state *rsp, struct rcu_node *rnp,
 
 /*
  * Snapshot the tasks blocking the newly started preemptible-RCU expedited
- * grace period for the specified rcu_node structure.  If there are no such
- * tasks, report it up the rcu_node hierarchy.
+ * grace period for the specified rcu_node structure.  If there are such
+ * tasks, set the ->expmask bits up the rcu_node tree.  Note that we
+ * do not set the ->expmask bits on the leaf rcu_node structures, because:
+ * (1) No particular CPU is blocking us and (2) A non-NULL ->exp_tasks
+ * pointer tells us that a given leaf rcu_node is blocked by some tasks.
  *
  * Caller must hold sync_rcu_preempt_exp_mutex and must exclude
  * CPU hotplug operations.
@@ -673,14 +669,27 @@ static void
 sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
 {
        unsigned long flags;
+       unsigned long mask;
+       struct rcu_node *rnp_up;
 
        raw_spin_lock_irqsave(&rnp->lock, flags);
        smp_mb__after_unlock_lock();
        if (!rcu_preempt_has_tasks(rnp)) {
                raw_spin_unlock_irqrestore(&rnp->lock, flags);
-               rcu_report_exp_rnp(rsp, rnp, false); /* No tasks, report. */
        } else {
+               /* Propagate ->expmask bits up the rcu_node tree. */
                rnp->exp_tasks = rnp->blkd_tasks.next;
+               rnp_up = rnp;
+               while (rnp_up->parent) {
+                       mask = rnp_up->grpmask;
+                       rnp_up = rnp_up->parent;
+                       if (ACCESS_ONCE(rnp_up->expmask) & mask)
+                               break;
+                       raw_spin_lock(&rnp_up->lock); /* irqs already off */
+                       smp_mb__after_unlock_lock();
+                       rnp_up->expmask |= mask;
+                       raw_spin_unlock(&rnp_up->lock); /* irqs still off */
+               }
                rcu_initiate_boost(rnp, flags);  /* releases rnp->lock */
        }
 }
@@ -699,7 +708,6 @@ sync_rcu_preempt_exp_init(struct rcu_state *rsp, struct rcu_node *rnp)
  */
 void synchronize_rcu_expedited(void)
 {
-       unsigned long flags;
        struct rcu_node *rnp;
        struct rcu_state *rsp = &rcu_preempt_state;
        unsigned long snap;
@@ -750,13 +758,9 @@ void synchronize_rcu_expedited(void)
        /* force all RCU readers onto ->blkd_tasks lists. */
        synchronize_sched_expedited();
 
-       /* Initialize ->expmask for all non-leaf rcu_node structures. */
-       rcu_for_each_nonleaf_node_breadth_first(rsp, rnp) {
-               raw_spin_lock_irqsave(&rnp->lock, flags);
-               smp_mb__after_unlock_lock();
-               rnp->expmask = rnp->qsmaskinit;
-               raw_spin_unlock_irqrestore(&rnp->lock, flags);
-       }
+       /* ->expmask must start out zero.  (Later: only if CONFIG_PROVE_RCU.) */
+       rcu_for_each_node_breadth_first(rsp, rnp)
+               WARN_ON_ONCE(rnp->expmask);
 
        /* Snapshot current state of ->blkd_tasks lists. */
        rcu_for_each_leaf_node(rsp, rnp)
-- 
1.8.1.5

--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html
Please read the FAQ at  http://www.tux.org/lkml/

Reply via email to