rcu_pending() decides whether rcu_core() should run on the current CPU's
timer tick.  It does not account for expedited grace periods: after an
expedited GP completes, a non-offloaded CPU's callbacks remain in
RCU_WAIT_TAIL (not yet advanced to RCU_DONE_TAIL) and rcu_core() is
never invoked to advance them.

Detect that case via rcu_segcblist_nextgp() combined with a new
memory-ordering-free poll variant,
poll_state_synchronize_rcu_full_unordered().  This keeps rcu_pending()
cheap: the check runs on every tick for which this CPU has pending
callbacks, so it must not pay for the two memory barriers in
poll_state_synchronize_rcu_full().  The check is only a hint to run
rcu_core(); the ordered re-check and the actual callback advancement
happen there.

Signed-off-by: Puranjay Mohan <[email protected]>
---
 kernel/rcu/tree.c | 38 +++++++++++++++++++++++++++++++-------
 1 file changed, 31 insertions(+), 7 deletions(-)

diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index 169d98ed52bbb..b01d7bf6b57b1 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -3598,6 +3598,24 @@ bool poll_state_synchronize_rcu(unsigned long oldstate)
 }
 EXPORT_SYMBOL_GPL(poll_state_synchronize_rcu);
 
+/*
+ * Racy, memory-ordering-free hint: returns true if the normal or expedited
+ * grace period recorded in *gsp appears to have completed.  Callers that
+ * need the full memory-ordering guarantees must instead use
+ * poll_state_synchronize_rcu_full(); this variant (e.g. for rcu_pending())
+ * leaves any required ordering to a subsequent ordered check.
+ */
+static bool poll_state_synchronize_rcu_full_unordered(struct rcu_gp_seq *gsp)
+{
+       struct rcu_node *rnp = rcu_get_root();
+
+       return gsp->norm == RCU_GET_STATE_COMPLETED ||
+              rcu_seq_done_exact(&rnp->gp_seq, gsp->norm) ||
+              gsp->exp == RCU_GET_STATE_COMPLETED ||
+              (gsp->exp != RCU_GET_STATE_NOT_TRACKED &&
+               rcu_seq_done_exact(&rcu_state.expedited_sequence, gsp->exp));
+}
+
 /**
  * poll_state_synchronize_rcu_full - Has the specified RCU grace period completed?
  * @gsp: value from get_state_synchronize_rcu_full() or start_poll_synchronize_rcu_full()
@@ -3633,14 +3651,8 @@ EXPORT_SYMBOL_GPL(poll_state_synchronize_rcu);
  */
 bool poll_state_synchronize_rcu_full(struct rcu_gp_seq *gsp)
 {
-       struct rcu_node *rnp = rcu_get_root();
-
        smp_mb(); // Order against root rcu_node structure grace-period cleanup.
-       if (gsp->norm == RCU_GET_STATE_COMPLETED ||
-           rcu_seq_done_exact(&rnp->gp_seq, gsp->norm) ||
-           gsp->exp == RCU_GET_STATE_COMPLETED ||
-           (gsp->exp != RCU_GET_STATE_NOT_TRACKED &&
-            rcu_seq_done_exact(&rcu_state.expedited_sequence, gsp->exp))) {
+       if (poll_state_synchronize_rcu_full_unordered(gsp)) {
                smp_mb(); /* Ensure GP ends before subsequent accesses. */
                return true;
        }
@@ -3710,6 +3722,7 @@ EXPORT_SYMBOL_GPL(cond_synchronize_rcu_full);
 static int rcu_pending(int user)
 {
        bool gp_in_progress;
+       struct rcu_gp_seq gp_state;
        struct rcu_data *rdp = this_cpu_ptr(&rcu_data);
        struct rcu_node *rnp = rdp->mynode;
 
@@ -3740,6 +3753,17 @@ static int rcu_pending(int user)
            rcu_segcblist_ready_cbs(&rdp->cblist))
                return 1;
 
+       /*
+        * Has a GP (normal or expedited) completed for pending callbacks?
+        * This is only a racy hint to decide whether to run rcu_core(); the
+        * ordered re-check and callback advancement happen there, so the
+        * unordered test avoids paying for memory barriers on every tick.
+        */
+       if (!rcu_rdp_is_offloaded(rdp) &&
+           rcu_segcblist_nextgp(&rdp->cblist, &gp_state) &&
+           poll_state_synchronize_rcu_full_unordered(&gp_state))
+               return 1;
+
        /* Has RCU gone idle with this CPU needing another grace period? */
        if (!gp_in_progress && rcu_segcblist_is_enabled(&rdp->cblist) &&
            !rcu_rdp_is_offloaded(rdp) &&
-- 
2.53.0-Meta


Reply via email to