Debug-only stuck-state detector for upstream review.  This commit is
NOT for merge; it is a review aid.  Reviewers can enable
CONFIG_RCU_GP_CLEANUP_STALE_CHECK to gain runtime confidence in the
preceding fix commits.

Signed-off-by: Joel Fernandes <[email protected]>
---
 kernel/rcu/Kconfig.debug | 11 ++++++++++
 kernel/rcu/tree.c        | 47 ++++++++++++++++++++++++++++++++++++++++
 kernel/rcu/tree.h        |  8 +++++++
 3 files changed, 66 insertions(+)

diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug
index 35218ba74eb5..5a40c4fe544c 100644
--- a/kernel/rcu/Kconfig.debug
+++ b/kernel/rcu/Kconfig.debug
@@ -98,6 +98,17 @@ config RCU_TORTURE_TEST_LOG_GP
          Say Y here if you want grace-period sequence numbers logged.
          Say N if you are unsure.
 
+config RCU_GP_CLEANUP_STALE_CHECK
+       bool "Detect stuck defer_qs_pending state at GP cleanup"
+       depends on RCU_TORTURE_TEST
+       default n
+       help
+         This option counts, per CPU, each reset of rdp->defer_qs_pending
+         to IDLE, and adds a check in rcu_gp_cleanup() that WARNs once if a
+         CPU stays PENDING with no reset seen for several grace periods.
+
+         Say N if you are unsure.
+
 config RCU_REF_SCALE_TEST
        tristate "Scalability tests for read-side synchronization (RCU and others)"
        depends on DEBUG_KERNEL
diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c
index d0816468ffee..1307f3fb48ac 100644
--- a/kernel/rcu/tree.c
+++ b/kernel/rcu/tree.c
@@ -2146,6 +2146,52 @@ static noinline_for_stack void rcu_gp_fqs_loop(void)
        }
 }
 
+#ifdef CONFIG_RCU_GP_CLEANUP_STALE_CHECK
+/*
+ * WARN once rdp->defer_qs_pending has been seen stuck at PENDING, with no
+ * reset to IDLE counted in between, at this many consecutive GP cleanups.
+ */
+#define RCU_DEFER_QS_STUCK_GPS_THRESHOLD       5
+
+static void rcu_gp_cleanup_stale_check(void)
+{
+       int cpu;
+       unsigned long cur_gp_seq = READ_ONCE(rcu_state.gp_seq);
+
+       for_each_online_cpu(cpu) {
+               struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu);
+               s64 clears_now;
+               int p_now;
+
+               if (READ_ONCE(rdp->gp_seq) != cur_gp_seq) {
+                       rdp->defer_qs_pending_stuck_gps = 0;
+                       rdp->defer_qs_pending_clears_snap =
+                               atomic64_read(&rdp->defer_qs_pending_clears);
+                       continue;
+               }
+
+               clears_now = atomic64_read(&rdp->defer_qs_pending_clears);
+               p_now = READ_ONCE(rdp->defer_qs_pending);
+
+               if (p_now != DEFER_QS_PENDING ||
+                   clears_now != rdp->defer_qs_pending_clears_snap) {
+                       rdp->defer_qs_pending_stuck_gps = 0;
+                       rdp->defer_qs_pending_clears_snap = clears_now;
+                       continue;
+               }
+
+               rdp->defer_qs_pending_stuck_gps++;
+               WARN_ONCE(rdp->defer_qs_pending_stuck_gps >=
+                         RCU_DEFER_QS_STUCK_GPS_THRESHOLD,
+                       "RCU: defer_qs_pending STUCK on CPU %d for %u GPs (gp_seq=%lu, clears=%lld)\n",
+                       cpu, rdp->defer_qs_pending_stuck_gps,
+                       cur_gp_seq, clears_now);
+       }
+}
+#else
+static inline void rcu_gp_cleanup_stale_check(void) { }
+#endif /* CONFIG_RCU_GP_CLEANUP_STALE_CHECK */
+
 /*
  * Clean up after the old grace period.
  */
@@ -2220,6 +2266,7 @@ static noinline void rcu_gp_cleanup(void)
 
        /* Declare grace period done, trace first to use old GP number. */
        trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("end"));
+       rcu_gp_cleanup_stale_check();
        rcu_seq_end(&rcu_state.gp_seq);
        ASSERT_EXCLUSIVE_WRITER(rcu_state.gp_seq);
        WRITE_ONCE(rcu_state.gp_state, RCU_GP_IDLE);
diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h
index 4069132f9d44..29d852bbe218 100644
--- a/kernel/rcu/tree.h
+++ b/kernel/rcu/tree.h
@@ -204,6 +204,11 @@ struct rcu_data {
                                        /* period it is aware of. */
        struct irq_work defer_qs_iw;    /* Obtain later scheduler attention. */
        int defer_qs_pending;           /* irqwork or softirq pending? */
+#ifdef CONFIG_RCU_GP_CLEANUP_STALE_CHECK
+       atomic64_t   defer_qs_pending_clears;
+       s64          defer_qs_pending_clears_snap;
+       unsigned int defer_qs_pending_stuck_gps;
+#endif
        struct work_struct strict_work; /* Schedule readers for strict GPs. */
 
        /* 2) batch handling */
@@ -299,6 +304,9 @@ struct rcu_data {
 static inline void rcu_defer_qs_clear(struct rcu_data *rdp)
 {
        WRITE_ONCE(rdp->defer_qs_pending, DEFER_QS_IDLE);
+#ifdef CONFIG_RCU_GP_CLEANUP_STALE_CHECK
+       atomic64_inc(&rdp->defer_qs_pending_clears); /* Stale-check input. */
+#endif /* CONFIG_RCU_GP_CLEANUP_STALE_CHECK */
 }
 
 /* Values for nocb_defer_wakeup field in struct rcu_data. */
-- 
2.34.1


Reply via email to