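Debug-only stuck-state detector for upstream review. This commit is NOT for merge; it is a review aid. With CONFIG_RCU_GP_CLEANUP_STALE_CHECK=y (which depends on RCU_TORTURE_TEST), rcu_gp_cleanup() warns once if an online CPU's rdp->defer_qs_pending stays at DEFER_QS_PENDING, with no intervening rcu_defer_qs_clear() call, for RCU_DEFER_QS_STUCK_GPS_THRESHOLD (5) consecutive grace-period cleanups. Reviewers can enable CONFIG_RCU_GP_CLEANUP_STALE_CHECK to gain runtime confidence in the preceding fix commits.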
Signed-off-by: Joel Fernandes <[email protected]> --- kernel/rcu/Kconfig.debug | 11 ++++++++++ kernel/rcu/tree.c | 47 ++++++++++++++++++++++++++++++++++++++++ kernel/rcu/tree.h | 8 +++++++ 3 files changed, 66 insertions(+) diff --git a/kernel/rcu/Kconfig.debug b/kernel/rcu/Kconfig.debug index 35218ba74eb5..5a40c4fe544c 100644 --- a/kernel/rcu/Kconfig.debug +++ b/kernel/rcu/Kconfig.debug @@ -98,6 +98,17 @@ config RCU_TORTURE_TEST_LOG_GP Say Y here if you want grace-period sequence numbers logged. Say N if you are unsure. +config RCU_GP_CLEANUP_STALE_CHECK + bool "Detect stuck defer_qs_pending state at GP cleanup" + depends on RCU_TORTURE_TEST + default n + help + This option adds a per-CPU instrumentation counter on every + PENDING -> IDLE transition of rdp->defer_qs_pending, and a + detector in rcu_gp_cleanup(). + + Say N if you are unsure. + config RCU_REF_SCALE_TEST tristate "Scalability tests for read-side synchronization (RCU and others)" depends on DEBUG_KERNEL diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index d0816468ffee..1307f3fb48ac 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -2146,6 +2146,52 @@ static noinline_for_stack void rcu_gp_fqs_loop(void) } } +#ifdef CONFIG_RCU_GP_CLEANUP_STALE_CHECK +/* + * Threshold of consecutive GPs with rdp->defer_qs_pending stuck at + * PENDING and no observed PENDING -> IDLE transition before WARN. 
+ */ +#define RCU_DEFER_QS_STUCK_GPS_THRESHOLD 5 + +static void rcu_gp_cleanup_stale_check(void) +{ + int cpu; + unsigned long cur_gp_seq = READ_ONCE(rcu_state.gp_seq); + + for_each_online_cpu(cpu) { + struct rcu_data *rdp = per_cpu_ptr(&rcu_data, cpu); + s64 clears_now; + int p_now; + + if (READ_ONCE(rdp->gp_seq) != cur_gp_seq) { + rdp->defer_qs_pending_stuck_gps = 0; + rdp->defer_qs_pending_clears_snap = + atomic64_read(&rdp->defer_qs_pending_clears); + continue; + } + + clears_now = atomic64_read(&rdp->defer_qs_pending_clears); + p_now = READ_ONCE(rdp->defer_qs_pending); + + if (p_now != DEFER_QS_PENDING || + clears_now != rdp->defer_qs_pending_clears_snap) { + rdp->defer_qs_pending_stuck_gps = 0; + rdp->defer_qs_pending_clears_snap = clears_now; + continue; + } + + rdp->defer_qs_pending_stuck_gps++; + WARN_ONCE(rdp->defer_qs_pending_stuck_gps >= + RCU_DEFER_QS_STUCK_GPS_THRESHOLD, + "RCU: defer_qs_pending STUCK on CPU %d for %u GPs (gp_seq=%lu, clears=%lld)\n", + cpu, rdp->defer_qs_pending_stuck_gps, + cur_gp_seq, clears_now); + } +} +#else +static inline void rcu_gp_cleanup_stale_check(void) { } +#endif /* CONFIG_RCU_GP_CLEANUP_STALE_CHECK */ + /* * Clean up after the old grace period. */ @@ -2220,6 +2266,7 @@ static noinline void rcu_gp_cleanup(void) /* Declare grace period done, trace first to use old GP number. */ trace_rcu_grace_period(rcu_state.name, rcu_state.gp_seq, TPS("end")); + rcu_gp_cleanup_stale_check(); rcu_seq_end(&rcu_state.gp_seq); ASSERT_EXCLUSIVE_WRITER(rcu_state.gp_seq); WRITE_ONCE(rcu_state.gp_state, RCU_GP_IDLE); diff --git a/kernel/rcu/tree.h b/kernel/rcu/tree.h index 4069132f9d44..29d852bbe218 100644 --- a/kernel/rcu/tree.h +++ b/kernel/rcu/tree.h @@ -204,6 +204,11 @@ struct rcu_data { /* period it is aware of. */ struct irq_work defer_qs_iw; /* Obtain later scheduler attention. */ int defer_qs_pending; /* irqwork or softirq pending? 
*/ +#ifdef CONFIG_RCU_GP_CLEANUP_STALE_CHECK + atomic64_t defer_qs_pending_clears; + s64 defer_qs_pending_clears_snap; + unsigned int defer_qs_pending_stuck_gps; +#endif struct work_struct strict_work; /* Schedule readers for strict GPs. */ /* 2) batch handling */ @@ -299,6 +304,9 @@ struct rcu_data { static inline void rcu_defer_qs_clear(struct rcu_data *rdp) { WRITE_ONCE(rdp->defer_qs_pending, DEFER_QS_IDLE); +#ifdef CONFIG_RCU_GP_CLEANUP_STALE_CHECK + atomic64_inc(&rdp->defer_qs_pending_clears); +#endif } /* Values for nocb_defer_wakeup field in struct rcu_data. */ -- 2.34.1

