basechain->stats is rcu protected data.
And write critical section of basechain->stats data is
nft_chain_stats_replace().
The function is executed in commit phase. so that actually commit_mutex
lock protects that.
Hence commit_mutex lockdep should be used for rcu_dereference_protected()
in the nft_chain_stats_replace() instead of NFNL_SUBSYS_NFTABLES.
By this patch, rcu APIs are used to handle basechain->stats data.
test commands:
%iptables-nft -I INPUT
%iptables-nft -Z
%iptables-nft -Z
splat looks like:
[89279.358755] =
[89279.363656] WARNING: suspicious RCU usage
[89279.368458] 4.20.0-rc2+ #44 Tainted: GWL
[89279.374661] -
[89279.379542] net/netfilter/nf_tables_api.c:1404 suspicious
rcu_dereference_protected() usage!
[89279.389520]
other info that might help us debug this:
[89279.398893]
rcu_scheduler_active = 2, debug_locks = 1
[89279.406556] 1 lock held by iptables-nft/5225:
[89279.411728] #0: bf45a000 (>nft.commit_mutex){+.+.}, at:
nf_tables_valid_genid+0x1f/0x70 [nf_tables]
[89279.424022]
stack backtrace:
[89279.429236] CPU: 0 PID: 5225 Comm: iptables-nft Tainted: GWL
4.20.0-rc2+ #44
[89279.430135] Call Trace:
[89279.430135] dump_stack+0xc9/0x16b
[89279.430135] ? show_regs_print_info+0x5/0x5
[89279.430135] ? lockdep_rcu_suspicious+0x117/0x160
[89279.430135] nft_chain_commit_update+0x4ea/0x640 [nf_tables]
[89279.430135] ? sched_clock_local+0xd4/0x140
[89279.430135] ? check_flags.part.35+0x440/0x440
[89279.430135] ? __rhashtable_remove_fast.constprop.67+0xec0/0xec0 [nf_tables]
[89279.430135] ? sched_clock_cpu+0x126/0x170
[89279.430135] ? find_held_lock+0x39/0x1c0
[89279.430135] ? hlock_class+0x140/0x140
[89279.430135] ? is_bpf_text_address+0x5/0xf0
[89279.430135] ? check_flags.part.35+0x440/0x440
[89279.430135] ? __lock_is_held+0xb4/0x140
[89279.430135] nf_tables_commit+0x2555/0x39c0 [nf_tables]
Fixes: f102d66b335a4 ("netfilter: nf_tables: use dedicated mutex to guard
transactions")
Signed-off-by: Taehee Yoo
---
include/linux/netfilter/nfnetlink.h | 12
net/netfilter/nf_tables_api.c | 21 +
net/netfilter/nf_tables_core.c | 2 +-
3 files changed, 14 insertions(+), 21 deletions(-)
diff --git a/include/linux/netfilter/nfnetlink.h
b/include/linux/netfilter/nfnetlink.h
index 4a520d3304a2..cf09ab37b45b 100644
--- a/include/linux/netfilter/nfnetlink.h
+++ b/include/linux/netfilter/nfnetlink.h
@@ -62,18 +62,6 @@ static inline bool lockdep_nfnl_is_held(__u8 subsys_id)
}
#endif /* CONFIG_PROVE_LOCKING */
-/*
- * nfnl_dereference - fetch RCU pointer when updates are prevented by subsys
mutex
- *
- * @p: The pointer to read, prior to dereferencing
- * @ss: The nfnetlink subsystem ID
- *
- * Return the value of the specified RCU-protected pointer, but omit
- * the READ_ONCE(), because caller holds the NFNL subsystem mutex.
- */
-#define nfnl_dereference(p, ss)\
- rcu_dereference_protected(p, lockdep_nfnl_is_held(ss))
-
#define MODULE_ALIAS_NFNL_SUBSYS(subsys) \
MODULE_ALIAS("nfnetlink-subsys-" __stringify(subsys))
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index ddeaa1990e1e..e82ad1795194 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1216,7 +1216,8 @@ static int nf_tables_fill_chain_info(struct sk_buff *skb,
struct net *net,
if (nla_put_string(skb, NFTA_CHAIN_TYPE, basechain->type->name))
goto nla_put_failure;
- if (basechain->stats && nft_dump_stats(skb, basechain->stats))
+ if (rcu_access_pointer(basechain->stats) &&
+ nft_dump_stats(skb, rcu_dereference(basechain->stats)))
goto nla_put_failure;
}
@@ -1392,7 +1393,8 @@ static struct nft_stats __percpu *nft_stats_alloc(const
struct nlattr *attr)
return newstats;
}
-static void nft_chain_stats_replace(struct nft_base_chain *chain,
+static void nft_chain_stats_replace(struct net *net,
+ struct nft_base_chain *chain,
struct nft_stats __percpu *newstats)
{
struct nft_stats __percpu *oldstats;
@@ -1400,8 +1402,9 @@ static void nft_chain_stats_replace(struct nft_base_chain
*chain,
if (newstats == NULL)
return;
- if (chain->stats) {
- oldstats = nfnl_dereference(chain->stats, NFNL_SUBSYS_NFTABLES);
+ if (rcu_access_pointer(chain->stats)) {
+ oldstats = rcu_dereference_protected(chain->stats,
+ lockdep_commit_lock_is_held(net));
rcu_assign_pointer(chain->stats, newstats);
synchronize_rcu();
free_percpu(oldstats);
@@ -1439,9 +1442,10 @@ static void nf_tables_chain_destroy(struct