Hi,
On Thu, 28 Jun 2018, Stefano Brivio wrote:
> Commit 45040978c899 ("netfilter: ipset: Fix set:list type crash
> when flush/dump set in parallel") postponed decreasing set
> reference counters to the RCU callback.
>
> An 'ipset del' command can terminate before the RCU grace period
> is elapsed, and if sets are listed before then, the reference
> counter shown in userspace will be wrong:
>
> # ipset create h hash:ip; ipset create l list:set; ipset add l
> # ipset del l h; ipset list h
> Name: h
> Type: hash:ip
> Revision: 4
> Header: family inet hashsize 1024 maxelem 65536
> Size in memory: 88
> References: 1
> Number of entries: 0
> Members:
> # sleep 1; ipset list h
> Name: h
> Type: hash:ip
> Revision: 4
> Header: family inet hashsize 1024 maxelem 65536
> Size in memory: 88
> References: 0
> Number of entries: 0
> Members:
>
> Fix this by making the reference count update synchronous again.
>
> As a result, when sets are listed, ip_set_name_byindex() might
> now fetch a set whose reference count is already zero. Instead
> of relying on the reference count to protect against concurrent
> set renaming and listing, note that those two operations are
> serialised by the nfnl mutex, and that the set itself is
> protected by RCU nowadays.
Listing is not serialized by the nfnl mutex because a netlink dump is used
behind it. So I believe the patch is not correct and therefore I cannot
apply it.
Best regards,
Jozsef
> Reported-by: Li Shuang <[email protected]>
> Fixes: 45040978c899 ("netfilter: ipset: Fix set:list type crash when
> flush/dump set in parallel")
> Signed-off-by: Stefano Brivio <[email protected]>
> ---
> net/netfilter/ipset/ip_set_core.c | 7 +------
> net/netfilter/ipset/ip_set_list_set.c | 12 ++++++++----
> 2 files changed, 9 insertions(+), 10 deletions(-)
>
> diff --git a/net/netfilter/ipset/ip_set_core.c
> b/net/netfilter/ipset/ip_set_core.c
> index bc4bd247bb7d..afe42af0ad13 100644
> --- a/net/netfilter/ipset/ip_set_core.c
> +++ b/net/netfilter/ipset/ip_set_core.c
> @@ -693,10 +693,7 @@ ip_set_put_byindex(struct net *net, ip_set_id_t index)
> EXPORT_SYMBOL_GPL(ip_set_put_byindex);
>
> /* Get the name of a set behind a set index.
> - * We assume the set is referenced, so it does exist and
> - * can't be destroyed. The set cannot be renamed due to
> - * the referencing either.
> - *
> + * This can only be called by operations serialised by nfnl mutex.
> */
> const char *
> ip_set_name_byindex(struct net *net, ip_set_id_t index)
> @@ -704,9 +701,7 @@ ip_set_name_byindex(struct net *net, ip_set_id_t index)
> const struct ip_set *set = ip_set_rcu_get(net, index);
>
> BUG_ON(!set);
> - BUG_ON(set->ref == 0);
>
> - /* Referenced, so it's safe */
> return set->name;
> }
> EXPORT_SYMBOL_GPL(ip_set_name_byindex);
> diff --git a/net/netfilter/ipset/ip_set_list_set.c
> b/net/netfilter/ipset/ip_set_list_set.c
> index 072a658fde04..9a056729968f 100644
> --- a/net/netfilter/ipset/ip_set_list_set.c
> +++ b/net/netfilter/ipset/ip_set_list_set.c
> @@ -148,9 +148,7 @@ __list_set_del_rcu(struct rcu_head * rcu)
> {
> struct set_elem *e = container_of(rcu, struct set_elem, rcu);
> struct ip_set *set = e->set;
> - struct list_set *map = set->data;
>
> - ip_set_put_byindex(map->net, e->id);
> ip_set_ext_destroy(set, e);
> kfree(e);
> }
> @@ -158,14 +156,20 @@ __list_set_del_rcu(struct rcu_head * rcu)
> static inline void
> list_set_del(struct ip_set *set, struct set_elem *e)
> {
> + struct list_set *map = set->data;
> +
> set->elements--;
> + ip_set_put_byindex(map->net, e->id);
> list_del_rcu(&e->list);
> call_rcu(&e->rcu, __list_set_del_rcu);
> }
>
> static inline void
> -list_set_replace(struct set_elem *e, struct set_elem *old)
> +list_set_replace(struct ip_set *set, struct set_elem *e, struct set_elem
> *old)
> {
> + struct list_set *map = set->data;
> +
> + ip_set_put_byindex(map->net, old->id);
> list_replace_rcu(&old->list, &e->list);
> call_rcu(&old->rcu, __list_set_del_rcu);
> }
> @@ -298,7 +302,7 @@ list_set_uadd(struct ip_set *set, void *value, const
> struct ip_set_ext *ext,
> INIT_LIST_HEAD(&e->list);
> list_set_init_extensions(set, ext, e);
> if (n)
> - list_set_replace(e, n);
> + list_set_replace(set, e, n);
> else if (next)
> list_add_tail_rcu(&e->list, &next->list);
> else if (prev)
> --
> 2.15.1
>
> --
> To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
-
E-mail : [email protected], [email protected]
PGP key : http://www.kfki.hu/~kadlec/pgp_public_key.txt
Address : Wigner Research Centre for Physics, Hungarian Academy of Sciences
H-1525 Budapest 114, POB. 49, Hungary
--
To unsubscribe from this list: send the line "unsubscribe netfilter-devel" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html