Michal Luczaj wrote:
> Element replace (with a socket different from the one stored) may race with
> socket's close() link popping & unlinking. __sock_map_delete()
> unconditionally unrefs the (wrong) element:
>
> // set map[0] = s0
> map_update_elem(map, 0, s0)
>
> // drop fd of s0
> close(s0)
> sock_map_close()
> lock_sock(sk) (s0!)
> sock_map_remove_links(sk)
> link = sk_psock_link_pop()
> sock_map_unlink(sk, link)
> sock_map_delete_from_link
> // replace map[0] with s1
> map_update_elem(map, 0, s1)
> sock_map_update_elem
> (s1!) lock_sock(sk)
> sock_map_update_common
> psock = sk_psock(sk)
> spin_lock(&stab->lock)
> osk = stab->sks[idx]
> sock_map_add_link(...,
> &stab->sks[idx])
> sock_map_unref(osk,
> &stab->sks[idx])
> psock = sk_psock(osk)
> sk_psock_put(sk, psock)
> if
> (refcount_dec_and_test(&psock))
> sk_psock_drop(sk, psock)
> spin_unlock(&stab->lock)
> unlock_sock(sk)
> __sock_map_delete
> spin_lock(&stab->lock)
> sk = *psk // s1 replaced s0; sk == s1
> if (!sk_test || sk_test == sk) // sk_test (s0) != sk (s1); no
> branch
> sk = xchg(psk, NULL)
> if (sk)
> sock_map_unref(sk, psk) // unref s1; sks[idx] will dangle
> psock = sk_psock(sk)
> sk_psock_put(sk, psock)
> if (refcount_dec_and_test())
> sk_psock_drop(sk, psock)
> spin_unlock(&stab->lock)
> release_sock(sk)
>
> Then close(map) enqueues bpf_map_free_deferred, which finally calls
> sock_map_free(). This results in some refcount_t warnings along with a
> KASAN splat[1].
>
[...]
> Fixes: 604326b41a6f ("bpf, sockmap: convert to generic sk_msg interface")
> Signed-off-by: Michal Luczaj <[email protected]>
> ---
> net/core/sock_map.c | 5 ++---
> 1 file changed, 2 insertions(+), 3 deletions(-)
>
> diff --git a/net/core/sock_map.c b/net/core/sock_map.c
> index
> 20b348b1964a10a1b0bfbe1a90a4a4cd99715b81..f1b9b3958792cd599efcb591742874e9b3f4a76b
> 100644
> --- a/net/core/sock_map.c
> +++ b/net/core/sock_map.c
> @@ -412,12 +412,11 @@ static void *sock_map_lookup_sys(struct bpf_map *map,
> void *key)
> static int __sock_map_delete(struct bpf_stab *stab, struct sock *sk_test,
> struct sock **psk)
> {
> - struct sock *sk;
> + struct sock *sk = NULL;
> int err = 0;
>
> spin_lock_bh(&stab->lock);
> - sk = *psk;
> - if (!sk_test || sk_test == sk)
> + if (!sk_test || sk_test == *psk)
> sk = xchg(psk, NULL);
>
> if (likely(sk))
>
> --
> 2.46.2
>
Reviewed-by: John Fastabend <[email protected]>