Unix sockets are like a black box. You never know what is stored there: there may be a file descriptor holding a mount or a block device, or there may be whole universes with namespaces, sockets with receive queues full of sockets, etc.
The patch adds a little debugging aid: it accounts the number of files (not recursively) that are in the receive queue of a unix socket. Sometimes this is useful to determine which socket should be investigated or which task should be killed to drop the reference count held on a resource. v2: Pass correct argument to lockdep Signed-off-by: Kirill Tkhai <[email protected]> Signed-off-by: David S. Miller <[email protected]> (cherry picked from commit 3c32da19a858fb1ae8a76bf899160be49f338506) VvS: taken from vz7 commit 56a318575ebf8cef4b677f9e20b9590848fcff85 Signed-off-by: Vasily Averin <[email protected]> --- include/net/af_unix.h | 9 ++++++-- net/unix/af_unix.c | 54 +++++++++++++++++++++++++++++++++++++++---- 2 files changed, 57 insertions(+), 6 deletions(-) diff --git a/include/net/af_unix.h b/include/net/af_unix.h index 7ec1cdb66be8..a10e2c52bf9b 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -41,7 +41,11 @@ struct unix_skb_parms { u32 consumed; } __randomize_layout; -#define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb)) +struct scm_stat { + u32 nr_fds; +}; + +#define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb)) #define unix_state_lock(s) spin_lock(&unix_sk(s)->lock) #define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock) @@ -64,7 +68,8 @@ struct unix_sock { #define UNIX_GC_CANDIDATE 0 #define UNIX_GC_MAYBE_CYCLE 1 struct socket_wq peer_wq; - wait_queue_entry_t peer_wake; + wait_queue_entry_t peer_wake; + struct scm_stat scm_stat; }; static inline struct unix_sock *unix_sk(const struct sock *sk) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 5d47d67901af..67648b61ce11 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -673,6 +673,16 @@ static int unix_set_peek_off(struct sock *sk, int val) return 0; } +static void unix_show_fdinfo(struct seq_file *m, struct socket *sock) +{ + struct sock *sk = sock->sk; + struct unix_sock *u; + + if (sk) { + u = unix_sk(sock->sk); + seq_printf(m, "scm_fds: %u\n", READ_ONCE(u->scm_stat.nr_fds)); + 
} +} static const struct proto_ops unix_stream_ops = { .family = PF_UNIX, @@ -695,6 +705,7 @@ static const struct proto_ops unix_stream_ops = { .sendpage = unix_stream_sendpage, .splice_read = unix_stream_splice_read, .set_peek_off = unix_set_peek_off, + .show_fdinfo = unix_show_fdinfo, }; static const struct proto_ops unix_dgram_ops = { @@ -717,6 +728,7 @@ static const struct proto_ops unix_dgram_ops = { .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, .set_peek_off = unix_set_peek_off, + .show_fdinfo = unix_show_fdinfo, }; static const struct proto_ops unix_seqpacket_ops = { @@ -739,6 +751,7 @@ static const struct proto_ops unix_seqpacket_ops = { .mmap = sock_no_mmap, .sendpage = sock_no_sendpage, .set_peek_off = unix_set_peek_off, + .show_fdinfo = unix_show_fdinfo, }; static struct proto unix_proto = { @@ -776,6 +789,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern) mutex_init(&u->bindlock); /* single task binding lock */ init_waitqueue_head(&u->peer_wait); init_waitqueue_func_entry(&u->peer_wake, unix_dgram_peer_wake_relay); + memset(&u->scm_stat, 0, sizeof(struct scm_stat)); unix_insert_socket(unix_sockets_unbound(sk), sk); out: if (sk == NULL) @@ -1550,6 +1564,28 @@ static bool unix_skb_scm_eq(struct sk_buff *skb, unix_secdata_eq(scm, skb); } +static void scm_stat_add(struct sock *sk, struct sk_buff *skb) +{ + struct scm_fp_list *fp = UNIXCB(skb).fp; + struct unix_sock *u = unix_sk(sk); + + lockdep_assert_held(&sk->sk_receive_queue.lock); + + if (unlikely(fp && fp->count)) + u->scm_stat.nr_fds += fp->count; +} + +static void scm_stat_del(struct sock *sk, struct sk_buff *skb) +{ + struct scm_fp_list *fp = UNIXCB(skb).fp; + struct unix_sock *u = unix_sk(sk); + + lockdep_assert_held(&sk->sk_receive_queue.lock); + + if (unlikely(fp && fp->count)) + u->scm_stat.nr_fds -= fp->count; +} + /* * Send AF_UNIX data. 
*/ @@ -1735,7 +1771,10 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, if (sock_flag(other, SOCK_RCVTSTAMP)) __net_timestamp(skb); maybe_add_creds(skb, sock, other); - skb_queue_tail(&other->sk_receive_queue, skb); + spin_lock(&other->sk_receive_queue.lock); + scm_stat_add(other, skb); + __skb_queue_tail(&other->sk_receive_queue, skb); + spin_unlock(&other->sk_receive_queue.lock); unix_state_unlock(other); other->sk_data_ready(other); sock_put(other); @@ -1837,7 +1876,10 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, goto pipe_err_free; maybe_add_creds(skb, sock, other); - skb_queue_tail(&other->sk_receive_queue, skb); + spin_lock(&other->sk_receive_queue.lock); + scm_stat_add(other, skb); + __skb_queue_tail(&other->sk_receive_queue, skb); + spin_unlock(&other->sk_receive_queue.lock); unix_state_unlock(other); other->sk_data_ready(other); sent += size; @@ -2037,7 +2079,7 @@ static int unix_dgram_recvmsg(struct socket *sock, struct msghdr *msg, skip = sk_peek_offset(sk, flags); skb = __skb_try_recv_datagram(sk, &sk->sk_receive_queue, flags, - NULL, &skip, &err, &last); + scm_stat_del, &skip, &err, &last); if (skb) break; @@ -2332,8 +2374,12 @@ static int unix_stream_read_generic(struct unix_stream_read_state *state, sk_peek_offset_bwd(sk, chunk); - if (UNIXCB(skb).fp) + if (UNIXCB(skb).fp) { + spin_lock(&sk->sk_receive_queue.lock); + scm_stat_del(sk, skb); + spin_unlock(&sk->sk_receive_queue.lock); unix_detach_fds(&scm, skb); + } if (unix_skb_len(skb)) break; -- 2.25.1 _______________________________________________ Devel mailing list [email protected] https://lists.openvz.org/mailman/listinfo/devel
