From: Geliang Tang <[email protected]> Current in-kernel TCP sockets -- i.e. from nvme_tcp_try_recv() -- need to call .read_sock interface of struct proto_ops, but it's not implemented in MPTCP.
This patch implements it with reference to __tcp_read_sock() and __mptcp_recvmsg_mskq(). Corresponding to tcp_recv_skb(), a new helper for MPTCP named mptcp_recv_skb() is added to peek a skb from sk->sk_receive_queue. Compared with __mptcp_recvmsg_mskq(), mptcp_read_sock() uses sk->sk_rcvbuf as the max read length. The LISTEN status is checked before the while loop, and mptcp_recv_skb() and mptcp_cleanup_rbuf() are invoked after the loop. In the loop, all flags checks for __mptcp_recvmsg_mskq() are removed. Reviewed-by: Hannes Reinecke <[email protected]> Signed-off-by: Geliang Tang <[email protected]> Reviewed-by: Mat Martineau <[email protected]> Signed-off-by: Matthieu Baerts (NGI0) <[email protected]> --- net/mptcp/protocol.c | 82 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 82 insertions(+) diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 5eb2a6182d4b..2071f984da12 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -4317,6 +4317,86 @@ static __poll_t mptcp_poll(struct file *file, struct socket *sock, return mask; } +static struct sk_buff *mptcp_recv_skb(struct sock *sk, u32 *off) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + struct sk_buff *skb; + u32 offset; + + if (!list_empty(&msk->backlog_list)) + mptcp_move_skbs(sk); + + while ((skb = skb_peek(&sk->sk_receive_queue)) != NULL) { + offset = MPTCP_SKB_CB(skb)->offset; + if (offset < skb->len) { + *off = offset; + return skb; + } + mptcp_eat_recv_skb(sk, skb); + } + return NULL; +} + +/* + * Note: + * - It is assumed that the socket was locked by the caller. + */ +static int __mptcp_read_sock(struct sock *sk, read_descriptor_t *desc, + sk_read_actor_t recv_actor, bool noack) +{ + struct mptcp_sock *msk = mptcp_sk(sk); + struct sk_buff *skb; + int copied = 0; + u32 offset; + + msk_owned_by_me(msk); + + if (sk->sk_state == TCP_LISTEN) + return -ENOTCONN; + while ((skb = mptcp_recv_skb(sk, &offset)) != NULL) { + u32 data_len = skb->len - offset; + int count; + u32 size; + + size = min_t(size_t, data_len, INT_MAX); + count = recv_actor(desc, skb, offset, size); + if (count <= 0) { + if (!copied) + copied = count; + break; + } + + copied += count; + + msk->bytes_consumed += count; + if (count < data_len) { + MPTCP_SKB_CB(skb)->offset += count; + MPTCP_SKB_CB(skb)->map_seq += count; + break; + } + + mptcp_eat_recv_skb(sk, skb); + } + + if (noack) + goto out; + + mptcp_rcv_space_adjust(msk, copied); + + if (copied > 0) { + mptcp_recv_skb(sk, &offset); + mptcp_cleanup_rbuf(msk, copied); + } +out: + return copied; +} + +static int mptcp_read_sock(struct sock *sk, read_descriptor_t *desc, + sk_read_actor_t recv_actor) +{ + return __mptcp_read_sock(sk, desc, recv_actor, false); +} + static const struct proto_ops mptcp_stream_ops = { .family = PF_INET, .owner = THIS_MODULE, @@ -4337,6 +4417,7 @@ static const struct proto_ops mptcp_stream_ops = { .recvmsg = inet_recvmsg, .mmap = sock_no_mmap, .set_rcvlowat = mptcp_set_rcvlowat, + .read_sock = mptcp_read_sock, }; static struct inet_protosw mptcp_protosw = { @@ -4441,6 +4522,7 @@ static const struct proto_ops mptcp_v6_stream_ops = { .compat_ioctl = inet6_compat_ioctl, #endif .set_rcvlowat = mptcp_set_rcvlowat, + .read_sock = mptcp_read_sock, }; static struct proto mptcp_v6_prot; -- 2.51.0

