On 3/14/26 9:13 PM, [email protected] wrote:
> From: Wesley Atwell <[email protected]>
>
> Teach TCP to grow sk_rcvbuf when scale rounding would otherwise expose
> more sender-visible window than the current hard receive-memory backing
> can cover.
>
> The new helper keeps the backlog and memory-pressure limits in the same
> units as the rest of the receive path, while __tcp_select_window()
> secures backing for any rounding slack before advertising it.
>
> Signed-off-by: Wesley Atwell <[email protected]>
> ---
> include/net/tcp.h | 12 ++++++++++++
> net/ipv4/tcp_input.c | 36 ++++++++++++++++++++++++++++++++++--
> net/ipv4/tcp_output.c | 15 +++++++++++++--
> 3 files changed, 59 insertions(+), 4 deletions(-)
>
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index fc22ab6b80d5..5b479ad44f89 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -397,6 +397,7 @@ int tcp_ioctl(struct sock *sk, int cmd, int *karg);
> enum skb_drop_reason tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb);
> void tcp_rcv_established(struct sock *sk, struct sk_buff *skb);
> void tcp_rcvbuf_grow(struct sock *sk, u32 newval);
> +bool tcp_try_grow_rcvbuf(struct sock *sk, int needed);
> void tcp_rcv_space_adjust(struct sock *sk);
> int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp);
> void tcp_twsk_destructor(struct sock *sk);
> @@ -1844,6 +1845,17 @@ static inline int tcp_rwnd_avail(const struct sock *sk)
> return tcp_rmem_avail(sk) - READ_ONCE(sk->sk_backlog.len);
> }
>
> +/* Passive children clone the listener's sk_socket until accept() grafts
> + * their own struct socket, so only sockets that point back to themselves
> + * should autotune receive-buffer backing.
> + */
> +static inline bool tcp_rcvbuf_grow_allowed(const struct sock *sk)
> +{
> + struct socket *sock = READ_ONCE(sk->sk_socket);
> +
> + return sock && READ_ONCE(sock->sk) == sk;
This is executed under the socket lock, so the READ_ONCE() annotations are not needed.
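I.e. something along these lines (untested sketch, assuming the helper only
ever runs with the socket lock held):

	static inline bool tcp_rcvbuf_grow_allowed(const struct sock *sk)
	{
		const struct socket *sock = sk->sk_socket;

		/* Only a socket that owns its struct socket may autotune. */
		return sock && sock->sk == sk;
	}
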
> +}
> +
> /* Note: caller must be prepared to deal with negative returns */
> static inline int tcp_space(const struct sock *sk)
> {
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index 352f814a4ff6..32256519a085 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -774,6 +774,38 @@ static void tcp_init_buffer_space(struct sock *sk)
> (u32)TCP_INIT_CWND * tp->advmss);
> }
>
> +/* Try to grow sk_rcvbuf so the hard receive-memory limit covers @needed
> + * bytes beyond sk_rmem_alloc while preserving sender-visible headroom
> + * already consumed by sk_backlog.len.
> + */
> +bool tcp_try_grow_rcvbuf(struct sock *sk, int needed)
> +{
> + struct net *net = sock_net(sk);
> + int backlog;
> + int rmem2;
> + int target;
> +
> + needed = max(needed, 0);
> + backlog = READ_ONCE(sk->sk_backlog.len);
> + target = tcp_rmem_used(sk) + backlog + needed;
> +
> + if (target <= READ_ONCE(sk->sk_rcvbuf))
> + return true;
> +
> + rmem2 = READ_ONCE(net->ipv4.sysctl_tcp_rmem[2]);
> + if (READ_ONCE(sk->sk_rcvbuf) >= rmem2 ||
> + (sk->sk_userlocks & SOCK_RCVBUF_LOCK) ||
> + tcp_under_memory_pressure(sk) ||
> + sk_memory_allocated(sk) >= sk_prot_mem_limits(sk, 0))
> + return false;
> +
> + WRITE_ONCE(sk->sk_rcvbuf,
> + min_t(int, rmem2,
> + max_t(int, READ_ONCE(sk->sk_rcvbuf), target)));
> +
> + return target <= READ_ONCE(sk->sk_rcvbuf);
Same here, and in more places below.
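Untested sketch of the same checks with the lockless-read annotations
dropped; I kept READ_ONCE() for the sysctl and sk_backlog.len since those
can still change underneath us, and WRITE_ONCE() for the store because
sk_rcvbuf is read locklessly elsewhere:

	if (target <= sk->sk_rcvbuf)
		return true;

	rmem2 = READ_ONCE(net->ipv4.sysctl_tcp_rmem[2]);
	if (sk->sk_rcvbuf >= rmem2 ||
	    (sk->sk_userlocks & SOCK_RCVBUF_LOCK) ||
	    tcp_under_memory_pressure(sk) ||
	    sk_memory_allocated(sk) >= sk_prot_mem_limits(sk, 0))
		return false;

	/* Clamp growth to tcp_rmem[2]; never shrink below the current value. */
	WRITE_ONCE(sk->sk_rcvbuf,
		   min_t(int, rmem2, max_t(int, sk->sk_rcvbuf, target)));

	return target <= sk->sk_rcvbuf;
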
/P