On 3/14/26 9:13 PM, [email protected] wrote:
> From: Wesley Atwell <[email protected]>
> 
> Teach TCP to grow sk_rcvbuf when scale rounding would otherwise expose
> more sender-visible window than the current hard receive-memory backing
> can cover.
> 
> The new helper keeps backlog and memory-pressure limits in the same
> units as the rest of the receive path, while __tcp_select_window()
> backs any rounding slack before advertising it.
> 
> Signed-off-by: Wesley Atwell <[email protected]>
> ---
>  include/net/tcp.h     | 12 ++++++++++++
>  net/ipv4/tcp_input.c  | 36 ++++++++++++++++++++++++++++++++++--
>  net/ipv4/tcp_output.c | 15 +++++++++++++--
>  3 files changed, 59 insertions(+), 4 deletions(-)
> 
> diff --git a/include/net/tcp.h b/include/net/tcp.h
> index fc22ab6b80d5..5b479ad44f89 100644
> --- a/include/net/tcp.h
> +++ b/include/net/tcp.h
> @@ -397,6 +397,7 @@ int tcp_ioctl(struct sock *sk, int cmd, int *karg);
>  enum skb_drop_reason tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb);
>  void tcp_rcv_established(struct sock *sk, struct sk_buff *skb);
>  void tcp_rcvbuf_grow(struct sock *sk, u32 newval);
> +bool tcp_try_grow_rcvbuf(struct sock *sk, int needed);
>  void tcp_rcv_space_adjust(struct sock *sk);
>  int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp);
>  void tcp_twsk_destructor(struct sock *sk);
> @@ -1844,6 +1845,17 @@ static inline int tcp_rwnd_avail(const struct sock *sk)
>       return tcp_rmem_avail(sk) - READ_ONCE(sk->sk_backlog.len);
>  }
>  
> +/* Passive children clone the listener's sk_socket until accept() grafts
> + * their own struct socket, so only sockets that point back to themselves
> + * should autotune receive-buffer backing.
> + */
> +static inline bool tcp_rcvbuf_grow_allowed(const struct sock *sk)
> +{
> +     struct socket *sock = READ_ONCE(sk->sk_socket);
> +
> +     return sock && READ_ONCE(sock->sk) == sk;

This is executed under the socket lock, so the READ_ONCE()/WRITE_ONCE() annotations are not needed here.

> +}
> +
>  /* Note: caller must be prepared to deal with negative returns */
>  static inline int tcp_space(const struct sock *sk)
>  {
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index 352f814a4ff6..32256519a085 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -774,6 +774,38 @@ static void tcp_init_buffer_space(struct sock *sk)
>                                   (u32)TCP_INIT_CWND * tp->advmss);
>  }
>  
> +/* Try to grow sk_rcvbuf so the hard receive-memory limit covers @needed
> + * bytes beyond sk_rmem_alloc while preserving sender-visible headroom
> + * already consumed by sk_backlog.len.
> + */
> +bool tcp_try_grow_rcvbuf(struct sock *sk, int needed)
> +{
> +     struct net *net = sock_net(sk);
> +     int backlog;
> +     int rmem2;
> +     int target;
> +
> +     needed = max(needed, 0);
> +     backlog = READ_ONCE(sk->sk_backlog.len);
> +     target = tcp_rmem_used(sk) + backlog + needed;
> +
> +     if (target <= READ_ONCE(sk->sk_rcvbuf))
> +             return true;
> +
> +     rmem2 = READ_ONCE(net->ipv4.sysctl_tcp_rmem[2]);
> +     if (READ_ONCE(sk->sk_rcvbuf) >= rmem2 ||
> +         (sk->sk_userlocks & SOCK_RCVBUF_LOCK) ||
> +         tcp_under_memory_pressure(sk) ||
> +         sk_memory_allocated(sk) >= sk_prot_mem_limits(sk, 0))
> +             return false;
> +
> +     WRITE_ONCE(sk->sk_rcvbuf,
> +                min_t(int, rmem2,
> +                      max_t(int, READ_ONCE(sk->sk_rcvbuf), target)));
> +
> +     return target <= READ_ONCE(sk->sk_rcvbuf);

Same here, and more cases below.

/P


Reply via email to