From: Wesley Atwell <[email protected]>

Track the scaling basis that was in force when tp->rcv_wnd was last
advertised, and provide helpers to refresh or interpret that snapshot.
Later patches use this live-window basis to preserve sender-visible rwnd accounting when receive-side memory costs drift after advertisement. Signed-off-by: Wesley Atwell <[email protected]> --- .../networking/net_cachelines/tcp_sock.rst | 1 + include/linux/tcp.h | 1 + include/net/tcp.h | 52 ++++++++++++++++++- net/ipv4/tcp.c | 1 + 4 files changed, 54 insertions(+), 1 deletion(-) diff --git a/Documentation/networking/net_cachelines/tcp_sock.rst b/Documentation/networking/net_cachelines/tcp_sock.rst index fecf61166a54..09ece1c59c2d 100644 --- a/Documentation/networking/net_cachelines/tcp_sock.rst +++ b/Documentation/networking/net_cachelines/tcp_sock.rst @@ -11,6 +11,7 @@ Type Name fastpath_tx_access fastpa struct inet_connection_sock inet_conn u16 tcp_header_len read_mostly read_mostly tcp_bound_to_half_wnd,tcp_current_mss(tx);tcp_rcv_established(rx) u16 gso_segs read_mostly tcp_xmit_size_goal +u8 rcv_wnd_scaling_ratio read_write read_mostly tcp_set_rcv_wnd,tcp_can_ingest,tcp_repair_set_window,do_tcp_getsockopt __be32 pred_flags read_write read_mostly tcp_select_window(tx);tcp_rcv_established(rx) u64 bytes_received read_write tcp_rcv_nxt_update(rx) u32 segs_in read_write tcp_v6_rcv(rx) diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 6982f10e826b..2ace563d59d6 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -297,6 +297,7 @@ struct tcp_sock { est_ecnfield:2,/* ECN field for AccECN delivered estimates */ accecn_opt_demand:2,/* Demand AccECN option for n next ACKs */ prev_ecnfield:2; /* ECN bits from the previous segment */ + u8 rcv_wnd_scaling_ratio; /* 0 if unknown, else tp->rcv_wnd basis */ __be32 pred_flags; u64 tcp_clock_cache; /* cache last tcp_clock_ns() (see tcp_mstamp_refresh()) */ u64 tcp_mstamp; /* most recent packet received/sent */ diff --git a/include/net/tcp.h b/include/net/tcp.h index 3a0060599afe..6fa7cdb0979e 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1741,6 +1741,31 @@ static inline int 
tcp_space_from_win(const struct sock *sk, int win) return __tcp_space_from_win(tcp_sk(sk)->scaling_ratio, win); } +static inline bool tcp_wnd_snapshot_valid(u8 scaling_ratio) +{ + return scaling_ratio != 0; +} + +static inline bool tcp_space_from_wnd_snapshot(u8 scaling_ratio, int win, + int *space) +{ + if (!tcp_wnd_snapshot_valid(scaling_ratio)) + return false; + + *space = __tcp_space_from_win(scaling_ratio, win); + return true; +} + +/* Rebuild hard receive-memory units for data already covered by tp->rcv_wnd if + * the advertise-time basis is known. + */ +static inline bool tcp_space_from_rcv_wnd(const struct tcp_sock *tp, int win, + int *space) +{ + return tcp_space_from_wnd_snapshot(tp->rcv_wnd_scaling_ratio, win, + space); +} + /* Assume a 50% default for skb->len/skb->truesize ratio. * This may be adjusted later in tcp_measure_rcv_mss(). */ @@ -1748,7 +1773,32 @@ static inline int tcp_space_from_win(const struct sock *sk, int win) static inline void tcp_scaling_ratio_init(struct sock *sk) { - tcp_sk(sk)->scaling_ratio = TCP_DEFAULT_SCALING_RATIO; + struct tcp_sock *tp = tcp_sk(sk); + + tp->scaling_ratio = TCP_DEFAULT_SCALING_RATIO; + tp->rcv_wnd_scaling_ratio = TCP_DEFAULT_SCALING_RATIO; +} + +/* tp->rcv_wnd is paired with the scaling_ratio that was in force when that + * window was last advertised. Callers can leave a zero snapshot when the + * advertise-time basis is unknown and refresh the pair on the next local + * window update. 
+ */ +static inline void tcp_set_rcv_wnd_snapshot(struct tcp_sock *tp, u32 win, + u8 scaling_ratio) +{ + tp->rcv_wnd = win; + tp->rcv_wnd_scaling_ratio = scaling_ratio; +} + +static inline void tcp_set_rcv_wnd(struct tcp_sock *tp, u32 win) +{ + tcp_set_rcv_wnd_snapshot(tp, win, tp->scaling_ratio); +} + +static inline void tcp_set_rcv_wnd_unknown(struct tcp_sock *tp, u32 win) +{ + tcp_set_rcv_wnd_snapshot(tp, win, 0); } /* TCP receive-side accounting reuses sk_rcvbuf as both a hard memory limit diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 516087c622ad..0383ee8d3b78 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -5275,6 +5275,7 @@ static void __init tcp_struct_check(void) CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, received_ce); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, received_ecn_bytes); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, app_limited); + CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_wnd_scaling_ratio); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_wnd); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_mwnd_seq); CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_txrx, rcv_tstamp); -- 2.43.0
