From: Wesley Atwell <[email protected]>

Extend TCP_REPAIR_WINDOW so repair and restore can round-trip both the live receive-window (rwnd) snapshot and the remembered maximum sender-visible window.
Keep the ABI append-only by accepting the legacy and v1 prefix lengths on both get and set, rebuilding any missing max-window state from the live window when older userspace restores a socket. Signed-off-by: Wesley Atwell <[email protected]> --- include/net/tcp.h | 13 +++---- include/uapi/linux/tcp.h | 8 +++++ net/ipv4/tcp.c | 73 ++++++++++++++++++++++++++++++++++++---- 3 files changed, 81 insertions(+), 13 deletions(-) diff --git a/include/net/tcp.h b/include/net/tcp.h index 5b479ad44f89..12e62fea2aaf 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1766,13 +1766,14 @@ static inline bool tcp_space_from_wnd_snapshot(u8 scaling_ratio, int win, } /* Rebuild hard receive-memory units for data already covered by tp->rcv_wnd if - * the advertise-time basis is known. + * the advertise-time basis is known. Legacy TCP_REPAIR restores can only + * recover tp->rcv_wnd itself; callers must fall back when the snapshot is + * unknown. */ static inline bool tcp_space_from_rcv_wnd(const struct tcp_sock *tp, int win, int *space) { - return tcp_space_from_wnd_snapshot(tp->rcv_wnd_scaling_ratio, win, - space); + return tcp_space_from_wnd_snapshot(tp->rcv_wnd_scaling_ratio, win, space); } /* Same as tcp_space_from_rcv_wnd(), but for the remembered maximum @@ -1800,9 +1801,9 @@ static inline void tcp_scaling_ratio_init(struct sock *sk) } /* tp->rcv_wnd is paired with the scaling_ratio that was in force when that - * window was last advertised. Callers can leave a zero snapshot when the - * advertise-time basis is unknown and refresh the pair on the next local - * window update. + * window was last advertised. Legacy TCP_REPAIR restores can only recover the + * window value itself and use a zero snapshot until a fresh local window + * advertisement refreshes the pair. 
*/ static inline void tcp_set_rcv_wnd_snapshot(struct tcp_sock *tp, u32 win, u8 scaling_ratio) diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h index 03772dd4d399..564a77f69130 100644 --- a/include/uapi/linux/tcp.h +++ b/include/uapi/linux/tcp.h @@ -152,6 +152,11 @@ struct tcp_repair_opt { __u32 opt_val; }; +/* Append-only repair ABI. + * Older userspace may stop at rcv_wup or rcv_wnd_scaling_ratio. + * The kernel accepts those prefix lengths and rebuilds any missing + * receive-window snapshot state on restore. + */ struct tcp_repair_window { __u32 snd_wl1; __u32 snd_wnd; @@ -159,6 +164,9 @@ struct tcp_repair_window { __u32 rcv_wnd; __u32 rcv_wup; + __u32 rcv_wnd_scaling_ratio; /* 0 means live-window basis unknown */ + __u32 rcv_mwnd_seq; + __u32 rcv_mwnd_scaling_ratio; /* 0 means max-window basis unknown */ }; enum { diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 66706dbb90f5..39a1265876ea 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3533,17 +3533,31 @@ static inline bool tcp_can_repair_sock(const struct sock *sk) (sk->sk_state != TCP_LISTEN); } +/* Keep accepting the pre-extension TCP_REPAIR_WINDOW layout so legacy + * userspace can restore sockets without fabricating a snapshot basis. 
+ */ +static inline int tcp_repair_window_legacy_size(void) +{ + return offsetof(struct tcp_repair_window, rcv_wnd_scaling_ratio); +} + +static inline int tcp_repair_window_v1_size(void) +{ + return offsetof(struct tcp_repair_window, rcv_mwnd_seq); +} + static int tcp_repair_set_window(struct tcp_sock *tp, sockptr_t optbuf, int len) { - struct tcp_repair_window opt; + struct tcp_repair_window opt = {}; if (!tp->repair) return -EPERM; - if (len != sizeof(opt)) + if (len != tcp_repair_window_legacy_size() && + len != tcp_repair_window_v1_size() && len != sizeof(opt)) return -EINVAL; - if (copy_from_sockptr(&opt, optbuf, sizeof(opt))) + if (copy_from_sockptr(&opt, optbuf, len)) return -EFAULT; if (opt.max_window < opt.snd_wnd) @@ -3559,9 +3573,47 @@ static int tcp_repair_set_window(struct tcp_sock *tp, sockptr_t optbuf, int len) tp->snd_wnd = opt.snd_wnd; tp->max_window = opt.max_window; - tp->rcv_wnd = opt.rcv_wnd; + if (len == tcp_repair_window_legacy_size()) { + /* Legacy repair UAPI has no advertise-time basis for tp->rcv_wnd. + * Mark the snapshot unknown until a fresh local advertisement + * re-establishes the pair. + */ + tcp_set_rcv_wnd_unknown(tp, opt.rcv_wnd); + tp->rcv_wup = opt.rcv_wup; + tcp_init_max_rcv_wnd_seq(tp); + return 0; + } + + if (opt.rcv_wnd_scaling_ratio > U8_MAX) + return -EINVAL; + + tcp_set_rcv_wnd_snapshot(tp, opt.rcv_wnd, opt.rcv_wnd_scaling_ratio); tp->rcv_wup = opt.rcv_wup; - tp->rcv_mwnd_seq = opt.rcv_wup + opt.rcv_wnd; + + if (len == tcp_repair_window_v1_size()) { + /* v1 repair can restore the live-window snapshot, but not a + * retracted max-window snapshot. Rebuild it from the live pair + * until a fresh local advertisement updates it again. + */ + tcp_init_max_rcv_wnd_seq(tp); + return 0; + } + + if (opt.rcv_mwnd_scaling_ratio > U8_MAX) + return -EINVAL; + + /* Userspace may repair sequence-space values after checkpoint without + * also rebasing the remembered max advertised right edge. 
If the exact + * snapshot no longer covers the restored live window, treat it like + * v1 and rebuild the max-window side from the live pair. + */ + if (after(opt.rcv_wup + opt.rcv_wnd, opt.rcv_mwnd_seq)) { + tcp_init_max_rcv_wnd_seq(tp); + return 0; + } + + tp->rcv_mwnd_seq = opt.rcv_mwnd_seq; + tp->rcv_mwnd_scaling_ratio = opt.rcv_mwnd_scaling_ratio; return 0; } @@ -4650,12 +4702,16 @@ int do_tcp_getsockopt(struct sock *sk, int level, break; case TCP_REPAIR_WINDOW: { - struct tcp_repair_window opt; + struct tcp_repair_window opt = {}; if (copy_from_sockptr(&len, optlen, sizeof(int))) return -EFAULT; - if (len != sizeof(opt)) + /* Mirror the accepted set-side prefix lengths so checkpoint + * tools can round-trip exactly the layout version they know. + */ + if (len != tcp_repair_window_legacy_size() && + len != tcp_repair_window_v1_size() && len != sizeof(opt)) return -EINVAL; if (!tp->repair) @@ -4666,6 +4722,9 @@ int do_tcp_getsockopt(struct sock *sk, int level, opt.max_window = tp->max_window; opt.rcv_wnd = tp->rcv_wnd; opt.rcv_wup = tp->rcv_wup; + opt.rcv_wnd_scaling_ratio = tp->rcv_wnd_scaling_ratio; + opt.rcv_mwnd_seq = tp->rcv_mwnd_seq; + opt.rcv_mwnd_scaling_ratio = tp->rcv_mwnd_scaling_ratio; if (copy_to_sockptr(optval, &opt, len)) return -EFAULT; -- 2.43.0
