From: Wesley Atwell <[email protected]>

Extend TCP_REPAIR_WINDOW so repair and restore can round-trip both the
live rwnd snapshot and the remembered maximum sender-visible window.

Keep the ABI append-only by accepting the legacy and v1 prefix lengths on
both get and set, rebuilding any missing max-window state from the live
window when older userspace restores a socket.

Signed-off-by: Wesley Atwell <[email protected]>
---
 include/net/tcp.h        | 13 +++----
 include/uapi/linux/tcp.h |  8 +++++
 net/ipv4/tcp.c           | 73 ++++++++++++++++++++++++++++++++++++----
 3 files changed, 81 insertions(+), 13 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 5b479ad44f89..12e62fea2aaf 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1766,13 +1766,14 @@ static inline bool tcp_space_from_wnd_snapshot(u8 scaling_ratio, int win,
 }
 
 /* Rebuild hard receive-memory units for data already covered by tp->rcv_wnd if
- * the advertise-time basis is known.
+ * the advertise-time basis is known. Legacy TCP_REPAIR restores can only
+ * recover tp->rcv_wnd itself; callers must fall back when the snapshot is
+ * unknown.
  */
 static inline bool tcp_space_from_rcv_wnd(const struct tcp_sock *tp, int win,
                                          int *space)
 {
-       return tcp_space_from_wnd_snapshot(tp->rcv_wnd_scaling_ratio, win,
-                                          space);
+       return tcp_space_from_wnd_snapshot(tp->rcv_wnd_scaling_ratio, win, space);
 }
 
 /* Same as tcp_space_from_rcv_wnd(), but for the remembered maximum
@@ -1800,9 +1801,9 @@ static inline void tcp_scaling_ratio_init(struct sock *sk)
 }
 
 /* tp->rcv_wnd is paired with the scaling_ratio that was in force when that
- * window was last advertised. Callers can leave a zero snapshot when the
- * advertise-time basis is unknown and refresh the pair on the next local
- * window update.
+ * window was last advertised. Legacy TCP_REPAIR restores can only recover the
+ * window value itself and use a zero snapshot until a fresh local window
+ * advertisement refreshes the pair.
  */
 static inline void tcp_set_rcv_wnd_snapshot(struct tcp_sock *tp, u32 win,
                                            u8 scaling_ratio)
diff --git a/include/uapi/linux/tcp.h b/include/uapi/linux/tcp.h
index 03772dd4d399..564a77f69130 100644
--- a/include/uapi/linux/tcp.h
+++ b/include/uapi/linux/tcp.h
@@ -152,6 +152,11 @@ struct tcp_repair_opt {
        __u32   opt_val;
 };
 
+/* Append-only repair ABI.
+ * Older userspace may stop at rcv_wup or rcv_wnd_scaling_ratio.
+ * The kernel accepts those prefix lengths and rebuilds any missing
+ * receive-window snapshot state on restore.
+ */
 struct tcp_repair_window {
        __u32   snd_wl1;
        __u32   snd_wnd;
@@ -159,6 +164,9 @@ struct tcp_repair_window {
 
        __u32   rcv_wnd;
        __u32   rcv_wup;
+       __u32   rcv_wnd_scaling_ratio;  /* 0 means live-window basis unknown */
+       __u32   rcv_mwnd_seq;
+       __u32   rcv_mwnd_scaling_ratio; /* 0 means max-window basis unknown */
 };
 
 enum {
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 66706dbb90f5..39a1265876ea 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3533,17 +3533,31 @@ static inline bool tcp_can_repair_sock(const struct sock *sk)
                (sk->sk_state != TCP_LISTEN);
 }
 
+/* Keep accepting the pre-extension TCP_REPAIR_WINDOW layout so legacy
+ * userspace can restore sockets without fabricating a snapshot basis.
+ */
+static inline int tcp_repair_window_legacy_size(void)
+{
+       return offsetof(struct tcp_repair_window, rcv_wnd_scaling_ratio);
+}
+
+static inline int tcp_repair_window_v1_size(void)
+{
+       return offsetof(struct tcp_repair_window, rcv_mwnd_seq);
+}
+
 static int tcp_repair_set_window(struct tcp_sock *tp, sockptr_t optbuf, int len)
 {
-       struct tcp_repair_window opt;
+       struct tcp_repair_window opt = {};
 
        if (!tp->repair)
                return -EPERM;
 
-       if (len != sizeof(opt))
+       if (len != tcp_repair_window_legacy_size() &&
+           len != tcp_repair_window_v1_size() && len != sizeof(opt))
                return -EINVAL;
 
-       if (copy_from_sockptr(&opt, optbuf, sizeof(opt)))
+       if (copy_from_sockptr(&opt, optbuf, len))
                return -EFAULT;
 
        if (opt.max_window < opt.snd_wnd)
@@ -3559,9 +3573,47 @@ static int tcp_repair_set_window(struct tcp_sock *tp, sockptr_t optbuf, int len)
        tp->snd_wnd     = opt.snd_wnd;
        tp->max_window  = opt.max_window;
 
-       tp->rcv_wnd     = opt.rcv_wnd;
+       if (len == tcp_repair_window_legacy_size()) {
+               /* Legacy repair UAPI has no advertise-time basis for tp->rcv_wnd.
+                * Mark the snapshot unknown until a fresh local advertisement
+                * re-establishes the pair.
+                */
+               tcp_set_rcv_wnd_unknown(tp, opt.rcv_wnd);
+               tp->rcv_wup     = opt.rcv_wup;
+               tcp_init_max_rcv_wnd_seq(tp);
+               return 0;
+       }
+
+       if (opt.rcv_wnd_scaling_ratio > U8_MAX)
+               return -EINVAL;
+
+       tcp_set_rcv_wnd_snapshot(tp, opt.rcv_wnd, opt.rcv_wnd_scaling_ratio);
        tp->rcv_wup     = opt.rcv_wup;
-       tp->rcv_mwnd_seq = opt.rcv_wup + opt.rcv_wnd;
+
+       if (len == tcp_repair_window_v1_size()) {
+               /* v1 repair can restore the live-window snapshot, but not a
+                * retracted max-window snapshot. Rebuild it from the live pair
+                * until a fresh local advertisement updates it again.
+                */
+               tcp_init_max_rcv_wnd_seq(tp);
+               return 0;
+       }
+
+       if (opt.rcv_mwnd_scaling_ratio > U8_MAX)
+               return -EINVAL;
+
+       /* Userspace may repair sequence-space values after checkpoint without
+        * also rebasing the remembered max advertised right edge. If the exact
+        * snapshot no longer covers the restored live window, treat it like
+        * v1 and rebuild the max-window side from the live pair.
+        */
+       if (after(opt.rcv_wup + opt.rcv_wnd, opt.rcv_mwnd_seq)) {
+               tcp_init_max_rcv_wnd_seq(tp);
+               return 0;
+       }
+
+       tp->rcv_mwnd_seq = opt.rcv_mwnd_seq;
+       tp->rcv_mwnd_scaling_ratio = opt.rcv_mwnd_scaling_ratio;
 
        return 0;
 }
@@ -4650,12 +4702,16 @@ int do_tcp_getsockopt(struct sock *sk, int level,
                break;
 
        case TCP_REPAIR_WINDOW: {
-               struct tcp_repair_window opt;
+               struct tcp_repair_window opt = {};
 
                if (copy_from_sockptr(&len, optlen, sizeof(int)))
                        return -EFAULT;
 
-               if (len != sizeof(opt))
+               /* Mirror the accepted set-side prefix lengths so checkpoint
+                * tools can round-trip exactly the layout version they know.
+                */
+               if (len != tcp_repair_window_legacy_size() &&
+                   len != tcp_repair_window_v1_size() && len != sizeof(opt))
                        return -EINVAL;
 
                if (!tp->repair)
@@ -4666,6 +4722,9 @@ int do_tcp_getsockopt(struct sock *sk, int level,
                opt.max_window  = tp->max_window;
                opt.rcv_wnd     = tp->rcv_wnd;
                opt.rcv_wup     = tp->rcv_wup;
+               opt.rcv_wnd_scaling_ratio = tp->rcv_wnd_scaling_ratio;
+               opt.rcv_mwnd_seq = tp->rcv_mwnd_seq;
+               opt.rcv_mwnd_scaling_ratio = tp->rcv_mwnd_scaling_ratio;
 
                if (copy_to_sockptr(optval, &opt, len))
                        return -EFAULT;
-- 
2.43.0


Reply via email to