Add sk_dst_reset() alongside sk_rethink_txhash() in the RTO, PLB,
and spurious-retrans paths so that the next transmit triggers a fresh
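route lookup.  Set fl6->mp_hash so that fib6_select_path() uses it for
ECMP selection: inet6_csk_route_socket() derives it from the socket's
current sk_txhash, and inet6_csk_route_req() derives it from a hash of
req->num_retrans and the remote port once req->num_retrans is non-zero.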

The ir_iif update in tcp_check_req() is applied to both IPv4 and IPv6
because that was cleaner than gating the update on address family.
IPv4 is otherwise unaltered; since IPv4 has no autoflowlabel, I would
not expect it to pick a new path on timeout.

It is possible that PLB does not need this (there may be other ways of
reacting to local congestion); I added the sk_dst_reset() call there
for consistency.

Signed-off-by: Neil Spring <[email protected]>
---
 net/ipv4/tcp_input.c             | 4 +++-
 net/ipv4/tcp_minisocks.c         | 9 +++++++++
 net/ipv4/tcp_plb.c               | 1 +
 net/ipv4/tcp_timer.c             | 1 +
 net/ipv6/inet6_connection_sock.c | 8 ++++++++
 5 files changed, 22 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 7171442c3ed7..3d42ab45066c 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5014,8 +5014,10 @@ static void tcp_rcv_spurious_retrans(struct sock *sk,
            skb->protocol == htons(ETH_P_IPV6) &&
            (tcp_sk(sk)->inet_conn.icsk_ack.lrcv_flowlabel !=
             ntohl(ip6_flowlabel(ipv6_hdr(skb)))) &&
-           sk_rethink_txhash(sk))
+           sk_rethink_txhash(sk)) {
                NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDUPLICATEDATAREHASH);
+               sk_dst_reset(sk);
+       }
 
        /* Save last flowlabel after a spurious retrans. */
        tcp_save_lrcv_flowlabel(sk, skb);
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 199f0b579e89..ef4b3771e9d8 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -750,6 +750,15 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
                 * Reset timer after retransmitting SYNACK, similar to
                 * the idea of fast retransmit in recovery.
                 */
+
+#if IS_ENABLED(CONFIG_IPV6)
+               if (sk->sk_family == AF_INET6)
+                       inet_rsk(req)->ir_iif = tcp_v6_iif(skb);
+               else
+#endif
+                       inet_rsk(req)->ir_iif =
+                               inet_request_bound_dev_if(sk, skb);
+
                if (!tcp_oow_rate_limited(sock_net(sk), skb,
                                          LINUX_MIB_TCPACKSKIPPEDSYNRECV,
                                          &tcp_rsk(req)->last_oow_ack_time)) {
diff --git a/net/ipv4/tcp_plb.c b/net/ipv4/tcp_plb.c
index 68ccdb9a5412..d7cc00a58e53 100644
--- a/net/ipv4/tcp_plb.c
+++ b/net/ipv4/tcp_plb.c
@@ -79,6 +79,7 @@ void tcp_plb_check_rehash(struct sock *sk, struct tcp_plb_state *plb)
                return;
 
        sk_rethink_txhash(sk);
+       sk_dst_reset(sk);
        plb->consec_cong_rounds = 0;
        tcp_sk(sk)->plb_rehash++;
        NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPLBREHASH);
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index ea99988795e7..acc22fc532c2 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -299,6 +299,7 @@ static int tcp_write_timeout(struct sock *sk)
        if (sk_rethink_txhash(sk)) {
                tp->timeout_rehash++;
                __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEOUTREHASH);
+               sk_dst_reset(sk);
        }
 
        return 0;
diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c
index 37534e116899..2fe753bb38b4 100644
--- a/net/ipv6/inet6_connection_sock.c
+++ b/net/ipv6/inet6_connection_sock.c
@@ -48,6 +48,11 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk,
        fl6->flowi6_uid = sk_uid(sk);
        security_req_classify_flow(req, flowi6_to_flowi_common(fl6));
 
+       if (req->num_retrans)
+               fl6->mp_hash = jhash_1word(req->num_retrans,
+                                          (__force u32)ireq->ir_rmt_port)
+                               >> 1;
+
        if (!dst) {
                dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p);
                if (IS_ERR(dst))
@@ -70,6 +75,9 @@ struct dst_entry *inet6_csk_route_socket(struct sock *sk,
        fl6->saddr = np->saddr;
        fl6->flowlabel = np->flow_label;
        IP6_ECN_flow_xmit(sk, fl6->flowlabel);
+
+       if (sk->sk_txhash)
+               fl6->mp_hash = sk->sk_txhash >> 1;
        fl6->flowi6_oif = sk->sk_bound_dev_if;
        fl6->flowi6_mark = sk->sk_mark;
        fl6->fl6_sport = inet->inet_sport;
-- 
2.52.0


Reply via email to