Add sk_dst_reset() alongside sk_rethink_txhash() in the RTO, PLB, and spurious-retrans paths so that the next transmit triggers a fresh route lookup. Propagate sk_txhash into fl6->mp_hash in inet6_csk_route_req() and inet6_csk_route_socket() so fib6_select_path() uses the socket's current hash for ECMP selection.
The ir_iif update in tcp_check_req() covers both IPv4 and IPv6 because it was cleaner than gating on address family; IPv4 is otherwise unaltered, and not having autoflowlabel in IPv4 means I wouldn't expect a new path on timeout. It is possible that PLB does not need this (that there are other methods of reacting to local congestion); I added the sk_dst_reset for consistency. Signed-off-by: Neil Spring <[email protected]> --- net/ipv4/tcp_input.c | 4 +++- net/ipv4/tcp_minisocks.c | 9 +++++++++ net/ipv4/tcp_plb.c | 1 + net/ipv4/tcp_timer.c | 1 + net/ipv6/inet6_connection_sock.c | 8 ++++++++ 5 files changed, 22 insertions(+), 1 deletion(-) diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 7171442c3ed7..3d42ab45066c 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5014,8 +5014,10 @@ static void tcp_rcv_spurious_retrans(struct sock *sk, skb->protocol == htons(ETH_P_IPV6) && (tcp_sk(sk)->inet_conn.icsk_ack.lrcv_flowlabel != ntohl(ip6_flowlabel(ipv6_hdr(skb)))) && - sk_rethink_txhash(sk)) + sk_rethink_txhash(sk)) { NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPDUPLICATEDATAREHASH); + sk_dst_reset(sk); + } /* Save last flowlabel after a spurious retrans. */ tcp_save_lrcv_flowlabel(sk, skb); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 199f0b579e89..ef4b3771e9d8 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -750,6 +750,15 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, * Reset timer after retransmitting SYNACK, similar to * the idea of fast retransmit in recovery. */ + +#if IS_ENABLED(CONFIG_IPV6) + if (sk->sk_family == AF_INET6) + inet_rsk(req)->ir_iif = tcp_v6_iif(skb); + else +#endif + inet_rsk(req)->ir_iif = + inet_request_bound_dev_if(sk, skb); + if (!tcp_oow_rate_limited(sock_net(sk), skb, LINUX_MIB_TCPACKSKIPPEDSYNRECV, &tcp_rsk(req)->last_oow_ack_time)) { diff --git a/net/ipv4/tcp_plb.c b/net/ipv4/tcp_plb.c index 68ccdb9a5412..d7cc00a58e53 100644 --- a/net/ipv4/tcp_plb.c +++ b/net/ipv4/tcp_plb.c @@ -79,6 +79,7 @@ void tcp_plb_check_rehash(struct sock *sk, struct tcp_plb_state *plb) return; sk_rethink_txhash(sk); + sk_dst_reset(sk); plb->consec_cong_rounds = 0; tcp_sk(sk)->plb_rehash++; NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPPLBREHASH); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index ea99988795e7..acc22fc532c2 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -299,6 +299,7 @@ static int tcp_write_timeout(struct sock *sk) if (sk_rethink_txhash(sk)) { tp->timeout_rehash++; __NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPTIMEOUTREHASH); + sk_dst_reset(sk); } return 0; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 37534e116899..2fe753bb38b4 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -48,6 +48,11 @@ struct dst_entry *inet6_csk_route_req(const struct sock *sk, fl6->flowi6_uid = sk_uid(sk); security_req_classify_flow(req, flowi6_to_flowi_common(fl6)); + if (req->num_retrans) + fl6->mp_hash = jhash_1word(req->num_retrans, + (__force u32)ireq->ir_rmt_port) + >> 1; + if (!dst) { dst = ip6_dst_lookup_flow(sock_net(sk), sk, fl6, final_p); if (IS_ERR(dst)) @@ -70,6 +75,9 @@ struct dst_entry *inet6_csk_route_socket(struct sock *sk, fl6->saddr = np->saddr; fl6->flowlabel = np->flow_label; IP6_ECN_flow_xmit(sk, fl6->flowlabel); + + if (sk->sk_txhash) + fl6->mp_hash = sk->sk_txhash >> 1; fl6->flowi6_oif = sk->sk_bound_dev_if; fl6->flowi6_mark = sk->sk_mark; fl6->fl6_sport = inet->inet_sport; -- 2.52.0
