On Wed, Jan 21, 2026 at 3:32 PM <[email protected]> wrote:
>
> From: Chia-Yu Chang <[email protected]>
>
> Detect spurious retransmission of a previously sent ACK carrying the
> AccECN option after the second retransmission. Since this might be caused
> by a middlebox dropping ACKs with options it does not recognize, disable
> the sending of the AccECN option in all subsequent ACKs. This patch
> follows Section 3.2.3.2.2 of AccECN spec (RFC9768).
>
> Also, add a new AccECN option sending mode to the tcp_ecn_option sysctl,
> TCP_ACCECN_OPTION_PERSIST (3), which ignores the AccECN fallback policy and
> keeps sending the AccECN option whenever it fits into the TCP option space.
>
> Signed-off-by: Chia-Yu Chang <[email protected]>
> Acked-by: Paolo Abeni <[email protected]>
>
> ---
> v5:
> - Add empty line between variable declarations and code
> ---
> Documentation/networking/ip-sysctl.rst | 4 +++-
> include/linux/tcp.h | 3 ++-
> include/net/tcp_ecn.h | 2 ++
> net/ipv4/sysctl_net_ipv4.c | 2 +-
> net/ipv4/tcp_input.c | 10 ++++++++++
> net/ipv4/tcp_output.c | 7 ++++++-
> 6 files changed, 24 insertions(+), 4 deletions(-)
>
> diff --git a/Documentation/networking/ip-sysctl.rst
> b/Documentation/networking/ip-sysctl.rst
> index bc9a01606daf..28c7e4f5ecf9 100644
> --- a/Documentation/networking/ip-sysctl.rst
> +++ b/Documentation/networking/ip-sysctl.rst
> @@ -482,7 +482,9 @@ tcp_ecn_option - INTEGER
> 1 Send AccECN option sparingly according to the minimum option
> rules outlined in draft-ietf-tcpm-accurate-ecn.
> 2 Send AccECN option on every packet whenever it fits into TCP
> - option space.
> + option space except when AccECN fallback is triggered.
> + 3 Send AccECN option on every packet whenever it fits into TCP
> + option space even when AccECN fallback is triggered.
> = ============================================================
>
> Default: 2
> diff --git a/include/linux/tcp.h b/include/linux/tcp.h
> index 683f38362977..32b031d09294 100644
> --- a/include/linux/tcp.h
> +++ b/include/linux/tcp.h
> @@ -294,7 +294,8 @@ struct tcp_sock {
> u8 nonagle : 4,/* Disable Nagle algorithm? */
> rate_app_limited:1; /* rate_{delivered,interval_us} limited?
> */
> u8 received_ce_pending:4, /* Not yet transmit cnt of received_ce
> */
> - unused2:4;
> + accecn_opt_sent:1,/* Sent AccECN option in previous ACK */
> + unused2:3;
> u8 accecn_minlen:2,/* Minimum length of AccECN option sent */
> est_ecnfield:2,/* ECN field for AccECN delivered estimates */
> accecn_opt_demand:2,/* Demand AccECN option for n next ACKs */
> diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h
> index bf7d3f9f22c7..41b593ece1dd 100644
> --- a/include/net/tcp_ecn.h
> +++ b/include/net/tcp_ecn.h
> @@ -29,6 +29,7 @@ enum tcp_accecn_option {
> TCP_ACCECN_OPTION_DISABLED = 0,
> TCP_ACCECN_OPTION_MINIMUM = 1,
> TCP_ACCECN_OPTION_FULL = 2,
> + TCP_ACCECN_OPTION_PERSIST = 3,
> };
>
> /* Apply either ECT(0) or ECT(1) based on TCP_CONG_ECT_1_NEGOTIATION flag */
> @@ -413,6 +414,7 @@ static inline void tcp_accecn_init_counters(struct
> tcp_sock *tp)
> tp->received_ce_pending = 0;
> __tcp_accecn_init_bytes_counters(tp->received_ecn_bytes);
> __tcp_accecn_init_bytes_counters(tp->delivered_ecn_bytes);
> + tp->accecn_opt_sent = 0;
> tp->accecn_minlen = 0;
> tp->accecn_opt_demand = 0;
> tp->est_ecnfield = 0;
> diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
> index a1a50a5c80dc..385b5b986d23 100644
> --- a/net/ipv4/sysctl_net_ipv4.c
> +++ b/net/ipv4/sysctl_net_ipv4.c
> @@ -749,7 +749,7 @@ static struct ctl_table ipv4_net_table[] = {
> .mode = 0644,
> .proc_handler = proc_dou8vec_minmax,
> .extra1 = SYSCTL_ZERO,
> - .extra2 = SYSCTL_TWO,
> + .extra2 = SYSCTL_THREE,
> },
> {
> .procname = "tcp_ecn_option_beacon",
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index 8b774019a3a6..472bd57913ae 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -4863,6 +4863,8 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq,
> u32 end_seq)
>
> static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff
> *skb)
> {
> + struct tcp_sock *tp = tcp_sk(sk);
> +
> /* When the ACK path fails or drops most ACKs, the sender would
> * timeout and spuriously retransmit the same segment repeatedly.
> * If it seems our ACKs are not reaching the other side,
> @@ -4882,6 +4884,14 @@ static void tcp_rcv_spurious_retrans(struct sock *sk,
> const struct sk_buff *skb)
> /* Save last flowlabel after a spurious retrans. */
> tcp_save_lrcv_flowlabel(sk, skb);
> #endif
> + /* Check DSACK info to detect that the previous ACK carrying the
> + * AccECN option was lost after the second retransmission, and then
> + * stop sending AccECN option in all subsequent ACKs.
> + */
> + if (tcp_ecn_mode_accecn(tp) &&
> + TCP_SKB_CB(skb)->seq == tp->duplicate_sack[0].start_seq &&
> + tp->accecn_opt_sent)
> + tcp_accecn_fail_mode_set(tp, TCP_ACCECN_OPT_FAIL_SEND);
> }
tcp_rcv_spurious_retrans() has two callers:
- tcp_send_dupack() checks that DSACK is enabled.
- tcp_data_queue() has no such check.
So I wonder whether tp->duplicate_sack[0].start_seq could contain garbage.
Perhaps test tp->rx_opt.dsack?
>
> static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index 042e7e9b13cc..0cbba38ea87a 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -713,9 +713,12 @@ static void tcp_options_write(struct tcphdr *th, struct
> tcp_sock *tp,
> if (tp) {
> tp->accecn_minlen = 0;
> tp->accecn_opt_tstamp = tp->tcp_mstamp;
> + tp->accecn_opt_sent = 1;
> if (tp->accecn_opt_demand)
> tp->accecn_opt_demand--;
> }
> + } else if (tp) {
> + tp->accecn_opt_sent = 0;
> }
>
> if (unlikely(OPTION_SACK_ADVERTISE & options)) {
> @@ -1187,7 +1190,9 @@ static unsigned int tcp_established_options(struct sock
> *sk, struct sk_buff *skb
> if (tcp_ecn_mode_accecn(tp)) {
> int ecn_opt =
> READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_option);
>
> - if (ecn_opt && tp->saw_accecn_opt &&
> !tcp_accecn_opt_fail_send(tp) &&
> + if (ecn_opt && tp->saw_accecn_opt &&
> + (ecn_opt >= TCP_ACCECN_OPTION_PERSIST ||
> + !tcp_accecn_opt_fail_send(tp)) &&
> (ecn_opt >= TCP_ACCECN_OPTION_FULL ||
> tp->accecn_opt_demand ||
> tcp_accecn_option_beacon_check(sk))) {
> opts->use_synack_ecn_bytes = 0;
> --
> 2.34.1
>