On Wed, Jan 21, 2026 at 3:32 PM <[email protected]> wrote:
>
> From: Chia-Yu Chang <[email protected]>
>
> Detect spurious retransmission of a previously sent ACK carrying the
> AccECN option after the second retransmission. Since this might be caused
> by the middlebox dropping ACK with options it does not recognize, disable
> the sending of the AccECN option in all subsequent ACKs. This patch
> follows Section 3.2.3.2.2 of AccECN spec (RFC9768).
>
> Also, a new AccECN option sending mode is added to tcp_ecn_option sysctl:
> (TCP_ECN_OPTION_PERSIST), which ignores the AccECN fallback policy and
> persistently sends AccECN option once it fits into TCP option space.
>
> Signed-off-by: Chia-Yu Chang <[email protected]>
> Acked-by: Paolo Abeni <[email protected]>
>
> ---
> v5:
> - Add empty line between variable declarations and code
> ---
>  Documentation/networking/ip-sysctl.rst |  4 +++-
>  include/linux/tcp.h                    |  3 ++-
>  include/net/tcp_ecn.h                  |  2 ++
>  net/ipv4/sysctl_net_ipv4.c             |  2 +-
>  net/ipv4/tcp_input.c                   | 10 ++++++++++
>  net/ipv4/tcp_output.c                  |  7 ++++++-
>  6 files changed, 24 insertions(+), 4 deletions(-)
>
> diff --git a/Documentation/networking/ip-sysctl.rst 
> b/Documentation/networking/ip-sysctl.rst
> index bc9a01606daf..28c7e4f5ecf9 100644
> --- a/Documentation/networking/ip-sysctl.rst
> +++ b/Documentation/networking/ip-sysctl.rst
> @@ -482,7 +482,9 @@ tcp_ecn_option - INTEGER
>         1 Send AccECN option sparingly according to the minimum option
>           rules outlined in draft-ietf-tcpm-accurate-ecn.
>         2 Send AccECN option on every packet whenever it fits into TCP
> -         option space.
> +         option space except when AccECN fallback is triggered.
> +       3 Send AccECN option on every packet whenever it fits into TCP
> +         option space even when AccECN fallback is triggered.
>         = ============================================================
>
>         Default: 2
> diff --git a/include/linux/tcp.h b/include/linux/tcp.h
> index 683f38362977..32b031d09294 100644
> --- a/include/linux/tcp.h
> +++ b/include/linux/tcp.h
> @@ -294,7 +294,8 @@ struct tcp_sock {
>         u8      nonagle     : 4,/* Disable Nagle algorithm?             */
>                 rate_app_limited:1;  /* rate_{delivered,interval_us} limited? 
> */
>         u8      received_ce_pending:4, /* Not yet transmit cnt of received_ce 
> */
> -               unused2:4;
> +               accecn_opt_sent:1,/* Sent AccECN option in previous ACK */
> +               unused2:3;
>         u8      accecn_minlen:2,/* Minimum length of AccECN option sent */
>                 est_ecnfield:2,/* ECN field for AccECN delivered estimates */
>                 accecn_opt_demand:2,/* Demand AccECN option for n next ACKs */
> diff --git a/include/net/tcp_ecn.h b/include/net/tcp_ecn.h
> index bf7d3f9f22c7..41b593ece1dd 100644
> --- a/include/net/tcp_ecn.h
> +++ b/include/net/tcp_ecn.h
> @@ -29,6 +29,7 @@ enum tcp_accecn_option {
>         TCP_ACCECN_OPTION_DISABLED = 0,
>         TCP_ACCECN_OPTION_MINIMUM = 1,
>         TCP_ACCECN_OPTION_FULL = 2,
> +       TCP_ACCECN_OPTION_PERSIST = 3,
>  };
>
>  /* Apply either ECT(0) or ECT(1) based on TCP_CONG_ECT_1_NEGOTIATION flag */
> @@ -413,6 +414,7 @@ static inline void tcp_accecn_init_counters(struct 
> tcp_sock *tp)
>         tp->received_ce_pending = 0;
>         __tcp_accecn_init_bytes_counters(tp->received_ecn_bytes);
>         __tcp_accecn_init_bytes_counters(tp->delivered_ecn_bytes);
> +       tp->accecn_opt_sent = 0;
>         tp->accecn_minlen = 0;
>         tp->accecn_opt_demand = 0;
>         tp->est_ecnfield = 0;
> diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
> index a1a50a5c80dc..385b5b986d23 100644
> --- a/net/ipv4/sysctl_net_ipv4.c
> +++ b/net/ipv4/sysctl_net_ipv4.c
> @@ -749,7 +749,7 @@ static struct ctl_table ipv4_net_table[] = {
>                 .mode           = 0644,
>                 .proc_handler   = proc_dou8vec_minmax,
>                 .extra1         = SYSCTL_ZERO,
> -               .extra2         = SYSCTL_TWO,
> +               .extra2         = SYSCTL_THREE,
>         },
>         {
>                 .procname       = "tcp_ecn_option_beacon",
> diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
> index 8b774019a3a6..472bd57913ae 100644
> --- a/net/ipv4/tcp_input.c
> +++ b/net/ipv4/tcp_input.c
> @@ -4863,6 +4863,8 @@ static void tcp_dsack_extend(struct sock *sk, u32 seq, 
> u32 end_seq)
>
>  static void tcp_rcv_spurious_retrans(struct sock *sk, const struct sk_buff 
> *skb)
>  {
> +       struct tcp_sock *tp = tcp_sk(sk);
> +
>         /* When the ACK path fails or drops most ACKs, the sender would
>          * timeout and spuriously retransmit the same segment repeatedly.
>          * If it seems our ACKs are not reaching the other side,
> @@ -4882,6 +4884,14 @@ static void tcp_rcv_spurious_retrans(struct sock *sk, 
> const struct sk_buff *skb)
>         /* Save last flowlabel after a spurious retrans. */
>         tcp_save_lrcv_flowlabel(sk, skb);
>  #endif
> +       /* Check DSACK info to detect that the previous ACK carrying the
> +        * AccECN option was lost after the second retransmission, and then
> +        * stop sending AccECN option in all subsequent ACKs.
> +        */
> +       if (tcp_ecn_mode_accecn(tp) &&
> +           TCP_SKB_CB(skb)->seq == tp->duplicate_sack[0].start_seq &&
> +           tp->accecn_opt_sent)
> +               tcp_accecn_fail_mode_set(tp, TCP_ACCECN_OPT_FAIL_SEND);
>  }

tcp_rcv_spurious_retrans() has two callers.

tcp_send_dupack() checks that DSACK is enabled.

tcp_data_queue(): no such check.

So I wonder whether tp->duplicate_sack[0].start_seq could contain garbage?

Perhaps test tp->rx_opt.dsack?

>
>  static void tcp_send_dupack(struct sock *sk, const struct sk_buff *skb)
> diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
> index 042e7e9b13cc..0cbba38ea87a 100644
> --- a/net/ipv4/tcp_output.c
> +++ b/net/ipv4/tcp_output.c
> @@ -713,9 +713,12 @@ static void tcp_options_write(struct tcphdr *th, struct 
> tcp_sock *tp,
>                 if (tp) {
>                         tp->accecn_minlen = 0;
>                         tp->accecn_opt_tstamp = tp->tcp_mstamp;
> +                       tp->accecn_opt_sent = 1;
>                         if (tp->accecn_opt_demand)
>                                 tp->accecn_opt_demand--;
>                 }
> +       } else if (tp) {
> +               tp->accecn_opt_sent = 0;
>         }
>
>         if (unlikely(OPTION_SACK_ADVERTISE & options)) {
> @@ -1187,7 +1190,9 @@ static unsigned int tcp_established_options(struct sock 
> *sk, struct sk_buff *skb
>         if (tcp_ecn_mode_accecn(tp)) {
>                 int ecn_opt = 
> READ_ONCE(sock_net(sk)->ipv4.sysctl_tcp_ecn_option);
>
> -               if (ecn_opt && tp->saw_accecn_opt && 
> !tcp_accecn_opt_fail_send(tp) &&
> +               if (ecn_opt && tp->saw_accecn_opt &&
> +                   (ecn_opt >= TCP_ACCECN_OPTION_PERSIST ||
> +                    !tcp_accecn_opt_fail_send(tp)) &&
>                     (ecn_opt >= TCP_ACCECN_OPTION_FULL || 
> tp->accecn_opt_demand ||
>                      tcp_accecn_option_beacon_check(sk))) {
>                         opts->use_synack_ecn_bytes = 0;
> --
> 2.34.1
>

Reply via email to