On 5/14/25 3:56 PM, chia-yu.ch...@nokia-bell-labs.com wrote: > This patch uses the existing 1-byte holes in the tcp_sock_write_txrx > group for new u8 members, but adds a 4-byte hole in tcp_sock_write_rx > group after the new u32 delivered_ecn_bytes[3] member. Therefore, the > group size of tcp_sock_write_rx is increased from 96 to 112.
Note that I'm still concerned by the relevant increase of the cacheline groups size. My fear is that this change could defeat some/most of the benefist from the cacheline reorg for all tcp users. Some additional feedback from Eric and/or Neal more than welcome! A possible alternative could be placing all the accounting fields outside of all the fastpath cache groups, i.e. after __cacheline_group_end(tcp_sock_write_rx) > @@ -710,6 +713,8 @@ static __be32 *process_tcp_ao_options(struct tcp_sock *tp, > return ptr; > } > > +#define NOP_LEFTOVER ((TCPOPT_NOP << 8) | TCPOPT_NOP) > + > /* Write previously computed TCP options to the packet. > * > * Beware: Something in the Internet is very sensitive to the ordering of > @@ -728,8 +733,10 @@ static void tcp_options_write(struct tcphdr *th, struct > tcp_sock *tp, > struct tcp_out_options *opts, > struct tcp_key *key) > { > + u16 leftover_bytes = NOP_LEFTOVER; /* replace next NOPs if avail */ > __be32 *ptr = (__be32 *)(th + 1); > u16 options = opts->options; /* mungable copy */ > + int leftover_size = 2; > > if (tcp_key_is_md5(key)) { > *ptr++ = htonl((TCPOPT_NOP << 24) | (TCPOPT_NOP << 16) | > @@ -763,18 +770,64 @@ static void tcp_options_write(struct tcphdr *th, struct > tcp_sock *tp, > *ptr++ = htonl(opts->tsecr); > } > > + if (OPTION_ACCECN & options) { > + const u8 ect0_idx = INET_ECN_ECT_0 - 1; > + const u8 ect1_idx = INET_ECN_ECT_1 - 1; > + const u8 ce_idx = INET_ECN_CE - 1; > + u32 e0b; > + u32 e1b; > + u32 ceb; > + u8 len; > + > + e0b = opts->ecn_bytes[ect0_idx] + TCP_ACCECN_E0B_INIT_OFFSET; > + e1b = opts->ecn_bytes[ect1_idx] + TCP_ACCECN_E1B_INIT_OFFSET; > + ceb = opts->ecn_bytes[ce_idx] + TCP_ACCECN_CEB_INIT_OFFSET; > + len = TCPOLEN_ACCECN_BASE + > + opts->num_accecn_fields * TCPOLEN_ACCECN_PERFIELD; > + > + if (opts->num_accecn_fields == 2) { > + *ptr++ = htonl((TCPOPT_ACCECN1 << 24) | (len << 16) | > + ((e1b >> 8) & 0xffff)); > + *ptr++ = htonl(((e1b & 0xff) << 24) | > + (ceb & 0xffffff)); > + } else if (opts->num_accecn_fields == 1) { > + *ptr++ = htonl((TCPOPT_ACCECN1 << 24) | (len << 16) | > + ((e1b >> 8) & 0xffff)); > + leftover_bytes = ((e1b & 0xff) << 8) | > + TCPOPT_NOP; > + leftover_size = 1; > + } else if (opts->num_accecn_fields == 0) { > + leftover_bytes = (TCPOPT_ACCECN1 << 8) | len; > + leftover_size = 2; > + } else if (opts->num_accecn_fields == 3) { > + *ptr++ = htonl((TCPOPT_ACCECN1 << 24) | (len << 16) | > + ((e1b >> 8) & 0xffff)); > + *ptr++ = htonl(((e1b & 0xff) << 24) | > + (ceb & 0xffffff)); > + *ptr++ = htonl(((e0b & 0xffffff) << 8) | > + TCPOPT_NOP); > + } > + if (tp) > + tp->accecn_minlen = 0; > + } > + > if (unlikely(OPTION_SACK_ADVERTISE & options)) { > - *ptr++ = htonl((TCPOPT_NOP << 24) | > - (TCPOPT_NOP << 16) | > + *ptr++ = htonl((leftover_bytes << 16) | > (TCPOPT_SACK_PERM << 8) | > TCPOLEN_SACK_PERM); > + leftover_bytes = NOP_LEFTOVER; AFAICS here leftover_size could be == 1. why is not reset to 2? More importantly this looks quite fragile and error prone. *Possibly* have a tcp_accecn_write_option() helper that would rewrite the existing option as needed could be simpler and less impacting for the existing code? /P