From: Ilpo Järvinen <[email protected]>

Add an EWMA of newly acked packets. When ACK thinning occurs, use it
to select between the safer and the unsafe cep delta in AccECN
processing. If the number of packets ACKed per ACK tends to be large,
don't conservatively assume ACE field overflow.

This patch uses part of the existing 4-byte hole in the rx group for
the new u16 variable without creating more holes. Below are the pahole
outcomes before and after this patch:

[BEFORE THIS PATCH]
struct tcp_sock {
    [...]
    u32                        delivered_ecn_bytes[3]; /*  2744    12 */
    /* XXX 4 bytes hole, try to pack */

    [...]
    __cacheline_group_end__tcp_sock_write_rx[0];       /*  2816     0 */

    [...]
    /* size: 3264, cachelines: 51, members: 177 */
}

[AFTER THIS PATCH]
struct tcp_sock {
    [...]
    u32                        delivered_ecn_bytes[3]; /*  2744    12 */
    u16                        pkts_acked_ewma;        /*  2756     2 */
    /* XXX 2 bytes hole, try to pack */

    [...]
    __cacheline_group_end__tcp_sock_write_rx[0];       /*  2816     0 */

    [...]
    /* size: 3264, cachelines: 51, members: 178 */
}

Signed-off-by: Ilpo Järvinen <[email protected]>
Co-developed-by: Chia-Yu Chang <[email protected]>
Signed-off-by: Chia-Yu Chang <[email protected]>
Acked-by: Paolo Abeni <[email protected]>

---
v3:
- Add additional min() check if pkts_acked_ewma is not initialized.
---
 .../networking/net_cachelines/tcp_sock.rst    |  1 +
 include/linux/tcp.h                           |  1 +
 net/ipv4/tcp.c                                |  2 ++
 net/ipv4/tcp_input.c                          | 20 ++++++++++++++++++-
 4 files changed, 23 insertions(+), 1 deletion(-)

diff --git a/Documentation/networking/net_cachelines/tcp_sock.rst 
b/Documentation/networking/net_cachelines/tcp_sock.rst
index 26f32dbcf6ec..563daea10d6c 100644
--- a/Documentation/networking/net_cachelines/tcp_sock.rst
+++ b/Documentation/networking/net_cachelines/tcp_sock.rst
@@ -105,6 +105,7 @@ u32                           received_ce             
read_mostly         read_w
 u32[3]                        received_ecn_bytes      read_mostly         
read_write
 u8:4                          received_ce_pending     read_mostly         
read_write
 u32[3]                        delivered_ecn_bytes                         
read_write
+u16                           pkts_acked_ewma                             
read_write
 u8:2                          syn_ect_snt             write_mostly        
read_write
 u8:2                          syn_ect_rcv             read_mostly         
read_write
 u8:2                          accecn_minlen           write_mostly        
read_write
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 20b8c6e21fef..683f38362977 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -345,6 +345,7 @@ struct tcp_sock {
        u32     rate_interval_us;  /* saved rate sample: time elapsed */
        u32     rcv_rtt_last_tsecr;
        u32     delivered_ecn_bytes[3];
+       u16     pkts_acked_ewma;/* Pkts acked EWMA for AccECN cep heuristic */
        u64     first_tx_mstamp;  /* start of window send phase */
        u64     delivered_mstamp; /* time we reached "delivered" */
        u64     bytes_acked;    /* RFC4898 tcpEStatsAppHCThruOctetsAcked
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index f035440c475a..cd25fb50e81b 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3420,6 +3420,7 @@ int tcp_disconnect(struct sock *sk, int flags)
        tcp_accecn_init_counters(tp);
        tp->prev_ecnfield = 0;
        tp->accecn_opt_tstamp = 0;
+       tp->pkts_acked_ewma = 0;
        if (icsk->icsk_ca_initialized && icsk->icsk_ca_ops->release)
                icsk->icsk_ca_ops->release(sk);
        memset(icsk->icsk_ca_priv, 0, sizeof(icsk->icsk_ca_priv));
@@ -5193,6 +5194,7 @@ static void __init tcp_struct_check(void)
        CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, 
rate_interval_us);
        CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, 
rcv_rtt_last_tsecr);
        CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, 
delivered_ecn_bytes);
+       CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, 
pkts_acked_ewma);
        CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, 
first_tx_mstamp);
        CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, 
delivered_mstamp);
        CACHELINE_ASSERT_GROUP_MEMBER(struct tcp_sock, tcp_sock_write_rx, 
bytes_acked);
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 198f8a0d37be..8e95a4e302f4 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -488,6 +488,10 @@ static void tcp_count_delivered(struct tcp_sock *tp, u32 
delivered,
                tcp_count_delivered_ce(tp, delivered);
 }
 
+#define PKTS_ACKED_WEIGHT      6
+#define PKTS_ACKED_PREC                6
+#define ACK_COMP_THRESH                4
+
 /* Returns the ECN CE delta */
 static u32 __tcp_accecn_process(struct sock *sk, const struct sk_buff *skb,
                                u32 delivered_pkts, u32 delivered_bytes,
@@ -499,6 +503,7 @@ static u32 __tcp_accecn_process(struct sock *sk, const 
struct sk_buff *skb,
        u32 delta, safe_delta, d_ceb;
        bool opt_deltas_valid;
        u32 corrected_ace;
+       u32 ewma;
 
        /* Reordered ACK or uncertain due to lack of data to send and ts */
        if (!(flag & (FLAG_FORWARD_PROGRESS | FLAG_TS_PROGRESS)))
@@ -507,6 +512,18 @@ static u32 __tcp_accecn_process(struct sock *sk, const 
struct sk_buff *skb,
        opt_deltas_valid = tcp_accecn_process_option(tp, skb,
                                                     delivered_bytes, flag);
 
+       if (delivered_pkts) {
+               if (!tp->pkts_acked_ewma) {
+                       ewma = delivered_pkts << PKTS_ACKED_PREC;
+               } else {
+                       ewma = tp->pkts_acked_ewma;
+                       ewma = (((ewma << PKTS_ACKED_WEIGHT) - ewma) +
+                               (delivered_pkts << PKTS_ACKED_PREC)) >>
+                               PKTS_ACKED_WEIGHT;
+               }
+               tp->pkts_acked_ewma = min_t(u32, ewma, 0xFFFFU);
+       }
+
        if (!(flag & FLAG_SLOWPATH)) {
                /* AccECN counter might overflow on large ACKs */
                if (delivered_pkts <= TCP_ACCECN_CEP_ACE_MASK)
@@ -555,7 +572,8 @@ static u32 __tcp_accecn_process(struct sock *sk, const 
struct sk_buff *skb,
                if (d_ceb <
                    safe_delta * tp->mss_cache >> TCP_ACCECN_SAFETY_SHIFT)
                        return delta;
-       }
+       } else if (tp->pkts_acked_ewma > (ACK_COMP_THRESH << PKTS_ACKED_PREC))
+               return delta;
 
        return safe_delta;
 }
-- 
2.34.1


Reply via email to