The branch main has been updated by cc:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=a3aa6f65290482cedf4aeda1d0875ca6433c7f04

commit a3aa6f65290482cedf4aeda1d0875ca6433c7f04
Author:     Cheng Cui <[email protected]>
AuthorDate: 2023-06-01 11:48:07 +0000
Commit:     Cheng Cui <[email protected]>
CommitDate: 2023-06-01 11:55:01 +0000

    cc_cubic: Use units of micro seconds (usecs) instead of ticks in rtt.
    
    This improves TCP friendly cwnd in cases of low latency high drop rate
networks. Tests show +42% and +37% better performance in 1Gbps and 10Gbps
    cases.
    
    Reported by: Bhaskar Pardeshi from VMware.
    Reviewed By: rscheff, tuexen
    Approved by: rscheff (mentor), tuexen (mentor)
---
 sys/netinet/cc/cc_cubic.c | 60 +++++++++++++++++++++++++----------------------
 sys/netinet/cc/cc_cubic.h | 33 ++++++++++++++------------
 2 files changed, 50 insertions(+), 43 deletions(-)

diff --git a/sys/netinet/cc/cc_cubic.c b/sys/netinet/cc/cc_cubic.c
index 8992b9beba13..be9bd9859122 100644
--- a/sys/netinet/cc/cc_cubic.c
+++ b/sys/netinet/cc/cc_cubic.c
@@ -240,7 +240,7 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type)
 {
        struct cubic *cubic_data;
        unsigned long w_tf, w_cubic_next;
-       int ticks_since_cong;
+       int usecs_since_cong;
 
        cubic_data = ccv->cc_data;
        cubic_record_rtt(ccv);
@@ -253,7 +253,7 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type)
            (ccv->flags & CCF_CWND_LIMITED)) {
                 /* Use the logic in NewReno ack_received() for slow start. */
                if (CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh) ||
-                   cubic_data->min_rtt_ticks == TCPTV_SRTTBASE) {
+                   cubic_data->min_rtt_usecs == TCPTV_SRTTBASE) {
                        cubic_does_slow_start(ccv, cubic_data);
                } else {
                        if (cubic_data->flags & CUBICFLAG_HYSTART_IN_CSS) {
@@ -282,12 +282,12 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type)
                                cubic_data->K = cubic_k(cubic_data->max_cwnd /
                                                        CCV(ccv, t_maxseg));
                        }
-                       if ((ticks_since_cong =
-                           ticks - cubic_data->t_last_cong) < 0) {
+                       usecs_since_cong = (ticks - cubic_data->t_last_cong) * tick;
+                       if (usecs_since_cong < 0) {
                                /*
                                 * dragging t_last_cong along
                                 */
-                               ticks_since_cong = INT_MAX;
+                               usecs_since_cong = INT_MAX;
                                cubic_data->t_last_cong = ticks - INT_MAX;
                        }
                        /*
@@ -297,13 +297,14 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type)
                         * RTT is dominated by network buffering rather than
                         * propagation delay.
                         */
-                       w_tf = tf_cwnd(ticks_since_cong,
-                           cubic_data->mean_rtt_ticks, cubic_data->max_cwnd,
-                           CCV(ccv, t_maxseg));
+                       w_tf = tf_cwnd(usecs_since_cong, cubic_data->mean_rtt_usecs,
+                                      cubic_data->max_cwnd, CCV(ccv, t_maxseg));
 
-                       w_cubic_next = cubic_cwnd(ticks_since_cong +
-                           cubic_data->mean_rtt_ticks, cubic_data->max_cwnd,
-                           CCV(ccv, t_maxseg), cubic_data->K);
+                       w_cubic_next = cubic_cwnd(usecs_since_cong +
+                                                 cubic_data->mean_rtt_usecs,
+                                                 cubic_data->max_cwnd,
+                                                 CCV(ccv, t_maxseg),
+                                                 cubic_data->K);
 
                        ccv->flags &= ~CCF_ABC_SENTAWND;
 
@@ -397,8 +398,8 @@ cubic_cb_init(struct cc_var *ccv, void *ptr)
 
        /* Init some key variables with sensible defaults. */
        cubic_data->t_last_cong = ticks;
-       cubic_data->min_rtt_ticks = TCPTV_SRTTBASE;
-       cubic_data->mean_rtt_ticks = 1;
+       cubic_data->min_rtt_usecs = TCPTV_SRTTBASE;
+       cubic_data->mean_rtt_usecs = 1;
 
        ccv->cc_data = cubic_data;
        cubic_data->flags = CUBICFLAG_HYSTART_ENABLED;
@@ -549,13 +550,13 @@ cubic_post_recovery(struct cc_var *ccv)
 
        /* Calculate the average RTT between congestion epochs. */
        if (cubic_data->epoch_ack_count > 0 &&
-           cubic_data->sum_rtt_ticks >= cubic_data->epoch_ack_count) {
-               cubic_data->mean_rtt_ticks = (int)(cubic_data->sum_rtt_ticks /
+           cubic_data->sum_rtt_usecs >= cubic_data->epoch_ack_count) {
+               cubic_data->mean_rtt_usecs = (int)(cubic_data->sum_rtt_usecs /
                    cubic_data->epoch_ack_count);
        }
 
        cubic_data->epoch_ack_count = 0;
-       cubic_data->sum_rtt_ticks = 0;
+       cubic_data->sum_rtt_usecs = 0;
 }
 
 /*
@@ -565,13 +566,13 @@ static void
 cubic_record_rtt(struct cc_var *ccv)
 {
        struct cubic *cubic_data;
-       int t_srtt_ticks;
+       uint32_t t_srtt_usecs;
 
        /* Ignore srtt until a min number of samples have been taken. */
        if (CCV(ccv, t_rttupdated) >= CUBIC_MIN_RTT_SAMPLES) {
                cubic_data = ccv->cc_data;
-               t_srtt_ticks = tcp_get_srtt(ccv->ccvc.tcp,
-                                           TCP_TMR_GRANULARITY_TICKS);
+               t_srtt_usecs = tcp_get_srtt(ccv->ccvc.tcp,
+                                           TCP_TMR_GRANULARITY_USEC);
                /*
                 * Record the current SRTT as our minrtt if it's the smallest
                 * we've seen or minrtt is currently equal to its initialised
@@ -579,24 +580,27 @@ cubic_record_rtt(struct cc_var *ccv)
                 *
                 * XXXLAS: Should there be some hysteresis for minrtt?
                 */
-               if ((t_srtt_ticks < cubic_data->min_rtt_ticks ||
-                   cubic_data->min_rtt_ticks == TCPTV_SRTTBASE)) {
-                       cubic_data->min_rtt_ticks = max(1, t_srtt_ticks);
+               if ((t_srtt_usecs < cubic_data->min_rtt_usecs ||
+                   cubic_data->min_rtt_usecs == TCPTV_SRTTBASE)) {
+                       /* A minimal rtt is a single unshifted tick of a ticks
+                        * timer. */
+                       cubic_data->min_rtt_usecs = max(tick >> TCP_RTT_SHIFT,
+                                                       t_srtt_usecs);
 
                        /*
                         * If the connection is within its first congestion
-                        * epoch, ensure we prime mean_rtt_ticks with a
+                        * epoch, ensure we prime mean_rtt_usecs with a
                         * reasonable value until the epoch average RTT is
                         * calculated in cubic_post_recovery().
                         */
-                       if (cubic_data->min_rtt_ticks >
-                           cubic_data->mean_rtt_ticks)
-                               cubic_data->mean_rtt_ticks =
-                                   cubic_data->min_rtt_ticks;
+                       if (cubic_data->min_rtt_usecs >
+                           cubic_data->mean_rtt_usecs)
+                               cubic_data->mean_rtt_usecs =
+                                   cubic_data->min_rtt_usecs;
                }
 
                /* Sum samples for epoch average RTT calculation. */
-               cubic_data->sum_rtt_ticks += t_srtt_ticks;
+               cubic_data->sum_rtt_usecs += t_srtt_usecs;
                cubic_data->epoch_ack_count++;
        }
 }
diff --git a/sys/netinet/cc/cc_cubic.h b/sys/netinet/cc/cc_cubic.h
index 0749a9ebbc1a..3d408154c1a5 100644
--- a/sys/netinet/cc/cc_cubic.h
+++ b/sys/netinet/cc/cc_cubic.h
@@ -91,8 +91,8 @@
 struct cubic {
        /* CUBIC K in fixed point form with CUBIC_SHIFT worth of precision. */
        int64_t         K;
-       /* Sum of RTT samples across an epoch in ticks. */
-       int64_t         sum_rtt_ticks;
+       /* Sum of RTT samples across an epoch in usecs. */
+       int64_t         sum_rtt_usecs;
        /* cwnd at the most recent congestion event. */
        unsigned long   max_cwnd;
        /* cwnd at the previous congestion event. */
@@ -101,10 +101,10 @@ struct cubic {
        unsigned long   prev_max_cwnd_cp;
        /* various flags */
        uint32_t        flags;
-       /* Minimum observed rtt in ticks. */
-       int             min_rtt_ticks;
+       /* Minimum observed rtt in usecs. */
+       int             min_rtt_usecs;
        /* Mean observed rtt between congestion epochs. */
-       int             mean_rtt_ticks;
+       int             mean_rtt_usecs;
        /* ACKs since last congestion event. */
        int             epoch_ack_count;
        /* Timestamp (in ticks) of arriving in congestion avoidance from last
@@ -222,14 +222,15 @@ cubic_k(unsigned long wmax_pkts)
  * XXXLAS: Characterise bounds for overflow.
  */
 static __inline unsigned long
-cubic_cwnd(int ticks_since_cong, unsigned long wmax, uint32_t smss, int64_t K)
+cubic_cwnd(int usecs_since_cong, unsigned long wmax, uint32_t smss, int64_t K)
 {
        int64_t cwnd;
 
        /* K is in fixed point form with CUBIC_SHIFT worth of precision. */
 
        /* t - K, with CUBIC_SHIFT worth of precision. */
-       cwnd = (((int64_t)ticks_since_cong << CUBIC_SHIFT) - (K * hz)) / hz;
+       cwnd = (((int64_t)usecs_since_cong << CUBIC_SHIFT) - (K * hz * tick)) /
+              (hz * tick);
 
        if (cwnd > CUBED_ROOT_MAX_ULONG)
                return INT_MAX;
@@ -255,15 +256,17 @@ cubic_cwnd(int usecs_since_cong, unsigned long wmax, uint32_t smss, int64_t K)
 }
 
 /*
- * Compute an approximation of the NewReno cwnd some number of ticks after a
+ * Compute an approximation of the NewReno cwnd some number of usecs after a
  * congestion event. RTT should be the average RTT estimate for the path
  * measured over the previous congestion epoch and wmax is the value of cwnd at
  * the last congestion event. The "TCP friendly" concept in the CUBIC I-D is
  * rather tricky to understand and it turns out this function is not required.
  * It is left here for reference.
+ *
+ * XXX: Not used
  */
 static __inline unsigned long
-reno_cwnd(int ticks_since_cong, int rtt_ticks, unsigned long wmax,
+reno_cwnd(int usecs_since_cong, int rtt_usecs, unsigned long wmax,
     uint32_t smss)
 {
 
@@ -272,26 +275,26 @@ reno_cwnd(int usecs_since_cong, int rtt_usecs, unsigned long wmax,
         * W_tcp(t) deals with cwnd/wmax in pkts, so because our cwnd is in
         * bytes, we have to multiply by smss.
         */
-       return (((wmax * RENO_BETA) + (((ticks_since_cong * smss)
-           << CUBIC_SHIFT) / rtt_ticks)) >> CUBIC_SHIFT);
+       return (((wmax * RENO_BETA) + (((usecs_since_cong * smss)
+           << CUBIC_SHIFT) / rtt_usecs)) >> CUBIC_SHIFT);
 }
 
 /*
- * Compute an approximation of the "TCP friendly" cwnd some number of ticks
+ * Compute an approximation of the "TCP friendly" cwnd some number of usecs
  * after a congestion event that is designed to yield the same average cwnd as
  * NewReno while using CUBIC's beta of 0.7. RTT should be the average RTT
 * estimate for the path measured over the previous congestion epoch and wmax is
  * the value of cwnd at the last congestion event.
  */
 static __inline unsigned long
-tf_cwnd(int ticks_since_cong, int rtt_ticks, unsigned long wmax,
+tf_cwnd(int usecs_since_cong, int rtt_usecs, unsigned long wmax,
     uint32_t smss)
 {
 
        /* Equation 4 of I-D. */
        return (((wmax * CUBIC_BETA) +
-           (((THREE_X_PT3 * (unsigned long)ticks_since_cong *
-           (unsigned long)smss) << CUBIC_SHIFT) / (TWO_SUB_PT3 * rtt_ticks)))
+           (((THREE_X_PT3 * (unsigned long)usecs_since_cong *
+           (unsigned long)smss) << CUBIC_SHIFT) / (TWO_SUB_PT3 * rtt_usecs)))
            >> CUBIC_SHIFT);
 }
 

Reply via email to