The branch main has been updated by rrs:

URL: 
https://cgit.FreeBSD.org/src/commit/?id=e022f2b0131af6848da2b921698c52b547e60e8b

commit e022f2b0131af6848da2b921698c52b547e60e8b
Author:     Randall Stewart <[email protected]>
AuthorDate: 2023-06-09 14:27:08 +0000
Commit:     Randall Stewart <[email protected]>
CommitDate: 2023-06-09 14:27:08 +0000

    tcp: Rack fixes and misc updates
    
    So over the past few weeks we have found several bugs and updated hybrid
    pacing to have more data in the low-level logging. We have also moved more
    of the BBlogs to "verbose" mode so that we don't generate a lot of the
    debug data unless you put verbose/debug on.
    There were a couple of notable bugs, one being the incorrect passing of
    the percentage for reduction to timely, and the other the incorrect use
    of the 20% timely Beta instead of 80%. This also expands a simple idea
    to be able to pace a cwnd (fillcw) as an alternate pacing mechanism,
    combining that with timely reduction/increase.
    
    Reviewed by: tuexen
    Sponsored by: Netflix Inc
    Differential Revision: https://reviews.freebsd.org/D40391
---
 sys/netinet/tcp.h                 |   2 +
 sys/netinet/tcp_stacks/rack.c     | 469 ++++++++++++++++++++++++++++++--------
 sys/netinet/tcp_stacks/tcp_rack.h |   8 +-
 3 files changed, 389 insertions(+), 90 deletions(-)

diff --git a/sys/netinet/tcp.h b/sys/netinet/tcp.h
index 314cad76dee2..3c3086eabaf4 100644
--- a/sys/netinet/tcp.h
+++ b/sys/netinet/tcp.h
@@ -499,6 +499,8 @@ struct tcp_log_user {
 #define TCP_HYBRID_PACING_ENABLE       0x0010          /* We are enabling 
hybrid pacing else disable */
 #define TCP_HYBRID_PACING_S_MSS                0x0020          /* Clent wants 
us to set the mss overriding gp est in CU */
 #define TCP_HYBRID_PACING_SETMSS       0x1000          /* Internal flag that 
tellsus we set the mss on this entry */
+#define TCP_HYBRID_PACING_WASSET       0x2000          /* We init to this to 
know if a hybrid command was issued */
+
 
 struct tcp_hybrid_req {
        struct tcp_snd_req req;
diff --git a/sys/netinet/tcp_stacks/rack.c b/sys/netinet/tcp_stacks/rack.c
index 36fd5daf07dd..c4d4923fb592 100644
--- a/sys/netinet/tcp_stacks/rack.c
+++ b/sys/netinet/tcp_stacks/rack.c
@@ -320,15 +320,28 @@ static int32_t rack_hbp_thresh = 3;               /* what 
is the divisor max_rtt/min_rtt to d
 /* Part of pacing */
 static int32_t rack_max_per_above = 30;                /* When we go to 
increment stop if above 100+this% */
 
-/* Timely information */
-/* Combine these two gives the range of 'no change' to bw */
-/* ie the up/down provide the upper and lower bound */
+/* Timely information:
+ *
+ * Here we have various control parameters on how
+ * timely may change the multiplier. rack_gain_p5_ub
+ * is associated with timely but not directly influencing
+ * the rate decision like the other variables. It controls
+ * the way fill-cw interacts with timely and caps how much
+ * timely can boost the fill-cw b/w.
+ *
+ * The other values are various boost/shrink numbers as well
+ * as potential caps when adjustments are made to the timely
+ * gain (returned by rack_get_output_gain(). Remember too that
+ * the gain returned can be overriden by other factors such as
+ * probeRTT as well as fixed-rate-pacing.
+ */
+static int32_t rack_gain_p5_ub = 250;
 static int32_t rack_gp_per_bw_mul_up = 2;      /* 2% */
 static int32_t rack_gp_per_bw_mul_down = 4;    /* 4% */
 static int32_t rack_gp_rtt_maxmul = 3;         /* 3 x maxmin */
 static int32_t rack_gp_rtt_minmul = 1;         /* minrtt + (minrtt/mindiv) is 
lower rtt */
 static int32_t rack_gp_rtt_mindiv = 4;         /* minrtt + (minrtt * 
minmul/mindiv) is lower rtt */
-static int32_t rack_gp_decrease_per = 20;      /* 20% decrease in multiplier */
+static int32_t rack_gp_decrease_per = 80;      /* Beta value of timely 
decrease (.8) = 80 */
 static int32_t rack_gp_increase_per = 2;       /* 2% increase in multiplier */
 static int32_t rack_per_lower_bound = 50;      /* Don't allow to drop below 
this multiplier */
 static int32_t rack_per_upper_bound_ss = 0;    /* Don't allow SS to grow above 
this */
@@ -713,7 +726,7 @@ static void
 rack_log_gpset(struct tcp_rack *rack, uint32_t seq_end, uint32_t ack_end_t,
               uint32_t send_end_t, int line, uint8_t mode, struct rack_sendmap 
*rsm)
 {
-       if (tcp_bblogging_on(rack->rc_tp)) {
+       if (tcp_bblogging_on(rack->rc_tp) && (rack_verbose_logging != 0)) {
                union tcp_log_stackspecific log;
                struct timeval tv;
 
@@ -1175,8 +1188,8 @@ rack_init_sysctls(void)
        SYSCTL_ADD_S32(&rack_sysctl_ctx,
            SYSCTL_CHILDREN(rack_timely),
            OID_AUTO, "decrease", CTLFLAG_RW,
-           &rack_gp_decrease_per, 20,
-           "Rack timely decrease percentage of our GP multiplication factor");
+           &rack_gp_decrease_per, 80,
+           "Rack timely Beta value 80 = .8 (scaled by 100)");
        SYSCTL_ADD_S32(&rack_sysctl_ctx,
            SYSCTL_CHILDREN(rack_timely),
            OID_AUTO, "increase", CTLFLAG_RW,
@@ -1187,6 +1200,12 @@ rack_init_sysctls(void)
            OID_AUTO, "lowerbound", CTLFLAG_RW,
            &rack_per_lower_bound, 50,
            "Rack timely lowest percentage we allow GP multiplier to fall to");
+       SYSCTL_ADD_S32(&rack_sysctl_ctx,
+           SYSCTL_CHILDREN(rack_timely),
+           OID_AUTO, "p5_upper", CTLFLAG_RW,
+           &rack_gain_p5_ub, 250,
+           "Profile 5 upper bound to timely gain");
+
        SYSCTL_ADD_S32(&rack_sysctl_ctx,
            SYSCTL_CHILDREN(rack_timely),
            OID_AUTO, "upperboundss", CTLFLAG_RW,
@@ -1967,7 +1986,7 @@ rack_get_fixed_pacing_bw(struct tcp_rack *rack)
 static void
 rack_log_hybrid_bw(struct tcp_rack *rack, uint32_t seq, uint64_t cbw, uint64_t 
tim,
        uint64_t data, uint8_t mod, uint16_t aux,
-       struct tcp_sendfile_track *cur)
+       struct tcp_sendfile_track *cur, int line)
 {
 #ifdef TCP_REQUEST_TRK
        int do_log = 0;
@@ -1991,7 +2010,7 @@ rack_log_hybrid_bw(struct tcp_rack *rack, uint32_t seq, 
uint64_t cbw, uint64_t t
                 * All other less noisy logs here except the measure which
                 * also needs to come out on the point and the log.
                 */
-               do_log = tcp_bblogging_on(rack->rc_tp);         
+               do_log = tcp_bblogging_on(rack->rc_tp);
        } else {
                do_log = tcp_bblogging_point_on(rack->rc_tp, 
TCP_BBPOINT_REQ_LEVEL_LOGGING);
        }
@@ -2004,6 +2023,7 @@ rack_log_hybrid_bw(struct tcp_rack *rack, uint32_t seq, 
uint64_t cbw, uint64_t t
                /* Convert our ms to a microsecond */
                memset(&log, 0, sizeof(log));
 
+               log.u_bbr.cwnd_gain = line;
                log.u_bbr.timeStamp = tcp_get_usecs(&tv);
                log.u_bbr.rttProp = tim;
                log.u_bbr.bw_inuse = cbw;
@@ -2049,8 +2069,10 @@ rack_log_hybrid_bw(struct tcp_rack *rack, uint32_t seq, 
uint64_t cbw, uint64_t t
                        /* localtime = <delivered | applimited>*/
                        log.u_bbr.applimited = (uint32_t)(cur->localtime & 
0x00000000ffffffff);
                        log.u_bbr.delivered = (uint32_t)((cur->localtime >> 32) 
& 0x00000000ffffffff);
+#ifdef TCP_REQUEST_TRK
                        off = (uint64_t)(cur) - 
(uint64_t)(&rack->rc_tp->t_tcpreq_info[0]);
                        log.u_bbr.bbr_substate = (uint8_t)(off / sizeof(struct 
tcp_sendfile_track));
+#endif
                        log.u_bbr.flex4 = (uint32_t)(rack->rc_tp->t_sndbytes - 
cur->sent_at_fs);
                        log.u_bbr.flex5 = 
(uint32_t)(rack->rc_tp->t_snd_rxt_bytes - cur->rxt_at_fs);
                        log.u_bbr.flex7 = (uint16_t)cur->hybrid_flags;
@@ -2083,6 +2105,60 @@ rack_log_hybrid_bw(struct tcp_rack *rack, uint32_t seq, 
uint64_t cbw, uint64_t t
 #endif
 }
 
+#ifdef TCP_REQUEST_TRK
+static void
+rack_log_hybrid_sends(struct tcp_rack *rack, struct tcp_sendfile_track *cur, 
int line)
+{
+       if (tcp_bblogging_point_on(rack->rc_tp, TCP_BBPOINT_REQ_LEVEL_LOGGING)) 
{
+               union tcp_log_stackspecific log;
+               struct timeval tv;
+               uint64_t off;
+
+               /* Convert our ms to a microsecond */
+               memset(&log, 0, sizeof(log));
+
+               log.u_bbr.timeStamp = tcp_get_usecs(&tv);
+               log.u_bbr.cur_del_rate = rack->rc_tp->t_sndbytes;
+               log.u_bbr.delRate = cur->sent_at_fs;
+               log.u_bbr.rttProp = rack->rc_tp->t_snd_rxt_bytes;
+               log.u_bbr.bw_inuse = cur->rxt_at_fs;
+               log.u_bbr.cwnd_gain = line;
+               off = (uint64_t)(cur) - 
(uint64_t)(&rack->rc_tp->t_tcpreq_info[0]);
+               log.u_bbr.bbr_substate = (uint8_t)(off / sizeof(struct 
tcp_sendfile_track));
+               /* start = < flex1 | flex2 > */
+               log.u_bbr.flex2 = (uint32_t)(cur->start & 0x00000000ffffffff);
+               log.u_bbr.flex1 = (uint32_t)((cur->start >> 32) & 
0x00000000ffffffff);
+               /* end = < flex3 | flex4 > */
+               log.u_bbr.flex4 = (uint32_t)(cur->end & 0x00000000ffffffff);
+               log.u_bbr.flex3 = (uint32_t)((cur->end >> 32) & 
0x00000000ffffffff);
+
+               /* localtime = <delivered | applimited>*/
+               log.u_bbr.applimited = (uint32_t)(cur->localtime & 
0x00000000ffffffff);
+               log.u_bbr.delivered = (uint32_t)((cur->localtime >> 32) & 
0x00000000ffffffff);
+               /* client timestamp = <lt_epoch | epoch>*/
+               log.u_bbr.epoch = (uint32_t)(cur->timestamp & 
0x00000000ffffffff);
+               log.u_bbr.lt_epoch = (uint32_t)((cur->timestamp >> 32) & 
0x00000000ffffffff);
+               /* now set all the flags in */
+               log.u_bbr.pkts_out = cur->hybrid_flags;
+               log.u_bbr.flex6 = cur->flags;
+               /*
+                * Last send time  = <flex5 | pkt_epoch>  note we do not 
distinguish cases
+                * where a false retransmit occurred so first_send  <-> 
lastsend may
+                * include longer time then it actually took if we have a false 
rxt.
+                */
+               log.u_bbr.pkt_epoch = 
(uint32_t)(rack->r_ctl.last_tmit_time_acked & 0x00000000ffffffff);
+               log.u_bbr.flex5 = (uint32_t)((rack->r_ctl.last_tmit_time_acked 
>> 32) & 0x00000000ffffffff);
+
+               log.u_bbr.flex8 = HYBRID_LOG_SENT_LOST;
+               tcp_log_event(rack->rc_tp, NULL,
+                   &rack->rc_inp->inp_socket->so_rcv,
+                   &rack->rc_inp->inp_socket->so_snd,
+                   TCP_HYBRID_PACING_LOG, 0,
+                   0, &log, false, NULL, __func__, __LINE__, &tv);
+       }
+}
+#endif
+
 static inline uint64_t
 rack_compensate_for_linerate(struct tcp_rack *rack, uint64_t bw)
 {
@@ -2128,13 +2204,13 @@ rack_rate_cap_bw(struct tcp_rack *rack, uint64_t *bw, 
int *capped)
                 */
                struct tcp_sendfile_track *ent;
 
-               ent = rack->r_ctl.rc_last_sft;
+               ent = rack->r_ctl.rc_last_sft;
                microuptime(&tv);
                timenow = tcp_tv_to_lusectick(&tv);
                if (timenow >= ent->deadline) {
                        /* No time left we do DGP only */
                        rack_log_hybrid_bw(rack, rack->rc_tp->snd_max,
-                                          0, 0, 0, HYBRID_LOG_OUTOFTIME, 0, 
ent);
+                                          0, 0, 0, HYBRID_LOG_OUTOFTIME, 0, 
ent, __LINE__);
                        rack->r_ctl.bw_rate_cap = 0;
                        return;
                }
@@ -2143,7 +2219,7 @@ rack_rate_cap_bw(struct tcp_rack *rack, uint64_t *bw, int 
*capped)
                if (timeleft < HPTS_MSEC_IN_SEC) {
                        /* If there is less than a ms left just use DGPs rate */
                        rack_log_hybrid_bw(rack, rack->rc_tp->snd_max,
-                                          0, timeleft, 0, 
HYBRID_LOG_OUTOFTIME, 0, ent);
+                                          0, timeleft, 0, 
HYBRID_LOG_OUTOFTIME, 0, ent, __LINE__);
                        rack->r_ctl.bw_rate_cap = 0;
                        return;
                }
@@ -2159,7 +2235,7 @@ rack_rate_cap_bw(struct tcp_rack *rack, uint64_t *bw, int 
*capped)
                        else {
                                /* TSNH, we should catch it at the send */
                                rack_log_hybrid_bw(rack, rack->rc_tp->snd_max,
-                                                  0, timeleft, 0, 
HYBRID_LOG_CAPERROR, 0, ent);
+                                                  0, timeleft, 0, 
HYBRID_LOG_CAPERROR, 0, ent, __LINE__);
                                rack->r_ctl.bw_rate_cap = 0;
                                return;
                        }
@@ -2178,7 +2254,7 @@ rack_rate_cap_bw(struct tcp_rack *rack, uint64_t *bw, int 
*capped)
                        else {
                                /* TSNH, we should catch it at the send */
                                rack_log_hybrid_bw(rack, rack->rc_tp->snd_max,
-                                                  0, timeleft, lengone, 
HYBRID_LOG_CAPERROR, 0, ent);
+                                                  0, timeleft, lengone, 
HYBRID_LOG_CAPERROR, 0, ent, __LINE__);
                                rack->r_ctl.bw_rate_cap = 0;
                                return;
                        }
@@ -2186,7 +2262,7 @@ rack_rate_cap_bw(struct tcp_rack *rack, uint64_t *bw, int 
*capped)
                if (lenleft == 0) {
                        /* We have it all sent */
                        rack_log_hybrid_bw(rack, rack->rc_tp->snd_max,
-                                          0, timeleft, lenleft, 
HYBRID_LOG_ALLSENT, 0, ent);
+                                          0, timeleft, lenleft, 
HYBRID_LOG_ALLSENT, 0, ent, __LINE__);
                        if (rack->r_ctl.bw_rate_cap)
                                goto normal_ratecap;
                        else
@@ -2210,10 +2286,10 @@ rack_rate_cap_bw(struct tcp_rack *rack, uint64_t *bw, 
int *capped)
                        rack_log_type_pacing_sizes(rack->rc_tp, rack, 
rack->r_ctl.client_suggested_maxseg, orig_max, __LINE__, 5);
                }
                rack_log_hybrid_bw(rack, rack->rc_tp->snd_max,
-                                  calcbw, timeleft, lenleft, 
HYBRID_LOG_CAP_CALC, 0, ent);
+                                  calcbw, timeleft, lenleft, 
HYBRID_LOG_CAP_CALC, 0, ent, __LINE__);
                if ((calcbw > 0) && (*bw > calcbw)) {
                        rack_log_hybrid_bw(rack, rack->rc_tp->snd_max,
-                                          *bw, ent->deadline, lenleft, 
HYBRID_LOG_RATE_CAP, 0, ent);
+                                          *bw, ent->deadline, lenleft, 
HYBRID_LOG_RATE_CAP, 0, ent, __LINE__);
                        *capped = 1;
                        *bw = calcbw;
                }
@@ -2241,7 +2317,7 @@ normal_ratecap:
                *bw = rack->r_ctl.bw_rate_cap;
                rack_log_hybrid_bw(rack, rack->rc_tp->snd_max,
                                   *bw, 0, 0,
-                                  HYBRID_LOG_RATE_CAP, 1, NULL);
+                                  HYBRID_LOG_RATE_CAP, 1, NULL, __LINE__);
        }
 }
 
@@ -2916,7 +2992,7 @@ rack_log_progress_event(struct tcp_rack *rack, struct 
tcpcb *tp, uint32_t tick,
 static void
 rack_log_type_bbrsnd(struct tcp_rack *rack, uint32_t len, uint32_t slot, 
uint32_t cts, struct timeval *tv, int line)
 {
-       if (tcp_bblogging_on(rack->rc_tp)) {
+       if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) {
                union tcp_log_stackspecific log;
 
                memset(&log.u_bbr, 0, sizeof(log.u_bbr));
@@ -3116,7 +3192,7 @@ rack_log_alt_to_to_cancel(struct tcp_rack *rack,
 static void
 rack_log_to_processing(struct tcp_rack *rack, uint32_t cts, int32_t ret, 
int32_t timers)
 {
-       if (tcp_bblogging_on(rack->rc_tp)) {
+       if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) {
                union tcp_log_stackspecific log;
                struct timeval tv;
 
@@ -3819,15 +3895,30 @@ extra_boost:
 static uint32_t
 rack_get_decrease(struct tcp_rack *rack, uint32_t curper, int32_t rtt_diff)
 {
-       /*
+       /*-
         * norm_grad = rtt_diff / minrtt;
         * new_per = curper * (1 - B * norm_grad)
         *
-        * B = rack_gp_decrease_per (default 10%)
+        * B = rack_gp_decrease_per (default 80%)
         * rtt_dif = input var current rtt-diff
         * curper = input var current percentage
         * minrtt = from rack filter
         *
+        * In order to do the floating point calculations above we
+        * do an integer conversion. The code looks confusing so let me
+        * translate it into something that use more variables and
+        * is clearer for us humans :)
+        *
+        * uint64_t norm_grad, inverse, reduce_by, final_result;
+        * uint32_t perf;
+        *
+        * norm_grad = (((uint64_t)rtt_diff * 1000000) /
+        *             (uint64_t)get_filter_small(&rack->r_ctl.rc_gp_min_rtt));
+        * inverse = ((uint64_t)rack_gp_decrease * (uint64_t)1000000) * 
norm_grad;
+        * inverse /= 1000000;
+        * reduce_by = (1000000 - inverse);
+        * final_result = (cur_per * reduce_by) / 1000000;
+        * perf = (uint32_t)final_result;
         */
        uint64_t perf;
 
@@ -3852,7 +3943,7 @@ rack_decrease_highrtt(struct tcp_rack *rack, uint32_t 
curper, uint32_t rtt)
         * result = curper * (1 - (B * ( 1 -  ------          ))
         *                                     gp_srtt
         *
-        * B = rack_gp_decrease_per (default 10%)
+        * B = rack_gp_decrease_per (default .8 i.e. 80)
         * highrttthresh = filter_min * rack_gp_rtt_maxmul
         */
        uint64_t perf;
@@ -3864,6 +3955,20 @@ rack_decrease_highrtt(struct tcp_rack *rack, uint32_t 
curper, uint32_t rtt)
                                     ((uint64_t)rack_gp_decrease_per * 
((uint64_t)1000000 -
                                        ((uint64_t)highrttthresh * 
(uint64_t)1000000) /
                                                    (uint64_t)rtt)) / 100)) 
/(uint64_t)1000000);
+       if (tcp_bblogging_on(rack->rc_tp)) {
+               uint64_t log1;
+
+               log1 = rtt;
+               log1 <<= 32;
+               log1 |= highrttthresh;
+               rack_log_timely(rack,
+                               rack_gp_decrease_per,
+                               (uint64_t)curper,
+                               log1,
+                               perf,
+                               __LINE__,
+                               15);
+       }
        return (perf);
 }
 
@@ -3911,7 +4016,7 @@ rack_decrease_bw_mul(struct tcp_rack *rack, int 
timely_says, uint32_t rtt, int32
                /* Sent in SS */
                if (timely_says == 2) {
                        new_per = rack_decrease_highrtt(rack, 
rack->r_ctl.rack_per_of_gp_ss, rtt);
-                       alt = rack_get_decrease(rack, 
rack->r_ctl.rack_per_of_gp_rec, rtt_diff);
+                       alt = rack_get_decrease(rack, 
rack->r_ctl.rack_per_of_gp_ss, rtt_diff);
                        if (alt < new_per)
                                val = alt;
                        else
@@ -3944,7 +4049,7 @@ rack_decrease_bw_mul(struct tcp_rack *rack, int 
timely_says, uint32_t rtt, int32
                /* Sent in CA */
                if (timely_says == 2) {
                        new_per = rack_decrease_highrtt(rack, 
rack->r_ctl.rack_per_of_gp_ca, rtt);
-                       alt = rack_get_decrease(rack, 
rack->r_ctl.rack_per_of_gp_rec, rtt_diff);
+                       alt = rack_get_decrease(rack, 
rack->r_ctl.rack_per_of_gp_ca, rtt_diff);
                        if (alt < new_per)
                                val = alt;
                        else
@@ -5040,7 +5145,7 @@ rack_do_goodput_measurement(struct tcpcb *tp, struct 
tcp_rack *rack,
            (rack->r_ctl.num_measurements >= rack->r_ctl.req_measurements)) {
                /* We have enough measurements now */
                rack->gp_ready = 1;
-               if ((rack->rc_always_pace && (rack->use_fixed_rate == 0)) ||
+               if (rack->dgp_on ||
                    rack->rack_hibeta)
                        rack_set_cc_pacing(rack);
                if (rack->defer_options)
@@ -6860,21 +6965,20 @@ rack_start_hpts_timer(struct tcp_rack *rack, struct 
tcpcb *tp, uint32_t cts,
                 * even a SACK should not disturb us (with
                 * the exception of r_rr_config 3).
                 */
-               if (rack->r_ctl.rc_hpts_flags & PACE_TMR_RACK) {
+               if ((rack->r_ctl.rc_hpts_flags & PACE_TMR_RACK) ||
+                   (IN_RECOVERY(tp->t_flags))) {
                        if (rack->r_rr_config != 3)
                                tp->t_flags2 |= TF2_DONT_SACK_QUEUE;
                        else if (rack->rc_pace_dnd) {
-                               if (IN_RECOVERY(tp->t_flags)) {
-                                       /*
-                                        * When DND is on, we only let a sack
-                                        * interrupt us if we are not in 
recovery.
-                                        *
-                                        * If DND is off, then we never hit here
-                                        * and let all sacks wake us up.
-                                        *
-                                        */
-                                       tp->t_flags2 |= TF2_DONT_SACK_QUEUE;
-                               }
+                               /*
+                                * When DND is on, we only let a sack
+                                * interrupt us if we are not in recovery.
+                                *
+                                * If DND is off, then we never hit here
+                                * and let all sacks wake us up.
+                                *
+                                */
+                               tp->t_flags2 |= TF2_DONT_SACK_QUEUE;
                        }
                }
                /* For sack attackers we want to ignore sack */
@@ -10357,7 +10461,7 @@ rack_process_to_cumack(struct tcpcb *tp, struct 
tcp_rack *rack, register uint32_
 
        rack->r_wanted_output = 1;
        if (SEQ_GT(th_ack, tp->snd_una))
-           rack->r_ctl.last_cumack_advance = acktime;
+               rack->r_ctl.last_cumack_advance = acktime;
 
        /* Tend any TLP that has been marked for 1/2 the seq space (its old)  */
        if ((rack->rc_last_tlp_acked_set == 1)&&
@@ -10484,6 +10588,7 @@ more:
                }
        }
        /* Now do we consume the whole thing? */
+       rack->r_ctl.last_tmit_time_acked = rsm->r_tim_lastsent[(rsm->r_rtr_cnt 
- 1)];
        if (SEQ_GEQ(th_ack, rsm->r_end)) {
                /* Its all consumed. */
                uint32_t left;
@@ -10619,16 +10724,43 @@ more:
        /* The trim will move th_ack into r_start for us */
        tqhash_trim(rack->r_ctl.tqh, th_ack);
        /* Now do we need to move the mbuf fwd too? */
-       if (rsm->m) {
-               while (rsm->soff >= rsm->m->m_len) {
-                       rsm->soff -= rsm->m->m_len;
-                       rsm->m = rsm->m->m_next;
-                       KASSERT((rsm->m != NULL),
-                               (" nrsm:%p hit at soff:%u null m",
-                                rsm, rsm->soff));
+       {
+               struct mbuf *m;
+               uint32_t soff;
+
+               m = rsm->m;
+               soff = rsm->soff;
+               if (m) {
+                       while (soff >= m->m_len) {
+                               soff -= m->m_len;
+                               KASSERT((m->m_next != NULL),
+                                       (" rsm:%p  off:%u soff:%u m:%p",
+                                        rsm, rsm->soff, soff, m));
+                               m = m->m_next;
+                               if (m == NULL) {
+                                       /*
+                                        * This is a fall-back that prevents a 
panic. In reality
+                                        * we should be able to walk the mbuf's 
and find our place.
+                                        * At this point snd_una has not been 
updated with the sbcut() yet
+                                        * but tqhash_trim did update 
rsm->r_start so the offset calcuation
+                                        * should work fine. This is 
undesirable since we will take cache
+                                        * hits to access the socket buffer. 
And even more puzzling is that
+                                        * it happens occasionally. It should 
not :(
+                                        */
+                                       m = 
sbsndmbuf(&rack->rc_inp->inp_socket->so_snd,
+                                                     (rsm->r_start - 
tp->snd_una),
+                                                     &soff);
+                                       break;
+                               }
+                       }
+                       /*
+                        * Now save in our updated values.
+                        */
+                       rsm->m = m;
+                       rsm->soff = soff;
+                       rsm->orig_m_len = rsm->m->m_len;
+                       rsm->orig_t_space = M_TRAILINGROOM(rsm->m);
                }
-               rsm->orig_m_len = rsm->m->m_len;
-               rsm->orig_t_space = M_TRAILINGROOM(rsm->m);
        }
        if (rack->app_limited_needs_set &&
            SEQ_GEQ(th_ack, tp->gput_seq))
@@ -11516,7 +11648,7 @@ rack_check_bottom_drag(struct tcpcb *tp,
                            (rack->r_ctl.num_measurements >= 
rack->r_ctl.req_measurements)) {
                                /* We have enough measurements now */
                                rack->gp_ready = 1;
-                               if ((rack->rc_always_pace && 
(rack->use_fixed_rate == 0)) ||
+                               if (rack->dgp_on ||
                                    rack->rack_hibeta)
                                        rack_set_cc_pacing(rack);
                                if (rack->defer_options)
@@ -11557,7 +11689,7 @@ rack_log_hybrid(struct tcp_rack *rack, uint32_t seq,
        int do_log;
 
        do_log = tcp_bblogging_on(rack->rc_tp);
-       if (do_log == 0) { 
+       if (do_log == 0) {
                if ((do_log = tcp_bblogging_point_on(rack->rc_tp, 
TCP_BBPOINT_REQ_LEVEL_LOGGING) )== 0)
                        return;
                /* We only allow the three below with point logging on */
@@ -11565,7 +11697,7 @@ rack_log_hybrid(struct tcp_rack *rack, uint32_t seq,
                    (mod != HYBRID_LOG_RULES_SET) &&
                    (mod != HYBRID_LOG_REQ_COMP))
                        return;
-               
+
        }
        if (do_log) {
                union tcp_log_stackspecific log;
@@ -11593,8 +11725,10 @@ rack_log_hybrid(struct tcp_rack *rack, uint32_t seq,
                        log.u_bbr.epoch = (uint32_t)(cur->deadline & 
0x00000000ffffffff);
                        log.u_bbr.lt_epoch = (uint32_t)((cur->deadline >> 32) & 
0x00000000ffffffff) ;
                        log.u_bbr.bbr_state = 1;
+#ifdef TCP_REQUEST_TRK
                        off = (uint64_t)(cur) - 
(uint64_t)(&rack->rc_tp->t_tcpreq_info[0]);
                        log.u_bbr.use_lt_bw = (uint8_t)(off / sizeof(struct 
tcp_sendfile_track));
+#endif
                } else {
                        log.u_bbr.flex2 = err;
                }
@@ -11633,7 +11767,8 @@ rack_set_dgp_hybrid_mode(struct tcp_rack *rack, tcp_seq 
seq, uint32_t len)
        rc_cur = tcp_req_find_req_for_seq(rack->rc_tp, seq);
        if (rc_cur == NULL) {
                /* If not in the beginning what about the end piece */
-               rack_log_hybrid(rack, seq, NULL, HYBRID_LOG_NO_RANGE, __LINE__, 
err);
+               if (rack->rc_hybrid_mode)
+                       rack_log_hybrid(rack, seq, NULL, HYBRID_LOG_NO_RANGE, 
__LINE__, err);
                rc_cur = tcp_req_find_req_for_seq(rack->rc_tp, (seq + len - 1));
        } else {
                err = 12345;
@@ -11644,12 +11779,17 @@ rack_set_dgp_hybrid_mode(struct tcp_rack *rack, 
tcp_seq seq, uint32_t len)
                rack->r_ctl.client_suggested_maxseg = 0;
                rack->rc_catch_up = 0;
                rack->r_ctl.bw_rate_cap = 0;
-               rack_log_hybrid(rack, (seq + len - 1), NULL, 
HYBRID_LOG_NO_RANGE, __LINE__, err);
+               if (rack->rc_hybrid_mode)
+                       rack_log_hybrid(rack, (seq + len - 1), NULL, 
HYBRID_LOG_NO_RANGE, __LINE__, err);
                if (rack->r_ctl.rc_last_sft) {
                        rack->r_ctl.rc_last_sft = NULL;
                }
                return;
        }
+       if ((rc_cur->hybrid_flags & TCP_HYBRID_PACING_WASSET) == 0) {
+               /* This entry was never setup for hybrid pacing on/off etc */
+               return;
+       }
        /*
         * Ok if we have a new entry *or* have never
         * set up an entry we need to proceed. If
@@ -11661,7 +11801,8 @@ rack_set_dgp_hybrid_mode(struct tcp_rack *rack, tcp_seq 
seq, uint32_t len)
        if ((rack->r_ctl.rc_last_sft != NULL) &&
            (rack->r_ctl.rc_last_sft == rc_cur)) {
                /* Its already in place */
-               rack_log_hybrid(rack, seq, rc_cur, HYBRID_LOG_ISSAME, __LINE__, 
0);
+               if (rack->rc_hybrid_mode)
+                       rack_log_hybrid(rack, seq, rc_cur, HYBRID_LOG_ISSAME, 
__LINE__, 0);
                return;
        }
        if (rack->rc_hybrid_mode == 0) {
@@ -11757,7 +11898,8 @@ rack_chk_req_and_hybrid_on_out(struct tcp_rack *rack, 
tcp_seq seq, uint32_t len,
                 * way it will complete when all of it is acked.
                 */
                ent->end_seq = (seq + len);
-               rack_log_hybrid_bw(rack, seq, len, 0, 0, HYBRID_LOG_EXTEND, 0, 
ent);
+               if (rack->rc_hybrid_mode)
+                       rack_log_hybrid_bw(rack, seq, len, 0, 0, 
HYBRID_LOG_EXTEND, 0, ent, __LINE__);
        }
        /* Now validate we have set the send time of this one */
        if ((ent->flags & TCP_TRK_TRACK_FLG_FSND) == 0) {
@@ -11941,6 +12083,7 @@ rack_req_check_for_comp(struct tcp_rack *rack, tcp_seq 
th_ack)
                /* Ok this ack frees it */
                rack_log_hybrid(rack, th_ack,
                                ent, HYBRID_LOG_REQ_COMP, __LINE__, 0);
+               rack_log_hybrid_sends(rack, ent, __LINE__);
                /* calculate the time based on the ack arrival */
                data = ent->end - ent->start;
                laa = tcp_tv_to_lusectick(&rack->r_ctl.act_rcv_time);
@@ -11962,7 +12105,7 @@ rack_req_check_for_comp(struct tcp_rack *rack, tcp_seq 
th_ack)
                        cbw /= tim;
                else
                        cbw = 0;
-               rack_log_hybrid_bw(rack, th_ack, cbw, tim, data, 
HYBRID_LOG_BW_MEASURE, 0, ent);
+               rack_log_hybrid_bw(rack, th_ack, cbw, tim, data, 
HYBRID_LOG_BW_MEASURE, 0, ent, __LINE__);
                /*
                 * Check to see if we are freeing what we are pointing to send 
wise
                 * if so be sure to NULL the pointer so we know we are no longer
@@ -14254,7 +14397,7 @@ rack_set_pace_segments(struct tcpcb *tp, struct 
tcp_rack *rack, uint32_t line, u
                        if (fill_override)
                                rate_wanted = *fill_override;
                        else
-                               rate_wanted = rack_get_output_bw(rack, bw_est, 
NULL, NULL);
+                               rate_wanted = rack_get_gp_est(rack);
                        if (rate_wanted) {
                                /* We have something */
                                rack->r_ctl.rc_pace_max_segs = 
rack_get_pacing_len(rack,
@@ -14885,8 +15028,6 @@ rack_init(struct tcpcb *tp, void **ptr)
         */
        rack->rc_new_rnd_needed = 1;
        rack->r_ctl.rc_split_limit = V_tcp_map_split_limit;
-       rack->r_ctl.rc_saved_beta.beta = V_newreno_beta_ecn;
-       rack->r_ctl.rc_saved_beta.beta_ecn = V_newreno_beta_ecn;
        /* We want abe like behavior as well */
        rack->r_ctl.rc_saved_beta.newreno_flags |= CC_NEWRENO_BETA_ECN_ENABLED;
        rack->r_ctl.rc_reorder_fade = rack_reorder_fade;
@@ -14924,8 +15065,18 @@ rack_init(struct tcpcb *tp, void **ptr)
        rack->rc_user_set_max_segs = rack_hptsi_segments;
        rack->rc_force_max_seg = 0;
        TAILQ_INIT(&rack->r_ctl.opt_list);
-       if (rack_hibeta_setting)
+       rack->r_ctl.rc_saved_beta.beta = V_newreno_beta_ecn;
+       rack->r_ctl.rc_saved_beta.beta_ecn = V_newreno_beta_ecn;
+       if (rack_hibeta_setting) {
                rack->rack_hibeta = 1;
+               if ((rack_hibeta_setting >= 50) &&
+                   (rack_hibeta_setting <= 100)) {
+                       rack->r_ctl.rc_saved_beta.beta = rack_hibeta_setting;
+                       rack->r_ctl.saved_hibeta = rack_hibeta_setting;
+               }
+       } else {
+               rack->r_ctl.saved_hibeta = 50;
+       }
        rack->r_ctl.rc_reorder_shift = rack_reorder_thresh;
        rack->r_ctl.rc_pkt_delay = rack_pkt_delay;
        rack->r_ctl.rc_tlp_cwnd_reduce = rack_lower_cwnd_at_tlp;
@@ -14941,7 +15092,7 @@ rack_init(struct tcpcb *tp, void **ptr)
                rack->rc_gp_no_rec_chg = 1;
        if (rack_pace_every_seg && tcp_can_enable_pacing()) {
                rack->rc_always_pace = 1;
-               if ((rack->gp_ready) && (rack->rc_always_pace && 
(rack->use_fixed_rate == 0)))
+               if (rack->rack_hibeta)
                        rack_set_cc_pacing(rack);
        } else
                rack->rc_always_pace = 0;
@@ -17204,6 +17355,19 @@ rack_log_pacing_delay_calc(struct tcp_rack *rack, 
uint32_t len, uint32_t slot,
                union tcp_log_stackspecific log;
                struct timeval tv;
 
+               if (rack_verbose_logging == 0) {
+                       /*
+                        * We are not verbose; screen out all but
+                        * the ones we always want.
+                        */
+                       if ((method != 2) &&
+                           (method != 3) &&
+                           (method != 7) &&
+                           (method != 14) &&
+                           (method != 20)) {
+                               return;
+                       }
+               }
                memset(&log, 0, sizeof(log));
                log.u_bbr.flex1 = slot;
                log.u_bbr.flex2 = len;
@@ -17307,6 +17471,60 @@ rack_get_pacing_len(struct tcp_rack *rack, uint64_t 
bw, uint32_t mss)
        return (new_tso);
 }
 
+static uint64_t
+rack_arrive_at_discounted_rate(struct tcp_rack *rack, uint64_t window_input, 
uint32_t *rate_set, uint32_t *gain_b)
+{
+       uint64_t reduced_win;
+       uint32_t gain;
+
+       if (window_input < rc_init_window(rack)) {
+               /*
+                * The cwnd is collapsed to
+                * nearly zero, maybe because of a time-out?
+                * Lets drop back to the lt-bw.
+                */
+               reduced_win = rack_get_lt_bw(rack);
+               /* Set the flag so the caller knows it's a rate and not a 
reduced window */
+               *rate_set = 1;
+               gain = 100;
+       } else if  (IN_RECOVERY(rack->rc_tp->t_flags)) {
+               /*
+                * If we are in recovery our cwnd needs to be less for
+                * our pacing consideration.
+                */
+               if (rack->rack_hibeta == 0) {
+                       reduced_win = window_input / 2;
+                       gain = 50;
+               } else {
+                       reduced_win = window_input * rack->r_ctl.saved_hibeta;
+                       reduced_win /= 100;
+                       gain = rack->r_ctl.saved_hibeta;
+               }
+       } else {
+               /*
+                * Apply Timely factor to increase/decrease the
+                * amount we are pacing at.
+                */
+               gain = rack_get_output_gain(rack, NULL);
+               if (gain > rack_gain_p5_ub) {
+                       gain = rack_gain_p5_ub;
+               }
+               reduced_win = window_input * gain;
+               reduced_win /= 100;
+       }
+       if (gain_b != NULL)
+               *gain_b = gain;
+       /*
+        * What is being returned here is a trimmed down
+        * window value in all cases where rate_set is left
+        * at 0. In one case we actually return the rate (lt_bw).
+        * The "reduced_win" is returned as a slimmed down cwnd that
+        * is then calculated by the caller into a rate when rate_set
+        * is 0.
+        */
+       return (reduced_win);
+}
+
 static int32_t
 pace_to_fill_cwnd(struct tcp_rack *rack, int32_t slot, uint32_t len, uint32_t 
segsiz, int *capped, uint64_t *rate_wanted, uint8_t non_paced)
 {
@@ -17331,12 +17549,21 @@ pace_to_fill_cwnd(struct tcp_rack *rack, int32_t 
slot, uint32_t len, uint32_t se
         * and the smallest send window.
         */
        fill_bw = min(rack->rc_tp->snd_cwnd, rack->r_ctl.cwnd_to_use);
+       if (rack->rc_fillcw_apply_discount) {
+               uint32_t rate_set = 0;
+
+               fill_bw = rack_arrive_at_discounted_rate(rack, fill_bw, 
&rate_set, NULL);
+               if (rate_set) {
+                       goto at_lt_bw;
+               }
+       }
        /* Take the rwnd if its smaller */
        if (fill_bw > rack->rc_tp->snd_wnd)
                fill_bw = rack->rc_tp->snd_wnd;
        /* Now lets make it into a b/w */
        fill_bw *= (uint64_t)HPTS_USEC_IN_SEC;
        fill_bw /= (uint64_t)rack->r_ctl.rc_last_us_rtt;
+at_lt_bw:
        if (rack->r_fill_less_agg) {
                /*
                 * We want the average of the rate_wanted
@@ -17404,8 +17631,9 @@ pace_to_fill_cwnd(struct tcp_rack *rack, int32_t slot, 
uint32_t len, uint32_t se
                }
        }
        if (rack->r_ctl.bw_rate_cap && (fill_bw > rack->r_ctl.bw_rate_cap)) {
-               rack_log_hybrid_bw(rack, rack->rc_tp->snd_max,
-                                  fill_bw, 0, 0, HYBRID_LOG_RATE_CAP, 2, NULL);
+               if (rack->rc_hybrid_mode)
+                       rack_log_hybrid_bw(rack, rack->rc_tp->snd_max,
+                                          fill_bw, 0, 0, HYBRID_LOG_RATE_CAP, 
2, NULL, __LINE__);
                fill_bw = rack->r_ctl.bw_rate_cap;
        }
        /*
@@ -17513,9 +17741,27 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct 
tcpcb *tp, uint32_t len, str
                           (rack->r_ctl.gp_bw == 0)) {
                        /* no way to yet do an estimate */
                        bw_est = rate_wanted = 0;
-               } else {
+               } else if (rack->dgp_on) {
                        bw_est = rack_get_bw(rack);
                        rate_wanted = rack_get_output_bw(rack, bw_est, rsm, 
&capped);
+               } else {
+                       uint32_t gain, rate_set = 0;
+
+                       rate_wanted = min(rack->rc_tp->snd_cwnd, 
rack->r_ctl.cwnd_to_use);
+                       rate_wanted = rack_arrive_at_discounted_rate(rack, 
rate_wanted, &rate_set, &gain);
+                       if (rate_set == 0) {
+                               if (rate_wanted > rack->rc_tp->snd_wnd)
+                                       rate_wanted = rack->rc_tp->snd_wnd;
+                               /* Now lets make it into a b/w */
+                               rate_wanted *= (uint64_t)HPTS_USEC_IN_SEC;
+                               rate_wanted /= 
(uint64_t)rack->r_ctl.rc_last_us_rtt;
+                       }
+                       bw_est = rate_wanted;
+                       rack_log_pacing_delay_calc(rack, rack->rc_tp->snd_cwnd,
+                                                  rack->r_ctl.cwnd_to_use,
+                                                  rate_wanted, bw_est,
+                                                  rack->r_ctl.rc_last_us_rtt,
+                                                  88, __LINE__, NULL, gain);
                }
                if ((bw_est == 0) || (rate_wanted == 0) ||
                    ((rack->gp_ready == 0) && (rack->use_fixed_rate == 0))) {
@@ -17534,16 +17780,16 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct 
tcpcb *tp, uint32_t len, str
                 * means we may be off if we are larger than 1500 bytes
                 * or smaller. But this just makes us more conservative.
                 */
-               
+
                oh =  (tp->t_maxseg - segsiz) + sizeof(struct tcphdr);
                if (rack->r_is_v6) {
 #ifdef INET6
                        oh += sizeof(struct ip6_hdr);
-#endif                 
+#endif
                } else {
 #ifdef INET
                        oh += sizeof(struct ip);
-#endif                 
+#endif
                }
                /* We add a fixed 14 for the ethernet header */
                oh += 14;
@@ -17602,6 +17848,7 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct 
tcpcb *tp, uint32_t len, str
                prev_fill = rack->r_via_fill_cw;
                if ((rack->rc_pace_to_cwnd) &&
                    (capped == 0) &&
+                   (rack->dgp_on == 1) &&
                    (rack->use_fixed_rate == 0) &&
                    (rack->in_probe_rtt == 0) &&
                    (IN_FASTRECOVERY(rack->rc_tp->t_flags) == 0)) {
@@ -17652,8 +17899,8 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct 
tcpcb *tp, uint32_t len, str
                                if (rack->r_ctl.crte) {
                                        rack->rack_hdrw_pacing = 1;
                                        rack->r_ctl.rc_pace_max_segs = 
tcp_get_pacing_burst_size_w_divisor(tp, rate_wanted, segsiz,
-                                                                         
pace_one, rack->r_ctl.crte,
-                                                                         NULL, 
rack->r_ctl.pace_len_divisor);
+                                                                               
                           pace_one, rack->r_ctl.crte,
+                                                                               
                           NULL, rack->r_ctl.pace_len_divisor);
                                        rack_log_hdwr_pacing(rack,
                                                             rate_wanted, 
rack->r_ctl.crte->rate, __LINE__,
                                                             err, 0);
@@ -17695,8 +17942,8 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct 
tcpcb *tp, uint32_t len, str
                                                 * do allow hardware pacing to 
be restarted.
                                                 */
                                                rack_log_hdwr_pacing(rack,
-                                                            bw_est, 
rack->r_ctl.crte->rate, __LINE__,
-                                                            0, 5);
+                                                                    bw_est, 
rack->r_ctl.crte->rate, __LINE__,
+                                                                    0, 5);
                                                
tcp_rel_pacing_rate(rack->r_ctl.crte, rack->rc_tp);
                                                rack->r_ctl.crte = NULL;
                                                rack->rack_attempt_hdwr_pace = 
0;
@@ -17705,11 +17952,11 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct 
tcpcb *tp, uint32_t len, str
                                                goto done_w_hdwr;
                                        }
                                        nrte = 
tcp_chg_pacing_rate(rack->r_ctl.crte,
-                                                                           
rack->rc_tp,
-                                                                           
rack->rc_inp->inp_route.ro_nh->nh_ifp,
-                                                                           
rate_wanted,
-                                                                           
RS_PACING_GEQ,
-                                                                           
&err, &rack->r_ctl.crte_prev_rate);
+                                                                  rack->rc_tp,
+                                                                  
rack->rc_inp->inp_route.ro_nh->nh_ifp,
+                                                                  rate_wanted,
+                                                                  
RS_PACING_GEQ,
+                                                                  &err, 
&rack->r_ctl.crte_prev_rate);
                                        if (nrte == NULL) {
                                                /*
                                                 * Lost the rate, lets drop 
hardware pacing
@@ -17725,8 +17972,8 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct 
tcpcb *tp, uint32_t len, str
                                        } else if (nrte != rack->r_ctl.crte) {
                                                rack->r_ctl.crte = nrte;
                                                rack->r_ctl.rc_pace_max_segs = 
tcp_get_pacing_burst_size_w_divisor(tp, rate_wanted,
-                                                                               
 segsiz, pace_one, rack->r_ctl.crte,
-                                                                               
 NULL, rack->r_ctl.pace_len_divisor);
+                                                                               
                                   segsiz, pace_one, rack->r_ctl.crte,
+                                                                               
                                   NULL, rack->r_ctl.pace_len_divisor);
                                                rack_log_hdwr_pacing(rack,
                                                                     
rate_wanted, rack->r_ctl.crte->rate, __LINE__,
                                                                     err, 2);
@@ -17747,7 +17994,7 @@ rack_get_pacing_delay(struct tcp_rack *rack, struct 
tcpcb *tp, uint32_t len, str
                                                   98, __LINE__, NULL, 0);
                        slot = minslot;
                }
-done_w_hdwr:
+       done_w_hdwr:
                if (rack_limit_time_with_srtt &&
                    (rack->use_fixed_rate == 0) &&
                    (rack->rack_hdrw_pacing == 0)) {
@@ -18070,7 +18317,7 @@ rack_log_fsb(struct tcp_rack *rack, struct tcpcb *tp, 
struct socket *so, uint32_
             unsigned ipoptlen, int32_t orig_len, int32_t len, int error,
             int rsm_is_null, int optlen, int line, uint16_t mode)
 {
-       if (tcp_bblogging_on(rack->rc_tp)) {
+       if (rack_verbose_logging && tcp_bblogging_on(rack->rc_tp)) {
                union tcp_log_stackspecific log;
                struct timeval tv;
 
@@ -18869,6 +19116,10 @@ rack_fast_rsm_output(struct tcpcb *tp, struct tcp_rack 
*rack, struct rack_sendma
                        rack->r_ctl.last_sent_tlp_seq = rsm->r_start;
                        rack->r_ctl.last_sent_tlp_len = rsm->r_end - 
rsm->r_start;
                }
+               if (rack->r_ctl.rc_prr_sndcnt >= len)
+                       rack->r_ctl.rc_prr_sndcnt -= len;
+               else
+                       rack->r_ctl.rc_prr_sndcnt = 0;
        }
        tp->t_flags &= ~(TF_ACKNOW | TF_DELACK);
        rack->forced_ack = 0;   /* If we send something zap the FA flag */
@@ -19049,6 +19300,7 @@ rack_fast_output(struct tcpcb *tp, struct tcp_rack 
*rack, uint64_t ts_val,
                m = NULL;
                goto failed;
        }
+       rack->r_ctl.cwnd_to_use = tp->snd_cwnd;
        startseq = tp->snd_max;
        segsiz = min(ctf_fixed_maxseg(tp), rack->r_ctl.rc_pace_min_segs);
        inp = rack->rc_inp;
@@ -22402,6 +22654,7 @@ rack_set_dgp(struct tcp_rack *rack)
                if (tcp_can_enable_pacing() == 0)
                        return (EBUSY);
        }
+       rack->rc_fillcw_apply_discount = 0;
        rack->dgp_on = 1;
        rack->rc_always_pace = 1;
        rack->use_fixed_rate = 0;
@@ -22490,6 +22743,26 @@ rack_set_profile(struct tcp_rack *rack, int prof)
                err = rack_set_dgp(rack);
                if (err)
                        return (err);
+       } else if (prof == 5) {
+               err = rack_set_dgp(rack);
+               if (err)
+                       return (err);
+               /*
+                * By turning DGP off we change the rate
+                * picked to be only the one the cwnd and rtt
+                * get us.
+                */
+               rack->dgp_on = 0;
+       } else if (prof == 6) {
+               err = rack_set_dgp(rack);
+               if (err)
+                       return (err);
+               /*
+                * Profile 6 tweaks DGP so that it will apply to
+                * fill-cw the same settings that profile5 does
+                * to replace DGP. It then gets the max(dgp-rate, 
fillcw(discounted)).
+                */
+               rack->rc_fillcw_apply_discount = 1;
        } else if (prof == 0) {
                /* This changes things back to the default settings */
                rack->dgp_on = 0;
@@ -22506,7 +22779,7 @@ rack_set_profile(struct tcp_rack *rack, int prof)
                }
                if (rack_pace_every_seg && tcp_can_enable_pacing()) {
                        rack->rc_always_pace = 1;
-                       if ((rack->gp_ready) && (rack->use_fixed_rate == 0))
+                       if (rack->rack_hibeta)
                                rack_set_cc_pacing(rack);
                } else
                        rack->rc_always_pace = 0;
@@ -22658,7 +22931,7 @@ process_hybrid_pacing(struct tcp_rack *rack, struct 
tcp_hybrid_req *hybrid)
                }
        }
        /* Now set in our flags */
-       sft->hybrid_flags = hybrid->hybrid_flags;
+       sft->hybrid_flags = hybrid->hybrid_flags | TCP_HYBRID_PACING_WASSET;
        if (hybrid->hybrid_flags & TCP_HYBRID_PACING_CSPR)
                sft->cspr = hybrid->cspr;
        else
@@ -22727,10 +23000,25 @@ rack_process_option(struct tcpcb *tp, struct tcp_rack 
*rack, int sopt_name,
                break;
        case TCP_RACK_HI_BETA:
                RACK_OPTS_INC(tcp_rack_hi_beta);
-               if (optval)
+               if (optval > 0) {
                        rack->rack_hibeta = 1;
-               else
+                       if ((optval >= 50) &&
+                           (optval <= 100)) {
+                               /*
+                                * User wants to set a custom beta.
+                                */
+                               rack->r_ctl.saved_hibeta = optval;
+                               if (rack->rc_pacing_cc_set)
+                                       rack_undo_cc_pacing(rack);
+                               rack->r_ctl.rc_saved_beta.beta = optval;
+                       }
+                       if (rack->rc_pacing_cc_set == 0)
+                               rack_set_cc_pacing(rack);
+               } else {
                        rack->rack_hibeta = 0;
+                       if (rack->rc_pacing_cc_set)
+                               rack_undo_cc_pacing(rack);
+               }
                break;
        case TCP_RACK_PACING_BETA:
                RACK_OPTS_INC(tcp_rack_beta);
@@ -23003,7 +23291,7 @@ rack_process_option(struct tcpcb *tp, struct tcp_rack 
*rack, int sopt_name,
*** 94 LINES SKIPPED ***

Reply via email to