svn commit: r365501 - head/sys/netinet/tcp_stacks

2020-09-09 Thread Randall Stewart
Author: rrs
Date: Wed Sep  9 11:11:50 2020
New Revision: 365501
URL: https://svnweb.freebsd.org/changeset/base/365501

Log:
  So it turns out that syzkaller hit another crash. It has to do with switching
  stacks with a SENT_FIN outstanding. Both rack and bbr will only send a
  FIN if all data is ack'd so this must be enforced. Also if the previous stack
  sent the FIN we need to make sure in rack that when we manufacture the
  "unknown" sends that we include the proper HAS_FIN bits.
  
  Note for BBR we take a simpler approach and just refuse to switch.
  
  Sponsored by: Netflix Inc.
  Differential Revision:https://reviews.freebsd.org/D26269

Modified:
  head/sys/netinet/tcp_stacks/bbr.c
  head/sys/netinet/tcp_stacks/rack.c

Modified: head/sys/netinet/tcp_stacks/bbr.c
==
--- head/sys/netinet/tcp_stacks/bbr.c   Wed Sep  9 09:08:09 2020
(r365500)
+++ head/sys/netinet/tcp_stacks/bbr.c   Wed Sep  9 11:11:50 2020
(r365501)
@@ -10281,6 +10281,8 @@ bbr_handoff_ok(struct tcpcb *tp)
 */
return (EAGAIN);
}
+   if (tp->t_flags & TF_SENTFIN)
+   return (EINVAL);
if ((tp->t_flags & TF_SACK_PERMIT) || bbr_sack_not_required) {
return (0);
}

Modified: head/sys/netinet/tcp_stacks/rack.c
==
--- head/sys/netinet/tcp_stacks/rack.c  Wed Sep  9 09:08:09 2020
(r365500)
+++ head/sys/netinet/tcp_stacks/rack.c  Wed Sep  9 11:11:50 2020
(r365501)
@@ -10451,7 +10451,12 @@ rack_init(struct tcpcb *tp)
rsm->r_rtr_cnt = 1;
rsm->r_rtr_bytes = 0;
rsm->r_start = tp->snd_una;
-   rsm->r_end = tp->snd_max;
+   if (tp->t_flags & TF_SENTFIN) {
+   rsm->r_end = tp->snd_max - 1;
+   rsm->r_flags |= RACK_HAS_FIN;
+   } else {
+   rsm->r_end = tp->snd_max;
+   }
rsm->usec_orig_send = us_cts;
rsm->r_dupack = 0;
insret = RB_INSERT(rack_rb_tree_head, >r_ctl.rc_mtree, 
rsm);
@@ -10518,8 +10523,21 @@ rack_handoff_ok(struct tcpcb *tp)
if ((tp->t_state == TCPS_SYN_SENT) ||
(tp->t_state == TCPS_SYN_RECEIVED)) {
/*
-* We really don't know you have to get to ESTAB or beyond
-* to tell.
+* We really don't know if you support sack, 
+* you have to get to ESTAB or beyond to tell.
+*/
+   return (EAGAIN);
+   }
+   if ((tp->t_flags & TF_SENTFIN) && ((tp->snd_max - tp->snd_una) > 1)) {
+   /*
+* Rack will only send a FIN after all data is acknowledged.
+* So in this case we have more data outstanding. We can't
+* switch stacks until either all data and only the FIN
+* is left (in which case rack_init() now knows how
+* to deal with that)  all is acknowledged and we
+* are only left with incoming data, though why you
+* would want to switch to rack after all data is acknowledged
+* I have no idea (rrs)!
 */
return (EAGAIN);
}
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r363725 - head/sys/netinet

2020-07-31 Thread Randall Stewart
Author: rrs
Date: Fri Jul 31 10:03:32 2020
New Revision: 363725
URL: https://svnweb.freebsd.org/changeset/base/363725

Log:
  The recent changes to move the ref count increment
  back from the end of the function created an issue.
  If one of the routines returns NULL during setup
  we have inp's with extra references (which is why
  the increment was at the end).
  
  Also the stack switch return code was being ignored
  and actually has meaning: if the stack cannot take over
  it should return NULL.
  
  Fix both of these situations by being sure to test the
  return code and of course in any case of return NULL (there
  are 3) make sure we properly reduce the ref count.
  
  Sponsored by: Netflix Inc.
  Differential Revision:https://reviews.freebsd.org/D25903

Modified:
  head/sys/netinet/tcp_subr.c

Modified: head/sys/netinet/tcp_subr.c
==
--- head/sys/netinet/tcp_subr.c Fri Jul 31 07:37:08 2020(r363724)
+++ head/sys/netinet/tcp_subr.c Fri Jul 31 10:03:32 2020(r363725)
@@ -1713,6 +1713,7 @@ tcp_newtcpcb(struct inpcb *inp)
if (CC_ALGO(tp)->cb_init(tp->ccv) > 0) {
if (tp->t_fb->tfb_tcp_fb_fini)
(*tp->t_fb->tfb_tcp_fb_fini)(tp, 1);
+   in_pcbrele_wlocked(inp);
refcount_release(>t_fb->tfb_refcnt);
uma_zfree(V_tcpcb_zone, tm);
return (NULL);
@@ -1723,6 +1724,7 @@ tcp_newtcpcb(struct inpcb *inp)
if (khelp_init_osd(HELPER_CLASS_TCP, tp->osd)) {
if (tp->t_fb->tfb_tcp_fb_fini)
(*tp->t_fb->tfb_tcp_fb_fini)(tp, 1);
+   in_pcbrele_wlocked(inp);
refcount_release(>t_fb->tfb_refcnt);
uma_zfree(V_tcpcb_zone, tm);
return (NULL);
@@ -1783,7 +1785,12 @@ tcp_newtcpcb(struct inpcb *inp)
tcp_log_tcpcbinit(tp);
 #endif
if (tp->t_fb->tfb_tcp_fb_init) {
-   (*tp->t_fb->tfb_tcp_fb_init)(tp);
+   if ((*tp->t_fb->tfb_tcp_fb_init)(tp)) {
+   refcount_release(>t_fb->tfb_refcnt);
+   in_pcbrele_wlocked(inp);
+   uma_zfree(V_tcpcb_zone, tm);
+   return (NULL);
+   }
}
 #ifdef STATS
if (V_tcp_perconn_stats_enable == 1)
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r362234 - head/sys/netinet/tcp_stacks

2020-06-16 Thread Randall Stewart
Author: rrs
Date: Tue Jun 16 18:16:45 2020
New Revision: 362234
URL: https://svnweb.freebsd.org/changeset/base/362234

Log:
  So in doing final checks on OCA firmware with all the latest tweaks the 
dup-ack checking
  packet drill script was failing with a number of unexpected acks. So it turns
  out if you have the default recvwin set up to 1Meg (like OCA's do) and you
  have no window scaling (like the dupack checking code) then we have another
  case where we are always trying to update the rwnd and sending an
  ack when we should not.
  
  Sponsored by: Netflix Inc.
  Differential Revision:https://reviews.freebsd.org/D25298

Modified:
  head/sys/netinet/tcp_stacks/bbr.c
  head/sys/netinet/tcp_stacks/rack.c

Modified: head/sys/netinet/tcp_stacks/bbr.c
==
--- head/sys/netinet/tcp_stacks/bbr.c   Tue Jun 16 17:45:23 2020
(r362233)
+++ head/sys/netinet/tcp_stacks/bbr.c   Tue Jun 16 18:16:45 2020
(r362234)
@@ -12157,8 +12157,8 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeva
 * have gotten more data into the socket buffer to
 * send.
 */
-   recwin = min(max(sbspace(>so_rcv), 0),
-   TCP_MAXWIN << tp->rcv_scale);
+   recwin = lmin(lmax(sbspace(>so_rcv), 0),
+ (long)TCP_MAXWIN << tp->rcv_scale);
if ((bbr_window_update_needed(tp, so, recwin, maxseg) 
== 0) &&
((tcp_outflags[tp->t_state] & TH_RST) == 0) &&
((sbavail(sb) + ((tcp_outflags[tp->t_state] & 
TH_FIN) ? 1 : 0)) <=
@@ -12839,8 +12839,8 @@ recheck_resend:
ipoptlen == 0)
tso = 1;
 
-   recwin = min(max(sbspace(>so_rcv), 0),
-   TCP_MAXWIN << tp->rcv_scale);
+   recwin = lmin(lmax(sbspace(>so_rcv), 0),
+   (long)TCP_MAXWIN << tp->rcv_scale);
/*
 * Sender silly window avoidance.   We transmit under the following
 * conditions when len is non-zero:

Modified: head/sys/netinet/tcp_stacks/rack.c
==
--- head/sys/netinet/tcp_stacks/rack.c  Tue Jun 16 17:45:23 2020
(r362233)
+++ head/sys/netinet/tcp_stacks/rack.c  Tue Jun 16 18:16:45 2020
(r362234)
@@ -12750,7 +12750,8 @@ again:
flags &= ~TH_FIN;
}
}
-   recwin = sbspace(>so_rcv);
+   recwin = lmin(lmax(sbspace(>so_rcv), 0),
+   (long)TCP_MAXWIN << tp->rcv_scale);
 
/*
 * Sender silly window avoidance.   We transmit under the following
@@ -13656,8 +13657,6 @@ send:
if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt) &&
recwin < (long)(tp->rcv_adv - tp->rcv_nxt))
recwin = (long)(tp->rcv_adv - tp->rcv_nxt);
-   if (recwin > (long)TCP_MAXWIN << tp->rcv_scale)
-   recwin = (long)TCP_MAXWIN << tp->rcv_scale;
}
 
/*
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r362225 - head/sys/netinet/tcp_stacks

2020-06-16 Thread Randall Stewart
Author: rrs
Date: Tue Jun 16 12:26:23 2020
New Revision: 362225
URL: https://svnweb.freebsd.org/changeset/base/362225

Log:
  So it turns out rack has a shortcoming in dup-ack counting. It counts the 
dupacks but
  then does not properly respond to them. This is because a few bits 
are missing.
  BBR actually does properly respond (though it also sends a TLP which is 
interesting and
  maybe something to fix).
  
  Sponsored by: Netflix Inc.
  Differential Revision:https://reviews.freebsd.org/D25294

Modified:
  head/sys/netinet/tcp_stacks/rack.c

Modified: head/sys/netinet/tcp_stacks/rack.c
==
--- head/sys/netinet/tcp_stacks/rack.c  Tue Jun 16 12:21:55 2020
(r362224)
+++ head/sys/netinet/tcp_stacks/rack.c  Tue Jun 16 12:26:23 2020
(r362225)
@@ -4588,7 +4588,7 @@ activate_rxt:
goto activate_rxt;
}
/* Convert from ms to usecs */
-   if (rsm->r_flags & RACK_SACK_PASSED) {
+   if ((rsm->r_flags & RACK_SACK_PASSED) || (rsm->r_dupack >= 
DUP_ACK_THRESHOLD)) {
if ((tp->t_flags & TF_SENTFIN) &&
((tp->snd_max - tp->snd_una) == 1) &&
(rsm->r_flags & RACK_HAS_FIN)) {
@@ -6237,7 +6237,7 @@ rack_log_output(struct tcpcb *tp, struct tcpopt *to, i
 * or FIN if seq_out is adding more on and a FIN is present
 * (and we are not resending).
 */
-   if ((th_flags & TH_SYN) && (seq_out == tp->iss)) 
+   if ((th_flags & TH_SYN) && (seq_out == tp->iss))
len++;
if (th_flags & TH_FIN)
len++;
@@ -8190,6 +8190,7 @@ rack_strike_dupack(struct tcp_rack *rack)
rsm->r_dupack++;
if (rsm->r_dupack >= DUP_ACK_THRESHOLD) {
rack->r_wanted_output = 1;
+   rack->r_timer_override = 1;
rack_log_retran_reason(rack, rsm, __LINE__, 1, 3);
} else {
rack_log_retran_reason(rack, rsm, __LINE__, 0, 3);
@@ -11359,7 +11360,8 @@ check_it:
if (rsm->r_flags & RACK_ACKED) {
return (NULL);
}
-   if ((rsm->r_flags & RACK_SACK_PASSED) == 0) {
+   if (((rsm->r_flags & RACK_SACK_PASSED) == 0) &&
+   (rsm->r_dupack < DUP_ACK_THRESHOLD)) {
/* Its not yet ready */
return (NULL);
}
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r362113 - in head/sys/netinet: . tcp_stacks

2020-06-12 Thread Randall Stewart
Author: rrs
Date: Fri Jun 12 19:56:19 2020
New Revision: 362113
URL: https://svnweb.freebsd.org/changeset/base/362113

Log:
  So it turns out with the right window scaling you can get the code in all 
stacks to
  always want to do a window update, even when no data can be sent. Now in
  cases where you are not pacing that's probably ok, you just send an extra
  window update or two. However with bbr (and rack if it's paced) every time
  the pacer goes off it's going to send a "window update".
  
  Also in testing bbr I have found that if we are not responding to
  data right away we end up staying in startup but incorrectly holding
  a pacing gain of 192 (a loss). This is because the idle window code
  does not restrict itself to only work with PROBE_BW. In all other
  states you don't want it doing a PROBE_BW state change.
  
  Sponsored by: Netflix Inc.
  Differential Revision:https://reviews.freebsd.org/D25247

Modified:
  head/sys/netinet/tcp_output.c
  head/sys/netinet/tcp_stacks/bbr.c
  head/sys/netinet/tcp_stacks/rack.c

Modified: head/sys/netinet/tcp_output.c
==
--- head/sys/netinet/tcp_output.c   Fri Jun 12 18:41:12 2020
(r362112)
+++ head/sys/netinet/tcp_output.c   Fri Jun 12 19:56:19 2020
(r362113)
@@ -655,7 +655,10 @@ after_sack_rexmit:
adv = recwin;
if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt)) {
oldwin = (tp->rcv_adv - tp->rcv_nxt);
-   adv -= oldwin;
+   if (adv > oldwin)
+   adv -= oldwin;
+   else
+   adv = 0;
} else
oldwin = 0;
 

Modified: head/sys/netinet/tcp_stacks/bbr.c
==
--- head/sys/netinet/tcp_stacks/bbr.c   Fri Jun 12 18:41:12 2020
(r362112)
+++ head/sys/netinet/tcp_stacks/bbr.c   Fri Jun 12 19:56:19 2020
(r362113)
@@ -8078,7 +8078,7 @@ bbr_restart_after_idle(struct tcp_bbr *bbr, uint32_t c
bbr->r_ctl.rc_bbr_hptsi_gain = bbr->r_ctl.rc_startup_pg;
bbr->r_ctl.rc_bbr_cwnd_gain = bbr->r_ctl.rc_startup_pg;
bbr_log_type_statechange(bbr, cts, __LINE__);
-   } else {
+   } else if (bbr->rc_bbr_state == BBR_STATE_PROBE_BW) {
bbr_substate_change(bbr, cts, __LINE__, 1);
}
}
@@ -12000,21 +12000,27 @@ bbr_window_update_needed(struct tcpcb *tp, struct sock
 * "adv" is the amount we could increase the window, taking into
 * account that we are limited by TCP_MAXWIN << tp->rcv_scale.
 */
-   uint32_t adv;
+   int32_t adv;
int32_t oldwin;
 
-   adv = min(recwin, TCP_MAXWIN << tp->rcv_scale);
+   adv = recwin;
if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt)) {
oldwin = (tp->rcv_adv - tp->rcv_nxt);
-   adv -= oldwin;
+   if (adv > oldwin)
+   adv -= oldwin;
+   else {
+   /* We can't increase the window */
+   adv = 0;
+   }
} else
oldwin = 0;
 
/*
-* If the new window size ends up being the same as the old size
-* when it is scaled, then don't force a window update.
+* If the new window size ends up being the same as or less
+* than the old size when it is scaled, then don't force
+* a window update.
 */
-   if (oldwin >> tp->rcv_scale == (adv + oldwin) >> tp->rcv_scale)
+   if (oldwin >> tp->rcv_scale >= (adv + oldwin) >> tp->rcv_scale)
return (0);
 
if (adv >= (2 * maxseg) &&

Modified: head/sys/netinet/tcp_stacks/rack.c
==
--- head/sys/netinet/tcp_stacks/rack.c  Fri Jun 12 18:41:12 2020
(r362112)
+++ head/sys/netinet/tcp_stacks/rack.c  Fri Jun 12 19:56:19 2020
(r362113)
@@ -12845,18 +12845,24 @@ again:
int32_t adv;
int oldwin;
 
-   adv = min(recwin, (long)TCP_MAXWIN << tp->rcv_scale);
+   adv = recwin;
if (SEQ_GT(tp->rcv_adv, tp->rcv_nxt)) {
oldwin = (tp->rcv_adv - tp->rcv_nxt);
-   adv -= oldwin;
+   if (adv > oldwin)
+   adv -= oldwin;
+   else {
+   /* We can't increase the window */
+   adv = 0;
+   }
} else
oldwin = 0;
 
/*
-* If the new window size ends up being the same as the old
-* size when it is scaled, then don't force a window update.
+ 

svn commit: r361926 - in head/sys/netinet: . tcp_stacks

2020-06-08 Thread Randall Stewart
Author: rrs
Date: Mon Jun  8 11:48:07 2020
New Revision: 361926
URL: https://svnweb.freebsd.org/changeset/base/361926

Log:
  An important statistic in determining if a server process (or client) is 
being delayed
  is to know the time to first byte in and time to first byte out. Currently we
  have no way to know these; all we have is t_starttime. That (t_starttime) 
tells us
  what time the 3 way handshake completed. We don't know when the first
  request came in or how quickly we responded. Nor from a client perspective
  do we know how long from when we sent out the first byte before the
  server responded.
  
  This small change adds the ability to track the TTFB's. This will show up in
  BB logging which then can be pulled for later analysis. Note that currently
  the tracking of all three variables is via the ticks variable. This provides
  a very rough estimate (with hz=1000 it's 1ms). A follow-on set of work will be
  to change all three of these values into something with a much finer 
resolution
  (either microseconds or nanoseconds), though we may want to make the 
resolution
  configurable so that on lower powered machines we could still use the much
  cheaper ticks variable.
  
  Sponsored by: Netflix Inc.
  Differential Revision:https://reviews.freebsd.org/D24902

Modified:
  head/sys/netinet/tcp_input.c
  head/sys/netinet/tcp_log_buf.c
  head/sys/netinet/tcp_log_buf.h
  head/sys/netinet/tcp_stacks/bbr.c
  head/sys/netinet/tcp_stacks/rack.c
  head/sys/netinet/tcp_usrreq.c
  head/sys/netinet/tcp_var.h

Modified: head/sys/netinet/tcp_input.c
==
--- head/sys/netinet/tcp_input.cMon Jun  8 09:39:48 2020
(r361925)
+++ head/sys/netinet/tcp_input.cMon Jun  8 11:48:07 2020
(r361926)
@@ -1841,6 +1841,15 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru
tcp_clean_sackreport(tp);
TCPSTAT_INC(tcps_preddat);
tp->rcv_nxt += tlen;
+   if (tlen &&
+   ((tp->t_flags2 & TF2_FBYTES_COMPLETE) == 0) &&
+   (tp->t_fbyte_in == 0)) {
+   tp->t_fbyte_in = ticks;
+   if (tp->t_fbyte_in == 0)
+   tp->t_fbyte_in = 1;
+   if (tp->t_fbyte_out && tp->t_fbyte_in)
+   tp->t_flags2 |= TF2_FBYTES_COMPLETE;
+   }
/*
 * Pull snd_wl1 up to prevent seq wrap relative to
 * th_seq.
@@ -3016,6 +3025,15 @@ dodata:  
/* XXX */
else
tp->t_flags |= TF_ACKNOW;
tp->rcv_nxt += tlen;
+   if (tlen &&
+   ((tp->t_flags2 & TF2_FBYTES_COMPLETE) == 0) &&
+   (tp->t_fbyte_in == 0)) {
+   tp->t_fbyte_in = ticks;
+   if (tp->t_fbyte_in == 0)
+   tp->t_fbyte_in = 1;
+   if (tp->t_fbyte_out && tp->t_fbyte_in)
+   tp->t_flags2 |= TF2_FBYTES_COMPLETE;
+   }
thflags = th->th_flags & TH_FIN;
TCPSTAT_INC(tcps_rcvpack);
TCPSTAT_ADD(tcps_rcvbyte, tlen);

Modified: head/sys/netinet/tcp_log_buf.c
==
--- head/sys/netinet/tcp_log_buf.c  Mon Jun  8 09:39:48 2020
(r361925)
+++ head/sys/netinet/tcp_log_buf.c  Mon Jun  8 11:48:07 2020
(r361926)
@@ -1693,6 +1693,9 @@ retry:
COPY_STAT(snd_numholes);
COPY_STAT(snd_scale);
COPY_STAT(rcv_scale);
+   COPY_STAT_T(flags2);
+   COPY_STAT_T(fbyte_in);
+   COPY_STAT_T(fbyte_out);
 #undef COPY_STAT
 #undef COPY_STAT_T
log_buf->tlb_flex1 = 0;

Modified: head/sys/netinet/tcp_log_buf.h
==
--- head/sys/netinet/tcp_log_buf.h  Mon Jun  8 09:39:48 2020
(r361925)
+++ head/sys/netinet/tcp_log_buf.h  Mon Jun  8 11:48:07 2020
(r361926)
@@ -32,7 +32,7 @@
 
 #defineTCP_LOG_REASON_LEN  32
 #defineTCP_LOG_TAG_LEN 32
-#defineTCP_LOG_BUF_VER (8)
+#defineTCP_LOG_BUF_VER (9)
 
 /*
  * Because the (struct tcp_log_buffer) includes 8-byte uint64_t's, it requires
@@ -143,6 +143,7 @@ struct tcp_log_buffer
uint32_ttlb_rttvar; /* TCPCB t_rttvar */
uint32_ttlb_rcv_up; /* TCPCB rcv_up */
uint32_ttlb_rcv_adv;/* TCPCB rcv_adv */
+   uint32_t

svn commit: r361752 - head/sys/netinet

2020-06-03 Thread Randall Stewart
Author: rrs
Date: Wed Jun  3 14:16:40 2020
New Revision: 361752
URL: https://svnweb.freebsd.org/changeset/base/361752

Log:
  We should never allow either the broadcast or IN_ADDR_ANY to be
  connected to or sent to. This was found when working with Michael
  Tuexen and syzkaller. Syzkaller seems to want to use either of
  these two addresses to connect to at times. And it really is
  an error to do so, so let's not allow that behavior.
  
  Sponsored by: Netflix Inc.
  Differential Revision:https://reviews.freebsd.org/D24852

Modified:
  head/sys/netinet/tcp_usrreq.c

Modified: head/sys/netinet/tcp_usrreq.c
==
--- head/sys/netinet/tcp_usrreq.c   Wed Jun  3 14:07:31 2020
(r361751)
+++ head/sys/netinet/tcp_usrreq.c   Wed Jun  3 14:16:40 2020
(r361752)
@@ -552,6 +552,10 @@ tcp_usr_connect(struct socket *so, struct sockaddr *na
if (sinp->sin_family == AF_INET
&& IN_MULTICAST(ntohl(sinp->sin_addr.s_addr)))
return (EAFNOSUPPORT);
+   if ((sinp->sin_family == AF_INET) &&
+   ((ntohl(sinp->sin_addr.s_addr) == INADDR_BROADCAST) ||
+(sinp->sin_addr.s_addr == INADDR_ANY)))
+   return(EAFNOSUPPORT);
if ((error = prison_remote_ip4(td->td_ucred, >sin_addr)) != 0)
return (error);
 
@@ -652,6 +656,11 @@ tcp6_usr_connect(struct socket *so, struct sockaddr *n
error = EAFNOSUPPORT;
goto out;
}
+   if ((ntohl(sin.sin_addr.s_addr) == INADDR_BROADCAST) ||
+   (sin.sin_addr.s_addr == INADDR_ANY)) {
+   error = EAFNOSUPPORT;
+   goto out;
+   }
if ((error = prison_remote_ip4(td->td_ucred,
_addr)) != 0)
goto out;
@@ -1019,6 +1028,13 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf
goto out;
}
if (IN_MULTICAST(ntohl(sinp->sin_addr.s_addr))) {
+   if (m)
+   m_freem(m);
+   error = EAFNOSUPPORT;
+   goto out;
+   }
+   if ((ntohl(sinp->sin_addr.s_addr) == INADDR_BROADCAST) 
||
+   (sinp->sin_addr.s_addr == INADDR_ANY)) {
if (m)
m_freem(m);
error = EAFNOSUPPORT;
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r361751 - in head/sys/netinet: . tcp_stacks

2020-06-03 Thread Randall Stewart
Author: rrs
Date: Wed Jun  3 14:07:31 2020
New Revision: 361751
URL: https://svnweb.freebsd.org/changeset/base/361751

Log:
  This fixes a couple of syzkaller crashes. Most
  of them have to do with TFO. Even the default stack
  had one of the issues:
  
  1) We need to make sure for rack that we don't advance
 snd_nxt beyond iss when we are not doing fast open. We
 otherwise can get a bunch of SYN's sent out incorrectly
 with the seq number advancing.
  2) When we complete the 3-way handshake we should not ever
     append to reassembly if the tlen is 0. If TFO is enabled,
     prior to this fix we could still call the reassembly. Note
     this affects all three stacks.
  3) Rack like its cousin BBR should track if a SYN is on a
 send map entry.
  4) Both bbr and rack need to only consider len incremented on a SYN
 if the starting seq is iss, otherwise we don't increment len which
 may mean we return without adding a sendmap entry.
  
  This work was done in collaboration with Michael Tuexen, thanks for
  all the testing!
  Sponsored by: Netflix Inc
  Differential Revision:https://reviews.freebsd.org/D25000

Modified:
  head/sys/netinet/tcp_input.c
  head/sys/netinet/tcp_stacks/bbr.c
  head/sys/netinet/tcp_stacks/rack.c
  head/sys/netinet/tcp_stacks/tcp_rack.h

Modified: head/sys/netinet/tcp_input.c
==
--- head/sys/netinet/tcp_input.cWed Jun  3 13:51:53 2020
(r361750)
+++ head/sys/netinet/tcp_input.cWed Jun  3 14:07:31 2020
(r361751)
@@ -2989,7 +2989,7 @@ dodata:   
/* XXX */
 */
tfo_syn = ((tp->t_state == TCPS_SYN_RECEIVED) &&
   IS_FASTOPEN(tp->t_flags));
-   if ((tlen || (thflags & TH_FIN) || tfo_syn) &&
+   if ((tlen || (thflags & TH_FIN) || (tfo_syn && tlen > 0)) &&
TCPS_HAVERCVDFIN(tp->t_state) == 0) {
tcp_seq save_start = th->th_seq;
tcp_seq save_rnxt  = tp->rcv_nxt;

Modified: head/sys/netinet/tcp_stacks/bbr.c
==
--- head/sys/netinet/tcp_stacks/bbr.c   Wed Jun  3 13:51:53 2020
(r361750)
+++ head/sys/netinet/tcp_stacks/bbr.c   Wed Jun  3 14:07:31 2020
(r361751)
@@ -6028,7 +6028,7 @@ bbr_log_output(struct tcp_bbr *bbr, struct tcpcb *tp, 
 * or FIN if seq_out is adding more on and a FIN is present
 * (and we are not resending).
 */
-   if (th_flags & TH_SYN)
+   if ((th_flags & TH_SYN) && (tp->iss == seq_out))
len++;
if (th_flags & TH_FIN)
len++;
@@ -8369,7 +8369,7 @@ bbr_process_data(struct mbuf *m, struct tcphdr *th, st
 */
tfo_syn = ((tp->t_state == TCPS_SYN_RECEIVED) &&
   IS_FASTOPEN(tp->t_flags));
-   if ((tlen || (thflags & TH_FIN) || tfo_syn) &&
+   if ((tlen || (thflags & TH_FIN) || (tfo_syn && tlen > 0)) &&
TCPS_HAVERCVDFIN(tp->t_state) == 0) {
tcp_seq save_start = th->th_seq;
tcp_seq save_rnxt  = tp->rcv_nxt;

Modified: head/sys/netinet/tcp_stacks/rack.c
==
--- head/sys/netinet/tcp_stacks/rack.c  Wed Jun  3 13:51:53 2020
(r361750)
+++ head/sys/netinet/tcp_stacks/rack.c  Wed Jun  3 14:07:31 2020
(r361751)
@@ -6237,7 +6237,7 @@ rack_log_output(struct tcpcb *tp, struct tcpopt *to, i
 * or FIN if seq_out is adding more on and a FIN is present
 * (and we are not resending).
 */
-   if (th_flags & TH_SYN)
+   if ((th_flags & TH_SYN) && (seq_out == tp->iss)) 
len++;
if (th_flags & TH_FIN)
len++;
@@ -6280,6 +6280,7 @@ again:
rsm->usec_orig_send = us_cts;
if (th_flags & TH_SYN) {
/* The data space is one beyond snd_una */
+   rsm->r_flags |= RACK_HAS_SIN;
rsm->r_start = seq_out + 1;
rsm->r_end = rsm->r_start + (len - 1);
} else {
@@ -8724,7 +8725,7 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, s
 */
tfo_syn = ((tp->t_state == TCPS_SYN_RECEIVED) &&
   IS_FASTOPEN(tp->t_flags));
-   if ((tlen || (thflags & TH_FIN) || tfo_syn) &&
+   if ((tlen || (thflags & TH_FIN) || (tfo_syn && tlen > 0)) &&
TCPS_HAVERCVDFIN(tp->t_state) == 0) {
tcp_seq save_start = th->th_seq;
tcp_seq save_rnxt  = tp->rcv_nxt;
@@ -12563,8 +12564,10 @@ again:
len = 0;
}
/* Without fast-open there should never be data sent on a SYN */
-   if ((flags & TH_SYN) && 

svn commit: r361080 - head/sys/netinet/tcp_stacks

2020-05-15 Thread Randall Stewart
Author: rrs
Date: Fri May 15 14:00:12 2020
New Revision: 361080
URL: https://svnweb.freebsd.org/changeset/base/361080

Log:
  This fixes several syzkaller issues found with the
  help of Michael Tuexen. There were some accounting
  errors with TCPFO for bbr and also for both rack
  and bbr there was a FO case where we should be
  jumping to the just_return_nolock label to
  exit instead of returning 0. This of course
  caused no timer to be running and thus the
  stuck sessions.
  
  Reported by: Michael Tuexen and Skyzaller
  Sponsored by: Netflix Inc.
  Differential Revision:https://reviews.freebsd.org/D24852

Modified:
  head/sys/netinet/tcp_stacks/bbr.c
  head/sys/netinet/tcp_stacks/rack.c
  head/sys/netinet/tcp_stacks/rack_bbr_common.c

Modified: head/sys/netinet/tcp_stacks/bbr.c
==
--- head/sys/netinet/tcp_stacks/bbr.c   Fri May 15 13:53:10 2020
(r361079)
+++ head/sys/netinet/tcp_stacks/bbr.c   Fri May 15 14:00:12 2020
(r361080)
@@ -4975,6 +4975,15 @@ bbr_remxt_tmr(struct tcpcb *tp)
rsm->r_flags &= ~(BBR_ACKED | BBR_SACK_PASSED | 
BBR_WAS_SACKPASS);
bbr_log_type_rsmclear(bbr, cts, rsm, old_flags, 
__LINE__);
} else {
+   if ((tp->t_state < TCPS_ESTABLISHED) &&
+   (rsm->r_start == tp->snd_una)) {
+   /*
+* Special case for TCP FO. Where
+* we sent more data beyond the snd_max.
+* We don't mark that as lost and stop here.
+*/
+   break;
+   }
if ((rsm->r_flags & BBR_MARKED_LOST) == 0) {
bbr->r_ctl.rc_lost += rsm->r_end - rsm->r_start;
bbr->r_ctl.rc_lost_bytes += rsm->r_end - 
rsm->r_start;
@@ -12315,7 +12324,8 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeva
 (tp->t_state == TCPS_SYN_SENT)) &&
SEQ_GT(tp->snd_max, tp->snd_una) && /* initial SYN or SYN|ACK sent 
*/
(tp->t_rxtshift == 0)) {/* not a retransmit */
-   return (0);
+   len = 0;
+   goto just_return_nolock;
}
/*
 * Before sending anything check for a state update. For hpts
@@ -14286,6 +14296,7 @@ nomore:
(hw_tls == 0) &&
(len > 0) &&
((flags & TH_RST) == 0) &&
+   ((flags & TH_SYN) == 0) &&
(IN_RECOVERY(tp->t_flags) == 0) &&
(bbr->rc_in_persist == 0) &&
(tot_len < bbr->r_ctl.rc_pace_max_segs)) {

Modified: head/sys/netinet/tcp_stacks/rack.c
==
--- head/sys/netinet/tcp_stacks/rack.c  Fri May 15 13:53:10 2020
(r361079)
+++ head/sys/netinet/tcp_stacks/rack.c  Fri May 15 14:00:12 2020
(r361080)
@@ -3873,6 +3873,7 @@ skip_measurement:
 * the next send will trigger us picking up the missing data.
 */
if (rack->r_ctl.rc_first_appl &&
+   TCPS_HAVEESTABLISHED(tp->t_state) &&
rack->r_ctl.rc_app_limited_cnt &&
(SEQ_GT(rack->r_ctl.rc_first_appl->r_start, th_ack)) &&
((rack->r_ctl.rc_first_appl->r_start - th_ack) >
@@ -11741,6 +11742,13 @@ rack_start_gp_measurement(struct tcpcb *tp, struct tcp
struct rack_sendmap *my_rsm = NULL;
struct rack_sendmap fe;
 
+   if (tp->t_state < TCPS_ESTABLISHED) {
+   /*
+* We don't start any measurements if we are
+* not at least established.
+*/
+   return;
+   }
tp->t_flags |= TF_GPUTINPROG;
rack->r_ctl.rc_gp_lowrtt = 0x;
rack->r_ctl.rc_gp_high_rwnd = rack->rc_tp->snd_wnd;
@@ -12109,8 +12117,10 @@ rack_output(struct tcpcb *tp)
((tp->t_state == TCPS_SYN_RECEIVED) ||
 (tp->t_state == TCPS_SYN_SENT)) &&
SEQ_GT(tp->snd_max, tp->snd_una) && /* initial SYN or SYN|ACK sent 
*/
-   (tp->t_rxtshift == 0))  /* not a retransmit */
-   return (0);
+   (tp->t_rxtshift == 0)) {  /* not a retransmit */
+   cwnd_to_use = rack->r_ctl.cwnd_to_use = tp->snd_cwnd;
+   goto just_return_nolock;
+   }
/*
 * Determine length of data that should be transmitted, and flags
 * that will be used. If there is some data or critical controls

Modified: head/sys/netinet/tcp_stacks/rack_bbr_common.c
==
--- head/sys/netinet/tcp_stacks/rack_bbr_common.c   Fri May 15 13:53:10 
2020(r361079)
+++ head/sys/netinet/tcp_stacks/rack_bbr_common.c   Fri May 15 14:00:12 
2020 

svn commit: r360798 - head/sys/netinet/tcp_stacks

2020-05-07 Thread Randall Stewart
Author: rrs
Date: Thu May  7 20:29:38 2020
New Revision: 360798
URL: https://svnweb.freebsd.org/changeset/base/360798

Log:
  When in the SYN-SENT state bbr and rack will not properly send an ACK but 
instead start the D-ACK timer. This
  causes so_reuseport_lb_test to fail since it slows down how quickly the 
program runs until the timeout occurs
  and fails the test.
  
  Sponsored by: Netflix inc.
  Differential Revision:https://reviews.freebsd.org/D24747

Modified:
  head/sys/netinet/tcp_stacks/bbr.c
  head/sys/netinet/tcp_stacks/rack.c

Modified: head/sys/netinet/tcp_stacks/bbr.c
==
--- head/sys/netinet/tcp_stacks/bbr.c   Thu May  7 20:27:32 2020
(r360797)
+++ head/sys/netinet/tcp_stacks/bbr.c   Thu May  7 20:29:38 2020
(r360798)
@@ -4078,6 +4078,7 @@ bbr_cong_signal(struct tcpcb *tp, struct tcphdr *th, u
  */
 #define DELAY_ACK(tp, bbr, nsegs)  \
(((tp->t_flags & TF_RXWIN0SENT) == 0) &&\
+((tp->t_flags & TF_DELACK) == 0) &&\
 ((bbr->bbr_segs_rcvd + nsegs) < tp->t_delayed_ack) &&  \
 (tp->t_delayed_ack || (tp->t_flags & TF_NEEDSYN)))
 
@@ -8992,7 +8993,7 @@ bbr_do_syn_sent(struct mbuf *m, struct tcphdr *th, str
 * If there's data, delay ACK; if there's also a FIN ACKNOW
 * will be turned on later.
 */
-   if (DELAY_ACK(tp, bbr, 1) && tlen != 0 && (tfo_partial == 0)) {
+   if (DELAY_ACK(tp, bbr, 1) && tlen != 0 && !tfo_partial) {
bbr->bbr_segs_rcvd += 1;
tp->t_flags |= TF_DELACK;
bbr_timer_cancel(bbr, __LINE__, bbr->r_ctl.rc_rcvtime);

Modified: head/sys/netinet/tcp_stacks/rack.c
==
--- head/sys/netinet/tcp_stacks/rack.c  Thu May  7 20:27:32 2020
(r360797)
+++ head/sys/netinet/tcp_stacks/rack.c  Thu May  7 20:29:38 2020
(r360798)
@@ -9320,7 +9320,15 @@ rack_do_syn_sent(struct mbuf *m, struct tcphdr *th, st
 * If there's data, delay ACK; if there's also a FIN ACKNOW
 * will be turned on later.
 */
-   rack_handle_delayed_ack(tp, rack, tlen, tfo_partial);
+   if (DELAY_ACK(tp, tlen) && tlen != 0 && !tfo_partial) {
+   rack_timer_cancel(tp, rack,
+ rack->r_ctl.rc_rcvtime, __LINE__);
+   tp->t_flags |= TF_DELACK;
+   } else {
+   rack->r_wanted_output = 1;
+   tp->t_flags |= TF_ACKNOW;
+   rack->rc_dack_toggle = 0;
+   }
if (((thflags & (TH_CWR | TH_ECE)) == TH_ECE) &&
(V_tcp_do_ecn == 1)) {
tp->t_flags2 |= TF2_ECN_PERMIT;
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r360776 - head/sys/netinet/tcp_stacks

2020-05-07 Thread Randall Stewart
Author: rrs
Date: Thu May  7 10:46:02 2020
New Revision: 360776
URL: https://svnweb.freebsd.org/changeset/base/360776

Log:
  NF has an internal option that changes the tcp_mcopy_m routine slightly (has
  a few extra arguments). Recently that changed to only have one arg extra so
  that two ifdefs around the call are no longer needed. Let's take out the
  extra ifdef and arg.
  
  Sponsored by: Netflix Inc
  Differential Revision: https://reviews.freebsd.org/D24736

Modified:
  head/sys/netinet/tcp_stacks/bbr.c
  head/sys/netinet/tcp_stacks/rack.c

Modified: head/sys/netinet/tcp_stacks/bbr.c
==
--- head/sys/netinet/tcp_stacks/bbr.c   Thu May  7 08:58:08 2020
(r360775)
+++ head/sys/netinet/tcp_stacks/bbr.c   Thu May  7 10:46:02 2020
(r360776)
@@ -13420,9 +13420,6 @@ send:
 #endif
orig_len = len;
m->m_next = tcp_m_copym(
-#ifdef NETFLIX_COPY_ARGS
-   tp,
-#endif
mb, moff, ,
if_hw_tsomaxsegcount,
if_hw_tsomaxsegsize, msb,

Modified: head/sys/netinet/tcp_stacks/rack.c
==
--- head/sys/netinet/tcp_stacks/rack.c  Thu May  7 08:58:08 2020
(r360775)
+++ head/sys/netinet/tcp_stacks/rack.c  Thu May  7 10:46:02 2020
(r360776)
@@ -13353,9 +13353,6 @@ send:
else
msb = sb;
m->m_next = tcp_m_copym(
-#ifdef NETFLIX_COPY_ARGS
-   tp,
-#endif
mb, moff, ,
if_hw_tsomaxsegcount, if_hw_tsomaxsegsize, msb,
((rsm == NULL) ? hw_tls : 0)
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r360644 - head/sys/netinet/tcp_stacks

2020-05-04 Thread Randall Stewart
Author: rrs
Date: Mon May  4 23:02:58 2020
New Revision: 360644
URL: https://svnweb.freebsd.org/changeset/base/360644

Log:
  This fixes two issues found by ankitrahej...@gmail.com
  1) When BBR retransmits the syn it was messing up the snd_max
  2) When we need to send a RST we might not send it when we should
  
  Reported by:  ankitrahej...@gmail.com
  Sponsored by:  Netflix.com
  Differential Revision: https://reviews.freebsd.org/D24693

Modified:
  head/sys/netinet/tcp_stacks/bbr.c

Modified: head/sys/netinet/tcp_stacks/bbr.c
==
--- head/sys/netinet/tcp_stacks/bbr.c   Mon May  4 22:59:39 2020
(r360643)
+++ head/sys/netinet/tcp_stacks/bbr.c   Mon May  4 23:02:58 2020
(r360644)
@@ -12159,6 +12159,7 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeva
recwin = min(max(sbspace(>so_rcv), 0),
TCP_MAXWIN << tp->rcv_scale);
if ((bbr_window_update_needed(tp, so, recwin, maxseg) 
== 0) &&
+   ((tcp_outflags[tp->t_state] & TH_RST) == 0) &&
((sbavail(sb) + ((tcp_outflags[tp->t_state] & 
TH_FIN) ? 1 : 0)) <=
(tp->snd_max - tp->snd_una))) {
/*
@@ -12916,9 +12917,13 @@ recheck_resend:
if (tp->t_flags & TF_ACKNOW) {
goto send;
}
-   if (((flags & TH_SYN) && (tp->t_flags & TF_NEEDSYN) == 0)) {
+   if (flags & TH_RST) {
+   /* Always send a RST if one is due */
goto send;
}
+   if ((flags & TH_SYN) && (tp->t_flags & TF_NEEDSYN) == 0) {
+   goto send;
+   }
/*
 * If our state indicates that FIN should be sent and we have not
 * yet done so, then we need to send.
@@ -14029,7 +14034,11 @@ out:
}
if (flags & (TH_SYN | TH_FIN) && (rsm == NULL)) {
if (flags & TH_SYN) {
-   tp->snd_max++;
+   /*
+* Smack the snd_max to iss + 1
+* if its a FO we will add len below.
+*/
+   tp->snd_max = tp->iss + 1;
}
if ((flags & TH_FIN) && ((tp->t_flags & TF_SENTFIN) == 
0)) {
tp->snd_max++;
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r360639 - head/sys/netinet/tcp_stacks

2020-05-04 Thread Randall Stewart
Author: rrs
Date: Mon May  4 20:28:53 2020
New Revision: 360639
URL: https://svnweb.freebsd.org/changeset/base/360639

Log:
  This commit brings things into sync with the advancements that
  have been made in rack and adds a few fixes in BBR. This also
  removes any possibility of incorrectly doing OOB data, since the stacks
  do not support it. Should fix the syzkaller crashes seen in the
  past. Still to fix is the BBR issue just reported this weekend
  with the SYN and on sending a RST. Note that this version of
  rack can now do pacing as well.
  
  Sponsored by:Netflix Inc
  Differential Revision:https://reviews.freebsd.org/D24576

Modified:
  head/sys/netinet/tcp_stacks/bbr.c
  head/sys/netinet/tcp_stacks/rack.c
  head/sys/netinet/tcp_stacks/rack_bbr_common.c
  head/sys/netinet/tcp_stacks/rack_bbr_common.h
  head/sys/netinet/tcp_stacks/tcp_bbr.h
  head/sys/netinet/tcp_stacks/tcp_rack.h

Modified: head/sys/netinet/tcp_stacks/bbr.c
==
--- head/sys/netinet/tcp_stacks/bbr.c   Mon May  4 20:19:57 2020
(r360638)
+++ head/sys/netinet/tcp_stacks/bbr.c   Mon May  4 20:28:53 2020
(r360639)
@@ -1,7 +1,5 @@
 /*-
- * Copyright (c) 2016-9
- * Netflix Inc.
- *  All rights reserved.
+ * Copyright (c) 2016-2020 Netflix, Inc.
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -72,6 +70,7 @@ __FBSDID("$FreeBSD$");
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 
@@ -1853,28 +1852,6 @@ bbr_init_sysctls(void)
_clear_lost, 0, sysctl_bbr_clear_lost, "IU", "Clear lost 
counters");
 }
 
-static inline int32_t
-bbr_progress_timeout_check(struct tcp_bbr *bbr)
-{
-   if (bbr->rc_tp->t_maxunacktime && bbr->rc_tp->t_acktime &&
-   TSTMP_GT(ticks, bbr->rc_tp->t_acktime)) {
-   if uint32_t)ticks - bbr->rc_tp->t_acktime)) >= 
bbr->rc_tp->t_maxunacktime) {
-   /*
-* There is an assumption here that the caller will
-* drop the connection, so we increment the
-* statistics.
-*/
-   bbr_log_progress_event(bbr, bbr->rc_tp, ticks, 
PROGRESS_DROP, __LINE__);
-   BBR_STAT_INC(bbr_progress_drops);
-#ifdef NETFLIX_STATS
-   KMOD_TCPSTAT_INC(tcps_progdrops);
-#endif
-   return (1);
-   }
-   }
-   return (0);
-}
-
 static void
 bbr_counter_destroy(void)
 {
@@ -1884,6 +1861,8 @@ bbr_counter_destroy(void)
COUNTER_ARRAY_FREE(bbr_state_lost, BBR_MAX_STAT);
COUNTER_ARRAY_FREE(bbr_state_time, BBR_MAX_STAT);
COUNTER_ARRAY_FREE(bbr_state_resend, BBR_MAX_STAT);
+   counter_u64_free(bbr_nohdwr_pacing_enobuf);
+   counter_u64_free(bbr_hdwr_pacing_enobuf);
counter_u64_free(bbr_flows_whdwr_pacing);
counter_u64_free(bbr_flows_nohdwr_pacing);
 
@@ -4643,7 +4622,8 @@ bbr_timeout_tlp(struct tcpcb *tp, struct tcp_bbr *bbr,
/* Its not time yet */
return (0);
}
-   if (bbr_progress_timeout_check(bbr)) {
+   if (ctf_progress_timeout_check(tp, true)) {
+   bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__);
tcp_set_inp_to_drop(bbr->rc_inp, ETIMEDOUT);
return (1);
}
@@ -4815,9 +4795,8 @@ bbr_timeout_delack(struct tcpcb *tp, struct tcp_bbr *b
 }
 
 /*
- * Persists timer, here we simply need to setup the
- * FORCE-DATA flag the output routine will send
- * the one byte send.
+ * Here we send a KEEP-ALIVE like probe to the
+ * peer, we do not send data.
  *
  * We only return 1, saying don't proceed, if all timers
  * are stopped (destroyed PCB?).
@@ -4845,7 +4824,8 @@ bbr_timeout_persist(struct tcpcb *tp, struct tcp_bbr *
/*
 * Have we exceeded the user specified progress time?
 */
-   if (bbr_progress_timeout_check(bbr)) {
+   if (ctf_progress_timeout_check(tp, true)) {
+   bbr_log_progress_event(bbr, tp, tick, PROGRESS_DROP, __LINE__);
tcp_set_inp_to_drop(bbr->rc_inp, ETIMEDOUT);
goto out;
}
@@ -4859,6 +4839,7 @@ bbr_timeout_persist(struct tcpcb *tp, struct tcp_bbr *
(ticks - tp->t_rcvtime >= tcp_maxpersistidle ||
ticks - tp->t_rcvtime >= TCP_REXMTVAL(tp) * tcp_totbackoff)) {
KMOD_TCPSTAT_INC(tcps_persistdrop);
+   tcp_log_end_status(tp, TCP_EI_STATUS_PERSIST_MAX);
tcp_set_inp_to_drop(bbr->rc_inp, ETIMEDOUT);
goto out;
}
@@ -4875,6 +4856,7 @@ bbr_timeout_persist(struct tcpcb *tp, struct tcp_bbr *
if (tp->t_state > TCPS_CLOSE_WAIT &&
(ticks - tp->t_rcvtime) >= TCPTV_PERSMAX) {
KMOD_TCPSTAT_INC(tcps_persistdrop);
+   

svn commit: r360638 - head/sys/netinet

2020-05-04 Thread Randall Stewart
Author: rrs
Date: Mon May  4 20:19:57 2020
New Revision: 360638
URL: https://svnweb.freebsd.org/changeset/base/360638

Log:
  Adjust the fb to have a way to ask the underlying stack
  if it can support the PRUS option (OOB). And then have
  the new function call that to validate and give the
  correct error response if needed to the user (rack
  and bbr do not support obsoleted OOB data).
  
  Sponsored by: Netflix Inc.
  Differential Revision: https://reviews.freebsd.org/D24574

Modified:
  head/sys/netinet/tcp_usrreq.c
  head/sys/netinet/tcp_var.h

Modified: head/sys/netinet/tcp_usrreq.c
==
--- head/sys/netinet/tcp_usrreq.c   Mon May  4 18:40:56 2020
(r360637)
+++ head/sys/netinet/tcp_usrreq.c   Mon May  4 20:19:57 2020
(r360638)
@@ -133,6 +133,8 @@ static void tcp_disconnect(struct tcpcb *);
 static voidtcp_usrclosed(struct tcpcb *);
 static voidtcp_fill_info(struct tcpcb *, struct tcp_info *);
 
+static int tcp_pru_options_support(struct tcpcb *tp, int flags);
+
 #ifdef TCPDEBUG
 #defineTCPDEBUG0   int ostate = 0
 #defineTCPDEBUG1() ostate = tp ? tp->t_state : 0
@@ -979,6 +981,15 @@ tcp_usr_send(struct socket *so, int flags, struct mbuf
goto out;
}
tp = intotcpcb(inp);
+   if (flags & PRUS_OOB) {
+   if ((error = tcp_pru_options_support(tp, PRUS_OOB)) != 0) {
+   if (control)
+   m_freem(control);
+   if (m && (flags & PRUS_NOTREADY) == 0)
+   m_freem(m);
+   goto out;
+   }
+   }
TCPDEBUG1();
if (nam != NULL && tp->t_state < TCPS_SYN_SENT) {
switch (nam->sa_family) {
@@ -1362,6 +1373,24 @@ tcp_usr_close(struct socket *so)
NET_EPOCH_EXIT(et);
 }
 
+static int 
+tcp_pru_options_support(struct tcpcb *tp, int flags)
+{
+   /*
+* If the specific TCP stack has a pru_options
+* specified then it does not always support
+* all the PRU_XX options and we must ask it.
+* If the function is not specified then all
+* of the PRU_XX options are supported.
+*/
+   int ret = 0;
+
+   if (tp->t_fb->tfb_pru_options) {
+   ret = (*tp->t_fb->tfb_pru_options)(tp, flags);
+   }
+   return (ret);
+}
+
 /*
  * Receive out-of-band data.
  */
@@ -1381,6 +1410,10 @@ tcp_usr_rcvoob(struct socket *so, struct mbuf *m, int 
goto out;
}
tp = intotcpcb(inp);
+   error = tcp_pru_options_support(tp, PRUS_OOB);
+   if (error) {
+   goto out;
+   }
TCPDEBUG1();
if ((so->so_oobmark == 0 &&
 (so->so_rcv.sb_state & SBS_RCVATMARK) == 0) ||

Modified: head/sys/netinet/tcp_var.h
==
--- head/sys/netinet/tcp_var.h  Mon May  4 18:40:56 2020(r360637)
+++ head/sys/netinet/tcp_var.h  Mon May  4 20:19:57 2020(r360638)
@@ -345,6 +345,7 @@ struct tcp_function_block {
void(*tfb_tcp_rexmit_tmr)(struct tcpcb *);
int (*tfb_tcp_handoff_ok)(struct tcpcb *);
void(*tfb_tcp_mtu_chg)(struct tcpcb *);
+   int (*tfb_pru_options)(struct tcpcb *, int);
volatile uint32_t tfb_refcnt;
uint32_t  tfb_flags;
uint8_t tfb_id;
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r360385 - head/sys/netinet

2020-04-27 Thread Randall Stewart
Author: rrs
Date: Mon Apr 27 16:30:29 2020
New Revision: 360385
URL: https://svnweb.freebsd.org/changeset/base/360385

Log:
  This change does a small preparatory step in getting the
  latest rack and bbr in from the NF repo. When those come
  in the OOB data handling will be fixed where syzkaller crashes.
  
  Differential Revision:https://reviews.freebsd.org/D24575

Modified:
  head/sys/netinet/tcp.h
  head/sys/netinet/tcp_log_buf.h
  head/sys/netinet/tcp_subr.c
  head/sys/netinet/tcp_var.h

Modified: head/sys/netinet/tcp.h
==
--- head/sys/netinet/tcp.h  Mon Apr 27 16:12:32 2020(r360384)
+++ head/sys/netinet/tcp.h  Mon Apr 27 16:30:29 2020(r360385)
@@ -181,6 +181,9 @@ struct tcphdr {
 #defineTCP_CONGESTION  64  /* get/set congestion control algorithm 
*/
 #defineTCP_CCALGOOPT   65  /* get/set cc algorithm specific 
options */
 #define TCP_DELACK 72  /* socket option for delayed ack */
+#define TCP_FIN_IS_RST 73  /* A fin from the peer is treated has a RST */
+#define TCP_LOG_LIMIT  74  /* Limit to number of records in tcp-log */
+#define TCP_SHARED_CWND_ALLOWED 75 /* Use of a shared cwnd is allowed */
 #defineTCP_KEEPINIT128 /* N, time to establish connection */
 #defineTCP_KEEPIDLE256 /* L,N,X start keeplives after this 
period */
 #defineTCP_KEEPINTVL   512 /* L,N interval between keepalives */
@@ -190,10 +193,11 @@ struct tcphdr {
 #defineTCP_PCAP_IN 4096/* number of input packets to keep */
 #define TCP_FUNCTION_BLK 8192  /* Set the tcp function pointers to the 
specified stack */
 /* Options for Rack and BBR */
+#define TCP_RACK_MBUF_QUEUE   1050 /* Do we allow mbuf queuing if supported */
 #define TCP_RACK_PROP1051 /* RACK proportional rate reduction (bool) */
 #define TCP_RACK_TLP_REDUCE   1052 /* RACK TLP cwnd reduction (bool) */
 #define TCP_RACK_PACE_REDUCE  1053 /* RACK Pacing reduction factor (divisor) */
-#define TCP_RACK_PACE_MAX_SEG 1054 /* Max segments in a pace */
+#define TCP_RACK_PACE_MAX_SEG 1054 /* Max TSO size we will send  */
 #define TCP_RACK_PACE_ALWAYS  1055 /* Use the always pace method */
 #define TCP_RACK_PROP_RATE1056 /* The proportional reduction rate */
 #define TCP_RACK_PRR_SENDALOT 1057 /* Allow PRR to send more than one seg */
@@ -236,7 +240,7 @@ struct tcphdr {
 #define TCP_RACK_IDLE_REDUCE_HIGH 1092  /* Reduce the highest cwnd seen to IW 
on idle */
 #define TCP_RACK_MIN_PACE  1093/* Do we enforce rack min pace time */
 #define TCP_RACK_MIN_PACE_SEG  1094/* If so what is the seg threshould */
-#define TCP_RACK_GP_INCREASE   1094/* After 4.1 its the GP increase */
+#define TCP_RACK_GP_INCREASE   1094/* After 4.1 its the GP increase in 
older rack */
 #define TCP_RACK_TLP_USE   1095
 #define TCP_BBR_ACK_COMP_ALG   1096/* Not used */
 #define TCP_BBR_TMR_PACE_OH1096/* Recycled in 4.2 */
@@ -248,7 +252,8 @@ struct tcphdr {
 #define TCP_BBR_PROBE_RTT_GAIN 1101
 #define TCP_BBR_PROBE_RTT_LEN  1102
 #define TCP_BBR_SEND_IWND_IN_TSO 1103  /* Do we burst out whole iwin size 
chunks at start? */
-#define TCP_BBR_USE_RACK_CHEAT 1104/* Do we use the rack cheat for pacing 
rxt's */
+#define TCP_BBR_USE_RACK_RR 1104   /* Do we use the rack rapid recovery 
for pacing rxt's */
+#define TCP_BBR_USE_RACK_CHEAT TCP_BBR_USE_RACK_RR /* Compat. */
 #define TCP_BBR_HDWR_PACE  1105/* Enable/disable hardware pacing */
 #define TCP_BBR_UTTER_MAX_TSO  1106/* Do we enforce an utter max TSO size 
*/
 #define TCP_BBR_EXTRA_STATE1107/* Special exit-persist catch up */
@@ -256,6 +261,24 @@ struct tcphdr {
 #define TCP_BBR_MIN_TOPACEOUT  1109/* Do we suspend pacing until */
 #define TCP_BBR_TSTMP_RAISES   1110/* Can a timestamp measurement raise 
the b/w */
 #define TCP_BBR_POLICER_DETECT /* Turn on/off google mode policer 
detection */
+#define TCP_BBR_RACK_INIT_RATE 1112/* Set an initial pacing rate for when 
we have no b/w in kbits per sec */
+#define TCP_RACK_RR_CONF   1113 /* Rack rapid recovery configuration 
control*/
+#define TCP_RACK_CHEAT_NOT_CONF_RATE TCP_RACK_RR_CONF
+#define TCP_RACK_GP_INCREASE_CA   1114 /* GP increase for Congestion Avoidance 
*/
+#define TCP_RACK_GP_INCREASE_SS   1115 /* GP increase for Slow Start */
+#define TCP_RACK_GP_INCREASE_REC  1116 /* GP increase for Recovery */
+#define TCP_RACK_FORCE_MSEG1117/* Override to use the user set max-seg 
value */
+#define TCP_RACK_PACE_RATE_CA  1118 /* Pacing rate for Congestion Avoidance */
+#define TCP_RACK_PACE_RATE_SS  1119 /* Pacing rate for Slow Start */
+#define TCP_RACK_PACE_RATE_REC  1120 /* Pacing rate for Recovery */
+#define TCP_NO_PRR 1122 /* If pacing, don't use prr  */
+#define TCP_RACK_NONRXT_CFG_RATE 1123 /* In recovery does a non-rxt use the 
cfg rate */
+#define TCP_SHARED_CWND_ENABLE   

svn commit: r358332 - in head/sys: net netinet

2020-02-26 Thread Randall Stewart
Author: rrs
Date: Wed Feb 26 13:48:33 2020
New Revision: 358332
URL: https://svnweb.freebsd.org/changeset/base/358332

Log:
  This commit expands tcp_ratelimit to be able to handle cards
  like the mlx-c5 and c6 that require a "setup" routine before
  the tcp_ratelimit code can declare and use a rate. I add the
  setup routine to if_var as well as fix tcp_ratelimit to call it.
  I also revisit the rates so that in the case of a mlx card
  of type c5/6 we will use about 100 rates concentrated in the range
  where the most gain can be had (1-200Mbps). Note that I have
  tested these on a c5 and they work and perform well. In fact
  in an unloaded system they pace right to the correct rate (great
  job mlx!). There will be a further commit here from Hans that
  will add the respective changes to the mlx driver to support this
  work (which I was testing with).
  
  Sponsored by: Netflix Inc.
  Differential Revision:https://reviews.freebsd.org/D23647

Modified:
  head/sys/net/if_var.h
  head/sys/netinet/tcp_ratelimit.c
  head/sys/netinet/tcp_ratelimit.h

Modified: head/sys/net/if_var.h
==
--- head/sys/net/if_var.h   Wed Feb 26 13:23:52 2020(r358331)
+++ head/sys/net/if_var.h   Wed Feb 26 13:48:33 2020(r358332)
@@ -252,6 +252,7 @@ union if_snd_tag_query_params {
 */
 #define RT_IS_FIXED_TABLE 0x0004   /* A fixed table is attached */
 #define RT_IS_UNUSABLE   0x0008/* It is not usable for this */
+#define RT_IS_SETUP_REQ  0x0010/* The interface setup must be 
called before use */
 
 struct if_ratelimit_query_results {
const uint64_t *rate_table; /* Pointer to table if present */
@@ -268,8 +269,8 @@ typedef int (if_snd_tag_query_t)(struct m_snd_tag *, u
 typedef void (if_snd_tag_free_t)(struct m_snd_tag *);
 typedef void (if_ratelimit_query_t)(struct ifnet *,
 struct if_ratelimit_query_results *);
+typedef int (if_ratelimit_setup_t)(struct ifnet *, uint64_t, uint32_t);
 
-
 /*
  * Structure defining a network interface.
  */
@@ -368,7 +369,7 @@ struct ifnet {
if_init_fn_tif_init;/* Init routine */
int (*if_resolvemulti)  /* validate/resolve multicast */
(struct ifnet *, struct sockaddr **, struct sockaddr *);
-   if_qflush_fn_t  if_qflush;  /* flush any queue */   
+   if_qflush_fn_t  if_qflush;  /* flush any queue */
if_transmit_fn_t if_transmit;   /* initiate output routine */
 
void(*if_reassign)  /* reassign to vnet routine */
@@ -411,6 +412,7 @@ struct ifnet {
if_snd_tag_query_t *if_snd_tag_query;
if_snd_tag_free_t *if_snd_tag_free;
if_ratelimit_query_t *if_ratelimit_query;
+   if_ratelimit_setup_t *if_ratelimit_setup;
 
/* Ethernet PCP */
uint8_t if_pcp;
@@ -555,7 +557,7 @@ struct ifaddr {
u_int   ifa_refcnt; /* references to this structure */
 
counter_u64_t   ifa_ipackets;
-   counter_u64_t   ifa_opackets;
+   counter_u64_t   ifa_opackets;
counter_u64_t   ifa_ibytes;
counter_u64_t   ifa_obytes;
struct  epoch_context   ifa_epoch_ctx;
@@ -769,7 +771,7 @@ void if_setstartfn(if_t ifp, void (*)(if_t));
 void if_settransmitfn(if_t ifp, if_transmit_fn_t);
 void if_setqflushfn(if_t ifp, if_qflush_fn_t);
 void if_setgetcounterfn(if_t ifp, if_get_counter_t);
- 
+
 /* Revisit the below. These are inline functions originally */
 int drbr_inuse_drv(if_t ifp, struct buf_ring *br);
 struct mbuf* drbr_dequeue_drv(if_t ifp, struct buf_ring *br);

Modified: head/sys/netinet/tcp_ratelimit.c
==
--- head/sys/netinet/tcp_ratelimit.cWed Feb 26 13:23:52 2020
(r358331)
+++ head/sys/netinet/tcp_ratelimit.cWed Feb 26 13:48:33 2020
(r358332)
@@ -66,45 +66,199 @@ __FBSDID("$FreeBSD$");
  * For the purposes of each send, what is the size
  * of an ethernet frame.
  */
-#ifndef ETHERNET_SEGMENT_SIZE
-#define ETHERNET_SEGMENT_SIZE 1500
-#endif
 MALLOC_DEFINE(M_TCPPACE, "tcp_hwpace", "TCP Hardware pacing memory");
 #ifdef RATELIMIT
 
+/*
+ * The following preferred table will seem weird to
+ * the casual viewer. Why do we not have any rates below
+ * 1Mbps? Why do we have a rate at 1.44Mbps called common?
+ * Why do the rates cluster in the 1-100Mbps range more
+ * than others? Why does the table jump around at the beginnign
+ * and then be more consistently raising?
+ *
+ * Let me try to answer those questions. A lot of
+ * this is dependant on the hardware. We have three basic
+ * supporters of rate limiting
+ *
+ * Chelsio - Supporting 16 configurable rates.
+ * Mlx  - c4 supporting 13 fixed rates.
+ * Mlx  - c5 & c6 supporting 127 configurable rates.
+ *
+ * The c4 is why we have a common rate that is available
+ * in all rate tables. This is a 

svn commit: r357823 - head/sys/netinet

2020-02-12 Thread Randall Stewart
Author: rrs
Date: Wed Feb 12 15:26:56 2020
New Revision: 357823
URL: https://svnweb.freebsd.org/changeset/base/357823

Log:
  Let's get the real correct version.. geesh. I need
  more coffee evidently.
  
  Sponsored by: Netflix

Modified:
  head/sys/netinet/tcp_ratelimit.c

Modified: head/sys/netinet/tcp_ratelimit.c
==
--- head/sys/netinet/tcp_ratelimit.cWed Feb 12 14:50:13 2020
(r357822)
+++ head/sys/netinet/tcp_ratelimit.cWed Feb 12 15:26:56 2020
(r357823)
@@ -49,9 +49,11 @@ __FBSDID("$FreeBSD$");
 #include 
 #include 
 #include 
-#define TCPSTATES  /* for logging */
+#include 
+#include 
 #include 
 #include 
+#define TCPSTATES  /* for logging */
 #include 
 #ifdef INET6
 #include 
@@ -284,7 +286,7 @@ rs_defer_destroy(struct tcp_rate_set *rs)
 
/* Set flag to only defer once. */
rs->rs_flags |= RS_FUNERAL_SCHD;
-   epoch_call(net_epoch, >rs_epoch_ctx, rs_destroy);
+   NET_EPOCH_CALL(rs_destroy, >rs_epoch_ctx);
 }
 
 #ifdef INET
@@ -878,7 +880,7 @@ rt_setup_rate(struct inpcb *inp, struct ifnet *ifp, ui
struct epoch_tracker et;
int err;
 
-   epoch_enter_preempt(net_epoch_preempt, );
+   NET_EPOCH_ENTER(et);
 use_real_interface:
CK_LIST_FOREACH(rs, _rs, next) {
/*
@@ -911,14 +913,14 @@ use_real_interface:
 */
if (rs->rs_disable && error)
*error = ENODEV;
-   epoch_exit_preempt(net_epoch_preempt, );
+   NET_EPOCH_EXIT(et);
return (NULL);
}
 
if ((rs == NULL) || (rs->rs_disable != 0)) {
if (rs->rs_disable && error)
*error = ENOSPC;
-   epoch_exit_preempt(net_epoch_preempt, );
+   NET_EPOCH_EXIT(et);
return (NULL);
}
if (rs->rs_flags & RS_IS_DEFF) {
@@ -929,7 +931,7 @@ use_real_interface:
if (tifp == NULL) {
if (rs->rs_disable && error)
*error = ENOTSUP;
-   epoch_exit_preempt(net_epoch_preempt, );
+   NET_EPOCH_EXIT(et);
return (NULL);
}
goto use_real_interface;
@@ -938,7 +940,7 @@ use_real_interface:
((rs->rs_flows_using + 1) > rs->rs_flow_limit)) {
if (error)
*error = ENOSPC;
-   epoch_exit_preempt(net_epoch_preempt, );
+   NET_EPOCH_EXIT(et);
return (NULL);
}
rte = tcp_find_suitable_rate(rs, bytes_per_sec, flags);
@@ -962,7 +964,7 @@ use_real_interface:
 */
atomic_add_64(>rs_flows_using, 1);
}
-   epoch_exit_preempt(net_epoch_preempt, );
+   NET_EPOCH_EXIT(et);
return (rte);
 }
 
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r357818 - in head/sys/netinet: . cc

2020-02-12 Thread Randall Stewart
Author: rrs
Date: Wed Feb 12 13:31:36 2020
New Revision: 357818
URL: https://svnweb.freebsd.org/changeset/base/357818

Log:
  White space cleanup -- remove trailing tabs or spaces
  from any line.
  
  Sponsored by: Netflix Inc.

Modified:
  head/sys/netinet/cc/cc_cdg.c
  head/sys/netinet/cc/cc_dctcp.c
  head/sys/netinet/cc/cc_htcp.c
  head/sys/netinet/icmp6.h
  head/sys/netinet/if_ether.c
  head/sys/netinet/igmp.c
  head/sys/netinet/in.c
  head/sys/netinet/in.h
  head/sys/netinet/in_mcast.c
  head/sys/netinet/in_pcb.c
  head/sys/netinet/in_pcb.h
  head/sys/netinet/in_proto.c
  head/sys/netinet/in_rmx.c
  head/sys/netinet/ip_divert.c
  head/sys/netinet/ip_dummynet.h
  head/sys/netinet/ip_fastfwd.c
  head/sys/netinet/ip_fw.h
  head/sys/netinet/ip_icmp.c
  head/sys/netinet/ip_id.c
  head/sys/netinet/ip_input.c
  head/sys/netinet/ip_mroute.c
  head/sys/netinet/ip_options.c
  head/sys/netinet/ip_reass.c
  head/sys/netinet/raw_ip.c
  head/sys/netinet/siftr.c
  head/sys/netinet/tcp.h
  head/sys/netinet/tcp_fastopen.c
  head/sys/netinet/tcp_fsm.h
  head/sys/netinet/tcp_input.c
  head/sys/netinet/tcp_log_buf.c
  head/sys/netinet/tcp_log_buf.h
  head/sys/netinet/tcp_lro.c
  head/sys/netinet/tcp_lro.h
  head/sys/netinet/tcp_output.c
  head/sys/netinet/tcp_ratelimit.c
  head/sys/netinet/tcp_ratelimit.h
  head/sys/netinet/tcp_reass.c
  head/sys/netinet/tcp_sack.c
  head/sys/netinet/tcp_subr.c
  head/sys/netinet/tcp_syncache.c
  head/sys/netinet/tcp_timer.c
  head/sys/netinet/tcp_timer.h
  head/sys/netinet/tcp_usrreq.c
  head/sys/netinet/tcp_var.h
  head/sys/netinet/udp.h
  head/sys/netinet/udp_usrreq.c
  head/sys/netinet/udp_var.h
  head/sys/netinet/udplite.h

Modified: head/sys/netinet/cc/cc_cdg.c
==
--- head/sys/netinet/cc/cc_cdg.cWed Feb 12 13:07:09 2020
(r357817)
+++ head/sys/netinet/cc/cc_cdg.cWed Feb 12 13:31:36 2020
(r357818)
@@ -607,7 +607,7 @@ cdg_ack_received(struct cc_var *ccv, uint16_t ack_type
congestion = prob_backoff(qdiff_max);
else if (cdg_data->max_qtrend > 0)
congestion = prob_backoff(cdg_data->max_qtrend);
-   
+
/* Update estimate of queue state. */
if (cdg_data->min_qtrend > 0 &&
cdg_data->max_qtrend <= 0) {

Modified: head/sys/netinet/cc/cc_dctcp.c
==
--- head/sys/netinet/cc/cc_dctcp.c  Wed Feb 12 13:07:09 2020
(r357817)
+++ head/sys/netinet/cc/cc_dctcp.c  Wed Feb 12 13:31:36 2020
(r357818)
@@ -274,9 +274,9 @@ dctcp_cong_signal(struct cc_var *ccv, uint32_t type)
dctcp_data->bytes_total = 0;
dctcp_data->save_sndnxt = CCV(ccv, 
snd_nxt);
} else
-   CCV(ccv, snd_ssthresh) = 
+   CCV(ccv, snd_ssthresh) =
max((cwin - (((uint64_t)cwin *
-   dctcp_data->alpha) >> 
(DCTCP_SHIFT+1))), 
+   dctcp_data->alpha) >> 
(DCTCP_SHIFT+1))),
2 * mss);
CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh);
ENTER_CONGRECOVERY(CCV(ccv, t_flags));

Modified: head/sys/netinet/cc/cc_htcp.c
==
--- head/sys/netinet/cc/cc_htcp.c   Wed Feb 12 13:07:09 2020
(r357817)
+++ head/sys/netinet/cc/cc_htcp.c   Wed Feb 12 13:31:36 2020
(r357818)
@@ -364,7 +364,7 @@ htcp_post_recovery(struct cc_var *ccv)
pipe = tcp_compute_pipe(ccv->ccvc.tcp);
else
pipe = CCV(ccv, snd_max) - ccv->curack;
-   
+
if (pipe < CCV(ccv, snd_ssthresh))
/*
 * Ensure that cwnd down not collape to 1 MSS under

Modified: head/sys/netinet/icmp6.h
==
--- head/sys/netinet/icmp6.hWed Feb 12 13:07:09 2020(r357817)
+++ head/sys/netinet/icmp6.hWed Feb 12 13:31:36 2020(r357818)
@@ -344,7 +344,7 @@ struct nd_opt_mtu { /* MTU option */
 #defineND_OPT_NONCE_LEN((1 * 8) - 2)
 #if ((ND_OPT_NONCE_LEN + 2) % 8) != 0
 #error "(ND_OPT_NONCE_LEN + 2) must be a multiple of 8."
-#endif 
+#endif
 struct nd_opt_nonce {  /* nonce option */
u_int8_tnd_opt_nonce_type;
u_int8_tnd_opt_nonce_len;
@@ -607,7 +607,7 @@ struct icmp6stat {
 * for netinet6 code, it is already available in icp6s_outhist[].
   

svn commit: r357817 - head/sys/netinet

2020-02-12 Thread Randall Stewart
Author: rrs
Date: Wed Feb 12 13:07:09 2020
New Revision: 357817
URL: https://svnweb.freebsd.org/changeset/base/357817

Log:
  Whitespace: remove trailing white space from three
  files (leftover presents from emacs).
  
  Sponsored by: Netflix Inc.

Modified:
  head/sys/netinet/tcp_hpts.c
  head/sys/netinet/tcp_hpts.h
  head/sys/netinet/tcp_ratelimit.c

Modified: head/sys/netinet/tcp_hpts.c
==
--- head/sys/netinet/tcp_hpts.c Wed Feb 12 13:04:19 2020(r357816)
+++ head/sys/netinet/tcp_hpts.c Wed Feb 12 13:07:09 2020(r357817)
@@ -33,7 +33,7 @@ __FBSDID("$FreeBSD$");
  * Some notes about usage.
  *
  * The tcp_hpts system is designed to provide a high precision timer
- * system for tcp. Its main purpose is to provide a mechanism for 
+ * system for tcp. Its main purpose is to provide a mechanism for
  * pacing packets out onto the wire. It can be used in two ways
  * by a given TCP stack (and those two methods can be used simultaneously).
  *
@@ -59,22 +59,22 @@ __FBSDID("$FreeBSD$");
  * to prevent output processing until the time alotted has gone by.
  * Of course this is a bare bones example and the stack will probably
  * have more consideration then just the above.
- * 
+ *
  * Now the second function (actually two functions I guess :D)
- * the tcp_hpts system provides is the  ability to either abort 
- * a connection (later) or process input on a connection. 
+ * the tcp_hpts system provides is the  ability to either abort
+ * a connection (later) or process input on a connection.
  * Why would you want to do this? To keep processor locality
  * and or not have to worry about untangling any recursive
  * locks. The input function now is hooked to the new LRO
- * system as well. 
+ * system as well.
  *
  * In order to use the input redirection function the
- * tcp stack must define an input function for 
+ * tcp stack must define an input function for
  * tfb_do_queued_segments(). This function understands
  * how to dequeue a array of packets that were input and
- * knows how to call the correct processing routine. 
+ * knows how to call the correct processing routine.
  *
- * Locking in this is important as well so most likely the 
+ * Locking in this is important as well so most likely the
  * stack will need to define the tfb_do_segment_nounlock()
  * splitting tfb_do_segment() into two parts. The main processing
  * part that does not unlock the INP and returns a value of 1 or 0.
@@ -83,7 +83,7 @@ __FBSDID("$FreeBSD$");
  * The remains of tfb_do_segment() then become just a simple call
  * to the tfb_do_segment_nounlock() function and check the return
  * code and possibly unlock.
- * 
+ *
  * The stack must also set the flag on the INP that it supports this
  * feature i.e. INP_SUPPORTS_MBUFQ. The LRO code recoginizes
  * this flag as well and will queue packets when it is set.
@@ -99,11 +99,11 @@ __FBSDID("$FreeBSD$");
  *
  * There is a common functions within the rack_bbr_common code
  * version i.e. ctf_do_queued_segments(). This function
- * knows how to take the input queue of packets from 
- * tp->t_in_pkts and process them digging out 
- * all the arguments, calling any bpf tap and 
+ * knows how to take the input queue of packets from
+ * tp->t_in_pkts and process them digging out
+ * all the arguments, calling any bpf tap and
  * calling into tfb_do_segment_nounlock(). The common
- * function (ctf_do_queued_segments())  requires that 
+ * function (ctf_do_queued_segments())  requires that
  * you have defined the tfb_do_segment_nounlock() as
  * described above.
  *
@@ -113,9 +113,9 @@ __FBSDID("$FreeBSD$");
  * a stack wants to drop a connection it calls:
  *
  * tcp_set_inp_to_drop(tp, ETIMEDOUT)
- * 
- * To schedule the tcp_hpts system to call 
- * 
+ *
+ * To schedule the tcp_hpts system to call
+ *
  *tcp_drop(tp, drop_reason)
  *
  * at a future point. This is quite handy to prevent locking
@@ -284,7 +284,7 @@ sysctl_net_inet_tcp_hpts_max_sleep(SYSCTL_HANDLER_ARGS
error = sysctl_handle_int(oidp, , 0, req);
if (error == 0 && req->newptr) {
if ((new < (NUM_OF_HPTSI_SLOTS / 4)) ||
-   (new > HPTS_MAX_SLEEP_ALLOWED)) 
+   (new > HPTS_MAX_SLEEP_ALLOWED))
error = EINVAL;
else
hpts_sleep_max = new;
@@ -311,7 +311,7 @@ tcp_hpts_log(struct tcp_hpts_entry *hpts, struct tcpcb
 int ticks_to_run, int idx)
 {
union tcp_log_stackspecific log;
-   
+
memset(_bbr, 0, sizeof(log.u_bbr));
log.u_bbr.flex1 = hpts->p_nxt_slot;
log.u_bbr.flex2 = hpts->p_cur_slot;
@@ -616,7 +616,7 @@ tcp_hpts_remove_locked_input(struct tcp_hpts_entry *hp
  * Valid values in the flags are
  * HPTS_REMOVE_OUTPUT - remove from the output of the hpts.
  * HPTS_REMOVE_INPUT - remove from the input of the hpts.
- * Note that you can use one or both values 

svn commit: r357816 - head/sys/netinet

2020-02-12 Thread Randall Stewart
Author: rrs
Date: Wed Feb 12 13:04:19 2020
New Revision: 357816
URL: https://svnweb.freebsd.org/changeset/base/357816

Log:
  This small fix makes it so we properly follow
  the RFC and only enable ECN when both the
  CWR and ECE bits are set within the SYN packet.
  
  Sponsored by: Netflix Inc.
  Differential Revision:https://reviews.freebsd.org/D23645

Modified:
  head/sys/netinet/tcp_syncache.c

Modified: head/sys/netinet/tcp_syncache.c
==
--- head/sys/netinet/tcp_syncache.c Wed Feb 12 12:40:06 2020
(r357815)
+++ head/sys/netinet/tcp_syncache.c Wed Feb 12 13:04:19 2020
(r357816)
@@ -1668,7 +1668,8 @@ skip_alloc:
sc->sc_peer_mss = to->to_mss;   /* peer mss may be zero */
if (ltflags & TF_NOOPT)
sc->sc_flags |= SCF_NOOPT;
-   if ((th->th_flags & (TH_ECE|TH_CWR)) && V_tcp_do_ecn)
+   if (((th->th_flags & (TH_ECE|TH_CWR)) == (TH_ECE|TH_CWR)) &&
+   V_tcp_do_ecn)
sc->sc_flags |= SCF_ECN;
 
if (V_tcp_syncookies)
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r357815 - head/sys/netinet/tcp_stacks

2020-02-12 Thread Randall Stewart
Author: rrs
Date: Wed Feb 12 12:40:06 2020
New Revision: 357815
URL: https://svnweb.freebsd.org/changeset/base/357815

Log:
  Remove all trailing white space from the BBR/Rack fold. Bits
  left around by emacs (thanks emacs).

Modified:
  head/sys/netinet/tcp_stacks/bbr.c
  head/sys/netinet/tcp_stacks/rack.c
  head/sys/netinet/tcp_stacks/rack_bbr_common.c
  head/sys/netinet/tcp_stacks/rack_bbr_common.h
  head/sys/netinet/tcp_stacks/sack_filter.c
  head/sys/netinet/tcp_stacks/tcp_bbr.h
  head/sys/netinet/tcp_stacks/tcp_rack.h

Modified: head/sys/netinet/tcp_stacks/bbr.c
==
--- head/sys/netinet/tcp_stacks/bbr.c   Wed Feb 12 12:36:55 2020
(r357814)
+++ head/sys/netinet/tcp_stacks/bbr.c   Wed Feb 12 12:40:06 2020
(r357815)
@@ -208,7 +208,7 @@ static int32_t bbr_min_measurements_req = 1;/* We 
nee
 * to prevent it from being ok
 * to have no measurements). */
 static int32_t bbr_no_pacing_until = 4;
-
+
 static int32_t bbr_min_usec_delta = 2; /* 20,000 usecs */
 static int32_t bbr_min_peer_delta = 20;/* 20 units */
 static int32_t bbr_delta_percent = 150;/* 15.0 % */
@@ -380,9 +380,9 @@ static int32_t bbr_rto_max_sec = 4; /* 4 seconds */
 static int32_t bbr_hptsi_per_second = 1000;
 
 /*
- * For hptsi under bbr_cross_over connections what is delay 
+ * For hptsi under bbr_cross_over connections what is delay
  * target 7ms (in usec) combined with a seg_max of 2
- * gets us close to identical google behavior in 
+ * gets us close to identical google behavior in
  * TSO size selection (possibly more 1MSS sends).
  */
 static int32_t bbr_hptsi_segments_delay_tar = 7000;
@@ -596,9 +596,9 @@ activate_rxt:
rsm = TAILQ_FIRST(>r_ctl.rc_tmap);
if (rsm) {
idx = rsm->r_rtr_cnt - 1;
-   if (TSTMP_GEQ(rsm->r_tim_lastsent[idx], 
bbr->r_ctl.rc_tlp_rxt_last_time)) 
+   if (TSTMP_GEQ(rsm->r_tim_lastsent[idx], 
bbr->r_ctl.rc_tlp_rxt_last_time))
tstmp_touse = rsm->r_tim_lastsent[idx];
-   else 
+   else
tstmp_touse = 
bbr->r_ctl.rc_tlp_rxt_last_time;
if (TSTMP_GT(tstmp_touse, cts))
time_since_sent = cts - tstmp_touse;
@@ -673,9 +673,9 @@ activate_rxt:
}
time_since_sent = 0;
idx = rsm->r_rtr_cnt - 1;
-   if (TSTMP_GEQ(rsm->r_tim_lastsent[idx], 
bbr->r_ctl.rc_tlp_rxt_last_time)) 
+   if (TSTMP_GEQ(rsm->r_tim_lastsent[idx], 
bbr->r_ctl.rc_tlp_rxt_last_time))
tstmp_touse = rsm->r_tim_lastsent[idx];
-   else 
+   else
tstmp_touse = bbr->r_ctl.rc_tlp_rxt_last_time;
if (TSTMP_GT(tstmp_touse, cts))
time_since_sent = cts - tstmp_touse;
@@ -695,11 +695,11 @@ activate_rxt:
}
if ((bbr->rc_tlp_rtx_out == 1) &&
(rsm->r_start == bbr->r_ctl.rc_last_tlp_seq)) {
-   /* 
-* Second retransmit of the same TLP 
+   /*
+* Second retransmit of the same TLP
 * lets not.
 */
-   bbr->rc_tlp_rtx_out = 0; 
+   bbr->rc_tlp_rtx_out = 0;
goto activate_rxt;
}
if (rsm->r_start != bbr->r_ctl.rc_last_tlp_seq) {
@@ -766,7 +766,7 @@ bbr_start_hpts_timer(struct tcp_bbr *bbr, struct tcpcb
prev_delay = bbr->r_ctl.rc_last_delay_val;
if (bbr->r_ctl.rc_last_delay_val &&
(slot == 0)) {
-   /* 
+   /*
 * If a previous pacer delay was in place we
 * are not coming from the output side (where
 * we calculate a delay, more likely a timer).
@@ -777,7 +777,7 @@ bbr_start_hpts_timer(struct tcp_bbr *bbr, struct tcpcb
delay_calc = cts - bbr->rc_pacer_started;
if (delay_calc <= slot)
slot -= delay_calc;
-   } 
+   }
}
/* Do we have early to make up for by pushing out the pacing time? */
if (bbr->r_agg_early_set) {
@@ -804,8 +804,8 @@ bbr_start_hpts_timer(struct tcp_bbr *bbr, struct tcpcb
if (bbr->rc_in_persist == 0) {
delayed_ack = bbr_delack_time;
} else {
-   /* 
-* We are in persists and 

svn commit: r357814 - head/sys/netinet/tcp_stacks

2020-02-12 Thread Randall Stewart
Author: rrs
Date: Wed Feb 12 12:36:55 2020
New Revision: 357814
URL: https://svnweb.freebsd.org/changeset/base/357814

Log:
  Now that all of the stats framework is
  in FreeBSD, the bits that disabled stats
  when NETFLIX_STATS is not defined are no longer
  needed. Let's remove these bits so that we
  will properly use stats per its definition
  in BBR and Rack.
  
  Sponsored by: Netflix Inc
  Differential Revision:https://reviews.freebsd.org/D23088

Modified:
  head/sys/netinet/tcp_stacks/rack.c
  head/sys/netinet/tcp_stacks/rack_bbr_common.h

Modified: head/sys/netinet/tcp_stacks/rack.c
==
--- head/sys/netinet/tcp_stacks/rack.c  Wed Feb 12 12:23:46 2020
(r357813)
+++ head/sys/netinet/tcp_stacks/rack.c  Wed Feb 12 12:36:55 2020
(r357814)
@@ -1681,7 +1681,7 @@ rack_ack_received(struct tcpcb *tp, struct tcp_rack *r
tp->t_stats_gput_prev);
tp->t_flags &= ~TF_GPUTINPROG;
tp->t_stats_gput_prev = gput;
-
+#ifdef NETFLIX_PEAKRATE
if (tp->t_maxpeakrate) {
/*
 * We update t_peakrate_thr. This gives us 
roughly
@@ -1689,6 +1689,7 @@ rack_ack_received(struct tcpcb *tp, struct tcp_rack *r
 */
tcp_update_peakrate_thr(tp);
}
+#endif
}
 #endif
if (tp->snd_cwnd > tp->snd_ssthresh) {

Modified: head/sys/netinet/tcp_stacks/rack_bbr_common.h
==
--- head/sys/netinet/tcp_stacks/rack_bbr_common.h   Wed Feb 12 12:23:46 
2020(r357813)
+++ head/sys/netinet/tcp_stacks/rack_bbr_common.h   Wed Feb 12 12:36:55 
2020(r357814)
@@ -27,11 +27,6 @@
  * __FBSDID("$FreeBSD$");
  */
 
-/* XXXLAS: Couple STATS to NETFLIX_STATS until stats(3) is fully upstreamed. */
-#ifndefNETFLIX_STATS
-#undef STATS
-#endif
-
 /* Common defines and such used by both RACK and BBR */
 /* Special values for mss accounting array */
 #define TCP_MSS_ACCT_JUSTRET 0
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r356417 - in head/sys/netinet: . tcp_stacks

2020-01-06 Thread Randall Stewart
Author: rrs
Date: Mon Jan  6 15:29:14 2020
New Revision: 356417
URL: https://svnweb.freebsd.org/changeset/base/356417

Log:
  This catches rack up with the recent changes to ECN and
  also commonizes the functions that both the FreeBSD and
  rack stacks use.
  
  Sponsored by:Netflix Inc
  Differential Revision:https://reviews.freebsd.org/D23052

Modified:
  head/sys/netinet/tcp_input.c
  head/sys/netinet/tcp_stacks/rack.c
  head/sys/netinet/tcp_var.h

Modified: head/sys/netinet/tcp_input.c
==
--- head/sys/netinet/tcp_input.cMon Jan  6 13:21:10 2020
(r356416)
+++ head/sys/netinet/tcp_input.cMon Jan  6 15:29:14 2020
(r356417)
@@ -514,7 +514,7 @@ cc_post_recovery(struct tcpcb *tp, struct tcphdr *th)
(tlen <= tp->t_maxseg) &&   \
(V_tcp_delack_enabled || (tp->t_flags & TF_NEEDSYN)))
 
-static void inline
+void inline
 cc_ecnpkt_handler(struct tcpcb *tp, struct tcphdr *th, uint8_t iptos)
 {
INP_WLOCK_ASSERT(tp->t_inpcb);

Modified: head/sys/netinet/tcp_stacks/rack.c
==
--- head/sys/netinet/tcp_stacks/rack.c  Mon Jan  6 13:21:10 2020
(r356416)
+++ head/sys/netinet/tcp_stacks/rack.c  Mon Jan  6 15:29:14 2020
(r356417)
@@ -7715,6 +7715,10 @@ rack_do_segment_nounlock(struct mbuf *m, struct tcphdr
TCPSTAT_INC(tcps_ecn_ect1);
break;
}
+
+   /* Process a packet differently from RFC3168. */
+   cc_ecnpkt_handler(tp, th, iptos);
+
/* Congestion experienced. */
if (thflags & TH_ECE) {
rack_cong_signal(tp, th, CC_ECN);

Modified: head/sys/netinet/tcp_var.h
==
--- head/sys/netinet/tcp_var.h  Mon Jan  6 13:21:10 2020(r356416)
+++ head/sys/netinet/tcp_var.h  Mon Jan  6 15:29:14 2020(r356417)
@@ -891,6 +891,7 @@ voidcc_ack_received(struct tcpcb *tp, struct tcphdr 
*
uint16_t nsegs, uint16_t type);
 void   cc_conn_init(struct tcpcb *tp);
 void   cc_post_recovery(struct tcpcb *tp, struct tcphdr *th);
+voidcc_ecnpkt_handler(struct tcpcb *tp, struct tcphdr *th, uint8_t iptos);
 void   cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, uint32_t type);
 #ifdef TCP_HHOOK
 void   hhook_run_tcp_est_in(struct tcpcb *tp,
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r356414 - head/sys/netinet

2020-01-06 Thread Randall Stewart
Author: rrs
Date: Mon Jan  6 12:48:06 2020
New Revision: 356414
URL: https://svnweb.freebsd.org/changeset/base/356414

Log:
  This change adds a small feature to the tcp logging code. Basically
  a connection can now have a separate tag added to the id.
  
  Obtained from:Lawrence Stewart
  Sponsored by: Netflix Inc
  Differential Revision:https://reviews.freebsd.org/D22866

Modified:
  head/sys/netinet/tcp_log_buf.c
  head/sys/netinet/tcp_log_buf.h
  head/sys/netinet/tcp_var.h

Modified: head/sys/netinet/tcp_log_buf.c
==
--- head/sys/netinet/tcp_log_buf.c  Mon Jan  6 10:52:13 2020
(r356413)
+++ head/sys/netinet/tcp_log_buf.c  Mon Jan  6 12:48:06 2020
(r356414)
@@ -43,7 +43,7 @@ __FBSDID("$FreeBSD$");
 #include 
 #include 
 #include 
-#include 
+#include  /* Must come after qmath.h and tree.h */
 #include 
 
 #include 
@@ -78,6 +78,7 @@ static u_long tcp_log_auto_ratio = 0;
 static volatile u_long tcp_log_auto_ratio_cur = 0;
 static uint32_t tcp_log_auto_mode = TCP_LOG_STATE_TAIL;
 static bool tcp_log_auto_all = false;
+static uint32_t tcp_disable_all_bb_logs = 0;
 
 RB_PROTOTYPE_STATIC(tcp_log_id_tree, tcp_log_id_bucket, tlb_rb, tcp_log_id_cmp)
 
@@ -111,6 +112,10 @@ SYSCTL_UMA_CUR(_net_inet_tcp_bb, OID_AUTO, log_id_tcpc
 SYSCTL_U32(_net_inet_tcp_bb, OID_AUTO, log_version, CTLFLAG_RD, 
_log_version,
 0, "Version of log formats exported");
 
+SYSCTL_U32(_net_inet_tcp_bb, OID_AUTO, disable_all, CTLFLAG_RW,
+_disable_all_bb_logs, TCP_LOG_STATE_HEAD_AUTO,
+"Disable all BB logging for all connections");
+
 SYSCTL_ULONG(_net_inet_tcp_bb, OID_AUTO, log_auto_ratio, CTLFLAG_RW,
 _log_auto_ratio, 0, "Do auto capturing for 1 out of N sessions");
 
@@ -156,7 +161,18 @@ SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, freed, 
 #ifdef INVARIANTS
 #defineTCPLOG_DEBUG_RINGBUF
 #endif
+/* Number of requests to consider a PBCID "active". */
+#defineACTIVE_REQUEST_COUNT10
 
+/* Statistic tracking for "active" PBCIDs. */
+static counter_u64_t tcp_log_pcb_ids_cur;
+static counter_u64_t tcp_log_pcb_ids_tot;
+
+SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, pcb_ids_cur, CTLFLAG_RD,
+_log_pcb_ids_cur, "Number of pcb IDs allocated in the system");
+SYSCTL_COUNTER_U64(_net_inet_tcp_bb, OID_AUTO, pcb_ids_tot, CTLFLAG_RD,
+_log_pcb_ids_tot, "Total number of pcb IDs that have been allocated");
+
 struct tcp_log_mem
 {
STAILQ_ENTRY(tcp_log_mem) tlm_queue;
@@ -240,10 +256,14 @@ struct tcp_log_id_bucket
 * (struct tcp_log_id_bucket *) and (char *) interchangeably.
 */
chartlb_id[TCP_LOG_ID_LEN];
+   chartlb_tag[TCP_LOG_TAG_LEN];
RB_ENTRY(tcp_log_id_bucket) tlb_rb;
struct tcp_log_id_head  tlb_head;
struct mtx  tlb_mtx;
volatile u_int  tlb_refcnt;
+   volatile u_int  tlb_reqcnt;
+   uint32_ttlb_loglimit;
+   uint8_t tlb_logstate;
 };
 
 struct tcp_log_id_node
@@ -285,6 +305,7 @@ tcp_log_selectauto(void)
 * this session.
 */
if (tcp_log_auto_ratio &&
+   (tcp_disable_all_bb_logs == 0) &&
(atomic_fetchadd_long(_log_auto_ratio_cur, 1) %
tcp_log_auto_ratio) == 0)
return (true);
@@ -337,6 +358,7 @@ tcp_log_remove_bucket(struct tcp_log_id_bucket *tlb)
 #endif
}
TCPID_BUCKET_LOCK_DESTROY(tlb);
+   counter_u64_add(tcp_log_pcb_ids_cur, (int64_t)-1);
uma_zfree(tcp_log_bucket_zone, tlb);
 }
 
@@ -484,7 +506,53 @@ tcp_log_grow_tlb(char *tlb_id, struct tcpcb *tp)
 #endif
 }
 
+static void
+tcp_log_increment_reqcnt(struct tcp_log_id_bucket *tlb)
+{
+
+   atomic_fetchadd_int(>tlb_reqcnt, 1);
+}
+
 /*
+ * Associate the specified tag with a particular TCP log ID.
+ * Called with INPCB locked. Returns with it unlocked.
+ * Returns 0 on success or EOPNOTSUPP if the connection has no TCP log ID.
+ */
+int
+tcp_log_set_tag(struct tcpcb *tp, char *tag)
+{
+   struct tcp_log_id_bucket *tlb;
+   int tree_locked;
+
+   INP_WLOCK_ASSERT(tp->t_inpcb);
+
+   tree_locked = TREE_UNLOCKED;
+   tlb = tp->t_lib;
+   if (tlb == NULL) {
+   INP_WUNLOCK(tp->t_inpcb);
+   return (EOPNOTSUPP);
+   }
+
+   TCPID_BUCKET_REF(tlb);
+   INP_WUNLOCK(tp->t_inpcb);
+   TCPID_BUCKET_LOCK(tlb);
+   strlcpy(tlb->tlb_tag, tag, TCP_LOG_TAG_LEN);
+   if (!tcp_log_unref_bucket(tlb, _locked, NULL))
+   TCPID_BUCKET_UNLOCK(tlb);
+
+   if (tree_locked == TREE_WLOCKED) {
+   TCPID_TREE_WLOCK_ASSERT();
+   TCPID_TREE_WUNLOCK();
+   } else if (tree_locked == TREE_RLOCKED) {
+   TCPID_TREE_RLOCK_ASSERT();
+   TCPID_TREE_RUNLOCK();
+   } else
+ 

svn commit: r355859 - in head/sys/netinet: . tcp_stacks

2019-12-17 Thread Randall Stewart
Author: rrs
Date: Tue Dec 17 16:08:07 2019
New Revision: 355859
URL: https://svnweb.freebsd.org/changeset/base/355859

Log:
  This commit is a bit of a rearrangement of deck chairs. It
  gets both rack and bbr ready for the completion of the STATS
  framework in FreeBSD. For now, if you don't have both NETFLIX_STATS
  and STATS on, it disables them. As soon as the rest of the stats framework
  lands we can remove that restriction and then just use stats when
  defined.
  
  Sponsored by: Netflix Inc.
  Differential Revision:https://reviews.freebsd.org/D22479

Modified:
  head/sys/netinet/tcp_stacks/bbr.c
  head/sys/netinet/tcp_stacks/rack.c
  head/sys/netinet/tcp_stacks/rack_bbr_common.c
  head/sys/netinet/tcp_stacks/rack_bbr_common.h
  head/sys/netinet/tcp_stacks/sack_filter.c
  head/sys/netinet/tcp_stacks/tcp_bbr.h
  head/sys/netinet/tcp_stacks/tcp_rack.h
  head/sys/netinet/tcp_subr.c
  head/sys/netinet/tcp_var.h

Modified: head/sys/netinet/tcp_stacks/bbr.c
==
--- head/sys/netinet/tcp_stacks/bbr.c   Tue Dec 17 15:56:48 2019
(r355858)
+++ head/sys/netinet/tcp_stacks/bbr.c   Tue Dec 17 16:08:07 2019
(r355859)
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2016-2019
+ * Copyright (c) 2016-9
  * Netflix Inc.
  *  All rights reserved.
  *
@@ -42,6 +42,7 @@ __FBSDID("$FreeBSD$");
 #include "opt_ratelimit.h"
 #include "opt_kern_tls.h"
 #include 
+#include 
 #include 
 #include 
 #ifdef TCP_HHOOK
@@ -57,9 +58,9 @@ __FBSDID("$FreeBSD$");
 #endif
 #include 
 #include 
+#ifdef STATS
 #include 
 #include 
-#ifdef NETFLIX_STATS
 #include  /* Must come after qmath.h and tree.h */
 #endif
 #include 
@@ -161,9 +162,8 @@ static int32_t bbr_num_pktepo_for_del_limit = BBR_NUM_
 static int32_t bbr_hardware_pacing_limit = 8000;
 static int32_t bbr_quanta = 3; /* How much extra quanta do we get? */
 static int32_t bbr_no_retran = 0;
-static int32_t bbr_tcp_map_entries_limit = 1500;
-static int32_t bbr_tcp_map_split_limit = 256;
 
+
 static int32_t bbr_error_base_paceout = 1; /* usec to pace */
 static int32_t bbr_max_net_error_cnt = 10;
 /* Should the following be dynamic too -- loss wise */
@@ -3381,8 +3381,8 @@ bbr_alloc(struct tcp_bbr *bbr)
 static struct bbr_sendmap *
 bbr_alloc_full_limit(struct tcp_bbr *bbr)
 {
-   if ((bbr_tcp_map_entries_limit > 0) &&
-   (bbr->r_ctl.rc_num_maps_alloced >= bbr_tcp_map_entries_limit)) {
+   if ((V_tcp_map_entries_limit > 0) &&
+   (bbr->r_ctl.rc_num_maps_alloced >= V_tcp_map_entries_limit)) {
BBR_STAT_INC(bbr_alloc_limited);
if (!bbr->alloc_limit_reported) {
bbr->alloc_limit_reported = 1;
@@ -3402,8 +3402,8 @@ bbr_alloc_limit(struct tcp_bbr *bbr, uint8_t limit_typ
 
if (limit_type) {
/* currently there is only one limit type */
-   if (bbr_tcp_map_split_limit > 0 &&
-   bbr->r_ctl.rc_num_split_allocs >= bbr_tcp_map_split_limit) {
+   if (V_tcp_map_split_limit > 0 &&
+   bbr->r_ctl.rc_num_split_allocs >= V_tcp_map_split_limit) {
BBR_STAT_INC(bbr_split_limited);
if (!bbr->alloc_limit_reported) {
bbr->alloc_limit_reported = 1;
@@ -3685,7 +3685,7 @@ bbr_ack_received(struct tcpcb *tp, struct tcp_bbr *bbr
uint32_t cwnd, target_cwnd, saved_bytes, maxseg;
int32_t meth;
 
-#ifdef NETFLIX_STATS
+#ifdef STATS
if ((tp->t_flags & TF_GPUTINPROG) &&
SEQ_GEQ(th->th_ack, tp->gput_ack)) {
/*
@@ -6510,7 +6510,7 @@ tcp_bbr_xmit_timer_commit(struct tcp_bbr *bbr, struct 
}
TCPSTAT_INC(tcps_rttupdated);
tp->t_rttupdated++;
-#ifdef NETFLIX_STATS
+#ifdef STATS
stats_voi_update_abs_u32(tp->t_stats, VOI_TCP_RTT, imax(0, rtt_ticks));
 #endif
/*
@@ -8490,6 +8490,7 @@ dodata:   /* XXX */
return (0);
}
}
+
 #endif
if (DELAY_ACK(tp, bbr, nsegs) || tfo_syn) {
bbr->bbr_segs_rcvd += max(1, nsegs);
@@ -8698,6 +8699,7 @@ bbr_do_fastnewdata(struct mbuf *m, struct tcphdr *th, 
 * reassembly queue and we have enough buffer space to take it.
 */
nsegs = max(1, m->m_pkthdr.lro_nsegs);
+
 #ifdef NETFLIX_SB_LIMITS
if (so->so_rcv.sb_shlim) {
mcnt = m_memcnt(m);
@@ -8746,6 +8748,7 @@ bbr_do_fastnewdata(struct mbuf *m, struct tcphdr *th, 
newsize, so, NULL))
so->so_rcv.sb_flags &= ~SB_AUTOSIZE;
m_adj(m, drop_hdrlen);  /* delayed header drop */
+
 #ifdef NETFLIX_SB_LIMITS
appended =
 #endif
@@ -11561,7 +11564,7 @@ bbr_do_segment_nounlock(struct mbuf *m, struct tcphdr 
 * the scale is zero.
  

svn commit: r354013 - head/sys/netinet/tcp_stacks

2019-10-23 Thread Randall Stewart
Author: rrs
Date: Thu Oct 24 05:54:30 2019
New Revision: 354013
URL: https://svnweb.freebsd.org/changeset/base/354013

Log:
  Fix a small bug in bbr when running under a VM. Basically what
  happens is we are more delayed in the pacer calling in so
  we remove the stack from the pacer and recalculate how
  much time is left after all data has been acknowledged. However
  the comparison was backwards so we end up with a negative
  value in the last_pacing_delay time which causes us to
  add in a huge value to the next pacing time thus stalling
  the connection.
  
  Reported by:  vm2.fina...@gmail.com

Modified:
  head/sys/netinet/tcp_stacks/bbr.c

Modified: head/sys/netinet/tcp_stacks/bbr.c
==
--- head/sys/netinet/tcp_stacks/bbr.c   Thu Oct 24 04:12:38 2019
(r354012)
+++ head/sys/netinet/tcp_stacks/bbr.c   Thu Oct 24 05:54:30 2019
(r354013)
@@ -11814,12 +11814,13 @@ bbr_do_segment_nounlock(struct mbuf *m, struct tcphdr 
uint32_t del;
 
del = lcts - 
bbr->rc_pacer_started;
-   if (del > 
bbr->r_ctl.rc_last_delay_val) {
+   if 
(bbr->r_ctl.rc_last_delay_val > del) {

BBR_STAT_INC(bbr_force_timer_start);

bbr->r_ctl.rc_last_delay_val -= del;
bbr->rc_pacer_started = 
lcts;
} else {
/* We are late */
+   
bbr->r_ctl.rc_last_delay_val = 0;

BBR_STAT_INC(bbr_force_output);

(void)tp->t_fb->tfb_tcp_output(tp);
}
@@ -12278,8 +12279,9 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeva
 * We are early setup to adjust 
 * our slot time.
 */
+   uint64_t merged_val;
+   
bbr->r_ctl.rc_agg_early += 
(bbr->r_ctl.rc_last_delay_val - delay_calc);
-   bbr->r_ctl.rc_last_delay_val = 0;
bbr->r_agg_early_set = 1;
if (bbr->r_ctl.rc_hptsi_agg_delay) {
if (bbr->r_ctl.rc_hptsi_agg_delay >= 
bbr->r_ctl.rc_agg_early) {
@@ -12292,9 +12294,13 @@ bbr_output_wtime(struct tcpcb *tp, const struct timeva
bbr->r_ctl.rc_hptsi_agg_delay = 0;
}
}
+   merged_val = bbr->rc_pacer_started;
+   merged_val <<= 32;
+   merged_val |= bbr->r_ctl.rc_last_delay_val;
bbr_log_pacing_delay_calc(bbr, inp->inp_hpts_calls,
-bbr->r_ctl.rc_agg_early, cts, 
3, 0,
+bbr->r_ctl.rc_agg_early, cts, 
delay_calc, merged_val,
 bbr->r_agg_early_set, 3);
+   bbr->r_ctl.rc_last_delay_val = 0;
BBR_STAT_INC(bbr_early);
delay_calc = 0;
}
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r353490 - head/sys/netinet/tcp_stacks

2019-10-14 Thread Randall Stewart
Author: rrs
Date: Mon Oct 14 13:10:29 2019
New Revision: 353490
URL: https://svnweb.freebsd.org/changeset/base/353490

Log:
  if_hw_tsomaxsegsize needs to be initialized to zero, just
  like in bbr.c and tcp_output.c

Modified:
  head/sys/netinet/tcp_stacks/rack.c

Modified: head/sys/netinet/tcp_stacks/rack.c
==
--- head/sys/netinet/tcp_stacks/rack.c  Mon Oct 14 13:04:04 2019
(r353489)
+++ head/sys/netinet/tcp_stacks/rack.c  Mon Oct 14 13:10:29 2019
(r353490)
@@ -8115,7 +8115,7 @@ rack_output(struct tcpcb *tp)
struct mbuf *m;
struct mbuf *mb;
uint32_t if_hw_tsomaxsegcount = 0;
-   uint32_t if_hw_tsomaxsegsize;
+   uint32_t if_hw_tsomaxsegsize = 0;
int32_t maxseg;
long tot_len_this_send = 0;
struct ip *ip = NULL;
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r353156 - in head/sys: netinet sys

2019-10-06 Thread Randall Stewart
Author: rrs
Date: Sun Oct  6 22:29:02 2019
New Revision: 353156
URL: https://svnweb.freebsd.org/changeset/base/353156

Log:
  Brad Davis identified a problem with the new LRO code: VLANs
  no longer worked. The problem was that the defines used the
  same space as the VLAN id. This commit does three things.
  1) Move the LRO used fields to the PH_per fields. This is
 safe since the entire PH_per is used for IP reassembly
 which LRO code will not hit.
  2) Remove old unused pace fields that are not used in mbuf.h
  3) The VLAN processing is not in the mbuf queueing code. Consequently
 if a VLAN submits to Rack or BBR we need to bypass the mbuf queueing
 for now until rack_bbr_common is updated to handle the VLAN properly.
  
  Reported by:  Brad Davis

Modified:
  head/sys/netinet/tcp_lro.c
  head/sys/sys/mbuf.h

Modified: head/sys/netinet/tcp_lro.c
==
--- head/sys/netinet/tcp_lro.c  Sun Oct  6 22:18:03 2019(r353155)
+++ head/sys/netinet/tcp_lro.c  Sun Oct  6 22:29:02 2019(r353156)
@@ -875,7 +875,14 @@ tcp_lro_flush(struct lro_ctrl *lc, struct lro_entry *l
 
/* Now lets lookup the inp first */
CURVNET_SET(lc->ifp->if_vnet);
-   if (tcplro_stacks_wanting_mbufq == 0)
+   /*
+* XXXRRS Currently the common input handler for
+* mbuf queuing cannot handle VLAN Tagged. This needs
+* to be fixed and the or condition removed (i.e. the 
+* common code should do the right lookup for the vlan
+* tag and anything else that the vlan_input() does).
+*/
+   if ((tcplro_stacks_wanting_mbufq == 0) || (le->m_head->m_flags & 
M_VLANTAG))
goto skip_lookup;
INP_INFO_RLOCK_ET(_tcbinfo, et);
switch (le->eh_type) {

Modified: head/sys/sys/mbuf.h
==
--- head/sys/sys/mbuf.h Sun Oct  6 22:18:03 2019(r353155)
+++ head/sys/sys/mbuf.h Sun Oct  6 22:29:02 2019(r353156)
@@ -194,18 +194,13 @@ struct pkthdr {
 };
 #defineether_vtag  PH_per.sixteen[0]
 #definePH_vt   PH_per
-#definevt_nrecssixteen[0]
-#definetso_segsz   PH_per.sixteen[1]
-#definelro_nsegs   tso_segsz
-#definecsum_phsum  PH_per.sixteen[2]
-#definecsum_data   PH_per.thirtytwo[1]
-#define lro_lenPH_per.sixteen[0] /* inbound during LRO */
-#define lro_csum   PH_per.sixteen[1] /* inbound during LRO */
-#define pace_thoff PH_loc.sixteen[0]
-#define pace_tlen  PH_loc.sixteen[1]
-#define pace_drphdrlen PH_loc.sixteen[2]
-#define pace_tos   PH_loc.eight[6]
-#define pace_lock  PH_loc.eight[7]
+#definevt_nrecssixteen[0]/* mld and v6-ND */
+#definetso_segsz   PH_per.sixteen[1] /* inbound after LRO */
+#definelro_nsegs   tso_segsz /* inbound after LRO */
+#definecsum_data   PH_per.thirtytwo[1] /* inbound from hardware up 
*/
+#define lro_lenPH_loc.sixteen[0] /* inbound during LRO (no 
reassembly) */
+#define lro_csum   PH_loc.sixteen[1] /* inbound during LRO (no reassembly) 
*/
+/* Note PH_loc is used during IP reassembly (all 8 bytes as a ptr) */
 
 /*
  * Description of external storage mapped into mbuf; valid only if M_EXT is
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r352661 - head/sys/netinet/tcp_stacks

2019-09-24 Thread Randall Stewart
Author: rrs
Date: Tue Sep 24 20:36:43 2019
New Revision: 352661
URL: https://svnweb.freebsd.org/changeset/base/352661

Log:
  Let's put (void) in a couple of functions to keep older platforms that
  are stuck with gcc happy (ppc). The changes are needed in both bbr and
  rack.
  
  Obtained from:Michael Tuexen (mtuexen@)

Modified:
  head/sys/netinet/tcp_stacks/bbr.c
  head/sys/netinet/tcp_stacks/rack.c

Modified: head/sys/netinet/tcp_stacks/bbr.c
==
--- head/sys/netinet/tcp_stacks/bbr.c   Tue Sep 24 20:11:55 2019
(r352660)
+++ head/sys/netinet/tcp_stacks/bbr.c   Tue Sep 24 20:36:43 2019
(r352661)
@@ -1174,7 +1174,7 @@ sysctl_bbr_clear_lost(SYSCTL_HANDLER_ARGS)
 }
 
 static void
-bbr_init_sysctls()
+bbr_init_sysctls(void)
 {
struct sysctl_oid *bbr_probertt;
struct sysctl_oid *bbr_hptsi;
@@ -1875,7 +1875,7 @@ bbr_progress_timeout_check(struct tcp_bbr *bbr)
 }
 
 static void
-bbr_counter_destroy()
+bbr_counter_destroy(void)
 {
COUNTER_ARRAY_FREE(bbr_stat_arry, BBR_STAT_SIZE);
COUNTER_ARRAY_FREE(bbr_opts_arry, BBR_OPTS_SIZE);

Modified: head/sys/netinet/tcp_stacks/rack.c
==
--- head/sys/netinet/tcp_stacks/rack.c  Tue Sep 24 20:11:55 2019
(r352660)
+++ head/sys/netinet/tcp_stacks/rack.c  Tue Sep 24 20:36:43 2019
(r352661)
@@ -514,7 +514,7 @@ sysctl_rack_clear(SYSCTL_HANDLER_ARGS)
 
 
 static void
-rack_init_sysctls()
+rack_init_sysctls(void)
 {
struct sysctl_oid *rack_counters;
struct sysctl_oid *rack_attack;
@@ -1512,7 +1512,7 @@ rack_log_sad(struct tcp_rack *rack, int event)
 #endif
 
 static void
-rack_counter_destroy()
+rack_counter_destroy(void)
 {
counter_u64_free(rack_badfr);
counter_u64_free(rack_badfr_bytes);
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


Re: svn commit: r352657 - in head/sys: conf kern modules/tcp modules/tcp/bbr netinet netinet/tcp_stacks sys

2019-09-24 Thread Randall Stewart via svn-src-all
Hmm

It looks like BBR needs an update too, since it also calls the inpcb 
ratelimit detach function… I may
need to reassess this since it should use only the tcp_ratelimit interfaces… 
but for now a simple
ifdef will work

make sure to pick up r352660

(and actually it might be best to include ratelimit.. it costs little and makes 
it so if you
 do get a nic that supports rate limiting you will be able to take advantage of 
it)

R

> On Sep 24, 2019, at 1:06 PM, Randall Stewart  wrote:
> 
> Right
> 
> Thats because GENERIC does not add the optional TCP stacks.
> 
> Ok the problem is fixed with r352659
> 
> The tcp_ratelimit.h had a mixed up ifdef
> 
> i.e.
> 
> #ifdef RATELIMIT
> #ifdef _KERNEL
> 
> definitions
> 
> #else
> 
> macro definitions that return error
> #endif
> #endif
> 
> 
> Which should have  been the opposite
> 
> #ifdef _KERNEL
> #ifdef RATELIMIT
> 
> definitions
> 
> #else
> 
> 
> macros def’s returning errors
> 
> #endif
> #endif
> 
> Reversing that will fix the issue if you add the extra stacks but fail to add 
> RATELIMIT
> 
> R
> 
>> On Sep 24, 2019, at 1:01 PM, Li-Wen Hsu  wrote:
>> 
>> I mean the head (r352657) world and GENERIC kernel can be built
>> successfully on 12.0-R, which is we guaranteed.
>> Also the LINT kernel build is fine on CI:
>> https://ci.freebsd.org/job/FreeBSD-head-amd64-LINT/13781/
>> 
>> So I was curious about the build environment of that build failure.
>> 
>> Best,
>> Li-Wen
>> 
>> On Tue, Sep 24, 2019 at 9:55 PM Randall Stewart  wrote:
>>> 
>>> 12.0R would not have BBR .. its only in head… hmm it could be a issue with 
>>> TCP_RATELIMIT not defined
>>> though I did compile GENERIC without the extra stacks (and without rate 
>>> limit and hpts) and that
>>> compiled ok..
>>> 
>>> R
>>> 
>>>> On Sep 24, 2019, at 12:49 PM, Li-Wen Hsu  wrote:
>>>> 
>>>> On Tue, Sep 24, 2019 at 9:29 PM O. Hartmann  wrote:
>>>>> 
>>>>> -BEGIN PGP SIGNED MESSAGE-
>>>>> Hash: SHA256
>>>>> 
>>>>> Am Tue, 24 Sep 2019 18:18:11 + (UTC)
>>>>> Randall Stewart  schrieb:
>>>>> 
>>>>>> Author: rrs
>>>>>> Date: Tue Sep 24 18:18:11 2019
>>>>>> New Revision: 352657
>>>>>> URL: https://svnweb.freebsd.org/changeset/base/352657
>>>> 
>>>> ...
>>>> 
>>>>> This break kernel builds:
>>>>> 
>>>>> [...]
>>>>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:5613:9: 
>>>>> error: implicit
>>>>> declaration of function 'tcp_chg_pacing_rate' is invalid in C99
>>>>> [-Werror,-Wimplicit-function-declaration] nrte = 
>>>>> tcp_chg_pacing_rate(bbr->r_ctl.crte, ^
>>>>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:5613:9: 
>>>>> error: this function
>>>>> declaration is not a prototype [-Werror,-Wstrict-prototypes]
>>>>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:5613:7: 
>>>>> error: incompatible
>>>>> integer to pointer conversion assigning to 'const struct 
>>>>> tcp_hwrate_limit_table *' from 'int'
>>>>> [-Werror,-Wint-conversion] nrte = tcp_chg_pacing_rate(bbr->r_ctl.crte, ^
>>>>>  --- all_subdir_toecore --- Building
>>>>> /usr/obj/usr/src/amd64.amd64/sys/THOR/modules/usr/src/sys/modules/toecore/toecore.ko
>>>>>  ---
>>>>> all_subdir_tcp --- 
>>>>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:10443:4:
>>>>> error: implicit declaration of function 'tcp_rel_pacing_rate' is invalid 
>>>>> in C99
>>>>> [-Werror,-Wimplicit-function-declaration] 
>>>>> tcp_rel_pacing_rate(bbr->r_ctl.crte, bbr->rc_tp); ^
>>>>> - --- all_subdir_tpm ---
>>>>> ===> tpm (all)
>>>>> - --- all_subdir_tcp ---
>>>>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:10443:4: 
>>>>> error: this function
>>>>> declaration is not a prototype [-Werror,-Wstrict-prototypes] --- 
>>>>> all_subdir_trm ---
>>>>> ===> trm (all)
>>>>> - --- all_subdir_tcp ---
>>>>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:14307:21: 
>>>>> error: implicit
>>>>> declaration of function 'tcp_set_pacing_rate' is invalid in C99
>>>>> [-Werror,-Wimplicit-function-declaration] bbr->r_ctl.crte = 
>>>>> tcp_set_pacing_rate(bbr->rc_tp,
>>>> 
>>>> CI completed a clean build on 12.0-R:
>>>> https://ci.freebsd.org/job/FreeBSD-head-amd64-build/14672/
>>>> 
>>>> What's your build environment and platform?
>>>> 
>>>> Best,
>>>> Li-Wen
>>> 
>>> --
>>> Randall Stewart
>>> r...@netflix.com
>>> 
>>> 
>>> 
> 
> --
> Randall Stewart
> r...@netflix.com
> 
> 
> 

--
Randall Stewart
r...@netflix.com



___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r352660 - head/sys/netinet/tcp_stacks

2019-09-24 Thread Randall Stewart
Author: rrs
Date: Tue Sep 24 20:11:55 2019
New Revision: 352660
URL: https://svnweb.freebsd.org/changeset/base/352660

Log:
  Don't call in_pcbdetach_txrtlmt() when RATELIMIT is not
  compiled into the kernel.

Modified:
  head/sys/netinet/tcp_stacks/bbr.c

Modified: head/sys/netinet/tcp_stacks/bbr.c
==
--- head/sys/netinet/tcp_stacks/bbr.c   Tue Sep 24 20:04:31 2019
(r352659)
+++ head/sys/netinet/tcp_stacks/bbr.c   Tue Sep 24 20:11:55 2019
(r352660)
@@ -14784,10 +14784,12 @@ bbr_set_sockopt(struct socket *so, struct sockopt *sop
bbr->bbr_attempt_hdwr_pace = 0;
} else {
bbr->bbr_hdw_pace_ena = 0;
+#ifdef RATELIMIT
if (bbr->bbr_hdrw_pacing) {
bbr->bbr_hdrw_pacing = 0;
in_pcbdetach_txrtlmt(bbr->rc_inp);
}
+#endif
}
break;
 
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


Re: svn commit: r352657 - in head/sys: conf kern modules/tcp modules/tcp/bbr netinet netinet/tcp_stacks sys

2019-09-24 Thread Randall Stewart via svn-src-all
Right

That's because GENERIC does not add the optional TCP stacks.

Ok the problem is fixed with r352659

The tcp_ratelimit.h had a mixed up ifdef

i.e.

#ifdef RATELIMIT
#ifdef _KERNEL

definitions

#else

macro definitions that return error
#endif
#endif


Which should have  been the opposite

#ifdef _KERNEL
#ifdef RATELIMIT
 
definitions

#else


macros def’s returning errors

#endif
#endif

Reversing that will fix the issue if you add the extra stacks but fail to add 
RATELIMIT

R

> On Sep 24, 2019, at 1:01 PM, Li-Wen Hsu  wrote:
> 
> I mean the head (r352657) world and GENERIC kernel can be built
> successfully on 12.0-R, which is we guaranteed.
> Also the LINT kernel build is fine on CI:
> https://ci.freebsd.org/job/FreeBSD-head-amd64-LINT/13781/
> 
> So I was curious about the build environment of that build failure.
> 
> Best,
> Li-Wen
> 
> On Tue, Sep 24, 2019 at 9:55 PM Randall Stewart  wrote:
>> 
>> 12.0R would not have BBR .. its only in head… hmm it could be a issue with 
>> TCP_RATELIMIT not defined
>> though I did compile GENERIC without the extra stacks (and without rate 
>> limit and hpts) and that
>> compiled ok..
>> 
>> R
>> 
>>> On Sep 24, 2019, at 12:49 PM, Li-Wen Hsu  wrote:
>>> 
>>> On Tue, Sep 24, 2019 at 9:29 PM O. Hartmann  wrote:
>>>> 
>>>> -BEGIN PGP SIGNED MESSAGE-
>>>> Hash: SHA256
>>>> 
>>>> Am Tue, 24 Sep 2019 18:18:11 +0000 (UTC)
>>>> Randall Stewart  schrieb:
>>>> 
>>>>> Author: rrs
>>>>> Date: Tue Sep 24 18:18:11 2019
>>>>> New Revision: 352657
>>>>> URL: https://svnweb.freebsd.org/changeset/base/352657
>>> 
>>> ...
>>> 
>>>> This break kernel builds:
>>>> 
>>>> [...]
>>>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:5613:9: 
>>>> error: implicit
>>>> declaration of function 'tcp_chg_pacing_rate' is invalid in C99
>>>> [-Werror,-Wimplicit-function-declaration] nrte = 
>>>> tcp_chg_pacing_rate(bbr->r_ctl.crte, ^
>>>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:5613:9: 
>>>> error: this function
>>>> declaration is not a prototype [-Werror,-Wstrict-prototypes]
>>>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:5613:7: 
>>>> error: incompatible
>>>> integer to pointer conversion assigning to 'const struct 
>>>> tcp_hwrate_limit_table *' from 'int'
>>>> [-Werror,-Wint-conversion] nrte = tcp_chg_pacing_rate(bbr->r_ctl.crte, ^
>>>>  --- all_subdir_toecore --- Building
>>>> /usr/obj/usr/src/amd64.amd64/sys/THOR/modules/usr/src/sys/modules/toecore/toecore.ko
>>>>  ---
>>>> all_subdir_tcp --- 
>>>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:10443:4:
>>>> error: implicit declaration of function 'tcp_rel_pacing_rate' is invalid 
>>>> in C99
>>>> [-Werror,-Wimplicit-function-declaration] 
>>>> tcp_rel_pacing_rate(bbr->r_ctl.crte, bbr->rc_tp); ^
>>>> - --- all_subdir_tpm ---
>>>> ===> tpm (all)
>>>> - --- all_subdir_tcp ---
>>>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:10443:4: 
>>>> error: this function
>>>> declaration is not a prototype [-Werror,-Wstrict-prototypes] --- 
>>>> all_subdir_trm ---
>>>> ===> trm (all)
>>>> - --- all_subdir_tcp ---
>>>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:14307:21: 
>>>> error: implicit
>>>> declaration of function 'tcp_set_pacing_rate' is invalid in C99
>>>> [-Werror,-Wimplicit-function-declaration] bbr->r_ctl.crte = 
>>>> tcp_set_pacing_rate(bbr->rc_tp,
>>> 
>>> CI completed a clean build on 12.0-R:
>>> https://ci.freebsd.org/job/FreeBSD-head-amd64-build/14672/
>>> 
>>> What's your build environment and platform?
>>> 
>>> Best,
>>> Li-Wen
>> 
>> --
>> Randall Stewart
>> r...@netflix.com
>> 
>> 
>> 

--
Randall Stewart
r...@netflix.com



___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r352659 - head/sys/netinet

2019-09-24 Thread Randall Stewart
Author: rrs
Date: Tue Sep 24 20:04:31 2019
New Revision: 352659
URL: https://svnweb.freebsd.org/changeset/base/352659

Log:
  Fix the ifdefs in tcp_ratelimit.h. They were nested in the wrong
  order: the functions should only be visible inside _KERNEL, with
  the absence of RATELIMIT giving us NULL/error-returning interfaces.
  Instead, non-kernel builds ended up getting the error path.
  Oops.

Modified:
  head/sys/netinet/tcp_ratelimit.h

Modified: head/sys/netinet/tcp_ratelimit.h
==
--- head/sys/netinet/tcp_ratelimit.hTue Sep 24 20:01:20 2019
(r352658)
+++ head/sys/netinet/tcp_ratelimit.hTue Sep 24 20:04:31 2019
(r352659)
@@ -87,8 +87,8 @@ CK_LIST_HEAD(head_tcp_rate_set, tcp_rate_set);
 #define RS_PACING_LT   0x0008  /* Less than requested rate */
 #define RS_PACING_SUB_OK   0x0010  /* If a rate can't be found get the
 * next best rate (highest or lowest). 
*/
-#ifdef RATELIMIT
 #ifdef _KERNEL
+#ifdef RATELIMIT
 #define DETAILED_RATELIMIT_SYSCTL 1/*
 * Undefine this if you don't want
 * detailed rates to appear in
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


Re: svn commit: r352657 - in head/sys: conf kern modules/tcp modules/tcp/bbr netinet netinet/tcp_stacks sys

2019-09-24 Thread Randall Stewart via svn-src-all
Ok I have found it

It's a reversal in an ifdef in tcp_ratelimit.h: it is supposed to be that if
RATELIMIT is not defined the main interfaces return errors, but the
_KERNEL/RATELIMIT ifdef nesting is the reverse of what it should be.

Let me fix that :)

R

> On Sep 24, 2019, at 12:55 PM, Randall Stewart  wrote:
> 
> 12.0R would not have BBR .. its only in head… hmm it could be a issue with 
> TCP_RATELIMIT not defined
> though I did compile GENERIC without the extra stacks (and without rate limit 
> and hpts) and that
> compiled ok..
> 
> R
> 
>> On Sep 24, 2019, at 12:49 PM, Li-Wen Hsu  wrote:
>> 
>> On Tue, Sep 24, 2019 at 9:29 PM O. Hartmann  wrote:
>>> 
>>> -BEGIN PGP SIGNED MESSAGE-
>>> Hash: SHA256
>>> 
>>> Am Tue, 24 Sep 2019 18:18:11 +0000 (UTC)
>>> Randall Stewart  schrieb:
>>> 
>>>> Author: rrs
>>>> Date: Tue Sep 24 18:18:11 2019
>>>> New Revision: 352657
>>>> URL: https://svnweb.freebsd.org/changeset/base/352657
>> 
>> ...
>> 
>>> This break kernel builds:
>>> 
>>> [...]
>>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:5613:9: 
>>> error: implicit
>>> declaration of function 'tcp_chg_pacing_rate' is invalid in C99
>>> [-Werror,-Wimplicit-function-declaration] nrte = 
>>> tcp_chg_pacing_rate(bbr->r_ctl.crte, ^
>>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:5613:9: 
>>> error: this function
>>> declaration is not a prototype [-Werror,-Wstrict-prototypes]
>>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:5613:7: 
>>> error: incompatible
>>> integer to pointer conversion assigning to 'const struct 
>>> tcp_hwrate_limit_table *' from 'int'
>>> [-Werror,-Wint-conversion] nrte = tcp_chg_pacing_rate(bbr->r_ctl.crte, ^
>>>  --- all_subdir_toecore --- Building
>>> /usr/obj/usr/src/amd64.amd64/sys/THOR/modules/usr/src/sys/modules/toecore/toecore.ko
>>>  ---
>>> all_subdir_tcp --- 
>>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:10443:4:
>>> error: implicit declaration of function 'tcp_rel_pacing_rate' is invalid in 
>>> C99
>>> [-Werror,-Wimplicit-function-declaration] 
>>> tcp_rel_pacing_rate(bbr->r_ctl.crte, bbr->rc_tp); ^
>>> - --- all_subdir_tpm ---
>>> ===> tpm (all)
>>> - --- all_subdir_tcp ---
>>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:10443:4: 
>>> error: this function
>>> declaration is not a prototype [-Werror,-Wstrict-prototypes] --- 
>>> all_subdir_trm ---
>>> ===> trm (all)
>>> - --- all_subdir_tcp ---
>>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:14307:21: 
>>> error: implicit
>>> declaration of function 'tcp_set_pacing_rate' is invalid in C99
>>> [-Werror,-Wimplicit-function-declaration] bbr->r_ctl.crte = 
>>> tcp_set_pacing_rate(bbr->rc_tp,
>> 
>> CI completed a clean build on 12.0-R:
>> https://ci.freebsd.org/job/FreeBSD-head-amd64-build/14672/
>> 
>> What's your build environment and platform?
>> 
>> Best,
>> Li-Wen
> 
> --
> Randall Stewart
> r...@netflix.com
> 
> 
> 

--
Randall Stewart
r...@netflix.com



___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


Re: svn commit: r352657 - in head/sys: conf kern modules/tcp modules/tcp/bbr netinet netinet/tcp_stacks sys

2019-09-24 Thread Randall Stewart via svn-src-all
12.0R would not have BBR; it's only in head. Hmm, it could be an issue with
TCP_RATELIMIT not being defined, though I did compile GENERIC without the
extra stacks (and without rate limit and hpts) and that compiled OK.

R

> On Sep 24, 2019, at 12:49 PM, Li-Wen Hsu  wrote:
> 
> On Tue, Sep 24, 2019 at 9:29 PM O. Hartmann  wrote:
>> 
>> -BEGIN PGP SIGNED MESSAGE-
>> Hash: SHA256
>> 
>> Am Tue, 24 Sep 2019 18:18:11 +0000 (UTC)
>> Randall Stewart  schrieb:
>> 
>>> Author: rrs
>>> Date: Tue Sep 24 18:18:11 2019
>>> New Revision: 352657
>>> URL: https://svnweb.freebsd.org/changeset/base/352657
> 
> ...
> 
>> This break kernel builds:
>> 
>> [...]
>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:5613:9: 
>> error: implicit
>> declaration of function 'tcp_chg_pacing_rate' is invalid in C99
>> [-Werror,-Wimplicit-function-declaration] nrte = 
>> tcp_chg_pacing_rate(bbr->r_ctl.crte, ^
>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:5613:9: 
>> error: this function
>> declaration is not a prototype [-Werror,-Wstrict-prototypes]
>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:5613:7: 
>> error: incompatible
>> integer to pointer conversion assigning to 'const struct 
>> tcp_hwrate_limit_table *' from 'int'
>> [-Werror,-Wint-conversion] nrte = tcp_chg_pacing_rate(bbr->r_ctl.crte, ^
>>  --- all_subdir_toecore --- Building
>> /usr/obj/usr/src/amd64.amd64/sys/THOR/modules/usr/src/sys/modules/toecore/toecore.ko
>>  ---
>> all_subdir_tcp --- 
>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:10443:4:
>> error: implicit declaration of function 'tcp_rel_pacing_rate' is invalid in 
>> C99
>> [-Werror,-Wimplicit-function-declaration] 
>> tcp_rel_pacing_rate(bbr->r_ctl.crte, bbr->rc_tp); ^
>> - --- all_subdir_tpm ---
>> ===> tpm (all)
>> - --- all_subdir_tcp ---
>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:10443:4: 
>> error: this function
>> declaration is not a prototype [-Werror,-Wstrict-prototypes] --- 
>> all_subdir_trm ---
>> ===> trm (all)
>> - --- all_subdir_tcp ---
>> /usr/src/sys/modules/tcp/bbr/../../../netinet/tcp_stacks/bbr.c:14307:21: 
>> error: implicit
>> declaration of function 'tcp_set_pacing_rate' is invalid in C99
>> [-Werror,-Wimplicit-function-declaration] bbr->r_ctl.crte = 
>> tcp_set_pacing_rate(bbr->rc_tp,
> 
> CI completed a clean build on 12.0-R:
> https://ci.freebsd.org/job/FreeBSD-head-amd64-build/14672/
> 
> What's your build environment and platform?
> 
> Best,
> Li-Wen

--
Randall Stewart
r...@netflix.com



___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


Re: svn commit: r352657 - in head/sys: conf kern modules/tcp modules/tcp/bbr netinet netinet/tcp_stacks sys

2019-09-24 Thread Randall Stewart via svn-src-all
This is strange I built this and have it running on my machine
with the standard

make buildkern KERNCONF=myconf
and
make installkern KERNCONF=myconf

Why can I build it while it blows up for you? The last time I saw this I was
building in amd64/compile and was getting a warning that somehow became an
error, but this time it *should* have built fine :(

R

> On Sep 24, 2019, at 12:28 PM, O. Hartmann  wrote:
> 
> -BEGIN PGP SIGNED MESSAGE-
> Hash: SHA256
> 
> Am Tue, 24 Sep 2019 18:18:11 +0000 (UTC)
> Randall Stewart  schrieb:
> 
>> Author: rrs
>> Date: Tue Sep 24 18:18:11 2019
>> New Revision: 352657
>> URL: https://svnweb.freebsd.org/changeset/base/352657
>> 
>> Log:
>>  This commit adds BBR (Bottleneck Bandwidth and RTT) congestion control. This
>>  is a completely separate TCP stack (tcp_bbr.ko) that will be built only if
>>  you add the make options WITH_EXTRA_TCP_STACKS=1 and also include the option
>>  TCPHPTS. You can also include the RATELIMIT option if you have a NIC 
>> interface that
>>  supports hardware pacing, BBR understands how to use such a feature.
>> 
>>  Note that this commit also adds in a general purpose time-filter which
>>  allows you to have a min-filter or max-filter. A filter allows you to
>>  have a low (or high) value for some period of time and degrade slowly
>>  to another value as time passes. You can find out the details of
>>  BBR by looking at the original paper at:
>> 
>>  https://queue.acm.org/detail.cfm?id=3022184
>> 
>>  or consult many other web resources you can find on the web
>>  referenced by "BBR congestion control". It should be noted that
>>  BBRv1 (which this is) does tend to unfairness in cases of small
>>  buffered paths, and it will usually get less bandwidth in the case
>>  of large BDP paths(when competing with new-reno or cubic flows). BBR
>>  is still an active research area and we do plan on  implementing V2
>>  of BBR to see if it is an improvement over V1.
>> 
>>  Sponsored by:   Netflix Inc.
>>  Differential Revision:  https://reviews.freebsd.org/D21582
>> 
>> Added:
>>  head/sys/kern/subr_filter.c   (contents, props changed)
>>  head/sys/modules/tcp/bbr/
>>  head/sys/modules/tcp/bbr/Makefile   (contents, props changed)
>>  head/sys/netinet/tcp_stacks/bbr.c   (contents, props changed)
>>  head/sys/netinet/tcp_stacks/tcp_bbr.h   (contents, props changed)
>>  head/sys/sys/tim_filter.h   (contents, props changed)
>> Modified:
>>  head/sys/conf/files
>>  head/sys/modules/tcp/Makefile
>>  head/sys/netinet/ip_output.c
>>  head/sys/netinet/ip_var.h
>>  head/sys/netinet/tcp.h
>>  head/sys/netinet/tcp_stacks/rack.c
>>  head/sys/netinet/tcp_stacks/rack_bbr_common.c
>>  head/sys/netinet/tcp_stacks/rack_bbr_common.h
>>  head/sys/netinet/tcp_stacks/sack_filter.c
>>  head/sys/netinet/tcp_stacks/sack_filter.h
>>  head/sys/netinet/tcp_stacks/tcp_rack.h
>>  head/sys/sys/mbuf.h
>> 
>> Modified: head/sys/conf/files
>> ==
>> --- head/sys/conf/files  Tue Sep 24 17:06:32 2019(r352656)
>> +++ head/sys/conf/files  Tue Sep 24 18:18:11 2019(r352657)
>> @@ -3808,6 +3808,7 @@ kern/subr_epoch.c  standard
>> kern/subr_eventhandler.c standard
>> kern/subr_fattime.c  standard
>> kern/subr_firmware.c optional firmware
>> +kern/subr_filter.c  standard
>> kern/subr_gtaskqueue.c   standard
>> kern/subr_hash.c standard
>> kern/subr_hints.cstandard
>> 
>> Added: head/sys/kern/subr_filter.c
>> ==
>> --- /dev/null00:00:00 1970   (empty, because file is newly added)
>> +++ head/sys/kern/subr_filter.c  Tue Sep 24 18:18:11 2019
>> (r352657)
>> @@ -0,0 +1,482 @@
>> +/*-
>> + * Copyright (c) 2016-2019 Netflix, Inc.
>> + * All rights reserved.
>> + *
>> + * Redistribution and use in source and binary forms, with or without
>> + * modification, are permitted provided that the following conditions
>> + * are met:
>> + * 1. Redistributions of source code must retain the above copyright
>> + *notice, this list of conditions and the following disclaimer.
>> + * 2. Redistributions in binary form must reproduce the above copyright
>> + *notice, this list of conditions and the following disclaimer in the
>> + *documentation and/or other materials provided with the distribution.
&g

svn commit: r352657 - in head/sys: conf kern modules/tcp modules/tcp/bbr netinet netinet/tcp_stacks sys

2019-09-24 Thread Randall Stewart
Author: rrs
Date: Tue Sep 24 18:18:11 2019
New Revision: 352657
URL: https://svnweb.freebsd.org/changeset/base/352657

Log:
  This commit adds BBR (Bottleneck Bandwidth and RTT) congestion control. This
  is a completely separate TCP stack (tcp_bbr.ko) that will be built only if
  you add the make options WITH_EXTRA_TCP_STACKS=1 and also include the option
  TCPHPTS. You can also include the RATELIMIT option if you have a NIC
  interface that supports hardware pacing; BBR understands how to use
  such a feature.
  
  Note that this commit also adds in a general purpose time-filter which
  allows you to have a min-filter or max-filter. A filter allows you to
  have a low (or high) value for some period of time and degrade slowly
  to another value as time passes. You can find out the details of
  BBR by looking at the original paper at:
  
  https://queue.acm.org/detail.cfm?id=3022184
  
  or consult many other web resources you can find on the web
  referenced by "BBR congestion control". It should be noted that
  BBRv1 (which this is) does tend to unfairness in cases of small
  buffered paths, and it will usually get less bandwidth in the case
  of large BDP paths(when competing with new-reno or cubic flows). BBR
  is still an active research area and we do plan on  implementing V2
  of BBR to see if it is an improvement over V1.
  
  Sponsored by: Netflix Inc.
  Differential Revision:https://reviews.freebsd.org/D21582

Added:
  head/sys/kern/subr_filter.c   (contents, props changed)
  head/sys/modules/tcp/bbr/
  head/sys/modules/tcp/bbr/Makefile   (contents, props changed)
  head/sys/netinet/tcp_stacks/bbr.c   (contents, props changed)
  head/sys/netinet/tcp_stacks/tcp_bbr.h   (contents, props changed)
  head/sys/sys/tim_filter.h   (contents, props changed)
Modified:
  head/sys/conf/files
  head/sys/modules/tcp/Makefile
  head/sys/netinet/ip_output.c
  head/sys/netinet/ip_var.h
  head/sys/netinet/tcp.h
  head/sys/netinet/tcp_stacks/rack.c
  head/sys/netinet/tcp_stacks/rack_bbr_common.c
  head/sys/netinet/tcp_stacks/rack_bbr_common.h
  head/sys/netinet/tcp_stacks/sack_filter.c
  head/sys/netinet/tcp_stacks/sack_filter.h
  head/sys/netinet/tcp_stacks/tcp_rack.h
  head/sys/sys/mbuf.h

Modified: head/sys/conf/files
==
--- head/sys/conf/files Tue Sep 24 17:06:32 2019(r352656)
+++ head/sys/conf/files Tue Sep 24 18:18:11 2019(r352657)
@@ -3808,6 +3808,7 @@ kern/subr_epoch.c standard
 kern/subr_eventhandler.c   standard
 kern/subr_fattime.cstandard
 kern/subr_firmware.c   optional firmware
+kern/subr_filter.c  standard
 kern/subr_gtaskqueue.c standard
 kern/subr_hash.c   standard
 kern/subr_hints.c  standard

Added: head/sys/kern/subr_filter.c
==
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/sys/kern/subr_filter.c Tue Sep 24 18:18:11 2019(r352657)
@@ -0,0 +1,482 @@
+/*-
+ * Copyright (c) 2016-2019 Netflix, Inc.
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *notice, this list of conditions and the following disclaimer in the
+ *documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * Author: Randall Stewart 
+ */
+#include 
+__FBSDID("$FreeBSD$");
+#include 
+#include 
+#include 
+#include 
+
+void
+reset_time(struct time_filter *tf, uint32_t time_len)
+{
+   tf->cur_time_limit = time_len;
+}
+
+void
+reset_time_small(struct time_filter_small *tf, uint32_t time_len)
+{
+   tf->cur_time_limit = time_len;
+}
+
+/*
+ * A time filter can be a filter for MIN or MAX. 
+ * You call setup_time_filter() with the pointer to
+ * the filter structure, the

svn commit: r352215 - head/sys/netinet

2019-09-11 Thread Randall Stewart
Author: rrs
Date: Wed Sep 11 15:41:36 2019
New Revision: 352215
URL: https://svnweb.freebsd.org/changeset/base/352215

Log:
  With the recent commit of ktls, we no longer have a
  sb_tls_flags; it's just the sb_flags. Also the ratelimit
  code, now that the definition is in sockbuf.h, does not
  need the ktls.h file (or its predecessor).
  
  Sponsored by: Netflix Inc

Modified:
  head/sys/netinet/tcp_ratelimit.c

Modified: head/sys/netinet/tcp_ratelimit.c
==
--- head/sys/netinet/tcp_ratelimit.cWed Sep 11 15:39:28 2019
(r352214)
+++ head/sys/netinet/tcp_ratelimit.cWed Sep 11 15:41:36 2019
(r352215)
@@ -45,9 +45,6 @@ __FBSDID("$FreeBSD$");
 #include 
 #include 
 #include 
-#ifdef KERN_TLS
-#include 
-#endif
 #include 
 #include 
 #include 
@@ -1069,7 +1066,7 @@ tcp_set_pacing_rate(struct tcpcb *tp, struct ifnet *if
return (NULL);
}
 #ifdef KERN_TLS
-   if (tp->t_inpcb->inp_socket->so_snd.sb_tls_flags & 
SB_TLS_IFNET) {
+   if (tp->t_inpcb->inp_socket->so_snd.sb_flags & SB_TLS_IFNET) {
/*
 * We currently can't do both TLS and hardware
 * pacing
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r351951 - head/sys/netinet

2019-09-06 Thread Randall Stewart
Author: rrs
Date: Fri Sep  6 18:29:48 2019
New Revision: 351951
URL: https://svnweb.freebsd.org/changeset/base/351951

Log:
  This adds in the missing counter initialization which
  I had forgotten to bring over. Oops.
  
  Differential Revision: https://reviews.freebsd.org/D21127

Modified:
  head/sys/netinet/tcp_subr.c
  head/sys/netinet/tcp_var.h

Modified: head/sys/netinet/tcp_subr.c
==
--- head/sys/netinet/tcp_subr.c Fri Sep  6 18:25:42 2019(r351950)
+++ head/sys/netinet/tcp_subr.c Fri Sep  6 18:29:48 2019(r351951)
@@ -1125,6 +1125,13 @@ tcp_init(void)
SHUTDOWN_PRI_DEFAULT);
EVENTHANDLER_REGISTER(maxsockets_change, tcp_zone_change, NULL,
EVENTHANDLER_PRI_ANY);
+
+   tcp_inp_lro_direct_queue = counter_u64_alloc(M_WAITOK);
+   tcp_inp_lro_wokeup_queue = counter_u64_alloc(M_WAITOK);
+   tcp_inp_lro_compressed = counter_u64_alloc(M_WAITOK);
+   tcp_inp_lro_single_push = counter_u64_alloc(M_WAITOK);
+   tcp_inp_lro_locks_taken = counter_u64_alloc(M_WAITOK);
+   tcp_inp_lro_sack_wake = counter_u64_alloc(M_WAITOK);
 #ifdef TCPPCAP
tcp_pcap_init();
 #endif

Modified: head/sys/netinet/tcp_var.h
==
--- head/sys/netinet/tcp_var.h  Fri Sep  6 18:25:42 2019(r351950)
+++ head/sys/netinet/tcp_var.h  Fri Sep  6 18:29:48 2019(r351951)
@@ -887,6 +887,13 @@ struct tcp_function_block *
 find_and_ref_tcp_fb(struct tcp_function_block *fs);
 int tcp_default_ctloutput(struct socket *so, struct sockopt *sopt, struct 
inpcb *inp, struct tcpcb *tp);
 
+extern counter_u64_t tcp_inp_lro_direct_queue;
+extern counter_u64_t tcp_inp_lro_wokeup_queue;
+extern counter_u64_t tcp_inp_lro_compressed;
+extern counter_u64_t tcp_inp_lro_single_push;
+extern counter_u64_t tcp_inp_lro_locks_taken;
+extern counter_u64_t tcp_inp_lro_sack_wake;
+
 uint32_t tcp_maxmtu(struct in_conninfo *, struct tcp_ifcap *);
 uint32_t tcp_maxmtu6(struct in_conninfo *, struct tcp_ifcap *);
 u_int   tcp_maxseg(const struct tcpcb *);
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r351934 - in head/sys: netinet netinet/tcp_stacks sys

2019-09-06 Thread Randall Stewart
Author: rrs
Date: Fri Sep  6 14:25:41 2019
New Revision: 351934
URL: https://svnweb.freebsd.org/changeset/base/351934

Log:
  This adds the final tweaks to LRO that will now allow me
  to add BBR. These changes make it so you can get an
  array of timestamps instead of a compressed ack/data segment.
  BBR uses this to aid with its delivery estimates. We also
  now (via Drew's suggestions) will not go to the expense of
  the tcb lookup if no stack registers to want this feature. If
  HPTS is not present the feature is not present either and you
  just get the compressed behavior.
  
  Sponsored by: Netflix Inc
  Differential Revision: https://reviews.freebsd.org/D21127

Modified:
  head/sys/netinet/tcp_lro.c
  head/sys/netinet/tcp_lro.h
  head/sys/netinet/tcp_stacks/rack_bbr_common.c
  head/sys/sys/mbuf.h

Modified: head/sys/netinet/tcp_lro.c
==
--- head/sys/netinet/tcp_lro.c  Fri Sep  6 12:29:51 2019(r351933)
+++ head/sys/netinet/tcp_lro.c  Fri Sep  6 14:25:41 2019(r351934)
@@ -44,6 +44,8 @@ __FBSDID("$FreeBSD$");
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 
 #include 
@@ -56,11 +58,14 @@ __FBSDID("$FreeBSD$");
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 #include 
 #include 
 #include 
-
+#include 
+#include 
 #include 
 
 #include 
@@ -79,11 +84,47 @@ static int  tcp_lro_rx2(struct lro_ctrl *lc, struct mbu
 SYSCTL_NODE(_net_inet_tcp, OID_AUTO, lro,  CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
 "TCP LRO");
 
+static long tcplro_stacks_wanting_mbufq = 0;
+counter_u64_t tcp_inp_lro_direct_queue;
+counter_u64_t tcp_inp_lro_wokeup_queue;
+counter_u64_t tcp_inp_lro_compressed;
+counter_u64_t tcp_inp_lro_single_push;
+counter_u64_t tcp_inp_lro_locks_taken;
+counter_u64_t tcp_inp_lro_sack_wake;
+
 static unsignedtcp_lro_entries = TCP_LRO_ENTRIES;
+static int32_t hold_lock_over_compress = 0;
+SYSCTL_INT(_net_inet_tcp_lro, OID_AUTO, hold_lock, CTLFLAG_RW,
+&hold_lock_over_compress, 0,
+"Do we hold the lock over the compress of mbufs?");
 SYSCTL_UINT(_net_inet_tcp_lro, OID_AUTO, entries,
 CTLFLAG_RDTUN | CTLFLAG_MPSAFE, &tcp_lro_entries, 0,
 "default number of LRO entries");
+SYSCTL_COUNTER_U64(_net_inet_tcp_lro, OID_AUTO, fullqueue, CTLFLAG_RD,
+&tcp_inp_lro_direct_queue, "Number of lro's fully queued to transport");
+SYSCTL_COUNTER_U64(_net_inet_tcp_lro, OID_AUTO, wokeup, CTLFLAG_RD,
+&tcp_inp_lro_wokeup_queue, "Number of lro's where we woke up transport via 
hpts");
+SYSCTL_COUNTER_U64(_net_inet_tcp_lro, OID_AUTO, compressed, CTLFLAG_RD,
+&tcp_inp_lro_compressed, "Number of lro's compressed and sent to 
transport");
+SYSCTL_COUNTER_U64(_net_inet_tcp_lro, OID_AUTO, single, CTLFLAG_RD,
+&tcp_inp_lro_single_push, "Number of lro's sent with single segment");
+SYSCTL_COUNTER_U64(_net_inet_tcp_lro, OID_AUTO, lockcnt, CTLFLAG_RD,
+&tcp_inp_lro_locks_taken, "Number of lro's inp_wlocks taken");
+SYSCTL_COUNTER_U64(_net_inet_tcp_lro, OID_AUTO, sackwakeups, CTLFLAG_RD,
+&tcp_inp_lro_sack_wake, "Number of wakeups caused by sack/fin");
 
+void
+tcp_lro_reg_mbufq(void)
+{
+   atomic_fetchadd_long(&tcplro_stacks_wanting_mbufq, 1);
+}
+
+void
+tcp_lro_dereg_mbufq(void)
+{
+   atomic_fetchadd_long(&tcplro_stacks_wanting_mbufq, -1);
+}
+
 static __inline void
 tcp_lro_active_insert(struct lro_ctrl *lc, struct lro_head *bucket,
 struct lro_entry *le)
@@ -162,6 +203,36 @@ tcp_lro_init_args(struct lro_ctrl *lc, struct ifnet *i
return (0);
 }
 
+static struct tcphdr *
+tcp_lro_get_th(struct lro_entry *le, struct mbuf *m)
+{
+   struct ether_header *eh;
+   struct tcphdr *th = NULL;
+#ifdef INET6
+   struct ip6_hdr *ip6 = NULL; /* Keep compiler happy. */
+#endif
+#ifdef INET
+   struct ip *ip4 = NULL;  /* Keep compiler happy. */
+#endif
+
+   eh = mtod(m, struct ether_header *);
+   switch (le->eh_type) {
+#ifdef INET6
+   case ETHERTYPE_IPV6:
+   ip6 = (struct ip6_hdr *)(eh + 1);
+   th = (struct tcphdr *)(ip6 + 1);
+   break;
+#endif
+#ifdef INET
+   case ETHERTYPE_IP:
+   ip4 = (struct ip *)(eh + 1);
+   th = (struct tcphdr *)(ip4 + 1);
+   break;
+#endif
+   }
+   return (th);
+}
+
 void
 tcp_lro_free(struct lro_ctrl *lc)
 {
@@ -192,7 +263,6 @@ tcp_lro_free(struct lro_ctrl *lc)
lc->lro_mbuf_data = NULL;
 }
 
-#ifdef TCP_LRO_UPDATE_CSUM
 static uint16_t
 tcp_lro_csum_th(struct tcphdr *th)
 {
@@ -275,7 +345,6 @@ tcp_lro_rx_csum_fixup(struct lro_entry *le, void *l3hd
 
 return (c & 0xffff);
 }
-#endif
 
 static void
 tcp_lro_rx_done(struct lro_ctrl *lc)
@@ -297,7 +366,7 @@ tcp_lro_flush_inactive(struct lro_ctrl *lc, const stru
if (LIST_EMPTY(>lro_active))
return;
 
-   getmicrotime();
+   getmicrouptime();
timevalsub(, timeout);
LIST_FOREACH_SAFE(le, >lro_active, next, le_tmp) {
if 

svn commit: r346094 - head/sys/netinet

2019-09-03 Thread Randall Stewart
Author: rrs
Date: Wed Apr 10 18:58:11 2019
New Revision: 346094
URL: https://svnweb.freebsd.org/changeset/base/346094

Log:
  Fix a small bug in the tcp_log_id where the bucket
  was unlocked and yet the bucket-unlock flag was not
  changed to false. This can cause a panic if INVARIANTS
  is on and we go through the right path (though rare).
  This fixes the correct bug :)
  
  Reported by:  syzbot+179a1ad49f3c4c215...@syzkaller.appspotmail.com
  Reviewed by:  tuexen@

Modified:
  head/sys/netinet/tcp_log_buf.c

Modified: head/sys/netinet/tcp_log_buf.c
==
--- head/sys/netinet/tcp_log_buf.c  Wed Apr 10 18:17:27 2019
(r346093)
+++ head/sys/netinet/tcp_log_buf.c  Wed Apr 10 18:58:11 2019
(r346094)
@@ -752,6 +752,7 @@ refind:
RECHECK_INP();
if (tp->t_lib != NULL) {
TCPID_BUCKET_UNLOCK(tlb);
+   bucket_locked = false;
tlb = NULL;
goto restart;
}


___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r345851 - head/sys/netinet

2019-09-03 Thread Randall Stewart
Author: rrs
Date: Wed Apr  3 19:35:07 2019
New Revision: 345851
URL: https://svnweb.freebsd.org/changeset/base/345851

Log:
  Undo my previous erroneous commit changing the tcp_output kassert.
  Hmm now the question is where did the tcp_log_id change go :o

Modified:
  head/sys/netinet/tcp_output.c

Modified: head/sys/netinet/tcp_output.c
==
--- head/sys/netinet/tcp_output.c   Wed Apr  3 18:35:13 2019
(r345850)
+++ head/sys/netinet/tcp_output.c   Wed Apr  3 19:35:07 2019
(r345851)
@@ -138,8 +138,7 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_auto_lowat
  * non-ACK.
  */
 #define TCP_XMIT_TIMER_ASSERT(tp, len, th_flags)   \
-   KASSERT(((len) == 0 && ((th_flags) &\
-   (TH_SYN | TH_FIN | TH_RST)) != 0) ||\
+   KASSERT(((len) == 0 && ((th_flags) & (TH_SYN | TH_FIN)) == 0) ||\
tcp_timer_active((tp), TT_REXMT) || \
tcp_timer_active((tp), TT_PERSIST), \
("neither rexmt nor persist timer is set"))


___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r351328 - head/sys/netinet/tcp_stacks

2019-08-21 Thread Randall Stewart
Author: rrs
Date: Wed Aug 21 10:45:28 2019
New Revision: 351328
URL: https://svnweb.freebsd.org/changeset/base/351328

Log:
  Fix an issue when TSO and Rack play together. Basically
  a retransmission of the initial SYN (with data) would
  cause us to strip the SYN and decrement/increase offset/len,
  which then gave us a -1 offset and a panic.
  
  Reported by:  Larry Rosenman
  (Michael Tuexen helped me debug this at the IETF)

Modified:
  head/sys/netinet/tcp_stacks/rack.c

Modified: head/sys/netinet/tcp_stacks/rack.c
==
--- head/sys/netinet/tcp_stacks/rack.c  Wed Aug 21 10:42:31 2019
(r351327)
+++ head/sys/netinet/tcp_stacks/rack.c  Wed Aug 21 10:45:28 2019
(r351328)
@@ -7405,9 +7405,6 @@ again:
(tp->t_state == TCPS_SYN_RECEIVED))
flags &= ~TH_SYN;
 #endif
-   sb_offset--, len++;
-   if (sbavail(sb) == 0)
-   len = 0;
}
/*
 * Be careful not to send data and/or FIN on SYN segments. This
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r350973 - head/sys/netinet/tcp_stacks

2019-08-13 Thread Randall Stewart
Author: rrs
Date: Tue Aug 13 12:41:15 2019
New Revision: 350973
URL: https://svnweb.freebsd.org/changeset/base/350973

Log:
  Put back the dependency on HPTS via a module dependency instead of
  a fatal error at compile time. This was taken out by mistake
  when I mis-merged from the 18q22p2 sources of rack in NF. Oops.
  
  Reported by:  sbruno

Modified:
  head/sys/netinet/tcp_stacks/rack.c

Modified: head/sys/netinet/tcp_stacks/rack.c
==
--- head/sys/netinet/tcp_stacks/rack.c  Tue Aug 13 04:54:02 2019
(r350972)
+++ head/sys/netinet/tcp_stacks/rack.c  Tue Aug 13 12:41:15 2019
(r350973)
@@ -128,10 +128,6 @@ uma_zone_t rack_pcb_zone;
 struct sysctl_ctx_list rack_sysctl_ctx;
 struct sysctl_oid *rack_sysctl_root;
 
-#ifndef TCPHPTS
-#error "fatal error missing option TCPHSTS in the build"
-#endif
-
 #define CUM_ACKED 1
 #define SACKED 2
 
@@ -9212,3 +9208,4 @@ static moduledata_t tcp_rack = {
 
 MODULE_VERSION(MODNAME, 1);
 DECLARE_MODULE(MODNAME, tcp_rack, SI_SUB_PROTO_DOMAIN, SI_ORDER_ANY);
+MODULE_DEPEND(MODNAME, tcphpts, 1, 1, 1);
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r350537 - head/sys/netinet

2019-08-02 Thread Randall Stewart
Author: rrs
Date: Fri Aug  2 11:17:07 2019
New Revision: 350537
URL: https://svnweb.freebsd.org/changeset/base/350537

Log:
  Fix one more atomic for i386
  Obtained from:mtue...@freebsd.org

Modified:
  head/sys/netinet/tcp_ratelimit.c

Modified: head/sys/netinet/tcp_ratelimit.c
==
--- head/sys/netinet/tcp_ratelimit.cFri Aug  2 11:05:00 2019
(r350536)
+++ head/sys/netinet/tcp_ratelimit.cFri Aug  2 11:17:07 2019
(r350537)
@@ -945,7 +945,7 @@ use_real_interface:
 * We use an atomic here for accounting so we don't have to
 * use locks when freeing.
 */
-   atomic_add_long(>rs_flows_using, 1);
+   atomic_add_64(>rs_flows_using, 1);
}
epoch_exit_preempt(net_epoch_preempt, );
return (rte);
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r350521 - head/sys/netinet

2019-08-01 Thread Randall Stewart
Author: rrs
Date: Thu Aug  1 20:26:27 2019
New Revision: 350521
URL: https://svnweb.freebsd.org/changeset/base/350521

Log:
  Oops, use atomic_fetchadd_64 rather than atomic_fetchadd_long to
  keep old 32-bit platforms happy.

Modified:
  head/sys/netinet/tcp_ratelimit.c

Modified: head/sys/netinet/tcp_ratelimit.c
==
--- head/sys/netinet/tcp_ratelimit.cThu Aug  1 19:45:34 2019
(r350520)
+++ head/sys/netinet/tcp_ratelimit.cThu Aug  1 20:26:27 2019
(r350521)
@@ -1186,7 +1186,7 @@ tcp_rel_pacing_rate(const struct tcp_hwrate_limit_tabl
 * in order to release our refcount.
 */
rs = __DECONST(struct tcp_rate_set *, crs);
-   pre = atomic_fetchadd_long(&rs->rs_flows_using, -1);
+   pre = atomic_fetchadd_64(&rs->rs_flows_using, -1);
if (pre == 1) {
mtx_lock(_mtx);
/*
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r350501 - in head/sys: conf dev/cxgbe dev/mlx5/mlx5_en net netinet

2019-08-01 Thread Randall Stewart
  */
+   if (!INP_TRY_UPGRADE(inp))
+   return;
+   did_upgrade = 1;
+   } else {
+   did_upgrade = 0;
+   }
+
+   /*
+* NOTE: The so_max_pacing_rate value is read unlocked,
+* because atomic updates are not required since the variable
+* is checked at every mbuf we send. It is assumed that the
+* variable read itself will be atomic.
+*/
+   max_pacing_rate = socket->so_max_pacing_rate;
+
+   error = in_pcboutput_txrtlmt_locked(inp, ifp, mb, max_pacing_rate);
+
if (did_upgrade)
INP_DOWNGRADE(inp);
 }
@@ -3424,4 +3478,14 @@ in_pcboutput_eagain(struct inpcb *inp)
if (did_upgrade)
INP_DOWNGRADE(inp);
 }
+
+static void
+rl_init(void *st)
+{
+   rate_limit_active = counter_u64_alloc(M_WAITOK);
+   rate_limit_alloc_fail = counter_u64_alloc(M_WAITOK);
+   rate_limit_set_ok = counter_u64_alloc(M_WAITOK);
+}
+
+SYSINIT(rl, SI_SUB_PROTO_DOMAININIT, SI_ORDER_ANY, rl_init, NULL);
 #endif /* RATELIMIT */

Modified: head/sys/netinet/in_pcb.h
==
--- head/sys/netinet/in_pcb.h   Thu Aug  1 14:13:04 2019(r350500)
+++ head/sys/netinet/in_pcb.h   Thu Aug  1 14:17:31 2019(r350501)
@@ -883,8 +883,13 @@ struct sockaddr *
in_sockaddr(in_port_t port, struct in_addr *addr);
 void   in_pcbsosetlabel(struct socket *so);
 #ifdef RATELIMIT
-intin_pcbattach_txrtlmt(struct inpcb *, struct ifnet *, uint32_t, 
uint32_t, uint32_t);
+int
+in_pcboutput_txrtlmt_locked(struct inpcb *, struct ifnet *,
+   struct mbuf *, uint32_t);
+intin_pcbattach_txrtlmt(struct inpcb *, struct ifnet *, uint32_t, uint32_t,
+   uint32_t, struct m_snd_tag **);
 void   in_pcbdetach_txrtlmt(struct inpcb *);
+voidin_pcbdetach_tag(struct ifnet *ifp, struct m_snd_tag *mst);
 intin_pcbmodify_txrtlmt(struct inpcb *, uint32_t);
 intin_pcbquery_txrtlmt(struct inpcb *, uint32_t *);
 intin_pcbquery_txrlevel(struct inpcb *, uint32_t *);

Added: head/sys/netinet/tcp_ratelimit.c
==
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/sys/netinet/tcp_ratelimit.cThu Aug  1 14:17:31 2019
(r350501)
@@ -0,0 +1,1234 @@
+/*-
+ *
+ * SPDX-License-Identifier: BSD-3-Clause
+ *
+ * Copyright (c) 2018-2019
+ * Netflix Inc.
+ *  All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *notice, this list of conditions and the following disclaimer in the
+ *documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+/**
+ * Author: Randall Stewart 
+ */
+
+#include 
+__FBSDID("$FreeBSD$");
+#include "opt_inet.h"
+#include "opt_inet6.h"
+#include "opt_ipsec.h"
+#include "opt_tcpdebug.h"
+#include "opt_ratelimit.h"
+#include 
+#include 
+#include 
+#include 
+#include 
+#include 
+#ifdef KERN_TLS
+#include 
+#endif
+#include 
+#include 
+#include 
+#include 
+#define TCPSTATES  /* for logging */
+#include 
+#include 
+#include 
+#ifdef INET6
+#include 
+#endif
+#include 
+#ifndef USECS_IN_SECOND
+#define USECS_IN_SECOND 1000000
+#endif
+/*
+ * For the purposes of each send, what is the size
+ * of an ethernet frame.
+ */
+#ifndef ETHERNET_SEGMENT_SIZE
+#define ETHERNET_SEGMENT_SIZE 1500
+#endif
+MALLOC_DEFINE(M_TCPPACE, "tcp_hwpace", "TCP Hardware pacing memory");
+#ifdef RATELIMIT
+
+#define COMMON_RATE 180500
+uint64_t desired_rates[] = {
+   62500,  /* 500Kbps */
+   180500, /* 1.44Mpbs */
+   375000, /* 3Mbps */
+   500000, /* 4Mbps */

svn commit: r349987 - in head/sys/netinet: . tcp_stacks

2019-07-14 Thread Randall Stewart
Author: rrs
Date: Sun Jul 14 16:05:47 2019
New Revision: 349987
URL: https://svnweb.freebsd.org/changeset/base/349987

Log:
  This is the second in a number of patches needed to
  get BBRv1 into the tree. This fixes the DSACK bug but
  is also needed by BBR. We have two more yet to go:
  one will be for the pacing code (tcp_ratelimit.c) and
  the second will be for the new updated LRO code (tcp_lro.c)
  that allows a transport to know the arrival times of
  packets. After that we should finally be able
  to get BBRv1 into head.
  
  Sponsored by: Netflix Inc
  Differential Revision:https://reviews.freebsd.org/D20908

Modified:
  head/sys/netinet/tcp_output.c
  head/sys/netinet/tcp_sack.c
  head/sys/netinet/tcp_stacks/rack.c
  head/sys/netinet/tcp_var.h

Modified: head/sys/netinet/tcp_output.c
==
--- head/sys/netinet/tcp_output.c   Sun Jul 14 12:04:39 2019
(r349986)
+++ head/sys/netinet/tcp_output.c   Sun Jul 14 16:05:47 2019
(r349987)
@@ -1508,7 +1508,13 @@ timer:
if (SEQ_GT(tp->snd_nxt + xlen, tp->snd_max))
tp->snd_max = tp->snd_nxt + xlen;
}
-
+   if ((error == 0) &&
+   (TCPS_HAVEESTABLISHED(tp->t_state) &&
+(tp->t_flags & TF_SACK_PERMIT) &&
+tp->rcv_numsacks > 0)) {
+   /* Clean up any DSACK's sent */
+   tcp_clean_dsack_blocks(tp);
+   }
if (error) {
/* Record the error. */
TCP_LOG_EVENT(tp, NULL, >so_rcv, >so_snd, TCP_LOG_OUT,

Modified: head/sys/netinet/tcp_sack.c
==
--- head/sys/netinet/tcp_sack.c Sun Jul 14 12:04:39 2019(r349986)
+++ head/sys/netinet/tcp_sack.c Sun Jul 14 16:05:47 2019(r349987)
@@ -279,6 +279,45 @@ tcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_sta
tp->rcv_numsacks = num_head + num_saved;
 }
 
+void
+tcp_clean_dsack_blocks(struct tcpcb *tp)
+{
+   struct sackblk saved_blks[MAX_SACK_BLKS];
+   int num_saved, i;
+
+   INP_WLOCK_ASSERT(tp->t_inpcb);
+   /*
+* Clean up any DSACK blocks that
+* are in our queue of sack blocks.
+* 
+*/
+   num_saved = 0;
+   for (i = 0; i < tp->rcv_numsacks; i++) {
+   tcp_seq start = tp->sackblks[i].start;
+   tcp_seq end = tp->sackblks[i].end;
+   if (SEQ_GEQ(start, end) || SEQ_LEQ(start, tp->rcv_nxt)) {
+   /*
+* Discard this D-SACK block.
+*/
+   continue;
+   }
+   /*
+* Save this SACK block.
+*/
+   saved_blks[num_saved].start = start;
+   saved_blks[num_saved].end = end;
+   num_saved++;
+   }
+   if (num_saved > 0) {
+   /*
+* Copy the saved SACK blocks back.
+*/
+   bcopy(saved_blks, &tp->sackblks[0],
+ sizeof(struct sackblk) * num_saved);
+   }
+   tp->rcv_numsacks = num_saved;
+}
+
 /*
  * Delete all receiver-side SACK information.
  */

Modified: head/sys/netinet/tcp_stacks/rack.c
==
--- head/sys/netinet/tcp_stacks/rack.c  Sun Jul 14 12:04:39 2019
(r349986)
+++ head/sys/netinet/tcp_stacks/rack.c  Sun Jul 14 16:05:47 2019
(r349987)
@@ -5087,9 +5087,8 @@ rack_do_fastnewdata(struct mbuf *m, struct tcphdr *th,
 
 
/* Clean receiver SACK report if present */
-/* if (tp->rcv_numsacks)
+   if (tp->rcv_numsacks)
tcp_clean_sackreport(tp);
-*/
TCPSTAT_INC(tcps_preddat);
tp->rcv_nxt += tlen;
/*
@@ -8537,10 +8536,10 @@ out:
 * retransmit.  In persist state, just set snd_max.
 */
if (error == 0) {
-/* if (TCPS_HAVEESTABLISHED(tp->t_state) &&
+   if (TCPS_HAVEESTABLISHED(tp->t_state) &&
(tp->t_flags & TF_SACK_PERMIT) &&
tp->rcv_numsacks > 0)
-   tcp_clean_dsack_blocks(tp);*/
+   tcp_clean_dsack_blocks(tp);
if (len == 0)
counter_u64_add(rack_out_size[TCP_MSS_ACCT_SNDACK], 1);
else if (len == 1) {

Modified: head/sys/netinet/tcp_var.h
==
--- head/sys/netinet/tcp_var.h  Sun Jul 14 12:04:39 2019(r349986)
+++ head/sys/netinet/tcp_var.h  Sun Jul 14 16:05:47 2019(r349987)
@@ -939,6 +939,7 @@ tcp_seq  tcp_new_isn(struct in_conninfo *);
 
 int tcp_sack_doack(struct tcpcb *, struct tcpopt *, tcp_seq);
 voidtcp_update_sack_list(struct tcpcb *tp, tcp_seq rcv_laststart, tcp_seq 
rcv_lastend);
+void

svn commit: r349942 - head/sys/netinet/tcp_stacks

2019-07-12 Thread Randall Stewart
Author: rrs
Date: Fri Jul 12 11:45:42 2019
New Revision: 349942
URL: https://svnweb.freebsd.org/changeset/base/349942

Log:
  add back the comment around the pending DSACK fixes.

Modified:
  head/sys/netinet/tcp_stacks/rack.c

Modified: head/sys/netinet/tcp_stacks/rack.c
==
--- head/sys/netinet/tcp_stacks/rack.c  Fri Jul 12 09:59:21 2019
(r349941)
+++ head/sys/netinet/tcp_stacks/rack.c  Fri Jul 12 11:45:42 2019
(r349942)
@@ -8537,10 +8537,10 @@ out:
 * retransmit.  In persist state, just set snd_max.
 */
if (error == 0) {
-   if (TCPS_HAVEESTABLISHED(tp->t_state) &&
+/* if (TCPS_HAVEESTABLISHED(tp->t_state) &&
(tp->t_flags & TF_SACK_PERMIT) &&
tp->rcv_numsacks > 0)
-   tcp_clean_dsack_blocks(tp);
+   tcp_clean_dsack_blocks(tp);*/
if (len == 0)
counter_u64_add(rack_out_size[TCP_MSS_ACCT_SNDACK], 1);
else if (len == 1) {
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


Re: svn commit: r349907 - head/sys/netinet/tcp_stacks

2019-07-12 Thread Randall Stewart via svn-src-all
Oops.. that was an error on my part; I will fix it :)


> On Jul 11, 2019, at 4:37 AM, Enji Cooper  wrote:
> 
> 
>> On Jul 10, 2019, at 9:38 PM, Randall Stewart  wrote:
>> 
>> Author: rrs
>> Date: Thu Jul 11 04:38:33 2019
>> New Revision: 349907
>> URL: https://svnweb.freebsd.org/changeset/base/349907
>> 
>> Log:
>>  Update copyright per JBH's suggestions.. thanks.
>> 
>> Modified:
>>  head/sys/netinet/tcp_stacks/rack.c
>> 
>> Modified: head/sys/netinet/tcp_stacks/rack.c
>> ==
>> --- head/sys/netinet/tcp_stacks/rack.c   Thu Jul 11 03:29:25 2019
>> (r349906)
>> +++ head/sys/netinet/tcp_stacks/rack.c   Thu Jul 11 04:38:33 2019
>> (r349907)
>> @@ -1,5 +1,5 @@
>> /*-
>> - * Copyright (c) 2016
>> + * Copyright (c) 2016-2019
>>  *   Netflix Inc.  All rights reserved.
>>  *
>>  * Redistribution and use in source and binary forms, with or without
>> @@ -8537,10 +8537,10 @@ out:
>>   * retransmit.  In persist state, just set snd_max.
>>   */
>>  if (error == 0) {
>> -/*  if (TCPS_HAVEESTABLISHED(tp->t_state) &&
>> +if (TCPS_HAVEESTABLISHED(tp->t_state) &&
>>  (tp->t_flags & TF_SACK_PERMIT) &&
>>  tp->rcv_numsacks > 0)
>> -    tcp_clean_dsack_blocks(tp);*/
>> +tcp_clean_dsack_blocks(tp);
> 
>   Removing this commented out code unfortunately broke the build: 
> https://ci.freebsd.org/job/FreeBSD-head-amd64-LINT/12934/console .
> Thanks,
> -Enji
> 

--
Randall Stewart
r...@netflix.com



___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


Re: svn commit: r349893 - in head/sys: modules/tcp/rack netinet netinet/tcp_stacks sys

2019-07-10 Thread Randall Stewart via svn-src-all
John:

Thanks for the suggestions.. I have committed changes to the two
nits. As to M_PROTO1, I see that in the NF world we have removed
M_PROTO12 and moved the M_PROTO’s up 1 i.e. M_PROTO1 == 0x2000

So for now it is safe, since M_TSTMP_LRO is not yet used.. but in
my upcoming commits I will have to address this, i.e. either do
the same thing or just make it use M_PROTO12.

There are a couple of places M_PROTO1 is used on the receive path
so that would not work there :o

After I get the DSACK fixes in my next change to get BBR in will
be the LRO work…

So maybe I should just settle on using M_PROTO12 for that 
what do you think?

R

> On Jul 10, 2019, at 7:28 PM, John Baldwin  wrote:
> 
> On 7/10/19 1:40 PM, Randall Stewart wrote:
>> Author: rrs
>> Date: Wed Jul 10 20:40:39 2019
>> New Revision: 349893
>> URL: https://svnweb.freebsd.org/changeset/base/349893
>> 
>> Log:
>>  This commit updates rack to what is basically being used at NF as
>>  well as sets in some of the groundwork for committing BBR. The
>>  hpts system is updated as well as some other needed utilities
>>  for the entrance of BBR. This is actually part 1 of 3 more
>>  needed commits which will finally complete with BBRv1 being
>>  added as a new tcp stack.
>> 
>>  Sponsored by:   Netflix Inc.
>>  Differential Revision:  https://reviews.freebsd.org/D20834
> 
> Is it safe for M_TSTMP_LRO to conflict with M_PROTO1?
> 
> Also, it seems you changed the copyright range on rack.c from
> 2016-2019 to just 2016 which I suspect is an accident.
> 
> I would suggest using #error here:
> 
> #ifndef TCPHPTS
> fatal error missing option TCPHSTS in the build;
> #endif
> 
> -- 
> John Baldwin

--
Randall Stewart
r...@netflix.com



___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r349908 - head/sys/netinet/tcp_stacks

2019-07-10 Thread Randall Stewart
Author: rrs
Date: Thu Jul 11 04:40:58 2019
New Revision: 349908
URL: https://svnweb.freebsd.org/changeset/base/349908

Log:
  Update per jhb's other suggestion: use #error when
  we are missing HPTS.

Modified:
  head/sys/netinet/tcp_stacks/rack.c

Modified: head/sys/netinet/tcp_stacks/rack.c
==
--- head/sys/netinet/tcp_stacks/rack.c  Thu Jul 11 04:38:33 2019
(r349907)
+++ head/sys/netinet/tcp_stacks/rack.c  Thu Jul 11 04:40:58 2019
(r349908)
@@ -129,7 +129,7 @@ struct sysctl_ctx_list rack_sysctl_ctx;
 struct sysctl_oid *rack_sysctl_root;
 
 #ifndef TCPHPTS
-fatal error missing option TCPHSTS in the build;
+#error "fatal error missing option TCPHSTS in the build"
 #endif
 
 #define CUM_ACKED 1
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r349907 - head/sys/netinet/tcp_stacks

2019-07-10 Thread Randall Stewart
Author: rrs
Date: Thu Jul 11 04:38:33 2019
New Revision: 349907
URL: https://svnweb.freebsd.org/changeset/base/349907

Log:
  Update copyright per jhb's suggestions.. thanks.

Modified:
  head/sys/netinet/tcp_stacks/rack.c

Modified: head/sys/netinet/tcp_stacks/rack.c
==
--- head/sys/netinet/tcp_stacks/rack.c  Thu Jul 11 03:29:25 2019
(r349906)
+++ head/sys/netinet/tcp_stacks/rack.c  Thu Jul 11 04:38:33 2019
(r349907)
@@ -1,5 +1,5 @@
 /*-
- * Copyright (c) 2016
+ * Copyright (c) 2016-2019
  * Netflix Inc.  All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -8537,10 +8537,10 @@ out:
 * retransmit.  In persist state, just set snd_max.
 */
if (error == 0) {
-/* if (TCPS_HAVEESTABLISHED(tp->t_state) &&
+   if (TCPS_HAVEESTABLISHED(tp->t_state) &&
(tp->t_flags & TF_SACK_PERMIT) &&
tp->rcv_numsacks > 0)
-   tcp_clean_dsack_blocks(tp);*/
+   tcp_clean_dsack_blocks(tp);
if (len == 0)
counter_u64_add(rack_out_size[TCP_MSS_ACCT_SNDACK], 1);
else if (len == 1) {
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r349893 - in head/sys: modules/tcp/rack netinet netinet/tcp_stacks sys

2019-07-10 Thread Randall Stewart
Author: rrs
Date: Wed Jul 10 20:40:39 2019
New Revision: 349893
URL: https://svnweb.freebsd.org/changeset/base/349893

Log:
  This commit updates rack to what is basically being used at NF as
  well as sets in some of the groundwork for committing BBR. The
  hpts system is updated as well as some other needed utilities
  for the entrance of BBR. This is actually part 1 of 3 more
  needed commits which will finally complete with BBRv1 being
  added as a new tcp stack.
  
  Sponsored by: Netflix Inc.
  Differential Revision:https://reviews.freebsd.org/D20834

Added:
  head/sys/netinet/tcp_stacks/rack_bbr_common.c   (contents, props changed)
Modified:
  head/sys/modules/tcp/rack/Makefile
  head/sys/netinet/in_pcb.h
  head/sys/netinet/tcp.h
  head/sys/netinet/tcp_hpts.c
  head/sys/netinet/tcp_hpts.h
  head/sys/netinet/tcp_log_buf.h
  head/sys/netinet/tcp_stacks/rack.c
  head/sys/netinet/tcp_stacks/rack_bbr_common.h
  head/sys/netinet/tcp_var.h
  head/sys/sys/mbuf.h

Modified: head/sys/modules/tcp/rack/Makefile
==
--- head/sys/modules/tcp/rack/Makefile  Wed Jul 10 19:57:48 2019
(r349892)
+++ head/sys/modules/tcp/rack/Makefile  Wed Jul 10 20:40:39 2019
(r349893)
@@ -6,7 +6,7 @@
 
 STACKNAME= rack
 KMOD=  tcp_${STACKNAME}
-SRCS=  rack.c sack_filter.c
+SRCS=  rack.c sack_filter.c rack_bbr_common.c
 
 SRCS+= opt_inet.h opt_inet6.h opt_ipsec.h
 SRCS+= opt_tcpdebug.h

Modified: head/sys/netinet/in_pcb.h
==
--- head/sys/netinet/in_pcb.h   Wed Jul 10 19:57:48 2019(r349892)
+++ head/sys/netinet/in_pcb.h   Wed Jul 10 20:40:39 2019(r349893)
@@ -759,7 +759,9 @@ int inp_so_options(const struct inpcb *inp);
 #defineINP_ORIGDSTADDR 0x0800 /* receive IP dst 
address/port */
 #define INP_CANNOT_DO_ECN  0x1000 /* The stack does not do ECN */
 #defineINP_REUSEPORT_LB0x2000 /* SO_REUSEPORT_LB option is 
set */
-
+#define INP_SUPPORTS_MBUFQ 0x4000 /* Supports the mbuf queue method of 
LRO */
+#define INP_MBUF_QUEUE_READY   0x8000 /* The transport is pacing, inputs 
can be queued */
+#define INP_DONT_SACK_QUEUE0x0001 /* If a sack arrives do not wake me 
*/
 /*
  * Flags passed to in_pcblookup*() functions.
  */

Modified: head/sys/netinet/tcp.h
==
--- head/sys/netinet/tcp.h  Wed Jul 10 19:57:48 2019(r349892)
+++ head/sys/netinet/tcp.h  Wed Jul 10 20:40:39 2019(r349893)
@@ -201,9 +201,8 @@ struct tcphdr {
 #define TCP_RACK_TLP_THRESH   1063 /* RACK TLP theshold i.e. srtt+(srtt/N) */
 #define TCP_RACK_PKT_DELAY1064 /* RACK added ms i.e. rack-rtt + reord + N 
*/
 #define TCP_RACK_TLP_INC_VAR  1065 /* Does TLP include rtt variance in t-o */
-#define TCP_RACK_SESS_CWV 1066 /* Enable RFC7611 cwnd validation on sess */
 #define TCP_BBR_IWINTSO  1067 /* Initial TSO window for BBRs first 
sends */
-#define TCP_BBR_RECFORCE  1068 /* Enter recovery force out a segment 
disregard pacer */
+#define TCP_BBR_RECFORCE  1068 /* Enter recovery force out a segment 
disregard pacer no longer valid */
 #define TCP_BBR_STARTUP_PG1069 /* Startup pacing gain */
 #define TCP_BBR_DRAIN_PG  1070 /* Drain pacing gain */
 #define TCP_BBR_RWND_IS_APP   1071 /* Rwnd limited is considered app limited */
@@ -211,14 +210,18 @@ struct tcphdr {
 #define TCP_BBR_ONE_RETRAN1073 /* Is only one segment allowed out during 
retran */
 #define TCP_BBR_STARTUP_LOSS_EXIT 1074 /* Do we exit a loss during startup if 
not 20% incr */
 #define TCP_BBR_USE_LOWGAIN   1075 /* lower the gain in PROBE_BW enable */
-#define TCP_BBR_LOWGAIN_THRESH 1076 /* How many cycles do we stay in lowgain */
-#define TCP_BBR_LOWGAIN_HALF  1077 /* Do we halfstep lowgain down */
-#define TCP_BBR_LOWGAIN_FD1078 /* Do we force a drain when lowgain in 
place */
+#define TCP_BBR_LOWGAIN_THRESH 1076 /* Unused after 2.3 morphs to TSLIMITS >= 
2.3 */
+#define TCP_BBR_TSLIMITS 1076 /* Do we use experimental Timestamp limiting 
for our algo */
+#define TCP_BBR_LOWGAIN_HALF  1077 /* Unused after 2.3 */
+#define TCP_BBR_PACE_OH1077 /* Reused in 4.2 for pacing overhead 
setting */
+#define TCP_BBR_LOWGAIN_FD1078 /* Unused after 2.3 */
+#define TCP_BBR_HOLD_TARGET 1078   /* For 4.3 on */
 #define TCP_BBR_USEDEL_RATE   1079 /* Enable use of delivery rate for loss 
recovery */
 #define TCP_BBR_MIN_RTO   1080 /* Min RTO in milliseconds */
 #define TCP_BBR_MAX_RTO  1081 /* Max RTO in milliseconds */
 #define TCP_BBR_REC_OVER_HPTS 1082 /* Recovery override htps settings 0/1/3 */
-#define TCP_BBR_UNLIMITED 1083 /* Does BBR, in non-recovery not use cwnd */
+#define TCP_BBR_UNLIMITED 1083 /* Not used before 2.3 and morphs to 
algorithm >= 2.3 */
+#define TCP_BBR_ALGORITHM 

svn commit: r346094 - head/sys/netinet

2019-04-10 Thread Randall Stewart
Author: rrs
Date: Wed Apr 10 18:58:11 2019
New Revision: 346094
URL: https://svnweb.freebsd.org/changeset/base/346094

Log:
  Fix a small bug in the tcp_log_id code where the bucket
  was unlocked and yet the bucket_locked flag was not
  changed to false. This can cause a panic if INVARIANTS
  is on and we go through the right path (though rare).
  This fixes the correct bug :)
  
  Reported by:  syzbot+179a1ad49f3c4c215...@syzkaller.appspotmail.com
  Reviewed by:  tuexen@

Modified:
  head/sys/netinet/tcp_log_buf.c

Modified: head/sys/netinet/tcp_log_buf.c
==
--- head/sys/netinet/tcp_log_buf.c  Wed Apr 10 18:17:27 2019
(r346093)
+++ head/sys/netinet/tcp_log_buf.c  Wed Apr 10 18:58:11 2019
(r346094)
@@ -752,6 +752,7 @@ refind:
RECHECK_INP();
if (tp->t_lib != NULL) {
TCPID_BUCKET_UNLOCK(tlb);
+   bucket_locked = false;
tlb = NULL;
goto restart;
}
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r345851 - head/sys/netinet

2019-04-03 Thread Randall Stewart
Author: rrs
Date: Wed Apr  3 19:35:07 2019
New Revision: 345851
URL: https://svnweb.freebsd.org/changeset/base/345851

Log:
  Undo my previous erroneous commit changing the tcp_output kassert.
  Hmm now the question is where did the tcp_log_id change go :o

Modified:
  head/sys/netinet/tcp_output.c

Modified: head/sys/netinet/tcp_output.c
==
--- head/sys/netinet/tcp_output.c   Wed Apr  3 18:35:13 2019
(r345850)
+++ head/sys/netinet/tcp_output.c   Wed Apr  3 19:35:07 2019
(r345851)
@@ -138,8 +138,7 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_auto_lowat
  * non-ACK.
  */
 #define TCP_XMIT_TIMER_ASSERT(tp, len, th_flags)   \
-   KASSERT(((len) == 0 && ((th_flags) &\
-   (TH_SYN | TH_FIN | TH_RST)) != 0) ||\
+   KASSERT(((len) == 0 && ((th_flags) & (TH_SYN | TH_FIN)) == 0) ||\
tcp_timer_active((tp), TT_REXMT) || \
tcp_timer_active((tp), TT_PERSIST), \
("neither rexmt nor persist timer is set"))
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r345527 - head/sys/netinet

2019-03-26 Thread Randall Stewart
Author: rrs
Date: Tue Mar 26 10:41:27 2019
New Revision: 345527
URL: https://svnweb.freebsd.org/changeset/base/345527

Log:
  Fix a small bug in the tcp_log_id where the bucket
  was unlocked and yet the bucket-unlock flag was not
  changed to false. This can cause a panic if INVARIANTS
  is on and we go through the right path (though rare).
  
  Reported by:  syzbot+179a1ad49f3c4c215...@syzkaller.appspotmail.com
  Reviewed by:  tuexen@
  MFC after:1 week

Modified:
  head/sys/netinet/tcp_output.c

Modified: head/sys/netinet/tcp_output.c
==
--- head/sys/netinet/tcp_output.c   Tue Mar 26 09:46:17 2019
(r345526)
+++ head/sys/netinet/tcp_output.c   Tue Mar 26 10:41:27 2019
(r345527)
@@ -138,7 +138,8 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, sendbuf_auto_lowat
  * non-ACK.
  */
 #define TCP_XMIT_TIMER_ASSERT(tp, len, th_flags)   \
-   KASSERT(((len) == 0 && ((th_flags) & (TH_SYN | TH_FIN)) == 0) ||\
+   KASSERT(((len) == 0 && ((th_flags) &\
+   (TH_SYN | TH_FIN | TH_RST)) != 0) ||\
tcp_timer_active((tp), TT_REXMT) || \
tcp_timer_active((tp), TT_PERSIST), \
("neither rexmt nor persist timer is set"))
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


Re: svn commit: r344099 - head/sys/net

2019-02-21 Thread Randall Stewart via svn-src-all


> On Feb 13, 2019, at 1:10 PM, John Baldwin  wrote:
> 
> On 2/13/19 10:03 AM, Randall Stewart wrote:
>> oh and one other thing..
>> 
>> It was *not* a random IFP.. it was the IFP to the lagg.
>> 
>> I.e. an alloc() was done to the lagg.. and the free was
>> done back to the same IFP (that provided the allocate).
> 
> Yes, that's wrong.  Suppose the route changes so that my traffic is now over
> em0 instead of lagg0 (where em0 isn't a member of the lagg), how do you
> expect if_lagg_free to invoke em0's free routine?  In your case it does,
> but only by accident.  It doesn't work in the other case I described which
> is if you have non-lagg interfaces and a route moves from cc0 to em0.  In
> that case your existing code that is using the wrong ifp will just panic.
> 
> These aren't real alloc routines as the lagg and vlan ones don't allocate
> anything, they pass along the request to the child and the child allocates
> the tag.  Only ifnet's that actually allocate tags should need to free them,
> and you should be using tag->ifp to as the ifp whose if_snd_tag_free works.

But that's what the lagg’s routine does: it uses the tag sent in
to find the real ifp (where the tag was allocated) and calls
if_snd_tag_free() on that.

It's not an accident that it works; it calls the free routine of the
actual interface where the allocation came from.

I don’t see how it would panic.

R

> 
>> R
>> 
>>> On Feb 13, 2019, at 1:02 PM, Randall Stewart  wrote:
>>> 
>>> I disagree. If you define an alloc it is only
>>> reciprocal that you should define a free.
>>> 
>>> The code in question that hit this was changed (its in a version
>>> of rack that has the rate-limit and TLS code).. but I think these
>>> things *should* be balanced.. if you provide an Allocate, you
>>> should also provide a Free… 
>>> 
>>> R
>>> 
>>> 
>>>> On Feb 13, 2019, at 12:09 PM, John Baldwin  wrote:
>>>> 
>>>> On 2/13/19 6:57 AM, Randall Stewart wrote:
>>>>> Author: rrs
>>>>> Date: Wed Feb 13 14:57:59 2019
>>>>> New Revision: 344099
>>>>> URL: https://svnweb.freebsd.org/changeset/base/344099
>>>>> 
>>>>> Log:
>>>>> This commit adds the missing release mechanism for the
>>>>> ratelimiting code. The two modules (lagg and vlan) did have
>>>>> allocation routines, and even though they are indirect (and
>>>>> vector down to the underlying interfaces) they both need to
>>>>> have a free routine (that also vectors down to the actual interface).
>>>>> 
>>>>> Sponsored by: Netflix Inc.
>>>>> Differential Revision:https://reviews.freebsd.org/D19032
>>>> 
>>>> Hmm, I don't understand why you'd ever invoke if_snd_tag_free from anything
>>>> but 'tag->ifp' rather than some other ifp.  What if the route for a 
>>>> connection
>>>> moves so that a tag allocated on cc0 is now on a route that goes over em0?
>>>> You can't expect em0 to have an if_snd_tag_free routine that will know to
>>>> go invoke cxgbe's snd_tag_free.  I think you should always be using
>>>> 'tag->ifp->if_snd_tag_free' to free tags and never using any other ifp.
>>>> 
>>>> That is, I think this should be reverted and that instead you need to fix
>>>> the code invoking if_snd_tag_free to invoke it on the tag's ifp instead of
>>>> some random other ifp.
>>>> 
>>>> -- 
>>>> John Baldwin
>>>> 
>>>> 
>>> 
>>> --
>>> Randall Stewart
>>> r...@netflix.com
>>> 
>>> 
>>> 
>> 
>> --
>> Randall Stewart
>> r...@netflix.com
>> 
>> 
>> 
> 
> 
> -- 
> John Baldwin

--
Randall Stewart
r...@netflix.com



___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


Re: svn commit: r344099 - head/sys/net

2019-02-13 Thread Randall Stewart via svn-src-all
oh and one other thing..

It was *not* a random IFP.. it was the IFP to the lagg.

I.e. an alloc() was done to the lagg.. and the free was
done back to the same IFP (that provided the allocate).

R

> On Feb 13, 2019, at 1:02 PM, Randall Stewart  wrote:
> 
> I disagree. If you define an alloc it is only
> reciprocal that you should define a free.
> 
> The code in question that hit this was changed (its in a version
> of rack that has the rate-limit and TLS code).. but I think these
> things *should* be balanced.. if you provide an Allocate, you
> should also provide a Free… 
> 
> R
> 
> 
>> On Feb 13, 2019, at 12:09 PM, John Baldwin  wrote:
>> 
>> On 2/13/19 6:57 AM, Randall Stewart wrote:
>>> Author: rrs
>>> Date: Wed Feb 13 14:57:59 2019
>>> New Revision: 344099
>>> URL: https://svnweb.freebsd.org/changeset/base/344099
>>> 
>>> Log:
>>> This commit adds the missing release mechanism for the
>>> ratelimiting code. The two modules (lagg and vlan) did have
>>> allocation routines, and even though they are indirect (and
>>> vector down to the underlying interfaces) they both need to
>>> have a free routine (that also vectors down to the actual interface).
>>> 
>>> Sponsored by:   Netflix Inc.
>>> Differential Revision:  https://reviews.freebsd.org/D19032
>> 
>> Hmm, I don't understand why you'd ever invoke if_snd_tag_free from anything
>> but 'tag->ifp' rather than some other ifp.  What if the route for a 
>> connection
>> moves so that a tag allocated on cc0 is now on a route that goes over em0?
>> You can't expect em0 to have an if_snd_tag_free routine that will know to
>> go invoke cxgbe's snd_tag_free.  I think you should always be using
>> 'tag->ifp->if_snd_tag_free' to free tags and never using any other ifp.
>> 
>> That is, I think this should be reverted and that instead you need to fix
>> the code invoking if_snd_tag_free to invoke it on the tag's ifp instead of
>> some random other ifp.
>> 
>> -- 
>> John Baldwin
>> 
>> 
> 
> --
> Randall Stewart
> r...@netflix.com
> 
> 
> 

--
Randall Stewart
r...@netflix.com



___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


Re: svn commit: r344099 - head/sys/net

2019-02-13 Thread Randall Stewart via svn-src-all
I disagree. If you define an alloc it is only
reciprocal that you should define a free.

The code in question that hit this was changed (it's in a version
of rack that has the rate-limit and TLS code).. but I think these
things *should* be balanced.. if you provide an Allocate, you
should also provide a Free… 

R


> On Feb 13, 2019, at 12:09 PM, John Baldwin  wrote:
> 
> On 2/13/19 6:57 AM, Randall Stewart wrote:
>> Author: rrs
>> Date: Wed Feb 13 14:57:59 2019
>> New Revision: 344099
>> URL: https://svnweb.freebsd.org/changeset/base/344099
>> 
>> Log:
>>  This commit adds the missing release mechanism for the
>>  ratelimiting code. The two modules (lagg and vlan) did have
>>  allocation routines, and even though they are indirect (and
>>  vector down to the underlying interfaces) they both need to
>>  have a free routine (that also vectors down to the actual interface).
>> 
>>  Sponsored by:   Netflix Inc.
>>  Differential Revision:  https://reviews.freebsd.org/D19032
> 
> Hmm, I don't understand why you'd ever invoke if_snd_tag_free from anything
> but 'tag->ifp' rather than some other ifp.  What if the route for a connection
> moves so that a tag allocated on cc0 is now on a route that goes over em0?
> You can't expect em0 to have an if_snd_tag_free routine that will know to
> go invoke cxgbe's snd_tag_free.  I think you should always be using
> 'tag->ifp->if_snd_tag_free' to free tags and never using any other ifp.
> 
> That is, I think this should be reverted and that instead you need to fix
> the code invoking if_snd_tag_free to invoke it on the tag's ifp instead of
> some random other ifp.
> 
> -- 
> John Baldwin
> 
> 

--
Randall Stewart
r...@netflix.com



___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r344099 - head/sys/net

2019-02-13 Thread Randall Stewart
Author: rrs
Date: Wed Feb 13 14:57:59 2019
New Revision: 344099
URL: https://svnweb.freebsd.org/changeset/base/344099

Log:
  This commit adds the missing release mechanism for the
  ratelimiting code. The two modules (lagg and vlan) did have
  allocation routines, and even though they are indirect (and
  vector down to the underlying interfaces) they both need to
  have a free routine (that also vectors down to the actual interface).
  
  Sponsored by: Netflix Inc.
  Differential Revision:https://reviews.freebsd.org/D19032

Modified:
  head/sys/net/if_lagg.c
  head/sys/net/if_vlan.c

Modified: head/sys/net/if_lagg.c
==
--- head/sys/net/if_lagg.c  Wed Feb 13 14:39:16 2019(r344098)
+++ head/sys/net/if_lagg.c  Wed Feb 13 14:57:59 2019(r344099)
@@ -133,6 +133,7 @@ static int  lagg_ioctl(struct ifnet *, u_long, caddr_t)
 static int lagg_snd_tag_alloc(struct ifnet *,
union if_snd_tag_alloc_params *,
struct m_snd_tag **);
+static voidlagg_snd_tag_free(struct m_snd_tag *);
 #endif
 static int lagg_setmulti(struct lagg_port *);
 static int lagg_clrmulti(struct lagg_port *);
@@ -514,6 +515,7 @@ lagg_clone_create(struct if_clone *ifc, int unit, cadd
ifp->if_flags = IFF_SIMPLEX | IFF_BROADCAST | IFF_MULTICAST;
 #ifdef RATELIMIT
ifp->if_snd_tag_alloc = lagg_snd_tag_alloc;
+   ifp->if_snd_tag_free = lagg_snd_tag_free;
 #endif
ifp->if_capenable = ifp->if_capabilities = IFCAP_HWSTATS;
 
@@ -1568,6 +1570,13 @@ lagg_snd_tag_alloc(struct ifnet *ifp,
/* forward allocation request */
return (ifp->if_snd_tag_alloc(ifp, params, ppmt));
 }
+
+static void
+lagg_snd_tag_free(struct m_snd_tag *tag)
+{
+   tag->ifp->if_snd_tag_free(tag);
+}
+
 #endif
 
 static int

Modified: head/sys/net/if_vlan.c
==
--- head/sys/net/if_vlan.c  Wed Feb 13 14:39:16 2019(r344098)
+++ head/sys/net/if_vlan.c  Wed Feb 13 14:57:59 2019(r344099)
@@ -267,6 +267,7 @@ static  int vlan_ioctl(struct ifnet *ifp, u_long cmd, c
 #ifdef RATELIMIT
 static int vlan_snd_tag_alloc(struct ifnet *,
 union if_snd_tag_alloc_params *, struct m_snd_tag **);
+static void vlan_snd_tag_free(struct m_snd_tag *);
 #endif
 static void vlan_qflush(struct ifnet *ifp);
 static int vlan_setflag(struct ifnet *ifp, int flag, int status,
@@ -1047,6 +1048,7 @@ vlan_clone_create(struct if_clone *ifc, char *name, si
ifp->if_ioctl = vlan_ioctl;
 #ifdef RATELIMIT
ifp->if_snd_tag_alloc = vlan_snd_tag_alloc;
+   ifp->if_snd_tag_free = vlan_snd_tag_free;
 #endif
ifp->if_flags = VLAN_IFFLAGS;
ether_ifattach(ifp, eaddr);
@@ -1933,5 +1935,11 @@ vlan_snd_tag_alloc(struct ifnet *ifp,
return (EOPNOTSUPP);
/* forward allocation request */
return (ifp->if_snd_tag_alloc(ifp, params, ppmt));
+}
+
+static void
+vlan_snd_tag_free(struct m_snd_tag *tag)
+{
+   tag->ifp->if_snd_tag_free(tag);
 }
 #endif
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r338102 - in head/sys/netinet: . tcp_stacks

2018-08-20 Thread Randall Stewart
Author: rrs
Date: Mon Aug 20 12:43:18 2018
New Revision: 338102
URL: https://svnweb.freebsd.org/changeset/base/338102

Log:
  This change represents a substantial restructure of the way we
  reassemble inbound tcp segments. The old algorithm just blindly
  dropped in segments without coalescing. This meant that every
  segment could take up greater and greater room on the linked list
  of segments. This of course is now subject to a tighter limit (100)
  of segments which in a high BDP situation will cause us to be a
  lot more inefficient as we drop segments beyond 100 entries that
  we receive. What this restructure does is cause the reassembly
  buffer to coalesce segments putting an emphasis on the two
  common cases (which avoid walking the list of segments) i.e.
  where we add to the back of the queue of segments and where we
  add to the front. We also have the reassembly buffer supporting
  a couple of debug options (black box logging as well as counters
  for code coverage). These are compiled out by default but can
  be added by uncommenting the defines.
  
  Sponsored by: Netflix Inc.
  Differential Revision:https://reviews.freebsd.org/D16626

Modified:
  head/sys/netinet/tcp_input.c
  head/sys/netinet/tcp_log_buf.h
  head/sys/netinet/tcp_reass.c
  head/sys/netinet/tcp_stacks/rack.c
  head/sys/netinet/tcp_subr.c
  head/sys/netinet/tcp_usrreq.c
  head/sys/netinet/tcp_var.h

Modified: head/sys/netinet/tcp_input.c
==
--- head/sys/netinet/tcp_input.cMon Aug 20 12:31:39 2018
(r338101)
+++ head/sys/netinet/tcp_input.cMon Aug 20 12:43:18 2018
(r338102)
@@ -1734,7 +1734,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru
tp->snd_nxt == tp->snd_max &&
tiwin && tiwin == tp->snd_wnd && 
((tp->t_flags & (TF_NEEDSYN|TF_NEEDFIN)) == 0) &&
-   LIST_EMPTY(>t_segq) &&
+   SEGQ_EMPTY(tp) &&
((to.to_flags & TOF_TS) == 0 ||
 TSTMP_GEQ(to.to_tsval, tp->ts_recent)) ) {
 
@@ -2440,7 +2440,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru
 * later; if not, do so now to pass queued data to user.
 */
if (tlen == 0 && (thflags & TH_FIN) == 0)
-   (void) tcp_reass(tp, (struct tcphdr *)0, 0,
+   (void) tcp_reass(tp, (struct tcphdr *)0, NULL, 0,
(struct mbuf *)0);
tp->snd_wl1 = th->th_seq - 1;
/* FALLTHROUGH */
@@ -3017,7 +3017,7 @@ dodata:   
/* XXX */
 * fast retransmit can work).
 */
if (th->th_seq == tp->rcv_nxt &&
-   LIST_EMPTY(>t_segq) &&
+   SEGQ_EMPTY(tp) &&
(TCPS_HAVEESTABLISHED(tp->t_state) ||
 tfo_syn)) {
if (DELAY_ACK(tp, tlen) || tfo_syn)
@@ -3042,7 +3042,7 @@ dodata:   
/* XXX */
 * m_adj() doesn't actually frees any mbufs
 * when trimming from the head.
 */
-   thflags = tcp_reass(tp, th, , m);
+   thflags = tcp_reass(tp, th, _start, , m);
tp->t_flags |= TF_ACKNOW;
}
if (tlen > 0 && (tp->t_flags & TF_SACK_PERMIT))

Modified: head/sys/netinet/tcp_log_buf.h
==
--- head/sys/netinet/tcp_log_buf.h  Mon Aug 20 12:31:39 2018
(r338101)
+++ head/sys/netinet/tcp_log_buf.h  Mon Aug 20 12:43:18 2018
(r338102)
@@ -217,7 +217,9 @@ enum tcp_log_events {
BBR_LOG_REDUCE, /* old bbr log reduce for 4.1 and earlier 46*/
TCP_LOG_RTT,/* A rtt (in useconds) is being sampled and 
applied to the srtt algo 47 */
BBR_LOG_SETTINGS_CHG,   /* Settings changed for loss response 48 */
-   TCP_LOG_END /* End (keep at end)49 */
+   BBR_LOG_SRTT_GAIN_EVENT, /* SRTT gaining 49 */
+   TCP_LOG_REASS,  /* Reassembly buffer logging 50 */
+   TCP_LOG_END /* End (keep at end)51 */
 };
 
 enum tcp_log_states {

Modified: head/sys/netinet/tcp_reass.c
==
--- head/sys/netinet/tcp_reass.cMon Aug 20 12:31:39 2018
(r338101)
+++ head/sys/netinet/tcp_reass.cMon Aug 20 12:43:18 2018
(r338102)
@@ -72,15 +72,37 @@ __FBSDID("$FreeBSD$");
 #include 
 #include 
 #include 
+#include 
+#include 
 #include 
 #include 
 #ifdef TCPDEBUG
 #include 
 #endif /* TCPDEBUG */
 
+#define TCP_R_LOG_ADD  1
+#define TCP_R_LOG_LIMIT_REACHED 2
+#define TCP_R_LOG_APPEND   3
+#define 

svn commit: r337455 - head/sys/netinet/tcp_stacks

2018-08-08 Thread Randall Stewart
Author: rrs
Date: Wed Aug  8 13:36:49 2018
New Revision: 337455
URL: https://svnweb.freebsd.org/changeset/base/337455

Log:
  Fix a small bug in rack where it will
  end up sending the FIN twice.
  Sponsored by: Netflix Inc.
  Differential Revision:https://reviews.freebsd.org/D16604

Modified:
  head/sys/netinet/tcp_stacks/rack.c

Modified: head/sys/netinet/tcp_stacks/rack.c
==
--- head/sys/netinet/tcp_stacks/rack.c  Wed Aug  8 12:08:46 2018
(r337454)
+++ head/sys/netinet/tcp_stacks/rack.c  Wed Aug  8 13:36:49 2018
(r337455)
@@ -7603,13 +7603,10 @@ dontupdate:
 * If our state indicates that FIN should be sent and we have not
 * yet done so, then we need to send.
 */
-   if (flags & TH_FIN) {
-   if ((tp->t_flags & TF_SENTFIN) ||
-   (((tp->t_flags & TF_SENTFIN) == 0) &&
-(tp->snd_nxt == tp->snd_una))) {
-   pass = 11;
-   goto send;
-   }
+   if ((flags & TH_FIN) &&
+   (tp->snd_nxt == tp->snd_una)) {
+   pass = 11;
+   goto send;
}
/*
 * No reason to send a segment, just return.
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r337375 - head/sys/netinet/tcp_stacks

2018-08-06 Thread Randall Stewart
Author: rrs
Date: Mon Aug  6 09:22:07 2018
New Revision: 337375
URL: https://svnweb.freebsd.org/changeset/base/337375

Log:
  This fixes a bug in Rack where we were
  not properly using the correct value for
  Delayed Ack.
  
  Sponsored by: Netflix Inc.
  Differential Revision: https://reviews.freebsd.org/D16579

Modified:
  head/sys/netinet/tcp_stacks/rack.c

Modified: head/sys/netinet/tcp_stacks/rack.c
==
--- head/sys/netinet/tcp_stacks/rack.c  Mon Aug  6 08:40:02 2018
(r337374)
+++ head/sys/netinet/tcp_stacks/rack.c  Mon Aug  6 09:22:07 2018
(r337375)
@@ -2275,7 +2275,7 @@ rack_start_hpts_timer(struct tcp_rack *rack, struct tc
}
hpts_timeout = rack_timer_start(tp, rack, cts);
if (tp->t_flags & TF_DELACK) {
-   delayed_ack = tcp_delacktime;
+   delayed_ack = TICKS_2_MSEC(tcp_delacktime);
rack->r_ctl.rc_hpts_flags |= PACE_TMR_DELACK;
}
if (delayed_ack && ((hpts_timeout == 0) ||
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r336893 - head/sys/netinet/tcp_stacks

2018-07-30 Thread Randall Stewart
Author: rrs
Date: Mon Jul 30 10:23:29 2018
New Revision: 336893
URL: https://svnweb.freebsd.org/changeset/base/336893

Log:
  This fixes a hole where rack could end up
  sending an invalid segment into the reassembly
  queue. This would happen if you enabled the
  data after close option.
  
  Sponsored by: Netflix
  Differential Revision: https://reviews.freebsd.org/D16453

Modified:
  head/sys/netinet/tcp_stacks/rack.c

Modified: head/sys/netinet/tcp_stacks/rack.c
==
--- head/sys/netinet/tcp_stacks/rack.c  Mon Jul 30 09:50:26 2018
(r336892)
+++ head/sys/netinet/tcp_stacks/rack.c  Mon Jul 30 10:23:29 2018
(r336893)
@@ -4657,7 +4657,6 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, s
 
rack = (struct tcp_rack *)tp->t_fb_ptr;
INP_WLOCK_ASSERT(tp->t_inpcb);
-
nsegs = max(1, m->m_pkthdr.lro_nsegs);
if ((thflags & TH_ACK) &&
(SEQ_LT(tp->snd_wl1, th->th_seq) ||
@@ -4686,6 +4685,10 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, s
tp->snd_nxt = tp->snd_max;
/* Make sure we output to start the timer */
rack->r_wanted_output++;
+   }
+   if (tp->t_flags2 & TF2_DROP_AF_DATA) {
+   m_freem(m);
+   return (0);
}
/*
 * Process segments with URG.
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r336672 - in head/sys: modules/tcp modules/tcp/fastpath netinet/tcp_stacks

2018-07-24 Thread Randall Stewart
Author: rrs
Date: Tue Jul 24 14:55:47 2018
New Revision: 336672
URL: https://svnweb.freebsd.org/changeset/base/336672

Log:
  Delete the example tcp stack "fastpath" which
  was only put in as an example.
  
  Sponsored by: Netflix inc.
  Differential Revision:https://reviews.freebsd.org/D16420

Deleted:
  head/sys/modules/tcp/fastpath/
  head/sys/netinet/tcp_stacks/fastpath.c
Modified:
  head/sys/modules/tcp/Makefile

Modified: head/sys/modules/tcp/Makefile
==
--- head/sys/modules/tcp/Makefile   Tue Jul 24 13:31:50 2018
(r336671)
+++ head/sys/modules/tcp/Makefile   Tue Jul 24 14:55:47 2018
(r336672)
@@ -6,12 +6,10 @@ SYSDIR?=${SRCTOP}/sys
 .include "${SYSDIR}/conf/kern.opts.mk"
 
 SUBDIR=\
-   ${_tcp_fastpath} \
 ${_tcp_rack} \
${_tcpmd5} \
 
 .if ${MK_EXTRA_TCP_STACKS} != "no" || defined(ALL_MODULES)
-_tcp_fastpath= fastpath
 _tcp_rack= rack
 .endif
 
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r336465 - in head/sys/netinet: . tcp_stacks

2018-07-18 Thread Randall Stewart
Author: rrs
Date: Wed Jul 18 22:49:53 2018
New Revision: 336465
URL: https://svnweb.freebsd.org/changeset/base/336465

Log:
  Bump the ICMP echo limits to match the RFC
  
  Reviewed by:  tuexen
  Sponsored by: Netflix Inc.
  Differential Revision:https://reviews.freebsd.org/D16333

Modified:
  head/sys/netinet/ip_icmp.c
  head/sys/netinet/tcp_stacks/rack.c

Modified: head/sys/netinet/ip_icmp.c
==
--- head/sys/netinet/ip_icmp.c  Wed Jul 18 22:45:45 2018(r336464)
+++ head/sys/netinet/ip_icmp.c  Wed Jul 18 22:49:53 2018(r336465)
@@ -139,8 +139,8 @@ static VNET_DEFINE(int, icmp_rfi) = 0;
 SYSCTL_INT(_net_inet_icmp, OID_AUTO, reply_from_interface, CTLFLAG_VNET | 
CTLFLAG_RW,
_NAME(icmp_rfi), 0,
"ICMP reply from incoming interface for non-local packets");
-
-static VNET_DEFINE(int, icmp_quotelen) = 8;
+/* Router requirements RFC 1812 section 4.3.2.3 requires 576 - 28. */
+static VNET_DEFINE(int, icmp_quotelen) = 548;
 #defineV_icmp_quotelen VNET(icmp_quotelen)
 SYSCTL_INT(_net_inet_icmp, OID_AUTO, quotelen, CTLFLAG_VNET | CTLFLAG_RW,
_NAME(icmp_quotelen), 0,

Modified: head/sys/netinet/tcp_stacks/rack.c
==
--- head/sys/netinet/tcp_stacks/rack.c  Wed Jul 18 22:45:45 2018
(r336464)
+++ head/sys/netinet/tcp_stacks/rack.c  Wed Jul 18 22:49:53 2018
(r336465)
@@ -1627,7 +1627,6 @@ rack_process_rst(struct mbuf *m, struct tcphdr *th, st
 static void
 rack_challenge_ack(struct mbuf *m, struct tcphdr *th, struct tcpcb *tp, 
int32_t * ret_val)
 {
-
INP_INFO_RLOCK_ASSERT(_tcbinfo);
 
TCPSTAT_INC(tcps_badsyn);
@@ -6103,7 +6102,6 @@ rack_do_lastack(struct mbuf *m, struct tcphdr *th, str
return (ret_val);
}
if (ourfinisacked) {
-
INP_INFO_RLOCK_ASSERT(_tcbinfo);
tp = tcp_close(tp);
rack_do_drop(m, tp);
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r335502 - head/sys/netinet

2018-06-21 Thread Randall Stewart
Author: rrs
Date: Thu Jun 21 21:03:58 2018
New Revision: 335502
URL: https://svnweb.freebsd.org/changeset/base/335502

Log:
  This adds in an optimization so that we only walk one
  time through the mbuf chain during copy and TSO limiting.
  It is used by both Rack and now the FreeBSD stack.
  Sponsored by: Netflix Inc
  Differential Revision: https://reviews.freebsd.org/D15937

Modified:
  head/sys/netinet/tcp_output.c

Modified: head/sys/netinet/tcp_output.c
==
--- head/sys/netinet/tcp_output.c   Thu Jun 21 20:18:23 2018
(r335501)
+++ head/sys/netinet/tcp_output.c   Thu Jun 21 21:03:58 2018
(r335502)
@@ -209,6 +209,8 @@ tcp_output(struct tcpcb *tp)
int32_t len;
uint32_t recwin, sendwin;
int off, flags, error = 0;  /* Keep compiler happy */
+   u_int if_hw_tsomaxsegcount = 0;
+   u_int if_hw_tsomaxsegsize;
struct mbuf *m;
struct ip *ip = NULL;
 #ifdef TCPDEBUG
@@ -879,9 +881,6 @@ send:
 
if (tso) {
u_int if_hw_tsomax;
-   u_int if_hw_tsomaxsegcount;
-   u_int if_hw_tsomaxsegsize;
-   struct mbuf *mb;
u_int moff;
int max_len;
 
@@ -913,66 +912,7 @@ send:
len = max_len;
}
}
-
/*
-* Check if we should limit by maximum segment
-* size and count:
-*/
-   if (if_hw_tsomaxsegcount != 0 &&
-   if_hw_tsomaxsegsize != 0) {
-   /*
-* Subtract one segment for the LINK
-* and TCP/IP headers mbuf that will
-* be prepended to this mbuf chain
-* after the code in this section
-* limits the number of mbufs in the
-* chain to if_hw_tsomaxsegcount.
-*/
-   if_hw_tsomaxsegcount -= 1;
-   max_len = 0;
-   mb = sbsndmbuf(>so_snd, off, );
-
-   while (mb != NULL && max_len < len) {
-   u_int mlen;
-   u_int frags;
-
-   /*
-* Get length of mbuf fragment
-* and how many hardware frags,
-* rounded up, it would use:
-*/
-   mlen = (mb->m_len - moff);
-   frags = howmany(mlen,
-   if_hw_tsomaxsegsize);
-
-   /* Handle special case: Zero Length 
Mbuf */
-   if (frags == 0)
-   frags = 1;
-
-   /*
-* Check if the fragment limit
-* will be reached or exceeded:
-*/
-   if (frags >= if_hw_tsomaxsegcount) {
-   max_len += min(mlen,
-   if_hw_tsomaxsegcount *
-   if_hw_tsomaxsegsize);
-   break;
-   }
-   max_len += mlen;
-   if_hw_tsomaxsegcount -= frags;
-   moff = 0;
-   mb = mb->m_next;
-   }
-   if (max_len <= 0) {
-   len = 0;
-   } else if (len > max_len) {
-   sendalot = 1;
-   len = max_len;
-   }
-   }
-
-   /*
 * Prevent the last segment from being
 * fractional unless the send sockbuf can be
 * emptied:
@@ -1006,7 +946,6 @@ send:
 */
if (tp->t_flags & TF_NEEDFIN)
sendalot = 1;
-
} else {
len = tp->t_maxseg - optlen - ipoptlen;
sendalot = 1;
@@ 

svn commit: r335364 - head/sys/netinet/tcp_stacks

2018-06-19 Thread Randall Stewart
Author: rrs
Date: Tue Jun 19 11:20:28 2018
New Revision: 335364
URL: https://svnweb.freebsd.org/changeset/base/335364

Log:
  Make sure that the t_peakrate_thr is not compiled in
  by default until NF can upstream it.
  
  Reviewed by:  and suggested lstewart
  Sponsored by: Netflix Inc.

Modified:
  head/sys/netinet/tcp_stacks/rack.c

Modified: head/sys/netinet/tcp_stacks/rack.c
==
--- head/sys/netinet/tcp_stacks/rack.c  Tue Jun 19 11:06:36 2018
(r335363)
+++ head/sys/netinet/tcp_stacks/rack.c  Tue Jun 19 11:20:28 2018
(r335364)
@@ -1206,7 +1206,7 @@ rack_ack_received(struct tcpcb *tp, struct tcp_rack *r
tp->t_stats_gput_prev);
tp->t_flags &= ~TF_GPUTINPROG;
tp->t_stats_gput_prev = gput;
-
+#ifdef NETFLIX_CWV
if (tp->t_maxpeakrate) {
/*
 * We update t_peakrate_thr. This gives us 
roughly
@@ -1214,6 +1214,7 @@ rack_ack_received(struct tcpcb *tp, struct tcp_rack *r
 */
tcp_update_peakrate_thr(tp);
}
+#endif
}
 #endif
if (tp->snd_cwnd > tp->snd_ssthresh) {
@@ -1267,11 +1268,11 @@ rack_ack_received(struct tcpcb *tp, struct tcp_rack *r
tcp_newcwv_update_pipeack(tp, data);
}
}
-#endif
/* we enforce max peak rate if it is set. */
if (tp->t_peakrate_thr && tp->snd_cwnd > tp->t_peakrate_thr) {
tp->snd_cwnd = tp->t_peakrate_thr;
}
+#endif
 }
 
 static void
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r335361 - head/sys/netinet

2018-06-18 Thread Randall Stewart
Author: rrs
Date: Tue Jun 19 05:28:14 2018
New Revision: 335361
URL: https://svnweb.freebsd.org/changeset/base/335361

Log:
  Move the tp set back to where it was before
  we started playing with the VNET sets. This
  way we have verified the INP settings before
  we go to the trouble of de-referencing it.
  
  Reviewed by:  and suggested by lstewart
  Sponsored by: Netflix Inc.

Modified:
  head/sys/netinet/tcp_hpts.c

Modified: head/sys/netinet/tcp_hpts.c
==
--- head/sys/netinet/tcp_hpts.c Tue Jun 19 05:01:07 2018(r335360)
+++ head/sys/netinet/tcp_hpts.c Tue Jun 19 05:28:14 2018(r335361)
@@ -1158,7 +1158,6 @@ tcp_input_data(struct tcp_hpts_entry *hpts, struct tim
hpts->p_inp = inp;
drop_reason = inp->inp_hpts_drop_reas;
inp->inp_in_input = 0;
-   tp = intotcpcb(inp);
mtx_unlock(>p_mtx);
CURVNET_SET(inp->inp_vnet);
if (drop_reason) {
@@ -1183,6 +1182,7 @@ out:
mtx_lock(>p_mtx);
continue;
}
+   tp = intotcpcb(inp);
if ((tp == NULL) || (tp->t_inpcb == NULL)) {
goto out;
}
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r335317 - head/sys/netinet

2018-06-18 Thread Randall Stewart
Author: rrs
Date: Mon Jun 18 14:10:12 2018
New Revision: 335317
URL: https://svnweb.freebsd.org/changeset/base/335317

Log:
  Move to using the inp->vnet pointer as suggested by lstewart.
  This is far better since the hpts system is using the inp
  as its basis anyway. Unfortunately his comments came late.
  
  Sponsored by: Netflix Inc.

Modified:
  head/sys/netinet/tcp_hpts.c

Modified: head/sys/netinet/tcp_hpts.c
==
--- head/sys/netinet/tcp_hpts.c Mon Jun 18 13:49:44 2018(r335316)
+++ head/sys/netinet/tcp_hpts.c Mon Jun 18 14:10:12 2018(r335317)
@@ -1216,7 +1216,7 @@ tcp_input_data(struct tcp_hpts_entry *hpts, struct tim
inp->inp_in_input = 0;
tp = intotcpcb(inp);
mtx_unlock(>p_mtx);
-   CURVNET_SET(tp->t_vnet);
+   CURVNET_SET(inp->inp_vnet);
if (drop_reason) {
INP_INFO_RLOCK(_tcbinfo);
ti_locked = TI_RLOCKED;
@@ -1589,7 +1589,7 @@ out_now:
getmicrouptime();
cts = tcp_tv_to_usectick();
}
-   CURVNET_SET(tp->t_vnet);
+   CURVNET_SET(inp->inp_vnet);
/*
 * There is a hole here, we get the refcnt on the
 * inp so it will still be preserved but to make
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r335106 - head/sys/netinet/tcp_stacks

2018-06-13 Thread Randall Stewart
Author: rrs
Date: Thu Jun 14 03:27:42 2018
New Revision: 335106
URL: https://svnweb.freebsd.org/changeset/base/335106

Log:
  This fixes several bugs that Larry Rosenman helped me find in
  Rack with respect to its handling of TCP Fast Open. Several
  fixes all related to TFO are included in this commit:
  1) Handling of non-TFO retransmissions
  2) Building the proper send-map when we are doing TFO
  3) Dealing with the ack that comes back that includes the
 SYN and data.
  
  It appears that with this commit TFO now works :-)
  
  Thanks Larry for all your help!!
  
  Sponsored by: Netflix Inc.
  Differential Revision:https://reviews.freebsd.org/D15758

Modified:
  head/sys/netinet/tcp_stacks/rack.c

Modified: head/sys/netinet/tcp_stacks/rack.c
==
--- head/sys/netinet/tcp_stacks/rack.c  Thu Jun 14 02:30:43 2018
(r335105)
+++ head/sys/netinet/tcp_stacks/rack.c  Thu Jun 14 03:27:42 2018
(r335106)
@@ -2083,6 +2083,8 @@ rack_timer_start(struct tcpcb *tp, struct tcp_rack *ra
/* We can't start any timer in persists */
return (rack_get_persists_timer_val(tp, rack));
}
+   if (tp->t_state < TCPS_ESTABLISHED)
+   goto activate_rxt;
rsm = TAILQ_FIRST(>r_ctl.rc_tmap);
if (rsm == NULL) {
/* Nothing on the send map */
@@ -3385,8 +3387,15 @@ again:
rsm->r_tim_lastsent[0] = ts;
rsm->r_rtr_cnt = 1;
rsm->r_rtr_bytes = 0;
-   rsm->r_start = seq_out;
-   rsm->r_end = rsm->r_start + len;
+   if (th_flags & TH_SYN) {
+   /* The data space is one beyond snd_una */
+   rsm->r_start = seq_out + 1;
+   rsm->r_end = rsm->r_start + (len - 1);
+   } else {
+   /* Normal case */
+   rsm->r_start = seq_out;
+   rsm->r_end = rsm->r_start + len;
+   }
rsm->r_sndcnt = 0;
TAILQ_INSERT_TAIL(>r_ctl.rc_map, rsm, r_next);
TAILQ_INSERT_TAIL(>r_ctl.rc_tmap, rsm, r_tnext);
@@ -4657,11 +4666,7 @@ rack_process_data(struct mbuf *m, struct tcphdr *th, s
 * send garbage on first SYN.
 */
int32_t nsegs;
-#ifdef TCP_RFC7413
int32_t tfo_syn;
-#else
-#definetfo_syn (FALSE)
-#endif
struct tcp_rack *rack;
 
rack = (struct tcp_rack *)tp->t_fb_ptr;
@@ -4767,10 +4772,8 @@ dodata:  /* XXX */
 * PRU_RCVD).  If a FIN has already been received on this connection
 * then we just ignore the text.
 */
-#ifdef TCP_RFC7413
tfo_syn = ((tp->t_state == TCPS_SYN_RECEIVED) &&
-   (tp->t_flags & TF_FASTOPEN));
-#endif
+  IS_FASTOPEN(tp->t_flags));
if ((tlen || (thflags & TH_FIN) || tfo_syn) &&
TCPS_HAVERCVDFIN(tp->t_state) == 0) {
tcp_seq save_start = th->th_seq;
@@ -5237,6 +5240,8 @@ rack_do_syn_sent(struct mbuf *m, struct tcphdr *th, st
tp->irs = th->th_seq;
tcp_rcvseqinit(tp);
if (thflags & TH_ACK) {
+   int tfo_partial = 0;
+   
TCPSTAT_INC(tcps_connects);
soisconnected(so);
 #ifdef MAC
@@ -5250,10 +5255,19 @@ rack_do_syn_sent(struct mbuf *m, struct tcphdr *th, st
tp->rcv_adv += min(tp->rcv_wnd,
TCP_MAXWIN << tp->rcv_scale);
/*
+* If not all the data that was sent in the TFO SYN
+* has been acked, resend the remainder right away.
+*/
+   if (IS_FASTOPEN(tp->t_flags) &&
+   (tp->snd_una != tp->snd_max)) {
+   tp->snd_nxt = th->th_ack;
+   tfo_partial = 1;
+   }
+   /*
 * If there's data, delay ACK; if there's also a FIN ACKNOW
 * will be turned on later.
 */
-   if (DELAY_ACK(tp, tlen) && tlen != 0) {
+   if (DELAY_ACK(tp, tlen) && tlen != 0 && (tfo_partial == 0)) {
rack_timer_cancel(tp, (struct tcp_rack *)tp->t_fb_ptr,
  ((struct tcp_rack 
*)tp->t_fb_ptr)->r_ctl.rc_rcvtime, __LINE__);
tp->t_flags |= TF_DELACK;
@@ -5266,6 +5280,21 @@ rack_do_syn_sent(struct mbuf *m, struct tcphdr *th, st
tp->t_flags |= TF_ECN_PERMIT;
TCPSTAT_INC(tcps_ecn_shs);
}
+   if (SEQ_GT(th->th_ack, tp->snd_una)) {
+   /* 
+* We advance snd_una for the 
+* fast open case. If th_ack is
+* acknowledging data beyond 
+* snd_una we can't just call
+ 

svn commit: r335022 - head/sys/netinet

2018-06-12 Thread Randall Stewart
Author: rrs
Date: Tue Jun 12 23:54:08 2018
New Revision: 335022
URL: https://svnweb.freebsd.org/changeset/base/335022

Log:
  This fixes missing VNET sets in the hpts system. Basically,
  without this commit, running vnets with a TCP stack that uses
  some of the features is a recipe for panic.
  
  Reported by:  Larry Rosenman
  Sponsored by: Netflix Inc.
  Differential Revision:https://reviews.freebsd.org/D15757

Modified:
  head/sys/netinet/tcp_hpts.c

Modified: head/sys/netinet/tcp_hpts.c
==
--- head/sys/netinet/tcp_hpts.c Tue Jun 12 23:26:25 2018(r335021)
+++ head/sys/netinet/tcp_hpts.c Tue Jun 12 23:54:08 2018(r335022)
@@ -1215,7 +1215,9 @@ tcp_input_data(struct tcp_hpts_entry *hpts, struct tim
hpts->p_inp = inp;
drop_reason = inp->inp_hpts_drop_reas;
inp->inp_in_input = 0;
+   tp = intotcpcb(inp);
mtx_unlock(>p_mtx);
+   CURVNET_SET(tp->t_vnet);
if (drop_reason) {
INP_INFO_RLOCK(_tcbinfo);
ti_locked = TI_RLOCKED;
@@ -1234,10 +1236,10 @@ out:
INP_WUNLOCK(inp);
}
ti_locked = TI_UNLOCKED;
+   CURVNET_RESTORE();
mtx_lock(>p_mtx);
continue;
}
-   tp = intotcpcb(inp);
if ((tp == NULL) || (tp->t_inpcb == NULL)) {
goto out;
}
@@ -1262,6 +1264,7 @@ out:
}
if (in_pcbrele_wlocked(inp) == 0)
INP_WUNLOCK(inp);
+   CURVNET_RESTORE();
mtx_lock(>p_mtx);
continue;
}
@@ -1282,7 +1285,6 @@ out:
 */
tcp_set_hpts(inp);
}
-   CURVNET_SET(tp->t_vnet);
m = tp->t_in_pkt;
n = NULL;
if (m != NULL &&
@@ -1366,7 +1368,6 @@ out:
if (m)
n = m->m_nextpkt;
}
-   CURVNET_RESTORE();
goto out;
}
/*
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


Re: svn commit: r334804 - in head/sys: kern modules/tcp modules/tcp/rack netinet netinet/tcp_stacks sys

2018-06-07 Thread Randall Stewart via svn-src-all



> On Jun 7, 2018, at 6:01 PM, hiren panchasara  
> wrote:
> 
> On 06/07/18 at 06:18P, Randall Stewart wrote:
>> Author: rrs
>> Date: Thu Jun  7 18:18:13 2018
>> New Revision: 334804
>> URL: https://svnweb.freebsd.org/changeset/base/334804
>> 
>> Log:
>>  This commit brings in a new refactored TCP stack called Rack.
>>  Rack includes the following features:
>>   - A different SACK processing scheme (the old sack structures are not 
>> used).
>>   - RACK (Recent acknowledgment) where counting dup-acks is no longer done
>>  instead time is used to know when to retransmit. (see the I-D)
>>   - TLP (Tail Loss Probe) where we will probe for tail-losses to attempt
>>  to try not to take a retransmit time-out. (see the I-D)
>>   - Burst mitigation using TCPHPTS
>>   - PRR (proportional rate reduction) see the RFC.
>> 
>>  Once built into your kernel, you can select this stack by either
>>  socket option with the name of the stack is "rack" or by setting
>>  the global sysctl so the default is rack.
>> 
>>  Note that any connection that does not support SACK will be kicked
>>  back to the "default" base  FreeBSD stack (currently known as "default").
>> 
>>  To build this into your kernel you will need to enable in your
>>  kernel:
>> makeoptions WITH_EXTRA_TCP_STACKS=1
>> options TCPHPTS
>> 
>>  Sponsored by:   Netflix Inc.
>>  Differential Revision:  https://reviews.freebsd.org/D15525
>> 
>> Added:
>>  head/sys/modules/tcp/rack/
>>  head/sys/modules/tcp/rack/Makefile   (contents, props changed)
>>  head/sys/netinet/tcp_stacks/rack.c   (contents, props changed)
>>  head/sys/netinet/tcp_stacks/rack_bbr_common.h   (contents, props changed)
>>  head/sys/netinet/tcp_stacks/sack_filter.c   (contents, props changed)
>>  head/sys/netinet/tcp_stacks/sack_filter.h   (contents, props changed)
>>  head/sys/netinet/tcp_stacks/tcp_rack.h   (contents, props changed)
>> Modified:
>>  head/sys/kern/uipc_sockbuf.c
>>  head/sys/modules/tcp/Makefile
>>  head/sys/netinet/tcp.h
>>  head/sys/netinet/tcp_log_buf.h
>>  head/sys/netinet/tcp_output.c
>>  head/sys/netinet/tcp_stacks/fastpath.c
>>  head/sys/netinet/tcp_timer.c
>>  head/sys/netinet/tcp_timer.h
>>  head/sys/netinet/tcp_var.h
>>  head/sys/sys/mbuf.h
>>  head/sys/sys/queue.h
>>  head/sys/sys/sockbuf.h
>>  head/sys/sys/time.h
> 
> I thought we'd have more time to review/test this. Looks like BSDCan
> commit-spree in effect. :-)

The Phabricator review has been up since May 22nd. That's over 2.5 weeks,
and this was also discussed on the Thursday conference calls.
> 
> A few questions:
> 1) Does RACK work reliably without HPTS? If yes, has that config been
> tested?
> 
No it requires the pacer.

> 2) It looks like PRR is tied to RACK. Why did we go that route?
> Shouldn't it be easily used with the 'default' stack also?
> 

It is what I developed... and I had no desire to work with the default stack.
That is a fifth rail that no one wants touched.

> 3) Can new SACK be used with the traditional stack?

Well if you want to rework the base stack you might be able to do that :)

It would be quite some effort.. I think Robert wants eventually the old
stack to be de-composed and then slowly work at getting more common
code between them until eventually you can have a diff and somehow
figure out how to integrate the two.

> 
> 4) Where should manpage like info for RACK go? a new man-page or
> extending tcp(4)? Info like how to enable system-wide or per socket
> should go here.
> 

The enable/disable or per-socket I think is in with the pluggable stack
stuff. We might want a Rack man page.. have to think about it.



> 5) Any perf numbers to go along with this commit? Synthetic or
> production numbers showing improvements in transfer speed or any other
> impact on CPU usage (specially with HPTS) that you can share?
> 

CPU usage will be higher, but we see a drop in rebuffers of roughly 12%, I am told.

> 6) In your testing, have you found cases where RACK does poorly compared
> to the 'default' stack? Any recommendations on when should RACK be
> enabled? (Something like this could go in the manpage.)

Nope. 

R

> 
> Glad to finally see this in -head!
> 
> Cheers,
> Hiren


Randall Stewart
r...@netflix.com
803-317-4952





___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r334815 - head/sys/modules/tcp/rack

2018-06-07 Thread Randall Stewart
Author: rrs
Date: Thu Jun  7 20:57:12 2018
New Revision: 334815
URL: https://svnweb.freebsd.org/changeset/base/334815

Log:
  Take out the stack alias inadvertently added by my commit.
  
  Reported by:  Peter Lei

Modified:
  head/sys/modules/tcp/rack/Makefile

Modified: head/sys/modules/tcp/rack/Makefile
==
--- head/sys/modules/tcp/rack/Makefile  Thu Jun  7 20:49:01 2018
(r334814)
+++ head/sys/modules/tcp/rack/Makefile  Thu Jun  7 20:57:12 2018
(r334815)
@@ -19,6 +19,5 @@ SRCS+=opt_kern_tls.h
 
 CFLAGS+=   -DMODNAME=${KMOD}
 CFLAGS+=   -DSTACKNAME=${STACKNAME}
-CFLAGS+=   -DSTACKALIAS=rack_18q21
 
 .include 
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r334813 - head/sys/sys

2018-06-07 Thread Randall Stewart
Author: rrs
Date: Thu Jun  7 19:57:55 2018
New Revision: 334813
URL: https://svnweb.freebsd.org/changeset/base/334813

Log:
  Fix build issue with const and volatile and the
  myriad ways that the various compilers treat this. The
  only safe prefetch appears to be for AMD. The other
  compilers either are not volatile or are not const :(
  
  Reported by:  Michael Tuexen

Modified:
  head/sys/sys/kern_prefetch.h

Modified: head/sys/sys/kern_prefetch.h
==
--- head/sys/sys/kern_prefetch.hThu Jun  7 19:48:49 2018
(r334812)
+++ head/sys/sys/kern_prefetch.hThu Jun  7 19:57:55 2018
(r334813)
@@ -34,7 +34,7 @@ kern_prefetch(const volatile void *addr, void* before)
 #if defined(__amd64__)
__asm __volatile("prefetcht1 (%1)":"=rm"(*((int32_t 
*)before)):"r"(addr):);
 #else
-   __builtin_prefetch(addr);
+/* __builtin_prefetch(addr);*/
 #endif
 }
 
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r334804 - in head/sys: kern modules/tcp modules/tcp/rack netinet netinet/tcp_stacks sys

2018-06-07 Thread Randall Stewart
Author: rrs
Date: Thu Jun  7 18:18:13 2018
New Revision: 334804
URL: https://svnweb.freebsd.org/changeset/base/334804

Log:
  This commit brings in a new refactored TCP stack called Rack.
  Rack includes the following features:
   - A different SACK processing scheme (the old sack structures are not used).
   - RACK (Recent acknowledgment) where counting dup-acks is no longer done
  instead time is used to know when to retransmit. (see the I-D)
   - TLP (Tail Loss Probe) where we will probe for tail-losses to attempt
  to try not to take a retransmit time-out. (see the I-D)
   - Burst mitigation using TCPHPTS
   - PRR (Proportional Rate Reduction) see the RFC.
  
  Once built into your kernel, you can select this stack either via the
  socket option, where the name of the stack is "rack", or by setting
  the global sysctl so the default is rack.
  
  Note that any connection that does not support SACK will be kicked
  back to the "default" base  FreeBSD stack (currently known as "default").
  
  To build this into your kernel you will need to enable in your
  kernel:
 makeoptions WITH_EXTRA_TCP_STACKS=1
 options TCPHPTS
  
  Sponsored by: Netflix Inc.
  Differential Revision:https://reviews.freebsd.org/D15525

Added:
  head/sys/modules/tcp/rack/
  head/sys/modules/tcp/rack/Makefile   (contents, props changed)
  head/sys/netinet/tcp_stacks/rack.c   (contents, props changed)
  head/sys/netinet/tcp_stacks/rack_bbr_common.h   (contents, props changed)
  head/sys/netinet/tcp_stacks/sack_filter.c   (contents, props changed)
  head/sys/netinet/tcp_stacks/sack_filter.h   (contents, props changed)
  head/sys/netinet/tcp_stacks/tcp_rack.h   (contents, props changed)
Modified:
  head/sys/kern/uipc_sockbuf.c
  head/sys/modules/tcp/Makefile
  head/sys/netinet/tcp.h
  head/sys/netinet/tcp_log_buf.h
  head/sys/netinet/tcp_output.c
  head/sys/netinet/tcp_stacks/fastpath.c
  head/sys/netinet/tcp_timer.c
  head/sys/netinet/tcp_timer.h
  head/sys/netinet/tcp_var.h
  head/sys/sys/mbuf.h
  head/sys/sys/queue.h
  head/sys/sys/sockbuf.h
  head/sys/sys/time.h

Modified: head/sys/kern/uipc_sockbuf.c
==
--- head/sys/kern/uipc_sockbuf.cThu Jun  7 18:06:01 2018
(r334803)
+++ head/sys/kern/uipc_sockbuf.cThu Jun  7 18:18:13 2018
(r334804)
@@ -1283,6 +1283,55 @@ sbsndptr(struct sockbuf *sb, u_int off, u_int len, u_i
return (ret);
 }
 
+struct mbuf *
+sbsndptr_noadv(struct sockbuf *sb, uint32_t off, uint32_t *moff)
+{
+   struct mbuf *m;
+
+   KASSERT(sb->sb_mb != NULL, ("%s: sb_mb is NULL", __func__));
+   if (sb->sb_sndptr == NULL || sb->sb_sndptroff > off) {
+   *moff = off;
+   if (sb->sb_sndptr == NULL) {
+   sb->sb_sndptr = sb->sb_mb;
+   sb->sb_sndptroff = 0;
+   }
+   return (sb->sb_mb);
+   } else {
+   m = sb->sb_sndptr;
+   off -= sb->sb_sndptroff;
+   }
+   *moff = off;
+   return (m);
+}
+
+void
+sbsndptr_adv(struct sockbuf *sb, struct mbuf *mb, uint32_t len)
+{
+   /*
+* A small copy was done, advance forward the sb_sbsndptr to cover
+* it.
+*/
+   struct mbuf *m;
+
+   if (mb != sb->sb_sndptr) {
+   /* Did not copyout at the same mbuf */
+   return;
+   }
+   m = mb;
+   while (m && (len > 0)) {
+   if (len >= m->m_len) {
+   len -= m->m_len;
+   if (m->m_next) {
+   sb->sb_sndptroff += m->m_len;
+   sb->sb_sndptr = m->m_next;
+   }
+   m = m->m_next;
+   } else {
+   len = 0;
+   }
+   }
+}
+
 /*
  * Return the first mbuf and the mbuf data offset for the provided
  * send offset without changing the "sb_sndptroff" field.

Modified: head/sys/modules/tcp/Makefile
==
--- head/sys/modules/tcp/Makefile   Thu Jun  7 18:06:01 2018
(r334803)
+++ head/sys/modules/tcp/Makefile   Thu Jun  7 18:18:13 2018
(r334804)
@@ -7,10 +7,12 @@ SYSDIR?=${SRCTOP}/sys
 
 SUBDIR=\
${_tcp_fastpath} \
+${_tcp_rack} \
${_tcpmd5} \
 
 .if ${MK_EXTRA_TCP_STACKS} != "no" || defined(ALL_MODULES)
 _tcp_fastpath= fastpath
+_tcp_rack= rack
 .endif
 
 .if (${MK_INET_SUPPORT} != "no" || ${MK_INET6_SUPPORT} != "no") || \

Added: head/sys/modules/tcp/rack/Makefile
==
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/sys/modules/tcp/rack/Makefile  Thu Jun  7 18:18:13 2018
(r334804)
@@ -0,0 +1,24 @@
+#
+# $FreeBSD$
+#
+
+.PATH: ${.CURDIR}/../../../netinet/tcp_stacks
+
+STACKNAME= 

svn commit: r333041 - head/sys/netinet

2018-04-26 Thread Randall Stewart
Author: rrs
Date: Thu Apr 26 21:41:16 2018
New Revision: 333041
URL: https://svnweb.freebsd.org/changeset/base/333041

Log:
  This change re-arranges the fields within the tcp-pcb so that
  they are more in order of cache line use as one passes
  through the tcp_input/output paths (non-errors most likely path). This
  helps speed up cache line optimization so that the tcp stack runs
  a bit more efficiently.
  
  Sponsored by: Netflix Inc.
  Differential Revision:https://reviews.freebsd.org/D15136

Modified:
  head/sys/netinet/tcp_var.h

Modified: head/sys/netinet/tcp_var.h
==
--- head/sys/netinet/tcp_var.h  Thu Apr 26 21:40:05 2018(r333040)
+++ head/sys/netinet/tcp_var.h  Thu Apr 26 21:41:16 2018(r333041)
@@ -83,125 +83,123 @@ STAILQ_HEAD(tcp_log_stailq, tcp_log_mem);
 
 /*
  * Tcp control block, one per tcp; fields:
- * Organized for 16 byte cacheline efficiency.
+ * Organized for 64 byte cacheline efficiency based
+ * on common tcp_input/tcp_output processing.
  */
 struct tcpcb {
-   struct  tsegqe_head t_segq; /* segment reassembly queue */
-   int t_segqlen;  /* segment reassembly queue length */
-   int t_dupacks;  /* consecutive dup acks recd */
-
-   struct mbuf  *t_in_pkt; /* head of the input packet queue for 
the tcp_hpts system */
-   struct mbuf  *t_tail_pkt;   /* tail of the input packet queue for 
the tcp_hpts system */
-   struct tcp_timer *t_timers; /* All the TCP timers in one struct */
-
+   /* Cache line 1 */
struct  inpcb *t_inpcb; /* back pointer to internet pcb */
-   int t_state;/* state of this connection */
+   struct tcp_function_block *t_fb;/* TCP function call block */
+   void*t_fb_ptr;  /* Pointer to t_fb specific data */
+   uint32_t t_maxseg:24,   /* maximum segment size */
+   t_logstate:8;   /* State of "black box" logging */
+   uint32_t t_state:4, /* state of this connection */
+   bits_spare : 24;
u_int   t_flags;
-
-   struct  vnet *t_vnet;   /* back pointer to parent vnet */
-
tcp_seq snd_una;/* sent but unacknowledged */
tcp_seq snd_max;/* highest sequence number sent;
 * used to recognize retransmits
 */
tcp_seq snd_nxt;/* send next */
tcp_seq snd_up; /* send urgent pointer */
-
-   tcp_seq snd_wl1;/* window update seg seq number */
-   tcp_seq snd_wl2;/* window update seg ack number */
-   tcp_seq iss;/* initial send sequence number */
-   tcp_seq irs;/* initial receive sequence number */
-
+   uint32_t  snd_wnd;  /* send window */
+   uint32_t  snd_cwnd; /* congestion-controlled window */
+   uint32_t cl1_spare; /* Spare to round out CL 1 */
+   /* Cache line 2 */
+   u_int32_t  ts_offset;   /* our timestamp offset */
+   u_int32_t   rfbuf_ts;   /* recv buffer autoscaling timestamp */
+   int rcv_numsacks;   /* # distinct sack blks present */
+   u_int   t_tsomax;   /* TSO total burst length limit in 
bytes */
+   u_int   t_tsomaxsegcount;   /* TSO maximum segment count */
+   u_int   t_tsomaxsegsize;/* TSO maximum segment size in bytes */
tcp_seq rcv_nxt;/* receive next */
tcp_seq rcv_adv;/* advertised window */
uint32_t  rcv_wnd;  /* receive window */
+   u_int   t_flags2;   /* More tcpcb flags storage */
+   int t_srtt; /* smoothed round-trip time */
+   int t_rttvar;   /* variance in round-trip time */
+   u_int32_t  ts_recent;   /* timestamp echo data */
+   u_char  snd_scale;  /* window scaling for send window */
+   u_char  rcv_scale;  /* window scaling for recv window */
+   u_char  snd_limited;/* segments limited transmitted */
+   u_char  request_r_scale;/* pending window scaling */
+   tcp_seq last_ack_sent;
+   u_int   t_rcvtime;  /* inactivity time */
+   /* Cache line 3 */
tcp_seq rcv_up; /* receive urgent pointer */
-
-   uint32_t  snd_wnd;  /* send window */
-   uint32_t  snd_cwnd; /* congestion-controlled window */
+   int t_segqlen;  /* segment reassembly queue length */
+   struct  tsegqe_head t_segq; /* segment reassembly queue */
+   struct mbuf  *t_in_pkt;
+   struct mbuf  *t_tail_pkt;
+   struct tcp_timer *t_timers; 

svn commit: r332774 - head/sys/netinet

2018-04-19 Thread Randall Stewart
Author: rrs
Date: Thu Apr 19 15:03:48 2018
New Revision: 332774
URL: https://svnweb.freebsd.org/changeset/base/332774

Log:
  These two modules need the tcp_hpts.h file for
  when the option is enabled (not sure how LINT/build-universe
  missed this) oops.
  
  Sponsored by: Netflix Inc

Modified:
  head/sys/netinet/tcp_subr.c
  head/sys/netinet/tcp_usrreq.c

Modified: head/sys/netinet/tcp_subr.c
==
--- head/sys/netinet/tcp_subr.c Thu Apr 19 15:02:53 2018(r332773)
+++ head/sys/netinet/tcp_subr.c Thu Apr 19 15:03:48 2018(r332774)
@@ -99,6 +99,7 @@ __FBSDID("$FreeBSD$");
 #include 
 #include 
 #include 
+#include 
 #include 
 #ifdef INET6
 #include 

Modified: head/sys/netinet/tcp_usrreq.c
==
--- head/sys/netinet/tcp_usrreq.c   Thu Apr 19 15:02:53 2018
(r332773)
+++ head/sys/netinet/tcp_usrreq.c   Thu Apr 19 15:03:48 2018
(r332774)
@@ -94,6 +94,7 @@ __FBSDID("$FreeBSD$");
 #include 
 #include 
 #include 
+#include 
 #ifdef TCPPCAP
 #include 
 #endif
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r332770 - in head/sys: conf netinet netinet/tcp_stacks sys

2018-04-19 Thread Randall Stewart
Author: rrs
Date: Thu Apr 19 13:37:59 2018
New Revision: 332770
URL: https://svnweb.freebsd.org/changeset/base/332770

Log:
  This commit brings in the TCP high precision timer system (tcp_hpts).
  It is the forerunner/foundational work of bringing in both Rack and BBR
  which use hpts for pacing out packets. The feature is optional and requires
  the TCPHPTS option to be enabled before the feature will be active. TCP
  modules that use it must ensure that the base component is compiled in
  the kernel in which they are loaded.
  
  MFC after:Never
  Sponsored by: Netflix Inc.
  Differential Revision:https://reviews.freebsd.org/D15020

Added:
  head/sys/netinet/tcp_hpts.c   (contents, props changed)
  head/sys/netinet/tcp_hpts.h   (contents, props changed)
  head/sys/sys/kern_prefetch.h   (contents, props changed)
Modified:
  head/sys/conf/files
  head/sys/conf/options
  head/sys/netinet/in_pcb.c
  head/sys/netinet/in_pcb.h
  head/sys/netinet/tcp_stacks/fastpath.c
  head/sys/netinet/tcp_subr.c
  head/sys/netinet/tcp_syncache.c
  head/sys/netinet/tcp_usrreq.c
  head/sys/netinet/tcp_var.h
  head/sys/sys/mbuf.h

Modified: head/sys/conf/files
==
--- head/sys/conf/files Thu Apr 19 12:50:49 2018(r332769)
+++ head/sys/conf/files Thu Apr 19 13:37:59 2018(r332770)
@@ -4355,6 +4355,7 @@ netinet/tcp_log_buf.c optional tcp_blackbox 
inet | tc
 netinet/tcp_lro.c  optional inet | inet6
 netinet/tcp_output.c   optional inet | inet6
 netinet/tcp_offload.c  optional tcp_offload inet | tcp_offload inet6
+netinet/tcp_hpts.c  optional tcphpts inet | tcphpts inet6
 netinet/tcp_pcap.c optional inet tcppcap | inet6 tcppcap
 netinet/tcp_reass.coptional inet | inet6
 netinet/tcp_sack.c optional inet | inet6

Modified: head/sys/conf/options
==
--- head/sys/conf/options   Thu Apr 19 12:50:49 2018(r332769)
+++ head/sys/conf/options   Thu Apr 19 13:37:59 2018(r332770)
@@ -218,6 +218,7 @@ SYSVMSG opt_sysvipc.h
 SYSVSEMopt_sysvipc.h
 SYSVSHMopt_sysvipc.h
 SW_WATCHDOGopt_watchdog.h
+TCPHPTS opt_inet.h
 TURNSTILE_PROFILING
 UMTX_PROFILING
 UMTX_CHAINSopt_global.h

Modified: head/sys/netinet/in_pcb.c
==
--- head/sys/netinet/in_pcb.c   Thu Apr 19 12:50:49 2018(r332769)
+++ head/sys/netinet/in_pcb.c   Thu Apr 19 13:37:59 2018(r332770)
@@ -58,6 +58,7 @@ __FBSDID("$FreeBSD$");
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -87,6 +88,9 @@ __FBSDID("$FreeBSD$");
 #include 
 #include 
 #include 
+#ifdef TCPHPTS
+#include 
+#endif
 #include 
 #include 
 #endif
@@ -1224,9 +1228,28 @@ in_pcbrele_rlocked(struct inpcb *inp)
}
return (0);
}
-
+   
KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__));
-
+#ifdef TCPHPTS
+   if (inp->inp_in_hpts || inp->inp_in_input) {
+   struct tcp_hpts_entry *hpts;
+   /*
+* We should not be on the hpts at 
+* this point in any form. we must
+* get the lock to be sure.
+*/
+   hpts = tcp_hpts_lock(inp);
+   if (inp->inp_in_hpts)
+   panic("Hpts:%p inp:%p at free still on hpts",
+ hpts, inp);
+   mtx_unlock(>p_mtx);
+   hpts = tcp_input_lock(inp);
+   if (inp->inp_in_input) 
+   panic("Hpts:%p inp:%p at free still on input hpts",
+ hpts, inp);
+   mtx_unlock(>p_mtx);
+   }
+#endif
INP_RUNLOCK(inp);
pcbinfo = inp->inp_pcbinfo;
uma_zfree(pcbinfo->ipi_zone, inp);
@@ -1255,7 +1278,26 @@ in_pcbrele_wlocked(struct inpcb *inp)
}
 
KASSERT(inp->inp_socket == NULL, ("%s: inp_socket != NULL", __func__));
-
+#ifdef TCPHPTS
+   if (inp->inp_in_hpts || inp->inp_in_input) {
+   struct tcp_hpts_entry *hpts;
+   /*
+* We should not be on the hpts at 
+* this point in any form. we must
+* get the lock to be sure.
+*/
+   hpts = tcp_hpts_lock(inp);
+   if (inp->inp_in_hpts)
+   panic("Hpts:%p inp:%p at free still on hpts",
+ hpts, inp);
+   mtx_unlock(>p_mtx);
+   hpts = tcp_input_lock(inp);
+   if (inp->inp_in_input) 
+   panic("Hpts:%p inp:%p at free still on input hpts",
+ hpts, inp);
+   mtx_unlock(>p_mtx);
+   }
+#endif

svn commit: r304224 - head/sys/netinet

2016-08-16 Thread Randall Stewart
Author: rrs
Date: Tue Aug 16 15:17:36 2016
New Revision: 304224
URL: https://svnweb.freebsd.org/changeset/base/304224

Log:
  A few more wording tweaks as suggested (with some modifications
  as well) by Ravi Pokala. Thanks for the comments :-)
  Sponsored by: Netflix Inc.

Modified:
  head/sys/netinet/tcp_timer.c

Modified: head/sys/netinet/tcp_timer.c
==
--- head/sys/netinet/tcp_timer.cTue Aug 16 15:11:46 2016
(r304223)
+++ head/sys/netinet/tcp_timer.cTue Aug 16 15:17:36 2016
(r304224)
@@ -307,15 +307,15 @@ tcp_timer_delack(void *xtp)
  * should only have grabbed the INP_WLOCK() when
  * it entered. To safely switch to holding both the
  * INP_INFO_RLOCK() and the INP_WLOCK() we must first
- * grab a reference on the inp, this will hold the inp
- * so that it can't be removed. We then unlock and grab
- * the info-read lock. Once we have the INP_INFO_RLOCK() we
- * proceed again to get the INP_WLOCK() but after that
- * we must check if someone else deleted the pcb i.e.
- * the inp_flags check.If so we return 1 otherwise 
- * we return 0.
+ * grab a reference on the inp, which will hold the inp
+ * so that it can't be removed. We then unlock the INP_WLOCK(), 
+ * and grab the INP_INFO_RLOCK() lock. Once we have the INP_INFO_RLOCK()
+ * we proceed again to get the INP_WLOCK() (this preserves proper
+ * lock order). After acquiring the INP_WLOCK we must check if someone 
+ * else deleted the pcb i.e. the inp_flags check.
+ * If so we return 1 otherwise we return 0.
  *
- * No matter which the tcp_inpinfo_lock_add() function
+ * No matter what the tcp_inpinfo_lock_add() function
  * returns the caller must afterwards call tcp_inpinfo_lock_del()
  * to drop the locks and reference properly.
  */
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r304223 - in head: share/man/man4 share/man/man9 sys/netinet

2016-08-16 Thread Randall Stewart
Author: rrs
Date: Tue Aug 16 15:11:46 2016
New Revision: 304223
URL: https://svnweb.freebsd.org/changeset/base/304223

Log:
  Here we update the modular tcp to be able to switch to an
  alternate TCP stack in other than the closed state (pre-listen/connect).
  The idea is that *if* that is supported by the alternate stack, it
  is asked if it's ok to switch. If it approves the "handoff" then we
  allow the switch to happen. Also the fini() function now gets a flag
  to tell if you are switching away *or* the tcb is destroyed. The
  init() call into the alternate stack is moved to the end so the
  tcb is more fully formed before the init transpires.
  
  Sponsored by: Netflix Inc.
  Differential Revision:D6790

Modified:
  head/share/man/man4/tcp.4
  head/share/man/man9/tcp_functions.9
  head/sys/netinet/tcp_subr.c
  head/sys/netinet/tcp_syncache.c
  head/sys/netinet/tcp_usrreq.c
  head/sys/netinet/tcp_var.h

Modified: head/share/man/man4/tcp.4
==
--- head/share/man/man4/tcp.4   Tue Aug 16 14:33:25 2016(r304222)
+++ head/share/man/man4/tcp.4   Tue Aug 16 15:11:46 2016(r304223)
@@ -633,7 +633,8 @@ when trying to use a TCP function block 
 .Xr mod_cc 4 ,
 .Xr siftr 4 ,
 .Xr syncache 4 ,
-.Xr setkey 8
+.Xr setkey 8 ,
+.Xr tcp_functions 9
 .Rs
 .%A "V. Jacobson"
 .%A "R. Braden"

Modified: head/share/man/man9/tcp_functions.9
==
--- head/share/man/man9/tcp_functions.9 Tue Aug 16 14:33:25 2016
(r304222)
+++ head/share/man/man9/tcp_functions.9 Tue Aug 16 15:11:46 2016
(r304223)
@@ -114,14 +114,17 @@ struct tcp_function_block {
struct inpcb *inp, struct tcpcb *tp);
/* Optional memory allocation/free routine */
void(*tfb_tcp_fb_init)(struct tcpcb *);
-   void(*tfb_tcp_fb_fini)(struct tcpcb *);
+   void(*tfb_tcp_fb_fini)(struct tcpcb *, int);
/* Optional timers, must define all if you define one */
int (*tfb_tcp_timer_stop_all)(struct tcpcb *);
void(*tfb_tcp_timer_activate)(struct tcpcb *,
uint32_t, u_int);
int (*tfb_tcp_timer_active)(struct tcpcb *, uint32_t);
void(*tfb_tcp_timer_stop)(struct tcpcb *, uint32_t);
+   /* Optional functions */
void(*tfb_tcp_rexmit_tmr)(struct tcpcb *);
+   void(*tfb_tcp_handoff_ok)(struct tcpcb *);
+   /* System use */
volatile uint32_t tfb_refcnt;
uint32_t  tfb_flags;
 };
@@ -157,6 +160,16 @@ in the
 .Va tfb_tcp_fb_fini
 field.
 .Pp
+If the
+.Va tfb_tcp_fb_fini
+argument is non-NULL, the function to which it points is called when the
+kernel is destroying the TCP control block or when the socket is transitioning
+to use a different TCP stack.
+The function is called with arguments of the TCP control block and an integer
+flag.
+The flag will be zero if the socket is transitioning to use another TCP stack
+or one if the TCP control block is being destroyed.
+.Pp
 If the TCP stack implements additional timers, the TCP stack should set a
 non-NULL pointer in the
 .Va tfb_tcp_timer_stop_all ,
@@ -193,6 +206,37 @@ However, care must be taken to ensure th
 TCP control block in a valid state for the remainder of the retransmit
 timer logic.
 .Pp
+A user may select a new TCP stack before calling
+.Xr connect 2
+or
+.Xr listen 2 .
+Optionally, a TCP stack may also allow a user to begin using the TCP stack for
+a connection that is in a later state by setting a non-NULL function pointer in
+the
+.Va tfb_tcp_handoff_ok
+field.
+If this field is non-NULL and a user attempts to select that TCP stack after
+calling
+.Xr connect 2
+or
+.Xr listen 2
+for that socket, the kernel will call the function pointed to by the
+.Va tfb_tcp_handoff_ok
+field.
+The function should return 0 if the user is allowed to switch the socket to use
+the TCP stack. Otherwise, the function should return an error code, which will
+be returned to the user.
+If the
+.Va tfb_tcp_handoff_ok
+field is
+.Dv NULL
+and a user attempts to select the TCP stack after calling
+.Xr connect 2
+or
+.Xr listen 2
+for that socket, the operation will fail and the kernel will return
+.Er EINVAL .
+.Pp
 The
 .Va tfb_refcnt
 and
@@ -269,8 +313,10 @@ The
 .Fa blk
 argument references a function block that is not currently registered.
 .Sh SEE ALSO
-.Xr malloc 9 ,
-.Xr tcp 4
+.Xr connect 2 ,
+.Xr listen 2 ,
+.Xr tcp 4 ,
+.Xr malloc 9
 .Sh HISTORY
 This framework first appeared in
 .Fx 11.0 .

Modified: head/sys/netinet/tcp_subr.c
==
--- head/sys/netinet/tcp_subr.c Tue Aug 16 14:33:25 2016(r304222)
+++ head/sys/netinet/tcp_subr.c Tue Aug 16 15:11:46 2016(r304223)
@@ -1187,9 +1187,6 @@ tcp_newtcpcb(struct inpcb *inp)
tp->t_fb = tcp_func_set_ptr;

Re: svn commit: r304218 - head/sys/netinet

2016-08-16 Thread Randall Stewart via svn-src-all

In theory it *could* be MFC’d to stable-10 and 11 but I am not sure we want to 
do that. I am
told by Drew that it does improve performance since in stable-10 you are 
getting the INFO_WLOCK()
but I am not sure if folks want it MFC’d…

One thing that this code leads us towards is we *in theory* could move the lock 
acquisition to the
timer code itself (I think).. we would have to make sure that the callout 
functions did do the
unlock since thats part of the lock-dance with reference… but its theoretically 
possible :-)

R

> On Aug 16, 2016, at 6:18 AM, Slawa Olhovchenkov <s...@zxy.spb.ru> wrote:
> 
> On Tue, Aug 16, 2016 at 12:40:56PM +, Randall Stewart wrote:
> 
>> Author: rrs
>> Date: Tue Aug 16 12:40:56 2016
>> New Revision: 304218
>> URL: https://svnweb.freebsd.org/changeset/base/304218
>> 
>> Log:
>>  This cleans up the timer code in TCP and also makes it so we do not
>>  take the INFO lock *unless* we are really going to delete the TCB.
>> 
>>  Differential Revision:  D7136
> 
> Is this related to stable/10?


Randall Stewart
r...@netflix.com
803-317-4952





___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Re: svn commit: r304218 - head/sys/netinet

2016-08-16 Thread Randall Stewart via svn-src-all
Hans:

Take a look at the comments; maybe they will help you understand what's going on.

The idea of it is that you *only* need the INFO_RLOCK when the timer function
wants to destroy the tcb (not all timers do this).. and yes usually the timer 
function
is going to call the drop/close path to purge the TCB. So in order to pick-up 
the info
lock you do the refcnt/lock-dance to get both locks in the proper lock order. 
This means
that someone could possibly come in and purge the tcb on you while you are in 
the
process of doing the lock-dance. 

If that occurs (the return code is 1) all the caller has to do is call the 
drop-lock function (the
mate to the add_lock) and then return (since the pcb is in the state the caller 
wants.. i.e. gone).
If the return code is 0, the caller can proceed to purge the tcb.. and then 
call the drop_lock function.

Note that in theory this could be used outside of wanting to kill the tcb.. but 
I am not sure why one
would want to hold the INFO_RLOCK if one did not want to purge the tcb.

R


> On Aug 16, 2016, at 6:14 AM, Hans Petter Selasky <h...@selasky.org> wrote:
> 
> On 08/16/16 15:01, Randall Stewart wrote:
>> Sure
>> 
>> Let me add some comments for you. The idea her is that you pick-up a 
>> reference
>> to the PCB.. so it can’t be removed. Thus when you re-lock the INP you check 
>> the
>> dropped flag (just in case someone did get in).
> 
> And this code is only used before tcp_close() / tcp_drop(), so if others got 
> in it is safe to assume that the inp is dead?
> 
> --HPS


Randall Stewart
r...@netflix.com
803-317-4952





___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

svn commit: r304219 - head/sys/netinet

2016-08-16 Thread Randall Stewart
Author: rrs
Date: Tue Aug 16 13:08:03 2016
New Revision: 304219
URL: https://svnweb.freebsd.org/changeset/base/304219

Log:
  Comments describing how to properly use the new lock_add functions
  and its respective companion.
  
  Sponsored by: Netflix Inc.

Modified:
  head/sys/netinet/tcp_timer.c

Modified: head/sys/netinet/tcp_timer.c
==
--- head/sys/netinet/tcp_timer.cTue Aug 16 12:40:56 2016
(r304218)
+++ head/sys/netinet/tcp_timer.cTue Aug 16 13:08:03 2016
(r304219)
@@ -301,6 +301,25 @@ tcp_timer_delack(void *xtp)
CURVNET_RESTORE();
 }
 
+/*
+ * When a timer wants to remove a TCB it must
+ * hold the INP_INFO_RLOCK(). The timer function
+ * should only have grabbed the INP_WLOCK() when
+ * it entered. To safely switch to holding both the
+ * INP_INFO_RLOCK() and the INP_WLOCK() we must first
+ * grab a reference on the inp, this will hold the inp
+ * so that it can't be removed. We then unlock and grab
+ * the info-read lock. Once we have the INP_INFO_RLOCK() we
+ * proceed again to get the INP_WLOCK() but after that
+ * we must check if someone else deleted the pcb i.e.
+ * the inp_flags check.If so we return 1 otherwise 
+ * we return 0.
+ *
+ * No matter which the tcp_inpinfo_lock_add() function
+ * returns the caller must afterwards call tcp_inpinfo_lock_del()
+ * to drop the locks and reference properly.
+ */
+
 int
 tcp_inpinfo_lock_add(struct inpcb *inp)
 {
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


Re: svn commit: r304218 - head/sys/netinet

2016-08-16 Thread Randall Stewart via svn-src-all
Sure

Let me add some comments for you. The idea here is that you pick-up a reference
to the PCB.. so it can’t be removed. Thus when you re-lock the INP you check the
dropped flag (just in case someone did get in).

Let me get that in comments.. (note thats also why when using this function you
have to use its companion function to drop the reference).

> On Aug 16, 2016, at 5:58 AM, Hans Petter Selasky <h...@selasky.org> wrote:
> 
> On 08/16/16 14:40, Randall Stewart wrote:
>> +int
>> +tcp_inpinfo_lock_add(struct inpcb *inp)
>> +{
>> +in_pcbref(inp);
>> +INP_WUNLOCK(inp);
>> +INP_INFO_RLOCK(_tcbinfo);
>> +INP_WLOCK(inp);
>> +if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
>> +return(1);
>> +}
>> +return(0);
>> +
>> +}
> 
> Hi,
> 
> Could you add some comments describing how it is considered safe to drop the 
> INP write-lock and then pick it up again?
> 
> My first impression is that because you are dropping the inp lock, multiple 
> threads can enter the code in question, leaving the window open to races?
> 
> --HPS


Randall Stewart
r...@netflix.com
803-317-4952





___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

svn commit: r304218 - head/sys/netinet

2016-08-16 Thread Randall Stewart
Author: rrs
Date: Tue Aug 16 12:40:56 2016
New Revision: 304218
URL: https://svnweb.freebsd.org/changeset/base/304218

Log:
  This cleans up the timer code in TCP and also makes it so we do not
  take the INFO lock *unless* we are really going to delete the TCB.
  
  Differential Revision:D7136

Modified:
  head/sys/netinet/tcp_timer.c
  head/sys/netinet/tcp_timer.h

Modified: head/sys/netinet/tcp_timer.c
==
--- head/sys/netinet/tcp_timer.cTue Aug 16 12:13:12 2016
(r304217)
+++ head/sys/netinet/tcp_timer.cTue Aug 16 12:40:56 2016
(r304218)
@@ -294,11 +294,6 @@ tcp_timer_delack(void *xtp)
CURVNET_RESTORE();
return;
}
-   KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
-   ("%s: tp %p tcpcb can't be stopped here", __func__, tp));
-   KASSERT((tp->t_timers->tt_flags & TT_DELACK) != 0,
-   ("%s: tp %p delack callout should be running", __func__, tp));
-
tp->t_flags |= TF_ACKNOW;
TCPSTAT_INC(tcps_delack);
(void) tp->t_fb->tfb_tcp_output(tp);
@@ -306,6 +301,39 @@ tcp_timer_delack(void *xtp)
CURVNET_RESTORE();
 }
 
+int
+tcp_inpinfo_lock_add(struct inpcb *inp)
+{
+   in_pcbref(inp);
+   INP_WUNLOCK(inp);
+   INP_INFO_RLOCK(_tcbinfo);
+   INP_WLOCK(inp);
+   if (inp->inp_flags & (INP_TIMEWAIT | INP_DROPPED)) {
+   return(1);
+   }
+   return(0);
+
+}
+
+void
+tcp_inpinfo_lock_del(struct inpcb *inp, struct tcpcb *tp)
+{
+   INP_INFO_RUNLOCK(_tcbinfo);
+   if (inp && (tp == NULL)) {
+   /*
+* If tcp_close/drop() gets called and tp
+* returns NULL, then the function dropped
+* the inp lock, we hold a reference keeping
+* this around, so we must re-aquire the 
+* INP_WLOCK() in order to proceed with
+* our dropping the inp reference.
+*/
+   INP_WLOCK(inp);
+   }
+   if (inp && in_pcbrele_wlocked(inp) == 0)
+   INP_WUNLOCK(inp);
+}
+
 void
 tcp_timer_2msl(void *xtp)
 {
@@ -317,7 +345,6 @@ tcp_timer_2msl(void *xtp)
 
ostate = tp->t_state;
 #endif
-   INP_INFO_RLOCK(_tcbinfo);
inp = tp->t_inpcb;
KASSERT(inp != NULL, ("%s: tp %p tp->t_inpcb == NULL", __func__, tp));
INP_WLOCK(inp);
@@ -325,21 +352,17 @@ tcp_timer_2msl(void *xtp)
if (callout_pending(>t_timers->tt_2msl) ||
!callout_active(>t_timers->tt_2msl)) {
INP_WUNLOCK(tp->t_inpcb);
-   INP_INFO_RUNLOCK(_tcbinfo);
CURVNET_RESTORE();
return;
}
callout_deactivate(>t_timers->tt_2msl);
if ((inp->inp_flags & INP_DROPPED) != 0) {
INP_WUNLOCK(inp);
-   INP_INFO_RUNLOCK(_tcbinfo);
CURVNET_RESTORE();
return;
}
KASSERT((tp->t_timers->tt_flags & TT_STOPPED) == 0,
("%s: tp %p tcpcb can't be stopped here", __func__, tp));
-   KASSERT((tp->t_timers->tt_flags & TT_2MSL) != 0,
-   ("%s: tp %p 2msl callout should be running", __func__, tp));
/*
 * 2 MSL timeout in shutdown went off.  If we're closed but
 * still waiting for peer to close and connection has been idle
@@ -355,7 +378,6 @@ tcp_timer_2msl(void *xtp)
 */
if ((inp->inp_flags & INP_TIMEWAIT) != 0) {
INP_WUNLOCK(inp);
-   INP_INFO_RUNLOCK(_tcbinfo);
CURVNET_RESTORE();
return;
}
@@ -363,15 +385,26 @@ tcp_timer_2msl(void *xtp)
tp->t_inpcb && tp->t_inpcb->inp_socket && 
(tp->t_inpcb->inp_socket->so_rcv.sb_state & SBS_CANTRCVMORE)) {
TCPSTAT_INC(tcps_finwait2_drops);
+   if (tcp_inpinfo_lock_add(inp)) {
+   tcp_inpinfo_lock_del(inp, tp);
+   goto out;
+   }
tp = tcp_close(tp); 
+   tcp_inpinfo_lock_del(inp, tp);
+   goto out;
} else {
if (ticks - tp->t_rcvtime <= TP_MAXIDLE(tp)) {
-   if (!callout_reset(>t_timers->tt_2msl,
-  TP_KEEPINTVL(tp), tcp_timer_2msl, tp)) {
-   tp->t_timers->tt_flags &= ~TT_2MSL_RST;
+   callout_reset(>t_timers->tt_2msl,
+ TP_KEEPINTVL(tp), tcp_timer_2msl, tp);
+   } else {
+   if (tcp_inpinfo_lock_add(inp)) {
+   tcp_inpinfo_lock_del(inp, tp);
+   goto out;
}
-   } else
-  tp = tcp_close(tp);
+   tp = tcp_close(tp);
+   tcp_inpinfo_lock_del(inp, tp);
+  

svn commit: r303412 - head

2016-07-27 Thread Randall Stewart
Author: rrs
Date: Wed Jul 27 20:37:32 2016
New Revision: 303412
URL: https://svnweb.freebsd.org/changeset/base/303412

Log:
  Remove myself from kern_timeout.c yeah!

Modified:
  head/MAINTAINERS

Modified: head/MAINTAINERS
==
--- head/MAINTAINERSWed Jul 27 20:34:09 2016(r303411)
+++ head/MAINTAINERSWed Jul 27 20:37:32 2016(r303412)
@@ -37,7 +37,6 @@ subsystem login   notes
 -
 atffreebsd-testing,jmmv,ngie   Pre-commit review 
requested.
 ath(4) adrian  Pre-commit review requested, send to 
freebsd-wirel...@freebsd.org
-callout_*(9)   rrs Pre-commit review requested -- becareful its 
tricksy code :o.
 contrib/compiler-rtdim Pre-commit review preferred.
 contrib/libc++ dim Pre-commit review preferred.
 contrib/libcxxrt   dim Pre-commit review preferred.
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


Re: svn commit: r303037 - head/sys/kern

2016-07-20 Thread Randall Stewart via svn-src-all
You are most welcome to back out anything you like.. as far as
I am concerned you own the code..

R
> On Jul 20, 2016, at 6:35 PM, Gleb Smirnoff <gleb...@freebsd.org> wrote:
> 
>  Randall,
> 
>  I have just tested and r303037 brings the TCP panic back. I
> got two crashes during 2.5 hours.
> 
> In your email [1] you are right, there is a regression in that the (-1)
> return value is lost. This problem was worked on in PR 210884,
> and we were very close to committing the fix.
> 
> The whole 11.0-RELEASE cycle strongly depends on this change. We
> don't want to release with TCP panic, and of course we want the
> regression described in 210884 to be fixed.
> 
> Your backout mixed with extra code really made things messy. Since
> I don't want to go with commit war, on behalf of RE we are asking
> for explicit agreement to back out r303037. Then we will proceed with
> latest patch from 210884. Is that okay?
> 
> [1] https://lists.freebsd.org/pipermail/svn-src-head/2016-July/089313.html
> 
> On Wed, Jul 20, 2016 at 03:33:37PM +0200, Randall Stewart wrote:
> R> Gleb
> R> 
> R> I wish you would have responded earlier.. I am more than glad to hand
> R> off all kern_timeout.c to you… please take it commit what you want to
> R> it and have it. I hate the code and I dislike having to touch it.
> R> 
> R> Its yours.. I can assure you I will not touch it again.
> R> 
> R> R
> R> > On Jul 20, 2016, at 8:53 AM, Gleb Smirnoff <gleb...@freebsd.org> wrote:
> R> > 
> R> > On Tue, Jul 19, 2016 at 06:31:19PM +, Randall Stewart wrote:
> R> > R> Author: rrs
> R> > R> Date: Tue Jul 19 18:31:19 2016
> R> > R> New Revision: 303037
> R> > R> URL: https://svnweb.freebsd.org/changeset/base/303037
> R> > R> 
> R> > R> Log:
> R> > R>   This reverts out Gleb's changes and adds three small
> R> > R>   fixes that I think closes up the races Gleb was
> R> > R>   looking for. This is running quite nicely in Netflix and
> R> > R>   now no longer causes TCP-tcb leaks.
> R> > R>   
> R> > R>   Differential Revision:  7135
> R> > 
> R> > Just to notice that I am completely pissed of by this commit
> R> > war, that you started.
> R> > 
> R> > I've been testing my changes properly, I gave people time to
> R> > review my changes. You didn't.
> R> > 
> R> > From your explanation in other emails I see that you've been
> R> > testing your changes with a version of FreeBSD that is a heavily
> R> > modified FreeBSD 10, not 11.
> R> > 
> R> > The new code you mixed with revert of mine, doesn't fix the
> R> > problem observed. It fixes another problem that you imagined,
> R> > which might exist, but isn't observed. We already discussed that
> R> > and you didn't prove it wrong.
> R> > 
> R> > Your change doesn't even revert my change completely.
> R> > 
> R> > -- 
> R> > Totus tuus, Glebius.
> R> 
> R> 
> R> Randall Stewart
> R> r...@netflix.com
> R> 803-317-4952
> R> 
> R> 
> R> 
> R> 
> R> 
> R> 
> 
> -- 
> Totus tuus, Glebius.


Randall Stewart
r...@netflix.com
803-317-4952





___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Re: svn commit: r303037 - head/sys/kern

2016-07-20 Thread Randall Stewart via svn-src-all
Gleb

I wish you had responded earlier.. I am more than glad to hand
off all of kern_timeout.c to you… please take it, commit what you want to
it, and have it. I hate the code and I dislike having to touch it.

It's yours.. I can assure you I will not touch it again.

R
> On Jul 20, 2016, at 8:53 AM, Gleb Smirnoff <gleb...@freebsd.org> wrote:
> 
> On Tue, Jul 19, 2016 at 06:31:19PM +, Randall Stewart wrote:
> R> Author: rrs
> R> Date: Tue Jul 19 18:31:19 2016
> R> New Revision: 303037
> R> URL: https://svnweb.freebsd.org/changeset/base/303037
> R> 
> R> Log:
> R>   This reverts out Gleb's changes and adds three small
> R>   fixes that I think closes up the races Gleb was
> R>   looking for. This is running quite nicely in Netflix and
> R>   now no longer causes TCP-tcb leaks.
> R>   
> R>   Differential Revision:   7135
> 
> Just to note that I am completely pissed off by this commit
> war that you started.
> 
> I've been testing my changes properly, I gave people time to
> review my changes. You didn't.
> 
> From your explanation in other emails I see that you've been
> testing your changes with a version of FreeBSD that is a heavily
> modified FreeBSD 10, not 11.
> 
> The new code you mixed with revert of mine, doesn't fix the
> problem observed. It fixes another problem that you imagined,
> which might exist, but isn't observed. We already discussed that
> and you didn't prove it wrong.
> 
> Your change doesn't even revert my change completely.
> 
> -- 
> Totus tuus, Glebius.


Randall Stewart
r...@netflix.com
803-317-4952





___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

svn commit: r303037 - head/sys/kern

2016-07-19 Thread Randall Stewart
Author: rrs
Date: Tue Jul 19 18:31:19 2016
New Revision: 303037
URL: https://svnweb.freebsd.org/changeset/base/303037

Log:
  This reverts Gleb's changes and adds three small
  fixes that I think close up the races Gleb was
  looking for. This is running quite nicely in Netflix and
  now no longer causes TCP-tcb leaks.
  
  Differential Revision:7135

Modified:
  head/sys/kern/kern_timeout.c

Modified: head/sys/kern/kern_timeout.c
==
--- head/sys/kern/kern_timeout.cTue Jul 19 18:15:22 2016
(r303036)
+++ head/sys/kern/kern_timeout.cTue Jul 19 18:31:19 2016
(r303037)
@@ -1050,7 +1050,7 @@ callout_reset_sbt_on(struct callout *c, 
 */
if (c->c_lock != NULL && !cc_exec_cancel(cc, direct))
cancelled = cc_exec_cancel(cc, direct) = true;
-   if (cc_exec_waiting(cc, direct)) {
+   if (cc_exec_waiting(cc, direct) || cc_exec_drain(cc, direct)) {
/*
 * Someone has called callout_drain to kill this
 * callout.  Don't reschedule.
@@ -1166,7 +1166,7 @@ _callout_stop_safe(struct callout *c, in
struct callout_cpu *cc, *old_cc;
struct lock_class *class;
int direct, sq_locked, use_lock;
-   int cancelled, not_on_a_list;
+   int not_on_a_list;
 
if ((flags & CS_DRAIN) != 0)
WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, c->c_lock,
@@ -1234,17 +1234,47 @@ again:
panic("migration should not happen");
 #endif
}
-
+   if ((drain != NULL) && (c->c_iflags & CALLOUT_PENDING) &&
+   (cc_exec_curr(cc, direct) != c)) {
+   /* 
+* This callout is executing and we are draining.
+* The only way this can happen is if its also
+* been rescheduled to run on one thread *and* asked to drain
+* on this thread (at the same time it is waiting to execute).
+*/
+   if ((c->c_iflags & CALLOUT_PROCESSED) == 0) {
+   if (cc_exec_next(cc) == c)
+   cc_exec_next(cc) = LIST_NEXT(c, c_links.le);
+   LIST_REMOVE(c, c_links.le);
+   } else {
+   TAILQ_REMOVE(>cc_expireq, c, c_links.tqe);
+   }
+   c->c_iflags &= ~CALLOUT_PENDING;
+   c->c_flags &= ~CALLOUT_ACTIVE;
+   }
/*
-* If the callout is running, try to stop it or drain it.
+* If the callout isn't pending, it's not on the queue, so
+* don't attempt to remove it from the queue.  We can try to
+* stop it by other means however.
 */
-   if (cc_exec_curr(cc, direct) == c) {
+   if (!(c->c_iflags & CALLOUT_PENDING)) {
/*
-* Succeed we to stop it or not, we must clear the
-* active flag - this is what API users expect.
+* If it wasn't on the queue and it isn't the current
+* callout, then we can't stop it, so just bail.
+* It probably has already been run (if locking
+* is properly done). You could get here if the caller
+* calls stop twice in a row for example. The second
+* call would fall here without CALLOUT_ACTIVE set.
 */
c->c_flags &= ~CALLOUT_ACTIVE;
-
+   if (cc_exec_curr(cc, direct) != c) {
+   CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
+   c, c->c_func, c->c_arg);
+   CC_UNLOCK(cc);
+   if (sq_locked)
+   sleepq_release(_exec_waiting(cc, direct));
+   return (-1);
+   }
if ((flags & CS_DRAIN) != 0) {
/*
 * The current callout is running (or just
@@ -1278,7 +1308,6 @@ again:
old_cc = cc;
goto again;
}
-
/*
 * Migration could be cancelled here, but
 * as long as it is still not sure when it
@@ -1362,6 +1391,8 @@ again:
cc_exec_drain(cc, direct) = drain;
}
CC_UNLOCK(cc);
+   if (drain)
+   return(0);
return ((flags & CS_EXECUTING) != 0);
}
CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
@@ -1369,20 +1400,12 @@ again:
if (drain) {
cc_exec_drain(cc, direct) = drain;
}
-   KASSERT(!sq_locked, 

Re: svn commit: r302998 - head/sys/kern

2016-07-19 Thread Randall Stewart via svn-src-all
Well 

The code itself I had up on machines for probably about 2 months. But then
I switched over to Gleb’s changes here just recently .. which caused me all
kinds of fun :)

I had to go back into Mercurial to pull back my changes.. I have had the
resurrected changes running on my netflix machines for about 20 or so
hours generating about anywhere from 14Gbps to 32Gbps depending on the machine
type.

I plan on waiting until tomorrow to sync it down into the NF code base. 

Note that if you do decide instead to roll back to the 10.x kern_timeout.c you
will need to roll back a bunch of tcp changes as well that use the new
async_drain() interface.

I am game either way for you to proceed.. I will commit this current code
to head as long as I hear no objections (from Gleb or others)….

R

> On Jul 19, 2016, at 3:56 PM, Glen Barber <g...@freebsd.org> wrote:
> 
> On Tue, Jul 19, 2016 at 03:46:54PM +0200, Randall Stewart wrote:
>> Glen:
>> 
>> My changes work.. I have them running in NF in  at least 1/2 dozen machines.
>> 
> 
> For how long?  What are the uptimes on these machines?
> 
> This is the blocker for 11.0-BETA2, and I don't want to see more
> regressions being introduced at this point of the cycle.
> 
> Glen
> 
>> I am more than willing to commit them.. they actually are not much different 
>> than
>> whats in stable 10.. though I don’t know if the async-drain was MFC’d 
>> there.. it
>> needs to be in for TCP.. or else you will have yet another mess in that
>> respect (TCP depends on ASYNC-drain).
>> 
>> I can commit what I have.. if you like.. or not.. I really don’t care (I 
>> hate kern_timeout.c :-o)
>> 
>> R
>>> On Jul 19, 2016, at 2:25 PM, Glen Barber <g...@freebsd.org> wrote:
>>> 
>>> On Tue, Jul 19, 2016 at 01:43:16PM +0200, Randall Stewart wrote:
>>>> Gleb
>>>> 
>>>> Ok
>>>> 
>>>> I have now updated
>>>> 
>>>> https://reviews.freebsd.org/D7135
>>>> 
>>>> You can take this or not… I really don’t care either way… (you are welcome 
>>>> to
>>>> own the kern_timeout.c code I hate it) :-)
>>>> 
>>>> Basically when you went off and re-factored kern_timeout.c I had worked in 
>>>> parallel on fixing
>>>> the bugs you were seeing.. There were three distinct problems that I 
>>>> fixed… but then
>>>> you had refactored the stop() routine.. and I thought ok.. thats fine. I 
>>>> had actually thought about
>>>> doing something similar to what you did and was too chicken to poke that 
>>>> much at it.. it has
>>>> always had a nasty habit of biting back when you make a lot of changes :-D
>>>> 
>>>> I know my version has worked for quite some time in my testing so I 
>>>> brought it back.
>>>> Complete with its 3 return codes (I only recently switched to your version 
>>>> and thus
>>>> started having difficulties with leaks and crashes)….
>>>> 
>>>> You are welcome not to use this..  I know it works (it ran
>>>> on a number of machines at NF last night.. and we will of course continue 
>>>> testing
>>>> it as we finish our dev testing for the upcoming OCA software release).. 
>>>> For now
>>>> this is what will be going out into the OCA’s at least :-)
>>>> 
>>> 
>>> I'm honestly done with this topic, and at the point now where I'm
>>> considering backing out all changes to callout(9) and related changes to
>>> the state they were at in stable/10.
>>> 
>>> This changes the KBI, and if it needs to be done, it needs to happen
>>> now.  We cannot wait for RC1 phase for this, and the amount of churn to
>>> get things into a working state with the current implementation far
>>> outweighs the benefit of the dangers.
>>> 
>>> Glen
>>> 
>> 
>> 
>> Randall Stewart
>> r...@netflix.com
>> 803-317-4952
>> 
>> 
>> 
>> 
>> 


Randall Stewart
r...@netflix.com
803-317-4952





___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Re: svn commit: r302998 - head/sys/kern

2016-07-19 Thread Randall Stewart via svn-src-all
Glen:

My changes work.. I have them running in NF in  at least 1/2 dozen machines.

I am more than willing to commit them.. they actually are not much different
than what's in stable 10.. though I don’t know if the async-drain was MFC’d there.. it
needs to be in for TCP.. or else you will have yet another mess in that
respect (TCP depends on ASYNC-drain).

I can commit what I have.. if you like.. or not.. I really don’t care (I hate 
kern_timeout.c :-o)

R
> On Jul 19, 2016, at 2:25 PM, Glen Barber <g...@freebsd.org> wrote:
> 
> On Tue, Jul 19, 2016 at 01:43:16PM +0200, Randall Stewart wrote:
>> Gleb
>> 
>> Ok
>> 
>> I have now updated
>> 
>> https://reviews.freebsd.org/D7135
>> 
>> You can take this or not… I really don’t care either way… (you are welcome to
>> own the kern_timeout.c code I hate it) :-)
>> 
>> Basically when you went off and re-factored kern_timeout.c I had worked in 
>> parallel on fixing
>> the bugs you were seeing.. There were three distinct problems that I fixed… 
>> but then
>> you had refactored the stop() routine.. and I thought ok.. thats fine. I had 
>> actually thought about
>> doing something similar to what you did and was too chicken to poke that 
>> much at it.. it has
>> always had a nasty habit of biting back when you make a lot of changes :-D
>> 
>> I know my version has worked for quite some time in my testing so I brought 
>> it back.
>> Complete with its 3 return codes (I only recently switched to your version 
>> and thus
>> started having difficulties with leaks and crashes)….
>> 
>> You are welcome not to use this..  I know it works (it ran
>> on a number of machines at NF last night.. and we will of course continue 
>> testing
>> it as we finish our dev testing for the upcoming OCA software release).. For 
>> now
>> this is what will be going out into the OCA’s at least :-)
>> 
> 
> I'm honestly done with this topic, and at the point now where I'm
> considering backing out all changes to callout(9) and related changes to
> the state they were at in stable/10.
> 
> This changes the KBI, and if it needs to be done, it needs to happen
> now.  We cannot wait for RC1 phase for this, and the amount of churn to
> get things into a working state with the current implementation far
> outweighs the benefit of the dangers.
> 
> Glen
> 


Randall Stewart
r...@netflix.com
803-317-4952





___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Re: svn commit: r302998 - head/sys/kern

2016-07-19 Thread Randall Stewart via svn-src-all
Gleb

Ok

I have now updated

https://reviews.freebsd.org/D7135

You can take this or not… I really don’t care either way… (you are welcome to
own the kern_timeout.c code I hate it) :-)

Basically when you went off and re-factored kern_timeout.c I had worked in
parallel on fixing the bugs you were seeing.. There were three distinct problems
that I fixed… but then you had refactored the stop() routine.. and I thought
ok.. that's fine. I had actually thought about doing something similar to what
you did and was too chicken to poke that much at it.. it has always had a nasty
habit of biting back when you make a lot of changes :-D

I know my version has worked for quite some time in my testing so I brought it 
back.
Complete with its 3 return codes (I only recently switched to your version and 
thus
started having difficulties with leaks and crashes)….

You are welcome not to use this..  I know it works (it ran
on a number of machines at NF last night.. and we will of course continue 
testing
it as we finish our dev testing for the upcoming OCA software release).. For now
this is what will be going out into the OCA’s at least :-)

R

> On Jul 18, 2016, at 6:19 PM, Randall Stewart <r...@netflix.com> wrote:
> 
> I have worked out a fix of this in Netflix code base (I have the same code 
> running there). I
> will get that tested tonight I will get the fixes in to restore the behavior.
> 
> I will setup a phabricator shortly.. most likely I will update the one I 
> already
> have on the one problem your earlier patch did not fix.
> 
> R
>> On Jul 18, 2016, at 5:44 PM, Randall Stewart <r...@netflix.com> wrote:
>> 
>> Gleb:
>> 
>> This now leaks TCP-PCB’s since you have broken the return codes with all your
>> fixes that used to be in here.
>> 
>> It was
>> 
>> return 1 — You stopped the callout
>> return 0 — The callout could not be stopped
>> return -1 — The callout was not running.
>> 
>> The LLRef code that was crashing in in.c depended on this to know to free
>> the memory.. i.e. if was > 0 then they needed to free the memory.
>> 
>> TCP depends on a return 0 to indicate the async-drain function will be 
>> called back and
>> thus increments a refcnt and waits for the callback.
>> 
>> You now return 0 when no timer was active.. which makes the stack then wait
>> for the not forth coming async-drain call.
>> 
>> R
>>> On Jul 18, 2016, at 11:29 AM, Gleb Smirnoff <gleb...@freebsd.org> wrote:
>>> 
>>> Author: glebius
>>> Date: Mon Jul 18 09:29:08 2016
>>> New Revision: 302998
>>> URL: https://svnweb.freebsd.org/changeset/base/302998
>>> 
>>> Log:
>>> Revert the last commit. It must get more review and testing first.
>>> 
>>> Modified:
>>> head/sys/kern/kern_timeout.c
>>> 
>>> Modified: head/sys/kern/kern_timeout.c
>>> ==
>>> --- head/sys/kern/kern_timeout.cMon Jul 18 09:26:06 2016
>>> (r302997)
>>> +++ head/sys/kern/kern_timeout.cMon Jul 18 09:29:08 2016
>>> (r302998)
>>> @@ -1381,7 +1381,7 @@ again:
>>> CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
>>> c, c->c_func, c->c_arg);
>>> CC_UNLOCK(cc);
>>> -   return (-1);
>>> +   return (0);
>>> }
>>> 
>>> c->c_iflags &= ~CALLOUT_PENDING;
>>> 
>> 
>> 
>> Randall Stewart
>> r...@netflix.com
>> 803-317-4952
>> 
>> 
>> 
>> 
>> 
> 
> 
> Randall Stewart
> r...@netflix.com
> 803-317-4952
> 
> 
> 
> 
> 


Randall Stewart
r...@netflix.com
803-317-4952





___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Re: svn commit: r302998 - head/sys/kern

2016-07-18 Thread Randall Stewart via svn-src-all
I have worked out a fix for this in the Netflix code base (I have the same code
running there). I will get that tested tonight and then get the fixes in to
restore the behavior.

I will setup a phabricator shortly.. most likely I will update the one I already
have on the one problem your earlier patch did not fix.

R
> On Jul 18, 2016, at 5:44 PM, Randall Stewart <r...@netflix.com> wrote:
> 
> Gleb:
> 
> This now leaks TCP-PCB’s since you have broken the return codes with all your
> fixes that used to be in here.
> 
> It was
> 
> return 1 — You stopped the callout
> return 0 — The callout could not be stopped
> return -1 — The callout was not running.
> 
> The LLRef code that was crashing in in.c depended on this to know to free
> the memory.. i.e. if was > 0 then they needed to free the memory.
> 
> TCP depends on a return 0 to indicate the async-drain function will be called 
> back and
> thus increments a refcnt and waits for the callback.
> 
> You now return 0 when no timer was active.. which makes the stack then wait
> for the not forth coming async-drain call.
> 
> R
>> On Jul 18, 2016, at 11:29 AM, Gleb Smirnoff <gleb...@freebsd.org> wrote:
>> 
>> Author: glebius
>> Date: Mon Jul 18 09:29:08 2016
>> New Revision: 302998
>> URL: https://svnweb.freebsd.org/changeset/base/302998
>> 
>> Log:
>> Revert the last commit. It must get more review and testing first.
>> 
>> Modified:
>> head/sys/kern/kern_timeout.c
>> 
>> Modified: head/sys/kern/kern_timeout.c
>> ==
>> --- head/sys/kern/kern_timeout.c Mon Jul 18 09:26:06 2016
>> (r302997)
>> +++ head/sys/kern/kern_timeout.c Mon Jul 18 09:29:08 2016
>> (r302998)
>> @@ -1381,7 +1381,7 @@ again:
>>  CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
>>  c, c->c_func, c->c_arg);
>>      CC_UNLOCK(cc);
>> -return (-1);
>> +return (0);
>>  }
>> 
>>  c->c_iflags &= ~CALLOUT_PENDING;
>> 
> 
> 
> Randall Stewart
> r...@netflix.com
> 803-317-4952
> 
> 
> 
> 
> 


Randall Stewart
r...@netflix.com
803-317-4952





___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

Re: svn commit: r302998 - head/sys/kern

2016-07-18 Thread Randall Stewart via svn-src-all
Gleb:

This now leaks TCP-PCBs since you have broken the return codes with all your
fixes that used to be in here.

It was

return 1 — You stopped the callout
return 0 — The callout could not be stopped
return -1 — The callout was not running.

The LLRef code that was crashing in in.c depended on this to know to free
the memory.. i.e. if it was > 0 then they needed to free the memory.

TCP depends on a return 0 to indicate the async-drain function will be called 
back and
thus increments a refcnt and waits for the callback.

You now return 0 when no timer was active.. which makes the stack then wait
for the not-forthcoming async-drain call.

R
> On Jul 18, 2016, at 11:29 AM, Gleb Smirnoff <gleb...@freebsd.org> wrote:
> 
> Author: glebius
> Date: Mon Jul 18 09:29:08 2016
> New Revision: 302998
> URL: https://svnweb.freebsd.org/changeset/base/302998
> 
> Log:
>  Revert the last commit. It must get more review and testing first.
> 
> Modified:
>  head/sys/kern/kern_timeout.c
> 
> Modified: head/sys/kern/kern_timeout.c
> ==
> --- head/sys/kern/kern_timeout.c  Mon Jul 18 09:26:06 2016
> (r302997)
> +++ head/sys/kern/kern_timeout.c  Mon Jul 18 09:29:08 2016
> (r302998)
> @@ -1381,7 +1381,7 @@ again:
>   CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
>   c, c->c_func, c->c_arg);
>   CC_UNLOCK(cc);
> - return (-1);
> +     return (0);
>   }
> 
>   c->c_iflags &= ~CALLOUT_PENDING;
> 


Randall Stewart
r...@netflix.com
803-317-4952





___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"

svn commit: r300042 - in head/sys/netinet: . tcp_stacks

2016-05-17 Thread Randall Stewart
Author: rrs
Date: Tue May 17 09:53:22 2016
New Revision: 300042
URL: https://svnweb.freebsd.org/changeset/base/300042

Log:
  This small change adopts the excellent suggestion for using named
  structures in the add of a new tcp-stack that came in late to me
  via email after the last commit. It also makes it so that a new
  stack may optionally get a callback during a retransmit
  timeout. This allows the new stack to clear specific state (think
  sack scoreboards or other such structures).
  
  Sponsored by: Netflix Inc.
  Differential Revision:http://reviews.freebsd.org/D6303

Modified:
  head/sys/netinet/tcp_stacks/fastpath.c
  head/sys/netinet/tcp_timer.c
  head/sys/netinet/tcp_var.h

Modified: head/sys/netinet/tcp_stacks/fastpath.c
==
--- head/sys/netinet/tcp_stacks/fastpath.c  Tue May 17 09:24:54 2016
(r300041)
+++ head/sys/netinet/tcp_stacks/fastpath.c  Tue May 17 09:53:22 2016
(r300042)
@@ -2375,34 +2375,17 @@ tcp_do_segment_fastack(struct mbuf *m, s
 }
 
 struct tcp_function_block __tcp_fastslow = {
-   "fastslow",
-   tcp_output,
-   tcp_do_segment_fastslow,
-   tcp_default_ctloutput,
-   NULL,
-   NULL,
-   NULL,
-   NULL,
-   NULL,
-   NULL,
-   0,
-   0
-
+   .tfb_tcp_block_name = "fastslow",
+   .tfb_tcp_output = tcp_output,
+   .tfb_tcp_do_segment = tcp_do_segment_fastslow,
+   .tfb_tcp_ctloutput = tcp_default_ctloutput,
 };
 
 struct tcp_function_block __tcp_fastack = {
-   "fastack",
-   tcp_output,
-   tcp_do_segment_fastack,
-   tcp_default_ctloutput,
-   NULL,
-   NULL,
-   NULL,
-   NULL,
-   NULL,
-   NULL,
-   0,
-   0
+   .tfb_tcp_block_name = "fastack",
+   .tfb_tcp_output = tcp_output,
+   .tfb_tcp_do_segment = tcp_do_segment_fastack,
+   .tfb_tcp_ctloutput = tcp_default_ctloutput
 };
 
 static int

Modified: head/sys/netinet/tcp_timer.c
==
--- head/sys/netinet/tcp_timer.cTue May 17 09:24:54 2016
(r300041)
+++ head/sys/netinet/tcp_timer.cTue May 17 09:53:22 2016
(r300042)
@@ -604,6 +604,10 @@ tcp_timer_rexmt(void * xtp)
KASSERT((tp->t_timers->tt_flags & TT_REXMT) != 0,
("%s: tp %p rexmt callout should be running", __func__, tp));
tcp_free_sackholes(tp);
+   if (tp->t_fb->tfb_tcp_rexmit_tmr) {
+   /* The stack has a timer action too. */
+   (*tp->t_fb->tfb_tcp_rexmit_tmr)(tp);
+   }
/*
 * Retransmission timer went off.  Message has not
 * been acked within retransmit interval.  Back off

Modified: head/sys/netinet/tcp_var.h
==
--- head/sys/netinet/tcp_var.h  Tue May 17 09:24:54 2016(r300041)
+++ head/sys/netinet/tcp_var.h  Tue May 17 09:53:22 2016(r300042)
@@ -135,6 +135,7 @@ struct tcp_function_block {
uint32_t, u_int);
int (*tfb_tcp_timer_active)(struct tcpcb *, uint32_t);
void(*tfb_tcp_timer_stop)(struct tcpcb *, uint32_t);
+   void(*tfb_tcp_rexmit_tmr)(struct tcpcb *);
volatile uint32_t tfb_refcnt;
uint32_t  tfb_flags;
 };
___
svn-src-all@freebsd.org mailing list
https://lists.freebsd.org/mailman/listinfo/svn-src-all
To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"


svn commit: r298747 - in head/sys: net netinet netinet6

2016-04-28 Thread Randall Stewart
Author: rrs
Date: Thu Apr 28 15:53:10 2016
New Revision: 298747
URL: https://svnweb.freebsd.org/changeset/base/298747

Log:
  Complete the UDP tunneling of ICMP msgs to those protocols
  interested in having tunneled UDP and finding out about the
  ICMP (tested by Michael Tuexen with SCTP.. soon to be using
  this feature).
  
  Differential Revision:http://reviews.freebsd.org/D5875

Modified:
  head/sys/net/if_vxlan.c
  head/sys/netinet/sctputil.c
  head/sys/netinet/udp_usrreq.c
  head/sys/netinet/udp_var.h
  head/sys/netinet6/udp6_usrreq.c

Modified: head/sys/net/if_vxlan.c
==
--- head/sys/net/if_vxlan.c Thu Apr 28 15:20:08 2016(r298746)
+++ head/sys/net/if_vxlan.c Thu Apr 28 15:53:10 2016(r298747)
@@ -930,7 +930,7 @@ vxlan_socket_init(struct vxlan_socket *v
}
 
error = udp_set_kernel_tunneling(vso->vxlso_sock,
-   vxlan_rcv_udp_packet, vso);
+   vxlan_rcv_udp_packet, NULL, vso);
if (error) {
if_printf(ifp, "cannot set tunneling function: %d\n", error);
return (error);

Modified: head/sys/netinet/sctputil.c
==
--- head/sys/netinet/sctputil.c Thu Apr 28 15:20:08 2016(r298746)
+++ head/sys/netinet/sctputil.c Thu Apr 28 15:53:10 2016(r298747)
@@ -6945,7 +6945,7 @@ sctp_over_udp_start(void)
}
/* Call the special UDP hook. */
if ((ret = udp_set_kernel_tunneling(SCTP_BASE_INFO(udp4_tun_socket),
-   sctp_recv_udp_tunneled_packet, NULL))) {
+   sctp_recv_udp_tunneled_packet, NULL, NULL))) {
sctp_over_udp_stop();
return (ret);
}
@@ -6969,7 +6969,7 @@ sctp_over_udp_start(void)
}
/* Call the special UDP hook. */
if ((ret = udp_set_kernel_tunneling(SCTP_BASE_INFO(udp6_tun_socket),
-   sctp_recv_udp_tunneled_packet, NULL))) {
+   sctp_recv_udp_tunneled_packet, NULL, NULL))) {
sctp_over_udp_stop();
return (ret);
}

Modified: head/sys/netinet/udp_usrreq.c
==
--- head/sys/netinet/udp_usrreq.c   Thu Apr 28 15:20:08 2016
(r298746)
+++ head/sys/netinet/udp_usrreq.c   Thu Apr 28 15:53:10 2016
(r298747)
@@ -792,6 +792,21 @@ udp_common_ctlinput(int cmd, struct sock
udp_notify(inp, inetctlerrmap[cmd]);
}
INP_RUNLOCK(inp);
+   } else {
+   inp = in_pcblookup(pcbinfo, faddr, uh->uh_dport,
+  ip->ip_src, uh->uh_sport,
+  INPLOOKUP_WILDCARD | 
INPLOOKUP_RLOCKPCB, NULL);
+   if (inp != NULL) {
+   struct udpcb *up;
+
+   up = intoudpcb(inp);
+   if (up->u_icmp_func != NULL) {
+   INP_RUNLOCK(inp);
+   (*up->u_icmp_func)(cmd, sa, vip, 
up->u_tun_ctx);
+   } else {
+   INP_RUNLOCK(inp);
+   }
+   }
}
} else
in_pcbnotifyall(pcbinfo, faddr, inetctlerrmap[cmd],
@@ -1748,7 +1763,7 @@ udp_attach(struct socket *so, int proto,
 #endif /* INET */
 
 int
-udp_set_kernel_tunneling(struct socket *so, udp_tun_func_t f, void *ctx)
+udp_set_kernel_tunneling(struct socket *so, udp_tun_func_t f, udp_tun_icmp_t 
i, void *ctx)
 {
struct inpcb *inp;
struct udpcb *up;
@@ -1759,11 +1774,13 @@ udp_set_kernel_tunneling(struct socket *
KASSERT(inp != NULL, ("udp_set_kernel_tunneling: inp == NULL"));
INP_WLOCK(inp);
up = intoudpcb(inp);
-   if (up->u_tun_func != NULL) {
+   if ((up->u_tun_func != NULL) ||
+   (up->u_icmp_func != NULL)) {
INP_WUNLOCK(inp);
return (EBUSY);
}
up->u_tun_func = f;
+   up->u_icmp_func = i;
up->u_tun_ctx = ctx;
INP_WUNLOCK(inp);
return (0);

Modified: head/sys/netinet/udp_var.h
==
--- head/sys/netinet/udp_var.h  Thu Apr 28 15:20:08 2016(r298746)
+++ head/sys/netinet/udp_var.h  Thu Apr 28 15:53:10 2016(r298747)
@@ -55,14 +55,16 @@ struct udpiphdr {
 struct inpcb;
 struct mbuf;
 
-typedef void(*udp_tun_func_t)(struct mbuf *, int off, struct inpcb *,
+typedef void(*udp_tun_func_t)(struct mbuf *, int, struct inpcb *,
  const struct sockaddr *, void *);
-
+typedef void(*udp_tun_icmp_t)(int, struct sockaddr *, void *, void *);
+ 
 

svn commit: r298743 - in head/sys/netinet: . tcp_stacks

2016-04-28 Thread Randall Stewart
Author: rrs
Date: Thu Apr 28 13:27:12 2016
New Revision: 298743
URL: https://svnweb.freebsd.org/changeset/base/298743

Log:
  This cleans up the timers code in TCP to start using the new
  async_drain functionality. This has been tested in NF as well as
  by Verisign. Still to do in here is to remove all the old flags. They
  are currently left being maintained but probably are no longer needed.
  
  Sponsored by: Netflix Inc.
  Differential Revision:http://reviews.freebsd.org/D5924

Modified:
  head/sys/netinet/tcp_stacks/fastpath.c
  head/sys/netinet/tcp_subr.c
  head/sys/netinet/tcp_timer.c
  head/sys/netinet/tcp_timer.h
  head/sys/netinet/tcp_var.h

Modified: head/sys/netinet/tcp_stacks/fastpath.c
==
--- head/sys/netinet/tcp_stacks/fastpath.c  Thu Apr 28 13:00:40 2016
(r298742)
+++ head/sys/netinet/tcp_stacks/fastpath.c  Thu Apr 28 13:27:12 2016
(r298743)
@@ -2386,7 +2386,6 @@ struct tcp_function_block __tcp_fastslow
NULL,
NULL,
NULL,
-   NULL,
0,
0
 
@@ -2403,7 +2402,6 @@ struct tcp_function_block __tcp_fastack 
NULL,
NULL,
NULL,
-   NULL,
0,
0
 };

Modified: head/sys/netinet/tcp_subr.c
==
--- head/sys/netinet/tcp_subr.c Thu Apr 28 13:00:40 2016(r298742)
+++ head/sys/netinet/tcp_subr.c Thu Apr 28 13:27:12 2016(r298743)
@@ -244,7 +244,6 @@ static struct inpcb *tcp_mtudisc_notify(
 static void tcp_mtudisc(struct inpcb *, int);
 static char *  tcp_log_addr(struct in_conninfo *inc, struct tcphdr *th,
void *ip4hdr, const void *ip6hdr);
-static voidtcp_timer_discard(struct tcpcb *, uint32_t);
 
 
 static struct tcp_function_block tcp_def_funcblk = {
@@ -258,7 +257,6 @@ static struct tcp_function_block tcp_def
NULL,
NULL,
NULL,
-   NULL,
0,
0
 };
@@ -528,7 +526,6 @@ register_tcp_functions(struct tcp_functi
return (EINVAL);
}
if (blk->tfb_tcp_timer_stop_all ||
-   blk->tfb_tcp_timers_left ||
blk->tfb_tcp_timer_activate ||
blk->tfb_tcp_timer_active ||
blk->tfb_tcp_timer_stop) {
@@ -537,7 +534,6 @@ register_tcp_functions(struct tcp_functi
 * must have them all.
 */
if ((blk->tfb_tcp_timer_stop_all == NULL) ||
-   (blk->tfb_tcp_timers_left  == NULL) ||
(blk->tfb_tcp_timer_activate == NULL) ||
(blk->tfb_tcp_timer_active == NULL) ||
(blk->tfb_tcp_timer_stop == NULL)) {
@@ -1343,13 +1339,21 @@ tcp_discardcb(struct tcpcb *tp)
 * callout, and the last discard function called will take care of
 * deleting the tcpcb.
 */
+   tp->t_timers->tt_draincnt = 0;
tcp_timer_stop(tp, TT_REXMT);
tcp_timer_stop(tp, TT_PERSIST);
tcp_timer_stop(tp, TT_KEEP);
tcp_timer_stop(tp, TT_2MSL);
tcp_timer_stop(tp, TT_DELACK);
if (tp->t_fb->tfb_tcp_timer_stop_all) {
-   /* Call the stop-all function of the methods */
+   /* 
+* Call the stop-all function of the methods, 
+* this function should call the tcp_timer_stop()
+* method with each of the function specific timeouts.
+* That stop will be called via the tfb_tcp_timer_stop()
+* which should use the async drain function of the 
+* callout system (see tcp_var.h).
+*/
tp->t_fb->tfb_tcp_timer_stop_all(tp);
}
 
@@ -1434,13 +1438,8 @@ tcp_discardcb(struct tcpcb *tp)
 
CC_ALGO(tp) = NULL;
inp->inp_ppcb = NULL;
-   if ((tp->t_timers->tt_flags & TT_MASK) == 0) {
+   if (tp->t_timers->tt_draincnt == 0) {
/* We own the last reference on tcpcb, let's free it. */
-   if ((tp->t_fb->tfb_tcp_timers_left) &&
-   (tp->t_fb->tfb_tcp_timers_left(tp))) {
-   /* Some fb timers left running! */
-   return;
-   }
if (tp->t_fb->tfb_tcp_fb_fini)
(*tp->t_fb->tfb_tcp_fb_fini)(tp);
refcount_release(&tp->t_fb->tfb_refcnt);
@@ -1453,45 +1452,12 @@ tcp_discardcb(struct tcpcb *tp)
 }
 
 void
-tcp_timer_2msl_discard(void *xtp)
-{
-
-   tcp_timer_discard((struct tcpcb *)xtp, TT_2MSL);
-}
-
-void
-tcp_timer_keep_discard(void *xtp)
-{
-
-   tcp_timer_discard((struct tcpcb *)xtp, TT_KEEP);
-}
-
-void
-tcp_timer_persist_discard(void *xtp)
-{
-
-   tcp_timer_discard((struct tcpcb *)xtp, TT_PERSIST);
-}
-
-void
-tcp_timer_rexmt_discard(void *xtp)
-{
-
-   tcp_timer_discard((struct tcpcb *)xtp, TT_REXMT);
-}
-
-void
-tcp_timer_delack_discard(void *xtp)
-{
-
- 

svn commit: r297663 - head/sys/netinet

2016-04-07 Thread Randall Stewart
Author: rrs
Date: Thu Apr  7 09:34:41 2016
New Revision: 297663
URL: https://svnweb.freebsd.org/changeset/base/297663

Log:
  A couple of minor changes that I missed that Michael had done. The most
  notable of these is the change to non-strict ordering for incoming data
  (this will make pkt-drill test 14 fail, but that is expected).

Modified:
  head/sys/netinet/sctp_indata.h
  head/sys/netinet/sctp_os_bsd.h
  head/sys/netinet/sctp_output.c
  head/sys/netinet/sctp_sysctl.h
  head/sys/netinet/sctp_var.h
  head/sys/netinet/sctputil.c

Modified: head/sys/netinet/sctp_indata.h
==
--- head/sys/netinet/sctp_indata.h  Thu Apr  7 09:10:34 2016
(r297662)
+++ head/sys/netinet/sctp_indata.h  Thu Apr  7 09:34:41 2016
(r297663)
@@ -53,7 +53,7 @@ sctp_build_readq_entry(struct sctp_tcb *
memset(_ctl, 0, sizeof(struct sctp_queued_to_read)); \
(_ctl)->sinfo_stream = stream_no; \
(_ctl)->sinfo_ssn = stream_seq; \
-   TAILQ_INIT(&_ctl->reasm);   \
+   TAILQ_INIT(&_ctl->reasm); \
(_ctl)->top_fsn = tfsn; \
(_ctl)->msg_id = msgid; \
(_ctl)->sinfo_flags = (flags << 8); \

Modified: head/sys/netinet/sctp_os_bsd.h
==
--- head/sys/netinet/sctp_os_bsd.h  Thu Apr  7 09:10:34 2016
(r297662)
+++ head/sys/netinet/sctp_os_bsd.h  Thu Apr  7 09:34:41 2016
(r297663)
@@ -480,9 +480,9 @@ sctp_get_mbuf_for_msg(unsigned int space
 #define SCTP_SAVE_ATOMIC_DECREMENT(addr, val) \
 { \
int32_t oldval; \
-   oldval = atomic_fetchadd_int(addr, -val);  \
+   oldval = atomic_fetchadd_int(addr, -val); \
if (oldval < val) { \
-   panic("Counter goes negative addr:%p val:%d oldval:%d", addr, 
val, oldval); \
+   panic("Counter goes negative"); \
} \
 }
 #else

Modified: head/sys/netinet/sctp_output.c
==
--- head/sys/netinet/sctp_output.c  Thu Apr  7 09:10:34 2016
(r297662)
+++ head/sys/netinet/sctp_output.c  Thu Apr  7 09:34:41 2016
(r297663)
@@ -10499,6 +10499,7 @@ sctp_fill_in_rest:
strseq++;
} else {
strseq_m->stream = 
ntohs(at->rec.data.stream_number);
+   strseq_m->reserved = ntohs(0);
strseq_m->msg_id = 
ntohl(at->rec.data.stream_seq);
strseq_m++;
}

Modified: head/sys/netinet/sctp_sysctl.h
==
--- head/sys/netinet/sctp_sysctl.h  Thu Apr  7 09:10:34 2016
(r297662)
+++ head/sys/netinet/sctp_sysctl.h  Thu Apr  7 09:34:41 2016
(r297663)
@@ -432,7 +432,7 @@ struct sctp_sysctl {
 #define SCTPCTL_STRICT_DATA_ORDER_DESC "Enforce strict data ordering, abort if 
control inside data"
 #define SCTPCTL_STRICT_DATA_ORDER_MIN  0
 #define SCTPCTL_STRICT_DATA_ORDER_MAX  1
-#define SCTPCTL_STRICT_DATA_ORDER_DEFAULT  1
+#define SCTPCTL_STRICT_DATA_ORDER_DEFAULT  0
 
 /* min_residual: min residual in a data fragment leftover */
 #define SCTPCTL_MIN_RESIDUAL_DESC  "Minimum residual data chunk in second 
part of split"

Modified: head/sys/netinet/sctp_var.h
==
--- head/sys/netinet/sctp_var.h Thu Apr  7 09:10:34 2016(r297662)
+++ head/sys/netinet/sctp_var.h Thu Apr  7 09:34:41 2016(r297663)
@@ -99,8 +99,8 @@ extern struct pr_usrreqs sctp_usrreqs;
  */
 #ifdef INVARIANTS
 #define sctp_free_a_readq(_stcb, _readq) { \
-   if ((_readq)->on_strm_q)\
-   panic("On strm q stcb:%p readq:%p", (_stcb), (_readq)); 
\
+   if ((_readq)->on_strm_q) \
+   panic("On strm q stcb:%p readq:%p", (_stcb), (_readq)); \
SCTP_ZONE_FREE(SCTP_BASE_INFO(ipi_zone_readq), (_readq)); \
SCTP_DECR_READQ_COUNT(); \
 }
@@ -204,7 +204,7 @@ extern struct pr_usrreqs sctp_usrreqs;
 }
 
 #define sctp_sbfree(ctl, stcb, sb, m) { \
-   SCTP_SAVE_ATOMIC_DECREMENT(&(sb)->sb_cc, SCTP_BUF_LEN((m)));\
+   SCTP_SAVE_ATOMIC_DECREMENT(&(sb)->sb_cc, SCTP_BUF_LEN((m))); \
SCTP_SAVE_ATOMIC_DECREMENT(&(sb)->sb_mbcnt, MSIZE); \
if (((ctl)->do_not_ref_stcb == 0) && stcb) {\
SCTP_SAVE_ATOMIC_DECREMENT(&(stcb)->asoc.sb_cc, 
SCTP_BUF_LEN((m))); \

Modified: head/sys/netinet/sctputil.c
==
--- head/sys/netinet/sctputil.c Thu Apr  7 09:10:34 2016(r297662)
+++ head/sys/netinet/sctputil.c Thu Apr  7 09:34:41 2016(r297663)
@@ -6100,7 +6100,7 @@ 

  1   2   3   4   >