svn commit: r368327 - head/sys/netinet
Author: rscheff Date: Fri Dec 4 11:29:27 2020 New Revision: 368327 URL: https://svnweb.freebsd.org/changeset/base/368327 Log: Add TCP feature Proportional Rate Reduction (PRR) - RFC6937 PRR improves loss recovery and avoids RTOs in a wide range of scenarios (ACK thinning) over regular SACK loss recovery. PRR is disabled by default, enable it with net.inet.tcp.do_prr = 1. Performance may be impeded by token bucket rate policers at the bottleneck, where net.inet.tcp.do_prr_conservative = 1 should be enabled in addition. Submitted by: Aris Angelogiannopoulos Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D18892 Modified: head/sys/netinet/tcp_input.c head/sys/netinet/tcp_var.h Modified: head/sys/netinet/tcp_input.c == --- head/sys/netinet/tcp_input.cFri Dec 4 04:39:48 2020 (r368326) +++ head/sys/netinet/tcp_input.cFri Dec 4 11:29:27 2020 (r368327) @@ -153,6 +153,16 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, drop_synfin, CTLFL _NAME(drop_synfin), 0, "Drop TCP packets with SYN+FIN set"); +VNET_DEFINE(int, tcp_do_prr_conservative) = 0; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_prr_conservative, CTLFLAG_VNET | CTLFLAG_RW, +_NAME(tcp_do_prr_conservative), 0, +"Do conservative Proportional Rate Reduction"); + +VNET_DEFINE(int, tcp_do_prr) = 1; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_prr, CTLFLAG_VNET | CTLFLAG_RW, +_NAME(tcp_do_prr), 1, +"Enable Proportional Rate Reduction per RFC 6937"); + VNET_DEFINE(int, tcp_do_newcwv) = 0; SYSCTL_INT(_net_inet_tcp, OID_AUTO, newcwv, CTLFLAG_VNET | CTLFLAG_RW, _NAME(tcp_do_newcwv), 0, @@ -2554,7 +2564,55 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru IN_FASTRECOVERY(tp->t_flags)) { cc_ack_received(tp, th, nsegs, CC_DUPACK); - if ((tp->t_flags & TF_SACK_PERMIT) && + if (V_tcp_do_prr && + IN_FASTRECOVERY(tp->t_flags) && + (tp->t_flags & TF_SACK_PERMIT)) { + long snd_cnt = 0, limit = 0; + long del_data = 0, pipe = 0; + /* +* In a duplicate ACK del_data is only the +* diff_in_sack. 
If no SACK is used del_data +* will be 0. Pipe is the amount of data we +* estimate to be in the network. +*/ + del_data = tp->sackhint.delivered_data; + pipe = (tp->snd_nxt - tp->snd_fack) + + tp->sackhint.sack_bytes_rexmit; + tp->sackhint.prr_delivered += del_data; + if (pipe > tp->snd_ssthresh) { + snd_cnt = (tp->sackhint.prr_delivered * + tp->snd_ssthresh / + tp->sackhint.recover_fs) + + 1 - tp->sackhint.sack_bytes_rexmit; + } else { + if (V_tcp_do_prr_conservative) + limit = tp->sackhint.prr_delivered - + tp->sackhint.sack_bytes_rexmit; + else + if ((tp->sackhint.prr_delivered - + tp->sackhint.sack_bytes_rexmit) > + del_data) + limit = tp->sackhint.prr_delivered - + tp->sackhint.sack_bytes_rexmit + + maxseg; + else + limit = del_data + maxseg; + if ((tp->snd_ssthresh - pipe) < limit) +
svn commit: r367753 - stable/12/sys/netinet/cc
Author: rscheff Date: Tue Nov 17 08:11:17 2020 New Revision: 367753 URL: https://svnweb.freebsd.org/changeset/base/367753 Log: MFC r367008: TCP Cubic: improve reaction to (and rollback from) RTO fix compliancy issue of CUBIC RTO handling according to RFC8312 section 4.7 add CUBIC CC_RTO_ERR handling Submitted by: chengc_netapp.com Reviewed by: rrs, tuexen, rscheff Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D26808 Modified: stable/12/sys/netinet/cc/cc_cubic.c Directory Properties: stable/12/ (props changed) Modified: stable/12/sys/netinet/cc/cc_cubic.c == --- stable/12/sys/netinet/cc/cc_cubic.c Tue Nov 17 08:03:49 2020 (r367752) +++ stable/12/sys/netinet/cc/cc_cubic.c Tue Nov 17 08:11:17 2020 (r367753) @@ -78,7 +78,7 @@ static void cubic_conn_init(struct cc_var *ccv); static int cubic_mod_init(void); static voidcubic_post_recovery(struct cc_var *ccv); static voidcubic_record_rtt(struct cc_var *ccv); -static voidcubic_ssthresh_update(struct cc_var *ccv); +static voidcubic_ssthresh_update(struct cc_var *ccv, uint32_t maxseg); static voidcubic_after_idle(struct cc_var *ccv); struct cubic { @@ -90,19 +90,28 @@ struct cubic { unsigned long max_cwnd; /* cwnd at the previous congestion event. */ unsigned long prev_max_cwnd; + /* A copy of prev_max_cwnd. Used for CC_RTO_ERR */ + unsigned long prev_max_cwnd_cp; /* various flags */ uint32_tflags; #define CUBICFLAG_CONG_EVENT 0x0001 /* congestion experienced */ #define CUBICFLAG_IN_SLOWSTART 0x0002 /* in slow start */ #define CUBICFLAG_IN_APPLIMIT 0x0004 /* application limited */ +#define CUBICFLAG_RTO_EVENT0x0008 /* RTO experienced */ /* Minimum observed rtt in ticks. */ int min_rtt_ticks; /* Mean observed rtt between congestion epochs. */ int mean_rtt_ticks; /* ACKs since last congestion event. */ int epoch_ack_count; - /* Time of last congestion event in ticks. */ + /* Timestamp (in ticks) of arriving in congestion avoidance from last +* congestion event. 
+*/ int t_last_cong; + /* Timestamp (in ticks) of a previous congestion event. Used for +* CC_RTO_ERR. +*/ + int t_last_cong_prev; }; static MALLOC_DEFINE(M_CUBIC, "cubic data", @@ -142,7 +151,14 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type) cubic_data->flags |= CUBICFLAG_IN_SLOWSTART; newreno_cc_algo.ack_received(ccv, type); } else { - if (cubic_data->flags & (CUBICFLAG_IN_SLOWSTART | + if ((cubic_data->flags & CUBICFLAG_RTO_EVENT) && + (cubic_data->flags & CUBICFLAG_IN_SLOWSTART)) { + /* RFC8312 Section 4.7 */ + cubic_data->flags &= ~(CUBICFLAG_RTO_EVENT | + CUBICFLAG_IN_SLOWSTART); + cubic_data->max_cwnd = CCV(ccv, snd_cwnd); + cubic_data->K = 0; + } else if (cubic_data->flags & (CUBICFLAG_IN_SLOWSTART | CUBICFLAG_IN_APPLIMIT)) { cubic_data->flags &= ~(CUBICFLAG_IN_SLOWSTART | CUBICFLAG_IN_APPLIMIT); @@ -274,10 +290,10 @@ cubic_cong_signal(struct cc_var *ccv, uint32_t type) case CC_NDUPACK: if (!IN_FASTRECOVERY(CCV(ccv, t_flags))) { if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) { - cubic_ssthresh_update(ccv); + cubic_ssthresh_update(ccv, mss); cubic_data->flags |= CUBICFLAG_CONG_EVENT; cubic_data->t_last_cong = ticks; - cubic_data->K = cubic_k(cubic_data->max_cwnd / CCV(ccv, t_maxseg)); + cubic_data->K = cubic_k(cubic_data->max_cwnd / mss); } ENTER_RECOVERY(CCV(ccv, t_flags)); } @@ -285,37 +301,35 @@ cubic_cong_signal(struct cc_var *ccv, uint32_t type) case CC_ECN: if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) { - cubic_ssthresh_update(ccv); + cubic_ssthresh_update(ccv, mss); cubic_data->flags |= CUBICFLAG_CONG_EVENT; cubic_data->t_last_cong = ticks; - cubic_data->K = cubic_k(cubic_data->max_cwnd / CCV(ccv, t_maxseg)); +
svn commit: r367752 - in stable/12/sys/netinet: . cc
Author: rscheff Date: Tue Nov 17 08:03:49 2020 New Revision: 367752 URL: https://svnweb.freebsd.org/changeset/base/367752 Log: MFC r367007: tcp: move cwnd and ssthresh updates into cc modules This will pave the way of setting ssthresh differently in TCP CUBIC, according to RFC8312 section 4.7. Use dynamic tcp_maxseg() adjusting for tcp options instead of static t_maxseg. Submitted by: chengc_netapp.com Reviewed by: rrs, tuexen, rscheff Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D26807 Modified: stable/12/sys/netinet/cc/cc_cubic.c stable/12/sys/netinet/cc/cc_dctcp.c stable/12/sys/netinet/cc/cc_htcp.c stable/12/sys/netinet/cc/cc_newreno.c stable/12/sys/netinet/tcp_input.c Directory Properties: stable/12/ (props changed) Modified: stable/12/sys/netinet/cc/cc_cubic.c == --- stable/12/sys/netinet/cc/cc_cubic.c Tue Nov 17 06:04:16 2020 (r367751) +++ stable/12/sys/netinet/cc/cc_cubic.c Tue Nov 17 08:03:49 2020 (r367752) @@ -265,8 +265,10 @@ static void cubic_cong_signal(struct cc_var *ccv, uint32_t type) { struct cubic *cubic_data; + u_int mss; cubic_data = ccv->cc_data; + mss = tcp_maxseg(ccv->ccvc.tcp); switch (type) { case CC_NDUPACK: @@ -293,6 +295,10 @@ cubic_cong_signal(struct cc_var *ccv, uint32_t type) break; case CC_RTO: + CCV(ccv, snd_ssthresh) = max(min(CCV(ccv, snd_wnd), +CCV(ccv, snd_cwnd)) / 2 / mss, +2) * mss; + CCV(ccv, snd_cwnd) = mss; /* * Grab the current time and record it so we know when the * most recent congestion event was. 
Only record it when the Modified: stable/12/sys/netinet/cc/cc_dctcp.c == --- stable/12/sys/netinet/cc/cc_dctcp.c Tue Nov 17 06:04:16 2020 (r367751) +++ stable/12/sys/netinet/cc/cc_dctcp.c Tue Nov 17 08:03:49 2020 (r367752) @@ -235,7 +235,7 @@ dctcp_cong_signal(struct cc_var *ccv, uint32_t type) if (CCV(ccv, t_flags) & TF_ECN_PERMIT) { dctcp_data = ccv->cc_data; cwin = CCV(ccv, snd_cwnd); - mss = CCV(ccv, t_maxseg); + mss = tcp_maxseg(ccv->ccvc.tcp); switch (type) { case CC_NDUPACK: @@ -282,6 +282,10 @@ dctcp_cong_signal(struct cc_var *ccv, uint32_t type) dctcp_data->ece_curr = 1; break; case CC_RTO: + CCV(ccv, snd_ssthresh) = max(min(CCV(ccv, snd_wnd), +CCV(ccv, snd_cwnd)) / 2 / mss, +2) * mss; + CCV(ccv, snd_cwnd) = mss; dctcp_update_alpha(ccv); dctcp_data->save_sndnxt += CCV(ccv, t_maxseg); dctcp_data->num_cong_events++; Modified: stable/12/sys/netinet/cc/cc_htcp.c == --- stable/12/sys/netinet/cc/cc_htcp.c Tue Nov 17 06:04:16 2020 (r367751) +++ stable/12/sys/netinet/cc/cc_htcp.c Tue Nov 17 08:03:49 2020 (r367752) @@ -271,8 +271,10 @@ static void htcp_cong_signal(struct cc_var *ccv, uint32_t type) { struct htcp *htcp_data; + u_int mss; htcp_data = ccv->cc_data; + mss = tcp_maxseg(ccv->ccvc.tcp); switch (type) { case CC_NDUPACK: @@ -311,6 +313,10 @@ htcp_cong_signal(struct cc_var *ccv, uint32_t type) break; case CC_RTO: + CCV(ccv, snd_ssthresh) = max(min(CCV(ccv, snd_wnd), +CCV(ccv, snd_cwnd)) / 2 / mss, +2) * mss; + CCV(ccv, snd_cwnd) = mss; /* * Grab the current time and record it so we know when the * most recent congestion event was. 
Only record it when the Modified: stable/12/sys/netinet/cc/cc_newreno.c == --- stable/12/sys/netinet/cc/cc_newreno.c Tue Nov 17 06:04:16 2020 (r367751) +++ stable/12/sys/netinet/cc/cc_newreno.c Tue Nov 17 08:03:49 2020 (r367752) @@ -241,7 +241,7 @@ newreno_cong_signal(struct cc_var *ccv, uint32_t type) u_int mss; cwin = CCV(ccv, snd_cwnd); - mss = CCV(ccv, t_maxseg); + mss = tcp_maxseg(ccv->ccvc.tcp); nreno = ccv->cc_data; beta = (nreno == NULL) ? V_newreno_beta : nreno->beta; beta_ecn = (nreno == NULL) ? V_newreno_beta_ecn : nreno->beta_ecn; @@ -278,6
svn commit: r367492 - in head/sys/netinet: . tcp_stacks
Author: rscheff Date: Sun Nov 8 18:47:05 2020 New Revision: 367492 URL: https://svnweb.freebsd.org/changeset/base/367492 Log: Prevent premature SACK block transmission during loss recovery Under specific conditions, a window update can be sent with outdated SACK information. Some clients react to this by subsequently delaying loss recovery, making TCP perform very poorly. Reported by: chengc_netapp.com Reviewed by: rrs, jtl MFC after:2 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D24237 Modified: head/sys/netinet/tcp_input.c head/sys/netinet/tcp_reass.c head/sys/netinet/tcp_stacks/bbr.c head/sys/netinet/tcp_stacks/rack.c head/sys/netinet/tcp_stacks/rack_bbr_common.c head/sys/netinet/tcp_var.h Modified: head/sys/netinet/tcp_input.c == --- head/sys/netinet/tcp_input.cSun Nov 8 18:27:49 2020 (r367491) +++ head/sys/netinet/tcp_input.cSun Nov 8 18:47:05 2020 (r367492) @@ -1462,6 +1462,29 @@ tcp_autorcvbuf(struct mbuf *m, struct tcphdr *th, stru } void +tcp_handle_wakeup(struct tcpcb *tp, struct socket *so) +{ + /* +* Since tp might be gone if the session entered +* the TIME_WAIT state before coming here, we need +* to check if the socket is still connected. 
+*/ + if ((so->so_state & SS_ISCONNECTED) == 0) + return; + INP_LOCK_ASSERT(tp->t_inpcb); + if (tp->t_flags & TF_WAKESOR) { + tp->t_flags &= ~TF_WAKESOR; + SOCKBUF_UNLOCK_ASSERT(>so_rcv); + sorwakeup(so); + } + if (tp->t_flags & TF_WAKESOW) { + tp->t_flags &= ~TF_WAKESOW; + SOCKBUF_UNLOCK_ASSERT(>so_snd); + sowwakeup(so); + } +} + +void tcp_do_segment(struct mbuf *m, struct tcphdr *th, struct socket *so, struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos) { @@ -1811,7 +1834,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru else if (!tcp_timer_active(tp, TT_PERSIST)) tcp_timer_activate(tp, TT_REXMT, tp->t_rxtcur); - sowwakeup(so); + tp->t_flags |= TF_WAKESOW; if (sbavail(>so_snd)) (void) tp->t_fb->tfb_tcp_output(tp); goto check_delack; @@ -1876,8 +1899,8 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru m_adj(m, drop_hdrlen); /* delayed header drop */ sbappendstream_locked(>so_rcv, m, 0); } - /* NB: sorwakeup_locked() does an implicit unlock. */ - sorwakeup_locked(so); + SOCKBUF_UNLOCK(>so_rcv); + tp->t_flags |= TF_WAKESOR; if (DELAY_ACK(tp, tlen)) { tp->t_flags |= TF_DELACK; } else { @@ -2811,8 +2834,8 @@ process_ACK: tp->snd_wnd = 0; ourfinisacked = 0; } - /* NB: sowwakeup_locked() does an implicit unlock. */ - sowwakeup_locked(so); + SOCKBUF_UNLOCK(>so_snd); + tp->t_flags |= TF_WAKESOW; m_freem(mfree); /* Detect una wraparound. */ if (!IN_RECOVERY(tp->t_flags) && @@ -3033,8 +3056,8 @@ dodata: /* XXX */ m_freem(m); else sbappendstream_locked(>so_rcv, m, 0); - /* NB: sorwakeup_locked() does an implicit unlock. */ - sorwakeup_locked(so); + SOCKBUF_UNLOCK(>so_rcv); + tp->t_flags |= TF_WAKESOR; } else { /* * XXX: Due to the header drop above "th" is @@ -3101,6 +3124,8 @@ dodata: /* XXX */ if (thflags & TH_FIN) { if (TCPS_HAVERCVDFIN(tp->t_state) == 0) { socantrcvmore(so); + /* The socket upcall is handled by socantrcvmore. 
*/ + tp->t_flags &= ~TF_WAKESOR; /* * If connection is half-synchronized * (ie NEEDSYN flag on) then delay ACK, @@ -3164,6 +3189,7 @@ check_delack: tp->t_flags &= ~TF_DELACK; tcp_timer_activate(tp, TT_DELACK, tcp_delacktime); } + tcp_handle_wakeup(tp, so);
svn commit: r367024 - head/usr.sbin/ctld
Author: rscheff Date: Sat Oct 24 21:10:53 2020 New Revision: 367024 URL: https://svnweb.freebsd.org/changeset/base/367024 Log: Add network QoS support for PCP to iscsi target. Make the Ethernet PCP codepoint configurable for L2 local traffic, to allow lower latency for iSCSI block IO. This addresses the target side only. Reviewed by: mav, trasz, bcr Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D26740 Modified: head/usr.sbin/ctld/ctl.conf.5 head/usr.sbin/ctld/ctld.c head/usr.sbin/ctld/ctld.h head/usr.sbin/ctld/parse.y head/usr.sbin/ctld/token.l head/usr.sbin/ctld/uclparse.c Modified: head/usr.sbin/ctld/ctl.conf.5 == --- head/usr.sbin/ctld/ctl.conf.5 Sat Oct 24 21:07:13 2020 (r367023) +++ head/usr.sbin/ctld/ctl.conf.5 Sat Oct 24 21:10:53 2020 (r367024) @@ -258,6 +258,13 @@ well-defined and .Qq Ar AFxx codepoints. +.It Ic pcp Ar value +The 802.1Q Priority CodePoint used for sending packets. +The PCP can be set to a value in the range between +.Qq Ar 0 +to +.Qq Ar 7 . +When omitted, the default for the outgoing interface is used. .El .Ss target Context .Bl -tag -width indent Modified: head/usr.sbin/ctld/ctld.c == --- head/usr.sbin/ctld/ctld.c Sat Oct 24 21:07:13 2020(r367023) +++ head/usr.sbin/ctld/ctld.c Sat Oct 24 21:10:53 2020(r367024) @@ -626,6 +626,7 @@ portal_group_new(struct conf *conf, const char *name) pg->pg_conf = conf; pg->pg_tag = 0; /* Assigned later in conf_apply(). 
*/ pg->pg_dscp = -1; + pg->pg_pcp = -1; TAILQ_INSERT_TAIL(>conf_portal_groups, pg, pg_next); return (pg); @@ -2210,6 +2211,32 @@ conf_apply(struct conf *oldconf, struct conf *newconf) IPPROTO_IPV6, IPV6_TCLASS, , sizeof(tos)) == -1) log_warn("setsockopt(IPV6_TCLASS) " + "failed for %s", + newp->p_listen); + } + } + if (newpg->pg_pcp != -1) { + struct sockaddr sa; + int len = sizeof(sa); + getsockname(newp->p_socket, , ); + /* +* Only allow the 6-bit DSCP +* field to be modified +*/ + int pcp = newpg->pg_pcp; + if (sa.sa_family == AF_INET) { + if (setsockopt(newp->p_socket, + IPPROTO_IP, IP_VLAN_PCP, + , sizeof(pcp)) == -1) + log_warn("setsockopt(IP_VLAN_PCP) " + "failed for %s", + newp->p_listen); + } else + if (sa.sa_family == AF_INET6) { + if (setsockopt(newp->p_socket, + IPPROTO_IPV6, IPV6_VLAN_PCP, + , sizeof(pcp)) == -1) + log_warn("setsockopt(IPV6_VLAN_PCP) " "failed for %s", newp->p_listen); } Modified: head/usr.sbin/ctld/ctld.h == --- head/usr.sbin/ctld/ctld.h Sat Oct 24 21:07:13 2020(r367023) +++ head/usr.sbin/ctld/ctld.h Sat Oct 24 21:10:53 2020(r367024) @@ -128,6 +128,7 @@ struct portal_group { char*pg_offload; char*pg_redirection; int pg_dscp; + int pg_pcp; uint16_tpg_tag; }; Modified: head/usr.sbin/ctld/parse.y == --- head/usr.sbin/ctld/parse.y Sat Oct 24 21:07:13 2020(r367023) +++ head/usr.sbin/ctld/parse.y Sat Oct 24 21:10:53 2020(r367024) @@ -65,8 +65,8 @@ extern void yyrestart(FILE *); %token DISCOVERY_AUTH_GROUP DISCOVERY_FILTER DSCP FOREIGN %token INITIATOR_NAME INITIATOR_PORTAL ISNS_SERVER ISNS_PERIOD ISNS_TIMEOUT %token LISTEN LISTEN_ISER LUN MAXPROC OFFLOAD OPENING_BRACKET OPTION -%token PATH PIDFILE PORT
svn commit: r367023 - in head: sys/dev/iscsi usr.bin/iscsictl usr.sbin/iscsid
Author: rscheff Date: Sat Oct 24 21:07:13 2020 New Revision: 367023 URL: https://svnweb.freebsd.org/changeset/base/367023 Log: Add network QoS support for PCP to iscsi initiator. Make the Ethernet PCP codepoint configurable for L2 local traffic, to allow lower latency for iSCSI block IO. This addresses the initiator side only. Reviewed by: mav, trasz, bcr Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D26739 Modified: head/sys/dev/iscsi/iscsi_ioctl.h head/usr.bin/iscsictl/iscsi.conf.5 head/usr.bin/iscsictl/iscsictl.c head/usr.bin/iscsictl/iscsictl.h head/usr.bin/iscsictl/parse.y head/usr.bin/iscsictl/token.l head/usr.sbin/iscsid/iscsid.c Modified: head/sys/dev/iscsi/iscsi_ioctl.h == --- head/sys/dev/iscsi/iscsi_ioctl.hSat Oct 24 21:07:10 2020 (r367022) +++ head/sys/dev/iscsi/iscsi_ioctl.hSat Oct 24 21:07:13 2020 (r367023) @@ -71,7 +71,8 @@ struct iscsi_session_conf { charisc_offload[ISCSI_OFFLOAD_LEN]; int isc_enable; int isc_dscp; - int isc_spare[3]; + int isc_pcp; + int isc_spare[2]; }; /* Modified: head/usr.bin/iscsictl/iscsi.conf.5 == --- head/usr.bin/iscsictl/iscsi.conf.5 Sat Oct 24 21:07:10 2020 (r367022) +++ head/usr.bin/iscsictl/iscsi.conf.5 Sat Oct 24 21:07:13 2020 (r367023) @@ -155,6 +155,13 @@ and codepoints. Default is no specified dscp codepoint, which means the default of the outgoing interface is used. +.It Cm pcp +The 802.1Q Priority CodePoint used for sending packets. +The PCP can be set to a value in the range between +.Qq Ar 0 +to +.Qq Ar 7 . +When omitted, the default for the outgoing interface is used. 
.El .Sh FILES .Bl -tag -width indent Modified: head/usr.bin/iscsictl/iscsictl.c == --- head/usr.bin/iscsictl/iscsictl.cSat Oct 24 21:07:10 2020 (r367022) +++ head/usr.bin/iscsictl/iscsictl.cSat Oct 24 21:07:13 2020 (r367023) @@ -88,6 +88,7 @@ target_new(struct conf *conf) xo_err(1, "calloc"); targ->t_conf = conf; targ->t_dscp = -1; + targ->t_pcp = -1; TAILQ_INSERT_TAIL(>conf_targets, targ, t_next); return (targ); @@ -360,6 +361,7 @@ conf_from_target(struct iscsi_session_conf *conf, else conf->isc_data_digest = ISCSI_DIGEST_NONE; conf->isc_dscp = targ->t_dscp; + conf->isc_pcp = targ->t_pcp; } static int @@ -540,6 +542,9 @@ kernel_list(int iscsi_fd, const struct target *targ __ if (conf->isc_dscp != -1) xo_emit("{L:/%-26s}{V:dscp/0x%02x}\n", "Target DSCP:", conf->isc_dscp); + if (conf->isc_pcp != -1) + xo_emit("{L:/%-26s}{V:pcp/0x%02x}\n", + "Target PCP:", conf->isc_pcp); xo_close_container("target"); xo_open_container("auth"); Modified: head/usr.bin/iscsictl/iscsictl.h == --- head/usr.bin/iscsictl/iscsictl.hSat Oct 24 21:07:10 2020 (r367022) +++ head/usr.bin/iscsictl/iscsictl.hSat Oct 24 21:07:13 2020 (r367023) @@ -79,6 +79,7 @@ struct target { int t_enable; int t_protocol; int t_dscp; + int t_pcp; char*t_offload; char*t_user; char*t_secret; Modified: head/usr.bin/iscsictl/parse.y == --- head/usr.bin/iscsictl/parse.y Sat Oct 24 21:07:10 2020 (r367022) +++ head/usr.bin/iscsictl/parse.y Sat Oct 24 21:07:13 2020 (r367023) @@ -133,6 +133,8 @@ target_entry: ignored | dscp + | + pcp ; target_name: TARGET_NAME EQUALS STR @@ -306,6 +308,8 @@ dscp: DSCP EQUALS STR { uint64_t tmp; + if (target->t_dscp != -1) + xo_errx(1, "duplicated dscp at line %d", lineno); if (strcmp($3, "0x") == 0) { tmp = strtol($3 + 2, NULL, 16); } else if (expand_number($3, ) != 0) { @@ -342,6 +346,27 @@ dscp: DSCP EQUALS STR | DSCP EQUALS AF41 { target->t_dscp = IPTOS_DSCP_AF41 >> 2 ; } | DSCP EQUALS AF42 { target->t_dscp = IPTOS_DSCP_AF42 >> 2 ; } | DSCP EQUALS AF43 { target->t_dscp =
svn commit: r367021 - in head/sbin: ping ping6
Author: rscheff Date: Sat Oct 24 21:01:18 2020 New Revision: 367021 URL: https://svnweb.freebsd.org/changeset/base/367021 Log: Make use of IP_VLAN_PCP setsockopt in ping and ping6. In order to validate the proper marking and use of a different ethernet priority class, add the new session-specific PCP feature to the ping/ping6 utilities. Reviewed by: mav, bcr Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D26627 Modified: head/sbin/ping/ping.8 head/sbin/ping/ping.c head/sbin/ping6/ping6.8 head/sbin/ping6/ping6.c Modified: head/sbin/ping/ping.8 == --- head/sbin/ping/ping.8 Sat Oct 24 20:57:13 2020(r367020) +++ head/sbin/ping/ping.8 Sat Oct 24 21:01:18 2020(r367021) @@ -28,7 +28,7 @@ .\" @(#)ping.8 8.2 (Berkeley) 12/11/93 .\" $FreeBSD$ .\" -.Dd August 22, 2019 +.Dd October 2, 2020 .Dt PING 8 .Os .Sh NAME @@ -39,6 +39,7 @@ packets to network hosts .Sh SYNOPSIS .Nm .Op Fl AaDdfHnoQqRrv +.Op Fl C Ar pcp .Op Fl c Ar count .Op Fl G Ar sweepmaxsize .Op Fl g Ar sweepminsize @@ -57,6 +58,7 @@ packets to network hosts .Ar host .Nm .Op Fl AaDdfHLnoQqRrv +.Op Fl C Ar pcp .Op Fl c Ar count .Op Fl I Ar iface .Op Fl i Ar wait @@ -112,6 +114,9 @@ Include a bell character in the output when any packet is received. This option is ignored if other format options are present. +.It Fl C Ar pcp +Add an 802.1p Ethernet Priority Code Point when sending a packet. +0..7 uses that specific PCP, -1 uses the interface default PCP (or none). .It Fl c Ar count Stop after sending (and receiving) Modified: head/sbin/ping/ping.c == --- head/sbin/ping/ping.c Sat Oct 24 20:57:13 2020(r367020) +++ head/sbin/ping/ping.c Sat Oct 24 21:01:18 2020(r367021) @@ -155,6 +155,7 @@ static int options; #defineF_TIME 0x10 #defineF_SWEEP 0x20 #defineF_WAITTIME 0x40 +#defineF_IP_VLAN_PCP 0x80 /* * MAX_DUP_CHK is the number of bits in received table, i.e. 
the maximum @@ -247,7 +248,7 @@ main(int argc, char *const *argv) u_long alarmtimeout; long ltmp; int almost_done, ch, df, hold, i, icmp_len, mib[4], preload; - int ssend_errno, srecv_errno, tos, ttl; + int ssend_errno, srecv_errno, tos, ttl, pcp; char ctrl[CMSG_SPACE(sizeof(struct timespec))]; char hnamebuf[MAXHOSTNAMELEN], snamebuf[MAXHOSTNAMELEN]; #ifdef IP_OPTIONS @@ -295,11 +296,11 @@ main(int argc, char *const *argv) err(EX_OSERR, "srecv socket"); } - alarmtimeout = df = preload = tos = 0; + alarmtimeout = df = preload = tos = pcp = 0; outpack = outpackhdr + sizeof(struct ip); while ((ch = getopt(argc, argv, - "Aac:DdfG:g:Hh:I:i:Ll:M:m:nop:QqRrS:s:T:t:vW:z:" + "AaC:c:DdfG:g:Hh:I:i:Ll:M:m:nop:QqRrS:s:T:t:vW:z:" #ifdef IPSEC #ifdef IPSEC_POLICY_IPSEC "P:" @@ -314,6 +315,13 @@ main(int argc, char *const *argv) case 'a': options |= F_AUDIBLE; break; + case 'C': + options |= F_IP_VLAN_PCP; + ltmp = strtol(optarg, , 0); + if (*ep || ep == optarg || ltmp > 7 || ltmp < -1) + errx(EX_USAGE, "invalid PCP: `%s'", optarg); + pcp = ltmp; + break; case 'c': ltmp = strtol(optarg, , 0); if (*ep || ep == optarg || ltmp <= 0) @@ -665,6 +673,10 @@ main(int argc, char *const *argv) if (options & F_SO_DONTROUTE) (void)setsockopt(ssend, SOL_SOCKET, SO_DONTROUTE, (char *), sizeof(hold)); + if (options & F_IP_VLAN_PCP) { + (void)setsockopt(ssend, IPPROTO_IP, IP_VLAN_PCP, (char *), + sizeof(pcp)); + } #ifdef IPSEC #ifdef IPSEC_POLICY_IPSEC if (options & F_POLICY) { @@ -1762,11 +1774,11 @@ usage(void) { (void)fprintf(stderr, "%s\n%s\n%s\n%s\n%s\n%s\n%s\n%s\n", -"usage: ping [-AaDdfHnoQqRrv] [-c count] [-G sweepmaxsize] [-g sweepminsize]", +"usage: ping [-AaDdfHnoQqRrv] [-C pcp] [-c count] [-G sweepmaxsize] [-g sweepminsize]", "[-h sweepincrsize] [-i wait] [-l preload] [-M mask | time] [-m ttl]", " " SECOPT " [-p pattern] [-S src_addr] [-s packetsize] [-t timeout]", "[-W waittime] [-z tos] host", -" ping [-AaDdfHLnoQqRrv] [-c count] [-I iface] [-i wait] [-l preload]", +" ping 
[-AaDdfHLnoQqRrv] [-C pcp] [-c count] [-I iface] [-i wait] [-l preload]", "[-M mask | time] [-m ttl]" SECOPT " [-p pattern] [-S src_addr]", "[-s packetsize]
svn commit: r367019 - in stable/12: sys/dev/iscsi usr.bin/iscsictl usr.sbin/iscsid
Author: rscheff Date: Sat Oct 24 20:52:05 2020 New Revision: 367019 URL: https://svnweb.freebsd.org/changeset/base/367019 Log: MFC r366573: Add DSCP support for network QoS to iscsi initiator. Allow the DSCP codepoint also to be configurable for the traffic in the direction from the initiator to the target, such that writes and any requests are also treated in the appropriate QoS class. Reviewed by: mav MFC after:2 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D26714 Modified: stable/12/sys/dev/iscsi/iscsi_ioctl.h stable/12/usr.bin/iscsictl/Makefile stable/12/usr.bin/iscsictl/iscsi.conf.5 stable/12/usr.bin/iscsictl/iscsictl.c stable/12/usr.bin/iscsictl/iscsictl.h stable/12/usr.bin/iscsictl/parse.y stable/12/usr.bin/iscsictl/token.l stable/12/usr.sbin/iscsid/iscsid.c Directory Properties: stable/12/ (props changed) Modified: stable/12/sys/dev/iscsi/iscsi_ioctl.h == --- stable/12/sys/dev/iscsi/iscsi_ioctl.h Sat Oct 24 20:48:35 2020 (r367018) +++ stable/12/sys/dev/iscsi/iscsi_ioctl.h Sat Oct 24 20:52:05 2020 (r367019) @@ -70,7 +70,8 @@ struct iscsi_session_conf { int isc_iser; charisc_offload[ISCSI_OFFLOAD_LEN]; int isc_enable; - int isc_spare[4]; + int isc_dscp; + int isc_spare[3]; }; /* Modified: stable/12/usr.bin/iscsictl/Makefile == --- stable/12/usr.bin/iscsictl/Makefile Sat Oct 24 20:48:35 2020 (r367018) +++ stable/12/usr.bin/iscsictl/Makefile Sat Oct 24 20:52:05 2020 (r367019) @@ -7,7 +7,7 @@ CFLAGS+=-I${.CURDIR} CFLAGS+= -I${SRCTOP}/sys/dev/iscsi MAN= iscsi.conf.5 iscsictl.8 -LIBADD=xo +LIBADD=util xo YFLAGS+= -v LFLAGS+= -i Modified: stable/12/usr.bin/iscsictl/iscsi.conf.5 == --- stable/12/usr.bin/iscsictl/iscsi.conf.5 Sat Oct 24 20:48:35 2020 (r367018) +++ stable/12/usr.bin/iscsictl/iscsi.conf.5 Sat Oct 24 20:52:05 2020 (r367019) @@ -145,6 +145,16 @@ for iSCSI over RDMA, or .Qq Ar iSCSI . Default is .Qq Ar iSCSI . +.It Cm dscp +The DiffServ Codepoint used for sending data. 
The DSCP can be +set to numeric, or hexadecimal values directly, as well as the +well-defined +.Qq Ar cs +and +.Qq Ar af +codepoints. +Default is no specified dscp codepoint, which means the default +of the outgoing interface is used. .El .Sh FILES .Bl -tag -width indent Modified: stable/12/usr.bin/iscsictl/iscsictl.c == --- stable/12/usr.bin/iscsictl/iscsictl.c Sat Oct 24 20:48:35 2020 (r367018) +++ stable/12/usr.bin/iscsictl/iscsictl.c Sat Oct 24 20:52:05 2020 (r367019) @@ -87,6 +87,7 @@ target_new(struct conf *conf) if (targ == NULL) xo_err(1, "calloc"); targ->t_conf = conf; + targ->t_dscp = -1; TAILQ_INSERT_TAIL(>conf_targets, targ, t_next); return (targ); @@ -358,6 +359,7 @@ conf_from_target(struct iscsi_session_conf *conf, conf->isc_data_digest = ISCSI_DIGEST_CRC32C; else conf->isc_data_digest = ISCSI_DIGEST_NONE; + conf->isc_dscp = targ->t_dscp; } static int @@ -535,6 +537,9 @@ kernel_list(int iscsi_fd, const struct target *targ __ "Target portal:", conf->isc_target_addr); xo_emit("{L:/%-26s}{V:alias/%s}\n", "Target alias:", state->iss_target_alias); + if (conf->isc_dscp != -1) + xo_emit("{L:/%-26s}{V:dscp/0x%02x}\n", + "Target DSCP:", conf->isc_dscp); xo_close_container("target"); xo_open_container("auth"); Modified: stable/12/usr.bin/iscsictl/iscsictl.h == --- stable/12/usr.bin/iscsictl/iscsictl.h Sat Oct 24 20:48:35 2020 (r367018) +++ stable/12/usr.bin/iscsictl/iscsictl.h Sat Oct 24 20:52:05 2020 (r367019) @@ -78,6 +78,7 @@ struct target { int t_session_type; int t_enable; int t_protocol; + int t_dscp; char*t_offload; char*t_user; char*t_secret; Modified: stable/12/usr.bin/iscsictl/parse.y == --- stable/12/usr.bin/iscsictl/parse.y Sat Oct 24
svn commit: r367018 - stable/12/sys/netinet
Author: rscheff Date: Sat Oct 24 20:48:35 2020 New Revision: 367018 URL: https://svnweb.freebsd.org/changeset/base/367018 Log: MFC r366570: Stop sending tiny new data segments during SACK recovery Consider the currently in-use TCP options when calculating the amount of new data to be injected during SACK loss recovery. That addresses the effect that very small (new) segments could be injected on partial ACKs while still performing a SACK loss recovery. Reported by: Liang Tian Reviewed by: tuexen, chengc_netapp.com MFC after:2 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D26446 Modified: stable/12/sys/netinet/tcp_output.c stable/12/sys/netinet/tcp_sack.c Directory Properties: stable/12/ (props changed) Modified: stable/12/sys/netinet/tcp_output.c == --- stable/12/sys/netinet/tcp_output.c Sat Oct 24 20:23:20 2020 (r367017) +++ stable/12/sys/netinet/tcp_output.c Sat Oct 24 20:48:35 2020 (r367018) @@ -322,7 +322,7 @@ again: sendalot = 1; TCPSTAT_INC(tcps_sack_rexmits); TCPSTAT_ADD(tcps_sack_rexmit_bytes, - min(len, tp->t_maxseg)); + min(len, tcp_maxseg(tp))); } } after_sack_rexmit: @@ -841,7 +841,6 @@ send: if (flags & TH_SYN) to.to_flags |= TOF_SACKPERM; else if (TCPS_HAVEESTABLISHED(tp->t_state) && - (tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks > 0) { to.to_flags |= TOF_SACK; to.to_nsacks = tp->rcv_numsacks; Modified: stable/12/sys/netinet/tcp_sack.c == --- stable/12/sys/netinet/tcp_sack.cSat Oct 24 20:23:20 2020 (r367017) +++ stable/12/sys/netinet/tcp_sack.cSat Oct 24 20:48:35 2020 (r367018) @@ -787,15 +787,16 @@ void tcp_sack_partialack(struct tcpcb *tp, struct tcphdr *th) { int num_segs = 1; + u_int maxseg = tcp_maxseg(tp); INP_WLOCK_ASSERT(tp->t_inpcb); tcp_timer_activate(tp, TT_REXMT, 0); tp->t_rtttime = 0; /* Send one or 2 segments based on how much new data was acked. 
*/ - if ((BYTES_THIS_ACK(tp, th) / tp->t_maxseg) >= 2) + if ((BYTES_THIS_ACK(tp, th) / maxseg) >= 2) num_segs = 2; tp->snd_cwnd = (tp->sackhint.sack_bytes_rexmit + - (tp->snd_nxt - tp->sack_newdata) + num_segs * tp->t_maxseg); + (tp->snd_nxt - tp->sack_newdata) + num_segs * maxseg); if (tp->snd_cwnd > tp->snd_ssthresh) tp->snd_cwnd = tp->snd_ssthresh; tp->t_flags |= TF_ACKNOW; ___ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r367017 - in stable/12/sys: net netinet netinet6
Author: rscheff Date: Sat Oct 24 20:23:20 2020 New Revision: 367017 URL: https://svnweb.freebsd.org/changeset/base/367017 Log: MFC r366569: Add IP(V6)_VLAN_PCP to set 802.1 priority per-flow. This adds a new IPPROTO_IP / IPPROTO_IPV6 setsockopt (getsockopt) option IP(V6)_VLAN_PCP, which can be set to -1 (interface default), or explicitly to any priority between 0 and 7. Note that for untagged traffic, explicitly adding a priority will insert a special 802.1Q vlan header with vlan ID = 0 to carry the priority setting. Reviewed by: gallatin, rrs MFC after:2 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D26409 Modified: stable/12/sys/net/if_ethersubr.c stable/12/sys/netinet/in.h stable/12/sys/netinet/in_pcb.h stable/12/sys/netinet/ip_output.c stable/12/sys/netinet6/in6.h stable/12/sys/netinet6/ip6_output.c Directory Properties: stable/12/ (props changed) Modified: stable/12/sys/net/if_ethersubr.c == --- stable/12/sys/net/if_ethersubr.cSat Oct 24 20:09:27 2020 (r367016) +++ stable/12/sys/net/if_ethersubr.cSat Oct 24 20:23:20 2020 (r367017) @@ -1349,6 +1349,13 @@ ether_8021q_frame(struct mbuf **mp, struct ifnet *ife, } /* +* If PCP is set in mbuf, use it +*/ + if ((*mp)->m_flags & M_VLANTAG) { + pcp = EVL_PRIOFTAG((*mp)->m_pkthdr.ether_vtag); + } + + /* * If underlying interface can do VLAN tag insertion itself, * just pass the packet along. However, we need some way to * tell the interface where the packet came from so that it Modified: stable/12/sys/netinet/in.h == --- stable/12/sys/netinet/in.h Sat Oct 24 20:09:27 2020(r367016) +++ stable/12/sys/netinet/in.h Sat Oct 24 20:23:20 2020(r367017) @@ -483,6 +483,10 @@ __END_DECLS /* The following option is private; do not use it from user applications. 
*/ #defineIP_MSFILTER 74 /* set/get filter list */ +/* The following option deals with the 802.1Q Ethernet Priority Code Point */ +#defineIP_VLAN_PCP 75 /* int; set/get PCP used for packet, */ +/* -1 use interface default */ + /* Protocol Independent Multicast API [RFC3678] */ #defineMCAST_JOIN_GROUP80 /* join an any-source group */ #defineMCAST_LEAVE_GROUP 81 /* leave all sources for group */ Modified: stable/12/sys/netinet/in_pcb.h == --- stable/12/sys/netinet/in_pcb.h Sat Oct 24 20:09:27 2020 (r367016) +++ stable/12/sys/netinet/in_pcb.h Sat Oct 24 20:23:20 2020 (r367017) @@ -762,6 +762,13 @@ intinp_so_options(const struct inpcb *inp); #define INP_SUPPORTS_MBUFQ 0x4000 /* Supports the mbuf queue method of LRO */ #define INP_MBUF_QUEUE_READY 0x8000 /* The transport is pacing, inputs can be queued */ #define INP_DONT_SACK_QUEUE0x0001 /* If a sack arrives do not wake me */ +#define INP_2PCP_SET 0x0002 /* If the Eth PCP should be set explicitly */ +#define INP_2PCP_BIT0 0x0004 /* Eth PCP Bit 0 */ +#define INP_2PCP_BIT1 0x0008 /* Eth PCP Bit 1 */ +#define INP_2PCP_BIT2 0x0010 /* Eth PCP Bit 2 */ +#define INP_2PCP_BASE INP_2PCP_BIT0 +#define INP_2PCP_MASK (INP_2PCP_BIT0 | INP_2PCP_BIT1 | INP_2PCP_BIT2) +#define INP_2PCP_SHIFT 18 /* shift PCP field in/out of inp_flags2 */ /* * Flags passed to in_pcblookup*() functions. 
*/ Modified: stable/12/sys/netinet/ip_output.c == --- stable/12/sys/netinet/ip_output.c Sat Oct 24 20:09:27 2020 (r367016) +++ stable/12/sys/netinet/ip_output.c Sat Oct 24 20:23:20 2020 (r367017) @@ -61,7 +61,9 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include +#include #include #include #include @@ -221,6 +223,7 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct rou int hlen = sizeof (struct ip); int mtu; int error = 0; + int vlan_pcp = -1; struct sockaddr_in *dst; const struct sockaddr_in *gw; struct in_ifaddr *ia; @@ -241,6 +244,9 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct rou m->m_pkthdr.flowid = inp->inp_flowid; M_HASHTYPE_SET(m, inp->inp_flowtype); } + if ((inp->inp_flags2 & INP_2PCP_SET) != 0) + vlan_pcp = (inp->inp_flags2 & INP_2PCP_MASK) >> + INP_2PCP_SHIFT; } if (ro == NULL) { @@ -588,6 +594,9 @@ sendit: }
svn commit: r367015 - in stable/12: sys/netinet usr.bin/netstat
Author: rscheff Date: Sat Oct 24 17:47:43 2020 New Revision: 367015 URL: https://svnweb.freebsd.org/changeset/base/367015 Log: MFC r366566;r366567: Extend netstat to display TCP stack and detailed congestion state Upstreaming the "-c" option used to show detailed per-connection congestion control state for TCP sessions. This is one summary patch, which adds the relevant variables into xtcpcb. As previous "spare" space is used, these changes are ABI compatible (an older version of netstat will simply not show the newly available data from newer kernels, and a newer version of netstat will only show zeroed data when querying older kernels). Reviewed by: tuexen MFC after:2 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D26518 Modified: stable/12/sys/netinet/tcp_subr.c stable/12/sys/netinet/tcp_var.h stable/12/usr.bin/netstat/inet.c stable/12/usr.bin/netstat/main.c stable/12/usr.bin/netstat/netstat.1 stable/12/usr.bin/netstat/netstat.h Directory Properties: stable/12/ (props changed) Modified: stable/12/sys/netinet/tcp_subr.c == --- stable/12/sys/netinet/tcp_subr.cSat Oct 24 17:11:45 2020 (r367014) +++ stable/12/sys/netinet/tcp_subr.cSat Oct 24 17:47:43 2020 (r367015) @@ -3248,6 +3248,12 @@ tcp_inptoxtp(const struct inpcb *inp, struct xtcpcb *x xt->t_sndzerowin = tp->t_sndzerowin; xt->t_sndrexmitpack = tp->t_sndrexmitpack; xt->t_rcvoopack = tp->t_rcvoopack; + xt->t_rcv_wnd = tp->rcv_wnd; + xt->t_snd_wnd = tp->snd_wnd; + xt->t_snd_cwnd = tp->snd_cwnd; + xt->t_snd_ssthresh = tp->snd_ssthresh; + xt->t_maxseg = tp->t_maxseg; + xt->xt_ecn = (tp->t_flags & TF_ECN_PERMIT) ? 
1 : 0; now = getsbinuptime(); #defineCOPYTIMER(ttt) do { \ Modified: stable/12/sys/netinet/tcp_var.h == --- stable/12/sys/netinet/tcp_var.h Sat Oct 24 17:11:45 2020 (r367014) +++ stable/12/sys/netinet/tcp_var.h Sat Oct 24 17:47:43 2020 (r367015) @@ -710,7 +710,13 @@ struct xtcpcb { int32_t tt_2msl;/* (s) */ int32_t tt_delack; /* (s) */ int32_t t_logstate; /* (3) */ - int32_t spare32[32]; + uint32_tt_snd_cwnd; /* (s) */ + uint32_tt_snd_ssthresh; /* (s) */ + uint32_tt_maxseg; /* (s) */ + uint32_tt_rcv_wnd; /* (s) */ + uint32_tt_snd_wnd; /* (s) */ + uint32_txt_ecn; /* (s) */ + int32_t spare32[26]; } __aligned(8); #ifdef _KERNEL Modified: stable/12/usr.bin/netstat/inet.c == --- stable/12/usr.bin/netstat/inet.cSat Oct 24 17:11:45 2020 (r367014) +++ stable/12/usr.bin/netstat/inet.cSat Oct 24 17:47:43 2020 (r367015) @@ -85,6 +85,8 @@ __FBSDID("$FreeBSD$"); #include "netstat.h" #include "nl_defs.h" +#define max(a, b) (((a) > (b)) ? (a) : (b)) + #ifdef INET static void inetprint(const char *, struct in_addr *, int, const char *, int, const int); @@ -204,6 +206,7 @@ protopr(u_long off, const char *name, int af1, int pro struct xinpcb *inp; struct xinpgen *xig, *oxig; struct xsocket *so; + int fnamelen, cnamelen; istcp = 0; switch (proto) { @@ -236,6 +239,28 @@ protopr(u_long off, const char *name, int af1, int pro if (!pcblist_sysctl(proto, name, )) return; + if (cflag || Cflag) { + fnamelen = strlen("Stack"); + cnamelen = strlen("CC"); + oxig = xig = (struct xinpgen *)buf; + for (xig = (struct xinpgen*)((char *)xig + xig->xig_len); + xig->xig_len > sizeof(struct xinpgen); + xig = (struct xinpgen *)((char *)xig + xig->xig_len)) { + if (istcp) { + tp = (struct xtcpcb *)xig; + inp = >xt_inp; + } else { + continue; + } + if (so->xso_protocol != proto) + continue; + if (inp->inp_gencnt > oxig->xig_gen) + continue; + fnamelen = max(fnamelen, (int)strlen(tp->xt_stack)); + cnamelen = max(cnamelen, (int)strlen(tp->xt_cc)); + } + } + oxig = xig = (struct xinpgen *)buf; for (xig =
svn commit: r367008 - head/sys/netinet/cc
Author: rscheff Date: Sat Oct 24 16:11:46 2020 New Revision: 367008 URL: https://svnweb.freebsd.org/changeset/base/367008 Log: TCP Cubic: improve reaction to (and rollback from) RTO 1. fix compliancy issue of CUBIC RTO handling according to RFC8312 section 4.7 2. add CUBIC CC_RTO_ERR handling Submitted by: chengc_netapp.com Reviewed by: rrs, tuexen, rscheff MFC after:2 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D26808 Modified: head/sys/netinet/cc/cc_cubic.c Modified: head/sys/netinet/cc/cc_cubic.c == --- head/sys/netinet/cc/cc_cubic.c Sat Oct 24 16:09:18 2020 (r367007) +++ head/sys/netinet/cc/cc_cubic.c Sat Oct 24 16:11:46 2020 (r367008) @@ -78,7 +78,7 @@ static void cubic_conn_init(struct cc_var *ccv); static int cubic_mod_init(void); static voidcubic_post_recovery(struct cc_var *ccv); static voidcubic_record_rtt(struct cc_var *ccv); -static voidcubic_ssthresh_update(struct cc_var *ccv); +static voidcubic_ssthresh_update(struct cc_var *ccv, uint32_t maxseg); static voidcubic_after_idle(struct cc_var *ccv); struct cubic { @@ -90,19 +90,28 @@ struct cubic { unsigned long max_cwnd; /* cwnd at the previous congestion event. */ unsigned long prev_max_cwnd; + /* A copy of prev_max_cwnd. Used for CC_RTO_ERR */ + unsigned long prev_max_cwnd_cp; /* various flags */ uint32_tflags; #define CUBICFLAG_CONG_EVENT 0x0001 /* congestion experienced */ #define CUBICFLAG_IN_SLOWSTART 0x0002 /* in slow start */ #define CUBICFLAG_IN_APPLIMIT 0x0004 /* application limited */ +#define CUBICFLAG_RTO_EVENT0x0008 /* RTO experienced */ /* Minimum observed rtt in ticks. */ int min_rtt_ticks; /* Mean observed rtt between congestion epochs. */ int mean_rtt_ticks; /* ACKs since last congestion event. */ int epoch_ack_count; - /* Time of last congestion event in ticks. */ + /* Timestamp (in ticks) of arriving in congestion avoidance from last +* congestion event. +*/ int t_last_cong; + /* Timestamp (in ticks) of a previous congestion event. 
Used for +* CC_RTO_ERR. +*/ + int t_last_cong_prev; }; static MALLOC_DEFINE(M_CUBIC, "cubic data", @@ -142,7 +151,14 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type) cubic_data->flags |= CUBICFLAG_IN_SLOWSTART; newreno_cc_algo.ack_received(ccv, type); } else { - if (cubic_data->flags & (CUBICFLAG_IN_SLOWSTART | + if ((cubic_data->flags & CUBICFLAG_RTO_EVENT) && + (cubic_data->flags & CUBICFLAG_IN_SLOWSTART)) { + /* RFC8312 Section 4.7 */ + cubic_data->flags &= ~(CUBICFLAG_RTO_EVENT | + CUBICFLAG_IN_SLOWSTART); + cubic_data->max_cwnd = CCV(ccv, snd_cwnd); + cubic_data->K = 0; + } else if (cubic_data->flags & (CUBICFLAG_IN_SLOWSTART | CUBICFLAG_IN_APPLIMIT)) { cubic_data->flags &= ~(CUBICFLAG_IN_SLOWSTART | CUBICFLAG_IN_APPLIMIT); @@ -273,10 +289,10 @@ cubic_cong_signal(struct cc_var *ccv, uint32_t type) case CC_NDUPACK: if (!IN_FASTRECOVERY(CCV(ccv, t_flags))) { if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) { - cubic_ssthresh_update(ccv); + cubic_ssthresh_update(ccv, mss); cubic_data->flags |= CUBICFLAG_CONG_EVENT; cubic_data->t_last_cong = ticks; - cubic_data->K = cubic_k(cubic_data->max_cwnd / CCV(ccv, t_maxseg)); + cubic_data->K = cubic_k(cubic_data->max_cwnd / mss); } ENTER_RECOVERY(CCV(ccv, t_flags)); } @@ -284,37 +300,35 @@ cubic_cong_signal(struct cc_var *ccv, uint32_t type) case CC_ECN: if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) { - cubic_ssthresh_update(ccv); + cubic_ssthresh_update(ccv, mss); cubic_data->flags |= CUBICFLAG_CONG_EVENT; cubic_data->t_last_cong = ticks; - cubic_data->K = cubic_k(cubic_data->max_cwnd / CCV(ccv, t_maxseg)); + cubic_data->K = cubic_k(cubic_data->max_cwnd /
svn commit: r367007 - in head/sys/netinet: . cc
Author: rscheff Date: Sat Oct 24 16:09:18 2020 New Revision: 367007 URL: https://svnweb.freebsd.org/changeset/base/367007 Log: tcp: move cwnd and ssthresh updates into cc modules This will pave the way of setting ssthresh differently in TCP CUBIC, according to RFC8312 section 4.7. No functional change, only code movement. Submitted by: chengc_netapp.com Reviewed by: rrs, tuexen, rscheff MFC after:2 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D26807 Modified: head/sys/netinet/cc/cc_cubic.c head/sys/netinet/cc/cc_dctcp.c head/sys/netinet/cc/cc_htcp.c head/sys/netinet/cc/cc_newreno.c head/sys/netinet/tcp_input.c Modified: head/sys/netinet/cc/cc_cubic.c == --- head/sys/netinet/cc/cc_cubic.c Sat Oct 24 16:05:37 2020 (r367006) +++ head/sys/netinet/cc/cc_cubic.c Sat Oct 24 16:09:18 2020 (r367007) @@ -264,8 +264,10 @@ static void cubic_cong_signal(struct cc_var *ccv, uint32_t type) { struct cubic *cubic_data; + u_int mss; cubic_data = ccv->cc_data; + mss = tcp_maxseg(ccv->ccvc.tcp); switch (type) { case CC_NDUPACK: @@ -292,6 +294,10 @@ cubic_cong_signal(struct cc_var *ccv, uint32_t type) break; case CC_RTO: + CCV(ccv, snd_ssthresh) = max(min(CCV(ccv, snd_wnd), +CCV(ccv, snd_cwnd)) / 2 / mss, +2) * mss; + CCV(ccv, snd_cwnd) = mss; /* * Grab the current time and record it so we know when the * most recent congestion event was. 
Only record it when the Modified: head/sys/netinet/cc/cc_dctcp.c == --- head/sys/netinet/cc/cc_dctcp.c Sat Oct 24 16:05:37 2020 (r367006) +++ head/sys/netinet/cc/cc_dctcp.c Sat Oct 24 16:09:18 2020 (r367007) @@ -235,7 +235,7 @@ dctcp_cong_signal(struct cc_var *ccv, uint32_t type) if (CCV(ccv, t_flags2) & TF2_ECN_PERMIT) { dctcp_data = ccv->cc_data; cwin = CCV(ccv, snd_cwnd); - mss = CCV(ccv, t_maxseg); + mss = tcp_maxseg(ccv->ccvc.tcp); switch (type) { case CC_NDUPACK: @@ -282,6 +282,10 @@ dctcp_cong_signal(struct cc_var *ccv, uint32_t type) dctcp_data->ece_curr = 1; break; case CC_RTO: + CCV(ccv, snd_ssthresh) = max(min(CCV(ccv, snd_wnd), +CCV(ccv, snd_cwnd)) / 2 / mss, +2) * mss; + CCV(ccv, snd_cwnd) = mss; dctcp_update_alpha(ccv); dctcp_data->save_sndnxt += CCV(ccv, t_maxseg); dctcp_data->num_cong_events++; Modified: head/sys/netinet/cc/cc_htcp.c == --- head/sys/netinet/cc/cc_htcp.c Sat Oct 24 16:05:37 2020 (r367006) +++ head/sys/netinet/cc/cc_htcp.c Sat Oct 24 16:09:18 2020 (r367007) @@ -271,8 +271,10 @@ static void htcp_cong_signal(struct cc_var *ccv, uint32_t type) { struct htcp *htcp_data; + u_int mss; htcp_data = ccv->cc_data; + mss = tcp_maxseg(ccv->ccvc.tcp); switch (type) { case CC_NDUPACK: @@ -311,6 +313,10 @@ htcp_cong_signal(struct cc_var *ccv, uint32_t type) break; case CC_RTO: + CCV(ccv, snd_ssthresh) = max(min(CCV(ccv, snd_wnd), +CCV(ccv, snd_cwnd)) / 2 / mss, +2) * mss; + CCV(ccv, snd_cwnd) = mss; /* * Grab the current time and record it so we know when the * most recent congestion event was. Only record it when the Modified: head/sys/netinet/cc/cc_newreno.c == --- head/sys/netinet/cc/cc_newreno.cSat Oct 24 16:05:37 2020 (r367006) +++ head/sys/netinet/cc/cc_newreno.cSat Oct 24 16:09:18 2020 (r367007) @@ -237,7 +237,7 @@ newreno_cong_signal(struct cc_var *ccv, uint32_t type) u_int mss; cwin = CCV(ccv, snd_cwnd); - mss = CCV(ccv, t_maxseg); + mss = tcp_maxseg(ccv->ccvc.tcp); nreno = ccv->cc_data; beta = (nreno == NULL) ? 
V_newreno_beta : nreno->beta; beta_ecn = (nreno == NULL) ? V_newreno_beta_ecn : nreno->beta_ecn; @@ -274,6 +274,12 @@ newreno_cong_signal(struct cc_var *ccv, uint32_t type) CCV(ccv, snd_cwnd) = cwin;
svn commit: r366627 - stable/12/usr.sbin/ctld
Author: rscheff Date: Sun Oct 11 13:39:04 2020 New Revision: 366627 URL: https://svnweb.freebsd.org/changeset/base/366627 Log: MFC r366206: Add DSCP support for network QoS to iscsi target. In order to prioritize iSCSI traffic across a network, DSCP can be used. In order not to rely on "ipfw setdscp" or in-network reclassification, this adds the dscp value directly to the portal group (where TCP sessions are accepted). Reviewed by: mav, trasz MFC after:2 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D26385 Modified: stable/12/usr.sbin/ctld/ctl.conf.5 stable/12/usr.sbin/ctld/ctld.c stable/12/usr.sbin/ctld/ctld.h stable/12/usr.sbin/ctld/parse.y stable/12/usr.sbin/ctld/token.l stable/12/usr.sbin/ctld/uclparse.c Directory Properties: stable/12/ (props changed) Modified: stable/12/usr.sbin/ctld/ctl.conf.5 == --- stable/12/usr.sbin/ctld/ctl.conf.5 Sun Oct 11 10:40:11 2020 (r366626) +++ stable/12/usr.sbin/ctld/ctl.conf.5 Sun Oct 11 13:39:04 2020 (r366627) @@ -250,6 +250,14 @@ Specifies that this .Sy portal-group is listened by some other host. This host will announce it on discovery stage, but won't listen. +.It Ic dscp Ar value +The DiffServ Codepoint used for sending data. The DSCP can be +set to numeric, or hexadecimal values directly, as well as the +well-defined +.Qq Ar CSx +and +.Qq Ar AFxx +codepoints. .El .Ss target Context .Bl -tag -width indent Modified: stable/12/usr.sbin/ctld/ctld.c == --- stable/12/usr.sbin/ctld/ctld.c Sun Oct 11 10:40:11 2020 (r366626) +++ stable/12/usr.sbin/ctld/ctld.c Sun Oct 11 13:39:04 2020 (r366627) @@ -624,6 +624,7 @@ portal_group_new(struct conf *conf, const char *name) TAILQ_INIT(>pg_ports); pg->pg_conf = conf; pg->pg_tag = 0; /* Assigned later in conf_apply(). 
*/ + pg->pg_dscp = -1; TAILQ_INSERT_TAIL(>conf_portal_groups, pg, pg_next); return (pg); @@ -2179,6 +2180,32 @@ conf_apply(struct conf *oldconf, struct conf *newconf) newp->p_socket = 0; cumulated_error++; continue; + } + if (newpg->pg_dscp != -1) { + struct sockaddr sa; + int len = sizeof(sa); + getsockname(newp->p_socket, , ); + /* +* Only allow the 6-bit DSCP +* field to be modified +*/ + int tos = newpg->pg_dscp << 2; + if (sa.sa_family == AF_INET) { + if (setsockopt(newp->p_socket, + IPPROTO_IP, IP_TOS, + , sizeof(tos)) == -1) + log_warn("setsockopt(IP_TOS) " + "failed for %s", + newp->p_listen); + } else + if (sa.sa_family == AF_INET6) { + if (setsockopt(newp->p_socket, + IPPROTO_IPV6, IPV6_TCLASS, + , sizeof(tos)) == -1) + log_warn("setsockopt(IPV6_TCLASS) " + "failed for %s", + newp->p_listen); + } } error = bind(newp->p_socket, newp->p_ai->ai_addr, newp->p_ai->ai_addrlen); Modified: stable/12/usr.sbin/ctld/ctld.h == --- stable/12/usr.sbin/ctld/ctld.h Sun Oct 11 10:40:11 2020 (r366626) +++ stable/12/usr.sbin/ctld/ctld.h Sun Oct 11 13:39:04 2020 (r366627) @@ -127,6 +127,7 @@ struct portal_group { TAILQ_HEAD(, port) pg_ports; char*pg_offload; char*pg_redirection; + int pg_dscp; uint16_tpg_tag; }; Modified: stable/12/usr.sbin/ctld/parse.y == --- stable/12/usr.sbin/ctld/parse.y Sun Oct 11 10:40:11 2020 (r366626) +++ stable/12/usr.sbin/ctld/parse.y Sun Oct 11 13:39:04 2020 (r366627) @@ -41,6 +41,8 @@ #include
svn commit: r366573 - in head: sys/dev/iscsi usr.bin/iscsictl usr.sbin/iscsid
Author: rscheff Date: Fri Oct 9 14:33:09 2020 New Revision: 366573 URL: https://svnweb.freebsd.org/changeset/base/366573 Log: Add DSCP support for network QoS to iscsi initiator. Allow the DSCP codepoint also to be configurable for the traffic in the direction from the initiator to the target, such that writes and any requests are also treated in the appropriate QoS class. Reviewed by: mav MFC after:2 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D26714 Modified: head/sys/dev/iscsi/iscsi_ioctl.h head/usr.bin/iscsictl/Makefile head/usr.bin/iscsictl/iscsi.conf.5 head/usr.bin/iscsictl/iscsictl.c head/usr.bin/iscsictl/iscsictl.h head/usr.bin/iscsictl/parse.y head/usr.bin/iscsictl/token.l head/usr.sbin/iscsid/iscsid.c Modified: head/sys/dev/iscsi/iscsi_ioctl.h == --- head/sys/dev/iscsi/iscsi_ioctl.hFri Oct 9 14:03:45 2020 (r366572) +++ head/sys/dev/iscsi/iscsi_ioctl.hFri Oct 9 14:33:09 2020 (r366573) @@ -70,7 +70,8 @@ struct iscsi_session_conf { int isc_iser; charisc_offload[ISCSI_OFFLOAD_LEN]; int isc_enable; - int isc_spare[4]; + int isc_dscp; + int isc_spare[3]; }; /* Modified: head/usr.bin/iscsictl/Makefile == --- head/usr.bin/iscsictl/Makefile Fri Oct 9 14:03:45 2020 (r366572) +++ head/usr.bin/iscsictl/Makefile Fri Oct 9 14:33:09 2020 (r366573) @@ -7,7 +7,7 @@ CFLAGS+=-I${.CURDIR} CFLAGS+= -I${SRCTOP}/sys/dev/iscsi MAN= iscsi.conf.5 iscsictl.8 -LIBADD=xo +LIBADD=util xo YFLAGS+= -v LFLAGS+= -i Modified: head/usr.bin/iscsictl/iscsi.conf.5 == --- head/usr.bin/iscsictl/iscsi.conf.5 Fri Oct 9 14:03:45 2020 (r366572) +++ head/usr.bin/iscsictl/iscsi.conf.5 Fri Oct 9 14:33:09 2020 (r366573) @@ -145,6 +145,16 @@ for iSCSI over RDMA, or .Qq Ar iSCSI . Default is .Qq Ar iSCSI . +.It Cm dscp +The DiffServ Codepoint used for sending data. The DSCP can be +set to numeric, or hexadecimal values directly, as well as the +well-defined +.Qq Ar cs +and +.Qq Ar af +codepoints. 
+Default is no specified dscp codepoint, which means the default +of the outgoing interface is used. .El .Sh FILES .Bl -tag -width indent Modified: head/usr.bin/iscsictl/iscsictl.c == --- head/usr.bin/iscsictl/iscsictl.cFri Oct 9 14:03:45 2020 (r366572) +++ head/usr.bin/iscsictl/iscsictl.cFri Oct 9 14:33:09 2020 (r366573) @@ -87,6 +87,7 @@ target_new(struct conf *conf) if (targ == NULL) xo_err(1, "calloc"); targ->t_conf = conf; + targ->t_dscp = -1; TAILQ_INSERT_TAIL(>conf_targets, targ, t_next); return (targ); @@ -358,6 +359,7 @@ conf_from_target(struct iscsi_session_conf *conf, conf->isc_data_digest = ISCSI_DIGEST_CRC32C; else conf->isc_data_digest = ISCSI_DIGEST_NONE; + conf->isc_dscp = targ->t_dscp; } static int @@ -535,6 +537,9 @@ kernel_list(int iscsi_fd, const struct target *targ __ "Target portal:", conf->isc_target_addr); xo_emit("{L:/%-26s}{V:alias/%s}\n", "Target alias:", state->iss_target_alias); + if (conf->isc_dscp != -1) + xo_emit("{L:/%-26s}{V:dscp/0x%02x}\n", + "Target DSCP:", conf->isc_dscp); xo_close_container("target"); xo_open_container("auth"); Modified: head/usr.bin/iscsictl/iscsictl.h == --- head/usr.bin/iscsictl/iscsictl.hFri Oct 9 14:03:45 2020 (r366572) +++ head/usr.bin/iscsictl/iscsictl.hFri Oct 9 14:33:09 2020 (r366573) @@ -78,6 +78,7 @@ struct target { int t_session_type; int t_enable; int t_protocol; + int t_dscp; char*t_offload; char*t_user; char*t_secret; Modified: head/usr.bin/iscsictl/parse.y == --- head/usr.bin/iscsictl/parse.y Fri Oct 9 14:03:45 2020 (r366572) +++ head/usr.bin/iscsictl/parse.y Fri Oct 9 14:33:09 2020 (r366573) @@ -44,6 +44,8 @@ #include #include "iscsictl.h" +#include +#include extern FILE
svn commit: r366570 - head/sys/netinet
Author: rscheff Date: Fri Oct 9 12:44:56 2020 New Revision: 366570 URL: https://svnweb.freebsd.org/changeset/base/366570 Log: Stop sending tiny new data segments during SACK recovery Consider the currently in-use TCP options when calculating the amount of new data to be injected during SACK loss recovery. That addresses the effect that very small (new) segments could be injected on partial ACKs while still performing a SACK loss recovery. Reported by: Liang Tian Reviewed by: tuexen, chengc_netapp.com MFC after:2 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D26446 Modified: head/sys/netinet/tcp_output.c head/sys/netinet/tcp_sack.c Modified: head/sys/netinet/tcp_output.c == --- head/sys/netinet/tcp_output.c Fri Oct 9 12:06:43 2020 (r366569) +++ head/sys/netinet/tcp_output.c Fri Oct 9 12:44:56 2020 (r366570) @@ -336,7 +336,7 @@ again: sendalot = 1; TCPSTAT_INC(tcps_sack_rexmits); TCPSTAT_ADD(tcps_sack_rexmit_bytes, - min(len, tp->t_maxseg)); + min(len, tcp_maxseg(tp))); } } after_sack_rexmit: @@ -858,7 +858,6 @@ send: if (flags & TH_SYN) to.to_flags |= TOF_SACKPERM; else if (TCPS_HAVEESTABLISHED(tp->t_state) && - (tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks > 0) { to.to_flags |= TOF_SACK; to.to_nsacks = tp->rcv_numsacks; Modified: head/sys/netinet/tcp_sack.c == --- head/sys/netinet/tcp_sack.c Fri Oct 9 12:06:43 2020(r366569) +++ head/sys/netinet/tcp_sack.c Fri Oct 9 12:44:56 2020(r366570) @@ -787,15 +787,16 @@ void tcp_sack_partialack(struct tcpcb *tp, struct tcphdr *th) { int num_segs = 1; + u_int maxseg = tcp_maxseg(tp); INP_WLOCK_ASSERT(tp->t_inpcb); tcp_timer_activate(tp, TT_REXMT, 0); tp->t_rtttime = 0; /* Send one or 2 segments based on how much new data was acked. 
*/ - if ((BYTES_THIS_ACK(tp, th) / tp->t_maxseg) >= 2) + if ((BYTES_THIS_ACK(tp, th) / maxseg) >= 2) num_segs = 2; tp->snd_cwnd = (tp->sackhint.sack_bytes_rexmit + - (tp->snd_nxt - tp->snd_recover) + num_segs * tp->t_maxseg); + (tp->snd_nxt - tp->snd_recover) + num_segs * maxseg); if (tp->snd_cwnd > tp->snd_ssthresh) tp->snd_cwnd = tp->snd_ssthresh; tp->t_flags |= TF_ACKNOW; ___ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r366569 - in head/sys: net netinet netinet6
Author: rscheff Date: Fri Oct 9 12:06:43 2020 New Revision: 366569 URL: https://svnweb.freebsd.org/changeset/base/366569 Log: Add IP(V6)_VLAN_PCP to set 802.1 priority per-flow. This adds a new IP_PROTO / IPV6_PROTO setsockopt (getsockopt) option IP(V6)_VLAN_PCP, which can be set to -1 (interface default), or explicitly to any priority between 0 and 7. Note that for untagged traffic, explicitly adding a priority will insert a special 802.1Q vlan header with vlan ID = 0 to carry the priority setting. Reviewed by: gallatin, rrs MFC after:2 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D26409 Modified: head/sys/net/if_ethersubr.c head/sys/netinet/in.h head/sys/netinet/in_pcb.h head/sys/netinet/ip_output.c head/sys/netinet6/in6.h head/sys/netinet6/ip6_output.c Modified: head/sys/net/if_ethersubr.c == --- head/sys/net/if_ethersubr.c Fri Oct 9 11:24:19 2020(r366568) +++ head/sys/net/if_ethersubr.c Fri Oct 9 12:06:43 2020(r366569) @@ -1388,6 +1388,13 @@ ether_8021q_frame(struct mbuf **mp, struct ifnet *ife, } /* +* If PCP is set in mbuf, use it +*/ + if ((*mp)->m_flags & M_VLANTAG) { + pcp = EVL_PRIOFTAG((*mp)->m_pkthdr.ether_vtag); + } + + /* * If underlying interface can do VLAN tag insertion itself, * just pass the packet along. However, we need some way to * tell the interface where the packet came from so that it Modified: head/sys/netinet/in.h == --- head/sys/netinet/in.h Fri Oct 9 11:24:19 2020(r366568) +++ head/sys/netinet/in.h Fri Oct 9 12:06:43 2020(r366569) @@ -483,6 +483,10 @@ __END_DECLS /* The following option is private; do not use it from user applications. 
*/ #defineIP_MSFILTER 74 /* set/get filter list */ +/* The following option deals with the 802.1Q Ethernet Priority Code Point */ +#defineIP_VLAN_PCP 75 /* int; set/get PCP used for packet, */ +/* -1 use interface default */ + /* Protocol Independent Multicast API [RFC3678] */ #defineMCAST_JOIN_GROUP80 /* join an any-source group */ #defineMCAST_LEAVE_GROUP 81 /* leave all sources for group */ Modified: head/sys/netinet/in_pcb.h == --- head/sys/netinet/in_pcb.h Fri Oct 9 11:24:19 2020(r366568) +++ head/sys/netinet/in_pcb.h Fri Oct 9 12:06:43 2020(r366569) @@ -748,6 +748,13 @@ intinp_so_options(const struct inpcb *inp); #define INP_SUPPORTS_MBUFQ 0x4000 /* Supports the mbuf queue method of LRO */ #define INP_MBUF_QUEUE_READY 0x8000 /* The transport is pacing, inputs can be queued */ #define INP_DONT_SACK_QUEUE0x0001 /* If a sack arrives do not wake me */ +#define INP_2PCP_SET 0x0002 /* If the Eth PCP should be set explicitly */ +#define INP_2PCP_BIT0 0x0004 /* Eth PCP Bit 0 */ +#define INP_2PCP_BIT1 0x0008 /* Eth PCP Bit 1 */ +#define INP_2PCP_BIT2 0x0010 /* Eth PCP Bit 2 */ +#define INP_2PCP_BASE INP_2PCP_BIT0 +#define INP_2PCP_MASK (INP_2PCP_BIT0 | INP_2PCP_BIT1 | INP_2PCP_BIT2) +#define INP_2PCP_SHIFT 18 /* shift PCP field in/out of inp_flags2 */ /* * Flags passed to in_pcblookup*() functions. 
*/ Modified: head/sys/netinet/ip_output.c == --- head/sys/netinet/ip_output.cFri Oct 9 11:24:19 2020 (r366568) +++ head/sys/netinet/ip_output.cFri Oct 9 12:06:43 2020 (r366569) @@ -62,7 +62,9 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include +#include #include #include #include @@ -324,6 +326,7 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct rou int hlen = sizeof (struct ip); int mtu = 0; int error = 0; + int vlan_pcp = -1; struct sockaddr_in *dst, sin; const struct sockaddr_in *gw; struct in_ifaddr *ia = NULL; @@ -345,6 +348,9 @@ ip_output(struct mbuf *m, struct mbuf *opt, struct rou m->m_pkthdr.flowid = inp->inp_flowid; M_HASHTYPE_SET(m, inp->inp_flowtype); } + if ((inp->inp_flags2 & INP_2PCP_SET) != 0) + vlan_pcp = (inp->inp_flags2 & INP_2PCP_MASK) >> + INP_2PCP_SHIFT; #ifdef NUMA m->m_pkthdr.numa_domain = inp->inp_numa_domain; #endif @@ -717,6 +723,9 @@ sendit: } } + if (vlan_pcp > -1) + EVL_APPLY_PRI(m, vlan_pcp); +
svn commit: r366567 - head/sys/netinet
Author: rscheff Date: Fri Oct 9 10:55:19 2020 New Revision: 366567 URL: https://svnweb.freebsd.org/changeset/base/366567 Log: Extend netstat to display TCP stack and detailed congestion state (2) Extend netstat to display TCP stack and detailed congestion state Adding the "-c" option used to show detailed per-connection congestion control state for TCP sessions. This is one summary patch, which adds the relevant variables into xtcpcb. As previous "spare" space is used, these changes are ABI compatible. Reviewed by: tuexen MFC after:2 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D26518 Modified: head/sys/netinet/tcp_subr.c head/sys/netinet/tcp_var.h Modified: head/sys/netinet/tcp_subr.c == --- head/sys/netinet/tcp_subr.c Fri Oct 9 10:07:41 2020(r366566) +++ head/sys/netinet/tcp_subr.c Fri Oct 9 10:55:19 2020(r366567) @@ -3437,6 +3437,13 @@ tcp_inptoxtp(const struct inpcb *inp, struct xtcpcb *x xt->t_sndzerowin = tp->t_sndzerowin; xt->t_sndrexmitpack = tp->t_sndrexmitpack; xt->t_rcvoopack = tp->t_rcvoopack; + xt->t_rcv_wnd = tp->rcv_wnd; + xt->t_snd_wnd = tp->snd_wnd; + xt->t_snd_cwnd = tp->snd_cwnd; + xt->t_snd_ssthresh = tp->snd_ssthresh; + xt->t_maxseg = tp->t_maxseg; + xt->xt_ecn = (tp->t_flags2 & TF2_ECN_PERMIT) ? 1 : 0 + +(tp->t_flags2 & TF2_ACE_PERMIT) ? 
2 : 0; now = getsbinuptime(); #defineCOPYTIMER(ttt) do { \ Modified: head/sys/netinet/tcp_var.h == --- head/sys/netinet/tcp_var.h Fri Oct 9 10:07:41 2020(r366566) +++ head/sys/netinet/tcp_var.h Fri Oct 9 10:55:19 2020(r366567) @@ -768,7 +768,13 @@ struct xtcpcb { int32_t tt_2msl;/* (s) */ int32_t tt_delack; /* (s) */ int32_t t_logstate; /* (3) */ - int32_t spare32[32]; + uint32_tt_snd_cwnd; /* (s) */ + uint32_tt_snd_ssthresh; /* (s) */ + uint32_tt_maxseg; /* (s) */ + uint32_tt_rcv_wnd; /* (s) */ + uint32_tt_snd_wnd; /* (s) */ + uint32_txt_ecn; /* (s) */ + int32_t spare32[26]; } __aligned(8); #ifdef _KERNEL ___ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r366566 - head/usr.bin/netstat
Author: rscheff Date: Fri Oct 9 10:07:41 2020 New Revision: 366566 URL: https://svnweb.freebsd.org/changeset/base/366566 Log: Extend netstat to display TCP stack and detailed congestion state Adding the "-c" option used to show detailed per-connection congestion control state for TCP sessions. This is one summary patch, which adds the relevant variables into xtcpcb. As previous "spare" space is used, these changes are ABI compatible. Reviewed by: tuexen MFC after:2 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D26518 Modified: head/usr.bin/netstat/inet.c head/usr.bin/netstat/main.c head/usr.bin/netstat/netstat.1 head/usr.bin/netstat/netstat.h Modified: head/usr.bin/netstat/inet.c == --- head/usr.bin/netstat/inet.c Fri Oct 9 09:37:43 2020(r366565) +++ head/usr.bin/netstat/inet.c Fri Oct 9 10:07:41 2020(r366566) @@ -85,6 +85,8 @@ __FBSDID("$FreeBSD$"); #include "netstat.h" #include "nl_defs.h" +#define max(a, b) (((a) > (b)) ? (a) : (b)) + #ifdef INET static void inetprint(const char *, struct in_addr *, int, const char *, int, const int); @@ -204,6 +206,7 @@ protopr(u_long off, const char *name, int af1, int pro struct xinpcb *inp; struct xinpgen *xig, *oxig; struct xsocket *so; + int fnamelen, cnamelen; istcp = 0; switch (proto) { @@ -236,6 +239,28 @@ protopr(u_long off, const char *name, int af1, int pro if (!pcblist_sysctl(proto, name, )) return; + if (cflag || Cflag) { + fnamelen = strlen("Stack"); + cnamelen = strlen("CC"); + oxig = xig = (struct xinpgen *)buf; + for (xig = (struct xinpgen*)((char *)xig + xig->xig_len); + xig->xig_len > sizeof(struct xinpgen); + xig = (struct xinpgen *)((char *)xig + xig->xig_len)) { + if (istcp) { + tp = (struct xtcpcb *)xig; + inp = >xt_inp; + } else { + continue; + } + if (so->xso_protocol != proto) + continue; + if (inp->inp_gencnt > oxig->xig_gen) + continue; + fnamelen = max(fnamelen, (int)strlen(tp->xt_stack)); + cnamelen = max(cnamelen, (int)strlen(tp->xt_cc)); + } + } + oxig = xig = 
(struct xinpgen *)buf; for (xig = (struct xinpgen *)((char *)xig + xig->xig_len); xig->xig_len > sizeof(struct xinpgen); @@ -341,9 +366,19 @@ protopr(u_long off, const char *name, int af1, int pro xo_emit(" {T:/%8.8s} {T:/%5.5s}", "flowid", "ftype"); } + if (cflag) { + xo_emit(" {T:/%-*.*s}", + fnamelen, fnamelen, "Stack"); + } if (Cflag) - xo_emit(" {T:/%-*.*s}", TCP_CA_NAME_MAX, - TCP_CA_NAME_MAX, "CC"); + xo_emit(" {T:/%-*.*s} {T:/%10.10s}" + " {T:/%10.10s} {T:/%5.5s}" + " {T:/%3.3s}", cnamelen, + cnamelen, "CC", + "cwin", + "ssthresh", + "MSS", + "ECN"); if (Pflag) xo_emit(" {T:/%s}", "Log ID"); xo_emit("\n"); @@ -518,9 +553,24 @@ protopr(u_long off, const char *name, int af1, int pro inp->inp_flowtype); } if (istcp) { + if (cflag) + xo_emit(" {:stack/%-*.*s}", + + fnamelen, fnamelen, tp->xt_stack); if (Cflag) - xo_emit(" {:cc/%-*.*s}", TCP_CA_NAME_MAX, - TCP_CA_NAME_MAX, tp->xt_cc); + xo_emit(" {:cc/%-*.*s}" + " {:snd-cwnd/%10lu}" + " {:snd-ssthresh/%10lu}" + " {:t-maxseg/%5u} {:ecn/%3s}", + cnamelen, cnamelen, tp->xt_cc, + tp->t_snd_cwnd, tp->t_snd_ssthresh, + tp->t_maxseg, +
svn commit: r366565 - stable/12/sys/netinet
Author: rscheff Date: Fri Oct 9 09:37:43 2020 New Revision: 366565 URL: https://svnweb.freebsd.org/changeset/base/366565 Log: MFC r366150: TCP: send full initial window when timestamps are in use The fastpath in tcp_output tries to send out full segments, and avoid sending partial segments by comparing against the static t_maxseg variable. That value does not consider tcp options like timestamps, while the initial window calculation is using the correct dynamic tcp_maxseg() function. Due to this interaction, the last, full size segment is considered too short and not sent out immediately. Reported by: tuexen MFC after:2 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D26478 Modified: stable/12/sys/netinet/tcp.h stable/12/sys/netinet/tcp_output.c stable/12/sys/netinet/tcp_subr.c Directory Properties: stable/12/ (props changed) Modified: stable/12/sys/netinet/tcp.h == --- stable/12/sys/netinet/tcp.h Fri Oct 9 09:33:45 2020(r366564) +++ stable/12/sys/netinet/tcp.h Fri Oct 9 09:37:43 2020(r366565) @@ -80,6 +80,8 @@ struct tcphdr { u_short th_urp; /* urgent pointer */ }; +#definePADTCPOLEN(len) len) / 4) + !!((len) % 4)) * 4) + #defineTCPOPT_EOL 0 #define TCPOLEN_EOL 1 #defineTCPOPT_PAD 0 /* padding after EOL */ Modified: stable/12/sys/netinet/tcp_output.c == --- stable/12/sys/netinet/tcp_output.c Fri Oct 9 09:33:45 2020 (r366564) +++ stable/12/sys/netinet/tcp_output.c Fri Oct 9 09:37:43 2020 (r366565) @@ -577,6 +577,20 @@ after_sack_rexmit: if (len >= tp->t_maxseg) goto send; /* +* As the TCP header options are now +* considered when setting up the initial +* window, we would not send the last segment +* if we skip considering the option length here. +* Note: this may not work when tcp headers change +* very dynamically in the future. +*/ + if tp->t_flags & TF_SIGNATURE) ? + PADTCPOLEN(TCPOLEN_SIGNATURE) : 0) + + ((tp->t_flags & TF_RCVD_TSTMP) ? + PADTCPOLEN(TCPOLEN_TIMESTAMP) : 0) + + len) >= tp->t_maxseg) + goto send; + /* * NOTE! 
on localhost connections an 'ack' from the remote * end may occur synchronously with the output and cause * us to flush a buffer queued with moretocome. XXX Modified: stable/12/sys/netinet/tcp_subr.c == --- stable/12/sys/netinet/tcp_subr.c Fri Oct 9 09:33:45 2020 (r366564) +++ stable/12/sys/netinet/tcp_subr.c Fri Oct 9 09:37:43 2020 (r366565) @@ -2941,7 +2941,6 @@ tcp_maxseg(const struct tcpcb *tp) * but this is harmless, since result of tcp_maxseg() is used * only in cwnd and ssthresh estimations. */ -#define PAD(len) ((((len) / 4) + !!((len) % 4)) * 4) if (TCPS_HAVEESTABLISHED(tp->t_state)) { if (tp->t_flags & TF_RCVD_TSTMP) optlen = TCPOLEN_TSTAMP_APPA; @@ -2949,26 +2948,26 @@ tcp_maxseg(const struct tcpcb *tp) optlen = 0; #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) if (tp->t_flags & TF_SIGNATURE) - optlen += PAD(TCPOLEN_SIGNATURE); + optlen += PADTCPOLEN(TCPOLEN_SIGNATURE); #endif if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks > 0) { optlen += TCPOLEN_SACKHDR; optlen += tp->rcv_numsacks * TCPOLEN_SACK; - optlen = PAD(optlen); + optlen = PADTCPOLEN(optlen); } } else { if (tp->t_flags & TF_REQ_TSTMP) optlen = TCPOLEN_TSTAMP_APPA; else - optlen = PAD(TCPOLEN_MAXSEG); + optlen = PADTCPOLEN(TCPOLEN_MAXSEG); if (tp->t_flags & TF_REQ_SCALE) - optlen += PAD(TCPOLEN_WINDOW); + optlen += PADTCPOLEN(TCPOLEN_WINDOW); #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) if (tp->t_flags & TF_SIGNATURE) - optlen += PAD(TCPOLEN_SIGNATURE); + optlen += PADTCPOLEN(TCPOLEN_SIGNATURE); #endif if (tp->t_flags & TF_SACK_PERMIT) - optlen +=
svn commit: r366564 - stable/12/sys/netinet/cc
Author: rscheff Date: Fri Oct 9 09:33:45 2020 New Revision: 366564 URL: https://svnweb.freebsd.org/changeset/base/366564 Log: MFC r366149: TCP newreno: improve after_idle ssthresh Adjust ssthresh in after_idle to the maximum of the prior ssthresh, or 3/4 of the prior cwnd. See RFC2861 section 2 for an in depth explanation for the rationale around this. As newreno is the default "fall-through" reaction, most tcp variants will benefit from this. Reviewed by: tuexen MFC after:2 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D22438 Modified: stable/12/sys/netinet/cc/cc_newreno.c Directory Properties: stable/12/ (props changed) Modified: stable/12/sys/netinet/cc/cc_newreno.c == --- stable/12/sys/netinet/cc/cc_newreno.c Fri Oct 9 05:28:32 2020 (r366563) +++ stable/12/sys/netinet/cc/cc_newreno.c Fri Oct 9 09:33:45 2020 (r366564) @@ -213,12 +213,19 @@ newreno_after_idle(struct cc_var *ccv) * wirespeed, overloading router and switch buffers along the way. * * See RFC5681 Section 4.1. "Restarting Idle Connections". +* +* In addition, per RFC2861 Section 2, the ssthresh is set to the +* maximum of the former ssthresh or 3/4 of the old cwnd, to +* not exit slow-start prematurely. */ if (V_tcp_do_rfc3390) rw = min(4 * CCV(ccv, t_maxseg), max(2 * CCV(ccv, t_maxseg), 4380)); else rw = CCV(ccv, t_maxseg) * 2; + + CCV(ccv, snd_ssthresh) = max(CCV(ccv, snd_ssthresh), + CCV(ccv, snd_cwnd)-(CCV(ccv, snd_cwnd)>>2)); CCV(ccv, snd_cwnd) = min(rw, CCV(ccv, snd_cwnd)); } ___ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r366206 - head/usr.sbin/ctld
Author: rscheff Date: Sun Sep 27 21:43:19 2020 New Revision: 366206 URL: https://svnweb.freebsd.org/changeset/base/366206 Log: Add DSCP support for network QoS to iscsi target. In order to prioritize iSCSI traffic across a network, DSCP can be used. In order not to rely on "ipfw setdscp" or in-network reclassification, this adds the dscp value directly to the portal group (where TCP sessions are accepted). The incoming iSCSI session is first handled by ctld for any CHAP authentication and the socket is then handed off to the in-kernel iscsi driver without modification of the socket parameters. Simply setting up the socket in ctld is sufficient to keep sending outgoing iSCSI related traffic with the configured DSCP value. Reviewed by: mav, trasz MFC after: 2 weeks Sponsored by: NetApp, Inc. Differential Revision: https://reviews.freebsd.org/D26385 Modified: head/usr.sbin/ctld/ctl.conf.5 head/usr.sbin/ctld/ctld.c head/usr.sbin/ctld/ctld.h head/usr.sbin/ctld/parse.y head/usr.sbin/ctld/token.l head/usr.sbin/ctld/uclparse.c Modified: head/usr.sbin/ctld/ctl.conf.5 == --- head/usr.sbin/ctld/ctl.conf.5 Sun Sep 27 18:47:06 2020 (r366205) +++ head/usr.sbin/ctld/ctl.conf.5 Sun Sep 27 21:43:19 2020 (r366206) @@ -250,6 +250,14 @@ Specifies that this .Sy portal-group is listened by some other host. This host will announce it on discovery stage, but won't listen. +.It Ic dscp Ar value +The DiffServ Codepoint used for sending data. The DSCP can be +set to numeric, or hexadecimal values directly, as well as the +well-defined +.Qq Ar CSx +and +.Qq Ar AFxx +codepoints. .El .Ss target Context .Bl -tag -width indent Modified: head/usr.sbin/ctld/ctld.c == --- head/usr.sbin/ctld/ctld.c Sun Sep 27 18:47:06 2020 (r366205) +++ head/usr.sbin/ctld/ctld.c Sun Sep 27 21:43:19 2020 (r366206) @@ -625,6 +625,7 @@ portal_group_new(struct conf *conf, const char *name) TAILQ_INIT(&pg->pg_ports); pg->pg_conf = conf; pg->pg_tag = 0; /* Assigned later in conf_apply(). 
*/ + pg->pg_dscp = -1; TAILQ_INSERT_TAIL(&conf->conf_portal_groups, pg, pg_next); return (pg); @@ -2180,6 +2181,32 @@ conf_apply(struct conf *oldconf, struct conf *newconf) newp->p_socket = 0; cumulated_error++; continue; + } + if (newpg->pg_dscp != -1) { + struct sockaddr sa; + int len = sizeof(sa); + getsockname(newp->p_socket, &sa, &len); + /* +* Only allow the 6-bit DSCP +* field to be modified +*/ + int tos = newpg->pg_dscp << 2; + if (sa.sa_family == AF_INET) { + if (setsockopt(newp->p_socket, + IPPROTO_IP, IP_TOS, + &tos, sizeof(tos)) == -1) + log_warn("setsockopt(IP_TOS) " + "failed for %s", + newp->p_listen); + } else + if (sa.sa_family == AF_INET6) { + if (setsockopt(newp->p_socket, + IPPROTO_IPV6, IPV6_TCLASS, + &tos, sizeof(tos)) == -1) + log_warn("setsockopt(IPV6_TCLASS) " + "failed for %s", + newp->p_listen); + } } error = bind(newp->p_socket, newp->p_ai->ai_addr, newp->p_ai->ai_addrlen); Modified: head/usr.sbin/ctld/ctld.h == --- head/usr.sbin/ctld/ctld.h Sun Sep 27 18:47:06 2020 (r366205) +++ head/usr.sbin/ctld/ctld.h Sun Sep 27 21:43:19 2020 (r366206) @@ -127,6 +127,7 @@ struct portal_group { TAILQ_HEAD(, port) pg_ports; char *pg_offload; char *pg_redirection; + int pg_dscp; uint16_t pg_tag; }; Modified: head/usr.sbin/ctld/parse.y == ---
svn commit: r366152 - stable/12/sbin/ping6
Author: rscheff Date: Fri Sep 25 10:57:11 2020 New Revision: 366152 URL: https://svnweb.freebsd.org/changeset/base/366152 Log: MFC r365547: Add -z "TOS" option to ping6, to test DSCP/ECN values ping has the option to add the (deprecated) TOS byte using the -z option. Adding the same option, with the same (deprecated) Traffic Class Byte (nowadays actually DSCP and ECN fields) to ping6 to validate proper QoS processing in network switches. Reviewed by: tuexen MFC after:2 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D26384 Modified: stable/12/sbin/ping6/ping6.8 stable/12/sbin/ping6/ping6.c Modified: stable/12/sbin/ping6/ping6.8 == --- stable/12/sbin/ping6/ping6.8Fri Sep 25 10:49:26 2020 (r366151) +++ stable/12/sbin/ping6/ping6.8Fri Sep 25 10:57:11 2020 (r366152) @@ -29,7 +29,7 @@ .\" .\" $FreeBSD$ .\" -.Dd September 22, 2014 +.Dd September 10, 2020 .Dt PING6 8 .Os .Sh NAME @@ -87,6 +87,9 @@ packets to network hosts .Op Fl s Ar packetsize .Ek .Bk -words +.Op Fl z Ar tclass +.Ek +.Bk -words .Op Ar hops ... .Ek .Bk -words @@ -329,6 +332,8 @@ This option is present for backward compatibility. has no effect if .Fl w is specified. +.It Fl z Ar tclass +Use the specified traffic class when sending. .It Ar hops IPv6 addresses for intermediate nodes, which will be put into type 0 routing header. 
Modified: stable/12/sbin/ping6/ping6.c == --- stable/12/sbin/ping6/ping6.c Fri Sep 25 10:49:26 2020 (r366151) +++ stable/12/sbin/ping6/ping6.c Fri Sep 25 10:57:11 2020 (r366152) @@ -229,6 +229,7 @@ static char *hostname; static int ident; /* process id to identify our packets */ static u_int8_t nonce[8]; /* nonce field for node information */ static int hoplimit = -1; /* hoplimit */ +static int tclass = -1; /* traffic class */ static u_char *packet = NULL; static cap_channel_t *capdns; @@ -351,7 +352,7 @@ main(int argc, char *argv[]) #endif /*IPSEC_POLICY_IPSEC*/ #endif while ((ch = getopt(argc, argv, - "a:b:c:DdfHg:h:I:i:l:mnNop:qrRS:s:tvwWx:X:" ADDOPTS)) != -1) { + "a:b:c:DdfHg:h:I:i:l:mnNop:qrRS:s:tvwWx:X:z:" ADDOPTS)) != -1) { #undef ADDOPTS switch (ch) { case 'a': @@ -576,6 +577,14 @@ main(int argc, char *argv[]) optarg, MAXALARM); alarm((int)alarmtimeout); break; + case 'z': /* traffic class */ + tclass = strtol(optarg, &e, 10); + if (*optarg == '\0' || *e != '\0') + errx(1, "illegal traffic class %s", optarg); + if (255 < tclass || tclass < -1) + errx(1, + "illegal traffic class -- %s", optarg); + break; #ifdef IPSEC #ifdef IPSEC_POLICY_IPSEC case 'P': @@ -926,6 +935,12 @@ main(int argc, char *argv[]) memcpy(CMSG_DATA(scmsgp), &hoplimit, sizeof(hoplimit)); scmsgp = CMSG_NXTHDR(&smsghdr, scmsgp); + } + + if (tclass != -1) { + if (setsockopt(ssend, IPPROTO_IPV6, IPV6_TCLASS, + &tclass, sizeof(tclass)) == -1) + err(1, "setsockopt(IPV6_TCLASS)"); } if (argc > 1) { /* some intermediate addrs are specified */ ___ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r366151 - stable/12/sys/netinet/cc
Author: rscheff Date: Fri Sep 25 10:49:26 2020 New Revision: 366151 URL: https://svnweb.freebsd.org/changeset/base/366151 Log: MFC r365546: cc_mod: remove unused CCF_DELACK definition During the DCTCP improvements, use of CCF_DELACK was removed. This change is just to rename the unused flag bit to prevent use of it, without also re-implementing the tcp_input and tcp_output interfaces. No functional change. Reviewed by: chengc_netapp.com, tuexen MFC after: 2 weeks Sponsored by: NetApp, Inc. Differential Revision: https://reviews.freebsd.org/D26181 Modified: stable/12/sys/netinet/cc/cc.h Directory Properties: stable/12/ (props changed) Modified: stable/12/sys/netinet/cc/cc.h == --- stable/12/sys/netinet/cc/cc.h Fri Sep 25 10:38:19 2020 (r366150) +++ stable/12/sys/netinet/cc/cc.h Fri Sep 25 10:49:26 2020 (r366151) @@ -98,7 +98,7 @@ struct cc_var { /* cc_var flags. */ #define CCF_ABC_SENTAWND 0x0001 /* ABC counted cwnd worth of bytes? */ #define CCF_CWND_LIMITED 0x0002 /* Are we currently cwnd limited? */ -#define CCF_DELACK 0x0004 /* Is this ack delayed? */ +#define CCF_UNUSED1 0x0004 /* unused */ #define CCF_ACKNOW 0x0008 /* Will this ack be sent now? */ #define CCF_IPHDR_CE 0x0010 /* Does this packet set CE bit? */ #define CCF_TCPHDR_CWR 0x0020 /* Does this packet set CWR bit? */ ___ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r366150 - head/sys/netinet
Author: rscheff Date: Fri Sep 25 10:38:19 2020 New Revision: 366150 URL: https://svnweb.freebsd.org/changeset/base/366150 Log: TCP: send full initial window when timestamps are in use The fastpath in tcp_output tries to send out full segments, and avoid sending partial segments by comparing against the static t_maxseg variable. That value does not consider tcp options like timestamps, while the initial window calculation is using the correct dynamic tcp_maxseg() function. Due to this interaction, the last, full size segment is considered too short and not sent out immediately. Reviewed by: tuexen MFC after: 2 weeks Sponsored by: NetApp, Inc. Differential Revision: https://reviews.freebsd.org/D26478 Modified: head/sys/netinet/tcp.h head/sys/netinet/tcp_output.c head/sys/netinet/tcp_subr.c Modified: head/sys/netinet/tcp.h == --- head/sys/netinet/tcp.h Fri Sep 25 10:23:14 2020 (r366149) +++ head/sys/netinet/tcp.h Fri Sep 25 10:38:19 2020 (r366150) @@ -80,6 +80,8 @@ struct tcphdr { u_short th_urp; /* urgent pointer */ }; +#define PADTCPOLEN(len) ((((len) / 4) + !!((len) % 4)) * 4) + #define TCPOPT_EOL 0 #define TCPOLEN_EOL 1 #define TCPOPT_PAD 0 /* padding after EOL */ Modified: head/sys/netinet/tcp_output.c == --- head/sys/netinet/tcp_output.c Fri Sep 25 10:23:14 2020 (r366149) +++ head/sys/netinet/tcp_output.c Fri Sep 25 10:38:19 2020 (r366150) @@ -591,6 +591,20 @@ after_sack_rexmit: if (len >= tp->t_maxseg) goto send; /* +* As the TCP header options are now +* considered when setting up the initial +* window, we would not send the last segment +* if we skip considering the option length here. +* Note: this may not work when tcp headers change +* very dynamically in the future. +*/ + if ((((tp->t_flags & TF_SIGNATURE) ? + PADTCPOLEN(TCPOLEN_SIGNATURE) : 0) + + ((tp->t_flags & TF_RCVD_TSTMP) ? + PADTCPOLEN(TCPOLEN_TIMESTAMP) : 0) + + len) >= tp->t_maxseg) + goto send; + /* * NOTE! 
on localhost connections an 'ack' from the remote * end may occur synchronously with the output and cause * us to flush a buffer queued with moretocome. XXX Modified: head/sys/netinet/tcp_subr.c == --- head/sys/netinet/tcp_subr.c Fri Sep 25 10:23:14 2020 (r366149) +++ head/sys/netinet/tcp_subr.c Fri Sep 25 10:38:19 2020 (r366150) @@ -3013,7 +3013,6 @@ tcp_maxseg(const struct tcpcb *tp) * but this is harmless, since result of tcp_maxseg() is used * only in cwnd and ssthresh estimations. */ -#define PAD(len) ((((len) / 4) + !!((len) % 4)) * 4) if (TCPS_HAVEESTABLISHED(tp->t_state)) { if (tp->t_flags & TF_RCVD_TSTMP) optlen = TCPOLEN_TSTAMP_APPA; @@ -3021,26 +3020,26 @@ tcp_maxseg(const struct tcpcb *tp) optlen = 0; #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) if (tp->t_flags & TF_SIGNATURE) - optlen += PAD(TCPOLEN_SIGNATURE); + optlen += PADTCPOLEN(TCPOLEN_SIGNATURE); #endif if ((tp->t_flags & TF_SACK_PERMIT) && tp->rcv_numsacks > 0) { optlen += TCPOLEN_SACKHDR; optlen += tp->rcv_numsacks * TCPOLEN_SACK; - optlen = PAD(optlen); + optlen = PADTCPOLEN(optlen); } } else { if (tp->t_flags & TF_REQ_TSTMP) optlen = TCPOLEN_TSTAMP_APPA; else - optlen = PAD(TCPOLEN_MAXSEG); + optlen = PADTCPOLEN(TCPOLEN_MAXSEG); if (tp->t_flags & TF_REQ_SCALE) - optlen += PAD(TCPOLEN_WINDOW); + optlen += PADTCPOLEN(TCPOLEN_WINDOW); #if defined(IPSEC_SUPPORT) || defined(TCP_SIGNATURE) if (tp->t_flags & TF_SIGNATURE) - optlen += PAD(TCPOLEN_SIGNATURE); + optlen += PADTCPOLEN(TCPOLEN_SIGNATURE); #endif if (tp->t_flags & TF_SACK_PERMIT) - optlen += PAD(TCPOLEN_SACK_PERMITTED); + optlen += PADTCPOLEN(TCPOLEN_SACK_PERMITTED); } #undef PAD optlen =
svn commit: r366149 - head/sys/netinet/cc
Author: rscheff Date: Fri Sep 25 10:23:14 2020 New Revision: 366149 URL: https://svnweb.freebsd.org/changeset/base/366149 Log: TCP newreno: improve after_idle ssthresh Adjust ssthresh in after_idle to the maximum of the prior ssthresh, or 3/4 of the prior cwnd. See RFC2861 section 2 for an in depth explanation for the rationale around this. As newreno is the default "fall-through" reaction, most tcp variants will benefit from this. Reviewed by: tuexen MFC after:2 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D22438 Modified: head/sys/netinet/cc/cc_newreno.c Modified: head/sys/netinet/cc/cc_newreno.c == --- head/sys/netinet/cc/cc_newreno.cFri Sep 25 10:20:12 2020 (r366148) +++ head/sys/netinet/cc/cc_newreno.cFri Sep 25 10:23:14 2020 (r366149) @@ -213,8 +213,15 @@ newreno_after_idle(struct cc_var *ccv) * wirespeed, overloading router and switch buffers along the way. * * See RFC5681 Section 4.1. "Restarting Idle Connections". +* +* In addition, per RFC2861 Section 2, the ssthresh is set to the +* maximum of the former ssthresh or 3/4 of the old cwnd, to +* not exit slow-start prematurely. */ rw = tcp_compute_initwnd(tcp_maxseg(ccv->ccvc.tcp)); + + CCV(ccv, snd_ssthresh) = max(CCV(ccv, snd_ssthresh), + CCV(ccv, snd_cwnd)-(CCV(ccv, snd_cwnd)>>2)); CCV(ccv, snd_cwnd) = min(rw, CCV(ccv, snd_cwnd)); } ___ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r365547 - head/sbin/ping6
Author: rscheff Date: Thu Sep 10 00:50:18 2020 New Revision: 365547 URL: https://svnweb.freebsd.org/changeset/base/365547 Log: Add -z "TOS" option to ping6, to test DSCP/ECN values ping has the option to add the (deprecated) TOS byte using the -z option. Adding the same option, with the same (deprecated) Traffic Class Byte (nowadays actually DSCP and ECN fields) to ping6 to validate proper QoS processing in network switches. Reviewed by: tuexen MFC after:2 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D26384 Modified: head/sbin/ping6/ping6.8 head/sbin/ping6/ping6.c Modified: head/sbin/ping6/ping6.8 == --- head/sbin/ping6/ping6.8 Thu Sep 10 00:46:38 2020(r365546) +++ head/sbin/ping6/ping6.8 Thu Sep 10 00:50:18 2020(r365547) @@ -29,7 +29,7 @@ .\" .\" $FreeBSD$ .\" -.Dd October 20, 2019 +.Dd September 10, 2020 .Dt PING6 8 .Os .Sh NAME @@ -87,6 +87,9 @@ packets to network hosts .Op Fl W Ar waittime .Ek .Bk -words +.Op Fl z Ar tclass +.Ek +.Bk -words .Op Ar hops ... .Ek .Bk -words @@ -329,6 +332,8 @@ This option is present for backward compatibility. has no effect if .Fl y is specified. +.It Fl z Ar tclass +Use the specified traffic class when sending. .It Ar hops IPv6 addresses for intermediate nodes, which will be put into type 0 routing header. 
Modified: head/sbin/ping6/ping6.c == --- head/sbin/ping6/ping6.c Thu Sep 10 00:46:38 2020 (r365546) +++ head/sbin/ping6/ping6.c Thu Sep 10 00:50:18 2020 (r365547) @@ -229,6 +229,7 @@ static char *hostname; static int ident; /* process id to identify our packets */ static u_int8_t nonce[8]; /* nonce field for node information */ static int hoplimit = -1; /* hoplimit */ +static int tclass = -1; /* traffic class */ static u_char *packet = NULL; static cap_channel_t *capdns; @@ -352,7 +353,7 @@ main(int argc, char *argv[]) #endif /*IPSEC_POLICY_IPSEC*/ #endif while ((ch = getopt(argc, argv, - "k:b:c:DdfHe:m:I:i:l:unNop:qaAS:s:OvyYW:t:" ADDOPTS)) != -1) { + "k:b:c:DdfHe:m:I:i:l:unNop:qaAS:s:OvyYW:t:z:" ADDOPTS)) != -1) { #undef ADDOPTS switch (ch) { case 'k': @@ -585,6 +586,14 @@ main(int argc, char *argv[]) err(1, "setitimer"); } break; + case 'z': /* traffic class */ + tclass = strtol(optarg, &e, 10); + if (*optarg == '\0' || *e != '\0') + errx(1, "illegal traffic class %s", optarg); + if (255 < tclass || tclass < -1) + errx(1, + "illegal traffic class -- %s", optarg); + break; #ifdef IPSEC #ifdef IPSEC_POLICY_IPSEC case 'P': @@ -935,6 +944,12 @@ main(int argc, char *argv[]) memcpy(CMSG_DATA(scmsgp), &hoplimit, sizeof(hoplimit)); scmsgp = CMSG_NXTHDR(&smsghdr, scmsgp); + } + + if (tclass != -1) { + if (setsockopt(ssend, IPPROTO_IPV6, IPV6_TCLASS, + &tclass, sizeof(tclass)) == -1) + err(1, "setsockopt(IPV6_TCLASS)"); } if (argc > 1) { /* some intermediate addrs are specified */ ___ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r365546 - head/sys/netinet/cc
Author: rscheff Date: Thu Sep 10 00:46:38 2020 New Revision: 365546 URL: https://svnweb.freebsd.org/changeset/base/365546 Log: cc_mod: remove unused CCF_DELACK definition During the DCTCP improvements, use of CCF_DELACK was removed. This change is just to rename the unused flag bit to prevent use of it, without also re-implementing the tcp_input and tcp_output interfaces. No functional change. Reviewed by: chengc_netapp.com, tuexen MFC after: 2 weeks Sponsored by: NetApp, Inc. Differential Revision: https://reviews.freebsd.org/D26181 Modified: head/sys/netinet/cc/cc.h Modified: head/sys/netinet/cc/cc.h == --- head/sys/netinet/cc/cc.h Wed Sep 9 23:11:55 2020 (r365545) +++ head/sys/netinet/cc/cc.h Thu Sep 10 00:46:38 2020 (r365546) @@ -96,7 +96,7 @@ struct cc_var { /* cc_var flags. */ #define CCF_ABC_SENTAWND 0x0001 /* ABC counted cwnd worth of bytes? */ #define CCF_CWND_LIMITED 0x0002 /* Are we currently cwnd limited? */ -#define CCF_DELACK 0x0004 /* Is this ack delayed? */ +#define CCF_UNUSED1 0x0004 /* unused */ #define CCF_ACKNOW 0x0008 /* Will this ack be sent now? */ #define CCF_IPHDR_CE 0x0010 /* Does this packet set CE bit? */ #define CCF_TCPHDR_CWR 0x0020 /* Does this packet set CWR bit? */ ___ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r365295 - stable/12/sys/netinet/cc
Author: rscheff Date: Thu Sep 3 09:09:44 2020 New Revision: 365295 URL: https://svnweb.freebsd.org/changeset/base/365295 Log: MFC r364354: TCP Cubic: recalculate cwnd for every ACK. Since cubic calculates cwnd based on absolute time, retaining RFC3465 (ABC) once-per-window updates can lead to dramatic changes of cwnd in the convex region. Updating cwnd for each incoming ack minimizes this delta, preventing unintentional line-rate bursts. Reviewed by: chengc_netapp.com, tuexen (mentor) MFC after: 2 weeks Sponsored by: NetApp, Inc. Differential Revision: https://reviews.freebsd.org/D26060 Modified: stable/12/sys/netinet/cc/cc_cubic.c Modified: stable/12/sys/netinet/cc/cc_cubic.c == --- stable/12/sys/netinet/cc/cc_cubic.c Thu Sep 3 08:45:21 2020 (r365294) +++ stable/12/sys/netinet/cc/cc_cubic.c Thu Sep 3 09:09:44 2020 (r365295) @@ -131,16 +131,11 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type) cubic_record_rtt(ccv); /* -* Regular ACK and we're not in cong/fast recovery and we're cwnd -* limited and we're either not doing ABC or are just coming out -* from slow-start or were application limited or are slow starting -* or are doing ABC and we've sent a cwnd's worth of bytes. +* For a regular ACK and we're not in cong/fast recovery and +* we're cwnd limited, always recalculate cwnd. */ if (type == CC_ACK && !IN_RECOVERY(CCV(ccv, t_flags)) && - (ccv->flags & CCF_CWND_LIMITED) && (!V_tcp_do_rfc3465 || - (cubic_data->flags & (CUBICFLAG_IN_SLOWSTART | CUBICFLAG_IN_APPLIMIT)) || - CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh) || - (V_tcp_do_rfc3465 && (ccv->flags & CCF_ABC_SENTAWND)))) { + (ccv->flags & CCF_CWND_LIMITED)) { /* Use the logic in NewReno ack_received() for slow start. */ if (CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh) || cubic_data->min_rtt_ticks == TCPTV_SRTTBASE) { @@ -193,15 +188,8 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type) * cwnd growth. * Only update snd_cwnd, if it doesn't shrink. 
*/ - if (V_tcp_do_rfc3465) - CCV(ccv, snd_cwnd) = ulmin(w_cubic_next, - INT_MAX); - else - CCV(ccv, snd_cwnd) += ulmax(1, - ((ulmin(w_cubic_next, INT_MAX) - - CCV(ccv, snd_cwnd)) * - CCV(ccv, t_maxseg)) / - CCV(ccv, snd_cwnd)); + CCV(ccv, snd_cwnd) = ulmin(w_cubic_next, + INT_MAX); } /* ___ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r365294 - stable/12/sys/netinet/cc
Author: rscheff Date: Thu Sep 3 08:45:21 2020 New Revision: 365294 URL: https://svnweb.freebsd.org/changeset/base/365294 Log: MFC r364197: TCP Cubic: Have Fast Convergence Heuristic work for ECN, and align concave region The Cubic concave region was not aligned nicely for the very first exit from slow start, where a 50% cwnd reduction is done instead of the normal 30%. This addresses an issue, where a short line-rate burst could result from that sudden jump of cwnd. In addition, the Fast Convergence Heuristic has been expanded to work also with ECN induced congestion response. Submitted by: chengc_netapp.com Reported by: chengc_netapp.com Reviewed by: tuexen (mentor), rgrimes (mentor) Approved by: tuexen (mentor), rgrimes (mentor) MFC after:3 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D25976 Modified: stable/12/sys/netinet/cc/cc_cubic.c Directory Properties: stable/12/ (props changed) Modified: stable/12/sys/netinet/cc/cc_cubic.c == --- stable/12/sys/netinet/cc/cc_cubic.c Thu Sep 3 08:41:38 2020 (r365293) +++ stable/12/sys/netinet/cc/cc_cubic.c Thu Sep 3 08:45:21 2020 (r365294) @@ -286,8 +286,7 @@ cubic_cong_signal(struct cc_var *ccv, uint32_t type) if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) { cubic_ssthresh_update(ccv); cubic_data->flags |= CUBICFLAG_CONG_EVENT; - cubic_data->prev_max_cwnd = cubic_data->max_cwnd; - cubic_data->max_cwnd = CCV(ccv, snd_cwnd); + cubic_data->t_last_cong = ticks; cubic_data->K = cubic_k(cubic_data->max_cwnd / CCV(ccv, t_maxseg)); } ENTER_RECOVERY(CCV(ccv, t_flags)); @@ -298,8 +297,6 @@ cubic_cong_signal(struct cc_var *ccv, uint32_t type) if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) { cubic_ssthresh_update(ccv); cubic_data->flags |= CUBICFLAG_CONG_EVENT; - cubic_data->prev_max_cwnd = cubic_data->max_cwnd; - cubic_data->max_cwnd = CCV(ccv, snd_cwnd); cubic_data->t_last_cong = ticks; cubic_data->K = cubic_k(cubic_data->max_cwnd / CCV(ccv, t_maxseg)); CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh); @@ -361,11 
+358,6 @@ cubic_post_recovery(struct cc_var *ccv) cubic_data = ccv->cc_data; pipe = 0; - /* Fast convergence heuristic. */ - if (cubic_data->max_cwnd < cubic_data->prev_max_cwnd) - cubic_data->max_cwnd = (cubic_data->max_cwnd * CUBIC_FC_FACTOR) - >> CUBIC_SHIFT; - if (IN_FASTRECOVERY(CCV(ccv, t_flags))) { /* * If inflight data is less than ssthresh, set cwnd @@ -392,7 +384,6 @@ cubic_post_recovery(struct cc_var *ccv) CUBIC_BETA) >> CUBIC_SHIFT, 2 * CCV(ccv, t_maxseg)); } - cubic_data->t_last_cong = ticks; /* Calculate the average RTT between congestion epochs. */ if (cubic_data->epoch_ack_count > 0 && @@ -403,7 +394,6 @@ cubic_post_recovery(struct cc_var *ccv) cubic_data->epoch_ack_count = 0; cubic_data->sum_rtt_ticks = 0; - cubic_data->K = cubic_k(cubic_data->max_cwnd / CCV(ccv, t_maxseg)); } /* @@ -457,18 +447,32 @@ cubic_ssthresh_update(struct cc_var *ccv) { struct cubic *cubic_data; uint32_t ssthresh; + uint32_t cwnd; cubic_data = ccv->cc_data; + cwnd = CCV(ccv, snd_cwnd); + /* Fast convergence heuristic. */ + if (cwnd < cubic_data->max_cwnd) { + cwnd = ((uint64_t)cwnd * CUBIC_FC_FACTOR) >> CUBIC_SHIFT; + } + cubic_data->prev_max_cwnd = cubic_data->max_cwnd; + cubic_data->max_cwnd = cwnd; + /* -* On the first congestion event, set ssthresh to cwnd * 0.5, on -* subsequent congestion events, set it to cwnd * beta. +* On the first congestion event, set ssthresh to cwnd * 0.5 +* and reduce max_cwnd to cwnd * beta. This aligns the cubic concave +* region appropriately. On subsequent congestion events, set +* ssthresh to cwnd * beta. */ - if ((cubic_data->flags & CUBICFLAG_CONG_EVENT) == 0) - ssthresh = CCV(ccv, snd_cwnd) >> 1; - else - ssthresh = ((uint64_t)CCV(ccv, snd_cwnd) * + if ((cubic_data->flags & CUBICFLAG_CONG_EVENT) == 0) { + ssthresh = cwnd >> 1; + cubic_data->max_cwnd = ((uint64_t)cwnd * CUBIC_BETA) >> CUBIC_SHIFT; + } else { + ssthresh = ((uint64_t)cwnd * +
svn commit: r365293 - stable/12/sys/netinet/cc
Author: rscheff Date: Thu Sep 3 08:41:38 2020 New Revision: 365293 URL: https://svnweb.freebsd.org/changeset/base/365293 Log: MFC r364196: TCP Cubic: After leaving slowstart fix unintended cwnd jump. Initializing K to zero in D23655 introduced a miscalculation, where cwnd would suddenly jump to cwnd_max instead of gradually increasing, after leaving slow-start. Properly calculating K instead of resetting it to zero resolves this issue. Also making sure, that cwnd is recalculated at the earliest opportunity once slow-start is over. Reported by: chengc_netapp.com Reviewed by: chengc_netapp.com, tuexen (mentor), rgrimes (mentor) Approved by: tuexen (mentor), rgrimes (mentor) MFC after: 3 weeks Sponsored by: NetApp, Inc. Differential Revision: https://reviews.freebsd.org/D25746 Modified: stable/12/sys/netinet/cc/cc_cubic.c Directory Properties: stable/12/ (props changed) Modified: stable/12/sys/netinet/cc/cc_cubic.c == --- stable/12/sys/netinet/cc/cc_cubic.c Thu Sep 3 08:38:04 2020 (r365292) +++ stable/12/sys/netinet/cc/cc_cubic.c Thu Sep 3 08:41:38 2020 (r365293) @@ -132,19 +132,29 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type) /* * Regular ACK and we're not in cong/fast recovery and we're cwnd -* limited and we're either not doing ABC or are slow starting or are -* doing ABC and we've sent a cwnd's worth of bytes. +* limited and we're either not doing ABC or are just coming out +* from slow-start or were application limited or are slow starting +* or are doing ABC and we've sent a cwnd's worth of bytes. */ if (type == CC_ACK && !IN_RECOVERY(CCV(ccv, t_flags)) && (ccv->flags & CCF_CWND_LIMITED) && (!V_tcp_do_rfc3465 || + (cubic_data->flags & (CUBICFLAG_IN_SLOWSTART | CUBICFLAG_IN_APPLIMIT)) || CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh) || - (V_tcp_do_rfc3465 && ccv->flags & CCF_ABC_SENTAWND))) { + (V_tcp_do_rfc3465 && (ccv->flags & CCF_ABC_SENTAWND)))) { /* Use the logic in NewReno ack_received() for slow start. 
*/ if (CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh) || cubic_data->min_rtt_ticks == TCPTV_SRTTBASE) { cubic_data->flags |= CUBICFLAG_IN_SLOWSTART; newreno_cc_algo.ack_received(ccv, type); } else { + if (cubic_data->flags & (CUBICFLAG_IN_SLOWSTART | +CUBICFLAG_IN_APPLIMIT)) { + cubic_data->flags &= ~(CUBICFLAG_IN_SLOWSTART | + CUBICFLAG_IN_APPLIMIT); + cubic_data->t_last_cong = ticks; + cubic_data->K = cubic_k(cubic_data->max_cwnd / + CCV(ccv, t_maxseg)); + } if ((ticks_since_cong = ticks - cubic_data->t_last_cong) < 0) { /* @@ -152,14 +162,6 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type) */ ticks_since_cong = INT_MAX; cubic_data->t_last_cong = ticks - INT_MAX; - } - - if (cubic_data->flags & (CUBICFLAG_IN_SLOWSTART | -CUBICFLAG_IN_APPLIMIT)) { - cubic_data->flags &= ~(CUBICFLAG_IN_SLOWSTART | - CUBICFLAG_IN_APPLIMIT); - cubic_data->t_last_cong = ticks; - cubic_data->K = 0; } /* * The mean RTT is used to best reflect the equations in ___ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r365292 - stable/12/sys/netinet
Author: rscheff Date: Thu Sep 3 08:38:04 2020 New Revision: 365292 URL: https://svnweb.freebsd.org/changeset/base/365292 Log: MFC r364195: Improve SACK support code for RFC6675 and PRR Adding proper accounting of sacked_bytes and (per-ACK) delivered data to the SACK scoreboard. This will allow more aspects of RFC6675 to be implemented as well as Proportional Rate Reduction (RFC6937). Prior to this change, the pipe calculation controlled with net.inet.tcp.rfc6675_pipe was also susceptible to incorrect results when more than 3 (or 4) holes in the sequence space were present, which can no longer all fit into a single ACK's SACK option. Reviewed by: kbowling, rgrimes (mentor) Approved by: rgrimes (blanket) MFC after:3 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D18624 Modified: stable/12/sys/netinet/tcp_input.c stable/12/sys/netinet/tcp_sack.c stable/12/sys/netinet/tcp_var.h Directory Properties: stable/12/ (props changed) Modified: stable/12/sys/netinet/tcp_input.c == --- stable/12/sys/netinet/tcp_input.c Thu Sep 3 08:16:57 2020 (r365291) +++ stable/12/sys/netinet/tcp_input.c Thu Sep 3 08:38:04 2020 (r365292) @@ -2715,9 +2715,16 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru tp->t_dupacks = 0; /* * If this ack also has new SACK info, increment the -* counter as per rfc6675. +* counter as per rfc6675. The variable +* sack_changed tracks all changes to the SACK +* scoreboard, including when partial ACKs without +* SACK options are received, and clear the scoreboard +* from the left side. Such partial ACKs should not be +* counted as dupacks here. 
*/ - if ((tp->t_flags & TF_SACK_PERMIT) && sack_changed) + if ((tp->t_flags & TF_SACK_PERMIT) && + (to.to_flags & TOF_SACK) && + sack_changed) tp->t_dupacks++; } Modified: stable/12/sys/netinet/tcp_sack.c == --- stable/12/sys/netinet/tcp_sack.c Thu Sep 3 08:16:57 2020 (r365291) +++ stable/12/sys/netinet/tcp_sack.c Thu Sep 3 08:38:04 2020 (r365292) @@ -534,9 +534,7 @@ tcp_sackhole_remove(struct tcpcb *tp, struct sackhole * tp->snd_holes is an ordered list of holes (oldest to newest, in terms of * the sequence space). * Returns 1 if incoming ACK has previously unknown SACK information, - * 0 otherwise. Note: We treat (snd_una, th_ack) as a sack block so any changes - * to that (i.e. left edge moving) would also be considered a change in SACK - * information which is slightly different than rfc6675. + * 0 otherwise. */ int tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) @@ -544,16 +542,21 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tc struct sackhole *cur, *temp; struct sackblk sack, sack_blocks[TCP_MAX_SACK + 1], *sblkp; int i, j, num_sack_blks, sack_changed; + int delivered_data, left_edge_delta; INP_WLOCK_ASSERT(tp->t_inpcb); num_sack_blks = 0; sack_changed = 0; + delivered_data = 0; + left_edge_delta = 0; /* * If SND.UNA will be advanced by SEG.ACK, and if SACK holes exist, * treat [SND.UNA, SEG.ACK) as if it is a SACK block. +* Account changes to SND.UNA always in delivered data. */ if (SEQ_LT(tp->snd_una, th_ack) && !TAILQ_EMPTY(&tp->snd_holes)) { + left_edge_delta = th_ack - tp->snd_una; sack_blocks[num_sack_blks].start = tp->snd_una; sack_blocks[num_sack_blks++].end = th_ack; } @@ -562,7 +565,6 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tc * received new blocks from the other side. 
*/ if (to->to_flags & TOF_SACK) { - tp->sackhint.sacked_bytes = 0; /* reset */ for (i = 0; i < to->to_nsacks; i++) { bcopy((to->to_sacks + i * TCPOLEN_SACK), , sizeof(sack)); @@ -575,8 +577,6 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tc SEQ_GT(sack.end, tp->snd_una) && SEQ_LEQ(sack.end, tp->snd_max)) { sack_blocks[num_sack_blks++] = sack; - tp->sackhint.sacked_bytes += - (sack.end-sack.start); } } } @@ -601,7 +601,7 @@
svn commit: r364378 - stable/12/sys/netinet/cc
Author: rscheff Date: Wed Aug 19 10:40:02 2020 New Revision: 364378 URL: https://svnweb.freebsd.org/changeset/base/364378 Log: MFC r363397: Fix style and comment around concave/convex regions in TCP cubic. In cubic, the concave region is when snd_cwnd starts growing slower towards max_cwnd (cwnd at the time of the congestion event), and the convex region is when snd_cwnd starts to grow faster and eventually appears like slow-start-like growth. PR: 238478 Reviewed by: tuexen (mentor), rgrimes (mentor) Sponsored by: NetApp, Inc. Differential Revision: https://reviews.freebsd.org/D24657 Modified: stable/12/sys/netinet/cc/cc_cubic.c Directory Properties: stable/12/ (props changed) Modified: stable/12/sys/netinet/cc/cc_cubic.c == --- stable/12/sys/netinet/cc/cc_cubic.c Wed Aug 19 10:36:16 2020 (r364377) +++ stable/12/sys/netinet/cc/cc_cubic.c Wed Aug 19 10:40:02 2020 (r364378) @@ -185,12 +185,11 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type) */ if (CCV(ccv, snd_cwnd) < w_tf) CCV(ccv, snd_cwnd) = ulmin(w_tf, INT_MAX); - } - - else if (CCV(ccv, snd_cwnd) < w_cubic_next) { + } else if (CCV(ccv, snd_cwnd) < w_cubic_next) { /* * Concave or convex region, follow CUBIC * cwnd growth. +* Only update snd_cwnd, if it doesn't shrink. */ if (V_tcp_do_rfc3465) CCV(ccv, snd_cwnd) = ulmin(w_cubic_next, ___ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r364377 - stable/12/sys/netinet/cc
Author: rscheff Date: Wed Aug 19 10:36:16 2020 New Revision: 364377 URL: https://svnweb.freebsd.org/changeset/base/364377 Log: MFC r363380: Add MODULE_VERSION to TCP loadable congestion control modules. Without versioning information, using preexisting loader / linker code is not easily possible when another module may have dependencies on pre-loaded modules, and also doesn't allow the automatic loading of dependent modules. No functional change of the actual modules. Reviewed by: tuexen (mentor), rgrimes (mentor) Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D25744 Modified: stable/12/sys/netinet/cc/cc_cdg.c stable/12/sys/netinet/cc/cc_chd.c stable/12/sys/netinet/cc/cc_cubic.c stable/12/sys/netinet/cc/cc_dctcp.c stable/12/sys/netinet/cc/cc_hd.c stable/12/sys/netinet/cc/cc_htcp.c stable/12/sys/netinet/cc/cc_newreno.c stable/12/sys/netinet/cc/cc_vegas.c Directory Properties: stable/12/ (props changed) Modified: stable/12/sys/netinet/cc/cc_cdg.c == --- stable/12/sys/netinet/cc/cc_cdg.c Wed Aug 19 10:32:26 2020 (r364376) +++ stable/12/sys/netinet/cc/cc_cdg.c Wed Aug 19 10:36:16 2020 (r364377) @@ -714,5 +714,5 @@ SYSCTL_UINT(_net_inet_tcp_cc_cdg, OID_AUTO, loss_compe "the window backoff for loss based CC compatibility"); DECLARE_CC_MODULE(cdg, _cc_algo); - +MODULE_VERSION(cdg, 1); MODULE_DEPEND(cdg, ertt, 1, 1, 1); Modified: stable/12/sys/netinet/cc/cc_chd.c == --- stable/12/sys/netinet/cc/cc_chd.c Wed Aug 19 10:32:26 2020 (r364376) +++ stable/12/sys/netinet/cc/cc_chd.c Wed Aug 19 10:36:16 2020 (r364377) @@ -493,4 +493,5 @@ SYSCTL_UINT(_net_inet_tcp_cc_chd, OID_AUTO, use_max, "as the basic delay measurement for the algorithm."); DECLARE_CC_MODULE(chd, _cc_algo); +MODULE_VERSION(chd, 1); MODULE_DEPEND(chd, ertt, 1, 1, 1); Modified: stable/12/sys/netinet/cc/cc_cubic.c == --- stable/12/sys/netinet/cc/cc_cubic.c Wed Aug 19 10:32:26 2020 (r364376) +++ stable/12/sys/netinet/cc/cc_cubic.c Wed Aug 19 10:36:16 2020 (r364377) @@ -473,3 +473,4 @@ 
cubic_ssthresh_update(struct cc_var *ccv) DECLARE_CC_MODULE(cubic, _cc_algo); +MODULE_VERSION(cubic, 1); Modified: stable/12/sys/netinet/cc/cc_dctcp.c == --- stable/12/sys/netinet/cc/cc_dctcp.c Wed Aug 19 10:32:26 2020 (r364376) +++ stable/12/sys/netinet/cc/cc_dctcp.c Wed Aug 19 10:36:16 2020 (r364377) @@ -467,3 +467,4 @@ SYSCTL_PROC(_net_inet_tcp_cc_dctcp, OID_AUTO, slowstar "IU", "half CWND reduction after the first slow start"); DECLARE_CC_MODULE(dctcp, _cc_algo); +MODULE_VERSION(dctcp, 1); Modified: stable/12/sys/netinet/cc/cc_hd.c == --- stable/12/sys/netinet/cc/cc_hd.cWed Aug 19 10:32:26 2020 (r364376) +++ stable/12/sys/netinet/cc/cc_hd.cWed Aug 19 10:36:16 2020 (r364377) @@ -249,4 +249,5 @@ SYSCTL_PROC(_net_inet_tcp_cc_hd, OID_AUTO, queue_min, _qmin_handler, "IU", "minimum queueing delay threshold (qmin) in ticks"); DECLARE_CC_MODULE(hd, _cc_algo); +MODULE_VERSION(hd, 1); MODULE_DEPEND(hd, ertt, 1, 1, 1); Modified: stable/12/sys/netinet/cc/cc_htcp.c == --- stable/12/sys/netinet/cc/cc_htcp.c Wed Aug 19 10:32:26 2020 (r364376) +++ stable/12/sys/netinet/cc/cc_htcp.c Wed Aug 19 10:36:16 2020 (r364377) @@ -530,3 +530,4 @@ SYSCTL_UINT(_net_inet_tcp_cc_htcp, OID_AUTO, rtt_scali "enable H-TCP RTT scaling"); DECLARE_CC_MODULE(htcp, _cc_algo); +MODULE_VERSION(htcp, 1); Modified: stable/12/sys/netinet/cc/cc_newreno.c == --- stable/12/sys/netinet/cc/cc_newreno.c Wed Aug 19 10:32:26 2020 (r364376) +++ stable/12/sys/netinet/cc/cc_newreno.c Wed Aug 19 10:36:16 2020 (r364377) @@ -399,3 +399,4 @@ SYSCTL_PROC(_net_inet_tcp_cc_newreno, OID_AUTO, beta_e "New Reno beta ecn, specified as number between 1 and 100"); DECLARE_CC_MODULE(newreno, _cc_algo); +MODULE_VERSION(newreno, 1); Modified: stable/12/sys/netinet/cc/cc_vegas.c == --- stable/12/sys/netinet/cc/cc_vegas.c Wed Aug 19 10:32:26 2020 (r364376) +++ stable/12/sys/netinet/cc/cc_vegas.c Wed Aug 19 10:36:16 2020 (r364377) @@ -300,4 +300,5 @@ SYSCTL_PROC(_net_inet_tcp_cc_vegas, OID_AUTO, beta, "vegas beta, specified as number 
of \"buffers\" (0 < alpha < beta)"); DECLARE_CC_MODULE(vegas, _cc_algo); +MODULE_VERSION(vegas, 1);
svn commit: r364376 - stable/12/sys/netinet
Author: rscheff Date: Wed Aug 19 10:32:26 2020 New Revision: 364376 URL: https://svnweb.freebsd.org/changeset/base/364376 Log: MFC r362988: Fix KASSERT during tcp_newtcpcb when low on memory While testing with system default cc set to cubic, and running a memory exhaustion validation, FreeBSD panics due to a missing inpcb reference / lock. Reviewed by: rgrimes (mentor), tuexen (mentor) Sponsored by: NetApp, Inc. Differential Revision: https://reviews.freebsd.org/D25583 Modified: stable/12/sys/netinet/tcp_subr.c Directory Properties: stable/12/ (props changed) Modified: stable/12/sys/netinet/tcp_subr.c == --- stable/12/sys/netinet/tcp_subr.c Wed Aug 19 10:01:05 2020 (r364375) +++ stable/12/sys/netinet/tcp_subr.c Wed Aug 19 10:32:26 2020 (r364376) @@ -1615,6 +1615,12 @@ tcp_newtcpcb(struct inpcb *inp) KASSERT(!STAILQ_EMPTY(&cc_list), ("cc_list is empty!")); CC_ALGO(tp) = CC_DEFAULT(); CC_LIST_RUNLOCK(); + /* +* The tcpcb will hold a reference on its inpcb until tcp_discardcb() +* is called. +*/ + in_pcbref(inp); /* Reference for tcpcb */ + tp->t_inpcb = inp; if (CC_ALGO(tp)->cb_init != NULL) if (CC_ALGO(tp)->cb_init(tp->ccv) > 0) { @@ -1659,12 +1665,6 @@ tcp_newtcpcb(struct inpcb *inp) if (V_tcp_do_sack) tp->t_flags |= TF_SACK_PERMIT; TAILQ_INIT(&tp->snd_holes); - /* -* The tcpcb will hold a reference on its inpcb until tcp_discardcb() -* is called. -*/ - in_pcbref(inp); /* Reference for tcpcb */ - tp->t_inpcb = inp; /* * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no ___ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r364354 - head/sys/netinet/cc
Author: rscheff Date: Tue Aug 18 19:34:31 2020 New Revision: 364354 URL: https://svnweb.freebsd.org/changeset/base/364354 Log: TCP Cubic: recalculate cwnd for every ACK. Since cubic calculates cwnd based on absolute time, retaining RFC3465 (ABC) once-per-window updates can lead to dramatic changes of cwnd in the convex region. Updating cwnd for each incoming ack minimizes this delta, preventing unintentional line-rate bursts. Reviewed by: chengc_netapp.com, tuexen (mentor) MFC after:2 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D26060 Modified: head/sys/netinet/cc/cc_cubic.c Modified: head/sys/netinet/cc/cc_cubic.c == --- head/sys/netinet/cc/cc_cubic.c Tue Aug 18 19:25:03 2020 (r364353) +++ head/sys/netinet/cc/cc_cubic.c Tue Aug 18 19:34:31 2020 (r364354) @@ -131,16 +131,11 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type) cubic_record_rtt(ccv); /* -* Regular ACK and we're not in cong/fast recovery and we're cwnd -* limited and we're either not doing ABC or are just coming out -* from slow-start or were application limited or are slow starting -* or are doing ABC and we've sent a cwnd's worth of bytes. +* For a regular ACK and we're not in cong/fast recovery and +* we're cwnd limited, always recalculate cwnd. */ if (type == CC_ACK && !IN_RECOVERY(CCV(ccv, t_flags)) && - (ccv->flags & CCF_CWND_LIMITED) && (!V_tcp_do_rfc3465 || - (cubic_data->flags & (CUBICFLAG_IN_SLOWSTART | CUBICFLAG_IN_APPLIMIT)) || - CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh) || - (V_tcp_do_rfc3465 && (ccv->flags & CCF_ABC_SENTAWND { + (ccv->flags & CCF_CWND_LIMITED)) { /* Use the logic in NewReno ack_received() for slow start. */ if (CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh) || cubic_data->min_rtt_ticks == TCPTV_SRTTBASE) { @@ -193,15 +188,8 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type) * cwnd growth. * Only update snd_cwnd, if it doesn't shrink. 
*/ - if (V_tcp_do_rfc3465) - CCV(ccv, snd_cwnd) = ulmin(w_cubic_next, - INT_MAX); - else - CCV(ccv, snd_cwnd) += ulmax(1, - ((ulmin(w_cubic_next, INT_MAX) - - CCV(ccv, snd_cwnd)) * - CCV(ccv, t_maxseg)) / - CCV(ccv, snd_cwnd)); + CCV(ccv, snd_cwnd) = ulmin(w_cubic_next, + INT_MAX); } /* ___ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r364197 - head/sys/netinet/cc
Author: rscheff Date: Thu Aug 13 16:45:55 2020 New Revision: 364197 URL: https://svnweb.freebsd.org/changeset/base/364197 Log: TCP Cubic: Have Fast Convergence Heuristic work for ECN, and align concave region The Cubic concave region was not aligned nicely for the very first exit from slow start, where a 50% cwnd reduction is done instead of the normal 30%. This addresses an issue, where a short line-rate burst could result from that sudden jump of cwnd. In addition, the Fast Convergence Heuristic has been expanded to work also with ECN induced congestion response. Submitted by: chengc_netapp.com Reported by: chengc_netapp.com Reviewed by: tuexen (mentor), rgrimes (mentor) Approved by: tuexen (mentor), rgrimes (mentor) MFC after:3 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D25976 Modified: head/sys/netinet/cc/cc_cubic.c Modified: head/sys/netinet/cc/cc_cubic.c == --- head/sys/netinet/cc/cc_cubic.c Thu Aug 13 16:38:51 2020 (r364196) +++ head/sys/netinet/cc/cc_cubic.c Thu Aug 13 16:45:55 2020 (r364197) @@ -286,8 +286,7 @@ cubic_cong_signal(struct cc_var *ccv, uint32_t type) if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) { cubic_ssthresh_update(ccv); cubic_data->flags |= CUBICFLAG_CONG_EVENT; - cubic_data->prev_max_cwnd = cubic_data->max_cwnd; - cubic_data->max_cwnd = CCV(ccv, snd_cwnd); + cubic_data->t_last_cong = ticks; cubic_data->K = cubic_k(cubic_data->max_cwnd / CCV(ccv, t_maxseg)); } ENTER_RECOVERY(CCV(ccv, t_flags)); @@ -298,8 +297,6 @@ cubic_cong_signal(struct cc_var *ccv, uint32_t type) if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) { cubic_ssthresh_update(ccv); cubic_data->flags |= CUBICFLAG_CONG_EVENT; - cubic_data->prev_max_cwnd = cubic_data->max_cwnd; - cubic_data->max_cwnd = CCV(ccv, snd_cwnd); cubic_data->t_last_cong = ticks; cubic_data->K = cubic_k(cubic_data->max_cwnd / CCV(ccv, t_maxseg)); CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh); @@ -361,11 +358,6 @@ cubic_post_recovery(struct cc_var *ccv) cubic_data = ccv->cc_data; 
pipe = 0; - /* Fast convergence heuristic. */ - if (cubic_data->max_cwnd < cubic_data->prev_max_cwnd) - cubic_data->max_cwnd = (cubic_data->max_cwnd * CUBIC_FC_FACTOR) - >> CUBIC_SHIFT; - if (IN_FASTRECOVERY(CCV(ccv, t_flags))) { /* * If inflight data is less than ssthresh, set cwnd @@ -392,7 +384,6 @@ cubic_post_recovery(struct cc_var *ccv) CUBIC_BETA) >> CUBIC_SHIFT, 2 * CCV(ccv, t_maxseg)); } - cubic_data->t_last_cong = ticks; /* Calculate the average RTT between congestion epochs. */ if (cubic_data->epoch_ack_count > 0 && @@ -403,7 +394,6 @@ cubic_post_recovery(struct cc_var *ccv) cubic_data->epoch_ack_count = 0; cubic_data->sum_rtt_ticks = 0; - cubic_data->K = cubic_k(cubic_data->max_cwnd / CCV(ccv, t_maxseg)); } /* @@ -457,18 +447,32 @@ cubic_ssthresh_update(struct cc_var *ccv) { struct cubic *cubic_data; uint32_t ssthresh; + uint32_t cwnd; cubic_data = ccv->cc_data; + cwnd = CCV(ccv, snd_cwnd); + /* Fast convergence heuristic. */ + if (cwnd < cubic_data->max_cwnd) { + cwnd = ((uint64_t)cwnd * CUBIC_FC_FACTOR) >> CUBIC_SHIFT; + } + cubic_data->prev_max_cwnd = cubic_data->max_cwnd; + cubic_data->max_cwnd = cwnd; + /* -* On the first congestion event, set ssthresh to cwnd * 0.5, on -* subsequent congestion events, set it to cwnd * beta. +* On the first congestion event, set ssthresh to cwnd * 0.5 +* and reduce max_cwnd to cwnd * beta. This aligns the cubic concave +* region appropriately. On subsequent congestion events, set +* ssthresh to cwnd * beta. */ - if ((cubic_data->flags & CUBICFLAG_CONG_EVENT) == 0) - ssthresh = CCV(ccv, snd_cwnd) >> 1; - else - ssthresh = ((uint64_t)CCV(ccv, snd_cwnd) * + if ((cubic_data->flags & CUBICFLAG_CONG_EVENT) == 0) { + ssthresh = cwnd >> 1; + cubic_data->max_cwnd = ((uint64_t)cwnd * CUBIC_BETA) >> CUBIC_SHIFT; + } else { + ssthresh = ((uint64_t)cwnd * + CUBIC_BETA) >> CUBIC_SHIFT; + } CCV(ccv, snd_ssthresh) =
svn commit: r364196 - head/sys/netinet/cc
Author: rscheff Date: Thu Aug 13 16:38:51 2020 New Revision: 364196 URL: https://svnweb.freebsd.org/changeset/base/364196 Log: TCP Cubic: After leaving slowstart fix unintended cwnd jump. Initializing K to zero in D23655 introduced a miscalculation, where cwnd would suddenly jump to cwnd_max instead of gradually increasing, after leaving slow-start. Properly calculating K instead of resetting it to zero resolves this issue. Also making sure, that cwnd is recalculated at the earliest opportunity once slow-start is over. Reported by: chengc_netapp.com Reviewed by: chengc_netapp.com, tuexen (mentor), rgrimes (mentor) Approved by: tuexen (mentor), rgrimes (mentor) MFC after:3 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D25746 Modified: head/sys/netinet/cc/cc_cubic.c Modified: head/sys/netinet/cc/cc_cubic.c == --- head/sys/netinet/cc/cc_cubic.c Thu Aug 13 16:30:09 2020 (r364195) +++ head/sys/netinet/cc/cc_cubic.c Thu Aug 13 16:38:51 2020 (r364196) @@ -132,19 +132,29 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type) /* * Regular ACK and we're not in cong/fast recovery and we're cwnd -* limited and we're either not doing ABC or are slow starting or are -* doing ABC and we've sent a cwnd's worth of bytes. +* limited and we're either not doing ABC or are just coming out +* from slow-start or were application limited or are slow starting +* or are doing ABC and we've sent a cwnd's worth of bytes. */ if (type == CC_ACK && !IN_RECOVERY(CCV(ccv, t_flags)) && (ccv->flags & CCF_CWND_LIMITED) && (!V_tcp_do_rfc3465 || + (cubic_data->flags & (CUBICFLAG_IN_SLOWSTART | CUBICFLAG_IN_APPLIMIT)) || CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh) || - (V_tcp_do_rfc3465 && ccv->flags & CCF_ABC_SENTAWND))) { + (V_tcp_do_rfc3465 && (ccv->flags & CCF_ABC_SENTAWND { /* Use the logic in NewReno ack_received() for slow start. 
*/ if (CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh) || cubic_data->min_rtt_ticks == TCPTV_SRTTBASE) { cubic_data->flags |= CUBICFLAG_IN_SLOWSTART; newreno_cc_algo.ack_received(ccv, type); } else { + if (cubic_data->flags & (CUBICFLAG_IN_SLOWSTART | +CUBICFLAG_IN_APPLIMIT)) { + cubic_data->flags &= ~(CUBICFLAG_IN_SLOWSTART | + CUBICFLAG_IN_APPLIMIT); + cubic_data->t_last_cong = ticks; + cubic_data->K = cubic_k(cubic_data->max_cwnd / + CCV(ccv, t_maxseg)); + } if ((ticks_since_cong = ticks - cubic_data->t_last_cong) < 0) { /* @@ -152,14 +162,6 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type) */ ticks_since_cong = INT_MAX; cubic_data->t_last_cong = ticks - INT_MAX; - } - - if (cubic_data->flags & (CUBICFLAG_IN_SLOWSTART | -CUBICFLAG_IN_APPLIMIT)) { - cubic_data->flags &= ~(CUBICFLAG_IN_SLOWSTART | - CUBICFLAG_IN_APPLIMIT); - cubic_data->t_last_cong = ticks; - cubic_data->K = 0; } /* * The mean RTT is used to best reflect the equations in ___ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r364195 - head/sys/netinet
Author: rscheff Date: Thu Aug 13 16:30:09 2020 New Revision: 364195 URL: https://svnweb.freebsd.org/changeset/base/364195 Log: Improve SACK support code for RFC6675 and PRR Adding proper accounting of sacked_bytes and (per-ACK) delivered data to the SACK scoreboard. This will allow more aspects of RFC6675 to be implemented as well as Proportional Rate Reduction (RFC6937). Prior to this change, the pipe calculation controlled with net.inet.tcp.rfc6675_pipe was also susceptible to incorrect results when more than 3 (or 4) holes in the sequence space were present, which can no longer all fit into a single ACK's SACK option. Reviewed by: kbowling, rgrimes (mentor) Approved by: rgrimes (mentor, blanket) MFC after:3 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D18624 Modified: head/sys/netinet/tcp_input.c head/sys/netinet/tcp_sack.c head/sys/netinet/tcp_var.h Modified: head/sys/netinet/tcp_input.c == --- head/sys/netinet/tcp_input.cThu Aug 13 14:26:25 2020 (r364194) +++ head/sys/netinet/tcp_input.cThu Aug 13 16:30:09 2020 (r364195) @@ -2673,9 +2673,16 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru tp->t_dupacks = 0; /* * If this ack also has new SACK info, increment the -* counter as per rfc6675. +* counter as per rfc6675. The variable +* sack_changed tracks all changes to the SACK +* scoreboard, including when partial ACKs without +* SACK options are received, and clear the scoreboard +* from the left side. Such partial ACKs should not be +* counted as dupacks here. 
*/ - if ((tp->t_flags & TF_SACK_PERMIT) && sack_changed) + if ((tp->t_flags & TF_SACK_PERMIT) && + (to.to_flags & TOF_SACK) && + sack_changed) tp->t_dupacks++; } Modified: head/sys/netinet/tcp_sack.c == --- head/sys/netinet/tcp_sack.c Thu Aug 13 14:26:25 2020(r364194) +++ head/sys/netinet/tcp_sack.c Thu Aug 13 16:30:09 2020(r364195) @@ -535,9 +535,7 @@ tcp_sackhole_remove(struct tcpcb *tp, struct sackhole * tp->snd_holes is an ordered list of holes (oldest to newest, in terms of * the sequence space). * Returns 1 if incoming ACK has previously unknown SACK information, - * 0 otherwise. Note: We treat (snd_una, th_ack) as a sack block so any changes - * to that (i.e. left edge moving) would also be considered a change in SACK - * information which is slightly different than rfc6675. + * 0 otherwise. */ int tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tcp_seq th_ack) @@ -545,16 +543,21 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tc struct sackhole *cur, *temp; struct sackblk sack, sack_blocks[TCP_MAX_SACK + 1], *sblkp; int i, j, num_sack_blks, sack_changed; + int delivered_data, left_edge_delta; INP_WLOCK_ASSERT(tp->t_inpcb); num_sack_blks = 0; sack_changed = 0; + delivered_data = 0; + left_edge_delta = 0; /* * If SND.UNA will be advanced by SEG.ACK, and if SACK holes exist, * treat [SND.UNA, SEG.ACK) as if it is a SACK block. +* Account changes to SND.UNA always in delivered data. */ if (SEQ_LT(tp->snd_una, th_ack) && !TAILQ_EMPTY(>snd_holes)) { + left_edge_delta = th_ack - tp->snd_una; sack_blocks[num_sack_blks].start = tp->snd_una; sack_blocks[num_sack_blks++].end = th_ack; } @@ -563,7 +566,6 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tc * received new blocks from the other side. 
*/ if (to->to_flags & TOF_SACK) { - tp->sackhint.sacked_bytes = 0; /* reset */ for (i = 0; i < to->to_nsacks; i++) { bcopy((to->to_sacks + i * TCPOLEN_SACK), , sizeof(sack)); @@ -576,8 +578,6 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tc SEQ_GT(sack.end, tp->snd_una) && SEQ_LEQ(sack.end, tp->snd_max)) { sack_blocks[num_sack_blks++] = sack; - tp->sackhint.sacked_bytes += - (sack.end-sack.start); } } } @@ -602,7 +602,7 @@ tcp_sack_doack(struct tcpcb *tp, struct tcpopt *to, tc } } } -
svn commit: r363397 - head/sys/netinet/cc
Author: rscheff Date: Tue Jul 21 16:21:52 2020 New Revision: 363397 URL: https://svnweb.freebsd.org/changeset/base/363397 Log: Fix style and comment around concave/convex regions in TCP cubic. In cubic, the concave region is when snd_cwnd starts growing slower towards max_cwnd (cwnd at the time of the congestion event), and the convex region is when snd_cwnd starts to grow faster and eventually appears like slow-start-like growth. PR: 238478 Reviewed by: tuexen (mentor), rgrimes (mentor) Approved by: tuexen (mentor), rgrimes (mentor) MFC after: 2 weeks Sponsored by: NetApp, Inc. Differential Revision: https://reviews.freebsd.org/D24657 Modified: head/sys/netinet/cc/cc_cubic.c Modified: head/sys/netinet/cc/cc_cubic.c == --- head/sys/netinet/cc/cc_cubic.c Tue Jul 21 16:17:23 2020 (r363396) +++ head/sys/netinet/cc/cc_cubic.c Tue Jul 21 16:21:52 2020 (r363397) @@ -185,12 +185,11 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type) */ if (CCV(ccv, snd_cwnd) < w_tf) CCV(ccv, snd_cwnd) = ulmin(w_tf, INT_MAX); - } - - else if (CCV(ccv, snd_cwnd) < w_cubic_next) { + } else if (CCV(ccv, snd_cwnd) < w_cubic_next) { /* * Concave or convex region, follow CUBIC * cwnd growth. +* Only update snd_cwnd, if it doesn't shrink. */ if (V_tcp_do_rfc3465) CCV(ccv, snd_cwnd) = ulmin(w_cubic_next, ___ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r363380 - head/sys/netinet/cc
Author: rscheff Date: Mon Jul 20 23:47:27 2020 New Revision: 363380 URL: https://svnweb.freebsd.org/changeset/base/363380 Log: Add MODULE_VERSION to TCP loadable congestion control modules. Without versioning information, using preexisting loader / linker code is not easily possible when another module may have dependencies on pre-loaded modules, and also doesn't allow the automatic loading of dependent modules. No functional change of the actual modules. Reviewed by: tuexen (mentor), rgrimes (mentor) Approved by: tuexen (mentor), rgrimes (mentor) MFC after:2 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D25744 Modified: head/sys/netinet/cc/cc_cdg.c head/sys/netinet/cc/cc_chd.c head/sys/netinet/cc/cc_cubic.c head/sys/netinet/cc/cc_dctcp.c head/sys/netinet/cc/cc_hd.c head/sys/netinet/cc/cc_htcp.c head/sys/netinet/cc/cc_newreno.c head/sys/netinet/cc/cc_vegas.c Modified: head/sys/netinet/cc/cc_cdg.c == --- head/sys/netinet/cc/cc_cdg.cMon Jul 20 22:32:39 2020 (r363379) +++ head/sys/netinet/cc/cc_cdg.cMon Jul 20 23:47:27 2020 (r363380) @@ -714,5 +714,5 @@ SYSCTL_UINT(_net_inet_tcp_cc_cdg, OID_AUTO, loss_compe "the window backoff for loss based CC compatibility"); DECLARE_CC_MODULE(cdg, _cc_algo); - +MODULE_VERSION(cdg, 1); MODULE_DEPEND(cdg, ertt, 1, 1, 1); Modified: head/sys/netinet/cc/cc_chd.c == --- head/sys/netinet/cc/cc_chd.cMon Jul 20 22:32:39 2020 (r363379) +++ head/sys/netinet/cc/cc_chd.cMon Jul 20 23:47:27 2020 (r363380) @@ -493,4 +493,5 @@ SYSCTL_UINT(_net_inet_tcp_cc_chd, OID_AUTO, use_max, "as the basic delay measurement for the algorithm."); DECLARE_CC_MODULE(chd, _cc_algo); +MODULE_VERSION(chd, 1); MODULE_DEPEND(chd, ertt, 1, 1, 1); Modified: head/sys/netinet/cc/cc_cubic.c == --- head/sys/netinet/cc/cc_cubic.c Mon Jul 20 22:32:39 2020 (r363379) +++ head/sys/netinet/cc/cc_cubic.c Mon Jul 20 23:47:27 2020 (r363380) @@ -473,3 +473,4 @@ cubic_ssthresh_update(struct cc_var *ccv) DECLARE_CC_MODULE(cubic, _cc_algo); 
+MODULE_VERSION(cubic, 1); Modified: head/sys/netinet/cc/cc_dctcp.c == --- head/sys/netinet/cc/cc_dctcp.c Mon Jul 20 22:32:39 2020 (r363379) +++ head/sys/netinet/cc/cc_dctcp.c Mon Jul 20 23:47:27 2020 (r363380) @@ -464,3 +464,4 @@ SYSCTL_PROC(_net_inet_tcp_cc_dctcp, OID_AUTO, slowstar "half CWND reduction after the first slow start"); DECLARE_CC_MODULE(dctcp, _cc_algo); +MODULE_VERSION(dctcp, 1); Modified: head/sys/netinet/cc/cc_hd.c == --- head/sys/netinet/cc/cc_hd.c Mon Jul 20 22:32:39 2020(r363379) +++ head/sys/netinet/cc/cc_hd.c Mon Jul 20 23:47:27 2020(r363380) @@ -251,4 +251,5 @@ SYSCTL_PROC(_net_inet_tcp_cc_hd, OID_AUTO, queue_min, "minimum queueing delay threshold (qmin) in ticks"); DECLARE_CC_MODULE(hd, _cc_algo); +MODULE_VERSION(hd, 1); MODULE_DEPEND(hd, ertt, 1, 1, 1); Modified: head/sys/netinet/cc/cc_htcp.c == --- head/sys/netinet/cc/cc_htcp.c Mon Jul 20 22:32:39 2020 (r363379) +++ head/sys/netinet/cc/cc_htcp.c Mon Jul 20 23:47:27 2020 (r363380) @@ -530,3 +530,4 @@ SYSCTL_UINT(_net_inet_tcp_cc_htcp, OID_AUTO, rtt_scali "enable H-TCP RTT scaling"); DECLARE_CC_MODULE(htcp, _cc_algo); +MODULE_VERSION(htcp, 1); Modified: head/sys/netinet/cc/cc_newreno.c == --- head/sys/netinet/cc/cc_newreno.cMon Jul 20 22:32:39 2020 (r363379) +++ head/sys/netinet/cc/cc_newreno.cMon Jul 20 23:47:27 2020 (r363380) @@ -396,3 +396,4 @@ SYSCTL_PROC(_net_inet_tcp_cc_newreno, OID_AUTO, beta_e "New Reno beta ecn, specified as number between 1 and 100"); DECLARE_CC_MODULE(newreno, _cc_algo); +MODULE_VERSION(newreno, 1); Modified: head/sys/netinet/cc/cc_vegas.c == --- head/sys/netinet/cc/cc_vegas.c Mon Jul 20 22:32:39 2020 (r363379) +++ head/sys/netinet/cc/cc_vegas.c Mon Jul 20 23:47:27 2020 (r363380) @@ -301,4 +301,5 @@ SYSCTL_PROC(_net_inet_tcp_cc_vegas, OID_AUTO, beta, "vegas beta, specified as number of \"buffers\" (0 < alpha < beta)"); DECLARE_CC_MODULE(vegas, _cc_algo); +MODULE_VERSION(vegas, 1); MODULE_DEPEND(vegas, ertt, 1, 1, 1); ___ svn-src-all@freebsd.org mailing list
svn commit: r363005 - stable/12/sys/netinet/cc
Author: rscheff Date: Wed Jul 8 09:04:20 2020 New Revision: 363005 URL: https://svnweb.freebsd.org/changeset/base/363005 Log: MFC r362580: TCP: fix cubic RTO reaction. Proper TCP Cubic operation requires knowledge of the maximum congestion window prior to the last congestion event. This restores and improves a bugfix previously added by jtl@ but subsequently removed due to a revert. Reported by: chengc_netapp.com Reviewed by: chengc_netapp.com, tuexen (mentor) Approved by: tuexen (mentor), rgrimes (mentor, blanket) MFC after: 2 weeks Sponsored by: NetApp, Inc. Differential Revision: https://reviews.freebsd.org/D25133 Modified: stable/12/sys/netinet/cc/cc_cubic.c Directory Properties: stable/12/ (props changed) Modified: stable/12/sys/netinet/cc/cc_cubic.c == --- stable/12/sys/netinet/cc/cc_cubic.c Wed Jul 8 09:00:05 2020 (r363004) +++ stable/12/sys/netinet/cc/cc_cubic.c Wed Jul 8 09:04:20 2020 (r363005) @@ -313,10 +313,15 @@ cubic_cong_signal(struct cc_var *ccv, uint32_t type) * timeout has fired more than once, as there is a reasonable * chance the first one is a false alarm and may not indicate * congestion. +* This will put Cubic firmly into the concave / TCP friendly +* region, for a slower ramp-up after two consecutive RTOs. */ if (CCV(ccv, t_rxtshift) >= 2) { cubic_data->flags |= CUBICFLAG_CONG_EVENT; cubic_data->t_last_cong = ticks; + cubic_data->max_cwnd = CCV(ccv, snd_cwnd_prev); + cubic_data->K = cubic_k(cubic_data->max_cwnd / + CCV(ccv, t_maxseg)); } break; } ___ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r363004 - stable/12/sys/netinet
Author: rscheff Date: Wed Jul 8 09:00:05 2020 New Revision: 363004 URL: https://svnweb.freebsd.org/changeset/base/363004 Log: MFC r362577: TCP: make after-idle work for transactional sessions. The use of t_rcvtime as proxy for the last transmission fails for transactional IO, where the client requests data before the server can respond with a bulk transfer. Set aside a dedicated variable to actually track the last locally sent segment going forward. Reported by: rrs Reviewed by: rrs, tuexen (mentor) Approved by: tuexen (mentor), rgrimes (mentor, blanket) MFC after:2 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D25016 Modified: stable/12/sys/netinet/tcp_output.c stable/12/sys/netinet/tcp_var.h Directory Properties: stable/12/ (props changed) Modified: stable/12/sys/netinet/tcp_output.c == --- stable/12/sys/netinet/tcp_output.c Wed Jul 8 06:33:07 2020 (r363003) +++ stable/12/sys/netinet/tcp_output.c Wed Jul 8 09:00:05 2020 (r363004) @@ -246,7 +246,8 @@ tcp_output(struct tcpcb *tp) * to send, then transmit; otherwise, investigate further. */ idle = (tp->t_flags & TF_LASTIDLE) || (tp->snd_max == tp->snd_una); - if (idle && ticks - tp->t_rcvtime >= tp->t_rxtcur) + if (idle && (((ticks - tp->t_rcvtime) >= tp->t_rxtcur) || + (tp->t_sndtime && ((ticks - tp->t_sndtime) >= tp->t_rxtcur cc_after_idle(tp); tp->t_flags &= ~TF_LASTIDLE; if (idle) { @@ -1461,6 +1462,7 @@ out: * Time this transmission if not a retransmission and * not currently timing anything. 
*/ + tp->t_sndtime = ticks; if (tp->t_rtttime == 0) { tp->t_rtttime = ticks; tp->t_rtseq = startseq; Modified: stable/12/sys/netinet/tcp_var.h == --- stable/12/sys/netinet/tcp_var.h Wed Jul 8 06:33:07 2020 (r363003) +++ stable/12/sys/netinet/tcp_var.h Wed Jul 8 09:00:05 2020 (r363004) @@ -152,8 +152,9 @@ struct tcpcb { tcp_seq snd_wl2;/* window update seg ack number */ tcp_seq irs;/* initial receive sequence number */ - tcp_seq iss;/* initial send sequence number */ - u_int t_acktime; + tcp_seq iss;/* initial send sequence number */ + u_int t_acktime; /* RACK and BBR incoming new data was acked */ + u_int t_sndtime; /* time last data was sent */ u_int ts_recent_age; /* when last updated */ tcp_seq snd_recover;/* for use in NewReno Fast Recovery */ uint16_t cl4_spare; /* Spare to adjust CL 4 */ ___ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r362988 - head/sys/netinet
Author: rscheff Date: Tue Jul 7 12:10:59 2020 New Revision: 362988 URL: https://svnweb.freebsd.org/changeset/base/362988 Log: Fix KASSERT during tcp_newtcpcb when low on memory While testing with system default cc set to cubic, and running a memory exhaustion validation, FreeBSD panics due to a missing inpcb reference / lock. Reviewed by: rgrimes (mentor), tuexen (mentor) Approved by: rgrimes (mentor), tuexen (mentor) MFC after: 3 weeks Sponsored by: NetApp, Inc. Differential Revision: https://reviews.freebsd.org/D25583 Modified: head/sys/netinet/tcp_subr.c Modified: head/sys/netinet/tcp_subr.c == --- head/sys/netinet/tcp_subr.c Tue Jul 7 07:51:09 2020 (r362987) +++ head/sys/netinet/tcp_subr.c Tue Jul 7 12:10:59 2020 (r362988) @@ -1702,6 +1702,12 @@ tcp_newtcpcb(struct inpcb *inp) KASSERT(!STAILQ_EMPTY(&cc_list), ("cc_list is empty!")); CC_ALGO(tp) = CC_DEFAULT(); CC_LIST_RUNLOCK(); + /* +* The tcpcb will hold a reference on its inpcb until tcp_discardcb() +* is called. +*/ + in_pcbref(inp); /* Reference for tcpcb */ + tp->t_inpcb = inp; if (CC_ALGO(tp)->cb_init != NULL) if (CC_ALGO(tp)->cb_init(tp->ccv) > 0) { @@ -1746,12 +1752,6 @@ tcp_newtcpcb(struct inpcb *inp) if (V_tcp_do_sack) tp->t_flags |= TF_SACK_PERMIT; TAILQ_INIT(&tp->snd_holes); - /* -* The tcpcb will hold a reference on its inpcb until tcp_discardcb() -* is called. -*/ - in_pcbref(inp); /* Reference for tcpcb */ - tp->t_inpcb = inp; /* * Init srtt to TCPTV_SRTTBASE (0), so we can tell that we have no ___ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r362832 - stable/12/sys/netinet/cc
Author: rscheff Date: Wed Jul 1 09:35:33 2020 New Revision: 362832 URL: https://svnweb.freebsd.org/changeset/base/362832 Log: MFC r362006: Prevent TCP Cubic to abruptly increase cwnd after app-limited Cubic calculates the new cwnd based on absolute time elapsed since the start of an epoch. A cubic epoch is started on congestion events, or once the congestion avoidance phase is started, after slow-start has completed. When a sender is application limited for an extended amount of time and subsequently a larger volume of data becomes ready for sending, Cubic recalculates cwnd with a lingering cubic epoch. This recalculation of the cwnd can induce a massive increase in cwnd, causing a burst of data to be sent at line rate by the sender. This adds a flag to reset the cubic epoch once a session transitions from app-limited to cwnd-limited to prevent the above effect. Reviewed by: chengc_netapp.com, tuexen (mentor) Approved by: tuexen (mentor), rgrimes (mentor) MFC after:3 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D25065 Modified: stable/12/sys/netinet/cc/cc_cubic.c Directory Properties: stable/12/ (props changed) Modified: stable/12/sys/netinet/cc/cc_cubic.c == --- stable/12/sys/netinet/cc/cc_cubic.c Wed Jul 1 09:32:17 2020 (r362831) +++ stable/12/sys/netinet/cc/cc_cubic.c Wed Jul 1 09:35:33 2020 (r362832) @@ -92,6 +92,7 @@ struct cubic { uint32_tflags; #define CUBICFLAG_CONG_EVENT 0x0001 /* congestion experienced */ #define CUBICFLAG_IN_SLOWSTART 0x0002 /* in slow start */ +#define CUBICFLAG_IN_APPLIMIT 0x0004 /* application limited */ /* Minimum observed rtt in ticks. */ int min_rtt_ticks; /* Mean observed rtt between congestion epochs. 
*/ @@ -143,8 +144,10 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type) } else { ticks_since_cong = ticks - cubic_data->t_last_cong; - if (cubic_data->flags & CUBICFLAG_IN_SLOWSTART) { - cubic_data->flags &= ~CUBICFLAG_IN_SLOWSTART; + if (cubic_data->flags & (CUBICFLAG_IN_SLOWSTART | +CUBICFLAG_IN_APPLIMIT)) { + cubic_data->flags &= ~(CUBICFLAG_IN_SLOWSTART | + CUBICFLAG_IN_APPLIMIT); cubic_data->t_last_cong = ticks; cubic_data->K = 0; } @@ -197,6 +200,9 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type) cubic_data->max_cwnd < CCV(ccv, snd_cwnd)) cubic_data->max_cwnd = CCV(ccv, snd_cwnd); } + } else if (type == CC_ACK && !IN_RECOVERY(CCV(ccv, t_flags)) && + !(ccv->flags & CCF_CWND_LIMITED)) { + cubic_data->flags |= CUBICFLAG_IN_APPLIMIT; } } ___ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r362831 - stable/12/sys/netinet/cc
Author: rscheff Date: Wed Jul 1 09:32:17 2020 New Revision: 362831 URL: https://svnweb.freebsd.org/changeset/base/362831 Log: MFC r361987: Prevent TCP Cubic to abruptly increase cwnd after slow-start Introducing flags to track the initial Wmax dragging and exit from slow-start in TCP Cubic. This prevents sudden jumps in the calculated cwnd by cubic, especially when the flow is application limited during slow start (cwnd can not grow as fast as expected). The downside is that cubic may remain slightly longer in the concave region before starting the convex region beyond Wmax again. Reviewed by: chengc_netapp.com, tuexen (mentor) Approved by: tuexen (mentor), rgrimes (mentor, blanket) MFC after:3 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D23655 Modified: stable/12/sys/netinet/cc/cc_cubic.c Directory Properties: stable/12/ (props changed) Modified: stable/12/sys/netinet/cc/cc_cubic.c == --- stable/12/sys/netinet/cc/cc_cubic.c Wed Jul 1 09:28:00 2020 (r362830) +++ stable/12/sys/netinet/cc/cc_cubic.c Wed Jul 1 09:32:17 2020 (r362831) @@ -88,8 +88,10 @@ struct cubic { unsigned long max_cwnd; /* cwnd at the previous congestion event. */ unsigned long prev_max_cwnd; - /* Number of congestion events. */ - uint32_tnum_cong_events; + /* various flags */ + uint32_tflags; +#define CUBICFLAG_CONG_EVENT 0x0001 /* congestion experienced */ +#define CUBICFLAG_IN_SLOWSTART 0x0002 /* in slow start */ /* Minimum observed rtt in ticks. */ int min_rtt_ticks; /* Mean observed rtt between congestion epochs. */ @@ -135,11 +137,17 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type) (V_tcp_do_rfc3465 && ccv->flags & CCF_ABC_SENTAWND))) { /* Use the logic in NewReno ack_received() for slow start.
*/ if (CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh) || - cubic_data->min_rtt_ticks == TCPTV_SRTTBASE) + cubic_data->min_rtt_ticks == TCPTV_SRTTBASE) { + cubic_data->flags |= CUBICFLAG_IN_SLOWSTART; newreno_cc_algo.ack_received(ccv, type); - else { + } else { ticks_since_cong = ticks - cubic_data->t_last_cong; + if (cubic_data->flags & CUBICFLAG_IN_SLOWSTART) { + cubic_data->flags &= ~CUBICFLAG_IN_SLOWSTART; + cubic_data->t_last_cong = ticks; + cubic_data->K = 0; + } /* * The mean RTT is used to best reflect the equations in * the I-D. Using min_rtt in the tf_cwnd calculation @@ -185,7 +193,7 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type) * keep updating our current estimate of the * max_cwnd. */ - if (cubic_data->num_cong_events == 0 && + if (((cubic_data->flags & CUBICFLAG_CONG_EVENT) == 0) && cubic_data->max_cwnd < CCV(ccv, snd_cwnd)) cubic_data->max_cwnd = CCV(ccv, snd_cwnd); } @@ -233,9 +241,10 @@ cubic_cong_signal(struct cc_var *ccv, uint32_t type) if (!IN_FASTRECOVERY(CCV(ccv, t_flags))) { if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) { cubic_ssthresh_update(ccv); - cubic_data->num_cong_events++; + cubic_data->flags |= CUBICFLAG_CONG_EVENT; cubic_data->prev_max_cwnd = cubic_data->max_cwnd; cubic_data->max_cwnd = CCV(ccv, snd_cwnd); + cubic_data->K = cubic_k(cubic_data->max_cwnd / CCV(ccv, t_maxseg)); } ENTER_RECOVERY(CCV(ccv, t_flags)); } @@ -244,10 +253,11 @@ cubic_cong_signal(struct cc_var *ccv, uint32_t type) case CC_ECN: if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) { cubic_ssthresh_update(ccv); - cubic_data->num_cong_events++; + cubic_data->flags |= CUBICFLAG_CONG_EVENT; cubic_data->prev_max_cwnd = cubic_data->max_cwnd; cubic_data->max_cwnd = CCV(ccv, snd_cwnd); cubic_data->t_last_cong = ticks; + cubic_data->K = cubic_k(cubic_data->max_cwnd / CCV(ccv, t_maxseg)); CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh);
svn commit: r362830 - stable/12/bin/dd
Author: rscheff Date: Wed Jul 1 09:28:00 2020 New Revision: 362830 URL: https://svnweb.freebsd.org/changeset/base/362830 Log: MFC r361806: Add O_DIRECT flag to DD for cache bypass FreeBSD DD utility has not had support for the O_DIRECT flag, which is useful to bypass local caching, e.g. for unconditionally issuing NFS IO requests during testing. Reviewed by: rgrimes (mentor) Approved by: rgrimes (blanket) MFC after:3 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D25066 Modified: stable/12/bin/dd/args.c stable/12/bin/dd/dd.1 stable/12/bin/dd/dd.c stable/12/bin/dd/dd.h Directory Properties: stable/12/ (props changed) Modified: stable/12/bin/dd/args.c == --- stable/12/bin/dd/args.c Wed Jul 1 08:23:57 2020(r362829) +++ stable/12/bin/dd/args.c Wed Jul 1 09:28:00 2020(r362830) @@ -266,6 +266,7 @@ static const struct iflag { const char *name; uint64_t set, noset; } ilist[] = { + { "direct", C_IDIRECT, 0 }, { "fullblock", C_IFULLBLOCK, C_SYNC }, }; @@ -410,6 +411,7 @@ static const struct oflag { const char *name; uint64_t set; } olist[] = { + { "direct", C_ODIRECT }, { "fsync", C_OFSYNC }, { "sync", C_OFSYNC }, }; Modified: stable/12/bin/dd/dd.1 == --- stable/12/bin/dd/dd.1 Wed Jul 1 08:23:57 2020(r362829) +++ stable/12/bin/dd/dd.1 Wed Jul 1 09:28:00 2020(r362830) @@ -32,7 +32,7 @@ .\" @(#)dd.1 8.2 (Berkeley) 1/13/94 .\" $FreeBSD$ .\" -.Dd March 26, 2019 +.Dd June 4, 2020 .Dt DD 1 .Os .Sh NAME @@ -117,6 +117,8 @@ limits the number of times is called on the input rather than the number of blocks copied in full. May not be combined with .Cm conv=sync . +.It Cm direct +Set the O_DIRECT flag on the input file to make reads bypass any local caching. .El .It Cm iseek Ns = Ns Ar n Seek on the input file @@ -143,7 +145,7 @@ the output file is truncated at that point. Where .Cm value is one of the symbols from the following list. 
-.Bl -tag -width "fsync" +.Bl -tag -width "direct" .It Cm fsync Set the O_FSYNC flag on the output file to make writes synchronous. .It Cm sync @@ -151,6 +153,8 @@ Set the O_SYNC flag on the output file to make writes This is synonymous with the .Cm fsync value. +.It Cm direct +Set the O_DIRECT flag on the output file to make writes bypass any local caching. .El .It Cm oseek Ns = Ns Ar n Seek on the output file Modified: stable/12/bin/dd/dd.c == --- stable/12/bin/dd/dd.c Wed Jul 1 08:23:57 2020(r362829) +++ stable/12/bin/dd/dd.c Wed Jul 1 09:28:00 2020(r362830) @@ -143,7 +143,7 @@ static void setup(void) { u_int cnt; - int oflags; + int iflags, oflags; cap_rights_t rights; unsigned long cmds[] = { FIODTYPE, MTIOCTOP }; @@ -151,7 +151,10 @@ setup(void) in.name = "stdin"; in.fd = STDIN_FILENO; } else { - in.fd = open(in.name, O_RDONLY, 0); + iflags = 0; + if (ddflags & C_IDIRECT) + iflags |= O_DIRECT; + in.fd = open(in.name, O_RDONLY | iflags, 0); if (in.fd == -1) err(1, "%s", in.name); } @@ -186,6 +189,8 @@ setup(void) oflags |= O_TRUNC; if (ddflags & C_OFSYNC) oflags |= O_FSYNC; + if (ddflags & C_ODIRECT) + oflags |= O_DIRECT; out.fd = open(out.name, O_RDWR | oflags, DEFFILEMODE); /* * May not have read access, so try again with write only. Modified: stable/12/bin/dd/dd.h == --- stable/12/bin/dd/dd.h Wed Jul 1 08:23:57 2020(r362829) +++ stable/12/bin/dd/dd.h Wed Jul 1 09:28:00 2020(r362830) @@ -105,6 +105,8 @@ typedef struct { #defineC_FDATASYNC 0x0001ULL #defineC_OFSYNC0x0002ULL #defineC_IFULLBLOCK0x0004ULL +#defineC_IDIRECT 0x0008ULL +#defineC_ODIRECT 0x0010ULL #defineC_PARITY(C_PAREVEN | C_PARODD | C_PARNONE | C_PARSET) ___ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r362586 - in stable/12/sys/netinet: . tcp_stacks
Author: rscheff Date: Wed Jun 24 16:17:58 2020 New Revision: 362586 URL: https://svnweb.freebsd.org/changeset/base/362586 Log: MFC r361347: With RFC3168 ECN, CWR SHOULD only be sent with new data Overly conservative data receivers may ignore the CWR flag on other packets, and keep ECE latched. This can result in continuous reduction of the congestion window, and very poor performance when ECN is enabled. PR: 243590 Reviewed by: rgrimes (mentor), rrs Approved by: rgrimes (mentor, blanket) MFC after:3 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D23364 Modified: stable/12/sys/netinet/tcp_input.c stable/12/sys/netinet/tcp_output.c stable/12/sys/netinet/tcp_stacks/rack.c Directory Properties: stable/12/ (props changed) Modified: stable/12/sys/netinet/tcp_input.c == --- stable/12/sys/netinet/tcp_input.c Wed Jun 24 15:46:33 2020 (r362585) +++ stable/12/sys/netinet/tcp_input.c Wed Jun 24 16:17:58 2020 (r362586) @@ -417,9 +417,15 @@ cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, ui } break; case CC_ECN: - if (!IN_CONGRECOVERY(tp->t_flags)) { + if (!IN_CONGRECOVERY(tp->t_flags) || + /* +* Allow ECN reaction on ACK to CWR, if +* that data segment was also CE marked. +*/ + SEQ_GEQ(th->th_ack, tp->snd_recover)) { + EXIT_CONGRECOVERY(tp->t_flags); TCPSTAT_INC(tcps_ecn_rcwnd); - tp->snd_recover = tp->snd_max; + tp->snd_recover = tp->snd_max + 1; if (tp->t_flags & TF_ECN_PERMIT) tp->t_flags |= TF_ECN_SND_CWR; } Modified: stable/12/sys/netinet/tcp_output.c == --- stable/12/sys/netinet/tcp_output.c Wed Jun 24 15:46:33 2020 (r362585) +++ stable/12/sys/netinet/tcp_output.c Wed Jun 24 16:17:58 2020 (r362586) @@ -1132,7 +1132,8 @@ send: * Ignore pure ack packets, retransmissions and window probes.
*/ if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) && - !((tp->t_flags & TF_FORCEDATA) && len == 1)) { + !((tp->t_flags & TF_FORCEDATA) && len == 1 && + SEQ_LT(tp->snd_una, tp->snd_max))) { #ifdef INET6 if (isipv6) ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20); @@ -1140,15 +1141,15 @@ send: #endif ip->ip_tos |= IPTOS_ECN_ECT0; TCPSTAT_INC(tcps_ecn_ect0); + /* +* Reply with proper ECN notifications. +* Only set CWR on new data segments. +*/ + if (tp->t_flags & TF_ECN_SND_CWR) { + flags |= TH_CWR; + tp->t_flags &= ~TF_ECN_SND_CWR; + } } - - /* -* Reply with proper ECN notifications. -*/ - if (tp->t_flags & TF_ECN_SND_CWR) { - flags |= TH_CWR; - tp->t_flags &= ~TF_ECN_SND_CWR; - } if (tp->t_flags & TF_ECN_SND_ECE) flags |= TH_ECE; } Modified: stable/12/sys/netinet/tcp_stacks/rack.c == --- stable/12/sys/netinet/tcp_stacks/rack.c Wed Jun 24 15:46:33 2020 (r362585) +++ stable/12/sys/netinet/tcp_stacks/rack.c Wed Jun 24 16:17:58 2020 (r362586) @@ -1415,9 +1415,15 @@ rack_cong_signal(struct tcpcb *tp, struct tcphdr *th, } break; case CC_ECN: - if (!IN_CONGRECOVERY(tp->t_flags)) { + if (!IN_CONGRECOVERY(tp->t_flags) || + /* +* Allow ECN reaction on ACK to CWR, if +* that data segment was also CE marked. +*/ + SEQ_GEQ(th->th_ack, tp->snd_recover)) { + EXIT_CONGRECOVERY(tp->t_flags); TCPSTAT_INC(tcps_ecn_rcwnd); - tp->snd_recover = tp->snd_max; + tp->snd_recover = tp->snd_max + 1; if (tp->t_flags & TF_ECN_PERMIT) tp->t_flags |= TF_ECN_SND_CWR; } @@ -8283,13 +8289,14 @@ send: #endif ip->ip_tos |= IPTOS_ECN_ECT0; TCPSTAT_INC(tcps_ecn_ect0); -
svn commit: r362580 - head/sys/netinet/cc
Author: rscheff Date: Wed Jun 24 13:52:53 2020 New Revision: 362580 URL: https://svnweb.freebsd.org/changeset/base/362580 Log: TCP: fix cubic RTO reaction. Proper TCP Cubic operation requires the knowledge of the maximum congestion window prior to the last congestion event. This restores and improves a bugfix previously added by jtl@ but subsequently removed due to a revert. Reported by: chengc_netapp.com Reviewed by: chengc_netapp.com, tuexen (mentor) Approved by: tuexen (mentor), rgrimes (mentor) MFC after:2 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D25133 Modified: head/sys/netinet/cc/cc_cubic.c Modified: head/sys/netinet/cc/cc_cubic.c == --- head/sys/netinet/cc/cc_cubic.c Wed Jun 24 13:49:30 2020 (r362579) +++ head/sys/netinet/cc/cc_cubic.c Wed Jun 24 13:52:53 2020 (r362580) @@ -313,10 +313,15 @@ cubic_cong_signal(struct cc_var *ccv, uint32_t type) * timeout has fired more than once, as there is a reasonable * chance the first one is a false alarm and may not indicate * congestion. +* This will put Cubic firmly into the concave / TCP friendly +* region, for a slower ramp-up after two consecutive RTOs. */ if (CCV(ccv, t_rxtshift) >= 2) { cubic_data->flags |= CUBICFLAG_CONG_EVENT; cubic_data->t_last_cong = ticks; + cubic_data->max_cwnd = CCV(ccv, snd_cwnd_prev); + cubic_data->K = cubic_k(cubic_data->max_cwnd / + CCV(ccv, t_maxseg)); } break; } ___ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r362577 - head/sys/netinet
Author: rscheff Date: Wed Jun 24 13:42:42 2020 New Revision: 362577 URL: https://svnweb.freebsd.org/changeset/base/362577 Log: TCP: make after-idle work for transactional sessions. The use of t_rcvtime as proxy for the last transmission fails for transactional IO, where the client requests data before the server can respond with a bulk transfer. Set aside a dedicated variable to actually track the last locally sent segment going forward. Reported by: rrs Reviewed by: rrs, tuexen (mentor) Approved by: tuexen (mentor), rgrimes (mentor) MFC after:2 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D25016 Modified: head/sys/netinet/tcp_output.c head/sys/netinet/tcp_var.h Modified: head/sys/netinet/tcp_output.c == --- head/sys/netinet/tcp_output.c Wed Jun 24 13:11:19 2020 (r362576) +++ head/sys/netinet/tcp_output.c Wed Jun 24 13:42:42 2020 (r362577) @@ -260,7 +260,8 @@ tcp_output(struct tcpcb *tp) * to send, then transmit; otherwise, investigate further. */ idle = (tp->t_flags & TF_LASTIDLE) || (tp->snd_max == tp->snd_una); - if (idle && ticks - tp->t_rcvtime >= tp->t_rxtcur) + if (idle && (((ticks - tp->t_rcvtime) >= tp->t_rxtcur) || + (tp->t_sndtime && ((ticks - tp->t_sndtime) >= tp->t_rxtcur)))) cc_after_idle(tp); tp->t_flags &= ~TF_LASTIDLE; if (idle) { @@ -1502,6 +1503,7 @@ out: * Time this transmission if not a retransmission and * not currently timing anything.
*/ + tp->t_sndtime = ticks; if (tp->t_rtttime == 0) { tp->t_rtttime = ticks; tp->t_rtseq = startseq; Modified: head/sys/netinet/tcp_var.h == --- head/sys/netinet/tcp_var.h Wed Jun 24 13:11:19 2020(r362576) +++ head/sys/netinet/tcp_var.h Wed Jun 24 13:42:42 2020(r362577) @@ -188,8 +188,9 @@ struct tcpcb { tcp_seq snd_wl2;/* window update seg ack number */ tcp_seq irs;/* initial receive sequence number */ - tcp_seq iss;/* initial send sequence number */ - u_int t_acktime; + tcp_seq iss;/* initial send sequence number */ + u_int t_acktime; /* RACK and BBR incoming new data was acked */ + u_int t_sndtime; /* time last data was sent */ u_int ts_recent_age; /* when last updated */ tcp_seq snd_recover;/* for use in NewReno Fast Recovery */ uint16_t cl4_spare; /* Spare to adjust CL 4 */ ___ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r362006 - head/sys/netinet/cc
Author: rscheff Date: Wed Jun 10 07:32:02 2020 New Revision: 362006 URL: https://svnweb.freebsd.org/changeset/base/362006 Log: Prevent TCP Cubic to abruptly increase cwnd after app-limited Cubic calculates the new cwnd based on absolute time elapsed since the start of an epoch. A cubic epoch is started on congestion events, or once the congestion avoidance phase is started, after slow-start has completed. When a sender is application limited for an extended amount of time and subsequently a larger volume of data becomes ready for sending, Cubic recalculates cwnd with a lingering cubic epoch. This recalculation of the cwnd can induce a massive increase in cwnd, causing a burst of data to be sent at line rate by the sender. This adds a flag to reset the cubic epoch once a session transitions from app-limited to cwnd-limited to prevent the above effect. Reviewed by: chengc_netapp.com, tuexen (mentor) Approved by: tuexen (mentor), rgrimes (mentor) MFC after:3 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D25065 Modified: head/sys/netinet/cc/cc_cubic.c Modified: head/sys/netinet/cc/cc_cubic.c == --- head/sys/netinet/cc/cc_cubic.c Wed Jun 10 05:01:00 2020 (r362005) +++ head/sys/netinet/cc/cc_cubic.c Wed Jun 10 07:32:02 2020 (r362006) @@ -94,6 +94,7 @@ struct cubic { uint32_tflags; #define CUBICFLAG_CONG_EVENT 0x0001 /* congestion experienced */ #define CUBICFLAG_IN_SLOWSTART 0x0002 /* in slow start */ +#define CUBICFLAG_IN_APPLIMIT 0x0004 /* application limited */ /* Minimum observed rtt in ticks. */ int min_rtt_ticks; /* Mean observed rtt between congestion epochs. 
*/ @@ -153,8 +154,10 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type) cubic_data->t_last_cong = ticks - INT_MAX; } - if (cubic_data->flags & CUBICFLAG_IN_SLOWSTART) { - cubic_data->flags &= ~CUBICFLAG_IN_SLOWSTART; + if (cubic_data->flags & (CUBICFLAG_IN_SLOWSTART | +CUBICFLAG_IN_APPLIMIT)) { + cubic_data->flags &= ~(CUBICFLAG_IN_SLOWSTART | + CUBICFLAG_IN_APPLIMIT); cubic_data->t_last_cong = ticks; cubic_data->K = 0; } @@ -214,6 +217,9 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type) CCV(ccv, t_maxseg)); } } + } else if (type == CC_ACK && !IN_RECOVERY(CCV(ccv, t_flags)) && + !(ccv->flags & CCF_CWND_LIMITED)) { + cubic_data->flags |= CUBICFLAG_IN_APPLIMIT; } } ___ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r361987 - head/sys/netinet/cc
Author: rscheff Date: Tue Jun 9 21:07:58 2020 New Revision: 361987 URL: https://svnweb.freebsd.org/changeset/base/361987 Log: Prevent TCP Cubic to abruptly increase cwnd after slow-start Introducing flags to track the initial Wmax dragging and exit from slow-start in TCP Cubic. This prevents sudden jumps in the calculated cwnd by cubic, especially when the flow is application limited during slow start (cwnd can not grow as fast as expected). The downside is that cubic may remain slightly longer in the concave region before starting the convex region beyond Wmax again. Reviewed by: chengc_netapp.com, tuexen (mentor) Approved by: tuexen (mentor), rgrimes (mentor, blanket) MFC after:3 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D23655 Modified: head/sys/netinet/cc/cc_cubic.c Modified: head/sys/netinet/cc/cc_cubic.c == --- head/sys/netinet/cc/cc_cubic.c Tue Jun 9 20:52:35 2020 (r361986) +++ head/sys/netinet/cc/cc_cubic.c Tue Jun 9 21:07:58 2020 (r361987) @@ -90,8 +90,10 @@ struct cubic { unsigned long max_cwnd; /* cwnd at the previous congestion event. */ unsigned long prev_max_cwnd; - /* Number of congestion events. */ - uint32_tnum_cong_events; + /* various flags */ + uint32_tflags; +#define CUBICFLAG_CONG_EVENT 0x0001 /* congestion experienced */ +#define CUBICFLAG_IN_SLOWSTART 0x0002 /* in slow start */ /* Minimum observed rtt in ticks. */ int min_rtt_ticks; /* Mean observed rtt between congestion epochs. */ @@ -138,9 +140,10 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type) (V_tcp_do_rfc3465 && ccv->flags & CCF_ABC_SENTAWND))) { /* Use the logic in NewReno ack_received() for slow start.
*/ if (CCV(ccv, snd_cwnd) <= CCV(ccv, snd_ssthresh) || - cubic_data->min_rtt_ticks == TCPTV_SRTTBASE) + cubic_data->min_rtt_ticks == TCPTV_SRTTBASE) { + cubic_data->flags |= CUBICFLAG_IN_SLOWSTART; newreno_cc_algo.ack_received(ccv, type); - else { + } else { if ((ticks_since_cong = ticks - cubic_data->t_last_cong) < 0) { /* @@ -150,6 +153,11 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type) cubic_data->t_last_cong = ticks - INT_MAX; } + if (cubic_data->flags & CUBICFLAG_IN_SLOWSTART) { + cubic_data->flags &= ~CUBICFLAG_IN_SLOWSTART; + cubic_data->t_last_cong = ticks; + cubic_data->K = 0; + } /* * The mean RTT is used to best reflect the equations in * the I-D. Using min_rtt in the tf_cwnd calculation @@ -199,7 +207,7 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type) * keep updating our current estimate of the * max_cwnd. */ - if (cubic_data->num_cong_events == 0 && + if (((cubic_data->flags & CUBICFLAG_CONG_EVENT) == 0) && cubic_data->max_cwnd < CCV(ccv, snd_cwnd)) { cubic_data->max_cwnd = CCV(ccv, snd_cwnd); cubic_data->K = cubic_k(cubic_data->max_cwnd / @@ -270,9 +278,10 @@ cubic_cong_signal(struct cc_var *ccv, uint32_t type) if (!IN_FASTRECOVERY(CCV(ccv, t_flags))) { if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) { cubic_ssthresh_update(ccv); - cubic_data->num_cong_events++; + cubic_data->flags |= CUBICFLAG_CONG_EVENT; cubic_data->prev_max_cwnd = cubic_data->max_cwnd; cubic_data->max_cwnd = CCV(ccv, snd_cwnd); + cubic_data->K = cubic_k(cubic_data->max_cwnd / CCV(ccv, t_maxseg)); } ENTER_RECOVERY(CCV(ccv, t_flags)); } @@ -281,10 +290,11 @@ cubic_cong_signal(struct cc_var *ccv, uint32_t type) case CC_ECN: if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) { cubic_ssthresh_update(ccv); - cubic_data->num_cong_events++; + cubic_data->flags |= CUBICFLAG_CONG_EVENT; cubic_data->prev_max_cwnd = cubic_data->max_cwnd; cubic_data->max_cwnd = CCV(ccv, snd_cwnd);
svn commit: r361841 - in stable/12/sys/netinet: . tcp_stacks
Author: rscheff Date: Fri Jun 5 18:00:36 2020 New Revision: 361841 URL: https://svnweb.freebsd.org/changeset/base/361841 Log: MFC r361346: Retain only mutually supported TCP options after simultaneous SYN When receiving a parallel SYN in SYN-SENT state, remove all the options only we supported locally before sending the SYN,ACK. This addresses a consistency issue on parallel opens. Also, on such a parallel open, the stack could be coaxed into running with timestamps enabled, even if administratively disabled. This does NOT contain the merge of the change to BBR since at this time that code does not exist in stable/12, and there is no plan to merge BBR(v1) to stable/12. Reviewed by: tuexen (mentor) Approved by: tuexen (mentor), rgrimes (mentor, blanket) Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D23371 Modified: stable/12/sys/netinet/tcp_input.c stable/12/sys/netinet/tcp_stacks/rack.c Directory Properties: stable/12/ (props changed) Modified: stable/12/sys/netinet/tcp_input.c == --- stable/12/sys/netinet/tcp_input.c Fri Jun 5 17:00:38 2020 (r361840) +++ stable/12/sys/netinet/tcp_input.c Fri Jun 5 18:00:36 2020 (r361841) @@ -1667,17 +1667,20 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru (tp->t_flags & TF_REQ_SCALE)) { tp->t_flags |= TF_RCVD_SCALE; tp->snd_scale = to.to_wscale; - } + } else + tp->t_flags &= ~TF_REQ_SCALE; /* * Initial send window. It will be updated with * the next incoming segment to the scaled value. 
*/ tp->snd_wnd = th->th_win; - if (to.to_flags & TOF_TS) { + if ((to.to_flags & TOF_TS) && + (tp->t_flags & TF_REQ_TSTMP)) { tp->t_flags |= TF_RCVD_TSTMP; tp->ts_recent = to.to_tsval; tp->ts_recent_age = tcp_ts_getticks(); - } + } else + tp->t_flags &= ~TF_REQ_TSTMP; if (to.to_flags & TOF_MSS) tcp_mss(tp, to.to_mss); if ((tp->t_flags & TF_SACK_PERMIT) && Modified: stable/12/sys/netinet/tcp_stacks/rack.c == --- stable/12/sys/netinet/tcp_stacks/rack.c Fri Jun 5 17:00:38 2020 (r361840) +++ stable/12/sys/netinet/tcp_stacks/rack.c Fri Jun 5 18:00:36 2020 (r361841) @@ -6754,17 +6754,20 @@ rack_hpts_do_segment(struct mbuf *m, struct tcphdr *th (tp->t_flags & TF_REQ_SCALE)) { tp->t_flags |= TF_RCVD_SCALE; tp->snd_scale = to.to_wscale; - } + } else + tp->t_flags &= ~TF_REQ_SCALE; /* * Initial send window. It will be updated with the * next incoming segment to the scaled value. */ tp->snd_wnd = th->th_win; - if (to.to_flags & TOF_TS) { + if ((to.to_flags & TOF_TS) && + (tp->t_flags & TF_REQ_TSTMP)) { tp->t_flags |= TF_RCVD_TSTMP; tp->ts_recent = to.to_tsval; tp->ts_recent_age = cts; - } + } else + tp->t_flags &= ~TF_REQ_TSTMP; if (to.to_flags & TOF_MSS) tcp_mss(tp, to.to_mss); if ((tp->t_flags & TF_SACK_PERMIT) && ___ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r361808 - stable/12/sys/netinet/cc
Author: rscheff Date: Thu Jun 4 21:02:24 2020 New Revision: 361808 URL: https://svnweb.freebsd.org/changeset/base/361808 Log: MFC rS361348: DCTCP: update alpha only once after loss recovery. In mixed ECN marking and loss scenarios it was found, that the alpha value of DCTCP is updated two times. The second update happens with freshly initialized counters indicating to ECN loss. Overall this leads to alpha not adjusting as quickly as expected to ECN markings, and therefore lead to excessive loss. Reported by: Cheng Cui Reviewed by: chengc_netapp.com, rrs, tuexen (mentor) Approved by: tuexen (mentor), rgrimes (mentor, blanket) Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D24817 Modified: stable/12/sys/netinet/cc/cc_dctcp.c Directory Properties: stable/12/ (props changed) Modified: stable/12/sys/netinet/cc/cc_dctcp.c == --- stable/12/sys/netinet/cc/cc_dctcp.c Thu Jun 4 20:48:57 2020 (r361807) +++ stable/12/sys/netinet/cc/cc_dctcp.c Thu Jun 4 21:02:24 2020 (r361808) @@ -154,10 +154,8 @@ dctcp_ack_received(struct cc_var *ccv, uint16_t type) * Update the fraction of marked bytes at the end of * current window size. */ - if ((IN_FASTRECOVERY(CCV(ccv, t_flags)) && - SEQ_GEQ(ccv->curack, CCV(ccv, snd_recover))) || - (!IN_FASTRECOVERY(CCV(ccv, t_flags)) && - SEQ_GT(ccv->curack, dctcp_data->save_sndnxt))) + if (!IN_FASTRECOVERY(CCV(ccv, t_flags)) && + SEQ_GT(ccv->curack, dctcp_data->save_sndnxt)) dctcp_update_alpha(ccv); } else newreno_cc_algo.ack_received(ccv, type); ___ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r361806 - head/bin/dd
Author: rscheff Date: Thu Jun 4 20:47:11 2020 New Revision: 361806 URL: https://svnweb.freebsd.org/changeset/base/361806 Log: Add O_DIRECT flag to DD for cache bypass FreeBSD DD utility has not had support for the O_DIRECT flag, which is useful to bypass local caching, e.g. for unconditionally issuing NFS IO requests during testing. Reviewed by: rgrimes (mentor) Approved by: rgrimes (mentor, blanket) MFC after:3 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D25066 Modified: head/bin/dd/args.c head/bin/dd/dd.1 head/bin/dd/dd.c head/bin/dd/dd.h Modified: head/bin/dd/args.c == --- head/bin/dd/args.c Thu Jun 4 20:39:28 2020(r361805) +++ head/bin/dd/args.c Thu Jun 4 20:47:11 2020(r361806) @@ -266,6 +266,7 @@ static const struct iflag { const char *name; uint64_t set, noset; } ilist[] = { + { "direct", C_IDIRECT, 0 }, { "fullblock", C_IFULLBLOCK, C_SYNC }, }; @@ -410,6 +411,7 @@ static const struct oflag { const char *name; uint64_t set; } olist[] = { + { "direct", C_ODIRECT }, { "fsync", C_OFSYNC }, { "sync", C_OFSYNC }, }; Modified: head/bin/dd/dd.1 == --- head/bin/dd/dd.1Thu Jun 4 20:39:28 2020(r361805) +++ head/bin/dd/dd.1Thu Jun 4 20:47:11 2020(r361806) @@ -32,7 +32,7 @@ .\" @(#)dd.1 8.2 (Berkeley) 1/13/94 .\" $FreeBSD$ .\" -.Dd March 26, 2019 +.Dd June 4, 2020 .Dt DD 1 .Os .Sh NAME @@ -117,6 +117,8 @@ limits the number of times is called on the input rather than the number of blocks copied in full. May not be combined with .Cm conv=sync . +.It Cm direct +Set the O_DIRECT flag on the input file to make reads bypass any local caching. .El .It Cm iseek Ns = Ns Ar n Seek on the input file @@ -143,7 +145,7 @@ the output file is truncated at that point. Where .Cm value is one of the symbols from the following list. -.Bl -tag -width "fsync" +.Bl -tag -width "direct" .It Cm fsync Set the O_FSYNC flag on the output file to make writes synchronous. 
.It Cm sync @@ -151,6 +153,8 @@ Set the O_SYNC flag on the output file to make writes This is synonymous with the .Cm fsync value. +.It Cm direct +Set the O_DIRECT flag on the output file to make writes bypass any local caching. .El .It Cm oseek Ns = Ns Ar n Seek on the output file Modified: head/bin/dd/dd.c == --- head/bin/dd/dd.cThu Jun 4 20:39:28 2020(r361805) +++ head/bin/dd/dd.cThu Jun 4 20:47:11 2020(r361806) @@ -143,7 +143,7 @@ static void setup(void) { u_int cnt; - int oflags; + int iflags, oflags; cap_rights_t rights; unsigned long cmds[] = { FIODTYPE, MTIOCTOP }; @@ -151,7 +151,10 @@ setup(void) in.name = "stdin"; in.fd = STDIN_FILENO; } else { - in.fd = open(in.name, O_RDONLY, 0); + iflags = 0; + if (ddflags & C_IDIRECT) + iflags |= O_DIRECT; + in.fd = open(in.name, O_RDONLY | iflags, 0); if (in.fd == -1) err(1, "%s", in.name); } @@ -186,6 +189,8 @@ setup(void) oflags |= O_TRUNC; if (ddflags & C_OFSYNC) oflags |= O_FSYNC; + if (ddflags & C_ODIRECT) + oflags |= O_DIRECT; out.fd = open(out.name, O_RDWR | oflags, DEFFILEMODE); /* * May not have read access, so try again with write only. Modified: head/bin/dd/dd.h == --- head/bin/dd/dd.hThu Jun 4 20:39:28 2020(r361805) +++ head/bin/dd/dd.hThu Jun 4 20:47:11 2020(r361806) @@ -105,6 +105,8 @@ typedef struct { #defineC_FDATASYNC 0x0001ULL #defineC_OFSYNC0x0002ULL #defineC_IFULLBLOCK0x0004ULL +#defineC_IDIRECT 0x0008ULL +#defineC_ODIRECT 0x0010ULL #defineC_PARITY(C_PAREVEN | C_PARODD | C_PARNONE | C_PARSET) ___ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r361565 - releng/11.4/sys/netinet
Author: rscheff Date: Wed May 27 22:34:46 2020 New Revision: 361565 URL: https://svnweb.freebsd.org/changeset/base/361565 Log: MFS r361436: MFC r361347: With RFC3168 ECN, CWR SHOULD only be sent with new data. Overly conservative data receivers may ignore the CWR flag on other packets, and keep ECE latched. This can result in continuous reduction of the congestion window, and very poor performance when ECN is enabled. This does NOT contain the merge of the change to RACK since at this time that code does not exist in stable/11, and there is no plan to merge RACK to stable/11. PR: 243590 Reviewed by: rgrimes (mentor), rrs Approved by: re(gjb) Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D23364 Modified: releng/11.4/sys/netinet/tcp_input.c releng/11.4/sys/netinet/tcp_output.c Directory Properties: releng/11.4/ (props changed) Modified: releng/11.4/sys/netinet/tcp_input.c == --- releng/11.4/sys/netinet/tcp_input.c Wed May 27 21:56:45 2020 (r361564) +++ releng/11.4/sys/netinet/tcp_input.c Wed May 27 22:34:46 2020 (r361565) @@ -417,9 +417,15 @@ cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, ui } break; case CC_ECN: - if (!IN_CONGRECOVERY(tp->t_flags)) { + if (!IN_CONGRECOVERY(tp->t_flags) || + /* +* Allow ECN reaction on ACK to CWR, if +* that data segment was also CE marked. +*/ + SEQ_GEQ(th->th_ack, tp->snd_recover)) { + EXIT_CONGRECOVERY(tp->t_flags); TCPSTAT_INC(tcps_ecn_rcwnd); - tp->snd_recover = tp->snd_max; + tp->snd_recover = tp->snd_max + 1; if (tp->t_flags & TF_ECN_PERMIT) tp->t_flags |= TF_ECN_SND_CWR; } Modified: releng/11.4/sys/netinet/tcp_output.c == --- releng/11.4/sys/netinet/tcp_output.cWed May 27 21:56:45 2020 (r361564) +++ releng/11.4/sys/netinet/tcp_output.cWed May 27 22:34:46 2020 (r361565) @@ -1161,7 +1161,8 @@ send: * Ignore pure ack packets, retransmissions and window probes. 
*/ if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) && - !((tp->t_flags & TF_FORCEDATA) && len == 1)) { + !((tp->t_flags & TF_FORCEDATA) && len == 1 && + SEQ_LT(tp->snd_una, tp->snd_max))) { #ifdef INET6 if (isipv6) ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20); @@ -1169,15 +1170,15 @@ send: #endif ip->ip_tos |= IPTOS_ECN_ECT0; TCPSTAT_INC(tcps_ecn_ect0); + /* +* Reply with proper ECN notifications. +* Only set CWR on new data segments. +*/ + if (tp->t_flags & TF_ECN_SND_CWR) { + flags |= TH_CWR; + tp->t_flags &= ~TF_ECN_SND_CWR; + } } - - /* -* Reply with proper ECN notifications. -*/ - if (tp->t_flags & TF_ECN_SND_CWR) { - flags |= TH_CWR; - tp->t_flags &= ~TF_ECN_SND_CWR; - } if (tp->t_flags & TF_ECN_SND_ECE) flags |= TH_ECE; } ___ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r361436 - stable/11/sys/netinet
Author: rscheff Date: Sun May 24 17:51:14 2020 New Revision: 361436 URL: https://svnweb.freebsd.org/changeset/base/361436 Log: MFC r361347: With RFC3168 ECN, CWR SHOULD only be sent with new data. Overly conservative data receivers may ignore the CWR flag on other packets, and keep ECE latched. This can result in continuous reduction of the congestion window, and very poor performance when ECN is enabled. This does NOT contain the merge of the change to RACK since at this time that code does not exist in stable/11, and there is no plan to merge RACK to stable/11. PR: 243590 Reviewed by: rgrimes (mentor), rrs Approved by: rgrimes (mentor), tuexen (mentor) MFC after:3 days Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D23364 Modified: stable/11/sys/netinet/tcp_input.c stable/11/sys/netinet/tcp_output.c Directory Properties: stable/11/ (props changed) Modified: stable/11/sys/netinet/tcp_input.c == --- stable/11/sys/netinet/tcp_input.c Sun May 24 16:47:27 2020 (r361435) +++ stable/11/sys/netinet/tcp_input.c Sun May 24 17:51:14 2020 (r361436) @@ -417,9 +417,15 @@ cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, ui } break; case CC_ECN: - if (!IN_CONGRECOVERY(tp->t_flags)) { + if (!IN_CONGRECOVERY(tp->t_flags) || + /* +* Allow ECN reaction on ACK to CWR, if +* that data segment was also CE marked. +*/ + SEQ_GEQ(th->th_ack, tp->snd_recover)) { + EXIT_CONGRECOVERY(tp->t_flags); TCPSTAT_INC(tcps_ecn_rcwnd); - tp->snd_recover = tp->snd_max; + tp->snd_recover = tp->snd_max + 1; if (tp->t_flags & TF_ECN_PERMIT) tp->t_flags |= TF_ECN_SND_CWR; } Modified: stable/11/sys/netinet/tcp_output.c == --- stable/11/sys/netinet/tcp_output.c Sun May 24 16:47:27 2020 (r361435) +++ stable/11/sys/netinet/tcp_output.c Sun May 24 17:51:14 2020 (r361436) @@ -1161,7 +1161,8 @@ send: * Ignore pure ack packets, retransmissions and window probes. 
*/ if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) && - !((tp->t_flags & TF_FORCEDATA) && len == 1)) { + !((tp->t_flags & TF_FORCEDATA) && len == 1 && + SEQ_LT(tp->snd_una, tp->snd_max))) { #ifdef INET6 if (isipv6) ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20); @@ -1169,15 +1170,15 @@ send: #endif ip->ip_tos |= IPTOS_ECN_ECT0; TCPSTAT_INC(tcps_ecn_ect0); + /* +* Reply with proper ECN notifications. +* Only set CWR on new data segments. +*/ + if (tp->t_flags & TF_ECN_SND_CWR) { + flags |= TH_CWR; + tp->t_flags &= ~TF_ECN_SND_CWR; + } } - - /* -* Reply with proper ECN notifications. -*/ - if (tp->t_flags & TF_ECN_SND_CWR) { - flags |= TH_CWR; - tp->t_flags &= ~TF_ECN_SND_CWR; - } if (tp->t_flags & TF_ECN_SND_ECE) flags |= TH_ECE; } ___ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r361348 - head/sys/netinet/cc
Author: rscheff Date: Thu May 21 21:42:49 2020 New Revision: 361348 URL: https://svnweb.freebsd.org/changeset/base/361348 Log: DCTCP: update alpha only once after loss recovery. In mixed ECN marking and loss scenarios it was found that the alpha value of DCTCP is updated twice. The second update happens with freshly initialized counters indicating to ECN loss. Overall this leads to alpha not adjusting as quickly as expected to ECN markings, and therefore leads to excessive loss. Reported by: Cheng Cui Reviewed by: chengc_netapp.com, rrs, tuexen (mentor) Approved by: tuexen (mentor) MFC after:2 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D24817 Modified: head/sys/netinet/cc/cc_dctcp.c Modified: head/sys/netinet/cc/cc_dctcp.c == --- head/sys/netinet/cc/cc_dctcp.c Thu May 21 21:33:15 2020 (r361347) +++ head/sys/netinet/cc/cc_dctcp.c Thu May 21 21:42:49 2020 (r361348) @@ -154,10 +154,8 @@ dctcp_ack_received(struct cc_var *ccv, uint16_t type) * Update the fraction of marked bytes at the end of * current window size. */ - if ((IN_FASTRECOVERY(CCV(ccv, t_flags)) && - SEQ_GEQ(ccv->curack, CCV(ccv, snd_recover))) || - (!IN_FASTRECOVERY(CCV(ccv, t_flags)) && - SEQ_GT(ccv->curack, dctcp_data->save_sndnxt))) + if (!IN_FASTRECOVERY(CCV(ccv, t_flags)) && + SEQ_GT(ccv->curack, dctcp_data->save_sndnxt)) dctcp_update_alpha(ccv); } else newreno_cc_algo.ack_received(ccv, type); ___ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r361347 - in head/sys/netinet: . tcp_stacks
Author: rscheff Date: Thu May 21 21:33:15 2020 New Revision: 361347 URL: https://svnweb.freebsd.org/changeset/base/361347 Log: With RFC3168 ECN, CWR SHOULD only be sent with new data. Overly conservative data receivers may ignore the CWR flag on other packets, and keep ECE latched. This can result in continuous reduction of the congestion window, and very poor performance when ECN is enabled. Reviewed by: rgrimes (mentor), rrs Approved by: rgrimes (mentor), tuexen (mentor) MFC after:3 days Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D23364 Modified: head/sys/netinet/tcp_input.c head/sys/netinet/tcp_output.c head/sys/netinet/tcp_stacks/rack.c Modified: head/sys/netinet/tcp_input.c == --- head/sys/netinet/tcp_input.cThu May 21 21:26:21 2020 (r361346) +++ head/sys/netinet/tcp_input.cThu May 21 21:33:15 2020 (r361347) @@ -447,9 +447,15 @@ cc_cong_signal(struct tcpcb *tp, struct tcphdr *th, ui } break; case CC_ECN: - if (!IN_CONGRECOVERY(tp->t_flags)) { + if (!IN_CONGRECOVERY(tp->t_flags) || + /* +* Allow ECN reaction on ACK to CWR, if +* that data segment was also CE marked. 
+*/ + SEQ_GEQ(th->th_ack, tp->snd_recover)) { + EXIT_CONGRECOVERY(tp->t_flags); TCPSTAT_INC(tcps_ecn_rcwnd); - tp->snd_recover = tp->snd_max; + tp->snd_recover = tp->snd_max + 1; if (tp->t_flags2 & TF2_ECN_PERMIT) tp->t_flags2 |= TF2_ECN_SND_CWR; } Modified: head/sys/netinet/tcp_output.c == --- head/sys/netinet/tcp_output.c Thu May 21 21:26:21 2020 (r361346) +++ head/sys/netinet/tcp_output.c Thu May 21 21:33:15 2020 (r361347) @@ -1170,7 +1170,8 @@ send: */ if (len > 0 && SEQ_GEQ(tp->snd_nxt, tp->snd_max) && (sack_rxmit == 0) && - !((tp->t_flags & TF_FORCEDATA) && len == 1)) { + !((tp->t_flags & TF_FORCEDATA) && len == 1 && + SEQ_LT(tp->snd_una, tp->snd_max))) { #ifdef INET6 if (isipv6) ip6->ip6_flow |= htonl(IPTOS_ECN_ECT0 << 20); @@ -1178,14 +1179,14 @@ send: #endif ip->ip_tos |= IPTOS_ECN_ECT0; TCPSTAT_INC(tcps_ecn_ect0); - } - - /* -* Reply with proper ECN notifications. -*/ - if (tp->t_flags2 & TF2_ECN_SND_CWR) { - flags |= TH_CWR; - tp->t_flags2 &= ~TF2_ECN_SND_CWR; + /* +* Reply with proper ECN notifications. +* Only set CWR on new data segments. +*/ + if (tp->t_flags2 & TF2_ECN_SND_CWR) { + flags |= TH_CWR; + tp->t_flags2 &= ~TF2_ECN_SND_CWR; + } } if (tp->t_flags2 & TF2_ECN_SND_ECE) flags |= TH_ECE; Modified: head/sys/netinet/tcp_stacks/rack.c == --- head/sys/netinet/tcp_stacks/rack.c Thu May 21 21:26:21 2020 (r361346) +++ head/sys/netinet/tcp_stacks/rack.c Thu May 21 21:33:15 2020 (r361347) @@ -4095,9 +4095,15 @@ rack_cong_signal(struct tcpcb *tp, struct tcphdr *th, } break; case CC_ECN: - if (!IN_CONGRECOVERY(tp->t_flags)) { + if (!IN_CONGRECOVERY(tp->t_flags) || + /* +* Allow ECN reaction on ACK to CWR, if +* that data segment was also CE marked. 
+*/ + SEQ_GEQ(th->th_ack, tp->snd_recover)) { + EXIT_CONGRECOVERY(tp->t_flags); KMOD_TCPSTAT_INC(tcps_ecn_rcwnd); - tp->snd_recover = tp->snd_max; + tp->snd_recover = tp->snd_max + 1; if (tp->t_flags2 & TF2_ECN_PERMIT) tp->t_flags2 |= TF2_ECN_SND_CWR; } @@ -13556,13 +13562,14 @@ send: #endif ip->ip_tos |= IPTOS_ECN_ECT0; KMOD_TCPSTAT_INC(tcps_ecn_ect0); - } - /* -* Reply with proper ECN notifications. -*/ - if (tp->t_flags2 & TF2_ECN_SND_CWR) { -
svn commit: r361346 - in head/sys/netinet: . tcp_stacks
Author: rscheff Date: Thu May 21 21:26:21 2020 New Revision: 361346 URL: https://svnweb.freebsd.org/changeset/base/361346 Log: Retain only mutually supported TCP options after simultaneous SYN When receiving a parallel SYN in SYN-SENT state, remove all the options only we supported locally before sending the SYN,ACK. This addresses a consistency issue on parallel opens. Also, on such a parallel open, the stack could be coaxed into running with timestamps enabled, even if administratively disabled. Reviewed by: tuexen (mentor) Approved by: tuexen (mentor) MFC after:2 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D23371 Modified: head/sys/netinet/tcp_input.c head/sys/netinet/tcp_stacks/bbr.c head/sys/netinet/tcp_stacks/rack.c Modified: head/sys/netinet/tcp_input.c == --- head/sys/netinet/tcp_input.cThu May 21 21:15:25 2020 (r361345) +++ head/sys/netinet/tcp_input.cThu May 21 21:26:21 2020 (r361346) @@ -1623,17 +1623,20 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru (tp->t_flags & TF_REQ_SCALE)) { tp->t_flags |= TF_RCVD_SCALE; tp->snd_scale = to.to_wscale; - } + } else + tp->t_flags &= ~TF_REQ_SCALE; /* * Initial send window. It will be updated with * the next incoming segment to the scaled value. 
*/ tp->snd_wnd = th->th_win; - if (to.to_flags & TOF_TS) { + if ((to.to_flags & TOF_TS) && + (tp->t_flags & TF_REQ_TSTMP)) { tp->t_flags |= TF_RCVD_TSTMP; tp->ts_recent = to.to_tsval; tp->ts_recent_age = tcp_ts_getticks(); - } + } else + tp->t_flags &= ~TF_REQ_TSTMP; if (to.to_flags & TOF_MSS) tcp_mss(tp, to.to_mss); if ((tp->t_flags & TF_SACK_PERMIT) && Modified: head/sys/netinet/tcp_stacks/bbr.c == --- head/sys/netinet/tcp_stacks/bbr.c Thu May 21 21:15:25 2020 (r361345) +++ head/sys/netinet/tcp_stacks/bbr.c Thu May 21 21:26:21 2020 (r361346) @@ -11595,17 +11595,20 @@ bbr_do_segment_nounlock(struct mbuf *m, struct tcphdr (tp->t_flags & TF_REQ_SCALE)) { tp->t_flags |= TF_RCVD_SCALE; tp->snd_scale = to.to_wscale; - } + } else + tp->t_flags &= ~TF_REQ_SCALE; /* * Initial send window. It will be updated with the * next incoming segment to the scaled value. */ tp->snd_wnd = th->th_win; - if (to.to_flags & TOF_TS) { + if ((to.to_flags & TOF_TS) && + (tp->t_flags & TF_REQ_TSTMP)) { tp->t_flags |= TF_RCVD_TSTMP; tp->ts_recent = to.to_tsval; tp->ts_recent_age = tcp_tv_to_mssectick(>rc_tv); - } + } else + tp->t_flags &= ~TF_REQ_TSTMP; if (to.to_flags & TOF_MSS) tcp_mss(tp, to.to_mss); if ((tp->t_flags & TF_SACK_PERMIT) && Modified: head/sys/netinet/tcp_stacks/rack.c == --- head/sys/netinet/tcp_stacks/rack.c Thu May 21 21:15:25 2020 (r361345) +++ head/sys/netinet/tcp_stacks/rack.c Thu May 21 21:26:21 2020 (r361346) @@ -11082,17 +11082,20 @@ rack_do_segment_nounlock(struct mbuf *m, struct tcphdr (tp->t_flags & TF_REQ_SCALE)) { tp->t_flags |= TF_RCVD_SCALE; tp->snd_scale = to.to_wscale; - } + } else + tp->t_flags &= ~TF_REQ_SCALE; /* * Initial send window. It will be updated with the * next incoming segment to the scaled value. */ tp->snd_wnd = th->th_win; - if (to.to_flags & TOF_TS) { + if ((to.to_flags & TOF_TS) && + (tp->t_flags & TF_REQ_TSTMP)) { tp->t_flags |= TF_RCVD_TSTMP; tp->ts_recent =
svn commit: r361345 - in head/sys/netinet: . tcp_stacks
Author: rscheff Date: Thu May 21 21:15:25 2020 New Revision: 361345 URL: https://svnweb.freebsd.org/changeset/base/361345 Log: Handle ECN handshake in simultaneous open While testing simultaneous open TCP with ECN, found that negotiation fails to arrive at the expected final state. Reviewed by: tuexen (mentor) Approved by: tuexen (mentor), rgrimes (mentor) MFC after:2 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D23373 Modified: head/sys/netinet/tcp_input.c head/sys/netinet/tcp_output.c head/sys/netinet/tcp_stacks/rack.c Modified: head/sys/netinet/tcp_input.c == --- head/sys/netinet/tcp_input.cThu May 21 21:00:46 2020 (r361344) +++ head/sys/netinet/tcp_input.cThu May 21 21:15:25 2020 (r361345) @@ -1611,6 +1611,14 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru * XXX this is traditional behavior, may need to be cleaned up. */ if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) { + /* Handle parallel SYN for ECN */ + if (!(thflags & TH_ACK) && + ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | TH_ECE)) && + ((V_tcp_do_ecn == 1) || (V_tcp_do_ecn == 2))) { + tp->t_flags2 |= TF2_ECN_PERMIT; + tp->t_flags2 |= TF2_ECN_SND_ECE; + TCPSTAT_INC(tcps_ecn_shs); + } if ((to.to_flags & TOF_SCALE) && (tp->t_flags & TF_REQ_SCALE)) { tp->t_flags |= TF_RCVD_SCALE; Modified: head/sys/netinet/tcp_output.c == --- head/sys/netinet/tcp_output.c Thu May 21 21:00:46 2020 (r361344) +++ head/sys/netinet/tcp_output.c Thu May 21 21:15:25 2020 (r361345) @@ -1154,6 +1154,12 @@ send: } else flags |= TH_ECE|TH_CWR; } + /* Handle parallel SYN for ECN */ + if ((tp->t_state == TCPS_SYN_RECEIVED) && + (tp->t_flags2 & TF2_ECN_SND_ECE)) { + flags |= TH_ECE; + tp->t_flags2 &= ~TF2_ECN_SND_ECE; + } if (tp->t_state == TCPS_ESTABLISHED && (tp->t_flags2 & TF2_ECN_PERMIT)) { Modified: head/sys/netinet/tcp_stacks/rack.c == --- head/sys/netinet/tcp_stacks/rack.c Thu May 21 21:00:46 2020 (r361344) +++ head/sys/netinet/tcp_stacks/rack.c Thu May 21 21:15:25 2020 
(r361345) @@ -11070,6 +11070,14 @@ rack_do_segment_nounlock(struct mbuf *m, struct tcphdr * this is traditional behavior, may need to be cleaned up. */ if (tp->t_state == TCPS_SYN_SENT && (thflags & TH_SYN)) { + /* Handle parallel SYN for ECN */ + if (!(thflags & TH_ACK) && + ((thflags & (TH_CWR | TH_ECE)) == (TH_CWR | TH_ECE)) && + ((V_tcp_do_ecn == 1) || (V_tcp_do_ecn == 2))) { + tp->t_flags2 |= TF2_ECN_PERMIT; + tp->t_flags2 |= TF2_ECN_SND_ECE; + TCPSTAT_INC(tcps_ecn_shs); + } if ((to.to_flags & TOF_SCALE) && (tp->t_flags & TF_REQ_SCALE)) { tp->t_flags |= TF_RCVD_SCALE; @@ -13522,6 +13530,12 @@ send: flags |= TH_ECE | TH_CWR; } else flags |= TH_ECE | TH_CWR; + } + /* Handle parallel SYN for ECN */ + if ((tp->t_state == TCPS_SYN_RECEIVED) && + (tp->t_flags2 & TF2_ECN_SND_ECE)) { + flags |= TH_ECE; + tp->t_flags2 &= ~TF2_ECN_SND_ECE; } if (tp->t_state == TCPS_ESTABLISHED && (tp->t_flags2 & TF2_ECN_PERMIT)) { ___ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r361342 - in stable/12/sys/netinet: . tcp_stacks
Author: rscheff Date: Thu May 21 19:46:11 2020 New Revision: 361342 URL: https://svnweb.freebsd.org/changeset/base/361342 Log: MFC r360477: Correctly set up the initial TCP congestion window in all cases by not including the SYN bit sequence space in cwnd related calculations. snd_una is adjusted explicitly in all cases, outside the cwnd update, instead. This fixes an off-by-one conformance issue with regular TCP sessions not using Appropriate Byte Counting (RFC3465), sending one more packet during the initial window than expected. PR: 235256 Reviewed by: tuexen (mentor), rgrimes (mentor, blanket) Approved by: tuexen (mentor), rgrimes (mentor, blanket) MFC after:3 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D19000 Modified: stable/12/sys/netinet/tcp_input.c stable/12/sys/netinet/tcp_stacks/rack.c Directory Properties: stable/12/ (props changed) Modified: stable/12/sys/netinet/tcp_input.c == --- stable/12/sys/netinet/tcp_input.c Thu May 21 19:45:14 2020 (r361341) +++ stable/12/sys/netinet/tcp_input.c Thu May 21 19:46:11 2020 (r361342) @@ -1519,7 +1519,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos) { int thflags, acked, ourfinisacked, needoutput = 0, sack_changed; - int rstreason, todrop, win; + int rstreason, todrop, win, incforsyn = 0; uint32_t tiwin; uint16_t nsegs; char *s; @@ -2432,12 +2432,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru if (IS_FASTOPEN(tp->t_flags) && tp->t_tfo_pending) { tcp_fastopen_decrement_counter(tp->t_tfo_pending); tp->t_tfo_pending = NULL; - - /* -* Account for the ACK of our SYN prior to -* regular ACK processing below. 
-*/ - tp->snd_una++; } if (tp->t_flags & TF_NEEDFIN) { tcp_state_change(tp, TCPS_FIN_WAIT_1); @@ -2458,6 +2452,13 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp)); } /* +* Account for the ACK of our SYN prior to +* regular ACK processing below, except for +* simultaneous SYN, which is handled later. +*/ + if (SEQ_GT(th->th_ack, tp->snd_una) && !(tp->t_flags & TF_NEEDSYN)) + incforsyn = 1; + /* * If segment contains data or ACK, will call tcp_reass() * later; if not, do so now to pass queued data to user. */ @@ -2751,6 +2752,15 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru process_ACK: INP_WLOCK_ASSERT(tp->t_inpcb); + /* +* Adjust for the SYN bit in sequence space, +* but don't account for it in cwnd calculations. +* This is for the SYN_RECEIVED, non-simultaneous +* SYN case. SYN_SENT and simultaneous SYN are +* treated elsewhere. +*/ + if (incforsyn) + tp->snd_una++; acked = BYTES_THIS_ACK(tp, th); KASSERT(acked >= 0, ("%s: acked unexepectedly negative " "(tp->snd_una=%u, th->th_ack=%u, tp=%p, m=%p)", __func__, Modified: stable/12/sys/netinet/tcp_stacks/rack.c == --- stable/12/sys/netinet/tcp_stacks/rack.c Thu May 21 19:45:14 2020 (r361341) +++ stable/12/sys/netinet/tcp_stacks/rack.c Thu May 21 19:46:11 2020 (r361342) @@ -5580,12 +5580,6 @@ rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, st if (IS_FASTOPEN(tp->t_flags) && tp->t_tfo_pending) { tcp_fastopen_decrement_counter(tp->t_tfo_pending); tp->t_tfo_pending = NULL; - - /* -* Account for the ACK of our SYN prior to -* regular ACK processing below. -*/ - tp->snd_una++; } if (tp->t_flags & TF_NEEDFIN) { tcp_state_change(tp, TCPS_FIN_WAIT_1); @@ -5603,6 +5597,13 @@ rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, st if (!IS_FASTOPEN(tp->t_flags)) cc_conn_init(tp); } + /* +* Account for the ACK of our SYN prior to +* regular ACK processing below, except for +* simultaneous SYN, which is handled later. 
+*/ + if (SEQ_GT(th->th_ack, tp->snd_una) && !(tp->t_flags & TF_NEEDSYN)) + tp->snd_una++;
svn commit: r361340 - in stable/12/sys/netinet: . tcp_stacks
Author: rscheff Date: Thu May 21 19:41:25 2020 New Revision: 361340 URL: https://svnweb.freebsd.org/changeset/base/361340 Log: MFC r360479: Prevent premature shrinking of the scaled receive window which can cause a TCP client to use invalid or stale TCP sequence numbers for ACK packets. Packets with old sequence numbers are ignored and not used to update the send window size. This might cause the TCP session to hang indefinitely under some circumstances. Reported by: Cui Cheng Reviewed by: tuexen (mentor), rgrimes (mentor, blanket) Approved by: tuexen (mentor), rgrimes (mentor, blanket) MFC after:3 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D24515 Modified: stable/12/sys/netinet/tcp_output.c stable/12/sys/netinet/tcp_stacks/rack.c Directory Properties: stable/12/ (props changed) Modified: stable/12/sys/netinet/tcp_output.c == --- stable/12/sys/netinet/tcp_output.c Thu May 21 18:50:05 2020 (r361339) +++ stable/12/sys/netinet/tcp_output.c Thu May 21 19:41:25 2020 (r361340) @@ -1206,8 +1206,11 @@ send: if (flags & TH_SYN) th->th_win = htons((u_short) (min(sbspace(>so_rcv), TCP_MAXWIN))); - else + else { + /* Avoid shrinking window with window scaling. */ + recwin = roundup2(recwin, 1 << tp->rcv_scale); th->th_win = htons((u_short)(recwin >> tp->rcv_scale)); + } /* * Adjust the RXWIN0SENT flag - indicate that we have advertised Modified: stable/12/sys/netinet/tcp_stacks/rack.c == --- stable/12/sys/netinet/tcp_stacks/rack.c Thu May 21 18:50:05 2020 (r361339) +++ stable/12/sys/netinet/tcp_stacks/rack.c Thu May 21 19:41:25 2020 (r361340) @@ -8355,8 +8355,11 @@ send: if (flags & TH_SYN) th->th_win = htons((u_short) (min(sbspace(>so_rcv), TCP_MAXWIN))); - else + else { + /* Avoid shrinking window with window scaling. */ + recwin = roundup2(recwin, 1 << tp->rcv_scale); th->th_win = htons((u_short)(recwin >> tp->rcv_scale)); + } /* * Adjust the RXWIN0SENT flag - indicate that we have advertised a 0 * window. 
This may cause the remote transmitter to stall. This ___ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r361029 - stable/12/sys/netinet/cc
Author: rscheff Date: Thu May 14 09:55:41 2020 New Revision: 361029 URL: https://svnweb.freebsd.org/changeset/base/361029 Log: MFC r360491: Introduce a lower bound of 2 MSS to TCP Cubic. Running TCP Cubic together with ECN could end up reducing cwnd down to 1 byte, if the receiver continuously sets the ECE flag, resulting in very poor transmission speeds. In line with RFC6582 App. B, a lower bound of 2 MSS is introduced, as well as a typecast to prevent any potential integer overflows during intermediate calculation steps of the adjusted cwnd. Reported by: Cheng Cui Reviewed by: tuexen (mentor) Approved by: tuexen (mentor) Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D23353 Modified: stable/12/sys/netinet/cc/cc_cubic.c Directory Properties: stable/12/ (props changed) Modified: stable/12/sys/netinet/cc/cc_cubic.c == --- stable/12/sys/netinet/cc/cc_cubic.c Thu May 14 09:18:50 2020 (r361028) +++ stable/12/sys/netinet/cc/cc_cubic.c Thu May 14 09:55:41 2020 (r361029) @@ -332,8 +332,9 @@ cubic_post_recovery(struct cc_var *ccv) CCV(ccv, t_maxseg); else /* Update cwnd based on beta and adjusted max_cwnd. */ - CCV(ccv, snd_cwnd) = max(1, ((CUBIC_BETA * - cubic_data->max_cwnd) >> CUBIC_SHIFT)); + CCV(ccv, snd_cwnd) = max(((uint64_t)cubic_data->max_cwnd * + CUBIC_BETA) >> CUBIC_SHIFT, + 2 * CCV(ccv, t_maxseg)); } cubic_data->t_last_cong = ticks; @@ -399,6 +400,7 @@ static void cubic_ssthresh_update(struct cc_var *ccv) { struct cubic *cubic_data; + uint32_t ssthresh; cubic_data = ccv->cc_data; @@ -407,10 +409,11 @@ cubic_ssthresh_update(struct cc_var *ccv) * subsequent congestion events, set it to cwnd * beta. 
*/ if (cubic_data->num_cong_events == 0) - CCV(ccv, snd_ssthresh) = CCV(ccv, snd_cwnd) >> 1; + ssthresh = CCV(ccv, snd_cwnd) >> 1; else - CCV(ccv, snd_ssthresh) = ((u_long)CCV(ccv, snd_cwnd) * + ssthresh = ((uint64_t)CCV(ccv, snd_cwnd) * CUBIC_BETA) >> CUBIC_SHIFT; + CCV(ccv, snd_ssthresh) = max(ssthresh, 2 * CCV(ccv, t_maxseg)); } ___ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r360510 - in stable/12: share/man/man7 sys/netinet
Author: rscheff Date: Thu Apr 30 21:16:08 2020 New Revision: 360510 URL: https://svnweb.freebsd.org/changeset/base/360510 Log: MFC r360010: Reduce the delayed ACK timeout from 100ms to 40ms. Reviewed by: kbowling, tuexen Approved by: tuexen (mentor) MFC after:2 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D23281 Modified: stable/12/share/man/man7/tuning.7 stable/12/sys/netinet/tcp_timer.h Directory Properties: stable/12/ (props changed) Modified: stable/12/share/man/man7/tuning.7 == --- stable/12/share/man/man7/tuning.7 Thu Apr 30 21:09:01 2020 (r360509) +++ stable/12/share/man/man7/tuning.7 Thu Apr 30 21:16:08 2020 (r360510) @@ -24,7 +24,7 @@ .\" .\" $FreeBSD$ .\" -.Dd October 30, 2017 +.Dd April 16, 2020 .Dt TUNING 7 .Os .Sh NAME @@ -435,7 +435,7 @@ number of tiny packets flowing across the network in h The .Fx delayed ACK implementation also follows the TCP protocol rule that -at least every other packet be acknowledged even if the standard 100ms +at least every other packet be acknowledged even if the standard 40ms timeout has not yet passed. Normally the worst a delayed ACK can do is slightly delay the teardown of a connection, or slightly delay the ramp-up Modified: stable/12/sys/netinet/tcp_timer.h == --- stable/12/sys/netinet/tcp_timer.h Thu Apr 30 21:09:01 2020 (r360509) +++ stable/12/sys/netinet/tcp_timer.h Thu Apr 30 21:16:08 2020 (r360510) @@ -119,7 +119,7 @@ #defineTCP_MAXRXTSHIFT 12 /* maximum retransmits */ -#defineTCPTV_DELACK( hz/10 ) /* 100ms timeout */ +#defineTCPTV_DELACK( hz/25 ) /* 40ms timeout */ /* * If we exceed this number of retransmits for a single segment, we'll consider ___ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r360491 - head/sys/netinet/cc
Author: rscheff Date: Thu Apr 30 11:11:28 2020 New Revision: 360491 URL: https://svnweb.freebsd.org/changeset/base/360491 Log: Introduce a lower bound of 2 MSS to TCP Cubic. Running TCP Cubic together with ECN could end up reducing cwnd down to 1 byte, if the receiver continuously sets the ECE flag, resulting in very poor transmission speeds. In line with RFC6582 App. B, a lower bound of 2 MSS is introduced, as well as a typecast to prevent any potential integer overflows during intermediate calculation steps of the adjusted cwnd. Reported by: Cheng Cui Reviewed by: tuexen (mentor) Approved by: tuexen (mentor) MFC after:2 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D23353 Modified: head/sys/netinet/cc/cc_cubic.c Modified: head/sys/netinet/cc/cc_cubic.c == --- head/sys/netinet/cc/cc_cubic.c Thu Apr 30 06:34:34 2020 (r360490) +++ head/sys/netinet/cc/cc_cubic.c Thu Apr 30 11:11:28 2020 (r360491) @@ -366,8 +366,9 @@ cubic_post_recovery(struct cc_var *ccv) CCV(ccv, t_maxseg); else /* Update cwnd based on beta and adjusted max_cwnd. */ - CCV(ccv, snd_cwnd) = max(1, ((CUBIC_BETA * - cubic_data->max_cwnd) >> CUBIC_SHIFT)); + CCV(ccv, snd_cwnd) = max(((uint64_t)cubic_data->max_cwnd * + CUBIC_BETA) >> CUBIC_SHIFT, + 2 * CCV(ccv, t_maxseg)); } cubic_data->t_last_cong = ticks; @@ -433,6 +434,7 @@ static void cubic_ssthresh_update(struct cc_var *ccv) { struct cubic *cubic_data; + uint32_t ssthresh; cubic_data = ccv->cc_data; @@ -441,10 +443,11 @@ cubic_ssthresh_update(struct cc_var *ccv) * subsequent congestion events, set it to cwnd * beta. 
*/ if (cubic_data->num_cong_events == 0) - CCV(ccv, snd_ssthresh) = CCV(ccv, snd_cwnd) >> 1; + ssthresh = CCV(ccv, snd_cwnd) >> 1; else - CCV(ccv, snd_ssthresh) = ((u_long)CCV(ccv, snd_cwnd) * + ssthresh = ((uint64_t)CCV(ccv, snd_cwnd) * CUBIC_BETA) >> CUBIC_SHIFT; + CCV(ccv, snd_ssthresh) = max(ssthresh, 2 * CCV(ccv, t_maxseg)); } ___ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r360479 - in head/sys/netinet: . tcp_stacks
Author: rscheff Date: Wed Apr 29 22:01:33 2020 New Revision: 360479 URL: https://svnweb.freebsd.org/changeset/base/360479 Log: Prevent premature shrinking of the scaled receive window which can cause a TCP client to use invalid or stale TCP sequence numbers for ACK packets. Packets with old sequence numbers are ignored and not used to update the send window size. This might cause the TCP session to hang indefinitely under some circumstances. Reported by: Cui Cheng Reviewed by: tuexen (mentor), rgrimes (mentor) Approved by: tuexen (mentor), rgrimes (mentor) MFC after:3 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D24515 Modified: head/sys/netinet/tcp_output.c head/sys/netinet/tcp_stacks/bbr.c head/sys/netinet/tcp_stacks/rack.c Modified: head/sys/netinet/tcp_output.c == --- head/sys/netinet/tcp_output.c Wed Apr 29 21:54:09 2020 (r360478) +++ head/sys/netinet/tcp_output.c Wed Apr 29 22:01:33 2020 (r360479) @@ -1238,8 +1238,11 @@ send: if (flags & TH_SYN) th->th_win = htons((u_short) (min(sbspace(>so_rcv), TCP_MAXWIN))); - else + else { + /* Avoid shrinking window with window scaling. */ + recwin = roundup2(recwin, 1 << tp->rcv_scale); th->th_win = htons((u_short)(recwin >> tp->rcv_scale)); + } /* * Adjust the RXWIN0SENT flag - indicate that we have advertised Modified: head/sys/netinet/tcp_stacks/bbr.c == --- head/sys/netinet/tcp_stacks/bbr.c Wed Apr 29 21:54:09 2020 (r360478) +++ head/sys/netinet/tcp_stacks/bbr.c Wed Apr 29 22:01:33 2020 (r360479) @@ -13756,8 +13756,11 @@ send: if (flags & TH_SYN) th->th_win = htons((u_short) (min(sbspace(>so_rcv), TCP_MAXWIN))); - else + else { + /* Avoid shrinking window with window scaling. */ + recwin = roundup2(recwin, 1 << tp->rcv_scale); th->th_win = htons((u_short)(recwin >> tp->rcv_scale)); + } /* * Adjust the RXWIN0SENT flag - indicate that we have advertised a 0 * window. This may cause the remote transmitter to stall. 
This Modified: head/sys/netinet/tcp_stacks/rack.c == --- head/sys/netinet/tcp_stacks/rack.c Wed Apr 29 21:54:09 2020 (r360478) +++ head/sys/netinet/tcp_stacks/rack.c Wed Apr 29 22:01:33 2020 (r360479) @@ -9572,8 +9572,11 @@ send: if (flags & TH_SYN) th->th_win = htons((u_short) (min(sbspace(>so_rcv), TCP_MAXWIN))); - else + else { + /* Avoid shrinking window with window scaling. */ + recwin = roundup2(recwin, 1 << tp->rcv_scale); th->th_win = htons((u_short)(recwin >> tp->rcv_scale)); + } /* * Adjust the RXWIN0SENT flag - indicate that we have advertised a 0 * window. This may cause the remote transmitter to stall. This ___ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r360477 - in head/sys/netinet: . tcp_stacks
Author: rscheff Date: Wed Apr 29 21:48:52 2020 New Revision: 360477 URL: https://svnweb.freebsd.org/changeset/base/360477 Log: Correctly set up the initial TCP congestion window in all cases, by not including the SYN bit sequence space in cwnd related calculations. snd_una is adjusted explicitly in all cases, outside the cwnd update, instead. This fixes an off-by-one conformance issue with regular TCP sessions not using Appropriate Byte Counting (RFC3465), sending one more packet during the initial window than expected. PR: 235256 Reviewed by: tuexen (mentor), rgrimes (mentor) Approved by: tuexen (mentor), rgrimes (mentor) MFC after:3 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D19000 Modified: head/sys/netinet/tcp_input.c head/sys/netinet/tcp_stacks/bbr.c head/sys/netinet/tcp_stacks/rack.c Modified: head/sys/netinet/tcp_input.c == --- head/sys/netinet/tcp_input.cWed Apr 29 21:12:32 2020 (r360476) +++ head/sys/netinet/tcp_input.cWed Apr 29 21:48:52 2020 (r360477) @@ -1470,7 +1470,7 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos) { int thflags, acked, ourfinisacked, needoutput = 0, sack_changed; - int rstreason, todrop, win; + int rstreason, todrop, win, incforsyn = 0; uint32_t tiwin; uint16_t nsegs; char *s; @@ -2374,12 +2374,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru if (IS_FASTOPEN(tp->t_flags) && tp->t_tfo_pending) { tcp_fastopen_decrement_counter(tp->t_tfo_pending); tp->t_tfo_pending = NULL; - - /* -* Account for the ACK of our SYN prior to -* regular ACK processing below. -*/ - tp->snd_una++; } if (tp->t_flags & TF_NEEDFIN) { tcp_state_change(tp, TCPS_FIN_WAIT_1); @@ -2400,6 +2394,13 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp)); } /* +* Account for the ACK of our SYN prior to +* regular ACK processing below, except for +* simultaneous SYN, which is handled later. 
+*/ + if (SEQ_GT(th->th_ack, tp->snd_una) && !(tp->t_flags & TF_NEEDSYN)) + incforsyn = 1; + /* * If segment contains data or ACK, will call tcp_reass() * later; if not, do so now to pass queued data to user. */ @@ -2693,6 +2694,15 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru process_ACK: INP_WLOCK_ASSERT(tp->t_inpcb); + /* +* Adjust for the SYN bit in sequence space, +* but don't account for it in cwnd calculations. +* This is for the SYN_RECEIVED, non-simultaneous +* SYN case. SYN_SENT and simultaneous SYN are +* treated elsewhere. +*/ + if (incforsyn) + tp->snd_una++; acked = BYTES_THIS_ACK(tp, th); KASSERT(acked >= 0, ("%s: acked unexepectedly negative " "(tp->snd_una=%u, th->th_ack=%u, tp=%p, m=%p)", __func__, Modified: head/sys/netinet/tcp_stacks/bbr.c == --- head/sys/netinet/tcp_stacks/bbr.c Wed Apr 29 21:12:32 2020 (r360476) +++ head/sys/netinet/tcp_stacks/bbr.c Wed Apr 29 21:48:52 2020 (r360477) @@ -9326,11 +9326,6 @@ bbr_do_syn_recv(struct mbuf *m, struct tcphdr *th, str tcp_fastopen_decrement_counter(tp->t_tfo_pending); tp->t_tfo_pending = NULL; - /* -* Account for the ACK of our SYN prior to regular -* ACK processing below. -*/ - tp->snd_una++; } /* * Make transitions: SYN-RECEIVED -> ESTABLISHED SYN-RECEIVED* -> @@ -9353,6 +9348,13 @@ bbr_do_syn_recv(struct mbuf *m, struct tcphdr *th, str if (!IS_FASTOPEN(tp->t_flags)) cc_conn_init(tp); } + /* +* Account for the ACK of our SYN prior to +* regular ACK processing below, except for +* simultaneous SYN, which is handled later. +*/ + if (SEQ_GT(th->th_ack, tp->snd_una) && !(tp->t_flags & TF_NEEDSYN)) + tp->snd_una++; /* * If segment contains data or ACK, will call tcp_reass() later; if * not, do so now to pass queued data to user. Modified:
svn commit: r360180 - in head/sys/netinet: . tcp_stacks
Author: rscheff Date: Wed Apr 22 00:16:42 2020 New Revision: 360180 URL: https://svnweb.freebsd.org/changeset/base/360180 Log: Revert rS360143 (Correctly set up initial cwnd) because of panics found by syzkaller. Reported by: tuexen Approved by: tuexen (mentor) Sponsored by: NetApp, Inc. Modified: head/sys/netinet/tcp_input.c head/sys/netinet/tcp_stacks/bbr.c head/sys/netinet/tcp_stacks/rack.c Modified: head/sys/netinet/tcp_input.c == --- head/sys/netinet/tcp_input.cTue Apr 21 23:38:54 2020 (r360179) +++ head/sys/netinet/tcp_input.cWed Apr 22 00:16:42 2020 (r360180) @@ -2374,6 +2374,12 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru if (IS_FASTOPEN(tp->t_flags) && tp->t_tfo_pending) { tcp_fastopen_decrement_counter(tp->t_tfo_pending); tp->t_tfo_pending = NULL; + + /* +* Account for the ACK of our SYN prior to +* regular ACK processing below. +*/ + tp->snd_una++; } if (tp->t_flags & TF_NEEDFIN) { tcp_state_change(tp, TCPS_FIN_WAIT_1); @@ -2393,12 +2399,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru cc_conn_init(tp); tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp)); } - if (SEQ_GT(th->th_ack, tp->snd_una)) - /* -* Account for the ACK of our SYN prior to -* regular ACK processing below. -*/ - tp->snd_una++; /* * If segment contains data or ACK, will call tcp_reass() * later; if not, do so now to pass queued data to user. Modified: head/sys/netinet/tcp_stacks/bbr.c == --- head/sys/netinet/tcp_stacks/bbr.c Tue Apr 21 23:38:54 2020 (r360179) +++ head/sys/netinet/tcp_stacks/bbr.c Wed Apr 22 00:16:42 2020 (r360180) @@ -9325,6 +9325,11 @@ bbr_do_syn_recv(struct mbuf *m, struct tcphdr *th, str tcp_fastopen_decrement_counter(tp->t_tfo_pending); tp->t_tfo_pending = NULL; + /* +* Account for the ACK of our SYN prior to regular +* ACK processing below.
+*/ + tp->snd_una++; } /* * Make transitions: SYN-RECEIVED -> ESTABLISHED SYN-RECEIVED* -> @@ -9347,12 +9352,6 @@ bbr_do_syn_recv(struct mbuf *m, struct tcphdr *th, str if (!IS_FASTOPEN(tp->t_flags)) cc_conn_init(tp); } - if (SEQ_GT(th->th_ack, tp->snd_una)) - /* -* Account for the ACK of our SYN prior to -* regular ACK processing below. -*/ - tp->snd_una++; /* * If segment contains data or ACK, will call tcp_reass() later; if * not, do so now to pass queued data to user. Modified: head/sys/netinet/tcp_stacks/rack.c == --- head/sys/netinet/tcp_stacks/rack.c Tue Apr 21 23:38:54 2020 (r360179) +++ head/sys/netinet/tcp_stacks/rack.c Wed Apr 22 00:16:42 2020 (r360180) @@ -6539,6 +6539,12 @@ rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, st if (IS_FASTOPEN(tp->t_flags) && tp->t_tfo_pending) { tcp_fastopen_decrement_counter(tp->t_tfo_pending); tp->t_tfo_pending = NULL; + + /* +* Account for the ACK of our SYN prior to +* regular ACK processing below. +*/ + tp->snd_una++; } if (tp->t_flags & TF_NEEDFIN) { tcp_state_change(tp, TCPS_FIN_WAIT_1); @@ -6556,12 +6562,6 @@ rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, st if (!IS_FASTOPEN(tp->t_flags)) cc_conn_init(tp); } - if (SEQ_GT(th->th_ack, tp->snd_una)) - /* -* Account for the ACK of our SYN prior to -* regular ACK processing below. -*/ - tp->snd_una++; /* * If segment contains data or ACK, will call tcp_reass() later; if * not, do so now to pass queued data to user. ___ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r360143 - in head/sys/netinet: . tcp_stacks
Author: rscheff Date: Tue Apr 21 13:05:44 2020 New Revision: 360143 URL: https://svnweb.freebsd.org/changeset/base/360143 Log: Correctly set up the initial TCP congestion window in all cases, by adjusting snd_una right after the connection initialization, to include the one byte in sequence space occupied by the SYN bit. This does not change the regular ACK processing, while making the BYTES_THIS_ACK macro work properly. PR: 235256 Reviewed by: tuexen (mentor), rgrimes (mentor) Approved by: tuexen (mentor), rgrimes (mentor) MFC after: 2 weeks Sponsored by: NetApp, Inc. Differential Revision: https://reviews.freebsd.org/D19000 Modified: head/sys/netinet/tcp_input.c head/sys/netinet/tcp_stacks/bbr.c head/sys/netinet/tcp_stacks/rack.c Modified: head/sys/netinet/tcp_input.c == --- head/sys/netinet/tcp_input.cTue Apr 21 05:00:35 2020 (r360142) +++ head/sys/netinet/tcp_input.cTue Apr 21 13:05:44 2020 (r360143) @@ -2374,12 +2374,6 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru if (IS_FASTOPEN(tp->t_flags) && tp->t_tfo_pending) { tcp_fastopen_decrement_counter(tp->t_tfo_pending); tp->t_tfo_pending = NULL; - - /* -* Account for the ACK of our SYN prior to -* regular ACK processing below. -*/ - tp->snd_una++; } if (tp->t_flags & TF_NEEDFIN) { tcp_state_change(tp, TCPS_FIN_WAIT_1); @@ -2399,6 +2393,12 @@ tcp_do_segment(struct mbuf *m, struct tcphdr *th, stru cc_conn_init(tp); tcp_timer_activate(tp, TT_KEEP, TP_KEEPIDLE(tp)); } + if (SEQ_GT(th->th_ack, tp->snd_una)) + /* +* Account for the ACK of our SYN prior to +* regular ACK processing below. +*/ + tp->snd_una++; /* * If segment contains data or ACK, will call tcp_reass() * later; if not, do so now to pass queued data to user.
Modified: head/sys/netinet/tcp_stacks/bbr.c == --- head/sys/netinet/tcp_stacks/bbr.c Tue Apr 21 05:00:35 2020 (r360142) +++ head/sys/netinet/tcp_stacks/bbr.c Tue Apr 21 13:05:44 2020 (r360143) @@ -9325,11 +9325,6 @@ bbr_do_syn_recv(struct mbuf *m, struct tcphdr *th, str tcp_fastopen_decrement_counter(tp->t_tfo_pending); tp->t_tfo_pending = NULL; - /* -* Account for the ACK of our SYN prior to regular -* ACK processing below. -*/ - tp->snd_una++; } /* * Make transitions: SYN-RECEIVED -> ESTABLISHED SYN-RECEIVED* -> @@ -9352,6 +9347,12 @@ bbr_do_syn_recv(struct mbuf *m, struct tcphdr *th, str if (!IS_FASTOPEN(tp->t_flags)) cc_conn_init(tp); } + if (SEQ_GT(th->th_ack, tp->snd_una)) + /* +* Account for the ACK of our SYN prior to +* regular ACK processing below. +*/ + tp->snd_una++; /* * If segment contains data or ACK, will call tcp_reass() later; if * not, do so now to pass queued data to user. Modified: head/sys/netinet/tcp_stacks/rack.c == --- head/sys/netinet/tcp_stacks/rack.c Tue Apr 21 05:00:35 2020 (r360142) +++ head/sys/netinet/tcp_stacks/rack.c Tue Apr 21 13:05:44 2020 (r360143) @@ -6539,12 +6539,6 @@ rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, st if (IS_FASTOPEN(tp->t_flags) && tp->t_tfo_pending) { tcp_fastopen_decrement_counter(tp->t_tfo_pending); tp->t_tfo_pending = NULL; - - /* -* Account for the ACK of our SYN prior to -* regular ACK processing below. -*/ - tp->snd_una++; } if (tp->t_flags & TF_NEEDFIN) { tcp_state_change(tp, TCPS_FIN_WAIT_1); @@ -6562,6 +6556,12 @@ rack_do_syn_recv(struct mbuf *m, struct tcphdr *th, st if (!IS_FASTOPEN(tp->t_flags)) cc_conn_init(tp); } + if (SEQ_GT(th->th_ack, tp->snd_una)) + /* +* Account for the ACK of our SYN prior to +* regular ACK processing below. +*/ + tp->snd_una++; /* * If segment contains data or ACK, will call tcp_reass() later; if * not, do so now
svn commit: r360010 - in head: share/man/man7 sys/netinet
Author: rscheff Date: Thu Apr 16 15:59:23 2020 New Revision: 360010 URL: https://svnweb.freebsd.org/changeset/base/360010 Log: Reduce default TCP delayed ACK timeout to 40ms. Reviewed by: kbowling, tuexen Approved by: tuexen (mentor) MFC after:2 weeks Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D23281 Modified: head/share/man/man7/tuning.7 head/sys/netinet/tcp_timer.h Modified: head/share/man/man7/tuning.7 == --- head/share/man/man7/tuning.7Thu Apr 16 15:58:58 2020 (r360009) +++ head/share/man/man7/tuning.7Thu Apr 16 15:59:23 2020 (r360010) @@ -24,7 +24,7 @@ .\" .\" $FreeBSD$ .\" -.Dd October 30, 2017 +.Dd April 16, 2020 .Dt TUNING 7 .Os .Sh NAME @@ -435,7 +435,7 @@ number of tiny packets flowing across the network in h The .Fx delayed ACK implementation also follows the TCP protocol rule that -at least every other packet be acknowledged even if the standard 100ms +at least every other packet be acknowledged even if the standard 40ms timeout has not yet passed. Normally the worst a delayed ACK can do is slightly delay the teardown of a connection, or slightly delay the ramp-up Modified: head/sys/netinet/tcp_timer.h == --- head/sys/netinet/tcp_timer.hThu Apr 16 15:58:58 2020 (r360009) +++ head/sys/netinet/tcp_timer.hThu Apr 16 15:59:23 2020 (r360010) @@ -119,7 +119,7 @@ #defineTCP_MAXRXTSHIFT 12 /* maximum retransmits */ -#defineTCPTV_DELACK( hz/10 ) /* 100ms timeout */ +#defineTCPTV_DELACK( hz/25 ) /* 40ms timeout */ /* * If we exceed this number of retransmits for a single segment, we'll consider ___ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r359933 - stable/12/share/misc
Author: rscheff Date: Tue Apr 14 18:24:59 2020 New Revision: 359933 URL: https://svnweb.freebsd.org/changeset/base/359933 Log: add my (rscheff) mentor relationship Reviewed by: rgrimes (mentor), tuexen (mentor) Approved by: rgrimes (mentor), tuexen (mentor) Sponsored by: NetApp, Inc. Differential Revision: https://reviews.freebsd.org/D24318 Modified: stable/12/share/misc/committers-src.dot Directory Properties: stable/12/ (props changed) Modified: stable/12/share/misc/committers-src.dot == --- stable/12/share/misc/committers-src.dot Tue Apr 14 18:11:54 2020 (r359932) +++ stable/12/share/misc/committers-src.dot Tue Apr 14 18:24:59 2020 (r359933) @@ -304,6 +304,7 @@ royger [label="Roger Pau Monne\nroy...@freebsd.org\n20 rpaulo [label="Rui Paulo\nrpa...@freebsd.org\n2007/09/25"] rpokala [label="Ravi Pokala\nrpok...@freebsd.org\n2015/11/19"] rrs [label="Randall R Stewart\n...@freebsd.org\n2007/02/08"] +rscheff [label="Richard Scheffenegger\nrsch...@freebsd.org\n2020/04/06"] rse [label="Ralf S. Engelschall\n...@freebsd.org\n1997/07/31"] rstone [label="Ryan Stone\nrst...@freebsd.org\n2010/04/19"] ru [label="Ruslan Ermilov\n...@freebsd.org\n1999/05/27"] @@ -766,6 +767,7 @@ pjd -> smh pjd -> trociny rgrimes -> markm +rgrimes -> rscheff rmacklem -> jwd @@ -855,6 +857,8 @@ thompsa -> eri trasz -> jh trasz -> mjg + +tuexen -> rscheff ume -> jinmei ume -> suz ___ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"
svn commit: r359764 - head/share/misc
Author: rscheff Date: Fri Apr 10 00:31:52 2020 New Revision: 359764 URL: https://svnweb.freebsd.org/changeset/base/359764 Log: add myself (rscheff) as a src committer. Reviewed by: rgrimes (mentor), tuexen (mentor) Approved by: rgrimes (mentor), tuexen (mentor) MFC after:3 days Sponsored by: NetApp, Inc. Differential Revision:https://reviews.freebsd.org/D24318 Modified: head/share/misc/committers-src.dot Modified: head/share/misc/committers-src.dot == --- head/share/misc/committers-src.dot Fri Apr 10 00:27:19 2020 (r359763) +++ head/share/misc/committers-src.dot Fri Apr 10 00:31:52 2020 (r359764) @@ -312,6 +312,7 @@ royger [label="Roger Pau Monne\nroy...@freebsd.org\n20 rpaulo [label="Rui Paulo\nrpa...@freebsd.org\n2007/09/25"] rpokala [label="Ravi Pokala\nrpok...@freebsd.org\n2015/11/19"] rrs [label="Randall R Stewart\n...@freebsd.org\n2007/02/08"] +rscheff [label="Richard Scheffenegger\nrsch...@freebsd.org\n2020/04/06"] rse [label="Ralf S. Engelschall\n...@freebsd.org\n1997/07/31"] rstone [label="Ryan Stone\nrst...@freebsd.org\n2010/04/19"] ru [label="Ruslan Ermilov\n...@freebsd.org\n1999/05/27"] @@ -792,6 +793,7 @@ pjd -> smh pjd -> trociny rgrimes -> markm +rgrimes -> rscheff rmacklem -> jwd @@ -882,6 +884,8 @@ thompsa -> eri trasz -> jh trasz -> mjg + +tuexen -> rscheff ume -> jinmei ume -> suz ___ svn-src-all@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-all To unsubscribe, send any mail to "svn-src-all-unsubscr...@freebsd.org"