svn commit: r342127 - head/sys/netinet/cc
Author: hiren Date: Sat Dec 15 17:01:16 2018 New Revision: 342127 URL: https://svnweb.freebsd.org/changeset/base/342127 Log: Revert r331567 CC Cubic: fix underflow for cubic_cwnd() This change is causing TCP connections using cubic to hang. Need to dig more to find exact cause and fix it. Reported by: tj at mrsk dot me, Matt Garber (via twitter) Discussed with: sbruno (previously), allanjude, cperciva MFC after:3 days Modified: head/sys/netinet/cc/cc.h head/sys/netinet/cc/cc_cubic.c head/sys/netinet/cc/cc_cubic.h Modified: head/sys/netinet/cc/cc.h == --- head/sys/netinet/cc/cc.hSat Dec 15 16:53:15 2018(r342126) +++ head/sys/netinet/cc/cc.hSat Dec 15 17:01:16 2018(r342127) @@ -102,8 +102,6 @@ struct cc_var { #defineCCF_ACKNOW 0x0008 /* Will this ack be sent now? */ #defineCCF_IPHDR_CE0x0010 /* Does this packet set CE bit? */ #defineCCF_TCPHDR_CWR 0x0020 /* Does this packet set CWR bit? */ -#defineCCF_MAX_CWND0x0040 /* Have we reached maximum cwnd? */ -#defineCCF_CHG_MAX_CWND0x0080 /* Cubic max_cwnd changed, for K */ /* ACK types passed to the ack_received() hook. */ #defineCC_ACK 0x0001 /* Regular in sequence ACK. */ Modified: head/sys/netinet/cc/cc_cubic.c == --- head/sys/netinet/cc/cc_cubic.c Sat Dec 15 16:53:15 2018 (r342126) +++ head/sys/netinet/cc/cc_cubic.c Sat Dec 15 17:01:16 2018 (r342127) @@ -88,8 +88,6 @@ struct cubic { unsigned long max_cwnd; /* cwnd at the previous congestion event. */ unsigned long prev_max_cwnd; - /* Cached value for t_maxseg when K was computed */ - uint32_tk_maxseg; /* Number of congestion events. */ uint32_tnum_cong_events; /* Minimum observed rtt in ticks. 
*/ @@ -126,9 +124,6 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type) cubic_data = ccv->cc_data; cubic_record_rtt(ccv); - if (ccv->flags & CCF_MAX_CWND) - return; - /* * Regular ACK and we're not in cong/fast recovery and we're cwnd * limited and we're either not doing ABC or are slow starting or are @@ -156,12 +151,6 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type) cubic_data->mean_rtt_ticks, cubic_data->max_cwnd, CCV(ccv, t_maxseg)); - if (ccv->flags & CCF_CHG_MAX_CWND || cubic_data->k_maxseg != CCV(ccv, t_maxseg)) { - cubic_data->K = cubic_k(cubic_data->max_cwnd / CCV(ccv, t_maxseg)); - cubic_data->k_maxseg = CCV(ccv, t_maxseg); - ccv->flags &= ~(CCF_MAX_CWND|CCF_CHG_MAX_CWND); - } - w_cubic_next = cubic_cwnd(ticks_since_cong + cubic_data->mean_rtt_ticks, cubic_data->max_cwnd, CCV(ccv, t_maxseg), cubic_data->K); @@ -173,18 +162,13 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type) * TCP-friendly region, follow tf * cwnd growth. */ - CCV(ccv, snd_cwnd) = ulmin(w_tf, TCP_MAXWIN << CCV(ccv, snd_scale)); + CCV(ccv, snd_cwnd) = w_tf; else if (CCV(ccv, snd_cwnd) < w_cubic_next) { /* * Concave or convex region, follow CUBIC * cwnd growth. */ - if (w_cubic_next >= TCP_MAXWIN << CCV(ccv, snd_scale)) { - w_cubic_next = TCP_MAXWIN << CCV(ccv, snd_scale); - ccv->flags |= CCF_MAX_CWND; - } - w_cubic_next = ulmin(w_cubic_next, TCP_MAXWIN << CCV(ccv, snd_scale)); if (V_tcp_do_rfc3465) CCV(ccv, snd_cwnd) = w_cubic_next; else @@ -202,10 +186,8 @@ cubic_ack_received(struct cc_var *ccv, uint16_t type) * max_cwnd. */ if (cubic_data->num_cong_events == 0 && - cubic_data->max_cwnd < CCV(ccv, snd_cwnd)) { + cubic_data->max_cwnd < CCV(ccv, snd_cwnd)) cubic_data->max_cwnd = CCV(ccv, snd_cwnd); - ccv->flags |= CCF_CHG_MAX_CWND; - } } } } @@
Re: svn commit: r334804 - in head/sys: kern modules/tcp modules/tcp/rack netinet netinet/tcp_stacks sys
On 06/07/18 at 08:07P, Matthew Macy wrote: > > > > Okay. I believe there might be situations where we may want to still > > keep the 'default' stack alive. I know Windows doesn't yet use RACK when > > rtt is lesser than 10ms (or something like that), as an example. > > > > Is there any reason RACK wouldn't work for tracking 10us RTTs? If we > know we know the peer doesn't do delack or have enough data in flight > and the other stack doesn't have broken LRO, could we just use this in > lieu of high resolution timestamps? I believe the issue is both ends having fine-grained timers for RACK to be able to do the right thing. If timer resolution ends up being coarser than rtt, just depending on rack may be problematic. I know 10ms is doable on most systems but just using this as an example that we probably want non-rack ('default') stack to be around a little longer and possibly with the enhancements that we can easily extract out to be shared among all the stacks. Also, RACK needs pacing, which requires a bit more CPU, so the question for us could be: do we want to keep the 'default' stack around for machines that can't take that extra CPU hit? SACK is inefficient in default stack and PRR could be super useful as "pseudo" pacing mechanism and could help recover faster but at the end of the day it all depends on someone with time/energy/motivation to maintain the 'default' stack with all shiny things that appear in non-default stacks. cheers, Hiren ps: I know we are not killing the default stack as of yet and just stopping active maintenance of it but just wanted to raise these (probably obvious) points. pgpAd9US7XWHW.pgp Description: PGP signature
Re: svn commit: r334804 - in head/sys: kern modules/tcp modules/tcp/rack netinet netinet/tcp_stacks sys
On 06/07/18 at 08:58P, Randall Stewart wrote: > > > > On Jun 7, 2018, at 6:01 PM, hiren panchasara > > wrote: > > > > On 06/07/18 at 06:18P, Randall Stewart wrote: > >> Author: rrs > >> Date: Thu Jun 7 18:18:13 2018 > >> New Revision: 334804 > >> URL: https://svnweb.freebsd.org/changeset/base/334804 > >> > >> Log: > >> This commit brings in a new refactored TCP stack called Rack. > >> Rack includes the following features: > >> - A different SACK processing scheme (the old sack structures are not > >> used). > >> - RACK (Recent acknowledgment) where counting dup-acks is no longer done > >> instead time is used to knwo when to retransmit. (see the I-D) > >> - TLP (Tail Loss Probe) where we will probe for tail-losses to attempt > >> to try not to take a retransmit time-out. (see the I-D) > >> - Burst mitigation using TCPHTPS > >> - PRR (partial rate reduction) see the RFC. > >> > >> Once built into your kernel, you can select this stack by either > >> socket option with the name of the stack is "rack" or by setting > >> the global sysctl so the default is rack. > >> > >> Note that any connection that does not support SACK will be kicked > >> back to the "default" base FreeBSD stack (currently known as "default"). > >> > >> To build this into your kernel you will need to enable in your > >> kernel: > >> makeoptions WITH_EXTRA_TCP_STACKS=1 > >> options TCPHPTS > >> > >> Sponsored by: Netflix Inc. 
> >> Differential Revision:https://reviews.freebsd.org/D15525 > >> > >> Added: > >> head/sys/modules/tcp/rack/ > >> head/sys/modules/tcp/rack/Makefile (contents, props changed) > >> head/sys/netinet/tcp_stacks/rack.c (contents, props changed) > >> head/sys/netinet/tcp_stacks/rack_bbr_common.h (contents, props changed) > >> head/sys/netinet/tcp_stacks/sack_filter.c (contents, props changed) > >> head/sys/netinet/tcp_stacks/sack_filter.h (contents, props changed) > >> head/sys/netinet/tcp_stacks/tcp_rack.h (contents, props changed) > >> Modified: > >> head/sys/kern/uipc_sockbuf.c > >> head/sys/modules/tcp/Makefile > >> head/sys/netinet/tcp.h > >> head/sys/netinet/tcp_log_buf.h > >> head/sys/netinet/tcp_output.c > >> head/sys/netinet/tcp_stacks/fastpath.c > >> head/sys/netinet/tcp_timer.c > >> head/sys/netinet/tcp_timer.h > >> head/sys/netinet/tcp_var.h > >> head/sys/sys/mbuf.h > >> head/sys/sys/queue.h > >> head/sys/sys/sockbuf.h > >> head/sys/sys/time.h > > > > I thought we'd have more time to review/test this. Looks like BSDCan > > commit-spree in effect. :-) > > The Phabricator review has been up since May 22nd. Thats over 2.5 weeks, > this was also discussed on the Thursday conference calls. Fair enough. (I am out of touch a little so shouldn't really complain ;-)) > > > > A few questions: > > 1) Does RACK work reliably without HPTS? If yes, has that config been > > tested? > > > No it requires the pacer. > > > 2) It looks like PRR is tied to RACK. Why did we go that route? > > Shouldn't it be easily used with the 'default' stack also? > > > > It is what I developed.. and I had no desire to work with the default stack. > That > is a fifth rail that no one wants touched. > > > 3) Can new SACK be used with the traditional stack? > > Well if you want to rework the base stack you might be able to do that :) > > It would be quite some effort.. 
I think Robert wants eventually the old > stack to be de-composed and then slowly work at getting more common > code between them until eventually you can have a diff and somehow > figure out how to integrate the two. Okay. I believe there might be situations where we may want to still keep the 'default' stack alive. I know Windows doesn't yet use RACK when rtt is lesser than 10ms (or something like that), as an example. Such optimizations (PRR, better SACK) should be made available to that also if we see non-RACK having a viable future. > > > > > 4) Where should manpage like info for RACK go? a new man-page or > > extending tcp(4)? Info like how to enable system-wide or per socket > > should go here. > > > > The enable/disable or per-socket I think is in with the pluggable stack > stuff. We might want a Rack man page.. have to think about it. > > &g
Re: svn commit: r334804 - in head/sys: kern modules/tcp modules/tcp/rack netinet netinet/tcp_stacks sys
On 06/07/18 at 06:18P, Randall Stewart wrote: > Author: rrs > Date: Thu Jun 7 18:18:13 2018 > New Revision: 334804 > URL: https://svnweb.freebsd.org/changeset/base/334804 > > Log: > This commit brings in a new refactored TCP stack called Rack. > Rack includes the following features: >- A different SACK processing scheme (the old sack structures are not > used). >- RACK (Recent acknowledgment) where counting dup-acks is no longer done > instead time is used to knwo when to retransmit. (see the I-D) >- TLP (Tail Loss Probe) where we will probe for tail-losses to attempt > to try not to take a retransmit time-out. (see the I-D) >- Burst mitigation using TCPHTPS >- PRR (partial rate reduction) see the RFC. > > Once built into your kernel, you can select this stack by either > socket option with the name of the stack is "rack" or by setting > the global sysctl so the default is rack. > > Note that any connection that does not support SACK will be kicked > back to the "default" base FreeBSD stack (currently known as "default"). > > To build this into your kernel you will need to enable in your > kernel: > makeoptions WITH_EXTRA_TCP_STACKS=1 > options TCPHPTS > > Sponsored by: Netflix Inc. 
> Differential Revision: https://reviews.freebsd.org/D15525 > > Added: > head/sys/modules/tcp/rack/ > head/sys/modules/tcp/rack/Makefile (contents, props changed) > head/sys/netinet/tcp_stacks/rack.c (contents, props changed) > head/sys/netinet/tcp_stacks/rack_bbr_common.h (contents, props changed) > head/sys/netinet/tcp_stacks/sack_filter.c (contents, props changed) > head/sys/netinet/tcp_stacks/sack_filter.h (contents, props changed) > head/sys/netinet/tcp_stacks/tcp_rack.h (contents, props changed) > Modified: > head/sys/kern/uipc_sockbuf.c > head/sys/modules/tcp/Makefile > head/sys/netinet/tcp.h > head/sys/netinet/tcp_log_buf.h > head/sys/netinet/tcp_output.c > head/sys/netinet/tcp_stacks/fastpath.c > head/sys/netinet/tcp_timer.c > head/sys/netinet/tcp_timer.h > head/sys/netinet/tcp_var.h > head/sys/sys/mbuf.h > head/sys/sys/queue.h > head/sys/sys/sockbuf.h > head/sys/sys/time.h I thought we'd have more time to review/test this. Looks like BSDCan commit-spree in effect. :-) A few questions: 1) Does RACK work reliably without HPTS? If yes, has that config been tested? 2) It looks like PRR is tied to RACK. Why did we go that route? Shouldn't it be easily used with the 'default' stack also? 3) Can new SACK be used with the traditional stack? 4) Where should manpage like info for RACK go? a new man-page or extending tcp(4)? Info like how to enable system-wide or per socket should go here. 5) Any perf numbers to go along with this commit? Synthetic or production numbers showing improvements in transfer speed or any other impact on CPU usage (specially with HPTS) that you can share? 6) In your testing, have you found cases where RACK does poorly compared to the 'default' stack? Any recommendations on when should RACK be enabled? (Something like this could go in the manpage.) Glad to finally see this in -head! Cheers, Hiren pgpJJnDKxX6dS.pgp Description: PGP signature
Re: svn commit: r332770 - in head/sys: conf netinet netinet/tcp_stacks sys
On 05/01/18 at 05:11P, Warner Losh wrote: > On Tue, May 1, 2018 at 5:00 PM, Jonathan Looney <jonloo...@gmail.com> wrote: > > > On Mon, Apr 30, 2018 at 3:16 AM, hiren panchasara < > > hi...@strugglingcoder.info> wrote: > > > > > > In my understanding, default stack currently cannot use this mechanism. > > When do > > > you think that'll be possible? > > > > > > I think I can speak to Randall's plans for this. Thank you! > > > > Randall chose not to include in this commit the hooks for the default > > stack to use the high-precision timers. I believe his immediate priorities > > are upstreaming RACK and BBR. After that, if there is demand, he may > > upstream the (relatively untested) code that allows the default stack to > > use the high-precision timers (protected by a non-default kernel option) so > > others can choose to experiment with it. I believe it'd be useful to be able to pace packets with the traditional stack. > > > > (By the way, we're hoping to change the terminology away from describing > > the traditional FreeBSD stack as the "default" stack. In theory, someone > > can make any stack be their local default. We'll need to figure out what to > > actually call it at some point. My suggestion was the "FreeBSD" stack, > > although that is lacking in some imagination. In any case, we should have > > that discussion at some point in the future. > > > > Only slightly less poor would be "traditional" stack :) See, I've already started using this name. :-) Cheers, Hiren pgpx7Xm2Xao5i.pgp Description: PGP signature
Re: svn commit: r332770 - in head/sys: conf netinet netinet/tcp_stacks sys
On 04/19/18 at 01:38P, Randall Stewart wrote: > Author: rrs > Date: Thu Apr 19 13:37:59 2018 > New Revision: 332770 > URL: https://svnweb.freebsd.org/changeset/base/332770 > > Log: > This commit brings in the TCP high precision timer system (tcp_hpts). > It is the forerunner/foundational work of bringing in both Rack and BBR > which use hpts for pacing out packets. The feature is optional and requires > the TCPHPTS option to be enabled before the feature will be active. TCP > modules that use it must assure that the base component is compile in > the kernel in which they are loaded. > > MFC after: Never > Sponsored by: Netflix Inc. > Differential Revision: https://reviews.freebsd.org/D15020 Randall, In my understanding, default stack currently cannot use this mechanism. When do you think that'll be possible? Thanks for your work, Hiren pgpXYMyA3vInC.pgp Description: PGP signature
Re: svn commit: r327559 - in head: . sys/net
On 01/04/18 at 11:37P, Steven Hartland wrote: > > > On 04/01/2018 22:42, hiren panchasara wrote: > > On 01/04/18 at 09:52P, Steven Hartland wrote: > >> On 04/01/2018 20:50, Eugene Grosbein wrote: > >>> 05.01.2018 3:05, Steven Hartland wrote: > >>> > >>>> Author: smh > >>>> Date: Thu Jan 4 20:05:47 2018 > >>>> New Revision: 327559 > >>>> URL: https://svnweb.freebsd.org/changeset/base/327559 > >>>> > >>>> Log: > >>>> Disabled the use of flowid for lagg by default > >>>> > >>>> Disabled the use of RSS hash from the network card aka flowid for > >>>> lagg(4) interfaces by default as it's currently incompatible with > >>>> the lacp and loadbalance protocols. > >>>> > >>>> The incompatibility is due to the fact that the flowid isn't know > >>>> for the first packet of a new outbound stream which can result in > >>>> the hash calculation method changing and hence a stream being > >>>> incorrectly split across multiple interfaces during normal > >>>> operation. > >>>> > >>>> This can be re-enabled by setting the following in loader.conf: > >>>> net.link.lagg.default_use_flowid="1" > >>>> > >>>> Discussed with: kmacy > >>>> Sponsored by:Multiplay > >>> RSS by definition has meaning to received stream. What is "outbound" > >>> stream > >>> in this context, why can the hash calculatiom method change and what > >>> exactly > >>> does it mean "a stream being incorrectly split"? > >> Yes RSS is indeed a received stream but that is used by lagg for lacp > >> and loadbalance protocols to decide which port of the lagg to "send" the > >> packet out of. As the flowid is not known when a new "output" stream is > >> instigated the current code falls back to manual hash calculation to > >> determine which port to send the initial packet from. Once a response is > >> received a tx then uses the flowid. This change of hash calculation > >> method can result in the initial packet being sent from a different port > >> than the rest of the stream; this is what I meant by "incorrectly split". 
> > For my understanding, is this just an issue for the first packet when we > > originate the flow? Once we have a response and if flowid is there, we'd > > use it, right? OR am I missing something? > Initially yes, but that can cause a whole cascading set of problems. If > the source machine sends from two different ports then flow can traverse > across the network using different paths and hence arrive at the > destination on different ports too, causing the corresponding? issue on > the other side. > > And with this change, we'd always go and do manual calculation even when > > we have a valid flowid (i.e. we didn't initiate a connection)? > Correct, but there's potentially no easy way to correctly determine what > the flowid and hence hash should be in this case, likely impossible if > the lagg consists of different interface types. > > In addition if the hardware hash doesn't match the requested one as per > laggproto then additional issues could also be triggered. > > Our TCP stack seems fragile during setup to out of order packets which > this multipath behavior causes, we've seen this on our loadbalancers > which is what triggered the investigation. The concrete result is many > aborted TCP connections, over 300k ~2% on the machine I'm looking at. > > I hope there's some improvements that can be made, for example if we can > determine the stream was instigated remotely then flowid would always be > valid hence we can use it assuming it matches the requested spec or if > we can make it clear to the user that laggproto is not the one they > requested, I'm open to ideas? IIRC, with 'RSS' in kernconf, most NIC drivers and stack should do the right thing. Look at drivers and also conn startup code in TCP as I recall it doing the flowid mapping correctly when stream originated from the other side and had flowid assigned to it by the NIC. 
I am mostly concerned about the overhead of manual calculation but my knowledge is a bit rusty right now and lagg has always been special so please try this out and see. Thank you. Hiren pgpZlEzLX74r7.pgp Description: PGP signature
Re: svn commit: r327559 - in head: . sys/net
On 01/04/18 at 09:52P, Steven Hartland wrote: > On 04/01/2018 20:50, Eugene Grosbein wrote: > > 05.01.2018 3:05, Steven Hartland wrote: > > > >> Author: smh > >> Date: Thu Jan 4 20:05:47 2018 > >> New Revision: 327559 > >> URL: https://svnweb.freebsd.org/changeset/base/327559 > >> > >> Log: > >>Disabled the use of flowid for lagg by default > >> > >>Disabled the use of RSS hash from the network card aka flowid for > >>lagg(4) interfaces by default as it's currently incompatible with > >>the lacp and loadbalance protocols. > >> > >>The incompatibility is due to the fact that the flowid isn't know > >>for the first packet of a new outbound stream which can result in > >>the hash calculation method changing and hence a stream being > >>incorrectly split across multiple interfaces during normal > >>operation. > >> > >>This can be re-enabled by setting the following in loader.conf: > >>net.link.lagg.default_use_flowid="1" > >> > >>Discussed with: kmacy > >>Sponsored by: Multiplay > > RSS by definition has meaning to received stream. What is "outbound" stream > > in this context, why can the hash calculatiom method change and what exactly > > does it mean "a stream being incorrectly split"? > Yes RSS is indeed a received stream but that is used by lagg for lacp > and loadbalance protocols to decide which port of the lagg to "send" the > packet out of. As the flowid is not known when a new "output" stream is > instigated the current code falls back to manual hash calculation to > determine which port to send the initial packet from. Once a response is > received a tx then uses the flowid. This change of hash calculation > method can result in the initial packet being sent from a different port > than the rest of the stream; this is what I meant by "incorrectly split". For my understanding, is this just an issue for the first packet when we originate the flow? Once we have a response and if flowid is there, we'd use it, right? OR am I missing something? 
And with this change, we'd always go and do manual calculation even when we have a valid flowid (i.e. we didn't initiate a connection)? Thanks, Hiren > > See the following: > https://github.com/freebsd/freebsd/blob/master/sys/net/if_lagg.c#L2066 > https://github.com/freebsd/freebsd/blob/master/sys/net/ieee8023ad_lacp.c#L846 > > > Defaults should not be changed so easily just because they are not optimal > > for some specific case. Each lagg has its own setting for flowid usage > > and why one cannot just use "ifconfig lagg0 -use_flowid" for such cases? > > > Yes we're already using -use_flowid to mitigate the problem, but the > defaults should never result in broken behavior hence the change, at > least for now. > > For reference I did look at keeping the default of 1 but only using that > for protocols which weren't effected by the issue, and introducing a 2 > to force those that are, but as its defined as acting on creation and we > always create lagg interfaces as failover and then amend them that > wasn't possible without making more invasive changes. > > ??? Regards > ??? Steve pgpAnCnco5U0Y.pgp Description: PGP signature
Re: svn commit: r324836 - in head/sys: kern sys
On 10/21/17 at 10:40P, Mateusz Guzik wrote: > Author: mjg > Date: Sat Oct 21 22:40:09 2017 > New Revision: 324836 > URL: https://svnweb.freebsd.org/changeset/base/324836 > > Log: > mtx: implement thread lock fastpath Hi, Can you please elaborate on this commit-log message a little more? What is the fastpath here and what scenarios would benefit from this change? This would help a novice like myself. :-) Cheers, Hiren pgpWCFEEFUsZ2.pgp Description: PGP signature
Re: svn commit: r316699 - head/sys/net
On 04/11/17 at 08:56P, Andrey V. Elsukov wrote: > Author: ae > Date: Tue Apr 11 08:56:18 2017 > New Revision: 316699 > URL: https://svnweb.freebsd.org/changeset/base/316699 > > Log: > Do not adjust interface MTU automatically. Leave this task to the system > administrator. > > This restores the behavior that was prior to r274246. Hi Andrey, I was going to ask for more context and then I saw that the Phabricator review has a lot more details that I wish were a part of the commit log. Cheers, Hiren pgppOYqQFvfp5.pgp Description: PGP signature
Re: svn commit: r316309 - head/sys/dev/qlxgbe
On 03/30/17 at 11:07P, Somayajulu, David wrote: > Hi Hiren, > > I know this is not a new topic but a little more descriptive commit-log > > would have been nicer. Also, you should update the manpage reflecting this > > change. i.e. now it also supports software LRO when h/w LRO is disabled. > Will do. Sorry about that. Thanks! > > > Do you know of a case where one would want to disable h/w lro and enable > > s/w lro? I guess where you want to free up nic and make cpu do more work? > I was under the impression as well, that s/w lro is moot, if h/w lro was > available, till one costumer asked for it. Not sure what the use case is. I thought this idea (like gro in linux) was popular when lro in h/w was considered buggy and couldn't correctly do batching (i.e. hide useful flags). Not sure if that's still the case. cheers, Hiren pgpPTil7LZX6p.pgp Description: PGP signature
Re: svn commit: r316309 - head/sys/dev/qlxgbe
On 03/30/17 at 10:43P, David C Somayajulu wrote: > Author: davidcs > Date: Thu Mar 30 22:43:32 2017 > New Revision: 316309 > URL: https://svnweb.freebsd.org/changeset/base/316309 > > Log: > Add support for optional Soft LRO Hi David, I know this is not a new topic but a little more descriptive commit-log would have been nicer. Also, you should update the manpage reflecting this change. i.e. now it also supports software LRO when h/w LRO is disabled. Do you know of a case where one would want to disable h/w lro and enable s/w lro? I guess where you want to free up nic and make cpu do more work? Cheers, Hiren pgpZbAb0UTTl5.pgp Description: PGP signature
Re: svn commit: r314813 - head/sys/kern
On 03/06/17 at 07:08P, Ngie Cooper (yaneurabeya) wrote: > > > On Mar 6, 2017, at 16:20, Hiren Panchasara <hi...@freebsd.org> wrote: > > > > Author: hiren > > Date: Tue Mar 7 00:20:01 2017 > > New Revision: 314813 > > URL: https://svnweb.freebsd.org/changeset/base/314813 > > > > Log: > > We've found a recurring problem where some userland process would be > > stuck spinning at 100% cpu around sbcut_internal(). Inside > > sbflush_internal(), sb_ccc reached to about 4GB and before passing it > > to sbcut_internal(), we type-cast it from uint to int making it -ve. > > > > The root cause of sockbuf growing this large is unknown. Correct fix > > is also not clear but based on mailing list discussions, adding > > KASSERTs to panic instead of looping endlessly. > > > > Reviewed by: glebius > > Sponsored by: Limelight Networks > > > > Modified: > > head/sys/kern/uipc_sockbuf.c > > > > Modified: head/sys/kern/uipc_sockbuf.c > > == > > --- head/sys/kern/uipc_sockbuf.cTue Mar 7 00:13:53 2017 > > (r314812) > > +++ head/sys/kern/uipc_sockbuf.cTue Mar 7 00:20:01 2017 > > (r314813) > > @@ -1043,6 +1043,11 @@ sbcut_internal(struct sockbuf *sb, int l > > { > > struct mbuf *m, *next, *mfree; > > > > + KASSERT(len > 0, ("%s: len is %d but it is supposed to be +ve", > > + __func__, len)); > > + KASSERT(len <= sb->sb_ccc, ("%s: len: %d is > ccc: %u", > > + __func__, len, sb->sb_ccc)); > > + > > next = (m = sb->sb_mb) ? m->m_nextpkt : 0; > > mfree = NULL; > > The KASSERT is bad — please fix it. From > https://ci.freebsd.org/job/FreeBSD-head-amd64-test/1994/console : > > add host 127.0.0.1panic: sbcut_internal: len is 0 but it is supposed to be +ve Thanks, fixed in r314838. Cheers, Hiren pgp0bALgWmnMf.pgp Description: PGP signature
svn commit: r314838 - head/sys/kern
Author: hiren Date: Tue Mar 7 06:46:38 2017 New Revision: 314838 URL: https://svnweb.freebsd.org/changeset/base/314838 Log: Fix the KASSERT check from r314813. len being 0 is valid. Submitted by: ngie Reported by: ngie (via jenkins test run) Sponsored by: Limelight Networks Modified: head/sys/kern/uipc_sockbuf.c Modified: head/sys/kern/uipc_sockbuf.c == --- head/sys/kern/uipc_sockbuf.cTue Mar 7 06:11:36 2017 (r314837) +++ head/sys/kern/uipc_sockbuf.cTue Mar 7 06:46:38 2017 (r314838) @@ -1043,7 +1043,7 @@ sbcut_internal(struct sockbuf *sb, int l { struct mbuf *m, *next, *mfree; - KASSERT(len > 0, ("%s: len is %d but it is supposed to be +ve", + KASSERT(len >= 0, ("%s: len is %d but it is supposed to be >= 0", __func__, len)); KASSERT(len <= sb->sb_ccc, ("%s: len: %d is > ccc: %u", __func__, len, sb->sb_ccc)); ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r314813 - head/sys/kern
Author: hiren Date: Tue Mar 7 00:20:01 2017 New Revision: 314813 URL: https://svnweb.freebsd.org/changeset/base/314813 Log: We've found a recurring problem where some userland process would be stuck spinning at 100% cpu around sbcut_internal(). Inside sbflush_internal(), sb_ccc reached to about 4GB and before passing it to sbcut_internal(), we type-cast it from uint to int making it -ve. The root cause of sockbuf growing this large is unknown. Correct fix is also not clear but based on mailing list discussions, adding KASSERTs to panic instead of looping endlessly. Reviewed by: glebius Sponsored by: Limelight Networks Modified: head/sys/kern/uipc_sockbuf.c Modified: head/sys/kern/uipc_sockbuf.c == --- head/sys/kern/uipc_sockbuf.cTue Mar 7 00:13:53 2017 (r314812) +++ head/sys/kern/uipc_sockbuf.cTue Mar 7 00:20:01 2017 (r314813) @@ -1043,6 +1043,11 @@ sbcut_internal(struct sockbuf *sb, int l { struct mbuf *m, *next, *mfree; + KASSERT(len > 0, ("%s: len is %d but it is supposed to be +ve", + __func__, len)); + KASSERT(len <= sb->sb_ccc, ("%s: len: %d is > ccc: %u", + __func__, len, sb->sb_ccc)); + next = (m = sb->sb_mb) ? m->m_nextpkt : 0; mfree = NULL; ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
Re: svn commit: r314216 - head/sys/x86/x86
On 02/24/17 at 06:56P, Jonathan T. Looney wrote: > Author: jtl > Date: Fri Feb 24 18:56:00 2017 > New Revision: 314216 > URL: https://svnweb.freebsd.org/changeset/base/314216 > > Log: > We have seen several cases recently where we appear to get a double-fault: > We have an original panic. Then, instead of writing the core to the dump > device, the kernel has a second panic: "smp_targeted_tlb_shootdown: > interrupts disabled". This change is an attempt to fix that second panic. > > When the other CPUs are stopped, we can't notify them of the TLB shootdown, > so we skip that operation. However, when the CPUs come back up, we > invalidate the TLB to ensure they correctly observe any changes to the > page mappings. > > Reviewed by:kib > Sponsored by: Netflix > Differential Revision: https://reviews.freebsd.org/D9786 Can this be MFCd to 11? Cheers, Hiren pgpaLlGRNGfLX.pgp Description: PGP signature
Re: svn commit: r278729 - head/sys/sys
On 03/19/15 at 11:08P, hiren panchasara wrote: > On 03/16/15 at 06:06P, hiren panchasara wrote: > > On 03/16/15 at 03:39P, Gleb Smirnoff wrote: > > > On Wed, Mar 11, 2015 at 02:36:07PM -0700, hiren panchasara wrote: > > > h> On 02/13/15 at 11:19P, Simon J. Gerraty wrote: > > > h> > Author: sjg > > > h> > Date: Fri Feb 13 23:19:35 2015 > > > h> > New Revision: 278729 > > > h> > URL: https://svnweb.freebsd.org/changeset/base/278729 > > > h> > > > > h> > Log: > > > h> > sbspace: size of bleft, mleft must match sockbuf fields to avoid > > > h> > overflow on amd64 > > > h> > > > > h> > Submitted by: anshu...@juniper.net > > > h> > Obtained from: Juniper Networks > > > h> > > > h> Talking to sjg on -arch to MFC this. If he cannot get around doing > > > that, > > > h> I'll do it tomorrow. > > > h> > > > h> Letting people know here to see if there are any objections. > > > > > > Would that fix the bug we've been discussing? > > > > Unsure as I am not sure what caused the issue I saw. > > > > For those who do not know the details, we recently saw a userland > > process stuck spinning at 100% around sbcut_internal(). Inside > > sbflush_internal(), the sb_cc was grown to be about 4G. And before > > passing it to sbcut_internal(), we cast it from uint to int which > > would make that valud -ve. > > > > Gleb pointed out to me that sbspace() is supposed to check/stop sb_cc > > from growing that large. > > > > Now, I am not sure if we'd ever run into this situation again but > > current fix is a great catch anyways. > > > > I still have 2 questions around what we saw. It'd be great if someone can > > clarify them for my understanding: > > > > 1) Even if we get into such a scenario that we were in, following would > > help by not looping endlessly. 
> > > > --- uipc_sockbuf.c.02015-03-11 15:49:52.0 -0700 > > +++ uipc_sockbuf.c 2015-03-11 15:51:48.0 -0700 > > @@ -877,6 +877,9 @@ > > { > > struct mbuf *m, *n, *next, *mfree; > > > > + if (len < 0) > > + panic("%s: len is %d and it is supposed to be +ve", > > + __func__, len); > > + > > next = (m = sb->sb_mb) ? m->m_nextpkt : 0; > > mfree = NULL > > > > 2) We need 1) because we are casting a uint to int which _may_ rander a > > value -ve. Is there a way we can avoid the casting? > > It'd be useful if someone with knowledge in this area can weigh in. Ran into this again today. While the real question of how sb_ccc grew this large is still unsolved, any objection to adding this patch to avoid a hang and panic instead? Cheers, Hiren pgp5bAUfjwGuq.pgp Description: PGP signature
Re: svn commit: r313043 - head/sys/kern
On 02/01/17 at 01:12P, Hartmut Brandt wrote: > Author: harti > Date: Wed Feb 1 13:12:07 2017 > New Revision: 313043 > URL: https://svnweb.freebsd.org/changeset/base/313043 > > Log: > Merge filt_soread and filt_solisten and decide what to do when checking > for EVFILT_READ at the point of the check not when the event is registered. > This fixes a problem with asio when accepting a connection. > > Reviewed by: kib@, Scott Mitchell > > Modified: > head/sys/kern/uipc_socket.c Is it possible to MFC this back to 11? Cheers, Hiren pgpBfu3YzKPxR.pgp Description: PGP signature
svn commit: r312976 - head/share/man/man4
Author: hiren Date: Mon Jan 30 02:25:55 2017 New Revision: 312976 URL: https://svnweb.freebsd.org/changeset/base/312976 Log: Man page update to go along with r312907. Reviewed by: Allanjude MFC after:2 weeks Sponsored by: Limelight Networks Modified: head/share/man/man4/tcp.4 Modified: head/share/man/man4/tcp.4 == --- head/share/man/man4/tcp.4 Mon Jan 30 02:24:54 2017(r312975) +++ head/share/man/man4/tcp.4 Mon Jan 30 02:25:55 2017(r312976) @@ -34,7 +34,7 @@ .\" From: @(#)tcp.48.1 (Berkeley) 6/5/93 .\" $FreeBSD$ .\" -.Dd October 21, 2016 +.Dd Jan 29, 2017 .Dt TCP 4 .Os .Sh NAME @@ -586,6 +586,12 @@ downshift. List of available TCP function blocks (TCP stacks). .It Va functions_default The default TCP function block (TCP stack). +.It Va functions_inherit_listen_socket_stack +Determines whether to inherit listen socket's tcp stack or use the current +system default tcp stack, as defined by +.Va functions_default +.Pc . +Default is true. .It Va insecure_rst Use criteria defined in RFC793 instead of RFC5961 for accepting RST segments. Default is false. ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
Re: svn commit: r312905 - head/sys/net
On 01/27/17 at 11:19P, Cy Schubert wrote: > Really? Just a copyright notice? > > The reason I ask is because of this: > > ===> ae (all) > --- iflib.o --- > /export/home/cy/freebsd/svn/ip6-cksum/sys/net/iflib.c:1561:1: error: > function definition is not allowed here > { > Should these 2 lines be swapped? https://svnweb.freebsd.org/base/head/sys/net/iflib.c?annotate=312905#l1539 https://svnweb.freebsd.org/base/head/sys/net/iflib.c?annotate=312905#l1540 seems like #if #endif block has an extra '}'? Cheers, Hiren pgph7YfXR3MtR.pgp Description: PGP signature
svn commit: r312907 - head/sys/netinet
Author: hiren Date: Fri Jan 27 23:10:46 2017 New Revision: 312907 URL: https://svnweb.freebsd.org/changeset/base/312907 Log: Add a knob to change default behavior of inheriting listen socket's tcp stack regardless of what the default stack for the system is set to. With current/default behavior, after changing the default tcp stack, the application needs to be restarted to pick up that change. Setting this new knob net.inet.tcp.functions_inherit_listen_socket_stack to '0' would change that behavior and make any new connection use the newly selected default tcp stack. Reviewed by: rrs MFC after:2 weeks Sponsored by: Limelight Networks Modified: head/sys/netinet/tcp_syncache.c Modified: head/sys/netinet/tcp_syncache.c == --- head/sys/netinet/tcp_syncache.c Fri Jan 27 23:08:30 2017 (r312906) +++ head/sys/netinet/tcp_syncache.c Fri Jan 27 23:10:46 2017 (r312907) @@ -120,6 +120,14 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, sync _NAME(tcp_syncookiesonly), 0, "Use only TCP SYN cookies"); +static VNET_DEFINE(int, functions_inherit_listen_socket_stack) = 1; +#define V_functions_inherit_listen_socket_stack \ +VNET(functions_inherit_listen_socket_stack) +SYSCTL_INT(_net_inet_tcp, OID_AUTO, functions_inherit_listen_socket_stack, +CTLFLAG_VNET | CTLFLAG_RW, +_NAME(functions_inherit_listen_socket_stack), 0, +"Inherit listen socket's stack"); + #ifdef TCP_OFFLOAD #define ADDED_BY_TOE(sc) ((sc)->sc_tod != NULL) #endif @@ -830,7 +838,7 @@ syncache_socket(struct syncache *sc, str tcp_rcvseqinit(tp); tcp_sendseqinit(tp); blk = sototcpcb(lso)->t_fb; - if (blk != tp->t_fb) { + if (V_functions_inherit_listen_socket_stack && blk != tp->t_fb) { /* * Our parents t_fb was not the default, * we need to release our ref on tp->t_fb and ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
Re: svn commit: r312277 - in head/sys: kern sys
On 01/16/17 at 12:39P, Sergey Kandaurov wrote: > On 16 January 2017 at 11:25, Hiren Panchasara <hi...@freebsd.org> wrote: > > > Author: hiren > > Date: Mon Jan 16 08:25:33 2017 > > New Revision: 312277 > > URL: https://svnweb.freebsd.org/changeset/base/312277 > > > > Log: > > Add kevent EVFILT_EMPTY for notification when a client has received all > > data > > i.e. everything outstanding has been acked. > > > > Reviewed by: bz, gnn (previous version) > > MFC after:3 days > > Sponsored by: Limelight Networks > > Differential Revision:https://reviews.freebsd.org/D9150 > > > > Modified: > > head/sys/kern/kern_event.c > > head/sys/kern/uipc_socket.c > > head/sys/sys/event.h > > > > > Hello. > > Is kqueue(2) man page update planned? Thanks for the reminder, I'll do it. Cheers, Hiren pgpLvgd3sbq9V.pgp Description: PGP signature
svn commit: r312277 - in head/sys: kern sys
Author: hiren Date: Mon Jan 16 08:25:33 2017 New Revision: 312277 URL: https://svnweb.freebsd.org/changeset/base/312277 Log: Add kevent EVFILT_EMPTY for notification when a client has received all data i.e. everything outstanding has been acked. Reviewed by: bz, gnn (previous version) MFC after:3 days Sponsored by: Limelight Networks Differential Revision:https://reviews.freebsd.org/D9150 Modified: head/sys/kern/kern_event.c head/sys/kern/uipc_socket.c head/sys/sys/event.h Modified: head/sys/kern/kern_event.c == --- head/sys/kern/kern_event.c Mon Jan 16 07:41:39 2017(r312276) +++ head/sys/kern/kern_event.c Mon Jan 16 08:25:33 2017(r312277) @@ -344,6 +344,7 @@ static struct { { _filtops }, /* EVFILT_LIO */ { _filtops, 1 }, /* EVFILT_USER */ { _filtops }, /* EVFILT_SENDFILE */ + { _filtops, 1 }, /* EVFILT_EMPTY */ }; /* Modified: head/sys/kern/uipc_socket.c == --- head/sys/kern/uipc_socket.c Mon Jan 16 07:41:39 2017(r312276) +++ head/sys/kern/uipc_socket.c Mon Jan 16 08:25:33 2017(r312277) @@ -161,6 +161,7 @@ static void filt_sowdetach(struct knote static int filt_sowrite(struct knote *kn, long hint); static int filt_solisten(struct knote *kn, long hint); static int inline hhook_run_socket(struct socket *so, void *hctx, int32_t h_id); +static int filt_soempty(struct knote *kn, long hint); fo_kqfilter_t soo_kqfilter; static struct filterops solisten_filtops = { @@ -178,6 +179,11 @@ static struct filterops sowrite_filtops .f_detach = filt_sowdetach, .f_event = filt_sowrite, }; +static struct filterops soempty_filtops = { + .f_isfd = 1, + .f_detach = filt_sowdetach, + .f_event = filt_soempty, +}; so_gen_t so_gencnt; /* generation count for sockets */ @@ -3083,6 +3089,10 @@ soo_kqfilter(struct file *fp, struct kno kn->kn_fop = _filtops; sb = >so_snd; break; + case EVFILT_EMPTY: + kn->kn_fop = _filtops; + sb = >so_snd; + break; default: return (EINVAL); } @@ -3344,6 +3354,21 @@ filt_sowrite(struct knote *kn, long hint return (kn->kn_data >= so->so_snd.sb_lowat); } +static 
int +filt_soempty(struct knote *kn, long hint) +{ + struct socket *so; + + so = kn->kn_fp->f_data; + SOCKBUF_LOCK_ASSERT(>so_snd); + kn->kn_data = sbused(>so_snd); + + if (kn->kn_data == 0) + return (1); + else + return (0); +} + /*ARGSUSED*/ static int filt_solisten(struct knote *kn, long hint) Modified: head/sys/sys/event.h == --- head/sys/sys/event.hMon Jan 16 07:41:39 2017(r312276) +++ head/sys/sys/event.hMon Jan 16 08:25:33 2017(r312277) @@ -43,7 +43,8 @@ #define EVFILT_LIO (-10) /* attached to lio requests */ #define EVFILT_USER(-11) /* User events */ #define EVFILT_SENDFILE(-12) /* attached to sendfile requests */ -#define EVFILT_SYSCOUNT12 +#define EVFILT_EMPTY (-13) /* empty send socket buf */ +#define EVFILT_SYSCOUNT13 #define EV_SET(kevp_, a, b, c, d, e, f) do { \ struct kevent *kevp = (kevp_); \ ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r311453 - head/sys/netinet
Author: hiren Date: Thu Jan 5 17:22:09 2017 New Revision: 311453 URL: https://svnweb.freebsd.org/changeset/base/311453 Log: sysctl net.inet.tcp.hostcache.list in a jail can see connections from other jails and the host. This commit fixes it. PR: 200361 Submitted by: bz (original version), hiren (minor corrections) Reported by: Marcus Reid Reviewed by: bz, gnn Tested by:Lohith Bellad MFC after:1 week Sponsored by: Limelight Networks (minor corrections) Modified: head/sys/netinet/tcp_hostcache.c Modified: head/sys/netinet/tcp_hostcache.c == --- head/sys/netinet/tcp_hostcache.cThu Jan 5 17:19:26 2017 (r311452) +++ head/sys/netinet/tcp_hostcache.cThu Jan 5 17:22:09 2017 (r311453) @@ -69,10 +69,12 @@ __FBSDID("$FreeBSD$"); #include #include +#include #include #include #include #include +#include #include #include #include @@ -625,6 +627,9 @@ sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS) char ip6buf[INET6_ADDRSTRLEN]; #endif + if (jailed_without_vnet(curthread->td_ucred) != 0) + return (EPERM); + sbuf_new(, NULL, linesize * (V_tcp_hostcache.cache_count + 1), SBUF_INCLUDENUL); ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
Re: svn commit: r310547 - head/sys/netinet
On 12/25/16 at 05:37P, Michael Tuexen wrote: > Author: tuexen > Date: Sun Dec 25 17:37:18 2016 > New Revision: 310547 > URL: https://svnweb.freebsd.org/changeset/base/310547 > > Log: > Remove a KASSERT which is not always true. > > In case of the empty queue tp->snd_holes and tcp_sackhole_insert() > failing due to memory shortage, tp->snd_holes will be empty. > This problem was hit when stress tests where performed by pho. > > PR: 215513 > Reported by:pho > Tested by: pho > Sponsored by: Netflix, Inc. > > Modified: > head/sys/netinet/tcp_sack.c Thanks for the fix, Michael. Can you please MFC this? Cheers, Hiren pgp6UGh01qwPo.pgp Description: PGP signature
Re: svn commit: r310433 - head/lib/libc/stdio
On 12/23/16 at 03:05P, Andrey Chernov wrote: > On 23.12.2016 1:39, Conrad Meyer wrote: > > This was unjustified and inappropriate. > > I don't think so. While being able to back it out or fix it by another > way by yourself, you just doing nothing for several days. Your personal > reasons of inability to act properly so long time weights less than > project consistency at whole, so backing it out from anybody is > justified and appropriate in such situations. Conrad, As Andrey rightly said, you didn't respond to all the raised concerns, nor did you revert your commit (which was the majority vote anyway). So I'd say it is totally justified and appropriate. Cheers, Hiren pgplaTryDoXnN.pgp Description: PGP signature
Re: svn commit: r309745 - in head: share/man/man9 sys/kern sys/sys
On 12/22/16 at 10:33P, Gleb Smirnoff wrote: > Hi! > > On Wed, Dec 21, 2016 at 04:19:28PM -0800, hiren panchasara wrote: > h> On 12/09/16 at 05:58P, Gleb Smirnoff wrote: > h> > Author: glebius > h> > Date: Fri Dec 9 17:58:34 2016 > h> > New Revision: 309745 > h> > URL: https://svnweb.freebsd.org/changeset/base/309745 > h> > > h> > Log: > h> > Provide counter_ratecheck(), a MP-friendly substitution to > ppsratecheck(). > h> > When rated event happens at a very quick rate, the ppsratecheck() is > not > h> > only racy, but also becomes a performance bottleneck. > h> > > h> > Together with: rrs, jtl > h> > h> Is it possible for you to MFC this and the following fix to it to 11? > > Have you tried it and found useful? If yes, you are welcome to merge it. As you stated yourself in the commit message, this is a clear bug and an opportunity for performance enhancement. Do you feel this bug won't be in 11 and systems running on 11 won't have similar performance gains as those running on -head? Especially when 11 is the latest released version that many (like myself) would be running in production. > > My current work involves only FreeBSD head, I don't have any STABLE production > boxes, that's why I am very conservative with my MFCs: I do only obvious > conservative bugfixes, not performance improvements. The obvious reason we encourage the original submitter to do MFCs is that (s)he is intimately familiar with the changes. I can understand your point of not having any 11-based systems to test. I'll test this on our $work prod and do the MFC myself. Thanks for the original fix. Cheers, Hiren pgpnErzhijPN7.pgp Description: PGP signature
Re: svn commit: r309745 - in head: share/man/man9 sys/kern sys/sys
On 12/09/16 at 05:58P, Gleb Smirnoff wrote: > Author: glebius > Date: Fri Dec 9 17:58:34 2016 > New Revision: 309745 > URL: https://svnweb.freebsd.org/changeset/base/309745 > > Log: > Provide counter_ratecheck(), a MP-friendly substitution to ppsratecheck(). > When rated event happens at a very quick rate, the ppsratecheck() is not > only racy, but also becomes a performance bottleneck. > > Together with: rrs, jtl Glebius, Is it possible for you to MFC this and the following fix to it to 11? Cheers, Hiren pgp_ltvj3VcBt.pgp Description: PGP signature
Re: svn commit: r307745 - head/sys/kern
On 12/10/16 at 09:35P, Oliver Pinter wrote: > On 10/21/16, Hiren Panchasara <hi...@freebsd.org> wrote: > > Author: hiren > > Date: Fri Oct 21 18:27:30 2016 > > New Revision: 307745 > > URL: https://svnweb.freebsd.org/changeset/base/307745 > > > > Log: > > Rework r306337. > > > > In sendit(), if mp->msg_control is present, then in sockargs() we are > > allocating mbuf to store mp->msg_control. Later in kern_sendit(), call > > to getsock_cap(), will check validity of file pointer passed, if this > > fails EBADF is returned but mbuf allocated in sockargs() is not freed. > > Made code changes to free the same. > > > > Since freeing control mbuf in sendit() after checking (control != NULL) > > may lead to double freeing of control mbuf in sendit(), we can free > > control mbuf in kern_sendit() if there are any errors in the routine. > > > > Submitted by: Lohith Bellad <lohith.bel...@me.com> > > Reviewed by: glebius > > MFC after:3 weeks > > Differential Revision:https://reviews.freebsd.org/D8152 > > Hi Hiren! > > What's the status of the MFC? I not see them (this and r306337) in > 11-STABLE nor in 10-STABLE branch. Thanks for the reminder. I MFCd to 11. I don't really track 10 anymore so I'd request you to do it yourself. If you can't for some reason, let me know. Cheers, Hiren pgpSvDnrGKVF3.pgp Description: PGP signature
svn commit: r309858 - head/sys/netinet
Author: hiren Date: Sun Dec 11 23:14:47 2016 New Revision: 309858 URL: https://svnweb.freebsd.org/changeset/base/309858 Log: We currently don't do TSO if ip options are present. In case of IPv6, we look at in6p_options to check that. That is incorrect as we carry ip options in in6p_outputopts. Also, just checking for in6p_outputopts being NULL won't suffice as we combine ip options and ip header fields both in that one field. The commit fixes this by using ip6_optlen() which correctly calculates length of only ip options for IPv6. Reviewed by: ae, bz MFC after:3 weeks Sponsored by: Limelight Networks Modified: head/sys/netinet/tcp_output.c Modified: head/sys/netinet/tcp_output.c == --- head/sys/netinet/tcp_output.c Sun Dec 11 19:58:13 2016 (r309857) +++ head/sys/netinet/tcp_output.c Sun Dec 11 23:14:47 2016 (r309858) @@ -545,6 +545,11 @@ after_sack_rexmit: * (except for the sequence number) for all generated packets. This * makes it impossible to transmit any options which vary per generated * segment or packet. +* +* IPv4 handling has a clear separation of ip options and ip header +* flags while IPv6 combines both in in6p_outputopts. ip6_optlen() does +* the right thing below to provide length of just ip options and thus +* checking for ipoptlen is enough to decide if ip options are present. 
*/ #ifdef IPSEC /* @@ -553,14 +558,25 @@ after_sack_rexmit: */ ipsec_optlen = ipsec_hdrsiz_tcp(tp); #endif + +#ifdef INET6 + if (isipv6) + ipoptlen = ip6_optlen(tp->t_inpcb); + else +#endif + if (tp->t_inpcb->inp_options) + ipoptlen = tp->t_inpcb->inp_options->m_len - + offsetof(struct ipoption, ipopt_list); + else + ipoptlen = 0; +#ifdef IPSEC + ipoptlen += ipsec_optlen; +#endif + if ((tp->t_flags & TF_TSO) && V_tcp_do_tso && len > tp->t_maxseg && ((tp->t_flags & TF_SIGNATURE) == 0) && tp->rcv_numsacks == 0 && sack_rxmit == 0 && -#ifdef IPSEC - ipsec_optlen == 0 && -#endif - tp->t_inpcb->inp_options == NULL && - tp->t_inpcb->in6p_options == NULL) + ipoptlen == 0) tso = 1; if (sack_rxmit) { @@ -833,20 +849,6 @@ send: hdrlen += optlen = tcp_addoptions(, opt); } -#ifdef INET6 - if (isipv6) - ipoptlen = ip6_optlen(tp->t_inpcb); - else -#endif - if (tp->t_inpcb->inp_options) - ipoptlen = tp->t_inpcb->inp_options->m_len - - offsetof(struct ipoption, ipopt_list); - else - ipoptlen = 0; -#ifdef IPSEC - ipoptlen += ipsec_optlen; -#endif - /* * Adjust data length if insertion of options will * bump the packet length beyond the t_maxseg length. ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
Re: svn commit: r309351 - head/etc/defaults
On 12/01/16 at 08:48P, hiren panchasara wrote: > On 12/01/16 at 04:35P, Warner Losh wrote: > > Author: imp > > Date: Thu Dec 1 04:35:43 2016 > > New Revision: 309351 > > URL: https://svnweb.freebsd.org/changeset/base/309351 > > > > Log: > > Revert the 'performance' setting to 'NONE' from C2. > > Just a note that this commit reverts r282110. Ah, my bad. performance_cx_lowest has more history than that. :-) (HIGH -> Cmax -> C2 -> NONE) Cheers, Hiren pgppfedJR0kE5.pgp Description: PGP signature
Re: svn commit: r309351 - head/etc/defaults
On 12/01/16 at 04:35P, Warner Losh wrote: > Author: imp > Date: Thu Dec 1 04:35:43 2016 > New Revision: 309351 > URL: https://svnweb.freebsd.org/changeset/base/309351 > > Log: > Revert the 'performance' setting to 'NONE' from C2. Just a note that this commit reverts r282110. Cheers, Hiren pgpg5ixLSD8tO.pgp Description: PGP signature
Re: svn commit: r308943 - head/sys/netinet
Forgot to tag for MFC. On 11/21/16 at 08:53P, Hiren Panchasara wrote: > Author: hiren > Date: Mon Nov 21 20:53:11 2016 > New Revision: 308943 > URL: https://svnweb.freebsd.org/changeset/base/308943 > > Log: > For RTT calculations mid-session, we explicitly ignore ACKs with tsecr of 0 > as > many borken middle-boxes tend to do that. But during 3whs, in > syncache_expand(), > we don't do that which causes us to send a RST to such a client. Relax this > constraint by only using tsecr to compare against timestamp that we sent > when it > is not 0. As a result, we'd now accept the final ACK of 3whs with tsecr of > 0. > > Reviewed by:jtl, gnn > Sponsored by: Limelight Networks > Differential Revision: https://reviews.freebsd.org/D8552 MFC after: 2 weeks Cheers, Hiren pgpvDJzzSkGiG.pgp Description: PGP signature
svn commit: r308944 - head/sys/modules/bytgpio
Author: hiren Date: Mon Nov 21 21:07:43 2016 New Revision: 308944 URL: https://svnweb.freebsd.org/changeset/base/308944 Log: r308942 broke kernel build. Add acpi_if.h to module makefile to fix it. Submitted by: peter Modified: head/sys/modules/bytgpio/Makefile Modified: head/sys/modules/bytgpio/Makefile == --- head/sys/modules/bytgpio/Makefile Mon Nov 21 20:53:11 2016 (r308943) +++ head/sys/modules/bytgpio/Makefile Mon Nov 21 21:07:43 2016 (r308944) @@ -3,6 +3,6 @@ .PATH: ${.CURDIR}/../../dev/gpio KMOD= bytgpio SRCS= bytgpio.c -SRCS+= device_if.h bus_if.h gpio_if.h +SRCS+= acpi_if.h device_if.h bus_if.h gpio_if.h .include ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r308943 - head/sys/netinet
Author: hiren Date: Mon Nov 21 20:53:11 2016 New Revision: 308943 URL: https://svnweb.freebsd.org/changeset/base/308943 Log: For RTT calculations mid-session, we explicitly ignore ACKs with tsecr of 0 as many borken middle-boxes tend to do that. But during 3whs, in syncache_expand(), we don't do that which causes us to send a RST to such a client. Relax this constraint by only using tsecr to compare against timestamp that we sent when it is not 0. As a result, we'd now accept the final ACK of 3whs with tsecr of 0. Reviewed by: jtl, gnn Sponsored by: Limelight Networks Differential Revision:https://reviews.freebsd.org/D8552 Modified: head/sys/netinet/tcp_syncache.c Modified: head/sys/netinet/tcp_syncache.c == --- head/sys/netinet/tcp_syncache.c Mon Nov 21 19:47:37 2016 (r308942) +++ head/sys/netinet/tcp_syncache.c Mon Nov 21 20:53:11 2016 (r308943) @@ -1069,10 +1069,17 @@ syncache_expand(struct in_conninfo *inc, } /* -* If timestamps were negotiated the reflected timestamp -* must be equal to what we actually sent in the SYN|ACK. +* If timestamps were negotiated, the reflected timestamp +* must be equal to what we actually sent in the SYN|ACK +* except in the case of 0. Some boxes are known for sending +* broken timestamp replies during the 3whs (and potentially +* during the connection also). +* +* Accept the final ACK of 3whs with reflected timestamp of 0 +* instead of sending a RST and deleting the syncache entry. */ - if ((to->to_flags & TOF_TS) && to->to_tsecr != sc->sc_ts) { + if ((to->to_flags & TOF_TS) && to->to_tsecr && + to->to_tsecr != sc->sc_ts) { if ((s = tcp_log_addrs(inc, th, NULL, NULL))) log(LOG_DEBUG, "%s; %s: TSECR %u != TS %u, " "segment rejected\n", ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r308180 - in head/sys/netinet: . cc
Author: hiren Date: Tue Nov 1 21:08:37 2016 New Revision: 308180 URL: https://svnweb.freebsd.org/changeset/base/308180 Log: Set slow start threshold more accurately on loss to be flightsize/2 instead of cwnd/2 as recommended by RFC5681. (spotted by mmacy at nextbsd dot org) Restore pre-r307901 behavior of aligning ssthresh/cwnd on mss boundary. (spotted by slawa at zxy dot spb dot ru) Tested by:dim, Slawa MFC after:1 month X-MFC with: r307901 Sponsored by: Limelight Networks Differential Revision:https://reviews.freebsd.org/D8349 Modified: head/sys/netinet/cc/cc_cdg.c head/sys/netinet/cc/cc_chd.c head/sys/netinet/cc/cc_dctcp.c head/sys/netinet/cc/cc_htcp.c head/sys/netinet/cc/cc_newreno.c head/sys/netinet/tcp_input.c Modified: head/sys/netinet/cc/cc_cdg.c == --- head/sys/netinet/cc/cc_cdg.cTue Nov 1 19:18:54 2016 (r308179) +++ head/sys/netinet/cc/cc_cdg.cTue Nov 1 21:08:37 2016 (r308180) @@ -474,7 +474,9 @@ cdg_cong_signal(struct cc_var *ccv, uint ENTER_RECOVERY(CCV(ccv, t_flags)); break; case CC_RTO: - CCV(ccv, snd_ssthresh) = max(2*mss, cwin/2); + CCV(ccv, snd_ssthresh) = + max((CCV(ccv, snd_max) - CCV(ccv, snd_una)) / 2 / mss, 2) + * mss; CCV(ccv, snd_cwnd) = mss; break; default: Modified: head/sys/netinet/cc/cc_chd.c == --- head/sys/netinet/cc/cc_chd.cTue Nov 1 19:18:54 2016 (r308179) +++ head/sys/netinet/cc/cc_chd.cTue Nov 1 21:08:37 2016 (r308180) @@ -330,13 +330,11 @@ chd_cong_signal(struct cc_var *ccv, uint struct ertt *e_t; struct chd *chd_data; int qdly; - uint32_t cwin; u_int mss; e_t = khelp_get_osd(CCV(ccv, osd), ertt_id); chd_data = ccv->cc_data; qdly = imax(e_t->rtt, chd_data->maxrtt_in_rtt) - e_t->minrtt; - cwin = CCV(ccv, snd_cwnd); mss = CCV(ccv, t_maxseg); switch(signal_type) { @@ -378,7 +376,9 @@ chd_cong_signal(struct cc_var *ccv, uint ENTER_FASTRECOVERY(CCV(ccv, t_flags)); break; case CC_RTO: - CCV(ccv, snd_ssthresh) = max(2*mss, cwin/2); + CCV(ccv, snd_ssthresh) = + max((CCV(ccv, snd_max) - CCV(ccv, snd_una)) / 2 / mss, 2) + * mss; CCV(ccv, 
snd_cwnd) = mss; break; Modified: head/sys/netinet/cc/cc_dctcp.c == --- head/sys/netinet/cc/cc_dctcp.c Tue Nov 1 19:18:54 2016 (r308179) +++ head/sys/netinet/cc/cc_dctcp.c Tue Nov 1 21:08:37 2016 (r308180) @@ -230,19 +230,21 @@ static void dctcp_cong_signal(struct cc_var *ccv, uint32_t type) { struct dctcp *dctcp_data; - uint32_t cwin; + uint32_t cwin, ssthresh_on_loss; u_int mss; dctcp_data = ccv->cc_data; cwin = CCV(ccv, snd_cwnd); mss = CCV(ccv, t_maxseg); + ssthresh_on_loss = + max((CCV(ccv, snd_max) - CCV(ccv, snd_una)) / 2 / mss, 2) + * mss; switch (type) { case CC_NDUPACK: if (!IN_FASTRECOVERY(CCV(ccv, t_flags))) { if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) { - CCV(ccv, snd_ssthresh) = mss * - max(cwin / 2 / mss, 2); + CCV(ccv, snd_ssthresh) = ssthresh_on_loss; dctcp_data->num_cong_events++; } else { /* cwnd has already updated as congestion @@ -250,8 +252,7 @@ dctcp_cong_signal(struct cc_var *ccv, ui * snd_cwnd_prev and recalculate snd_ssthresh */ cwin = CCV(ccv, snd_cwnd_prev); - CCV(ccv, snd_ssthresh) = - max(cwin / 2 / mss, 2) * mss; + CCV(ccv, snd_ssthresh) = ssthresh_on_loss; } ENTER_RECOVERY(CCV(ccv, t_flags)); } @@ -265,8 +266,7 @@ dctcp_cong_signal(struct cc_var *ccv, ui if (!IN_CONGRECOVERY(CCV(ccv, t_flags))) { if (V_dctcp_slowstart && dctcp_data->num_cong_events++ == 0) { - CCV(ccv, snd_ssthresh) = - mss * max(cwin / 2 / mss, 2); + CCV(ccv, snd_ssthresh) = ssthresh_on_loss; dctcp_data->alpha =
svn commit: r307901 - in head/sys/netinet: . cc tcp_stacks
Author: hiren Date: Tue Oct 25 05:45:47 2016 New Revision: 307901 URL: https://svnweb.freebsd.org/changeset/base/307901 Log: FreeBSD tcp stack used to inform respective congestion control module about the loss event but not use or obay the recommendations i.e. values set by it in some cases. Here is an attempt to solve that confusion by following relevant RFCs/drafts. Stack only sets congestion window/slow start threshold values when there is no CC module availalbe to take that action. All CC modules are inspected and updated when needed to take appropriate action on loss. tcp_stacks/fastpath module has been updated to adapt these changes. Note: Probably, the most significant change would be to not bring congestion window down to 1MSS on a loss signaled by 3-duplicate acks and letting respective CC decide that value. In collaboration with:Matt Macy Discussed on: transport@ mailing list Reviewed by: jtl MFC after:1 month Sponsored by: Limelight Networks Differential Revision:https://reviews.freebsd.org/D8225 Modified: head/sys/netinet/cc/cc_cdg.c head/sys/netinet/cc/cc_chd.c head/sys/netinet/cc/cc_cubic.c head/sys/netinet/cc/cc_dctcp.c head/sys/netinet/cc/cc_htcp.c head/sys/netinet/cc/cc_newreno.c head/sys/netinet/tcp_input.c head/sys/netinet/tcp_stacks/fastpath.c Modified: head/sys/netinet/cc/cc_cdg.c == --- head/sys/netinet/cc/cc_cdg.cTue Oct 25 05:07:51 2016 (r307900) +++ head/sys/netinet/cc/cc_cdg.cTue Oct 25 05:45:47 2016 (r307901) @@ -431,6 +431,11 @@ static void cdg_cong_signal(struct cc_var *ccv, uint32_t signal_type) { struct cdg *cdg_data = ccv->cc_data; + uint32_t cwin; + u_int mss; + + cwin = CCV(ccv, snd_cwnd); + mss = CCV(ccv, t_maxseg); switch(signal_type) { case CC_CDG_DELAY: @@ -448,7 +453,7 @@ cdg_cong_signal(struct cc_var *ccv, uint */ if (IN_CONGRECOVERY(CCV(ccv, t_flags)) || cdg_data->queue_state < CDG_Q_FULL) { - CCV(ccv, snd_ssthresh) = CCV(ccv, snd_cwnd); + CCV(ccv, snd_ssthresh) = cwin; CCV(ccv, snd_recover) = CCV(ccv, snd_max); } else { /* 
@@ -461,13 +466,17 @@ cdg_cong_signal(struct cc_var *ccv, uint cdg_data->shadow_w, RENO_BETA); CCV(ccv, snd_ssthresh) = max(cdg_data->shadow_w, - cdg_window_decrease(ccv, CCV(ccv, snd_cwnd), - V_cdg_beta_loss)); + cdg_window_decrease(ccv, cwin, V_cdg_beta_loss)); + CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh); cdg_data->window_incr = cdg_data->rtt_count = 0; } ENTER_RECOVERY(CCV(ccv, t_flags)); break; + case CC_RTO: + CCV(ccv, snd_ssthresh) = max(2*mss, cwin/2); + CCV(ccv, snd_cwnd) = mss; + break; default: newreno_cc_algo.cong_signal(ccv, signal_type); break; Modified: head/sys/netinet/cc/cc_chd.c == --- head/sys/netinet/cc/cc_chd.cTue Oct 25 05:07:51 2016 (r307900) +++ head/sys/netinet/cc/cc_chd.cTue Oct 25 05:45:47 2016 (r307901) @@ -330,10 +330,14 @@ chd_cong_signal(struct cc_var *ccv, uint struct ertt *e_t; struct chd *chd_data; int qdly; + uint32_t cwin; + u_int mss; e_t = khelp_get_osd(CCV(ccv, osd), ertt_id); chd_data = ccv->cc_data; qdly = imax(e_t->rtt, chd_data->maxrtt_in_rtt) - e_t->minrtt; + cwin = CCV(ccv, snd_cwnd); + mss = CCV(ccv, t_maxseg); switch(signal_type) { case CC_CHD_DELAY: @@ -373,6 +377,10 @@ chd_cong_signal(struct cc_var *ccv, uint } ENTER_FASTRECOVERY(CCV(ccv, t_flags)); break; + case CC_RTO: + CCV(ccv, snd_ssthresh) = max(2*mss, cwin/2); + CCV(ccv, snd_cwnd) = mss; + break; default: newreno_cc_algo.cong_signal(ccv, signal_type); Modified: head/sys/netinet/cc/cc_cubic.c == --- head/sys/netinet/cc/cc_cubic.c Tue Oct 25 05:07:51 2016 (r307900) +++ head/sys/netinet/cc/cc_cubic.c Tue Oct 25 05:45:47 2016 (r307901) @@ -225,8 +225,12 @@ static void cubic_cong_signal(struct cc_var *ccv, uint32_t type) { struct cubic *cubic_data; + uint32_t cwin; + u_int mss; cubic_data = ccv->cc_data; + cwin = CCV(ccv, snd_cwnd); +
svn commit: r307900 - in head/sys/netinet: . cc
Author: hiren Date: Tue Oct 25 05:07:51 2016 New Revision: 307900 URL: https://svnweb.freebsd.org/changeset/base/307900 Log: Undo r307899. It needs a bit more work and proper commit log. Modified: head/sys/netinet/cc/cc_cdg.c head/sys/netinet/cc/cc_chd.c head/sys/netinet/cc/cc_cubic.c head/sys/netinet/cc/cc_dctcp.c head/sys/netinet/cc/cc_htcp.c head/sys/netinet/cc/cc_newreno.c head/sys/netinet/tcp_input.c Modified: head/sys/netinet/cc/cc_cdg.c == --- head/sys/netinet/cc/cc_cdg.cTue Oct 25 05:03:33 2016 (r307899) +++ head/sys/netinet/cc/cc_cdg.cTue Oct 25 05:07:51 2016 (r307900) @@ -431,11 +431,6 @@ static void cdg_cong_signal(struct cc_var *ccv, uint32_t signal_type) { struct cdg *cdg_data = ccv->cc_data; - uint32_t cwin; - u_int mss; - - cwin = CCV(ccv, snd_cwnd); - mss = CCV(ccv, t_maxseg); switch(signal_type) { case CC_CDG_DELAY: @@ -453,7 +448,7 @@ cdg_cong_signal(struct cc_var *ccv, uint */ if (IN_CONGRECOVERY(CCV(ccv, t_flags)) || cdg_data->queue_state < CDG_Q_FULL) { - CCV(ccv, snd_ssthresh) = cwin; + CCV(ccv, snd_ssthresh) = CCV(ccv, snd_cwnd); CCV(ccv, snd_recover) = CCV(ccv, snd_max); } else { /* @@ -466,17 +461,13 @@ cdg_cong_signal(struct cc_var *ccv, uint cdg_data->shadow_w, RENO_BETA); CCV(ccv, snd_ssthresh) = max(cdg_data->shadow_w, - cdg_window_decrease(ccv, cwin, V_cdg_beta_loss)); - CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh); + cdg_window_decrease(ccv, CCV(ccv, snd_cwnd), + V_cdg_beta_loss)); cdg_data->window_incr = cdg_data->rtt_count = 0; } ENTER_RECOVERY(CCV(ccv, t_flags)); break; - case CC_RTO: - CCV(ccv, snd_ssthresh) = max(2*mss, cwin/2); - CCV(ccv, snd_cwnd) = mss; - break; default: newreno_cc_algo.cong_signal(ccv, signal_type); break; Modified: head/sys/netinet/cc/cc_chd.c == --- head/sys/netinet/cc/cc_chd.cTue Oct 25 05:03:33 2016 (r307899) +++ head/sys/netinet/cc/cc_chd.cTue Oct 25 05:07:51 2016 (r307900) @@ -330,14 +330,10 @@ chd_cong_signal(struct cc_var *ccv, uint struct ertt *e_t; struct chd *chd_data; int qdly; - uint32_t cwin; - 
u_int mss; e_t = khelp_get_osd(CCV(ccv, osd), ertt_id); chd_data = ccv->cc_data; qdly = imax(e_t->rtt, chd_data->maxrtt_in_rtt) - e_t->minrtt; - cwin = CCV(ccv, snd_cwnd); - mss = CCV(ccv, t_maxseg); switch(signal_type) { case CC_CHD_DELAY: @@ -377,10 +373,6 @@ chd_cong_signal(struct cc_var *ccv, uint } ENTER_FASTRECOVERY(CCV(ccv, t_flags)); break; - case CC_RTO: - CCV(ccv, snd_ssthresh) = max(2*mss, cwin/2); - CCV(ccv, snd_cwnd) = mss; - break; default: newreno_cc_algo.cong_signal(ccv, signal_type); Modified: head/sys/netinet/cc/cc_cubic.c == --- head/sys/netinet/cc/cc_cubic.c Tue Oct 25 05:03:33 2016 (r307899) +++ head/sys/netinet/cc/cc_cubic.c Tue Oct 25 05:07:51 2016 (r307900) @@ -225,12 +225,8 @@ static void cubic_cong_signal(struct cc_var *ccv, uint32_t type) { struct cubic *cubic_data; - uint32_t cwin; - u_int mss; cubic_data = ccv->cc_data; - cwin = CCV(ccv, snd_cwnd); - mss = CCV(ccv, t_maxseg); switch (type) { case CC_NDUPACK: @@ -239,8 +235,7 @@ cubic_cong_signal(struct cc_var *ccv, ui cubic_ssthresh_update(ccv); cubic_data->num_cong_events++; cubic_data->prev_max_cwnd = cubic_data->max_cwnd; - cubic_data->max_cwnd = cwin; - CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh); + cubic_data->max_cwnd = CCV(ccv, snd_cwnd); } ENTER_RECOVERY(CCV(ccv, t_flags)); } @@ -251,7 +246,7 @@ cubic_cong_signal(struct cc_var *ccv, ui cubic_ssthresh_update(ccv); cubic_data->num_cong_events++; cubic_data->prev_max_cwnd = cubic_data->max_cwnd; - cubic_data->max_cwnd = cwin; +
Re: svn commit: r307899 - in head/sys/netinet: . cc
Sigh. I'll revert this and do it right. On 10/25/16 at 05:03P, Hiren Panchasara wrote: > Author: hiren > Date: Tue Oct 25 05:03:33 2016 > New Revision: 307899 > URL: https://svnweb.freebsd.org/changeset/base/307899 > > Log: > In Collaboration with: Matt Macy > Reviewed by:jtl > Sponsored by: Limelight Networks > Differential Revision: https://reviews.freebsd.org/D8225 > > Modified: > head/sys/netinet/cc/cc_cdg.c > head/sys/netinet/cc/cc_chd.c > head/sys/netinet/cc/cc_cubic.c > head/sys/netinet/cc/cc_dctcp.c > head/sys/netinet/cc/cc_htcp.c > head/sys/netinet/cc/cc_newreno.c > head/sys/netinet/tcp_input.c > > Modified: head/sys/netinet/cc/cc_cdg.c > == > --- head/sys/netinet/cc/cc_cdg.c Tue Oct 25 04:14:03 2016 > (r307898) > +++ head/sys/netinet/cc/cc_cdg.c Tue Oct 25 05:03:33 2016 > (r307899) > @@ -431,6 +431,11 @@ static void > cdg_cong_signal(struct cc_var *ccv, uint32_t signal_type) > { > struct cdg *cdg_data = ccv->cc_data; > + uint32_t cwin; > + u_int mss; > + > + cwin = CCV(ccv, snd_cwnd); > + mss = CCV(ccv, t_maxseg); > > switch(signal_type) { > case CC_CDG_DELAY: > @@ -448,7 +453,7 @@ cdg_cong_signal(struct cc_var *ccv, uint >*/ > if (IN_CONGRECOVERY(CCV(ccv, t_flags)) || > cdg_data->queue_state < CDG_Q_FULL) { > - CCV(ccv, snd_ssthresh) = CCV(ccv, snd_cwnd); > + CCV(ccv, snd_ssthresh) = cwin; > CCV(ccv, snd_recover) = CCV(ccv, snd_max); > } else { > /* > @@ -461,13 +466,17 @@ cdg_cong_signal(struct cc_var *ccv, uint > cdg_data->shadow_w, RENO_BETA); > > CCV(ccv, snd_ssthresh) = max(cdg_data->shadow_w, > - cdg_window_decrease(ccv, CCV(ccv, snd_cwnd), > - V_cdg_beta_loss)); > + cdg_window_decrease(ccv, cwin, V_cdg_beta_loss)); > + CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh); > > cdg_data->window_incr = cdg_data->rtt_count = 0; > } > ENTER_RECOVERY(CCV(ccv, t_flags)); > break; > + case CC_RTO: > + CCV(ccv, snd_ssthresh) = max(2*mss, cwin/2); > + CCV(ccv, snd_cwnd) = mss; > + break; > default: > newreno_cc_algo.cong_signal(ccv, signal_type); > break; > > 
Modified: head/sys/netinet/cc/cc_chd.c > == > --- head/sys/netinet/cc/cc_chd.c Tue Oct 25 04:14:03 2016 > (r307898) > +++ head/sys/netinet/cc/cc_chd.c Tue Oct 25 05:03:33 2016 > (r307899) > @@ -330,10 +330,14 @@ chd_cong_signal(struct cc_var *ccv, uint > struct ertt *e_t; > struct chd *chd_data; > int qdly; > + uint32_t cwin; > + u_int mss; > > e_t = khelp_get_osd(CCV(ccv, osd), ertt_id); > chd_data = ccv->cc_data; > qdly = imax(e_t->rtt, chd_data->maxrtt_in_rtt) - e_t->minrtt; > + cwin = CCV(ccv, snd_cwnd); > + mss = CCV(ccv, t_maxseg); > > switch(signal_type) { > case CC_CHD_DELAY: > @@ -373,6 +377,10 @@ chd_cong_signal(struct cc_var *ccv, uint > } > ENTER_FASTRECOVERY(CCV(ccv, t_flags)); > break; > + case CC_RTO: > + CCV(ccv, snd_ssthresh) = max(2*mss, cwin/2); > + CCV(ccv, snd_cwnd) = mss; > + break; > > default: > newreno_cc_algo.cong_signal(ccv, signal_type); > > Modified: head/sys/netinet/cc/cc_cubic.c > == > --- head/sys/netinet/cc/cc_cubic.cTue Oct 25 04:14:03 2016 > (r307898) > +++ head/sys/netinet/cc/cc_cubic.cTue Oct 25 05:03:33 2016 > (r307899) > @@ -225,8 +225,12 @@ static void > cubic_cong_signal(struct cc_var *ccv, uint32_t type) > { > struct cubic *cubic_data; > + uint32_t cwin; > + u_int mss; > > cubic_data = ccv->cc_data; > + cwin = CCV(ccv, snd_cwnd); > + mss = CCV(ccv, t_maxseg); > > switch (type) { > case CC_NDUPACK: > @@ -235,7 +239,8 @@ cubic_cong_signal(struct cc_var *ccv, ui > cubic_ssthresh_update(ccv); >
svn commit: r307899 - in head/sys/netinet: . cc
Author: hiren Date: Tue Oct 25 05:03:33 2016 New Revision: 307899 URL: https://svnweb.freebsd.org/changeset/base/307899 Log: In Collaboration with:Matt Macy Reviewed by: jtl Sponsored by: Limelight Networks Differential Revision:https://reviews.freebsd.org/D8225 Modified: head/sys/netinet/cc/cc_cdg.c head/sys/netinet/cc/cc_chd.c head/sys/netinet/cc/cc_cubic.c head/sys/netinet/cc/cc_dctcp.c head/sys/netinet/cc/cc_htcp.c head/sys/netinet/cc/cc_newreno.c head/sys/netinet/tcp_input.c Modified: head/sys/netinet/cc/cc_cdg.c == --- head/sys/netinet/cc/cc_cdg.cTue Oct 25 04:14:03 2016 (r307898) +++ head/sys/netinet/cc/cc_cdg.cTue Oct 25 05:03:33 2016 (r307899) @@ -431,6 +431,11 @@ static void cdg_cong_signal(struct cc_var *ccv, uint32_t signal_type) { struct cdg *cdg_data = ccv->cc_data; + uint32_t cwin; + u_int mss; + + cwin = CCV(ccv, snd_cwnd); + mss = CCV(ccv, t_maxseg); switch(signal_type) { case CC_CDG_DELAY: @@ -448,7 +453,7 @@ cdg_cong_signal(struct cc_var *ccv, uint */ if (IN_CONGRECOVERY(CCV(ccv, t_flags)) || cdg_data->queue_state < CDG_Q_FULL) { - CCV(ccv, snd_ssthresh) = CCV(ccv, snd_cwnd); + CCV(ccv, snd_ssthresh) = cwin; CCV(ccv, snd_recover) = CCV(ccv, snd_max); } else { /* @@ -461,13 +466,17 @@ cdg_cong_signal(struct cc_var *ccv, uint cdg_data->shadow_w, RENO_BETA); CCV(ccv, snd_ssthresh) = max(cdg_data->shadow_w, - cdg_window_decrease(ccv, CCV(ccv, snd_cwnd), - V_cdg_beta_loss)); + cdg_window_decrease(ccv, cwin, V_cdg_beta_loss)); + CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh); cdg_data->window_incr = cdg_data->rtt_count = 0; } ENTER_RECOVERY(CCV(ccv, t_flags)); break; + case CC_RTO: + CCV(ccv, snd_ssthresh) = max(2*mss, cwin/2); + CCV(ccv, snd_cwnd) = mss; + break; default: newreno_cc_algo.cong_signal(ccv, signal_type); break; Modified: head/sys/netinet/cc/cc_chd.c == --- head/sys/netinet/cc/cc_chd.cTue Oct 25 04:14:03 2016 (r307898) +++ head/sys/netinet/cc/cc_chd.cTue Oct 25 05:03:33 2016 (r307899) @@ -330,10 +330,14 @@ chd_cong_signal(struct cc_var *ccv, 
uint struct ertt *e_t; struct chd *chd_data; int qdly; + uint32_t cwin; + u_int mss; e_t = khelp_get_osd(CCV(ccv, osd), ertt_id); chd_data = ccv->cc_data; qdly = imax(e_t->rtt, chd_data->maxrtt_in_rtt) - e_t->minrtt; + cwin = CCV(ccv, snd_cwnd); + mss = CCV(ccv, t_maxseg); switch(signal_type) { case CC_CHD_DELAY: @@ -373,6 +377,10 @@ chd_cong_signal(struct cc_var *ccv, uint } ENTER_FASTRECOVERY(CCV(ccv, t_flags)); break; + case CC_RTO: + CCV(ccv, snd_ssthresh) = max(2*mss, cwin/2); + CCV(ccv, snd_cwnd) = mss; + break; default: newreno_cc_algo.cong_signal(ccv, signal_type); Modified: head/sys/netinet/cc/cc_cubic.c == --- head/sys/netinet/cc/cc_cubic.c Tue Oct 25 04:14:03 2016 (r307898) +++ head/sys/netinet/cc/cc_cubic.c Tue Oct 25 05:03:33 2016 (r307899) @@ -225,8 +225,12 @@ static void cubic_cong_signal(struct cc_var *ccv, uint32_t type) { struct cubic *cubic_data; + uint32_t cwin; + u_int mss; cubic_data = ccv->cc_data; + cwin = CCV(ccv, snd_cwnd); + mss = CCV(ccv, t_maxseg); switch (type) { case CC_NDUPACK: @@ -235,7 +239,8 @@ cubic_cong_signal(struct cc_var *ccv, ui cubic_ssthresh_update(ccv); cubic_data->num_cong_events++; cubic_data->prev_max_cwnd = cubic_data->max_cwnd; - cubic_data->max_cwnd = CCV(ccv, snd_cwnd); + cubic_data->max_cwnd = cwin; + CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh); } ENTER_RECOVERY(CCV(ccv, t_flags)); } @@ -246,7 +251,7 @@ cubic_cong_signal(struct cc_var *ccv, ui cubic_ssthresh_update(ccv); cubic_data->num_cong_events++;
svn commit: r307745 - head/sys/kern
Author: hiren Date: Fri Oct 21 18:27:30 2016 New Revision: 307745 URL: https://svnweb.freebsd.org/changeset/base/307745 Log: Rework r306337. In sendit(), if mp->msg_control is present, then in sockargs() we are allocating mbuf to store mp->msg_control. Later in kern_sendit(), call to getsock_cap(), will check validity of file pointer passed, if this fails EBADF is returned but mbuf allocated in sockargs() is not freed. Made code changes to free the same. Since freeing control mbuf in sendit() after checking (control != NULL) may lead to double freeing of control mbuf in sendit(), we can free control mbuf in kern_sendit() if there are any errors in the routine. Submitted by: Lohith Bellad Reviewed by: glebius MFC after: 3 weeks Differential Revision: https://reviews.freebsd.org/D8152 Modified: head/sys/kern/uipc_syscalls.c Modified: head/sys/kern/uipc_syscalls.c == --- head/sys/kern/uipc_syscalls.c Fri Oct 21 17:44:47 2016 (r307744) +++ head/sys/kern/uipc_syscalls.c Fri Oct 21 18:27:30 2016 (r307745) @@ -762,8 +762,10 @@ kern_sendit(struct thread *td, int s, st cap_rights_set(, CAP_CONNECT); } error = getsock_cap(td, s, , , NULL, NULL); - if (error != 0) + if (error != 0) { + m_freem(control); return (error); + } so = (struct socket *)fp->f_data; #ifdef KTRACE @@ -774,12 +776,16 @@ kern_sendit(struct thread *td, int s, st if (mp->msg_name != NULL) { error = mac_socket_check_connect(td->td_ucred, so, mp->msg_name); - if (error != 0) + if (error != 0) { + m_freem(control); goto bad; + } } error = mac_socket_check_send(td->td_ucred, so); - if (error != 0) + if (error != 0) { + m_freem(control); goto bad; + } #endif auio.uio_iov = mp->msg_iov; @@ -793,6 +799,7 @@ kern_sendit(struct thread *td, int s, st for (i = 0; i < mp->msg_iovlen; i++, iov++) { if ((auio.uio_resid += iov->iov_len) < 0) { error = EINVAL; + m_freem(control); goto bad; } } ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to 
"svn-src-head-unsubscr...@freebsd.org"
Re: svn commit: r307727 - head/share/man/man4
On 10/21/16 at 11:29P, Michael Tuexen wrote: > Author: tuexen > Date: Fri Oct 21 11:29:25 2016 > New Revision: 307727 > URL: https://svnweb.freebsd.org/changeset/base/307727 > > Log: > Document the TCP sysctl variables insecure_rst and insecure_syn. > > MFC after: 1 month > Sponsored by: netflix > > Modified: > head/share/man/man4/tcp.4 > > Modified: head/share/man/man4/tcp.4 > == > --- head/share/man/man4/tcp.4 Fri Oct 21 10:32:57 2016(r307726) > +++ head/share/man/man4/tcp.4 Fri Oct 21 11:29:25 2016(r307727) > @@ -34,7 +34,7 @@ > .\" From: @(#)tcp.4 8.1 (Berkeley) 6/5/93 > .\" $FreeBSD$ > .\" > -.Dd June 28, 2016 > +.Dd October 21, 2016 > .Dt TCP 4 > .Os > .Sh NAME > @@ -586,6 +586,12 @@ downshift. > List of available TCP function blocks (TCP stacks). > .It Va functions_default > The default TCP function block (TCP stack). > +.It Va insecure_rst > +Use criterias defined in RFC793 instead of RFC5961 for accepting RST > segments. I am not a native speaker but should we s/criterias /criteria / ? > +Default is false. > +.It Va insecure_syn > +Use criterias defined in RFC793 instead of RFC5961 for accepting SYN > segments. Same. > +Default is false. > .El > .Sh ERRORS > A socket operation may fail with one of the following errors returned: > Cheers, Hiren pgpI6MPidxqLP.pgp Description: PGP signature
svn commit: r307545 - head/sys/netinet
Author: hiren Date: Tue Oct 18 02:40:25 2016 New Revision: 307545 URL: https://svnweb.freebsd.org/changeset/base/307545 Log: Make sure tcp_mss() has the same check as tcp_mss_update() to have t_maxseg set to at least 64. This is still just a coverup to avoid kernel panic and not an actual fix. PR: 213232 Reviewed by: glebius MFC after:1 week Sponsored by: Limelight Networks Differential Revision:https://reviews.freebsd.org/D8272 Modified: head/sys/netinet/tcp_input.c Modified: head/sys/netinet/tcp_input.c == --- head/sys/netinet/tcp_input.cTue Oct 18 01:55:07 2016 (r307544) +++ head/sys/netinet/tcp_input.cTue Oct 18 02:40:25 2016 (r307545) @@ -3758,7 +3758,15 @@ tcp_mss(struct tcpcb *tp, int offer) (void)sbreserve_locked(>so_snd, bufsize, so, NULL); } SOCKBUF_UNLOCK(>so_snd); - tp->t_maxseg = mss; + /* +* Sanity check: make sure that maxseg will be large +* enough to allow some data on segments even if the +* all the option space is used (40bytes). Otherwise +* funny things may happen in tcp_output. +* +* XXXGL: shouldn't we reserve space for IP/IPv6 options? +*/ + tp->t_maxseg = max(mss, 64); SOCKBUF_LOCK(>so_rcv); if ((so->so_rcv.sb_hiwat == V_tcp_recvspace) && metrics.rmx_recvpipe) ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
Re: svn commit: r306337 - head/sys/kern
+ Lohith On 10/04/16 at 01:53P, Gleb Smirnoff wrote: > Hiren, > > On Mon, Sep 26, 2016 at 10:13:58AM +0000, Hiren Panchasara wrote: > H> Author: hiren > H> Date: Mon Sep 26 10:13:58 2016 > H> New Revision: 306337 > H> URL: https://svnweb.freebsd.org/changeset/base/306337 > H> > H> Log: > H> In sendit(), if mp->msg_control is present, then in sockargs() we are > allocating > H> mbuf to store mp->msg_control. Later in kern_sendit(), call to > getsock_cap(), > H> will check validity of file pointer passed, if this fails EBADF is > returned but > H> mbuf allocated in sockargs() is not freed. Fix this possible leak. > H> > H> Submitted by:Lohith Bellad <lohith.bel...@me.com> > H> Reviewed by: adrian > H> MFC after: 3 weeks > H> Differential Revision: https://reviews.freebsd.org/D7910 > > The commit appeared to be incorrect, but a problem exists. I'd like to look > at it. > Is there any reproduce recipe for the leak or bug filed? > Cheers, Hiren pgp8eVxXFJl1H.pgp Description: PGP signature
svn commit: r306464 - head/sys/netinet
Author: hiren Date: Fri Sep 30 00:10:57 2016 New Revision: 306464 URL: https://svnweb.freebsd.org/changeset/base/306464 Log: This adds a sysctl which allows you to disable the TCP hostcache. This is handy during testing of network related changes where cached entries may pollute your results, or during known congestion events where you don't want to unfairly penalize hosts. Prior to r232346 this would have meant you would break any connection with a sub 1500 MTU, as the hostcache was authoritative. All entries as they stand today should simply be used to pre populate values for efficiency. Submitted by: Jason Wolfe (j at nitrology dot com) Reviewed by: rwatson, sbruno, rrs , bz (earlier version) MFC after:2 weeks Sponsored by: Limelight Networks Differential Revision:https://reviews.freebsd.org/D6198 Modified: head/sys/netinet/tcp_hostcache.c Modified: head/sys/netinet/tcp_hostcache.c == --- head/sys/netinet/tcp_hostcache.cThu Sep 29 23:41:57 2016 (r306463) +++ head/sys/netinet/tcp_hostcache.cFri Sep 30 00:10:57 2016 (r306464) @@ -124,6 +124,12 @@ static void tcp_hc_purge(void *); static SYSCTL_NODE(_net_inet_tcp, OID_AUTO, hostcache, CTLFLAG_RW, 0, "TCP Host cache"); +VNET_DEFINE(int, tcp_use_hostcache) = 1; +#define V_tcp_use_hostcache VNET(tcp_use_hostcache) +SYSCTL_INT(_net_inet_tcp_hostcache, OID_AUTO, enable, CTLFLAG_VNET | CTLFLAG_RW, +_NAME(tcp_use_hostcache), 0, +"Enable the TCP hostcache"); + SYSCTL_UINT(_net_inet_tcp_hostcache, OID_AUTO, cachelimit, CTLFLAG_VNET | CTLFLAG_RDTUN, _NAME(tcp_hostcache.cache_limit), 0, "Overall entry limit for hostcache"); @@ -276,6 +282,9 @@ tcp_hc_lookup(struct in_conninfo *inc) struct hc_head *hc_head; struct hc_metrics *hc_entry; + if (!V_tcp_use_hostcache) + return NULL; + KASSERT(inc != NULL, ("tcp_hc_lookup with NULL in_conninfo pointer")); /* @@ -332,6 +341,9 @@ tcp_hc_insert(struct in_conninfo *inc) struct hc_head *hc_head; struct hc_metrics *hc_entry; + if (!V_tcp_use_hostcache) + return NULL; + KASSERT(inc != 
NULL, ("tcp_hc_insert with NULL in_conninfo pointer")); /* @@ -421,6 +433,9 @@ tcp_hc_get(struct in_conninfo *inc, stru { struct hc_metrics *hc_entry; + if (!V_tcp_use_hostcache) + return; + /* * Find the right bucket. */ @@ -452,7 +467,7 @@ tcp_hc_get(struct in_conninfo *inc, stru /* * External function: look up an entry in the hostcache and return the - * discovered path MTU. Returns NULL if no entry is found or value is not + * discovered path MTU. Returns 0 if no entry is found or value is not * set. */ u_long @@ -461,6 +476,9 @@ tcp_hc_getmtu(struct in_conninfo *inc) struct hc_metrics *hc_entry; u_long mtu; + if (!V_tcp_use_hostcache) + return 0; + hc_entry = tcp_hc_lookup(inc); if (hc_entry == NULL) { return 0; @@ -482,6 +500,9 @@ tcp_hc_updatemtu(struct in_conninfo *inc { struct hc_metrics *hc_entry; + if (!V_tcp_use_hostcache) + return; + /* * Find the right bucket. */ @@ -521,6 +542,9 @@ tcp_hc_update(struct in_conninfo *inc, s { struct hc_metrics *hc_entry; + if (!V_tcp_use_hostcache) + return; + hc_entry = tcp_hc_lookup(inc); if (hc_entry == NULL) { hc_entry = tcp_hc_insert(inc); ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
Re: svn commit: r306337 - head/sys/kern
On 09/26/16 at 10:18P, Bruce Evans wrote: > On Mon, 26 Sep 2016, Bruce Evans wrote: > Thanks Bruce for your inputs. I've reverted this change with r306348 for now as there was a panic reported with this. I do not have time to deal with it for at least a few days. I'll try to get back to it. If you (or anyone else) want to fix it the right way, please go ahead. Sorry for the trouble. Cheers, Hiren pgpiOiNRXzpSW.pgp Description: PGP signature
svn commit: r306348 - head/sys/kern
Author: hiren Date: Mon Sep 26 15:45:30 2016 New Revision: 306348 URL: https://svnweb.freebsd.org/changeset/base/306348 Log: Revert r306337. dhw@ reported a panic which seems related to this and bde@ has raised some issues. Modified: head/sys/kern/uipc_syscalls.c Modified: head/sys/kern/uipc_syscalls.c == --- head/sys/kern/uipc_syscalls.c Mon Sep 26 15:38:02 2016 (r306347) +++ head/sys/kern/uipc_syscalls.c Mon Sep 26 15:45:30 2016 (r306348) @@ -685,7 +685,7 @@ sys_socketpair(struct thread *td, struct static int sendit(struct thread *td, int s, struct msghdr *mp, int flags) { - struct mbuf *control = NULL; + struct mbuf *control; struct sockaddr *to; int error; @@ -737,8 +737,6 @@ sendit(struct thread *td, int s, struct bad: free(to, M_SONAME); - if (control) - m_freem(control); return (error); } ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r306337 - head/sys/kern
Author: hiren Date: Mon Sep 26 10:13:58 2016 New Revision: 306337 URL: https://svnweb.freebsd.org/changeset/base/306337 Log: In sendit(), if mp->msg_control is present, then in sockargs() we are allocating mbuf to store mp->msg_control. Later in kern_sendit(), call to getsock_cap(), will check validity of file pointer passed, if this fails EBADF is returned but mbuf allocated in sockargs() is not freed. Fix this possible leak. Submitted by: Lohith Bellad Reviewed by: adrian MFC after: 3 weeks Differential Revision: https://reviews.freebsd.org/D7910 Modified: head/sys/kern/uipc_syscalls.c Modified: head/sys/kern/uipc_syscalls.c == --- head/sys/kern/uipc_syscalls.c Mon Sep 26 08:21:29 2016 (r306336) +++ head/sys/kern/uipc_syscalls.c Mon Sep 26 10:13:58 2016 (r306337) @@ -685,7 +685,7 @@ sys_socketpair(struct thread *td, struct static int sendit(struct thread *td, int s, struct msghdr *mp, int flags) { - struct mbuf *control; + struct mbuf *control = NULL; struct sockaddr *to; int error; @@ -737,6 +737,8 @@ sendit(struct thread *td, int s, struct bad: free(to, M_SONAME); + if (control) + m_freem(control); return (error); } ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
Re: svn commit: r306284 - head/sys/dev/bxe
On 09/23/16 at 11:18P, David C Somayajulu wrote: > Author: davidcs > Date: Fri Sep 23 23:18:54 2016 > New Revision: 306284 > URL: https://svnweb.freebsd.org/changeset/base/306284 > > Log: > Fixes for issues under high workloads David, It is really useful to have details about said issues in the commit-log. Can you share them here, if possible? I'd suggest you add that to your MFC commit(s). > > MFC after:5 days > > Modified: > head/sys/dev/bxe/bxe.c > head/sys/dev/bxe/bxe.h > head/sys/dev/bxe/bxe_stats.h Cheers, Hiren pgpGVCtvLlw9E.pgp Description: PGP signature
Re: svn commit: r304895 - head/usr.bin/netstat
On 08/29/16 at 05:14P, Bruce Evans wrote: > On Sun, 28 Aug 2016, hiren panchasara wrote: > > > On 08/27/16 at 11:06P, Bruce Evans wrote: > >> > >> Log: > >> Fix build without INET6 and with gcc. A function definition was ifdefed > >> for INET6, but its protototype was not, and gcc detects the error. > >> > >> Modified: > >> head/usr.bin/netstat/route.c > > > > Thanks for the fix. > > > > Wouldn't stable/11 be broken in the same way? > > Probably. > > This only affects gcc users who omit INET6, and much more is broken for > stable/11 for gcc. Broken as in couldn't build? I don't think so. gcc + no-v6 seems like a special case but I think this should be fixed in 11. If you don't want to, I'll MFC the change myself. :-) Cheers, Hiren pgpLF_A492Fi3.pgp Description: PGP signature
Re: svn commit: r304895 - head/usr.bin/netstat
On 08/27/16 at 11:06P, Bruce Evans wrote: > Author: bde > Date: Sat Aug 27 11:06:06 2016 > New Revision: 304895 > URL: https://svnweb.freebsd.org/changeset/base/304895 > > Log: > Fix build without INET6 and with gcc. A function definition was ifdefed > for INET6, but its protototype was not, and gcc detects the error. > > Modified: > head/usr.bin/netstat/route.c Thanks for the fix. Wouldn't stable/11 be broken in the same way? Cheers, Hiren pgpballfEOpZr.pgp Description: PGP signature
Re: svn commit: r304857 - head/sys/netinet/tcp_stacks
On 08/27/16 at 01:47P, Lawrence Stewart wrote: > Pointy hat to: lstewart@ > > Apologies all for the breakage, thanks Hiren for fixing and apologies > also for missing your email. For some reason your reply to my commit did > not make it to my inbox and was filtered straight to my mailing list > folder which is annoying - I must have introduced a bug in my > .procmailrc at some point. No worries. :-) > > I don't understand why my buildkernel prior to commit succeeded though. > Is fastpath not build by default? Yeah, it's not since r301814. Cheers, Hiren pgpchIWM8G0ds.pgp Description: PGP signature
svn commit: r304857 - head/sys/netinet/tcp_stacks
Author: hiren Date: Fri Aug 26 19:23:17 2016 New Revision: 304857 URL: https://svnweb.freebsd.org/changeset/base/304857 Log: Adjust TCP module fastpath after r304803's cc_ack_received() changes. Reported by: hiren, bz, np Reviewed by: rrs Sponsored by: Limelight Networks Differential Revision:https://reviews.freebsd.org/D7664 Modified: head/sys/netinet/tcp_stacks/fastpath.c Modified: head/sys/netinet/tcp_stacks/fastpath.c == --- head/sys/netinet/tcp_stacks/fastpath.c Fri Aug 26 19:08:58 2016 (r304856) +++ head/sys/netinet/tcp_stacks/fastpath.c Fri Aug 26 19:23:17 2016 (r304857) @@ -172,7 +172,10 @@ tcp_do_fastack(struct mbuf *m, struct tc int ti_locked, u_long tiwin) { int acked; + uint16_t nsegs; int winup_only=0; + + nsegs = max(1, m->m_pkthdr.lro_nsegs); #ifdef TCPDEBUG /* * The size of tcp_saveipgen must be the size of the max ip header, @@ -278,7 +281,7 @@ tcp_do_fastack(struct mbuf *m, struct tc * typically means increasing the congestion * window. */ - cc_ack_received(tp, th, CC_ACK); + cc_ack_received(tp, th, nsegs, CC_ACK); tp->snd_una = th->th_ack; /* @@ -502,9 +505,12 @@ tcp_do_slowpath(struct mbuf *m, struct t { int acked, ourfinisacked, needoutput = 0; int rstreason, todrop, win; + uint16_t nsegs; char *s; struct in_conninfo *inc; struct mbuf *mfree = NULL; + + nsegs = max(1, m->m_pkthdr.lro_nsegs); #ifdef TCPDEBUG /* * The size of tcp_saveipgen must be the size of the max ip header, @@ -1085,7 +1091,8 @@ tcp_do_slowpath(struct mbuf *m, struct t tp->t_dupacks = 0; else if (++tp->t_dupacks > tcprexmtthresh || IN_FASTRECOVERY(tp->t_flags)) { - cc_ack_received(tp, th, CC_DUPACK); + cc_ack_received(tp, th, nsegs, + CC_DUPACK); if ((tp->t_flags & TF_SACK_PERMIT) && IN_FASTRECOVERY(tp->t_flags)) { int awnd; @@ -1135,7 +1142,8 @@ tcp_do_slowpath(struct mbuf *m, struct t } /* Congestion signal before ack. 
*/ cc_cong_signal(tp, th, CC_NDUPACK); - cc_ack_received(tp, th, CC_DUPACK); + cc_ack_received(tp, th, nsegs, + CC_DUPACK); tcp_timer_activate(tp, TT_REXMT, 0); tp->t_rtttime = 0; if (tp->t_flags & TF_SACK_PERMIT) { @@ -1169,7 +1177,8 @@ tcp_do_slowpath(struct mbuf *m, struct t * segment. Restore the original * snd_cwnd after packet transmission. */ - cc_ack_received(tp, th, CC_DUPACK); + cc_ack_received(tp, th, nsegs, + CC_DUPACK); u_long oldcwnd = tp->snd_cwnd; tcp_seq oldsndmax = tp->snd_max; u_int sent; @@ -1323,7 +1332,7 @@ process_ACK: * control related information. This typically means increasing * the congestion window. */ - cc_ack_received(tp, th, CC_ACK); + cc_ack_received(tp, th, nsegs, CC_ACK); SOCKBUF_LOCK(>so_snd); if (acked > sbavail(>so_snd)) { @@ -1758,6 +1767,7 @@ tcp_do_segment_fastslow(struct mbuf *m, int thflags; u_long tiwin; char *s; + uint16_t nsegs; int can_enter; struct in_conninfo *inc; struct tcpopt to; @@ -1765,6 +1775,7 @@ tcp_do_segment_fastslow(struct mbuf *m, thflags = th->th_flags; tp->sackhint.last_sack_ack = 0; inc = >t_inpcb->inp_inc; + nsegs = max(1, m->m_pkthdr.lro_nsegs); /* * If this is either a state-changing packet or current state isn't * established, we require a write lock on tcbinfo. Otherwise, we @@ -1983,7 +1994,10 @@ tcp_fastack(struct mbuf *m, struct tcphd int ti_locked, u_long tiwin) { int acked; + uint16_t nsegs; int winup_only=0; + +
svn commit: r304855 - head/sys/netinet
Author: hiren Date: Fri Aug 26 17:48:54 2016 New Revision: 304855 URL: https://svnweb.freebsd.org/changeset/base/304855 Log: Update TCPS_HAVERCVDFIN() macro to correctly include all states a connection can be in after receiving a FIN. FWIW, NetBSD has this change for quite some time. This has been tested at Netflix and Limelight in production traffic. Reported by: Sam Kumar on transport@ Reviewed by: rrs MFC after:4 weeks Sponsored by: Limelight Networks Differential Revision: https://reviews.freebsd.org/D7475 Modified: head/sys/netinet/tcp_fsm.h Modified: head/sys/netinet/tcp_fsm.h == --- head/sys/netinet/tcp_fsm.h Fri Aug 26 17:38:13 2016(r304854) +++ head/sys/netinet/tcp_fsm.h Fri Aug 26 17:48:54 2016(r304855) @@ -73,7 +73,8 @@ #defineTCPS_HAVERCVDSYN(s) ((s) >= TCPS_SYN_RECEIVED) #defineTCPS_HAVEESTABLISHED(s) ((s) >= TCPS_ESTABLISHED) -#defineTCPS_HAVERCVDFIN(s) ((s) >= TCPS_TIME_WAIT) +#defineTCPS_HAVERCVDFIN(s) \ +((s) == TCPS_CLOSE_WAIT || ((s) >= TCPS_CLOSING && (s) != TCPS_FIN_WAIT_2)) #ifdef TCPOUTFLAGS /* ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
Re: svn commit: r303656 - head/sys/netinet
On 08/26/16 at 09:23P, Sepherosa Ziehau wrote: > On Fri, Aug 26, 2016 at 1:54 AM, hiren panchasara > <hi...@strugglingcoder.info> wrote: > > On 08/02/16 at 06:36P, Sepherosa Ziehau wrote: > >> Author: sephe > >> Date: Tue Aug 2 06:36:47 2016 > >> New Revision: 303656 > >> URL: https://svnweb.freebsd.org/changeset/base/303656 > >> > >> Log: > >> tcp/lro: Implement hash table for LRO entries. > >> > >> This significantly improves HTTP workload performance and reduces > >> HTTP workload latency. > >> > >> Reviewed by:rrs, gallatin, hps > >> Obtained from: rrs, gallatin > >> Sponsored by: Netflix (rrs, gallatin) , Microsoft (sephe) > >> Differential Revision: https://reviews.freebsd.org/D6689 > > > > Hi Sephe, > > > > Can you please MFC this to stable/11? > > I don't think this one can be MFC'ed, since it changes the size of LRO > control struct, which is usually embedded in the driver's softc/RX > ring struct. Ah, okay. Thanks for checking. Cheers, Hiren pgpfmvMF8_Ka9.pgp Description: PGP signature
Re: svn commit: r304803 - in head/sys: netinet netinet/cc sys
On 08/25/16 at 01:33P, Lawrence Stewart wrote: > Author: lstewart > Date: Thu Aug 25 13:33:32 2016 > New Revision: 304803 > URL: https://svnweb.freebsd.org/changeset/base/304803 > > Log: > Pass the number of segments coalesced by LRO up the stack by repurposing the > tso_segsz pkthdr field during RX processing, and use the information in TCP > for > more correct accounting and as a congestion control input. This is only a > start, > and an audit of other uses for the data is left as future work. > > Reviewed by:gallatin, rrs > Sponsored by: Netflix, Inc. > Differential Revision: https://reviews.freebsd.org/D7564 > > Modified: > head/sys/netinet/cc/cc.h > head/sys/netinet/cc/cc_newreno.c > head/sys/netinet/tcp_input.c > head/sys/netinet/tcp_lro.c > head/sys/netinet/tcp_var.h > head/sys/sys/mbuf.h fastpath module is broken now. You may want to update that. Also, can this be brought back to stable/11? Cheers, Hiren pgpE1t_cUPcc_.pgp Description: PGP signature
Re: svn commit: r304223 - in head: share/man/man4 share/man/man9 sys/netinet
On 08/16/16 at 03:11P, Randall Stewart wrote: > Author: rrs > Date: Tue Aug 16 15:11:46 2016 > New Revision: 304223 > URL: https://svnweb.freebsd.org/changeset/base/304223 > > Log: > Here we update the modular tcp to be able to switch to an > alternate TCP stack in other then the closed state (pre-listen/connect). > The idea is that *if* that is supported by the alternate stack, it > is asked if its ok to switch. If it approves the "handoff" then we > allow the switch to happen. Also the fini() function now gets a flag > to tell if you are switching away *or* the tcb is destroyed. The > init() call into the alternate stack is moved to the end so the > tcb is more fully formed before the init transpires. > > Sponsored by: Netflix Inc. > Differential Revision: D6790 > > Modified: > head/share/man/man4/tcp.4 > head/share/man/man9/tcp_functions.9 > head/sys/netinet/tcp_subr.c > head/sys/netinet/tcp_syncache.c > head/sys/netinet/tcp_usrreq.c > head/sys/netinet/tcp_var.h Randall, Is this something we can bring back to stable/11? Cheers, Hiren pgpYWPaZU_VAZ.pgp Description: PGP signature
Re: svn commit: r303626 - in head/sys: netinet netinet6
On 08/01/16 at 05:02P, Andrew Gallatin wrote: > Author: gallatin > Date: Mon Aug 1 17:02:21 2016 > New Revision: 303626 > URL: https://svnweb.freebsd.org/changeset/base/303626 > > Log: > Rework IPV6 TCP path MTU discovery to match IPv4 > > - Re-write tcp_ctlinput6() to closely mimic the IPv4 tcp_ctlinput() > > - Now that tcp_ctlinput6() updates t_maxseg, we can allow ip6_output() > to send TCP packets without looking at the tcp host cache for every > single transmit. > > - Make the icmp6 code mimic the IPv4 code & avoid returning > PRC_HOSTDEAD because it is so expensive. > > Without these changes in place, every TCP6 pmtu discovery or host > unreachable ICMP resulted in a call to in6_pcbnotify() which walks the > tcbinfo table with the write lock held. Because the tcbinfo table is > shared between IPv4 and IPv6, this causes huge scalabilty issues on > servers with lots of (~100K) TCP connections, to the point where even > a small percent of IPv6 traffic had a disproportionate impact on > overall throughput. > > Reviewed by:bz, rrs, ae (all earlier versions), lstewart (in > Netflix's tree) > Sponsored by: Netflix > Differential Revision: https://reviews.freebsd.org/D7272 Drew, What do you think about getting this into stable/11? Cheers, Hiren pgpgs4DV2cEsP.pgp Description: PGP signature
Re: svn commit: r303656 - head/sys/netinet
On 08/02/16 at 06:36P, Sepherosa Ziehau wrote: > Author: sephe > Date: Tue Aug 2 06:36:47 2016 > New Revision: 303656 > URL: https://svnweb.freebsd.org/changeset/base/303656 > > Log: > tcp/lro: Implement hash table for LRO entries. > > This significantly improves HTTP workload performance and reduces > HTTP workload latency. > > Reviewed by:rrs, gallatin, hps > Obtained from: rrs, gallatin > Sponsored by: Netflix (rrs, gallatin) , Microsoft (sephe) > Differential Revision: https://reviews.freebsd.org/D6689 Hi Sephe, Can you please MFC this to stable/11? Cheers, Hiren pgp_voLYVchLF.pgp Description: PGP signature
Re: svn commit: r303766 - head/sys/netinet
On 08/05/16 at 09:08P, Sepherosa Ziehau wrote: > Author: sephe > Date: Fri Aug 5 09:08:00 2016 > New Revision: 303766 > URL: https://svnweb.freebsd.org/changeset/base/303766 > > Log: > tcp/lro: If timestamps mismatch or it's a FIN, force flush. > > This keeps the segments/ACK/FIN delivery order. > > Before this patch, it was observed: if A sent FIN immediately after > an ACK, B would deliver FIN first to the TCP stack, then the ACK. > This out-of-order delivery causes one unnecessary ACK sent from B. > > Reviewed by:gallatin, hps > Obtained from: rrs, gallatin > Sponsored by: Netflix (rrs, gallatin), Microsoft (sephe) > Differential Revision: https://reviews.freebsd.org/D7415 Hi Sephe, This looks like a good fix for stable/11. Can you please MFC it? Cheers, Hiren pgpdUXqsRO9Mr.pgp Description: PGP signature
Re: svn commit: r302081 - head/sys/netinet6
On 06/23/16 at 11:48P, Andrey V. Elsukov wrote: > On 22.06.16 18:46, hiren panchasara wrote: > >> Fix the NULL pointer dereference for unresolved link layer entries in > >> the netinet6 code. Copy link layer address only when corresponding entry > >> has LLE_VALID flag. > >> > >> PR: 210379 > >> Approved by: re (kib) > >> > >> Modified: > >> head/sys/netinet6/in6.c > > > > Cursory look tells me that this bug is also present in 10. Is that true? > > If so, is it possible for you to mfc this? > > The patch is applicable, but due to the difference in the LLE code > stable/10 has not affected with this bug. Ah, okay. Thanks a lot for checking. Cheers, Hiren pgpc2hBvxwd2t.pgp Description: PGP signature
Re: svn commit: r302081 - head/sys/netinet6
On 06/22/16 at 11:29P, Andrey V. Elsukov wrote: > Author: ae > Date: Wed Jun 22 11:29:21 2016 > New Revision: 302081 > URL: https://svnweb.freebsd.org/changeset/base/302081 > > Log: > Fix the NULL pointer dereference for unresolved link layer entries in > the netinet6 code. Copy link layer address only when corresponding entry > has LLE_VALID flag. > > PR: 210379 > Approved by:re (kib) > > Modified: > head/sys/netinet6/in6.c Cursory look tells me that this bug is also present in 10. Is that true? If so, is it possible for you to mfc this? Cheers, Hiren pgpe1iIgwOpeK.pgp Description: PGP signature
Re: svn commit: r300865 - in head/sys: sys vm
On 05/27/16 at 07:15P, Alan Cox wrote: > Author: alc > Date: Fri May 27 19:15:45 2016 > New Revision: 300865 > URL: https://svnweb.freebsd.org/changeset/base/300865 > > Log: > The flag "vm_pages_needed" has long served two distinct purposes: (1) to > indicate that threads are waiting for free pages to become available and > (2) to indicate whether a wakeup call has been sent to the page daemon. > The trouble is that a single flag cannot really serve both purposes, because > we have two distinct targets for when to wakeup threads waiting for free > pages versus when the page daemon has completed its work. In particular, > the flag will be cleared by vm_page_free() before the page daemon has met > its target, and this can lead to the OOM killer being invoked prematurely. > To address this problem, a new flag "vm_pageout_wanted" is introduced. > > Discussed with: jeff > Reviewed by:kib, markj > Tested by: markj > Sponsored by: EMC / Isilon Storage Division I'd assume the problem exists in 10 too. Can this be MFCed there? Cheers, Hiren pgpyLH2Z2cduF.pgp Description: PGP signature
svn commit: r299280 - head/sys/netinet/cc
Author: hiren Date: Mon May 9 19:19:03 2016 New Revision: 299280 URL: https://svnweb.freebsd.org/changeset/base/299280 Log: Add an option to use rfc6675 based pipe/inflight bytes calculation in htcp. Submitted by: Kevin BowlingMFC after:1 week Sponsored by: Limelight Networks Modified: head/sys/netinet/cc/cc_htcp.c Modified: head/sys/netinet/cc/cc_htcp.c == --- head/sys/netinet/cc/cc_htcp.c Mon May 9 18:53:46 2016 (r299279) +++ head/sys/netinet/cc/cc_htcp.c Mon May 9 19:19:03 2016 (r299280) @@ -346,8 +346,10 @@ htcp_mod_init(void) static void htcp_post_recovery(struct cc_var *ccv) { + int pipe; struct htcp *htcp_data; + pipe = 0; htcp_data = ccv->cc_data; if (IN_FASTRECOVERY(CCV(ccv, t_flags))) { @@ -358,10 +360,13 @@ htcp_post_recovery(struct cc_var *ccv) * * XXXLAS: Find a way to do this without needing curack */ - if (SEQ_GT(ccv->curack + CCV(ccv, snd_ssthresh), - CCV(ccv, snd_max))) - CCV(ccv, snd_cwnd) = CCV(ccv, snd_max) - ccv->curack + - CCV(ccv, t_maxseg); + if (V_tcp_do_rfc6675_pipe) + pipe = tcp_compute_pipe(ccv->ccvc.tcp); + else + pipe = CCV(ccv, snd_max) - ccv->curack; + + if (pipe < CCV(ccv, snd_ssthresh)) + CCV(ccv, snd_cwnd) = pipe + CCV(ccv, t_maxseg); else CCV(ccv, snd_cwnd) = max(1, ((htcp_data->beta * htcp_data->prev_cwnd / CCV(ccv, t_maxseg)) ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
Re: svn commit: r298769 - head/sys/netinet
On 05/03/16 at 09:29P, Sepherosa Ziehau wrote: > On Fri, Apr 29, 2016 at 11:55 PM, hiren panchasara > <hi...@strugglingcoder.info> wrote: > > On 04/29/16 at 07:23P, Sepherosa Ziehau wrote: > >> Author: sephe > >> Date: Fri Apr 29 07:23:08 2016 > >> New Revision: 298769 > >> URL: https://svnweb.freebsd.org/changeset/base/298769 > >> > >> Log: > >> tcp/syncache: Set flowid and hash type properly for SYN|ACK > >> > >> So the underlying drivers can use it to select the sending queue > >> properly for SYN|ACK instead of rolling their own hash. > >> > >> Sponsored by: Microsoft OSTC > >> Differential Revision: https://reviews.freebsd.org/D6120 > >> > >> Modified: > >> head/sys/netinet/tcp_syncache.c > > > > Would it be possible to MFC this? > > I _think_ we can :) That'd be nice. Thank you! Cheers, Hiren pgpiyoPXg3VVN.pgp Description: PGP signature
Re: svn commit: r298769 - head/sys/netinet
On 04/29/16 at 07:23P, Sepherosa Ziehau wrote: > Author: sephe > Date: Fri Apr 29 07:23:08 2016 > New Revision: 298769 > URL: https://svnweb.freebsd.org/changeset/base/298769 > > Log: > tcp/syncache: Set flowid and hash type properly for SYN|ACK > > So the underlying drivers can use it to select the sending queue > properly for SYN|ACK instead of rolling their own hash. > > Sponsored by: Microsoft OSTC > Differential Revision: https://reviews.freebsd.org/D6120 > > Modified: > head/sys/netinet/tcp_syncache.c Would it be possible to MFC this? Cheers, Hiren pgpfS0rnfSu5Y.pgp Description: PGP signature
Re: svn commit: r298696 - head/sys/netinet
On 04/27/16 at 09:40P, Sepherosa Ziehau wrote: > Author: sephe > Date: Wed Apr 27 09:40:55 2016 > New Revision: 298696 > URL: https://svnweb.freebsd.org/changeset/base/298696 > > Log: > tcp/lro: Fix typo. > > MFC after: 1 week > Sponsored by: Microsoft OSTC > > Modified: > head/sys/netinet/tcp_lro.c > > Modified: head/sys/netinet/tcp_lro.c > == > --- head/sys/netinet/tcp_lro.cWed Apr 27 07:46:38 2016 > (r298695) > +++ head/sys/netinet/tcp_lro.cWed Apr 27 09:40:55 2016 > (r298696) > @@ -569,7 +569,7 @@ tcp_lro_rx(struct lro_ctrl *lc, struct m > if ((th->th_flags & ~(TH_ACK | TH_PUSH)) != 0) > return (TCP_LRO_CANNOT); > > - /* XXX-BZ We lose a AKC|PUSH flag concatinating multiple segments. */ > + /* XXX-BZ We lose a ACK|PUSH flag concatinating multiple segments. */ Also s/concatinating /concatenating / ? Cheers, Hiren pgpO9VnmqcdsL.pgp Description: PGP signature
Re: svn commit: r298131 - head/sys/conf
On 04/17/16 at 12:20P, Alexander Leidinger wrote: > > Quoting Luiz Otavio O Souza (from Sat, 16 Apr 2016 > 20:54:55 +0000 (UTC)): > > > Author: loos > > Date: Sat Apr 16 20:54:55 2016 > > New Revision: 298131 > > URL: https://svnweb.freebsd.org/changeset/base/298131 > > > > Log: > > Add Codel to NOTES. > > I haven't found much documentation about this. It looks like it is > only available in PF, but not in dummynet. Is this correct? Are there > somewhere examples for the use of codel (options/parameters/HOWTO)? I > would expect in places where e.g. "red" could be used, but I don't > find any mention of codel in e.g. the FreeBSD handbook. I'd also appreciate more details on 1) how to use it, 2) under what scenarios/workloads this would be most beneficial, and, more importantly, 3) how this addition was validated. Thank you for the work! Cheers, Hiren pgp2Y6KW8lk8G.pgp Description: PGP signature
svn commit: r298087 - in head: cddl/lib/libdtrace share/man/man4
Author: hiren Date: Fri Apr 15 20:27:36 2016 New Revision: 298087 URL: https://svnweb.freebsd.org/changeset/base/298087 Log: Fix the 'type' for a few variables from tcpcb. Reviewed by: markj Sponsored by: Limelight Networks Differential Revision:https://reviews.freebsd.org/D5973 Modified: head/cddl/lib/libdtrace/tcp.d head/share/man/man4/dtrace_tcp.4 Modified: head/cddl/lib/libdtrace/tcp.d == --- head/cddl/lib/libdtrace/tcp.d Fri Apr 15 20:19:32 2016 (r298086) +++ head/cddl/lib/libdtrace/tcp.d Fri Apr 15 20:27:36 2016 (r298087) @@ -108,16 +108,16 @@ typedef struct tcpsinfo { uint32_t tcps_snxt; /* next sequence # to send */ uint32_t tcps_rack; /* sequence # we have acked */ uint32_t tcps_rnxt; /* next sequence # expected */ - uint32_t tcps_swnd; /* send window size */ + u_long tcps_swnd; /* send window size */ int32_t tcps_snd_ws;/* send window scaling */ uint32_t tcps_swl1; /* window update seg seq number */ uint32_t tcps_swl2; /* window update seg ack number */ uint32_t tcps_rup; /* receive urgent pointer */ uint32_t tcps_radv; /* advertised window */ - uint32_t tcps_rwnd; /* receive window size */ + u_long tcps_rwnd; /* receive window size */ int32_t tcps_rcv_ws;/* receive window scaling */ - uint32_t tcps_cwnd; /* congestion window */ - uint32_t tcps_cwnd_ssthresh;/* threshold for congestion avoidance */ + u_long tcps_cwnd; /* congestion window */ + u_long tcps_cwnd_ssthresh; /* threshold for congestion avoidance */ uint32_t tcps_srecover; /* for use in NewReno Fast Recovery */ uint32_t tcps_sack_fack;/* SACK sequence # we have acked */ uint32_t tcps_sack_snxt;/* next SACK seq # for retransmission */ Modified: head/share/man/man4/dtrace_tcp.4 == --- head/share/man/man4/dtrace_tcp.4Fri Apr 15 20:19:32 2016 (r298086) +++ head/share/man/man4/dtrace_tcp.4Fri Apr 15 20:27:36 2016 (r298087) @@ -24,7 +24,7 @@ .\" .\" $FreeBSD$ .\" -.Dd July 5, 2015 +.Dd April 15, 2016 .Dt DTRACE_TCP 4 .Os .Sh NAME @@ -217,17 +217,17 @@ Next sequence number for send. 
Sequence number of received and acknowledged data. .It Vt uint32_t tcps_rnxt Next expected sequence number for receive. -.It Vt uint32_t tcps_swnd +.It Vt u_long tcps_swnd TCP send window size. .It Vt int32_t tcps_snd_ws Window scaling factor for the TCP send window. -.It Vt uint32_t tcps_rwnd +.It Vt u_long tcps_rwnd TCP receive window size. .It Vt int32_t tcps_rcv_ws Window scaling factor for the TCP receive window. -.It Vt uint32_t tcps_cwnd +.It Vt u_long tcps_cwnd TCP congestion window size. -.It Vt uint32_t tcps_cwnd_ssthresh +.It Vt u_long tcps_cwnd_ssthresh Congestion window threshold at which slow start ends and congestion avoidance begins. .It Vt uint32_t tcps_sack_fack ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
Re: svn commit: r294840 - head/sys/netinet
Somehow I totally missed this email. On 01/26/16 at 02:53P, Gleb Smirnoff wrote: > Hiren, > > On Tue, Jan 26, 2016 at 04:33:38PM +0000, Hiren Panchasara wrote: > H> Author: hiren > H> Date: Tue Jan 26 16:33:38 2016 > H> New Revision: 294840 > H> URL: https://svnweb.freebsd.org/changeset/base/294840 > H> > H> Log: > H> Persist timers TCPTV_PERSMIN and TCPTV_PERSMAX are hardcoded with 5 > seconds and > H> 60 seconds, respectively. Turn them into sysctls that can be tuned live. > The > H> default values of 5 seconds and 60 seconds have been retained. > H> > H> Submitted by:Jason Wolfe (j at nitrology dot com) > H> Reviewed by: gnn, rrs, hiren, bz > H> MFC after: 1 week > H> Sponsored by:Limelight Networks > H> Differential Revision: https://reviews.freebsd.org/D5024 > > A theoretical question: could it be useful to make them socket options > like the TCP_KEEP* timeouts? Hum, unsure. I do not have a use-case for that currently. Cheers, Hiren pgpJMB2qhf2Tf.pgp Description: PGP signature
svn commit: r294840 - head/sys/netinet
Author: hiren Date: Tue Jan 26 16:33:38 2016 New Revision: 294840 URL: https://svnweb.freebsd.org/changeset/base/294840 Log: Persist timers TCPTV_PERSMIN and TCPTV_PERSMAX are hardcoded with 5 seconds and 60 seconds, respectively. Turn them into sysctls that can be tuned live. The default values of 5 seconds and 60 seconds have been retained. Submitted by: Jason Wolfe (j at nitrology dot com) Reviewed by: gnn, rrs, hiren, bz MFC after:1 week Sponsored by: Limelight Networks Differential Revision:https://reviews.freebsd.org/D5024 Modified: head/sys/netinet/tcp_output.c head/sys/netinet/tcp_subr.c head/sys/netinet/tcp_timer.c head/sys/netinet/tcp_timer.h Modified: head/sys/netinet/tcp_output.c == --- head/sys/netinet/tcp_output.c Tue Jan 26 15:26:35 2016 (r294839) +++ head/sys/netinet/tcp_output.c Tue Jan 26 16:33:38 2016 (r294840) @@ -1626,7 +1626,7 @@ tcp_setpersist(struct tcpcb *tp) * Start/restart persistance timer. */ TCPT_RANGESET(tt, t * tcp_backoff[tp->t_rxtshift], - TCPTV_PERSMIN, TCPTV_PERSMAX); + tcp_persmin, tcp_persmax); tcp_timer_activate(tp, TT_PERSIST, tt); if (tp->t_rxtshift < TCP_MAXRXTSHIFT) tp->t_rxtshift++; Modified: head/sys/netinet/tcp_subr.c == --- head/sys/netinet/tcp_subr.c Tue Jan 26 15:26:35 2016(r294839) +++ head/sys/netinet/tcp_subr.c Tue Jan 26 16:33:38 2016(r294840) @@ -675,6 +675,8 @@ tcp_init(void) tcp_rexmit_min = TCPTV_MIN; if (tcp_rexmit_min < 1) tcp_rexmit_min = 1; + tcp_persmin = TCPTV_PERSMIN; + tcp_persmax = TCPTV_PERSMAX; tcp_rexmit_slop = TCPTV_CPU_VAR; tcp_finwait2_timeout = TCPTV_FINWAIT2_TIMEOUT; tcp_tcbhashsize = hashsize; Modified: head/sys/netinet/tcp_timer.c == --- head/sys/netinet/tcp_timer.cTue Jan 26 15:26:35 2016 (r294839) +++ head/sys/netinet/tcp_timer.cTue Jan 26 16:33:38 2016 (r294840) @@ -77,6 +77,14 @@ __FBSDID("$FreeBSD$"); #include #endif +inttcp_persmin; +SYSCTL_PROC(_net_inet_tcp, OID_AUTO, persmin, CTLTYPE_INT|CTLFLAG_RW, +_persmin, 0, sysctl_msec_to_ticks, "I", "minimum persistence interval"); + 
+inttcp_persmax; +SYSCTL_PROC(_net_inet_tcp, OID_AUTO, persmax, CTLTYPE_INT|CTLFLAG_RW, +_persmax, 0, sysctl_msec_to_ticks, "I", "maximum persistence interval"); + inttcp_keepinit; SYSCTL_PROC(_net_inet_tcp, TCPCTL_KEEPINIT, keepinit, CTLTYPE_INT|CTLFLAG_RW, _keepinit, 0, sysctl_msec_to_ticks, "I", "time to establish connection"); Modified: head/sys/netinet/tcp_timer.h == --- head/sys/netinet/tcp_timer.hTue Jan 26 15:26:35 2016 (r294839) +++ head/sys/netinet/tcp_timer.hTue Jan 26 16:33:38 2016 (r294840) @@ -77,7 +77,7 @@ if 0, no idea yet */ #defineTCPTV_RTOBASE ( 3*hz)/* assumed RTO if no info */ -#defineTCPTV_PERSMIN ( 5*hz)/* retransmit persistence */ +#defineTCPTV_PERSMIN ( 5*hz)/* minimum persist interval */ #defineTCPTV_PERSMAX ( 60*hz)/* maximum persist interval */ #defineTCPTV_KEEP_INIT ( 75*hz)/* initial connect keepalive */ @@ -173,6 +173,8 @@ struct tcp_timer { #defineTP_KEEPCNT(tp) ((tp)->t_keepcnt ? (tp)->t_keepcnt : tcp_keepcnt) #defineTP_MAXIDLE(tp) (TP_KEEPCNT(tp) * TP_KEEPINTVL(tp)) +extern int tcp_persmin;/* minimum persist interval */ +extern int tcp_persmax;/* maximum persist interval */ extern int tcp_keepinit; /* time to establish connection */ extern int tcp_keepidle; /* time before keepalive probes begin */ extern int tcp_keepintvl; /* time between keepalive probes */ ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
Re: svn commit: r294535 - in head/sys/netinet: . cc tcp_stacks
On 01/22/16 at 04:06P, Bjoern A. Zeeb wrote: > > > On 22 Jan 2016, at 15:21 , George Neville-Neil> > wrote: > > > > > > > > On 22 Jan 2016, at 2:13, Lawrence Stewart wrote: > > > >> Hi Gleb, > >> > >> On 01/22/16 09:34, Gleb Smirnoff wrote: > >>> Author: glebius > >>> Date: Thu Jan 21 22:34:51 2016 > >>> New Revision: 294535 > >>> URL: https://svnweb.freebsd.org/changeset/base/294535 > >>> > >>> Log: > >>> - Rename cc.h to more meaningful tcp_cc.h. > >> > >> As a bit of historical context, the naming was intentionally protocol > >> agnostic because it was originally hoped that the CC framework could be > >> shared between multiple CC aware transports, and the design went to some > >> lengths to accommodate that possibility (e.g. the ccv_container union in > >> struct cc_var). SCTP was the obvious potential in tree consumer at the > >> time, and other protocols like DCCP were considered as well. > >> > >> This hasn't come about to date, but I'm not sure what value is obtained > >> from your rename change unless we decide to completely give up on shared > >> CC and if we do that, this change doesn't go far enough and we can > >> further simplify the framework to make it entirely TCP specific e.g. we > >> should probably do away with struct cc_var. > >> > >> I'd argue in favour of reverting the rename and if you're gung ho about > >> making the framework TCP specific, we can start a public discussion > >> about what that should look like. > >> > > > > I actually was wondering about this as well. I think it ought to be > > reverted to agnostic. > > I probably share that view but I also agree that cc.h is not a good name. > > So before we entirely revert this, can when maybe come up with a name that is > better than cc.h or tcp_cc.h and only make this one more change forward > rather than going back to the previous status quo? We use "cc" everywhere in the stack to refer to congestion control, whether that's mod_cc, cc_, or the sys/netinet/cc directory.
I don't see a problem with the name. Neither do I feel a need for any change. Cheers, Hiren pgpG6wFD7H7qH.pgp Description: PGP signature
Re: svn commit: r294540 - in head: etc/mtree include share/man/man4 share/man/man9 sys/netinet
On 01/21/16 at 06:39P, Gleb Smirnoff wrote: > Hiren, > > On Thu, Jan 21, 2016 at 06:24:02PM -0800, hiren panchasara wrote: > h> > Log: > h> > Provide new socket option TCP_CCALGOOPT, which stands for TCP > congestion > h> > control algorithm options. The argument is variable length and is > opaque > h> > to TCP, forwarded directly to the algorithm's ctl_output method. > h> > > h> > Provide new includes directory netinet/cc, where algorithm specific > h> > headers can be installed. > h> > > h> > The new API doesn't yet have any in tree consumers. > h> > > h> > The original code written by lstewart. > h> > Reviewed by: rrs, emax > h> > Sponsored by: Netflix > h> > Differential Revision: https://reviews.freebsd.org/D711 > h> > h> Thanks Lawrence and Glebius for the patch. > h> > h> I know original plan was to have this in earlier but is it possible to > h> still get this in 10.3 or would it be too soon? > > Do you have a module that is going to utilize it? Not right now. But having this in 10.3 can be useful. I don't want to push. I can always carry this patch around if I need in 10. Cheers, Hiren pgp0lxfBAmKrx.pgp Description: PGP signature
Re: svn commit: r294540 - in head: etc/mtree include share/man/man4 share/man/man9 sys/netinet
On 01/21/16 at 06:50P, Gleb Smirnoff wrote: > On Thu, Jan 21, 2016 at 06:43:10PM -0800, hiren panchasara wrote: > h> > h> > Provide new socket option TCP_CCALGOOPT, which stands for TCP > congestion > h> > h> > control algorithm options. The argument is variable length and > is opaque > h> > h> > to TCP, forwarded directly to the algorithm's ctl_output method. > h> > h> > > h> > h> > Provide new includes directory netinet/cc, where algorithm > specific > h> > h> > headers can be installed. > h> > h> > > h> > h> > The new API doesn't yet have any in tree consumers. > h> > h> > > h> > h> > The original code written by lstewart. > h> > h> > Reviewed by: rrs, emax > h> > h> > Sponsored by: Netflix > h> > h> > Differential Revision:https://reviews.freebsd.org/D711 > h> > h> > h> > h> Thanks Lawrence and Glebius for the patch. > h> > h> > h> > h> I know original plan was to have this in earlier but is it possible to > h> > h> still get this in 10.3 or would it be too soon? > h> > > h> > Do you have a module that is going to utilize it? > h> > h> Not right now. But having this in 10.3 can be useful. I don't want to > h> push. I can always carry this patch around if I need in 10. > > Yes, I'm quite reluctant to merge. First we need more modules using the API, > and then we can say ourselves that API is good and can go into stable. Since > when it wents into stable, we should no longer change it. Fair enough. Cheers, Hiren pgp5hDbCw48WM.pgp Description: PGP signature
Re: svn commit: r294540 - in head: etc/mtree include share/man/man4 share/man/man9 sys/netinet
On 01/22/16 at 02:07P, Gleb Smirnoff wrote: > Author: glebius > Date: Fri Jan 22 02:07:48 2016 > New Revision: 294540 > URL: https://svnweb.freebsd.org/changeset/base/294540 > > Log: > Provide new socket option TCP_CCALGOOPT, which stands for TCP congestion > control algorithm options. The argument is variable length and is opaque > to TCP, forwarded directly to the algorithm's ctl_output method. > > Provide new includes directory netinet/cc, where algorithm specific > headers can be installed. > > The new API doesn't yet have any in tree consumers. > > The original code written by lstewart. > Reviewed by:rrs, emax > Sponsored by: Netflix > Differential Revision: https://reviews.freebsd.org/D711 Thanks Lawrence and Glebius for the patch. I know original plan was to have this in earlier but is it possible to still get this in 10.3 or would it be too soon? Cheers, Hiren pgp8nqJzQgYoq.pgp Description: PGP signature
Re: svn commit: r293439 - in head: lib/libc/sys sys/dev/ti sys/kern sys/sys usr.bin/netstat
I'd also stress having the important details in the commit log itself. On 01/08/16 at 01:32P, Gleb Smirnoff wrote: > I'm writing status report on this right now. Looking forward to it. > > Also there is slideshare: > http://www.slideshare.net/facepalmtarbz2/new-sendfile-in-english Thanks. Cheers, Hiren pgpMzF9dYbvoc.pgp Description: PGP signature
svn commit: r292087 - head/sys/netinet
Author: hiren Date: Fri Dec 11 06:22:58 2015 New Revision: 292087 URL: https://svnweb.freebsd.org/changeset/base/292087 Log: Clean up unused bandwidth entry in the TCP hostcache. Submitted by: Jason Wolfe (j at nitrology dot com) Reviewed by: rrs, hiren Sponsored by: Limelight Networks Differential Revision:https://reviews.freebsd.org/D4154 Modified: head/sys/netinet/tcp_hostcache.c head/sys/netinet/tcp_hostcache.h head/sys/netinet/tcp_var.h Modified: head/sys/netinet/tcp_hostcache.c == --- head/sys/netinet/tcp_hostcache.cFri Dec 11 06:20:31 2015 (r292086) +++ head/sys/netinet/tcp_hostcache.cFri Dec 11 06:22:58 2015 (r292087) @@ -32,8 +32,8 @@ * table to a dedicated structure indexed by the remote IP address. It keeps * information on the measured TCP parameters of past TCP sessions to allow * better initial start values to be used with later connections to/from the - * same source. Depending on the network parameters (delay, bandwidth, max - * MTU, congestion window) between local and remote sites, this can lead to + * same source. Depending on the network parameters (delay, max MTU, + * congestion window) between local and remote sites, this can lead to * significant speed-ups for new TCP connections after the first one. 
* * Due to the tcp_hostcache, all TCP-specific metrics information in the @@ -440,7 +440,6 @@ tcp_hc_get(struct in_conninfo *inc, stru hc_metrics_lite->rmx_ssthresh = hc_entry->rmx_ssthresh; hc_metrics_lite->rmx_rtt = hc_entry->rmx_rtt; hc_metrics_lite->rmx_rttvar = hc_entry->rmx_rttvar; - hc_metrics_lite->rmx_bandwidth = hc_entry->rmx_bandwidth; hc_metrics_lite->rmx_cwnd = hc_entry->rmx_cwnd; hc_metrics_lite->rmx_sendpipe = hc_entry->rmx_sendpipe; hc_metrics_lite->rmx_recvpipe = hc_entry->rmx_recvpipe; @@ -555,14 +554,6 @@ tcp_hc_update(struct in_conninfo *inc, s (hc_entry->rmx_ssthresh + hcml->rmx_ssthresh) / 2; TCPSTAT_INC(tcps_cachedssthresh); } - if (hcml->rmx_bandwidth != 0) { - if (hc_entry->rmx_bandwidth == 0) - hc_entry->rmx_bandwidth = hcml->rmx_bandwidth; - else - hc_entry->rmx_bandwidth = - (hc_entry->rmx_bandwidth + hcml->rmx_bandwidth) / 2; - /* TCPSTAT_INC(tcps_cachedbandwidth); */ - } if (hcml->rmx_cwnd != 0) { if (hc_entry->rmx_cwnd == 0) hc_entry->rmx_cwnd = hcml->rmx_cwnd; @@ -612,7 +603,7 @@ sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS) SBUF_INCLUDENUL); sbuf_printf(, - "\nIP addressMTU SSTRESH RTT RTTVAR BANDWIDTH " + "\nIP addressMTU SSTRESH RTT RTTVAR " "CWND SENDPIPE RECVPIPE HITS UPD EXP\n"); #define msec(u) (((u) + 500) / 1000) @@ -621,8 +612,8 @@ sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS) TAILQ_FOREACH(hc_entry, _tcp_hostcache.hashbase[i].hch_bucket, rmx_q) { sbuf_printf(, - "%-15s %5lu %8lu %6lums %6lums %9lu %8lu %8lu %8lu " - "%4lu %4lu %4i\n", + "%-15s %5lu %8lu %6lums %6lums %8lu %8lu %8lu %4lu " + "%4lu %4i\n", hc_entry->ip4.s_addr ? 
inet_ntoa(hc_entry->ip4) : #ifdef INET6 ip6_sprintf(ip6buf, _entry->ip6), @@ -635,7 +626,6 @@ sysctl_tcp_hc_list(SYSCTL_HANDLER_ARGS) (RTM_RTTUNIT / (hz * TCP_RTT_SCALE))), msec(hc_entry->rmx_rttvar * (RTM_RTTUNIT / (hz * TCP_RTTVAR_SCALE))), - hc_entry->rmx_bandwidth * 8, hc_entry->rmx_cwnd, hc_entry->rmx_sendpipe, hc_entry->rmx_recvpipe, Modified: head/sys/netinet/tcp_hostcache.h == --- head/sys/netinet/tcp_hostcache.hFri Dec 11 06:20:31 2015 (r292086) +++ head/sys/netinet/tcp_hostcache.hFri Dec 11 06:22:58 2015 (r292087) @@ -57,7 +57,6 @@ struct hc_metrics { u_long rmx_ssthresh; /* outbound gateway buffer limit */ u_long rmx_rtt;/* estimated round trip time */ u_long rmx_rttvar; /* estimated rtt variance */ - u_long rmx_bandwidth; /* estimated bandwidth */ u_long rmx_cwnd; /* congestion window */ u_long rmx_sendpipe; /* outbound delay-bandwidth product */ u_long rmx_recvpipe; /* inbound delay-bandwidth product */ Modified: head/sys/netinet/tcp_var.h
svn commit: r292012 - head/sys/netinet/cc
Author: hiren Date: Wed Dec 9 08:53:41 2015 New Revision: 292012 URL: https://svnweb.freebsd.org/changeset/base/292012 Log: Add an option to use rfc6675 based pipe/inflight bytes calculation in newreno. MFC after:3 weeks Sponsored by: Limelight Networks Modified: head/sys/netinet/cc/cc_newreno.c Modified: head/sys/netinet/cc/cc_newreno.c == --- head/sys/netinet/cc/cc_newreno.cWed Dec 9 07:56:40 2015 (r292011) +++ head/sys/netinet/cc/cc_newreno.cWed Dec 9 08:53:41 2015 (r292012) @@ -214,6 +214,9 @@ newreno_cong_signal(struct cc_var *ccv, static void newreno_post_recovery(struct cc_var *ccv) { + int pipe; + pipe = 0; + if (IN_FASTRECOVERY(CCV(ccv, t_flags))) { /* * Fast recovery will conclude after returning from this @@ -224,10 +227,13 @@ newreno_post_recovery(struct cc_var *ccv * * XXXLAS: Find a way to do this without needing curack */ - if (SEQ_GT(ccv->curack + CCV(ccv, snd_ssthresh), - CCV(ccv, snd_max))) - CCV(ccv, snd_cwnd) = CCV(ccv, snd_max) - - ccv->curack + CCV(ccv, t_maxseg); + if (V_tcp_do_rfc6675_pipe) + pipe = tcp_compute_pipe(ccv->ccvc.tcp); + else + pipe = CCV(ccv, snd_max) - ccv->curack; + + if (pipe < CCV(ccv, snd_ssthresh)) + CCV(ccv, snd_cwnd) = pipe + CCV(ccv, t_maxseg); else CCV(ccv, snd_cwnd) = CCV(ccv, snd_ssthresh); } ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r292046 - head/sys/netinet
Author: hiren Date: Thu Dec 10 03:20:10 2015 New Revision: 292046 URL: https://svnweb.freebsd.org/changeset/base/292046 Log: r290122 added 4 bytes and removed 8 in struct sackhint. Add a pad entry of 4 bytes to restore the size. Spotted by: rrs Reviewed by: rrs X-MFC with: r290122 Sponsored by: Limelight Networks Modified: head/sys/netinet/tcp_var.h Modified: head/sys/netinet/tcp_var.h == --- head/sys/netinet/tcp_var.h Thu Dec 10 02:11:42 2015(r292045) +++ head/sys/netinet/tcp_var.h Thu Dec 10 03:20:10 2015(r292046) @@ -78,6 +78,7 @@ struct sackhint { * Total sacked bytes reported by the * receiver via sack option */ + uint32_t_pad1[1]; /* TBD */ uint64_t_pad[1];/* TBD */ }; ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r292011 - head/sys/netinet/cc
Author: hiren Date: Wed Dec 9 07:56:40 2015 New Revision: 292011 URL: https://svnweb.freebsd.org/changeset/base/292011 Log: Add an option to use rfc6675 based pipe/inflight bytes calculation in cubic. Reviewed by: gnn MFC after:3 weeks Sponsored by: Limelight Networks Differential Revision:https://reviews.freebsd.org/D4205 Modified: head/sys/netinet/cc/cc_cubic.c Modified: head/sys/netinet/cc/cc_cubic.c == --- head/sys/netinet/cc/cc_cubic.c Wed Dec 9 06:59:04 2015 (r292010) +++ head/sys/netinet/cc/cc_cubic.c Wed Dec 9 07:56:40 2015 (r292011) @@ -299,8 +299,10 @@ static void cubic_post_recovery(struct cc_var *ccv) { struct cubic *cubic_data; + int pipe; cubic_data = ccv->cc_data; + pipe = 0; /* Fast convergence heuristic. */ if (cubic_data->max_cwnd < cubic_data->prev_max_cwnd) @@ -315,10 +317,13 @@ cubic_post_recovery(struct cc_var *ccv) * * XXXLAS: Find a way to do this without needing curack */ - if (SEQ_GT(ccv->curack + CCV(ccv, snd_ssthresh), - CCV(ccv, snd_max))) - CCV(ccv, snd_cwnd) = CCV(ccv, snd_max) - ccv->curack + - CCV(ccv, t_maxseg); + if (V_tcp_do_rfc6675_pipe) + pipe = tcp_compute_pipe(ccv->ccvc.tcp); + else + pipe = CCV(ccv, snd_max) - ccv->curack; + + if (pipe < CCV(ccv, snd_ssthresh)) + CCV(ccv, snd_cwnd) = pipe + CCV(ccv, t_maxseg); else /* Update cwnd based on beta and adjusted max_cwnd. */ CCV(ccv, snd_cwnd) = max(1, ((CUBIC_BETA * ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r292003 - head/sys/netinet
Author: hiren Date: Tue Dec 8 21:21:48 2015 New Revision: 292003 URL: https://svnweb.freebsd.org/changeset/base/292003 Log: One of the ways to detect loss is to count duplicate acks coming back from the other end until it reaches a predetermined threshold which is 3 for us right now. Once that happens, we trigger fast-retransmit to do loss recovery. Main problem with the current implementation is that we don't honor SACK information well to detect whether an incoming ack is a dupack or not. RFC6675 has latest recommendations for that. According to it, dupack is a segment that arrives carrying a SACK block that identifies previously unknown information between snd_una and snd_max even if it carries new data, changes the advertised window, or moves the cumulative acknowledgment point. With the prevalence of Selective ACK (SACK) these days, improper handling can lead to delayed loss recovery. With the fix, new behavior looks like the following: 0) th_ack < snd_una --> ignore Old acks are ignored. 1) th_ack == snd_una, !sack_changed --> ignore Acks with SACK enabled but without any new SACK info in them are ignored. 2) th_ack == snd_una, window == old_window --> increment Increment on a good dupack. 3) th_ack == snd_una, window != old_window, sack_changed --> increment When SACK is enabled, it's okay to have advertised window changed if the ack has new SACK info. 4) th_ack > snd_una --> reset to 0 Reset to 0 when left edge moves. 5) th_ack > snd_una, sack_changed --> increment Increment if left edge moves but there is new SACK info. Here, sack_changed is the indicator that incoming ack has previously unknown SACK info in it. Note: This fix is not fully compliant with RFC6675. That may require a few changes to current implementation in order to keep per-sackhole dupack counter and change to the way we mark/handle sack holes. 
PR: 203663 Reviewed by: jtl MFC after:3 weeks Sponsored by: Limelight Networks Differential Revision:https://reviews.freebsd.org/D4225 Modified: head/sys/netinet/tcp_input.c head/sys/netinet/tcp_sack.c head/sys/netinet/tcp_var.h Modified: head/sys/netinet/tcp_input.c == --- head/sys/netinet/tcp_input.cTue Dec 8 20:20:40 2015 (r292002) +++ head/sys/netinet/tcp_input.cTue Dec 8 21:21:48 2015 (r292003) @@ -1481,7 +1481,7 @@ tcp_do_segment(struct mbuf *m, struct tc struct tcpcb *tp, int drop_hdrlen, int tlen, uint8_t iptos, int ti_locked) { - int thflags, acked, ourfinisacked, needoutput = 0; + int thflags, acked, ourfinisacked, needoutput = 0, sack_changed; int rstreason, todrop, win; u_long tiwin; char *s; @@ -1501,6 +1501,7 @@ tcp_do_segment(struct mbuf *m, struct tc thflags = th->th_flags; inc = >t_inpcb->inp_inc; tp->sackhint.last_sack_ack = 0; + sack_changed = 0; /* * If this is either a state-changing packet or current state isn't @@ -2424,7 +2425,7 @@ tcp_do_segment(struct mbuf *m, struct tc if ((tp->t_flags & TF_SACK_PERMIT) && ((to.to_flags & TOF_SACK) || !TAILQ_EMPTY(>snd_holes))) - tcp_sack_doack(tp, , th->th_ack); + sack_changed = tcp_sack_doack(tp, , th->th_ack); else /* * Reset the value so that previous (valid) value @@ -2436,7 +2437,9 @@ tcp_do_segment(struct mbuf *m, struct tc hhook_run_tcp_est_in(tp, th, ); if (SEQ_LEQ(th->th_ack, tp->snd_una)) { - if (tlen == 0 && tiwin == tp->snd_wnd) { + if (tlen == 0 && + (tiwin == tp->snd_wnd || + (tp->t_flags & TF_SACK_PERMIT))) { /* * If this is the first time we've seen a * FIN from the remote, this is not a @@ -2478,8 +2481,20 @@ tcp_do_segment(struct mbuf *m, struct tc * When using TCP ECN, notify the peer that * we reduced the cwnd. */ - if (!tcp_timer_active(tp, TT_REXMT) || - th->th_ack != tp->snd_una) + /* +* Following 2 kinds of acks should not affect +* dupack counting: +* 1) Old acks +* 2) Acks with SACK but without any new SACK +* information in them. 
These could result from +* any anomaly in the network like a
svn commit: r290379 - head/sys/netinet
Author: hiren Date: Thu Nov 5 02:09:48 2015 New Revision: 290379 URL: https://svnweb.freebsd.org/changeset/base/290379 Log: Improve the sysctl node name. X-MFC with: r290122 Sponsored by: Limelight Networks Modified: head/sys/netinet/tcp_input.c Modified: head/sys/netinet/tcp_input.c == --- head/sys/netinet/tcp_input.c Thu Nov 5 01:54:38 2015 (r290378) +++ head/sys/netinet/tcp_input.c Thu Nov 5 02:09:48 2015 (r290379) @@ -149,7 +149,7 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, drop "Drop TCP packets with SYN+FIN set"); VNET_DEFINE(int, tcp_do_rfc6675_pipe) = 0; -SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_pipe, CTLFLAG_VNET | CTLFLAG_RW, +SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc6675_pipe, CTLFLAG_VNET | CTLFLAG_RW, &VNET_NAME(tcp_do_rfc6675_pipe), 0, "Use calculated pipe/in-flight bytes per RFC 6675"); ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
svn commit: r290122 - head/sys/netinet
Author: hiren Date: Wed Oct 28 22:57:51 2015 New Revision: 290122 URL: https://svnweb.freebsd.org/changeset/base/290122 Log: Calculate the correct amount of bytes that are in-flight for a connection as suggested by RFC 6675. Currently different places in the stack try to guess this in suboptimal ways. The main problem is that current calculations don't take sacked bytes into account. Sacked bytes are the bytes receiver acked via SACK option. This is suboptimal because it assumes that network has more outstanding (unacked) bytes than the actual value and thus sends less data by setting congestion window lower than what's possible which in turn may cause slower recovery from losses. As an example, one of the current calculations looks something like this: snd_nxt - snd_fack + sackhint.sack_bytes_rexmit New proposal from RFC 6675 is: snd_max - snd_una - sackhint.sacked_bytes + sackhint.sack_bytes_rexmit which takes sacked bytes into account which is a new addition to the sackhint struct. Only thing we are missing from RFC 6675 is isLost() i.e. segment being considered lost and thus adjusting pipe based on that which makes this calculation a bit on the conservative side. The approach is very simple. We already process each ack with sack info in tcp_sack_doack() and extract sack blocks/holes out of it. We'd now also track this new variable sacked_bytes which keeps track of total sacked bytes reported. One downside to this approach is that we may get incorrect count of sacked_bytes if the other end decides to drop sack info in the ack because of memory pressure or some other reasons. But in this (not very likely) case also the pipe calculation would be conservative which is okay as opposed to being aggressive in sending packets into the network. Next step is to use this more accurate pipe estimation to drive congestion window adjustments. 
In collaboration with:rrs Reviewed by: jason_eggnet dot com, rrs MFC after:2 weeks Sponsored by: Limelight Networks Differential Revision:https://reviews.freebsd.org/D3971 Modified: head/sys/netinet/tcp_input.c head/sys/netinet/tcp_sack.c head/sys/netinet/tcp_var.h Modified: head/sys/netinet/tcp_input.c == --- head/sys/netinet/tcp_input.cWed Oct 28 22:49:37 2015 (r290121) +++ head/sys/netinet/tcp_input.cWed Oct 28 22:57:51 2015 (r290122) @@ -148,6 +148,11 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, drop _NAME(drop_synfin), 0, "Drop TCP packets with SYN+FIN set"); +VNET_DEFINE(int, tcp_do_rfc6675_pipe) = 0; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, do_pipe, CTLFLAG_VNET | CTLFLAG_RW, +_NAME(tcp_do_rfc6675_pipe), 0, +"Use calculated pipe/in-flight bytes per RFC 6675"); + VNET_DEFINE(int, tcp_do_rfc3042) = 1; #defineV_tcp_do_rfc3042VNET(tcp_do_rfc3042) SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3042, CTLFLAG_VNET | CTLFLAG_RW, @@ -2420,6 +2425,12 @@ tcp_do_segment(struct mbuf *m, struct tc ((to.to_flags & TOF_SACK) || !TAILQ_EMPTY(>snd_holes))) tcp_sack_doack(tp, , th->th_ack); + else + /* +* Reset the value so that previous (valid) value +* from the last ack with SACK doesn't get used. +*/ + tp->sackhint.sacked_bytes = 0; /* Run HHOOK_TCP_ESTABLISHED_IN helper hooks. */ hhook_run_tcp_est_in(tp, th, ); @@ -2483,8 +2494,12 @@ tcp_do_segment(struct mbuf *m, struct tc * we have less than 1/2 the original window's * worth of data in flight. */ - awnd = (tp->snd_nxt - tp->snd_fack) + - tp->sackhint.sack_bytes_rexmit; + if (V_tcp_do_rfc6675_pipe) + awnd = tcp_compute_pipe(tp); + else + awnd = (tp->snd_nxt - tp->snd_fack) + + tp->sackhint.sack_bytes_rexmit; + if (awnd < tp->snd_ssthresh) { tp->snd_cwnd += tp->t_maxseg; if (tp->snd_cwnd > tp->snd_ssthresh) @@ -3729,3 +3744,11 @@ tcp_newreno_partial_ack(struct tcpcb *tp tp->snd_cwnd = 0; tp->snd_cwnd += tp->t_maxseg; } + +int +tcp_compute_pipe(struct tcpcb *tp)
svn commit: r290043 - in head: share/man/man4 sys/netinet
Author: hiren Date: Tue Oct 27 09:43:05 2015 New Revision: 290043 URL: https://svnweb.freebsd.org/changeset/base/290043 Log: Add sysctl tunable net.inet.tcp.initcwnd_segments to specify initial congestion window in number of segments on fly. It is set to 10 segments by default. Remove net.inet.tcp.experimental.initcwnd10 which is now redundant. Also remove the parent node net.inet.tcp.experimental as it's not needed anymore and also because it was not well thought out. Differential Revision:https://reviews.freebsd.org/D3858 In collaboration with:lstewart Reviewed by: gnn (prev version), rwatson, allanjude, wblock (man page) MFC after:2 weeks Relnotes: yes Sponsored by: Limelight Networks Modified: head/share/man/man4/tcp.4 head/sys/netinet/tcp_input.c head/sys/netinet/tcp_var.h Modified: head/share/man/man4/tcp.4 == --- head/share/man/man4/tcp.4 Tue Oct 27 09:33:47 2015(r290042) +++ head/share/man/man4/tcp.4 Tue Oct 27 09:43:05 2015(r290043) @@ -34,7 +34,7 @@ .\" From: @(#)tcp.48.1 (Berkeley) 6/5/93 .\" $FreeBSD$ .\" -.Dd October 13, 2014 +.Dd October 27, 2015 .Dt TCP 4 .Os .Sh NAME @@ -454,6 +454,17 @@ code. For this reason, we use 200ms of slop and a near-0 minimum, which gives us an effective minimum of 200ms (similar to .Tn Linux ) . +.It Va initcwnd_segments +Enable the ability to specify initial congestion window in number of segments. +The default value is 10 as suggested by RFC 6928. +Changing the value on fly would not affect connections using congestion window +from the hostcache. +Caution: +This regulates the burst of packets allowed to be sent in the first RTT. +The value should be relative to the link capacity. +Start with small values for lower-capacity links. +Large bursts can cause buffer overruns and packet drops if routers have small +buffers or the link is experiencing congestion. .It Va rfc3042 Enable the Limited Transmit algorithm as described in RFC 3042. 
It helps avoid timeouts on lossy links and also when the congestion window Modified: head/sys/netinet/tcp_input.c == --- head/sys/netinet/tcp_input.cTue Oct 27 09:33:47 2015 (r290042) +++ head/sys/netinet/tcp_input.cTue Oct 27 09:43:05 2015 (r290043) @@ -159,13 +159,10 @@ SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3 _NAME(tcp_do_rfc3390), 0, "Enable RFC 3390 (Increasing TCP's Initial Congestion Window)"); -SYSCTL_NODE(_net_inet_tcp, OID_AUTO, experimental, CTLFLAG_RW, 0, -"Experimental TCP extensions"); - -VNET_DEFINE(int, tcp_do_initcwnd10) = 1; -SYSCTL_INT(_net_inet_tcp_experimental, OID_AUTO, initcwnd10, CTLFLAG_VNET | CTLFLAG_RW, -_NAME(tcp_do_initcwnd10), 0, -"Enable RFC 6928 (Increasing initial CWND to 10)"); +VNET_DEFINE(int, tcp_initcwnd_segments) = 10; +SYSCTL_INT(_net_inet_tcp, OID_AUTO, initcwnd_segments, +CTLFLAG_VNET | CTLFLAG_RW, _NAME(tcp_initcwnd_segments), 0, +"Slow-start flight size (initial congestion window) in number of segments"); VNET_DEFINE(int, tcp_do_rfc3465) = 1; SYSCTL_INT(_net_inet_tcp, OID_AUTO, rfc3465, CTLFLAG_VNET | CTLFLAG_RW, @@ -364,6 +361,7 @@ cc_conn_init(struct tcpcb *tp) * RFC5681 Section 3.1 specifies the default conservative values. * RFC3390 specifies slightly more aggressive values. * RFC6928 increases it to ten segments. +* Support for user specified value for initial flight size. 
* * If a SYN or SYN/ACK was lost and retransmitted, we have to * reduce the initial CWND to one segment as congestion is likely @@ -371,9 +369,9 @@ cc_conn_init(struct tcpcb *tp) */ if (tp->snd_cwnd == 1) tp->snd_cwnd = tp->t_maxseg;/* SYN(-ACK) lost */ - else if (V_tcp_do_initcwnd10) - tp->snd_cwnd = min(10 * tp->t_maxseg, - max(2 * tp->t_maxseg, 14600)); + else if (V_tcp_initcwnd_segments) + tp->snd_cwnd = min(V_tcp_initcwnd_segments * tp->t_maxseg, + max(2 * tp->t_maxseg, V_tcp_initcwnd_segments * 1460)); else if (V_tcp_do_rfc3390) tp->snd_cwnd = min(4 * tp->t_maxseg, max(2 * tp->t_maxseg, 4380)); Modified: head/sys/netinet/tcp_var.h == --- head/sys/netinet/tcp_var.h Tue Oct 27 09:33:47 2015(r290042) +++ head/sys/netinet/tcp_var.h Tue Oct 27 09:43:05 2015(r290043) @@ -621,7 +621,7 @@ VNET_DECLARE(int, tcp_mssdflt); /* XXX * VNET_DECLARE(int, tcp_minmss); VNET_DECLARE(int, tcp_delack_enabled); VNET_DECLARE(int, tcp_do_rfc3390); -VNET_DECLARE(int, tcp_do_initcwnd10); +VNET_DECLARE(int, tcp_initcwnd_segments); VNET_DECLARE(int,
Re: svn commit: r289667 - head/share/man/man9
On 10/20/15 at 11:48P, Conrad E. Meyer wrote: > Author: cem > Date: Tue Oct 20 23:48:14 2015 > New Revision: 289667 > URL: https://svnweb.freebsd.org/changeset/base/289667 > > Log: > Document cpuset(9) > > A follow-up to r289467. > > Coerced by: jhb > Sponsored by: EMC / Isilon Storage Division Thanks a lot! Cheers, Hiren pgpahBvIKAjEu.pgp Description: PGP signature
svn commit: r289293 - head/sys/netinet
Author: hiren Date: Wed Oct 14 06:57:28 2015 New Revision: 289293 URL: https://svnweb.freebsd.org/changeset/base/289293 Log: Fix an unnecessarily aggressive behavior where mtu clamping begins on first retransmission timeout (rto) when blackhole detection is enabled. Make sure it only happens when the second attempt to send the same segment also fails with rto. Also make sure that each mtu probing stage (usually 1448 -> 1188 -> 524) follows the same pattern and gets 2 chances (rto) before further clamping down. Note: RFC4821 doesn't specify implementation details on how this situation should be handled. Differential Revision:https://reviews.freebsd.org/D3434 Reviewed by: sbruno, gnn (previous version) MFC after:2 weeks Sponsored by: Limelight Networks Modified: head/sys/netinet/tcp_timer.c Modified: head/sys/netinet/tcp_timer.c == --- head/sys/netinet/tcp_timer.cWed Oct 14 06:31:49 2015 (r289292) +++ head/sys/netinet/tcp_timer.cWed Oct 14 06:57:28 2015 (r289293) @@ -664,9 +664,15 @@ tcp_timer_rexmt(void * xtp) int isipv6; #endif + /* +* Idea here is that at each stage of mtu probe (usually, 1448 +* -> 1188 -> 524) should be given 2 chances to recover before +* further clamping down. 'tp->t_rxtshift % 2 == 0' should +* take care of that. +*/ if (((tp->t_flags2 & (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) == (TF2_PLPMTU_PMTUD|TF2_PLPMTU_MAXSEGSNT)) && - (tp->t_rxtshift <= 2)) { + (tp->t_rxtshift >= 2 && tp->t_rxtshift % 2 == 0)) { /* * Enter Path MTU Black-hole Detection mechanism: * - Disable Path MTU Discovery (IP "DF" bit). @@ -734,9 +740,11 @@ tcp_timer_rexmt(void * xtp) * with a lowered MTU, maybe this isn't a blackhole and * we restore the previous MSS and blackhole detection * flags. +* The limit '6' is determined by giving each probe +* stage (1448, 1188, 524) 2 chances to recover. 
*/ if ((tp->t_flags2 & TF2_PLPMTU_BLACKHOLE) && - (tp->t_rxtshift > 4)) { + (tp->t_rxtshift > 6)) { tp->t_flags2 |= TF2_PLPMTU_PMTUD; tp->t_flags2 &= ~TF2_PLPMTU_BLACKHOLE; optlen = tp->t_maxopd - tp->t_maxseg; ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
Re: svn commit: r289276 - in head/sys: conf kern netinet sys
On 10/13/15 at 06:58P, Bryan Drewery wrote: > On 10/13/2015 5:35 PM, Hiren Panchasara wrote: > > Author: hiren > > Date: Wed Oct 14 00:35:37 2015 > > New Revision: 289276 > > URL: https://svnweb.freebsd.org/changeset/base/289276 > > > > Log: > > There are times when it would be really nice to have a record of the last > > few > > packets and/or state transitions from each TCP socket. That would help > > with > > narrowing down certain problems we see in the field that are hard to > > reproduce > > without understanding the history of how we got into a certain state. This > > change provides just that. > > > > It saves copies of the last N packets in a list in the tcpcb. When the > > tcpcb is > > destroyed, the list is freed. I thought this was likely to be more > > performance-friendly than saving copies of the tcpcb. Plus, with the > > packets, > > you should be able to reverse-engineer what happened to the tcpcb. > > > > To enable the feature, you will need to compile a kernel with the TCPPCAP > > option. Even then, the feature defaults to being deactivated. You can > > activate > > it by setting a positive value for the number of captured packets. You > > can do > > that on either a global basis or on a per-socket basis (via a setsockopt > > call). > > > > There is no way to get the packets out of the kernel other than using > > kmem or > > getting a coredump. I thought that would help some of the legal/privacy > > concerns > > regarding such a feature. However, it should be possible to add a future > > effort > > to export them in PCAP format. > > > > I tested this at low scale, and found that there were no mbuf leaks and > > the peak > > mbuf usage appeared to be unchanged with and without the feature. > > > > The main performance concern I can envision is the number of mbufs that > > would be > > used on systems with a large number of sockets. 
If you save five packets > > per > > direction per socket and have 3,000 sockets, that will consume at least > > 30,000 > > mbufs just to keep these packets. I tried to reduce the concerns > > associated with > > this by limiting the number of clusters (not mbufs) that could be used > > for this > > feature. Again, in my testing, that appears to work correctly. > > > > Differential Revision:D3100 > > You're supposed to use the full URL here which will auto close the review. Okay. It did pick up the commit though. What more does it need to know? > > I also replied to the review with style findings just now. > I'll ask Jonathan to look at it. Cheers, Hiren pgplZsKZ9XBzj.pgp Description: PGP signature
svn commit: r289276 - in head/sys: conf kern netinet sys
Author: hiren Date: Wed Oct 14 00:35:37 2015 New Revision: 289276 URL: https://svnweb.freebsd.org/changeset/base/289276 Log: There are times when it would be really nice to have a record of the last few packets and/or state transitions from each TCP socket. That would help with narrowing down certain problems we see in the field that are hard to reproduce without understanding the history of how we got into a certain state. This change provides just that. It saves copies of the last N packets in a list in the tcpcb. When the tcpcb is destroyed, the list is freed. I thought this was likely to be more performance-friendly than saving copies of the tcpcb. Plus, with the packets, you should be able to reverse-engineer what happened to the tcpcb. To enable the feature, you will need to compile a kernel with the TCPPCAP option. Even then, the feature defaults to being deactivated. You can activate it by setting a positive value for the number of captured packets. You can do that on either a global basis or on a per-socket basis (via a setsockopt call). There is no way to get the packets out of the kernel other than using kmem or getting a coredump. I thought that would help some of the legal/privacy concerns regarding such a feature. However, it should be possible to add a future effort to export them in PCAP format. I tested this at low scale, and found that there were no mbuf leaks and the peak mbuf usage appeared to be unchanged with and without the feature. The main performance concern I can envision is the number of mbufs that would be used on systems with a large number of sockets. If you save five packets per direction per socket and have 3,000 sockets, that will consume at least 30,000 mbufs just to keep these packets. I tried to reduce the concerns associated with this by limiting the number of clusters (not mbufs) that could be used for this feature. Again, in my testing, that appears to work correctly. 
Differential Revision:D3100 Submitted by: Jonathan Looney Reviewed by: gnn, hiren Added: head/sys/netinet/tcp_pcap.c (contents, props changed) head/sys/netinet/tcp_pcap.h (contents, props changed) Modified: head/sys/conf/NOTES head/sys/conf/files head/sys/conf/options head/sys/kern/uipc_mbuf.c head/sys/netinet/tcp.h head/sys/netinet/tcp_input.c head/sys/netinet/tcp_output.c head/sys/netinet/tcp_subr.c head/sys/netinet/tcp_usrreq.c head/sys/netinet/tcp_var.h head/sys/sys/mbuf.h Modified: head/sys/conf/NOTES == --- head/sys/conf/NOTES Wed Oct 14 00:23:31 2015(r289275) +++ head/sys/conf/NOTES Wed Oct 14 00:35:37 2015(r289276) @@ -960,6 +960,9 @@ device lagg # for sockets with the SO_DEBUG option set, which can then be examined # using the trpt(8) utility. # +# TCPPCAP enables code which keeps the last n packets sent and received +# on a TCP socket. +# # RADIX_MPATH provides support for equal-cost multi-path routing. # optionsMROUTING# Multicast routing @@ -976,6 +979,7 @@ options IPFILTER_DEFAULT_BLOCK #block a optionsIPSTEALTH #support for stealth forwarding optionsPF_DEFAULT_TO_DROP #drop everything by default optionsTCPDEBUG +optionsTCPPCAP optionsRADIX_MPATH # The MBUF_STRESS_TEST option enables options which create Modified: head/sys/conf/files == --- head/sys/conf/files Wed Oct 14 00:23:31 2015(r289275) +++ head/sys/conf/files Wed Oct 14 00:35:37 2015(r289276) @@ -3682,6 +3682,7 @@ netinet/tcp_input.c optional inet | ine netinet/tcp_lro.c optional inet | inet6 netinet/tcp_output.c optional inet | inet6 netinet/tcp_offload.c optional tcp_offload inet | tcp_offload inet6 +netinet/tcp_pcap.c optional tcppcap netinet/tcp_reass.coptional inet | inet6 netinet/tcp_sack.c optional inet | inet6 netinet/tcp_subr.c optional inet | inet6 Modified: head/sys/conf/options == --- head/sys/conf/options Wed Oct 14 00:23:31 2015(r289275) +++ head/sys/conf/options Wed Oct 14 00:35:37 2015(r289276) @@ -436,6 +436,7 @@ ROUTETABLES opt_route.h RSSopt_rss.h SLIP_IFF_OPTS opt_slip.h 
TCPDEBUG +TCPPCAPopt_global.h SIFTR TCP_OFFLOADopt_inet.h # Enable code to dispatch TCP offloading TCP_SIGNATURE opt_inet.h Modified: head/sys/kern/uipc_mbuf.c == --- head/sys/kern/uipc_mbuf.c Wed Oct 14 00:23:31 2015(r289275) +++
svn commit: r288914 - head/sys/netinet
Author: hiren Date: Tue Oct 6 07:46:19 2015 New Revision: 288914 URL: https://svnweb.freebsd.org/changeset/base/288914 Log: Add a comment specifying how we implement rfc3042. Differential Revision:D3746 MFC after:1 week Sponsored by: Limelight Networks Modified: head/sys/netinet/tcp_input.c Modified: head/sys/netinet/tcp_input.c == --- head/sys/netinet/tcp_input.cTue Oct 6 07:28:54 2015 (r288913) +++ head/sys/netinet/tcp_input.cTue Oct 6 07:46:19 2015 (r288914) @@ -2536,6 +2536,16 @@ tcp_do_segment(struct mbuf *m, struct tc tp->snd_nxt = onxt; goto drop; } else if (V_tcp_do_rfc3042) { + /* +* Process first and second duplicate +* ACKs. Each indicates a segment +* leaving the network, creating room +* for more. Make sure we can send a +* packet on reception of each duplicate +* ACK by increasing snd_cwnd by one +* segment. Restore the original +* snd_cwnd after packet transmission. +*/ cc_ack_received(tp, th, CC_DUPACK); u_long oldcwnd = tp->snd_cwnd; tcp_seq oldsndmax = tp->snd_max; ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
Re: svn commit: r287780 - in head: share/man/man9 sys/kern sys/sys
On 09/20/15 at 10:49P, Hans Petter Selasky wrote: > hiren: jch@, wrote in a comment in Phabricator he wanted to approve the > change, so it is not fully true that no other kernel developers approved > the change, for that matter. Yes, I know. And I think that's what I said in my previous email. Cheers, Hiren pgpwGL_FKzrD8.pgp Description: PGP signature
Re: svn commit: r287780 - in head: share/man/man9 sys/kern sys/sys
Couldn't have said this any better. On 09/19/15 at 06:38P, Adrian Chadd wrote: > What isn't necessarily public knowledge is the sheer volume of emails > that went out a few months ago whilst chasing down callout and tcp > bugs. There were (and maybe still are) very subtle bugs in the callout > system and after a few attempts at fixing them there were some very > careful bug fixes made. Some attempts failed, I think a couple of > successful ones made it into the tree. jch@ and hans have been working on callout lately. Both collaborate on reviews/commits, take each other's suggestions and catch each other's mistakes. In this particular instance, rrs@ didn't like the change and he asked some questions. Hans responded to that. Which is pretty normal for this list. What I do not understand is, why is everyone coming out with "please revert right away"?? > > Yes, this whole callout system is very delicate at the moment. hps@ > has some very specific ideas of how the API should behave in order to > be predictable/reasoning-able (and I agree with him about almost all > of it, even though it makes RSS painful, but that's because of our TCP > stack and how we use callouts, not because its his fault!) but it's a > pretty big fundamental change to how things currently work and he was > shot down. I think people are just very weary of new changes. > > On the flip side, he did actively solicit reviews - rrs, kib, hiren, > jhb, wblock and jch were included in the review request, which dates > back to August 28. He gave people a little short of three weeks for > review before he committed the code. So as much as I'm cautious about > things (and it gets me in trouble at work, hi alfred!) I think he did > the right thing here - he added a new thing, documented it, solicited > a review, and it timed out. 
If people would like more time to review > it then fine, but please give him either a firm "no, not ever" right > now and be honest about your intentions, or give him a timeframe that > you'll review it before it times out. In principle, jch@ agreed to the review/change in question so it'd be incorrect to say that hans made these changes without anyone's knowledge. Now, without any other reviewers commenting on the review or asking him to wait for the review for 3 weeks, how long should he have waited before committing the changes? > > Hans - personally, I think you should've emailed out a review request > on freebsd-arch@ and put out a request for testers and give a firm > date that you'll commit it. That makes it all very explicit. > That indeed would have made things clearer. Hans - if you think that questions raised by Randall would take more iterations to get answered, please revert the change and discuss it on the review you already have opened for this issue. Also, try and get explicit YES/APPROVED on callout related reviews from jch, rrs and others. (Others: please speak up so hans can add you to this review and future reviews.) > > People channel phrases involving silence and agreement and all that; > this is one of those times it happened. Cheers, Hiren pgp85rN23q8dQ.pgp Description: PGP signature
svn commit: r287830 - head/sys/netinet
Author: hiren Date: Tue Sep 15 20:04:30 2015 New Revision: 287830 URL: https://svnweb.freebsd.org/changeset/base/287830 Log: Remove unnecessary tcp state transition call. Differential Revision:D3451 Reviewed by: markj MFC after:2 weeks Sponsored by: Limelight Networks Modified: head/sys/netinet/tcp_usrreq.c Modified: head/sys/netinet/tcp_usrreq.c == --- head/sys/netinet/tcp_usrreq.c Tue Sep 15 19:59:35 2015 (r287829) +++ head/sys/netinet/tcp_usrreq.c Tue Sep 15 20:04:30 2015 (r287830) @@ -1765,9 +1765,9 @@ tcp_usrclosed(struct tcpcb *tp) #ifdef TCP_OFFLOAD tcp_offload_listen_stop(tp); #endif + tcp_state_change(tp, TCPS_CLOSED); /* FALLTHROUGH */ case TCPS_CLOSED: - tcp_state_change(tp, TCPS_CLOSED); tp = tcp_close(tp); /* * tcp_close() should never return NULL here as the socket is ___ svn-src-head@freebsd.org mailing list https://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to "svn-src-head-unsubscr...@freebsd.org"
Re: svn commit: r287465 - head/sys/dev/e1000
On 09/12/15 at 07:23P, Eric Joyner wrote: > For the errata, the (lack of) details are in the specification updates for > HW like I210, I211, 82575 etc. It would be in the ones updated in August > 2015. Yes, I (we) know errata are there in the h/w specs. Point is, which exact errata problem are you fixing here so I can go look up the spec for more detail. Errata number/name and exact spec should also be quoted for less ambiguity. (Think about someone having to debug this code 5 years down the road.) Cheers, Hiren > > On Fri, Sep 4, 2015 at 9:18 AM hiren panchasara <hi...@strugglingcoder.info> > wrote: > > > + erj > > > > On 09/04/15 at 04:07P, Sean Bruno wrote: > > > Author: sbruno > > > Date: Fri Sep 4 16:07:27 2015 > > > New Revision: 287465 > > > URL: https://svnweb.freebsd.org/changeset/base/287465 > > > > > > Log: > > > igb(4): Update and fix HW errata > > > - HW errata workaround for IPv6 offload w/ extension headers > > It would be useful to know what is the actual problem here. > > > > > - Edited start of if_igb.c (Device IDs / #includes) to match ixgbe/ixl > > I'd also prefer if such changes come via separate commits. :-) > > > > > > > > Differential Revision: https://reviews.freebsd.org/D3165 > > > Submitted by: erj > > > MFC after: 1 month > > > Sponsored by: Intel Corporation > > > > > > Modified: > > > head/sys/dev/e1000/if_igb.c > > > head/sys/dev/e1000/if_igb.h > > > > [skip] > > > > Cheers, > > Hiren > > pgpWj5Hs1hpnr.pgp Description: PGP signature
Re: svn commit: r287465 - head/sys/dev/e1000
+ erj On 09/04/15 at 04:07P, Sean Bruno wrote: > Author: sbruno > Date: Fri Sep 4 16:07:27 2015 > New Revision: 287465 > URL: https://svnweb.freebsd.org/changeset/base/287465 > > Log: > igb(4): Update and fix HW errata > - HW errata workaround for IPv6 offload w/ extension headers It would be useful to know what is the actual problem here. > - Edited start of if_igb.c (Device IDs / #includes) to match ixgbe/ixl I'd also prefer if such changes come via separate commits. :-) > > Differential Revision: https://reviews.freebsd.org/D3165 > Submitted by: erj > MFC after: 1 month > Sponsored by: Intel Corporation > > Modified: > head/sys/dev/e1000/if_igb.c > head/sys/dev/e1000/if_igb.h [skip] Cheers, Hiren pgp0hNkO8FImv.pgp Description: PGP signature
Re: svn commit: r287344 - head/sys/vm
Hi Alan, On 09/01/15 at 06:21P, Alan Cox wrote: > Author: alc > Date: Tue Sep 1 06:21:12 2015 > New Revision: 287344 > URL: https://svnweb.freebsd.org/changeset/base/287344 > > Log: > Handle held pages earlier in the inactive queue scan. > Is this fixing a bug or is it just an enhancement? Can you share more details on what prompted this commit? Cheers, Hiren pgpC0AKcEEJSS.pgp Description: PGP signature
Re: svn commit: r287235 - in head/sys: sys vm
On 08/28/15 at 12:44P, Mark Johnston wrote: Author: markj Date: Fri Aug 28 00:44:17 2015 New Revision: 287235 URL: https://svnweb.freebsd.org/changeset/base/287235 Log: Remove weighted page handling from vm_page_advise(). This was added in r51337 as part of the implementation of madvise(MADV_DONTNEED). Its objective was to ensure that the page daemon would eventually reclaim other unreferenced pages (i.e., unreferenced pages not touched by madvise()) from the active queue. Now that the pagedaemon performs steady scanning of the active page queue, this weighted handling is unnecessary. Instead, always cache clean pages by moving them to the head of the inactive page queue. This simplifies the implementation of vm_page_advise() and eliminates the fragmentation that resulted from the distribution of pages among multiple queues. Suggested by: alc Reviewed by: alc Sponsored by: EMC / Isilon Storage Division Differential Revision: https://reviews.freebsd.org/D3401 Nice work! Can this be MFC'd to 10? Cheers, Hiren pgp7dQIwEFxv_.pgp Description: PGP signature
svn commit: r286700 - in head: sbin/ifconfig sys/net
Author: hiren Date: Wed Aug 12 20:21:04 2015 New Revision: 286700 URL: https://svnweb.freebsd.org/changeset/base/286700 Log: Make LAG LACP fast timeout tunable through IOCTL. Differential Revision: D3300 Submitted by: LN Sundararajan lakshmi.n at msystechnologies Reviewed by: wblock, smh, gnn, hiren, rpokala at panasas MFC after: 2 weeks Sponsored by: Panasas Modified: head/sbin/ifconfig/ifconfig.8 head/sbin/ifconfig/iflagg.c head/sys/net/ieee8023ad_lacp.c head/sys/net/ieee8023ad_lacp.h head/sys/net/if_lagg.c head/sys/net/if_lagg.h Modified: head/sbin/ifconfig/ifconfig.8 == --- head/sbin/ifconfig/ifconfig.8 Wed Aug 12 20:16:13 2015 (r286699) +++ head/sbin/ifconfig/ifconfig.8 Wed Aug 12 20:21:04 2015 (r286700) @@ -28,7 +28,7 @@ .\" From: @(#)ifconfig.8 8.3 (Berkeley) 1/5/94 .\" $FreeBSD$ .\" -.Dd May 15, 2015 +.Dd Aug 12, 2015 .Dt IFCONFIG 8 .Os .Sh NAME @@ -2396,6 +2396,10 @@ Disable local hash computation for RSS h Set a shift parameter for RSS local hash computation. Hash is calculated by using flowid bits in a packet header mbuf which are shifted by the number of this parameter. +.It Cm lacp_fast_timeout +Enable lacp fast-timeout on the interface. +.It Cm -lacp_fast_timeout +Disable lacp fast-timeout on the interface.
.El .Pp The following parameters are specific to IP tunnel interfaces, Modified: head/sbin/ifconfig/iflagg.c == --- head/sbin/ifconfig/iflagg.c Wed Aug 12 20:16:13 2015 (r286699) +++ head/sbin/ifconfig/iflagg.c Wed Aug 12 20:21:04 2015 (r286700) @@ -115,6 +115,8 @@ setlaggsetopt(const char *val, int d, in case -LAGG_OPT_LACP_TXTEST: case LAGG_OPT_LACP_RXTEST: case -LAGG_OPT_LACP_RXTEST: + case LAGG_OPT_LACP_TIMEOUT: + case -LAGG_OPT_LACP_TIMEOUT: break; default: err(1, "Invalid lagg option"); @@ -293,6 +295,8 @@ static struct cmd lagg_cmds[] = { DEF_CMD("-lacp_txtest", -LAGG_OPT_LACP_TXTEST, setlaggsetopt), DEF_CMD("lacp_rxtest", LAGG_OPT_LACP_RXTEST, setlaggsetopt), DEF_CMD("-lacp_rxtest", -LAGG_OPT_LACP_RXTEST, setlaggsetopt), + DEF_CMD("lacp_fast_timeout", LAGG_OPT_LACP_TIMEOUT, setlaggsetopt), + DEF_CMD("-lacp_fast_timeout", -LAGG_OPT_LACP_TIMEOUT, setlaggsetopt), DEF_CMD_ARG("flowid_shift", setlaggflowidshift), }; static struct afswtch af_lagg = { Modified: head/sys/net/ieee8023ad_lacp.c == --- head/sys/net/ieee8023ad_lacp.c Wed Aug 12 20:16:13 2015 (r286699) +++ head/sys/net/ieee8023ad_lacp.c Wed Aug 12 20:21:04 2015 (r286700) @@ -522,7 +522,7 @@ lacp_port_create(struct lagg_port *lgp) int error; boolean_t active = TRUE; /* XXX should be configurable */ - boolean_t fast = FALSE; /* XXX should be configurable */ + boolean_t fast = FALSE; /* Configurable via ioctl */ link_init_sdl(ifp, (struct sockaddr *)&sdl, IFT_ETHER); sdl.sdl_alen = ETHER_ADDR_LEN; Modified: head/sys/net/ieee8023ad_lacp.h == --- head/sys/net/ieee8023ad_lacp.h Wed Aug 12 20:16:13 2015 (r286699) +++ head/sys/net/ieee8023ad_lacp.h Wed Aug 12 20:21:04 2015 (r286700) @@ -251,6 +251,7 @@ struct lacp_softc { u_int32_t lsc_tx_test; } lsc_debug; u_int32_t lsc_strict_mode; + boolean_t lsc_fast_timeout; /* if set, fast timeout */ }; #define LACP_TYPE_ACTORINFO 1 Modified: head/sys/net/if_lagg.c == --- head/sys/net/if_lagg.c Wed Aug 12 20:16:13 2015 (r286699) +++ head/sys/net/if_lagg.c Wed Aug 12 20:21:04 2015 (r286700) @@
-1257,6 +1257,8 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd ro->ro_opts |= LAGG_OPT_LACP_RXTEST; if (lsc->lsc_strict_mode != 0) ro->ro_opts |= LAGG_OPT_LACP_STRICT; + if (lsc->lsc_fast_timeout != 0) + ro->ro_opts |= LAGG_OPT_LACP_TIMEOUT; ro->ro_active = sc->sc_active; } else { @@ -1292,6 +1294,8 @@ lagg_ioctl(struct ifnet *ifp, u_long cmd case -LAGG_OPT_LACP_RXTEST: case LAGG_OPT_LACP_STRICT: case -LAGG_OPT_LACP_STRICT: + case LAGG_OPT_LACP_TIMEOUT: + case -LAGG_OPT_LACP_TIMEOUT: valid = lacp = 1; break; default: @@ -1320,6 +1324,7 @@ lagg_ioctl(struct ifnet *ifp,
svn commit: r286669 - head/sys/netinet
Author: hiren Date: Wed Aug 12 16:08:37 2015 New Revision: 286669 URL: https://svnweb.freebsd.org/changeset/base/286669 Log: Remove unused TCPTV_SRTTDFLT. We initialize srtt with TCPTV_SRTTBASE when we don't have any rtt estimate. Differential Revision: D3334 Sponsored by: Limelight Networks Modified: head/sys/netinet/tcp_timer.h Modified: head/sys/netinet/tcp_timer.h == --- head/sys/netinet/tcp_timer.h Wed Aug 12 15:48:14 2015 (r286668) +++ head/sys/netinet/tcp_timer.h Wed Aug 12 16:08:37 2015 (r286669) @@ -76,7 +76,6 @@ #define TCPTV_SRTTBASE 0 /* base roundtrip time; if 0, no idea yet */ #define TCPTV_RTOBASE ( 3*hz) /* assumed RTO if no info */ -#define TCPTV_SRTTDFLT ( 3*hz) /* assumed RTT if no info */ #define TCPTV_PERSMIN ( 5*hz) /* retransmit persistence */ #define TCPTV_PERSMAX ( 60*hz) /* maximum persist interval */ ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r286091 - head/sys/netinet
Author: hiren Date: Thu Jul 30 19:24:49 2015 New Revision: 286091 URL: https://svnweb.freebsd.org/changeset/base/286091 Log: Update snd_una description to make it more readable. Differential Revision: https://reviews.freebsd.org/D3179 Reviewed by: gnn Sponsored by: Limelight Networks Modified: head/sys/netinet/tcp_var.h Modified: head/sys/netinet/tcp_var.h == --- head/sys/netinet/tcp_var.h Thu Jul 30 19:08:23 2015 (r286090) +++ head/sys/netinet/tcp_var.h Thu Jul 30 19:24:49 2015 (r286091) @@ -113,7 +113,7 @@ struct tcpcb { struct vnet *t_vnet; /* back pointer to parent vnet */ - tcp_seq snd_una; /* send unacknowledged */ + tcp_seq snd_una; /* sent but unacknowledged */ tcp_seq snd_max; /* highest sequence number sent; * used to recognize retransmits */ ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r285736 - head/sys/dev/ixgbe
Author: hiren Date: Tue Jul 21 06:48:36 2015 New Revision: 285736 URL: https://svnweb.freebsd.org/changeset/base/285736 Log: Remove a couple of TUNABLE_INT() calls which are unnecessary after r267961. r267961 did remove them but they reappeared when the ixgbe(4) rewrite happened in r280182. Sponsored by: Limelight Networks Modified: head/sys/dev/ixgbe/if_ix.c Modified: head/sys/dev/ixgbe/if_ix.c == --- head/sys/dev/ixgbe/if_ix.c Tue Jul 21 06:18:42 2015 (r285735) +++ head/sys/dev/ixgbe/if_ix.c Tue Jul 21 06:48:36 2015 (r285736) @@ -273,7 +273,6 @@ SYSCTL_INT(_hw_ix, OID_AUTO, max_interru /* How many packets rxeof tries to clean at a time */ static int ixgbe_rx_process_limit = 256; -TUNABLE_INT("hw.ixgbe.rx_process_limit", &ixgbe_rx_process_limit); SYSCTL_INT(_hw_ix, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN, &ixgbe_rx_process_limit, 0, "Maximum number of received packets to process at a time," @@ -281,7 +280,6 @@ SYSCTL_INT(_hw_ix, OID_AUTO, rx_process_ /* How many packets txeof tries to clean at a time */ static int ixgbe_tx_process_limit = 256; -TUNABLE_INT("hw.ixgbe.tx_process_limit", &ixgbe_tx_process_limit); SYSCTL_INT(_hw_ix, OID_AUTO, tx_process_limit, CTLFLAG_RDTUN, &ixgbe_tx_process_limit, 0, "Maximum number of sent packets to process at a time," ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org