svn commit: r257455 - head/sys/net
Author: andre Date: Thu Oct 31 15:46:10 2013 New Revision: 257455 URL: http://svnweb.freebsd.org/changeset/base/257455 Log: Make struct ifnet readable and comprehensible again by grouping and ordering related variables, fields and locks next to each other. Add more comments to variables. Over time 'ifnet' has accumlated a lot of additional pointers and functionality in an unstructured way making it quite hard to read and understand while obfuscating relationships between fields and variables. Quantify the structure size and how bloated it has become. This is only a mechanical change in preparation for upcoming work to make ifnet opaque to drivers and to separate out the interface queuing. Sponsored by: The FreeBSD Foundation Modified: head/sys/net/if_var.h Modified: head/sys/net/if_var.h == --- head/sys/net/if_var.h Thu Oct 31 15:27:39 2013(r257454) +++ head/sys/net/if_var.h Thu Oct 31 15:46:10 2013(r257455) @@ -96,19 +96,42 @@ VNET_DECLARE(struct pfil_head, link_pfil /* * Structure defining a network interface. * - * (Would like to call this struct ``if'', but C isn't PL/1.) + * Size ILP32: 592 (approx) + * LP64: 1048 (approx) */ - struct ifnet { + /* General book keeping of interface lists. */ + TAILQ_ENTRY(ifnet) if_link; /* all struct ifnets are chained */ + LIST_ENTRY(ifnet) if_clones;/* interfaces of a cloner */ + TAILQ_HEAD(, ifg_list) if_groups; /* linked list of groups per if */ + /* protected by if_addr_lock */ + u_char if_alloctype; /* if_type at time of allocation */ + + /* Driver and protocol specific information that remains stable. */ void*if_softc; /* pointer to driver state */ + void*if_llsoftc;/* link layer softc */ void*if_l2com; /* pointer to protocol bits */ - struct vnet *if_vnet; /* pointer to network stack instance */ - TAILQ_ENTRY(ifnet) if_link; /* all struct ifnets are chained */ - charif_xname[IFNAMSIZ]; /* external name (name + unit) */ const char *if_dname; /* driver name */ int if_dunit; /* unit or IF_DUNIT_NONE */ + u_short if_index; /* numeric abbreviation for this if */ + short if_index_reserved; /* spare space to grow if_index */ + charif_xname[IFNAMSIZ]; /* external name (name + unit) */ + char*if_description;/* interface description */ + + /* Variable fields that are touched by the stack and drivers. */ + int if_flags; /* up/down, broadcast, etc. */ + int if_capabilities;/* interface features capabilities */ + int if_capenable; /* enabled features capabilities */ + void*if_linkmib;/* link-type-specific MIB data */ + size_t if_linkmiblen; /* length of above data */ + int if_drv_flags; /* driver-managed status flags */ u_int if_refcount;/* reference count */ - struct ifaddrhead if_addrhead; /* linked list of addresses per if */ + struct ifaltq if_snd; /* output queue (includes altq) */ + struct if_data if_data;/* type information and statistics */ + struct task if_linktask; /* task for link change events */ + + /* Addresses of different protocol families assigned to this if. */ + struct rwlock if_addr_lock;/* lock to protect address lists */ /* * if_addrhead is the list of all addresses associated to * an interface. @@ -119,21 +142,29 @@ struct ifnet { * However, access to the AF_LINK address through this * field is deprecated. Use if_addr or ifaddr_byindex() instead. */ - int if_pcount; /* number of promiscuous listeners */ - struct carp_if *if_carp; /* carp interface structure */ - struct bpf_if *if_bpf; /* packet filter structure */ - u_short if_index; /* numeric abbreviation for this if */ - short if_index_reserved; /* spare space to grow if_index */ - struct ifvlantrunk *if_vlantrunk; /* pointer to 802.1q data */ - int if_flags; /* up/down, broadcast, etc. */ - int if_capabilities;/* interface features capabilities */ - int if_capenable; /* enabled features capabilities */ - void*if_linkmib;/* link-type-specific MIB data */ - size_t if_linkmiblen; /* length of above data */ - struct if_data if_data; + struct ifaddrhead if_addrhead; /* linked list of addresses per if */ struct
Re: svn commit: r257455 - head/sys/net
On 31.10.2013 19:03, Luigi Rizzo wrote: On Thu, Oct 31, 2013 at 03:46:10PM +, Andre Oppermann wrote: Author: andre Date: Thu Oct 31 15:46:10 2013 New Revision: 257455 URL: http://svnweb.freebsd.org/changeset/base/257455 Log: Make struct ifnet readable and comprehensible again by grouping and ordering related variables, fields and locks next to each other. Add more comments to variables. Over time 'ifnet' has accumlated a lot of additional pointers and functionality in an unstructured way making it quite hard to read and understand while obfuscating relationships between fields and variables. Quantify the structure size and how bloated it has become. This is only a mechanical change in preparation for upcoming work to make ifnet opaque to drivers and to separate out the interface queuing. as you do the above I think it would make sense to replace all int/short/long with fixed-size fields as appropriate (and large enough) to make it easier to reason about things such as 'how many flags can i stuff into a field'. The large enough part refers to two things: - bitfields containing flags or capabilities have a tendency to overflow (not just in freebsd, linux has the same) requiring KBI changes. We should probably go for 64 bits unless there are compelling space reasons (not for ifnet). Gleb will handle most of that and it is going to be part of the making ifnet opaque to drivers. - it is useful if certain opaque fields (flow ids, cookies...) can store pointers. Once again, make them at least 64 bit helps A number of mbuf header fields have this property now. :) -- Andre ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
Re: svn commit: r257455 - head/sys/net
On 31.10.2013 20:27, Ian Lepore wrote: On Thu, 2013-10-31 at 20:08 +0100, Andre Oppermann wrote: On 31.10.2013 19:03, Luigi Rizzo wrote: On Thu, Oct 31, 2013 at 03:46:10PM +, Andre Oppermann wrote: Author: andre Date: Thu Oct 31 15:46:10 2013 New Revision: 257455 URL: http://svnweb.freebsd.org/changeset/base/257455 Log: Make struct ifnet readable and comprehensible again by grouping and ordering related variables, fields and locks next to each other. Add more comments to variables. Over time 'ifnet' has accumlated a lot of additional pointers and functionality in an unstructured way making it quite hard to read and understand while obfuscating relationships between fields and variables. Quantify the structure size and how bloated it has become. This is only a mechanical change in preparation for upcoming work to make ifnet opaque to drivers and to separate out the interface queuing. as you do the above I think it would make sense to replace all int/short/long with fixed-size fields as appropriate (and large enough) to make it easier to reason about things such as 'how many flags can i stuff into a field'. The large enough part refers to two things: - bitfields containing flags or capabilities have a tendency to overflow (not just in freebsd, linux has the same) requiring KBI changes. We should probably go for 64 bits unless there are compelling space reasons (not for ifnet). Gleb will handle most of that and it is going to be part of the making ifnet opaque to drivers. - it is useful if certain opaque fields (flow ids, cookies...) can store pointers. Once again, make them at least 64 bit helps A number of mbuf header fields have this property now. :) Is there any chance all this reworking might get us to a position where the protocol header in an mbuf doesn't have to be 32-bit aligned anymore? We pay a pretty heavy price for that requirement in the drivers of the least capable hardware we support, the systems that have the least horsepower to spare to make an extra copy of each packet to realign it. That's a totally different issue. Here we're talking about kernel structures including the fields in struct mbuf. It is totally not related to any payload packet headers inside the mbuf data section. The problem you're complaining about is the epic design mistake of the ethernet header neither being a power of two nor divisible by 4 or 8 bytes. It is a ridiculous 14 bytes. Many DMA engines are capable of starting on any byte. Some (popular) are not. If they are, the start of the payload can be by shifted by two so that the IP headers are 32bit aligned in memory. If that is not possible either the payload has to be copy-aligned or direct payload to structure casting must be disallowed, or indirected through a macro. -- Andre ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
Re: svn commit: r257455 - head/sys/net
On 31.10.2013 21:15, Ian Lepore wrote: On Thu, 2013-10-31 at 21:05 +0100, Luigi Rizzo wrote: On Thu, Oct 31, 2013 at 01:27:25PM -0600, Ian Lepore wrote: ... Is there any chance all this reworking might get us to a position where the protocol header in an mbuf doesn't have to be 32-bit aligned anymore? We pay a pretty heavy price for that requirement in the drivers of the least capable hardware we support, the systems that have the least horsepower to spare to make an extra copy of each packet to realign it. So are you suggesting to use some 'copy_unaligned_32()' function/macro to access 32-bit protocol fields in the network stack ? (16-bit entries should not be an issue) cheers luigi Or if not that (I have no idea how frequently those fields are accessed and what the performance penalty would be on systems moving a LOT of packets), then a compromise might be a flag the driver can set to say it may provide unaligned incoming packets. Then we could have common code at just one place in the network stack to split the mbuf and realign-copy just the protocol header part to a new mbuf and attach it to the front of the chain. That would minimize both duplicated code and the amount of data that has to be copied, and would put knowledge like how big is the protocol header that needs copying? in a place that knows more about protocols. This is a sensible idea and in fact how some of the drivers already do it internally for strict alignment architectures. I know pretty much nothing about what happens in the network stack after I feed a packet to if_input(), so maybe this is all impractical. But I also know that ARM systems with strict alignment constraints spend a lot of time copying the entire contents of every incoming packet to realign it, and it would be a lot better if they were able to DMA directly into an mbuf cluster that can handed up the stack. Even if copy the entire packet is the only thing if_input() could do, there might still be some benefit in having one common piece of code for it. We have tons of protocols using direct structure casting into the mbuf payload. Changing them all would be a huge task and in many cases also change the way the code is structured. -- Andre ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
Re: svn commit: r257455 - head/sys/net
On 31.10.2013 23:32, Ian Lepore wrote: On Thu, 2013-10-31 at 23:17 +0100, Andre Oppermann wrote: On 31.10.2013 20:27, Ian Lepore wrote: On Thu, 2013-10-31 at 20:08 +0100, Andre Oppermann wrote: On 31.10.2013 19:03, Luigi Rizzo wrote: On Thu, Oct 31, 2013 at 03:46:10PM +, Andre Oppermann wrote: Author: andre Date: Thu Oct 31 15:46:10 2013 New Revision: 257455 URL: http://svnweb.freebsd.org/changeset/base/257455 Log: Make struct ifnet readable and comprehensible again by grouping and ordering related variables, fields and locks next to each other. Add more comments to variables. Over time 'ifnet' has accumlated a lot of additional pointers and functionality in an unstructured way making it quite hard to read and understand while obfuscating relationships between fields and variables. Quantify the structure size and how bloated it has become. This is only a mechanical change in preparation for upcoming work to make ifnet opaque to drivers and to separate out the interface queuing. as you do the above I think it would make sense to replace all int/short/long with fixed-size fields as appropriate (and large enough) to make it easier to reason about things such as 'how many flags can i stuff into a field'. The large enough part refers to two things: - bitfields containing flags or capabilities have a tendency to overflow (not just in freebsd, linux has the same) requiring KBI changes. We should probably go for 64 bits unless there are compelling space reasons (not for ifnet). Gleb will handle most of that and it is going to be part of the making ifnet opaque to drivers. - it is useful if certain opaque fields (flow ids, cookies...) can store pointers. Once again, make them at least 64 bit helps A number of mbuf header fields have this property now. :) Is there any chance all this reworking might get us to a position where the protocol header in an mbuf doesn't have to be 32-bit aligned anymore? We pay a pretty heavy price for that requirement in the drivers of the least capable hardware we support, the systems that have the least horsepower to spare to make an extra copy of each packet to realign it. That's a totally different issue. Here we're talking about kernel structures including the fields in struct mbuf. It is totally not related to any payload packet headers inside the mbuf data section. Of course it is. The nature of my message was purely As long as we're doing big changes It's in a totally different area, but agreed on the big changes thing. ;) The problem you're complaining about is the epic design mistake of the ethernet header neither being a power of two nor divisible by 4 or 8 bytes. It is a ridiculous 14 bytes. Indeed. Many DMA engines are capable of starting on any byte. Some (popular) are not. If they are, the start of the payload can be by shifted by two so that the IP headers are 32bit aligned in memory. If that is not possible either the payload has to be copy-aligned or direct payload to structure casting must be disallowed, or indirected through a macro. I know of only one modern ARM SoC that's able to DMA network packets on a 2-byte boundary (actually even it requires a 4-byte boundary, but it's willing to stuff the first 16 bits with zeroes and offset everything that follows accordingly). I'm sure there are others, but it's the exception rather than the rule. Talk about foot shooting. I'm afraid that packet copying may still be the least evil option in the grand scheme of things. IIRC ARM64 will be able to do misaligned accesses with only a small performance penalty, like x86/AMD64. -- Andre ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r257391 - in head/sys: dev/snc mips/nlm/dev/net
Author: andre Date: Wed Oct 30 16:56:46 2013 New Revision: 257391 URL: http://svnweb.freebsd.org/changeset/base/257391 Log: nclude missing net/if_var.h. Due to header pollution it wasn't noticed before. Modified: head/sys/dev/snc/dp83932.c head/sys/mips/nlm/dev/net/xlpge.c Modified: head/sys/dev/snc/dp83932.c == --- head/sys/dev/snc/dp83932.c Wed Oct 30 16:34:26 2013(r257390) +++ head/sys/dev/snc/dp83932.c Wed Oct 30 16:56:46 2013(r257391) @@ -74,6 +74,7 @@ #include net/ethernet.h #include net/if.h +#include net/if_var.h #include net/if_arp.h #include net/if_dl.h #include net/if_media.h Modified: head/sys/mips/nlm/dev/net/xlpge.c == --- head/sys/mips/nlm/dev/net/xlpge.c Wed Oct 30 16:34:26 2013 (r257390) +++ head/sys/mips/nlm/dev/net/xlpge.c Wed Oct 30 16:56:46 2013 (r257391) @@ -47,6 +47,7 @@ __FBSDID($FreeBSD$); #include sys/taskqueue.h #include net/if.h +#include net/if_var.h #include net/if_arp.h #include net/ethernet.h #include net/if_dl.h ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
Re: svn commit: r257391 - in head/sys: dev/snc mips/nlm/dev/net
On 30.10.2013 18:40, Sergey Kandaurov wrote: On 30 October 2013 20:56, Andre Oppermann an...@freebsd.org wrote: Author: andre Date: Wed Oct 30 16:56:46 2013 New Revision: 257391 URL: http://svnweb.freebsd.org/changeset/base/257391 Log: nclude missing net/if_var.h. Due to header pollution it wasn't noticed before. Modified: head/sys/dev/snc/dp83932.c head/sys/mips/nlm/dev/net/xlpge.c Modified: head/sys/dev/snc/dp83932.c == --- head/sys/dev/snc/dp83932.c Wed Oct 30 16:34:26 2013(r257390) +++ head/sys/dev/snc/dp83932.c Wed Oct 30 16:56:46 2013(r257391) @@ -74,6 +74,7 @@ #include net/ethernet.h #include net/if.h +#include net/if_var.h #include net/if_arp.h #include net/if_dl.h #include net/if_media.h Modified: head/sys/mips/nlm/dev/net/xlpge.c == --- head/sys/mips/nlm/dev/net/xlpge.c Wed Oct 30 16:34:26 2013 (r257390) +++ head/sys/mips/nlm/dev/net/xlpge.c Wed Oct 30 16:56:46 2013 (r257391) @@ -47,6 +47,7 @@ __FBSDID($FreeBSD$); #include sys/taskqueue.h #include net/if.h +#include net/if_var.h #include net/if_arp.h #include net/ethernet.h #include net/if_dl.h To complete TARGET=pc98 KERNCONF=GENERIC crossbuild, I had to apply also this change: Please commit. :) -- Andre Index: sys/dev/snc/if_snc.c === --- sys/dev/snc/if_snc.c(revision 257391) +++ sys/dev/snc/if_snc.c(working copy) @@ -48,6 +48,7 @@ __FBSDID($FreeBSD$); #include net/if.h #include net/if_arp.h #include net/if_media.h +#include net/if_var.h #include dev/snc/dp83932reg.h #include dev/snc/dp83932var.h Index: sys/dev/snc/if_snc_pccard.c === --- sys/dev/snc/if_snc_pccard.c(revision 257391) +++ sys/dev/snc/if_snc_pccard.c(working copy) @@ -41,6 +41,8 @@ __FBSDID($FreeBSD$); #include sys/systm.h #include sys/socket.h #include sys/kernel.h +#include sys/lock.h +#include sys/mutex.h #include sys/module.h #include sys/bus.h ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r257351 - head/sys/net
Author: andre Date: Tue Oct 29 17:48:08 2013 New Revision: 257351 URL: http://svnweb.freebsd.org/changeset/base/257351 Log: Move all interface queue related structures, macros and definitions from net/if_var to it own new net/ifq.h. For now net/ifq.h is unconditionally included through net/if_var.h. This is a mechanical change in preparation to make struct ifnet and the individual interface queue mechanisms opaque. Discussed with: glebius Sponsored by: The FreeBSD Foundation Added: head/sys/net/ifq.h - copied, changed from r257282, head/sys/net/if_var.h Modified: head/sys/net/if_var.h Modified: head/sys/net/if_var.h == --- head/sys/net/if_var.h Tue Oct 29 17:46:26 2013(r257350) +++ head/sys/net/if_var.h Tue Oct 29 17:48:08 2013(r257351) @@ -94,18 +94,6 @@ VNET_DECLARE(struct pfil_head, link_pfil #endif /* _KERNEL */ /* - * Structure defining a queue for a network interface. - */ -struct ifqueue { - struct mbuf *ifq_head; - struct mbuf *ifq_tail; - int ifq_len; - int ifq_maxlen; - int ifq_drops; - struct mtx ifq_mtx; -}; - -/* * Structure defining a network interface. * * (Would like to call this struct ``if'', but C isn't PL/1.) @@ -208,6 +196,8 @@ struct ifnet { void*if_pspare[8]; /* 1 netmap, 7 TDB */ }; +#include net/ifq.h /* XXXAO: temporary unconditional include */ + /* * XXX These aliases are terribly dangerous because they could apply * to anything. @@ -262,96 +252,6 @@ void if_addr_runlock(struct ifnet *ifp); void if_maddr_rlock(struct ifnet *ifp); /* if_multiaddrs */ void if_maddr_runlock(struct ifnet *ifp);/* if_multiaddrs */ -/* - * Output queues (ifp-if_snd) and slow device input queues (*ifp-if_slowq) - * are queues of messages stored on ifqueue structures - * (defined above). Entries are added to and deleted from these structures - * by these macros. - */ -#define IF_LOCK(ifq) mtx_lock((ifq)-ifq_mtx) -#define IF_UNLOCK(ifq) mtx_unlock((ifq)-ifq_mtx) -#defineIF_LOCK_ASSERT(ifq) mtx_assert((ifq)-ifq_mtx, MA_OWNED) -#define_IF_QFULL(ifq) ((ifq)-ifq_len = (ifq)-ifq_maxlen) -#define_IF_DROP(ifq) ((ifq)-ifq_drops++) -#define_IF_QLEN(ifq) ((ifq)-ifq_len) - -#define_IF_ENQUEUE(ifq, m) do {\ - (m)-m_nextpkt = NULL; \ - if ((ifq)-ifq_tail == NULL)\ - (ifq)-ifq_head = m;\ - else\ - (ifq)-ifq_tail-m_nextpkt = m; \ - (ifq)-ifq_tail = m;\ - (ifq)-ifq_len++; \ -} while (0) - -#define IF_ENQUEUE(ifq, m) do {\ - IF_LOCK(ifq); \ - _IF_ENQUEUE(ifq, m);\ - IF_UNLOCK(ifq); \ -} while (0) - -#define_IF_PREPEND(ifq, m) do {\ - (m)-m_nextpkt = (ifq)-ifq_head; \ - if ((ifq)-ifq_tail == NULL)\ - (ifq)-ifq_tail = (m); \ - (ifq)-ifq_head = (m); \ - (ifq)-ifq_len++; \ -} while (0) - -#define IF_PREPEND(ifq, m) do {\ - IF_LOCK(ifq); \ - _IF_PREPEND(ifq, m);\ - IF_UNLOCK(ifq); \ -} while (0) - -#define_IF_DEQUEUE(ifq, m) do {\ - (m) = (ifq)-ifq_head; \ - if (m) {\ - if (((ifq)-ifq_head = (m)-m_nextpkt) == NULL) \ - (ifq)-ifq_tail = NULL; \ - (m)-m_nextpkt = NULL; \ - (ifq)-ifq_len--; \ - } \ -} while (0) - -#define IF_DEQUEUE(ifq, m) do {\ - IF_LOCK(ifq); \ - _IF_DEQUEUE(ifq, m);\ - IF_UNLOCK(ifq); \ -} while (0) - -#define_IF_DEQUEUE_ALL(ifq, m) do {\ - (m) = (ifq)-ifq_head; \ - (ifq)-ifq_head = (ifq)-ifq_tail = NULL;
svn commit: r256920 - head/sys/netinet
Author: andre Date: Tue Oct 22 18:24:34 2013 New Revision: 256920 URL: http://svnweb.freebsd.org/changeset/base/256920 Log: The TCP delayed ACK logic isn't aware of LRO passing up large aggregated segments thinking it received only one segment. This causes it to enable the delay the ACK for 100ms to wait for another segment which may never come because all the data was received already. Doing delayed ACK for LRO segments is bogus for two reasons: a) it pushes us further away from acking every other packet; b) it introduces additional delay in responding to the sender. The latter is especially bad because it is in the nature of LRO to aggregated all segments of a burst with no more coming until an ACK is sent back. Change the delayed ACK logic to detect LRO segments by being larger than the MSS for this connection and issuing an immediate ACK for them to keep the ACK clock ticking without interruption. Reported by: julian, cperciva Tested by:cperciva Reviewed by: lstewart MFC after:3 days Modified: head/sys/netinet/tcp_input.c Modified: head/sys/netinet/tcp_input.c == --- head/sys/netinet/tcp_input.cTue Oct 22 18:14:06 2013 (r256919) +++ head/sys/netinet/tcp_input.cTue Oct 22 18:24:34 2013 (r256920) @@ -508,10 +508,13 @@ do { \ * the ack that opens up a 0-sized window and * - delayed acks are enabled or * - this is a half-synchronized T/TCP connection. + * - the segment size is not larger than the MSS and LRO wasn't used + * for this segment. */ -#define DELAY_ACK(tp) \ +#define DELAY_ACK(tp, tlen)\ ((!tcp_timer_active(tp, TT_DELACK)\ (tp-t_flags TF_RXWIN0SENT) == 0) \ + (tlen = tp-t_maxopd)\ (V_tcp_delack_enabled || (tp-t_flags TF_NEEDSYN))) /* @@ -1863,7 +1866,7 @@ tcp_do_segment(struct mbuf *m, struct tc } /* NB: sorwakeup_locked() does an implicit unlock. */ sorwakeup_locked(so); - if (DELAY_ACK(tp)) { + if (DELAY_ACK(tp, tlen)) { tp-t_flags |= TF_DELACK; } else { tp-t_flags |= TF_ACKNOW; @@ -1954,7 +1957,7 @@ tcp_do_segment(struct mbuf *m, struct tc * If there's data, delay ACK; if there's also a FIN * ACKNOW will be turned on later. */ - if (DELAY_ACK(tp) tlen != 0) + if (DELAY_ACK(tp, tlen) tlen != 0) tcp_timer_activate(tp, TT_DELACK, tcp_delacktime); else @@ -2926,7 +2929,7 @@ dodata: /* XXX */ if (th-th_seq == tp-rcv_nxt LIST_EMPTY(tp-t_segq) TCPS_HAVEESTABLISHED(tp-t_state)) { - if (DELAY_ACK(tp)) + if (DELAY_ACK(tp, tlen)) tp-t_flags |= TF_DELACK; else tp-t_flags |= TF_ACKNOW; ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
Re: svn commit: r255608 - in head/sys: conf kern modules/cxgb/cxgb modules/sfxge modules/ti sys vm
On 16.09.2013 08:25, Konstantin Belousov wrote: Author: kib Date: Mon Sep 16 06:25:54 2013 New Revision: 255608 URL: http://svnweb.freebsd.org/changeset/base/255608 Log: Remove zero-copy sockets code. It only worked for anonymous memory, and the equivalent functionality is now provided by sendfile(2) over posix shared memory filedescriptor. Remove the cow member of struct vm_page, and rearrange the remaining members. While there, make hold_count unsigned. Thanks, it's good to see it gone. :) -- Andre ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
Re: svn commit: r254773 - head/sys/net
On 07.09.2013 17:30, Mikolaj Golub wrote: Hi, On Sat, Aug 24, 2013 at 11:17:25AM +, Andre Oppermann wrote: Author: andre Date: Sat Aug 24 11:17:25 2013 New Revision: 254773 URL: http://svnweb.freebsd.org/changeset/base/254773 Log: Resolve the confusion between the head_list and the hook list. The linked list of pfil hooks is changed to chain and this term is applied consistently. The head_list remains with list term. Add KASSERT to vnet_pfil_uninit(). ... vnet_pfil_uninit(const void *unused) { - /* XXX should panic if list is not empty */ + KASSERT(LIST_EMPTY(V_pfil_head_list), + (%s: pfil_head_list %p not empty, __func__, V_pfil_head_list)); PFIL_LOCK_DESTROY_REAL(V_pfil_lock); return (0); } It is triggered when destroying a vnet, due to inet/inet6 pfil hooks are not being unregistered. The attached patch fixes the issue for me. I am going to commit it if there are no objections -- might the unregistration has been skipped intentionally due to some unresolved issue? There's no reason that I know of. And if there were then unregistering would be unsafe in any case. -- Andre ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r254973 - in head/sys: kern sys
Author: andre Date: Tue Aug 27 20:52:02 2013 New Revision: 254973 URL: http://svnweb.freebsd.org/changeset/base/254973 Log: Pad m_hdr on 32bit architectures to to prevent alignment and padding problems with the way MLEN, MHLEN, and struct mbuf are set up. CTASSERT's are provided to detect such issues at compile time in the future. The #define MLEN and MHLEN calculation do not take actual compiler- induced alignment and padding inside the complete struct mbuf into account. Accordingly appropriate attention is required when changing members of struct mbuf. Ideally one would calculate MLEN as (MSIZE - sizeof(((struct mbuf *)0)-m_hdr) but that doesn't work as the compiler refuses to operate on an as of yet incomplete structure. In particular ARM 32bit has more strict alignment requirements which caused 4 bytes of padding between m_hdr and pkthdr in struct mbuf because of the 64bit members in pkthdr. This wasn't picked up by MLEN and MHLEN causing an overflow of the mbuf provided data storage by overestimating its size. I386 didn't show this problem because it handles unaligned access just fine, albeit at a small performance penalty. On 64bit architectures the struct mbuf layout is 64bit aligned in all places. Reported by: Thomas Skibo ThomasSkibo-at-sbcglobal-dot-net Tested by:tuexen, ian, Thomas Skibo (extended patch) Sponsored by: The FreeBSD Foundation Modified: head/sys/kern/uipc_mbuf.c head/sys/sys/mbuf.h Modified: head/sys/kern/uipc_mbuf.c == --- head/sys/kern/uipc_mbuf.c Tue Aug 27 20:43:27 2013(r254972) +++ head/sys/kern/uipc_mbuf.c Tue Aug 27 20:52:02 2013(r254973) @@ -85,6 +85,14 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, m_defrag #endif /* + * Ensure the correct size of various mbuf parameters. It could be off due + * to compiler-induced padding and alignment artifacts. + */ +CTASSERT(sizeof(struct mbuf) == MSIZE); +CTASSERT(MSIZE - offsetof(struct mbuf, m_dat) == MLEN); +CTASSERT(MSIZE - offsetof(struct mbuf, m_pktdat) == MHLEN); + +/* * m_get2() allocates minimum mbuf that would fit size argument. */ struct mbuf * Modified: head/sys/sys/mbuf.h == --- head/sys/sys/mbuf.h Tue Aug 27 20:43:27 2013(r254972) +++ head/sys/sys/mbuf.h Tue Aug 27 20:52:02 2013(r254973) @@ -53,6 +53,10 @@ * externally and attach it to the mbuf in a way similar to that of mbuf * clusters. * + * NB: These calculation do not take actual compiler-induced alignment and + * padding inside the complete struct mbuf into account. Appropriate + * attention is required when changing members of struct mbuf. + * * MLEN is data length in a normal mbuf. * MHLEN is data length in an mbuf with pktheader. * MINCLSIZE is a smallest amount of data that should be put into cluster. @@ -84,7 +88,7 @@ struct mb_args { /* * Header present at the beginning of every mbuf. - * Size ILP32: 20 + * Size ILP32: 24 * LP64: 32 */ struct m_hdr { @@ -94,6 +98,9 @@ struct m_hdr { int32_t mh_len;/* amount of data in this mbuf */ uint32_t mh_type:8, /* type of data in this mbuf */ mh_flags:24; /* flags; see below */ +#if !defined(__LP64__) + uint32_t mh_pad;/* pad for 64bit alignment */ +#endif }; /* ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r254910 - head/sys/dev/xen/netback
Author: andre Date: Mon Aug 26 13:17:37 2013 New Revision: 254910 URL: http://svnweb.freebsd.org/changeset/base/254910 Log: Fix mbuf debugging printf()'s after the recent mbuf header changes. Modified: head/sys/dev/xen/netback/netback.c Modified: head/sys/dev/xen/netback/netback.c == --- head/sys/dev/xen/netback/netback.c Mon Aug 26 13:00:25 2013 (r254909) +++ head/sys/dev/xen/netback/netback.c Mon Aug 26 13:17:37 2013 (r254910) @@ -587,14 +587,14 @@ xnb_dump_mbuf(const struct mbuf *m) if (m-m_flags M_PKTHDR) { printf(flowid=%10d, csum_flags=%#8x, csum_data=%#8x, tso_segsz=%5hd\n, - m-m_pkthdr.flowid, m-m_pkthdr.csum_flags, + m-m_pkthdr.flowid, (int)m-m_pkthdr.csum_flags, m-m_pkthdr.csum_data, m-m_pkthdr.tso_segsz); - printf(rcvif=%16p, header=%18p, len=%19d\n, - m-m_pkthdr.rcvif, m-m_pkthdr.header, m-m_pkthdr.len); + printf(rcvif=%16p, len=%19d\n, + m-m_pkthdr.rcvif, m-m_pkthdr.len); } printf(m_next=%16p, m_nextpk=%16p, m_data=%16p\n, m-m_next, m-m_nextpkt, m-m_data); - printf(m_len=%17d, m_flags=%#15x, m_type=%18hd\n, + printf(m_len=%17d, m_flags=%#15x, m_type=%18u\n, m-m_len, m-m_flags, m-m_type); len = m-m_len; ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
Re: svn commit: r254527 - in head/sys: net80211 netinet sys
On 26.08.2013 11:43, Gleb Smirnoff wrote: A Author: andre A Date: Mon Aug 19 14:25:11 2013 A New Revision: 254527 A URL: http://svnweb.freebsd.org/changeset/base/254527 A A Log: A Reorder the mbuf defines to make more sense and group related flags A together. A A Add M_FLAG_PRINTF for use with printf(9) %b indentifier. A A Use the generic mbuf flags print names in the net80211 code and adjust A the protocol specific bits for their new positions. A A Change SCTP M_PROTO mapping from 5 to 1 to fit within the 16bit field A they use internally to store some additional information. A A Discussed with: trociny, glebius The first part: reorder the mbuf flag defines wasn't actually discussed. It was part of the patch to the email I sent you, and a couple of others, to which you replied with no objection to the M_FLAGS cleanup. It wasn't explicitly spelled out but very visible in the patch. Tossing values of M_BCAST, M_MCAST, M_VLANTAG for no value except code tidyness isn't a safe idea. These are the flags that device drivers use. After this change binary drivers are still loadable but would be buggy. As I already explained elsewhere, on -current only KLD's compiled at the same time/revision as the kernel are supported. It has been this way as long as I can remember. There never was any guarantee, not even implicit, that a KLD from r254000 will continue to work with a kernel from r254527. In fact, if it were, it would be almost impossible to develop anything on -current. On -stable we do have this ABI+API guarantee, so a KLD from 9.0 is supposed to work on 9.2 as well and we generally keep that promise. We allow ourselves ABI changes in head, but this doesn't mean that we should do them for no important reason. The important reason are the later updates to the mbuf headers and that it simply doesn't make sense to keep the defines spaghetti while the whole thing is being overhauled. It seems from a technical point of view both arguments, maintain bits vs. reordering, are rather weak and prone to bike-shedding. ;) It certainly doesn't make sense to change again and revert it just for the sake of it. -- Andre ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
Re: svn commit: r254779 - head/sys/kern
On 26.08.2013 12:50, Gleb Smirnoff wrote: On Sat, Aug 24, 2013 at 12:24:59PM +, Andre Oppermann wrote: A Author: andre A Date: Sat Aug 24 12:24:58 2013 A New Revision: 254779 A URL: http://svnweb.freebsd.org/changeset/base/254779 A A Log: A Avoid code duplication for mbuf initialization and use m_init() instead A in mb_ctor_mbuf() and mb_ctor_pack(). m_init() is inline, but it calls m_pkthdr_init() which isn't inline. It might be that compiler inline it due to m_pkthdr_init() living in the same file as mb_ctor_mbuf() and mb_ctor_pack(), but not sure. It depends on the optimization level. m_pkthdr_init() zeroes much more than deleted code did. Some zeroing operations are definitely superfluous job. It's exactly the same for mb_ctor_mbuf() and one more for mb_ctor_pack(). Overall it has become more because we've got a couple more (small) fields in the mbuf headers now. Looking at the size of the headers it may be faster to just bzero() it instead of doing it one by one. The overall problem is that replicating the same operations in multiple places is dangerous from a consistency point of view. The change is definitely not an no-op change. It might have pessimized the mbuf allocation performance. Heavy inlining is not always an advantage and cause i-cache bloat. But you're right I haven't benchmarked it (yet) and thus we don't know. I've been setting up my 10G benchmark environment but had to re-prioritize due to the impeding freeze on -current. If the benchmarking shows a conclusive pessimization I'm more than happy to compensate for it, either by inlining m_pkthdr_init() as well or some form of macro. Such changes can still be done during API freeze, but before BETA1. -- Andre ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
Re: svn commit: r254805 - head/sys/sys
On 26.08.2013 13:00, Gleb Smirnoff wrote: On Sat, Aug 24, 2013 at 07:58:36PM +, Andre Oppermann wrote: A Author: andre A Date: Sat Aug 24 19:58:36 2013 A New Revision: 254805 A URL: http://svnweb.freebsd.org/changeset/base/254805 A A Log: A Add mtodo(m, o) macro taking an additional offset into the mbuf data section. A A Sponsored by: The FreeBSD Foundation A A Modified: A head/sys/sys/mbuf.h A A Modified: head/sys/sys/mbuf.h A == A --- head/sys/sys/mbuf.h Sat Aug 24 19:51:18 2013(r254804) A +++ head/sys/sys/mbuf.h Sat Aug 24 19:58:36 2013(r254805) A @@ -67,8 +67,10 @@ A * type: A * A * mtod(m, t)-- Convert mbuf pointer to data pointer of correct type. A + * mtodo(m, o) -- Same as above but with offset 'o' into data. A */ A #define mtod(m, t) ((t)((m)-m_data)) A +#define mtodo(m, o) ((void *)(((m)-m_data) + (o))) IMO, having a typecast would be better. Then mtodo() would be really same as mtod(), as stated in comment. There was a big discussion about 10 month back when I did this change in my tcp_workqueue branch with the typecast in. The conclusion was that a typecast is really not necessary and only causes one to type a lot more for the compiler to throw away anyway. But yes, the comment isn't perfect in that sense. -- Andre ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r254830 - head/sys/kern
Author: andre Date: Sun Aug 25 09:40:15 2013 New Revision: 254830 URL: http://svnweb.freebsd.org/changeset/base/254830 Log: Adjust socow_iodone() after r254799. Sponsored by: The FreeBSD Foundation Modified: head/sys/kern/uipc_cow.c Modified: head/sys/kern/uipc_cow.c == --- head/sys/kern/uipc_cow.cSun Aug 25 08:56:09 2013(r254829) +++ head/sys/kern/uipc_cow.cSun Aug 25 09:40:15 2013(r254830) @@ -70,10 +70,10 @@ struct netsend_cow_stats { static struct netsend_cow_stats socow_stats; -static void socow_iodone(void *addr, void *args); +static void socow_iodone(struct mbuf *m, void *addr, void *args); static void -socow_iodone(void *addr, void *args) +socow_iodone(struct mbuf *m, void *addr, void *args) { struct sf_buf *sf; vm_page_t pp; ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r254831 - head/sys/net
Author: andre Date: Sun Aug 25 09:41:37 2013 New Revision: 254831 URL: http://svnweb.freebsd.org/changeset/base/254831 Log: Remove unnecessary setup of the m-pkthdr.header pointer. Sponsored by: The FreeBSD Foundation Modified: head/sys/net/if_fddisubr.c head/sys/net/if_iso88025subr.c Modified: head/sys/net/if_fddisubr.c == --- head/sys/net/if_fddisubr.c Sun Aug 25 09:40:15 2013(r254830) +++ head/sys/net/if_fddisubr.c Sun Aug 25 09:41:37 2013(r254831) @@ -390,7 +390,6 @@ fddi_input(ifp, m) goto dropanyway; } fh = mtod(m, struct fddi_header *); - m-m_pkthdr.header = (void *)fh; /* * Discard packet if interface is not up. Modified: head/sys/net/if_iso88025subr.c == --- head/sys/net/if_iso88025subr.c Sun Aug 25 09:40:15 2013 (r254830) +++ head/sys/net/if_iso88025subr.c Sun Aug 25 09:41:37 2013 (r254831) @@ -476,7 +476,6 @@ iso88025_input(ifp, m) goto dropanyway; } th = mtod(m, struct iso88025_header *); - m-m_pkthdr.header = (void *)th; /* * Discard packet if interface is not up. ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r254832 - head/sys/ofed/drivers/net/mlx4
Author: andre Date: Sun Aug 25 09:45:26 2013 New Revision: 254832 URL: http://svnweb.freebsd.org/changeset/base/254832 Log: Change m-pkthdr.header to m-pkthdr.PH_loc.ptr after r254804 to transiently store pointers to packet headers. Sponsored by: The FreeBSD Foundation Modified: head/sys/ofed/drivers/net/mlx4/en_frag.c Modified: head/sys/ofed/drivers/net/mlx4/en_frag.c == --- head/sys/ofed/drivers/net/mlx4/en_frag.cSun Aug 25 09:41:37 2013 (r254831) +++ head/sys/ofed/drivers/net/mlx4/en_frag.cSun Aug 25 09:45:26 2013 (r254832) @@ -87,7 +87,7 @@ static void flush_session(struct mlx4_en u16 more) { struct mbuf *mb = session-fragments; - struct ip *iph = mb-m_pkthdr.header; + struct ip *iph = mb-m_pkthdr.PH_loc.ptr; struct net_device *dev = mb-m_pkthdr.rcvif; /* Update IP length and checksum */ @@ -132,7 +132,7 @@ int mlx4_en_rx_frags(struct mlx4_en_priv u16 offset; iph = (struct ip *)(mtod(mb, char *) + ETHER_HDR_LEN); - mb-m_pkthdr.header = iph; + mb-m_pkthdr.PH_loc.ptr = iph; ip_len = ntohs(iph-ip_len); ip_hlen = iph-ip_hl * 4; data_len = ip_len - ip_hlen; ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r254834 - in head/sys: netinet netinet6
Author: andre Date: Sun Aug 25 09:49:00 2013 New Revision: 254834 URL: http://svnweb.freebsd.org/changeset/base/254834 Log: For now limit printf(9) %x of the 64bit pkthdr.csum_flags field to 32bits. The upper 32bits are not occupied for now. Sponsored by: The FreeBSD Foundation Modified: head/sys/netinet/sctp_input.c head/sys/netinet6/sctp6_usrreq.c Modified: head/sys/netinet/sctp_input.c == --- head/sys/netinet/sctp_input.c Sun Aug 25 09:46:03 2013 (r254833) +++ head/sys/netinet/sctp_input.c Sun Aug 25 09:49:00 2013 (r254834) @@ -6026,7 +6026,7 @@ sctp_input_with_port(struct mbuf *i_pak, sctp_input(): Packet of length %d received on %s with csum_flags 0x%x.\n, m-m_pkthdr.len, if_name(m-m_pkthdr.rcvif), - m-m_pkthdr.csum_flags); + (int)m-m_pkthdr.csum_flags); if (m-m_flags M_FLOWID) { mflowid = m-m_pkthdr.flowid; use_mflowid = 1; Modified: head/sys/netinet6/sctp6_usrreq.c == --- head/sys/netinet6/sctp6_usrreq.cSun Aug 25 09:46:03 2013 (r254833) +++ head/sys/netinet6/sctp6_usrreq.cSun Aug 25 09:49:00 2013 (r254834) @@ -112,7 +112,7 @@ sctp6_input_with_port(struct mbuf **i_pa sctp6_input(): Packet of length %d received on %s with csum_flags 0x%x.\n, m-m_pkthdr.len, if_name(m-m_pkthdr.rcvif), - m-m_pkthdr.csum_flags); + (int)m-m_pkthdr.csum_flags); if (m-m_flags M_FLOWID) { mflowid = m-m_pkthdr.flowid; use_mflowid = 1; ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r254842 - in head/sys: compat/ndis dev/cas dev/hatm dev/if_ndis dev/iscsi_initiator dev/lge dev/mwl dev/wb kern sys
Author: andre Date: Sun Aug 25 10:57:09 2013 New Revision: 254842 URL: http://svnweb.freebsd.org/changeset/base/254842 Log: Give (*ext_free) an int return value allowing for very sophisticated external mbuf buffer management capabilities in the future. For now only EXT_FREE_OK is defined with current legacy behavior. Sponsored by: The FreeBSD Foundation Modified: head/sys/compat/ndis/kern_ndis.c head/sys/compat/ndis/ndis_var.h head/sys/dev/cas/if_cas.c head/sys/dev/hatm/if_hatm_intr.c head/sys/dev/if_ndis/if_ndis.c head/sys/dev/iscsi_initiator/isc_soc.c head/sys/dev/lge/if_lge.c head/sys/dev/mwl/if_mwl.c head/sys/dev/wb/if_wb.c head/sys/kern/subr_mbpool.c head/sys/kern/uipc_cow.c head/sys/kern/uipc_mbuf.c head/sys/kern/uipc_syscalls.c head/sys/sys/mbpool.h head/sys/sys/mbuf.h head/sys/sys/sf_buf.h Modified: head/sys/compat/ndis/kern_ndis.c == --- head/sys/compat/ndis/kern_ndis.cSun Aug 25 10:28:02 2013 (r254841) +++ head/sys/compat/ndis/kern_ndis.cSun Aug 25 10:57:09 2013 (r254842) @@ -482,16 +482,14 @@ ndis_return(dobj, arg) KeReleaseSpinLock(block-nmb_returnlock, irql); } -void -ndis_return_packet(buf, arg) - void*buf; /* not used */ - void*arg; +int +ndis_return_packet(struct mbuf *m, void *buf, void *arg) { ndis_packet *p; ndis_miniport_block *block; if (arg == NULL) - return; + return (EXT_FREE_OK); p = arg; @@ -500,7 +498,7 @@ ndis_return_packet(buf, arg) /* Release packet when refcount hits zero, otherwise return. */ if (p-np_refcnt) - return; + return (EXT_FREE_OK); block = ((struct ndis_softc *)p-np_softc)-ndis_block; @@ -512,6 +510,8 @@ ndis_return_packet(buf, arg) IoQueueWorkItem(block-nmb_returnitem, (io_workitem_func)kernndis_functbl[7].ipt_wrap, WORKQUEUE_CRITICAL, block); + + return (EXT_FREE_OK); } void Modified: head/sys/compat/ndis/ndis_var.h == --- head/sys/compat/ndis/ndis_var.h Sun Aug 25 10:28:02 2013 (r254841) +++ head/sys/compat/ndis/ndis_var.h Sun Aug 25 10:57:09 2013 (r254842) @@ -1743,7 +1743,7 @@ extern int ndis_halt_nic(void *); extern int ndis_shutdown_nic(void *); extern int ndis_pnpevent_nic(void *, int); extern int ndis_init_nic(void *); -extern void ndis_return_packet(void *, void *); +extern int ndis_return_packet(struct mbuf *, void *, void *); extern int ndis_init_dma(void *); extern int ndis_destroy_dma(void *); extern int ndis_create_sysctls(void *); Modified: head/sys/dev/cas/if_cas.c == --- head/sys/dev/cas/if_cas.c Sun Aug 25 10:28:02 2013(r254841) +++ head/sys/dev/cas/if_cas.c Sun Aug 25 10:57:09 2013(r254842) @@ -132,7 +132,7 @@ static void cas_detach(struct cas_softc static int cas_disable_rx(struct cas_softc *sc); static int cas_disable_tx(struct cas_softc *sc); static voidcas_eint(struct cas_softc *sc, u_int status); -static voidcas_free(struct mbuf *m, void *arg1, void* arg2); +static int cas_free(struct mbuf *m, void *arg1, void* arg2); static voidcas_init(void *xsc); static voidcas_init_locked(struct cas_softc *sc); static voidcas_init_regs(struct cas_softc *sc); @@ -1887,7 +1887,7 @@ cas_rint(struct cas_softc *sc) #endif } -static void +static int cas_free(struct mbuf *m, void *arg1, void *arg2) { struct cas_rxdsoft *rxds; @@ -1904,7 +1904,7 @@ cas_free(struct mbuf *m, void *arg1, voi rxds = sc-sc_rxdsoft[idx]; #endif if (refcount_release(rxds-rxds_refcount) == 0) - return; + return (EXT_FREE_OK); /* * NB: this function can be called via m_freem(9) within @@ -1915,6 +1915,7 @@ cas_free(struct mbuf *m, void *arg1, voi cas_add_rxdesc(sc, idx); if (locked == 0) CAS_UNLOCK(sc); + return (EXT_FREE_OK); } static inline void Modified: head/sys/dev/hatm/if_hatm_intr.c == --- head/sys/dev/hatm/if_hatm_intr.cSun Aug 25 10:28:02 2013 (r254841) +++ head/sys/dev/hatm/if_hatm_intr.cSun Aug 25 10:57:09 2013 (r254842) @@ -260,7 +260,7 @@ hatm_mbuf_page_alloc(struct hatm_softc * /* * Free an mbuf and put it onto the free list. */ -static void +static int hatm_mbuf0_free(struct mbuf *m, void *buf, void *args) { struct hatm_softc *sc = args; @@ -270,8 +270,9 @@ hatm_mbuf0_free(struct mbuf *m, void *bu (freeing unused mbuf %x, c-hdr.flags)); c-hdr.flags = ~MBUF_USED;
svn commit: r254857 - head/sys/sys
Author: andre Date: Sun Aug 25 13:30:37 2013 New Revision: 254857 URL: http://svnweb.freebsd.org/changeset/base/254857 Log: Fix CSUM compatibility mapping. SCTP is a layer 4 protocol. Submitted by: tuexen Modified: head/sys/sys/mbuf.h Modified: head/sys/sys/mbuf.h == --- head/sys/sys/mbuf.h Sun Aug 25 13:10:03 2013(r254856) +++ head/sys/sys/mbuf.h Sun Aug 25 13:30:37 2013(r254857) @@ -418,7 +418,7 @@ struct mbuf { #defineCSUM_IP_VALID CSUM_L3_VALID #defineCSUM_DATA_VALID CSUM_L4_VALID #defineCSUM_PSEUDO_HDR CSUM_L4_CALC -#defineCSUM_SCTP_VALID CSUM_L3_VALID +#defineCSUM_SCTP_VALID CSUM_L4_VALID #defineCSUM_DELAY_DATA (CSUM_TCP|CSUM_UDP) #defineCSUM_DELAY_IP CSUM_IP /* Only v4, no v6 IP hdr csum */ #defineCSUM_DELAY_DATA_IPV6(CSUM_TCP_IPV6|CSUM_UDP_IPV6) ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r254769 - head/sys/net
Author: andre Date: Sat Aug 24 10:13:59 2013 New Revision: 254769 URL: http://svnweb.freebsd.org/changeset/base/254769 Log: Introduce typedef for pfil hook callback function and replace all spelled out occurrences with it. Reviewed by: eri Modified: head/sys/net/pfil.c head/sys/net/pfil.h Modified: head/sys/net/pfil.c == --- head/sys/net/pfil.c Sat Aug 24 10:06:51 2013(r254768) +++ head/sys/net/pfil.c Sat Aug 24 10:13:59 2013(r254769) @@ -225,8 +225,7 @@ pfil_head_get(int type, u_long val) * PFIL_WAITOK OK to call malloc with M_WAITOK. */ int -pfil_add_hook(int (*func)(void *, struct mbuf **, struct ifnet *, int, - struct inpcb *), void *arg, int flags, struct pfil_head *ph) +pfil_add_hook(pfil_func_t func, void *arg, int flags, struct pfil_head *ph) { struct packet_filter_hook *pfh1 = NULL; struct packet_filter_hook *pfh2 = NULL; @@ -285,8 +284,7 @@ error: * list. */ int -pfil_remove_hook(int (*func)(void *, struct mbuf **, struct ifnet *, int, -struct inpcb *), void *arg, int flags, struct pfil_head *ph) +pfil_remove_hook(pfil_func_t func, void *arg, int flags, struct pfil_head *ph) { int err = 0; @@ -334,9 +332,7 @@ pfil_list_add(pfil_list_t *list, struct * specified list. */ static int -pfil_list_remove(pfil_list_t *list, -int (*func)(void *, struct mbuf **, struct ifnet *, int, struct inpcb *), -void *arg) +pfil_list_remove(pfil_list_t *list, pfil_func_t func, void *arg) { struct packet_filter_hook *pfh; Modified: head/sys/net/pfil.h == --- head/sys/net/pfil.h Sat Aug 24 10:06:51 2013(r254768) +++ head/sys/net/pfil.h Sat Aug 24 10:13:59 2013(r254769) @@ -43,14 +43,16 @@ struct mbuf; struct ifnet; struct inpcb; +typedefint (*pfil_func_t)(void *, struct mbuf **, struct ifnet *, int, + struct inpcb *); + /* * The packet filter hooks are designed for anything to call them to * possibly intercept the packet. */ struct packet_filter_hook { TAILQ_ENTRY(packet_filter_hook) pfil_link; - int (*pfil_func)(void *, struct mbuf **, struct ifnet *, int, - struct inpcb *); + pfil_func_t pfil_func; void*pfil_arg; }; @@ -87,10 +89,8 @@ struct pfil_head { LIST_ENTRY(pfil_head) ph_list; }; -intpfil_add_hook(int (*func)(void *, struct mbuf **, struct ifnet *, - int, struct inpcb *), void *, int, struct pfil_head *); -intpfil_remove_hook(int (*func)(void *, struct mbuf **, struct ifnet *, - int, struct inpcb *), void *, int, struct pfil_head *); +intpfil_add_hook(pfil_func_t, void *, int, struct pfil_head *); +intpfil_remove_hook(pfil_func_t, void *, int, struct pfil_head *); intpfil_run_hooks(struct pfil_head *, struct mbuf **, struct ifnet *, int, struct inpcb *inp); ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r254770 - head/sys/net
Author: andre Date: Sat Aug 24 10:30:20 2013 New Revision: 254770 URL: http://svnweb.freebsd.org/changeset/base/254770 Log: Convert one instance of pfil hook callback missed in r254769. Modified: head/sys/net/pfil.c Modified: head/sys/net/pfil.c == --- head/sys/net/pfil.c Sat Aug 24 10:13:59 2013(r254769) +++ head/sys/net/pfil.c Sat Aug 24 10:30:20 2013(r254770) @@ -53,10 +53,7 @@ MTX_SYSINIT(pfil_heads_lock, pfil_globa MTX_DEF); static int pfil_list_add(pfil_list_t *, struct packet_filter_hook *, int); - -static int pfil_list_remove(pfil_list_t *, -int (*)(void *, struct mbuf **, struct ifnet *, int, struct inpcb *), -void *); +static int pfil_list_remove(pfil_list_t *, pfil_func_t, void *); LIST_HEAD(pfilheadhead, pfil_head); VNET_DEFINE(struct pfilheadhead, pfil_head_list); ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r254771 - head/sys/net
Author: andre Date: Sat Aug 24 10:36:33 2013 New Revision: 254771 URL: http://svnweb.freebsd.org/changeset/base/254771 Log: Internalize pfil_hook_get(). There are no outside consumers of this API, it is only safe for internal use and even the pfil(9) man page says so in the BUGS section. Reviewed by: eri Modified: head/sys/net/pfil.c head/sys/net/pfil.h Modified: head/sys/net/pfil.c == --- head/sys/net/pfil.c Sat Aug 24 10:30:20 2013(r254770) +++ head/sys/net/pfil.c Sat Aug 24 10:36:33 2013(r254771) @@ -52,6 +52,7 @@ static struct mtx pfil_global_lock; MTX_SYSINIT(pfil_heads_lock, pfil_global_lock, pfil_head_list lock, MTX_DEF); +static struct packet_filter_hook *pfil_hook_get(int, struct pfil_head *); static int pfil_list_add(pfil_list_t *, struct packet_filter_hook *, int); static int pfil_list_remove(pfil_list_t *, pfil_func_t, void *); @@ -89,6 +90,18 @@ pfil_run_hooks(struct pfil_head *ph, str return (rv); } +static struct packet_filter_hook * +pfil_hook_get(int dir, struct pfil_head *ph) +{ + + if (dir == PFIL_IN) + return (TAILQ_FIRST(ph-ph_in)); + else if (dir == PFIL_OUT) + return (TAILQ_FIRST(ph-ph_out)); + else + return (NULL); +} + /* * pfil_try_rlock() acquires rm reader lock for specified head * if this is immediately possible. Modified: head/sys/net/pfil.h == --- head/sys/net/pfil.h Sat Aug 24 10:30:20 2013(r254770) +++ head/sys/net/pfil.h Sat Aug 24 10:36:33 2013(r254771) @@ -132,16 +132,4 @@ struct pfil_head *pfil_head_get(int, u_l #definePFIL_LIST_LOCK()mtx_lock(pfil_global_lock) #definePFIL_LIST_UNLOCK() mtx_unlock(pfil_global_lock) -static __inline struct packet_filter_hook * -pfil_hook_get(int dir, struct pfil_head *ph) -{ - - if (dir == PFIL_IN) - return (TAILQ_FIRST(ph-ph_in)); - else if (dir == PFIL_OUT) - return (TAILQ_FIRST(ph-ph_out)); - else - return (NULL); -} - #endif /* _NET_PFIL_H_ */ ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r254772 - head/share/man/man9
Author: andre Date: Sat Aug 24 10:38:02 2013 New Revision: 254772 URL: http://svnweb.freebsd.org/changeset/base/254772 Log: pfil_hook_get() has been internalized in r254771 and is no longer part of the API. It wasn't safe for external use in any case. Modified: head/share/man/man9/pfil.9 Modified: head/share/man/man9/pfil.9 == --- head/share/man/man9/pfil.9 Sat Aug 24 10:36:33 2013(r254771) +++ head/share/man/man9/pfil.9 Sat Aug 24 10:38:02 2013(r254772) @@ -28,7 +28,7 @@ .\ .\ $FreeBSD$ .\ -.Dd October 22, 2012 +.Dd August 23, 2013 .Dt PFIL 9 .Os .Sh NAME @@ -36,7 +36,6 @@ .Nm pfil_head_register , .Nm pfil_head_unregister , .Nm pfil_head_get , -.Nm pfil_hook_get , .Nm pfil_add_hook , .Nm pfil_remove_hook , .Nm pfil_run_hooks , @@ -56,8 +55,6 @@ .Fn pfil_head_unregister struct pfil_head *head .Ft struct pfil_head * .Fn pfil_head_get int af u_long dlt -.Ft struct packet_filter_hook * -.Fn pfil_hook_get int dir struct pfil_head *head .Ft void .Fn pfil_add_hook int (*func)() void *arg int flags struct pfil_head * .Ft void @@ -245,11 +242,7 @@ lock export was added in .Fx 10.0 . .Sh BUGS The -.Fn pfil_hook_get -function -is only safe for internal use. -.Pp -When a +.Fn When a .Vt pfil_head is being modified, no traffic is diverted (to avoid deadlock). ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r254773 - head/sys/net
Author: andre Date: Sat Aug 24 11:17:25 2013 New Revision: 254773 URL: http://svnweb.freebsd.org/changeset/base/254773 Log: Resolve the confusion between the head_list and the hook list. The linked list of pfil hooks is changed to chain and this term is applied consistently. The head_list remains with list term. Add KASSERT to vnet_pfil_uninit(). Update and extend comments. Reviewed by: eri (previous version) Modified: head/sys/net/pfil.c head/sys/net/pfil.h Modified: head/sys/net/pfil.c == --- head/sys/net/pfil.c Sat Aug 24 10:38:02 2013(r254772) +++ head/sys/net/pfil.c Sat Aug 24 11:17:25 2013(r254773) @@ -52,9 +52,9 @@ static struct mtx pfil_global_lock; MTX_SYSINIT(pfil_heads_lock, pfil_global_lock, pfil_head_list lock, MTX_DEF); -static struct packet_filter_hook *pfil_hook_get(int, struct pfil_head *); -static int pfil_list_add(pfil_list_t *, struct packet_filter_hook *, int); -static int pfil_list_remove(pfil_list_t *, pfil_func_t, void *); +static struct packet_filter_hook *pfil_chain_get(int, struct pfil_head *); +static int pfil_chain_add(pfil_chain_t *, struct packet_filter_hook *, int); +static int pfil_chain_remove(pfil_chain_t *, pfil_func_t, void *); LIST_HEAD(pfilheadhead, pfil_head); VNET_DEFINE(struct pfilheadhead, pfil_head_list); @@ -63,7 +63,7 @@ VNET_DEFINE(struct rmlock, pfil_lock); #defineV_pfil_lock VNET(pfil_lock) /* - * pfil_run_hooks() runs the specified packet filter hooks. + * pfil_run_hooks() runs the specified packet filter hook chain. */ int pfil_run_hooks(struct pfil_head *ph, struct mbuf **mp, struct ifnet *ifp, @@ -76,8 +76,8 @@ pfil_run_hooks(struct pfil_head *ph, str PFIL_RLOCK(ph, rmpt); KASSERT(ph-ph_nhooks = 0, (Pfil hook count dropped 0)); - for (pfh = pfil_hook_get(dir, ph); pfh != NULL; -pfh = TAILQ_NEXT(pfh, pfil_link)) { + for (pfh = pfil_chain_get(dir, ph); pfh != NULL; +pfh = TAILQ_NEXT(pfh, pfil_chain)) { if (pfh-pfil_func != NULL) { rv = (*pfh-pfil_func)(pfh-pfil_arg, m, ifp, dir, inp); @@ -91,7 +91,7 @@ pfil_run_hooks(struct pfil_head *ph, str } static struct packet_filter_hook * -pfil_hook_get(int dir, struct pfil_head *ph) +pfil_chain_get(int dir, struct pfil_head *ph) { if (dir == PFIL_IN) @@ -163,6 +163,7 @@ pfil_wowned(struct pfil_head *ph) return (PFIL_WOWNED(ph)); } + /* * pfil_head_register() registers a pfil_head with the packet filter hook * mechanism. @@ -202,9 +203,9 @@ pfil_head_unregister(struct pfil_head *p PFIL_LIST_LOCK(); LIST_REMOVE(ph, ph_list); PFIL_LIST_UNLOCK(); - TAILQ_FOREACH_SAFE(pfh, ph-ph_in, pfil_link, pfnext) + TAILQ_FOREACH_SAFE(pfh, ph-ph_in, pfil_chain, pfnext) free(pfh, M_IFADDR); - TAILQ_FOREACH_SAFE(pfh, ph-ph_out, pfil_link, pfnext) + TAILQ_FOREACH_SAFE(pfh, ph-ph_out, pfil_chain, pfnext) free(pfh, M_IFADDR); PFIL_LOCK_DESTROY(ph); return (0); @@ -261,7 +262,7 @@ pfil_add_hook(pfil_func_t func, void *ar if (flags PFIL_IN) { pfh1-pfil_func = func; pfh1-pfil_arg = arg; - err = pfil_list_add(ph-ph_in, pfh1, flags ~PFIL_OUT); + err = pfil_chain_add(ph-ph_in, pfh1, flags ~PFIL_OUT); if (err) goto locked_error; ph-ph_nhooks++; @@ -269,10 +270,10 @@ pfil_add_hook(pfil_func_t func, void *ar if (flags PFIL_OUT) { pfh2-pfil_func = func; pfh2-pfil_arg = arg; - err = pfil_list_add(ph-ph_out, pfh2, flags ~PFIL_IN); + err = pfil_chain_add(ph-ph_out, pfh2, flags ~PFIL_IN); if (err) { if (flags PFIL_IN) - pfil_list_remove(ph-ph_in, func, arg); + pfil_chain_remove(ph-ph_in, func, arg); goto locked_error; } ph-ph_nhooks++; @@ -291,7 +292,7 @@ error: /* * pfil_remove_hook removes a specific function from the packet filter hook - * list. + * chain. */ int pfil_remove_hook(pfil_func_t func, void *arg, int flags, struct pfil_head *ph) @@ -300,12 +301,12 @@ pfil_remove_hook(pfil_func_t func, void PFIL_WLOCK(ph); if (flags PFIL_IN) { - err = pfil_list_remove(ph-ph_in, func, arg); + err = pfil_chain_remove(ph-ph_in, func, arg); if (err == 0) ph-ph_nhooks--; } if ((err == 0) (flags PFIL_OUT)) { - err = pfil_list_remove(ph-ph_out, func, arg); + err = pfil_chain_remove(ph-ph_out, func, arg); if (err == 0) ph-ph_nhooks--;
svn commit: r254774 - head/sys/net
Author: andre Date: Sat Aug 24 11:24:15 2013 New Revision: 254774 URL: http://svnweb.freebsd.org/changeset/base/254774 Log: ename PFIL_LIST_[UN]LOCK() to PFIL_HEADLIST_[UN]LOCK() to avoid confusion with the pfil_head chain locking macros. Modified: head/sys/net/pfil.c head/sys/net/pfil.h Modified: head/sys/net/pfil.c == --- head/sys/net/pfil.c Sat Aug 24 11:17:25 2013(r254773) +++ head/sys/net/pfil.c Sat Aug 24 11:24:15 2013(r254774) @@ -173,11 +173,11 @@ pfil_head_register(struct pfil_head *ph) { struct pfil_head *lph; - PFIL_LIST_LOCK(); + PFIL_HEADLIST_LOCK(); LIST_FOREACH(lph, V_pfil_head_list, ph_list) { if (ph-ph_type == lph-ph_type ph-ph_un.phu_val == lph-ph_un.phu_val) { - PFIL_LIST_UNLOCK(); + PFIL_HEADLIST_UNLOCK(); return (EEXIST); } } @@ -186,7 +186,7 @@ pfil_head_register(struct pfil_head *ph) TAILQ_INIT(ph-ph_in); TAILQ_INIT(ph-ph_out); LIST_INSERT_HEAD(V_pfil_head_list, ph, ph_list); - PFIL_LIST_UNLOCK(); + PFIL_HEADLIST_UNLOCK(); return (0); } @@ -200,9 +200,9 @@ pfil_head_unregister(struct pfil_head *p { struct packet_filter_hook *pfh, *pfnext; - PFIL_LIST_LOCK(); + PFIL_HEADLIST_LOCK(); LIST_REMOVE(ph, ph_list); - PFIL_LIST_UNLOCK(); + PFIL_HEADLIST_UNLOCK(); TAILQ_FOREACH_SAFE(pfh, ph-ph_in, pfil_chain, pfnext) free(pfh, M_IFADDR); TAILQ_FOREACH_SAFE(pfh, ph-ph_out, pfil_chain, pfnext) @@ -219,11 +219,11 @@ pfil_head_get(int type, u_long val) { struct pfil_head *ph; - PFIL_LIST_LOCK(); + PFIL_HEADLIST_LOCK(); LIST_FOREACH(ph, V_pfil_head_list, ph_list) if (ph-ph_type == type ph-ph_un.phu_val == val) break; - PFIL_LIST_UNLOCK(); + PFIL_HEADLIST_UNLOCK(); return (ph); } Modified: head/sys/net/pfil.h == --- head/sys/net/pfil.h Sat Aug 24 11:17:25 2013(r254773) +++ head/sys/net/pfil.h Sat Aug 24 11:24:15 2013(r254774) @@ -138,7 +138,9 @@ int pfil_wowned(struct pfil_head *ph); #definePFIL_RUNLOCK(p, t) rm_runlock((p)-ph_plock, (t)) #definePFIL_WUNLOCK(p) rm_wunlock((p)-ph_plock) #definePFIL_WOWNED(p) rm_wowned((p)-ph_plock) -#definePFIL_LIST_LOCK()mtx_lock(pfil_global_lock) -#definePFIL_LIST_UNLOCK() mtx_unlock(pfil_global_lock) + +/* Internal locking macros for global/vnet pfil_head_list. */ +#definePFIL_HEADLIST_LOCK()mtx_lock(pfil_global_lock) +#definePFIL_HEADLIST_UNLOCK() mtx_unlock(pfil_global_lock) #endif /* _NET_PFIL_H_ */ ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r254775 - head/share/man/man9
Author: andre Date: Sat Aug 24 11:57:02 2013 New Revision: 254775 URL: http://svnweb.freebsd.org/changeset/base/254775 Log: Adjust for the pfil_func_t typedef added in r254769. Modified: head/share/man/man9/pfil.9 Modified: head/share/man/man9/pfil.9 == --- head/share/man/man9/pfil.9 Sat Aug 24 11:24:15 2013(r254774) +++ head/share/man/man9/pfil.9 Sat Aug 24 11:57:02 2013(r254775) @@ -49,6 +49,8 @@ .In sys/mbuf.h .In net/if.h .In net/pfil.h +.Bd -literal +typedef int (*pfil_func_t)(void *arg, struct mbuf **mp, struct ifnet *, int dir, struct inpcb); .Ft int .Fn pfil_head_register struct pfil_head *head .Ft int @@ -56,11 +58,9 @@ .Ft struct pfil_head * .Fn pfil_head_get int af u_long dlt .Ft void -.Fn pfil_add_hook int (*func)() void *arg int flags struct pfil_head * +.Fn pfil_add_hook pfil_func_t void *arg int flags struct pfil_head * .Ft void -.Fn pfil_remove_hook int (*func)() void *arg int flags struct pfil_head * -.Ft int -.Fn (*func) void *arg struct mbuf **mp struct ifnet * int dir struct inpcb * +.Fn pfil_remove_hook pfil_func_t void *arg int flags struct pfil_head * .Ft int .Fn pfil_run_hooks struct pfil_head *head struct mbuf **mp struct ifnet * int dir struct inpcb * .Ft void ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r254777 - head/sys/net
Author: andre Date: Sat Aug 24 12:03:24 2013 New Revision: 254777 URL: http://svnweb.freebsd.org/changeset/base/254777 Log: Whitespace, style cleanups, and improved comments. Modified: head/sys/net/pfil.h Modified: head/sys/net/pfil.h == --- head/sys/net/pfil.h Sat Aug 24 11:59:51 2013(r254776) +++ head/sys/net/pfil.h Sat Aug 24 12:03:24 2013(r254777) @@ -52,9 +52,9 @@ typedef int (*pfil_func_t)(void *, struc * together and after each other in the specified order. */ struct packet_filter_hook { -TAILQ_ENTRY(packet_filter_hook) pfil_chain; - pfil_func_t pfil_func; - void*pfil_arg; + TAILQ_ENTRY(packet_filter_hook) pfil_chain; + pfil_func_t pfil_func; + void*pfil_arg; }; #define PFIL_IN0x0001 @@ -74,23 +74,23 @@ typedef TAILQ_HEAD(pfil_chain, packet_fi * For packet is then run through the hook chain for inspection. */ struct pfil_head { - pfil_chain_tph_in; - pfil_chain_tph_out; - int ph_type; - int ph_nhooks; + pfil_chain_t ph_in; + pfil_chain_t ph_out; + int ph_type; + int ph_nhooks; #if defined( __linux__ ) || defined( _WIN32 ) - rwlock_tph_mtx; + rwlock_t ph_mtx; #else struct rmlock *ph_plock; /* Pointer to the used lock */ - struct rmlock ph_lock;/* Private lock storage */ - int flags; + struct rmlockph_lock; /* Private lock storage */ + int flags; #endif union { - u_long phu_val; - void*phu_ptr; + u_long phu_val; + void*phu_ptr; } ph_un; -#defineph_af ph_un.phu_val -#defineph_ifnetph_un.phu_ptr +#defineph_afph_un.phu_val +#defineph_ifnet ph_un.phu_ptr LIST_ENTRY(pfil_head) ph_list; }; @@ -98,6 +98,7 @@ struct pfil_head { struct pfil_head *pfil_head_get(int, u_long); intpfil_add_hook(pfil_func_t, void *, int, struct pfil_head *); intpfil_remove_hook(pfil_func_t, void *, int, struct pfil_head *); +#definePFIL_HOOKED(p) ((p)-ph_nhooks 0) /* Public functions to run the packet inspection by protocols. */ intpfil_run_hooks(struct pfil_head *, struct mbuf **, struct ifnet *, @@ -107,16 +108,16 @@ int pfil_run_hooks(struct pfil_head *, s intpfil_head_register(struct pfil_head *); intpfil_head_unregister(struct pfil_head *); -/* Internal pfil locking functions. */ +/* Public pfil locking functions for self managed locks by packet filters. */ struct rm_priotracker; /* Do not require including rmlock header */ -int pfil_try_rlock(struct pfil_head *, struct rm_priotracker *); -void pfil_rlock(struct pfil_head *, struct rm_priotracker *); -void pfil_runlock(struct pfil_head *, struct rm_priotracker *); -void pfil_wlock(struct pfil_head *); -void pfil_wunlock(struct pfil_head *); -int pfil_wowned(struct pfil_head *ph); +intpfil_try_rlock(struct pfil_head *, struct rm_priotracker *); +void pfil_rlock(struct pfil_head *, struct rm_priotracker *); +void pfil_runlock(struct pfil_head *, struct rm_priotracker *); +void pfil_wlock(struct pfil_head *); +void pfil_wunlock(struct pfil_head *); +intpfil_wowned(struct pfil_head *ph); -#definePFIL_HOOKED(p) ((p)-ph_nhooks 0) +/* Internal pfil locking functions. */ #definePFIL_LOCK_INIT_REAL(l, t) \ rm_init_flags(l, PFil t rmlock, RM_RECURSE) #definePFIL_LOCK_DESTROY_REAL(l) \ @@ -132,6 +133,7 @@ int pfil_wowned(struct pfil_head *ph); if ((p)-flags PFIL_FLAG_PRIVATE_LOCK)\ PFIL_LOCK_DESTROY_REAL((p)-ph_plock); \ } while (0) + #definePFIL_TRY_RLOCK(p, t)rm_try_rlock((p)-ph_plock, (t)) #definePFIL_RLOCK(p, t)rm_rlock((p)-ph_plock, (t)) #definePFIL_WLOCK(p) rm_wlock((p)-ph_plock) ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r254779 - head/sys/kern
Author: andre Date: Sat Aug 24 12:24:58 2013 New Revision: 254779 URL: http://svnweb.freebsd.org/changeset/base/254779 Log: Avoid code duplication for mbuf initialization and use m_init() instead in mb_ctor_mbuf() and mb_ctor_pack(). Modified: head/sys/kern/kern_mbuf.c Modified: head/sys/kern/kern_mbuf.c == --- head/sys/kern/kern_mbuf.c Sat Aug 24 12:10:11 2013(r254778) +++ head/sys/kern/kern_mbuf.c Sat Aug 24 12:24:58 2013(r254779) @@ -410,18 +410,14 @@ mb_ctor_mbuf(void *mem, int size, void * { struct mbuf *m; struct mb_args *args; -#ifdef MAC int error; -#endif int flags; short type; #ifdef INVARIANTS trash_ctor(mem, size, arg, how); #endif - m = (struct mbuf *)mem; args = (struct mb_args *)arg; - flags = args-flags; type = args-type; /* @@ -431,32 +427,12 @@ mb_ctor_mbuf(void *mem, int size, void * if (type == MT_NOINIT) return (0); - m-m_next = NULL; - m-m_nextpkt = NULL; - m-m_len = 0; - m-m_flags = flags; - m-m_type = type; - if (flags M_PKTHDR) { - m-m_data = m-m_pktdat; - m-m_pkthdr.rcvif = NULL; - m-m_pkthdr.header = NULL; - m-m_pkthdr.len = 0; - m-m_pkthdr.csum_flags = 0; - m-m_pkthdr.csum_data = 0; - m-m_pkthdr.tso_segsz = 0; - m-m_pkthdr.ether_vtag = 0; - m-m_pkthdr.flowid = 0; - m-m_pkthdr.fibnum = 0; - SLIST_INIT(m-m_pkthdr.tags); -#ifdef MAC - /* If the label init fails, fail the alloc */ - error = mac_mbuf_init(m, how); - if (error) - return (error); -#endif - } else - m-m_data = m-m_dat; - return (0); + m = (struct mbuf *)mem; + flags = args-flags; + + error = m_init(m, NULL, size, how, type, flags); + + return (error); } /* @@ -656,34 +632,14 @@ mb_ctor_pack(void *mem, int size, void * #ifdef INVARIANTS trash_ctor(m-m_ext.ext_buf, MCLBYTES, arg, how); #endif - m-m_next = NULL; - m-m_nextpkt = NULL; - m-m_data = m-m_ext.ext_buf; - m-m_len = 0; - m-m_flags = (flags | M_EXT); - m-m_type = type; - - if (flags M_PKTHDR) { - m-m_pkthdr.rcvif = NULL; - m-m_pkthdr.len = 0; - m-m_pkthdr.header = NULL; - m-m_pkthdr.csum_flags = 0; - m-m_pkthdr.csum_data = 0; - m-m_pkthdr.tso_segsz = 0; - m-m_pkthdr.ether_vtag = 0; - m-m_pkthdr.flowid = 0; - m-m_pkthdr.fibnum = 0; - SLIST_INIT(m-m_pkthdr.tags); -#ifdef MAC - /* If the label init fails, fail the alloc */ - error = mac_mbuf_init(m, how); - if (error) - return (error); -#endif - } + + error = m_init(m, NULL, size, how, type, flags); + /* m_ext is already initialized. */ + m-m_data = m-m_ext.ext_buf; + m-m_flags = (flags | M_EXT); - return (0); + return (error); } int ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r254780 - in head/sys: kern sys
Author: andre Date: Sat Aug 24 13:15:42 2013 New Revision: 254780 URL: http://svnweb.freebsd.org/changeset/base/254780 Log: dd a 24 bits wide ext_flags field to m_ext by reducing ext_type to 8 bits. ext_type is an enumerator and the number of types we have is a mere dozen. A couple of ext_types are renumbered to fit within 8 bits. EXT_VENDOR[1-4] and EXT_EXP[1-4] types for vendor-internal and experimental local mapping. The ext_flags field is currently unused but has a couple of flags already defined for future use. Again vendor and experimental flags are provided for local mapping. EXT_FLAG_BITS is provided for the printf(9) %b identifier. Initialize and copy ext_flags in the relevant mbuf functions. Improve alignment and packing of struct m_ext on 32 and 64 archs by carefully sorting the fields. Modified: head/sys/kern/kern_mbuf.c head/sys/kern/uipc_mbuf.c head/sys/sys/mbuf.h Modified: head/sys/kern/kern_mbuf.c == --- head/sys/kern/kern_mbuf.c Sat Aug 24 12:24:58 2013(r254779) +++ head/sys/kern/kern_mbuf.c Sat Aug 24 13:15:42 2013(r254780) @@ -547,6 +547,7 @@ mb_ctor_clust(void *mem, int size, void m-m_ext.ext_arg2 = NULL; m-m_ext.ext_size = size; m-m_ext.ext_type = type; + m-m_ext.ext_flags = 0; m-m_ext.ref_cnt = refcnt; } Modified: head/sys/kern/uipc_mbuf.c == --- head/sys/kern/uipc_mbuf.c Sat Aug 24 12:24:58 2013(r254779) +++ head/sys/kern/uipc_mbuf.c Sat Aug 24 13:15:42 2013(r254780) @@ -267,6 +267,7 @@ m_extadd(struct mbuf *mb, caddr_t buf, u mb-m_ext.ext_arg1 = arg1; mb-m_ext.ext_arg2 = arg2; mb-m_ext.ext_type = type; + mb-m_ext.ext_flags = 0; return (0); } @@ -342,6 +343,7 @@ mb_free_ext(struct mbuf *m) m-m_ext.ref_cnt = NULL; m-m_ext.ext_size = 0; m-m_ext.ext_type = 0; + m-m_ext.ext_flags = 0; m-m_flags = ~M_EXT; uma_zfree(zone_mbuf, m); } @@ -368,6 +370,7 @@ mb_dupcl(struct mbuf *n, struct mbuf *m) n-m_ext.ext_size = m-m_ext.ext_size; n-m_ext.ref_cnt = m-m_ext.ref_cnt; n-m_ext.ext_type = m-m_ext.ext_type; + n-m_ext.ext_flags = m-m_ext.ext_flags; n-m_flags |= M_EXT; n-m_flags |= m-m_flags M_RDONLY; } Modified: head/sys/sys/mbuf.h == --- head/sys/sys/mbuf.h Sat Aug 24 12:24:58 2013(r254779) +++ head/sys/sys/mbuf.h Sat Aug 24 13:15:42 2013(r254780) @@ -138,16 +138,19 @@ struct pkthdr { /* * Description of external storage mapped into mbuf; valid only if M_EXT is * set. + * Size ILP32: 28 + * LP64: 48 */ struct m_ext { + volatile u_int *ref_cnt; /* pointer to ref count info */ caddr_t ext_buf; /* start of buffer */ + uint32_t ext_size; /* size of buffer, for ext_free */ + uint32_t ext_type:8,/* type of external storage */ +ext_flags:24; /* external storage mbuf flags */ void(*ext_free) /* free routine if not the usual */ (void *, void *); void*ext_arg1; /* optional argument pointer */ void*ext_arg2; /* optional argument pointer */ - u_intext_size; /* size of buffer, for ext_free */ - volatile u_int *ref_cnt; /* pointer to ref count info */ - int ext_type; /* type of external storage */ }; /* @@ -269,7 +272,7 @@ struct mbuf { M_PROTOFLAGS|M_HASHTYPEBITS) /* - * External buffer types: identify ext_buf type. + * External mbuf storage buffer types. */ #defineEXT_CLUSTER 1 /* mbuf cluster */ #defineEXT_SFBUF 2 /* sendfile(2)'s sf_bufs */ @@ -278,10 +281,48 @@ struct mbuf { #defineEXT_JUMBO16 5 /* jumbo cluster 16184 bytes */ #defineEXT_PACKET 6 /* mbuf+cluster from packet zone */ #defineEXT_MBUF7 /* external mbuf reference (M_IOVEC) */ -#defineEXT_NET_DRV 100 /* custom ext_buf provided by net driver(s) */ -#defineEXT_MOD_TYPE200 /* custom module's ext_buf type */ -#defineEXT_DISPOSABLE 300 /* can throw this buffer away w/page flipping */ -#defineEXT_EXTREF 400 /* has externally maintained ref_cnt ptr */ + +#defineEXT_VENDOR1 224 /* for vendor-internal use */ +#defineEXT_VENDOR2 225 /* for vendor-internal use */ +#defineEXT_VENDOR3 226 /* for vendor-internal use */ +#defineEXT_VENDOR4 227 /* for vendor-internal use */ + +#define
Re: svn commit: r254780 - in head/sys: kern sys
On 24.08.2013 15:15, Andre Oppermann wrote: Author: andre Date: Sat Aug 24 13:15:42 2013 New Revision: 254780 URL: http://svnweb.freebsd.org/changeset/base/254780 Log: dd a 24 bits wide ext_flags field to m_ext by reducing ext_type to 8 bits. ext_type is an enumerator and the number of types we have is a mere dozen. A couple of ext_types are renumbered to fit within 8 bits. EXT_VENDOR[1-4] and EXT_EXP[1-4] types for vendor-internal and experimental local mapping. The ext_flags field is currently unused but has a couple of flags already defined for future use. Again vendor and experimental flags are provided for local mapping. EXT_FLAG_BITS is provided for the printf(9) %b identifier. Initialize and copy ext_flags in the relevant mbuf functions. Improve alignment and packing of struct m_ext on 32 and 64 archs by carefully sorting the fields. Sponsored by: The FreeBSD Foundation -- Andre ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r254799 - in head/sys: dev/cas dev/hatm dev/iscsi_initiator dev/lge dev/mwl kern sys
Author: andre Date: Sat Aug 24 16:57:44 2013 New Revision: 254799 URL: http://svnweb.freebsd.org/changeset/base/254799 Log: Add an mbuf pointer parameter to (*ext_free) to give the external free function access to the mbuf the external memory was attached to. Mechanically adjust all users to include the mbuf parameter. This fixes a long standing annoyance for external free functions. Before one had to sacrifice one of the argument pointers for this. Sponsored by: The FreeBSD Foundation Modified: head/sys/dev/cas/if_cas.c head/sys/dev/hatm/if_hatm_intr.c head/sys/dev/iscsi_initiator/isc_soc.c head/sys/dev/lge/if_lge.c head/sys/dev/mwl/if_mwl.c head/sys/kern/subr_mbpool.c head/sys/kern/uipc_mbuf.c head/sys/kern/uipc_syscalls.c head/sys/sys/mbpool.h head/sys/sys/mbuf.h head/sys/sys/sf_buf.h Modified: head/sys/dev/cas/if_cas.c == --- head/sys/dev/cas/if_cas.c Sat Aug 24 16:55:53 2013(r254798) +++ head/sys/dev/cas/if_cas.c Sat Aug 24 16:57:44 2013(r254799) @@ -132,7 +132,7 @@ static void cas_detach(struct cas_softc static int cas_disable_rx(struct cas_softc *sc); static int cas_disable_tx(struct cas_softc *sc); static voidcas_eint(struct cas_softc *sc, u_int status); -static voidcas_free(void *arg1, void* arg2); +static voidcas_free(struct mbuf *m, void *arg1, void* arg2); static voidcas_init(void *xsc); static voidcas_init_locked(struct cas_softc *sc); static voidcas_init_regs(struct cas_softc *sc); @@ -1888,7 +1888,7 @@ cas_rint(struct cas_softc *sc) } static void -cas_free(void *arg1, void *arg2) +cas_free(struct mbuf *m, void *arg1, void *arg2) { struct cas_rxdsoft *rxds; struct cas_softc *sc; Modified: head/sys/dev/hatm/if_hatm_intr.c == --- head/sys/dev/hatm/if_hatm_intr.cSat Aug 24 16:55:53 2013 (r254798) +++ head/sys/dev/hatm/if_hatm_intr.cSat Aug 24 16:57:44 2013 (r254799) @@ -261,7 +261,7 @@ hatm_mbuf_page_alloc(struct hatm_softc * * Free an mbuf and put it onto the free list. */ static void -hatm_mbuf0_free(void *buf, void *args) +hatm_mbuf0_free(struct mbuf *m, void *buf, void *args) { struct hatm_softc *sc = args; struct mbuf0_chunk *c = buf; @@ -272,7 +272,7 @@ hatm_mbuf0_free(void *buf, void *args) hatm_ext_free(sc-mbuf_list[0], (struct mbufx_free *)c); } static void -hatm_mbuf1_free(void *buf, void *args) +hatm_mbuf1_free(struct mbuf *m, void *buf, void *args) { struct hatm_softc *sc = args; struct mbuf1_chunk *c = buf; @@ -461,7 +461,7 @@ hatm_rx_buffer(struct hatm_softc *sc, u_ hatm_mbuf0_free, c0, sc, M_PKTHDR, EXT_EXTREF); m-m_data += MBUF0_OFFSET; } else - hatm_mbuf0_free(c0, sc); + hatm_mbuf0_free(NULL, c0, sc); } else { struct mbuf1_chunk *c1; @@ -485,7 +485,7 @@ hatm_rx_buffer(struct hatm_softc *sc, u_ hatm_mbuf1_free, c1, sc, M_PKTHDR, EXT_EXTREF); m-m_data += MBUF1_OFFSET; } else - hatm_mbuf1_free(c1, sc); + hatm_mbuf1_free(NULL, c1, sc); } return (m); Modified: head/sys/dev/iscsi_initiator/isc_soc.c == --- head/sys/dev/iscsi_initiator/isc_soc.c Sat Aug 24 16:55:53 2013 (r254798) +++ head/sys/dev/iscsi_initiator/isc_soc.c Sat Aug 24 16:57:44 2013 (r254799) @@ -69,7 +69,7 @@ static int ou_refcnt = 0; | function for freeing external storage for mbuf */ static void -ext_free(void *a, void *b) +ext_free(struct mbuf *m, void *a, void *b) { pduq_t *pq = b; Modified: head/sys/dev/lge/if_lge.c == --- head/sys/dev/lge/if_lge.c Sat Aug 24 16:55:53 2013(r254798) +++ head/sys/dev/lge/if_lge.c Sat Aug 24 16:57:44 2013(r254799) @@ -122,7 +122,7 @@ static int lge_detach(device_t); static int lge_alloc_jumbo_mem(struct lge_softc *); static void lge_free_jumbo_mem(struct lge_softc *); static void *lge_jalloc(struct lge_softc *); -static void lge_jfree(void *, void *); +static void lge_jfree(struct mbuf *, void *, void *); static int lge_newbuf(struct lge_softc *, struct lge_rx_desc *, struct mbuf *); static int lge_encap(struct lge_softc *, struct mbuf *, u_int32_t *); @@ -847,9 +847,7 @@ lge_jalloc(sc) * Release a jumbo buffer. */ static void -lge_jfree(buf, args) - void*buf; - void*args; +lge_jfree(struct mbuf *m, void *buf, void *args) { struct lge_softc*sc; int i;
svn commit: r254800 - in head/sys/dev: cxgb sfxge
Author: andre Date: Sat Aug 24 17:14:14 2013 New Revision: 254800 URL: http://svnweb.freebsd.org/changeset/base/254800 Log: Remove unnecessary setup of the m-pkthdr.header pointer. Sponsored by: The FreeBSD Foundation Modified: head/sys/dev/cxgb/cxgb_sge.c head/sys/dev/sfxge/sfxge_rx.c Modified: head/sys/dev/cxgb/cxgb_sge.c == --- head/sys/dev/cxgb/cxgb_sge.cSat Aug 24 16:57:44 2013 (r254799) +++ head/sys/dev/cxgb/cxgb_sge.cSat Aug 24 17:14:14 2013 (r254800) @@ -2634,7 +2634,6 @@ t3_rx_eth(struct adapter *adap, struct m } m-m_pkthdr.rcvif = ifp; - m-m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad; /* * adjust after conversion to mbuf chain */ Modified: head/sys/dev/sfxge/sfxge_rx.c == --- head/sys/dev/sfxge/sfxge_rx.c Sat Aug 24 16:57:44 2013 (r254799) +++ head/sys/dev/sfxge/sfxge_rx.c Sat Aug 24 17:14:14 2013 (r254800) @@ -282,7 +282,6 @@ static void __sfxge_rx_deliver(struct sf struct ifnet *ifp = sc-ifnet; m-m_pkthdr.rcvif = ifp; - m-m_pkthdr.header = m-m_data; m-m_pkthdr.csum_data = 0x; ifp-if_input(ifp, m); } ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r254803 - in head/sys/dev: jme nfe
Author: andre Date: Sat Aug 24 19:38:36 2013 New Revision: 254803 URL: http://svnweb.freebsd.org/changeset/base/254803 Log: Change local variable tso_segsz to tsosegsz to avoid mbuf.h macro conflicts. Sponsored by: The FreeBSD Foundation Modified: head/sys/dev/jme/if_jme.c head/sys/dev/nfe/if_nfe.c Modified: head/sys/dev/jme/if_jme.c == --- head/sys/dev/jme/if_jme.c Sat Aug 24 19:02:36 2013(r254802) +++ head/sys/dev/jme/if_jme.c Sat Aug 24 19:38:36 2013(r254803) @@ -1690,7 +1690,7 @@ jme_encap(struct jme_softc *sc, struct m struct mbuf *m; bus_dma_segment_t txsegs[JME_MAXTXSEGS]; int error, i, nsegs, prod; - uint32_t cflags, tso_segsz; + uint32_t cflags, tsosegsz; JME_LOCK_ASSERT(sc); @@ -1808,10 +1808,10 @@ jme_encap(struct jme_softc *sc, struct m m = *m_head; cflags = 0; - tso_segsz = 0; + tsosegsz = 0; /* Configure checksum offload and TSO. */ if ((m-m_pkthdr.csum_flags CSUM_TSO) != 0) { - tso_segsz = (uint32_t)m-m_pkthdr.tso_segsz + tsosegsz = (uint32_t)m-m_pkthdr.tso_segsz JME_TD_MSS_SHIFT; cflags |= JME_TD_TSO; } else { @@ -1830,7 +1830,7 @@ jme_encap(struct jme_softc *sc, struct m desc = sc-jme_rdata.jme_tx_ring[prod]; desc-flags = htole32(cflags); - desc-buflen = htole32(tso_segsz); + desc-buflen = htole32(tsosegsz); desc-addr_hi = htole32(m-m_pkthdr.len); desc-addr_lo = 0; sc-jme_cdata.jme_tx_cnt++; Modified: head/sys/dev/nfe/if_nfe.c == --- head/sys/dev/nfe/if_nfe.c Sat Aug 24 19:02:36 2013(r254802) +++ head/sys/dev/nfe/if_nfe.c Sat Aug 24 19:38:36 2013(r254803) @@ -2390,7 +2390,7 @@ nfe_encap(struct nfe_softc *sc, struct m bus_dmamap_t map; bus_dma_segment_t segs[NFE_MAX_SCATTER]; int error, i, nsegs, prod, si; - uint32_t tso_segsz; + uint32_t tsosegsz; uint16_t cflags, flags; struct mbuf *m; @@ -2429,9 +2429,9 @@ nfe_encap(struct nfe_softc *sc, struct m m = *m_head; cflags = flags = 0; - tso_segsz = 0; + tsosegsz = 0; if ((m-m_pkthdr.csum_flags CSUM_TSO) != 0) { - tso_segsz = (uint32_t)m-m_pkthdr.tso_segsz + tsosegsz = (uint32_t)m-m_pkthdr.tso_segsz NFE_TX_TSO_SHIFT; cflags = ~(NFE_TX_IP_CSUM | NFE_TX_TCP_UDP_CSUM); cflags |= NFE_TX_TSO; @@ -2482,14 +2482,14 @@ nfe_encap(struct nfe_softc *sc, struct m if ((m-m_flags M_VLANTAG) != 0) desc64-vtag = htole32(NFE_TX_VTAG | m-m_pkthdr.ether_vtag); - if (tso_segsz != 0) { + if (tsosegsz != 0) { /* * XXX * The following indicates the descriptor element * is a 32bit quantity. */ - desc64-length |= htole16((uint16_t)tso_segsz); - desc64-flags |= htole16(tso_segsz 16); + desc64-length |= htole16((uint16_t)tsosegsz); + desc64-flags |= htole16(tsosegsz 16); } /* * finally, set the valid/checksum/TSO bit in the first @@ -2502,14 +2502,14 @@ nfe_encap(struct nfe_softc *sc, struct m else desc32-flags |= htole16(NFE_TX_LASTFRAG_V1); desc32 = sc-txq.desc32[si]; - if (tso_segsz != 0) { + if (tsosegsz != 0) { /* * XXX * The following indicates the descriptor element * is a 32bit quantity. */ - desc32-length |= htole16((uint16_t)tso_segsz); - desc32-flags |= htole16(tso_segsz 16); + desc32-length |= htole16((uint16_t)tsosegsz); + desc32-flags |= htole16(tsosegsz 16); } /* * finally, set the valid/checksum/TSO bit in the first ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r254804 - in head/sys: dev/cxgb dev/e1000 dev/ixgbe dev/patm dev/qlxgb kern net netinet netinet6 sys
Author: andre Date: Sat Aug 24 19:51:18 2013 New Revision: 254804 URL: http://svnweb.freebsd.org/changeset/base/254804 Log: Restructure the mbuf pkthdr to make it fit for upcoming capabilities and features. The changes in particular are: o Remove rarely used header pointer and replace it with a 64bit protocol/ layer specific union PH_loc for local use. Protocols can flexibly overlay their own 8 to 64 bit fields to store information while the packet is worked on. o Mechanically convert IP reassembly, IGMP/MLD and ATM to use pkthdr.PH_loc instead of pkthdr.header. o Extend csum_flags to 64bits to allow for additional future offload information to be carried (e.g. iSCSI, IPsec offload, and others). o Move the RSS hash type enumerator from abusing m_flags to its own 8bit rsstype field. Adjust accessor macros. o Add cosqos field to store Class of Service / Quality of Service information with the packet. It is not yet supported in any drivers but allows us to get on par with Cisco/Juniper in routing applications (plus MPLS QoS) with a modernized ALTQ. o Add four 8 bit fields l[2-5]hlen to store the relative header offsets from the start of the packet. This is important for various offload capabilities and to relieve the drivers from having to parse the packet and protocol headers to find out location of checksums and other information. Header parsing in drivers is a lot of copy-paste and unhandled corner cases which we want to avoid. o Add another flexible 64bit union to map various additional persistent packet information, like ether_vtag, tso_segsz and csum fields. Depending on the csum_flags settings some fields may have different usage making it very flexible and adaptable to future capabilities. o Restructure the CSUM flags to better signify their outbound (down the stack) and inbound (up the stack) use. The CSUM flags used to be a bit chaotic and rather poorly documented leading to incorrect use in many places. Bring clarity into their use through better naming. Compatibility mappings are provided to preserve the API. The drivers can be corrected one by one and MFC'd without issue. o The size of pkthdr stays the same at 48/56bytes (32/64bit architectures). Sponsored by: The FreeBSD Foundation Modified: head/sys/dev/cxgb/cxgb_sge.c head/sys/dev/e1000/if_igb.c head/sys/dev/ixgbe/ixgbe.c head/sys/dev/ixgbe/ixv.c head/sys/dev/patm/if_patm.c head/sys/dev/patm/if_patm_tx.c head/sys/dev/qlxgb/qla_hw.c head/sys/kern/kern_mbuf.c head/sys/kern/uipc_mbuf.c head/sys/net/if.h head/sys/netinet/igmp.c head/sys/netinet/ip_input.c head/sys/netinet6/ip6_output.c head/sys/netinet6/mld6.c head/sys/sys/mbuf.h Modified: head/sys/dev/cxgb/cxgb_sge.c == --- head/sys/dev/cxgb/cxgb_sge.cSat Aug 24 19:38:36 2013 (r254803) +++ head/sys/dev/cxgb/cxgb_sge.cSat Aug 24 19:51:18 2013 (r254804) @@ -1470,9 +1470,9 @@ t3_encap(struct sge_qset *qs, struct mbu hdr-len = htonl(mlen | 0x8000); if (__predict_false(mlen TCPPKTHDRSIZE)) { - printf(mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x, + printf(mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%b,flags=%#x, m0, mlen, m0-m_pkthdr.tso_segsz, - m0-m_pkthdr.csum_flags, m0-m_flags); + (int)m0-m_pkthdr.csum_flags, CSUM_BITS, m0-m_flags); panic(tx tso packet too small); } Modified: head/sys/dev/e1000/if_igb.c == --- head/sys/dev/e1000/if_igb.c Sat Aug 24 19:38:36 2013(r254803) +++ head/sys/dev/e1000/if_igb.c Sat Aug 24 19:51:18 2013(r254804) @@ -4981,7 +4981,7 @@ igb_rx_checksum(u32 staterr, struct mbuf } if (status (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) { - u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); + u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); #if __FreeBSD_version = 80 if (sctp) /* reassign */ type = CSUM_SCTP_VALID; Modified: head/sys/dev/ixgbe/ixgbe.c == --- head/sys/dev/ixgbe/ixgbe.c Sat Aug 24 19:38:36 2013(r254803) +++ head/sys/dev/ixgbe/ixgbe.c Sat Aug 24 19:51:18 2013(r254804) @@ -4625,7 +4625,7 @@ ixgbe_rx_checksum(u32 staterr, struct mb mp-m_pkthdr.csum_flags = 0; } if (status IXGBE_RXD_STAT_L4CS) { - u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); + u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR); #if __FreeBSD_version = 80
svn commit: r254805 - head/sys/sys
Author: andre Date: Sat Aug 24 19:58:36 2013 New Revision: 254805 URL: http://svnweb.freebsd.org/changeset/base/254805 Log: Add mtodo(m, o) macro taking an additional offset into the mbuf data section. Sponsored by: The FreeBSD Foundation Modified: head/sys/sys/mbuf.h Modified: head/sys/sys/mbuf.h == --- head/sys/sys/mbuf.h Sat Aug 24 19:51:18 2013(r254804) +++ head/sys/sys/mbuf.h Sat Aug 24 19:58:36 2013(r254805) @@ -67,8 +67,10 @@ * type: * * mtod(m, t) -- Convert mbuf pointer to data pointer of correct type. + * mtodo(m, o) -- Same as above but with offset 'o' into data. */ #definemtod(m, t) ((t)((m)-m_data)) +#definemtodo(m, o) ((void *)(((m)-m_data) + (o))) /* * Argument structure passed to UMA routines during mbuf and packet ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r254807 - head/sys/sys
Author: andre Date: Sat Aug 24 20:26:41 2013 New Revision: 254807 URL: http://svnweb.freebsd.org/changeset/base/254807 Log: Compact m_hdr by packing the type and flags fields into one uint32_t. The mbuf type is an enumerator with only a handful of types in use and thus reduced from int to 8bits allowing for 255 types to be specified. Only 5 types have been in use for a long time. The flags field gets the remaining 24 bits with 12 bits for global persistent flags and 12 bits for protocol/layer specific overlays. Some of the global flags/functionality can be moved to the csum_flags or ext_flags bits in the future. MT_VENDOR[1-4] and MT_EXP[1-4] types for vendor-internal and experimental local mapping are added. The size of m_hdr shrinks from 24/40 to 20/32bytes (32/64bit architectures). Sponsored by: The FreeBSD Foundation Modified: head/sys/sys/mbuf.h Modified: head/sys/sys/mbuf.h == --- head/sys/sys/mbuf.h Sat Aug 24 20:06:00 2013(r254806) +++ head/sys/sys/mbuf.h Sat Aug 24 20:26:41 2013(r254807) @@ -82,23 +82,18 @@ struct mb_args { }; #endif /* _KERNEL */ -#if defined(__LP64__) -#define M_HDR_PAD6 -#else -#define M_HDR_PAD2 -#endif - /* * Header present at the beginning of every mbuf. + * Size ILP32: 20 + * LP64: 32 */ struct m_hdr { struct mbuf *mh_next; /* next buffer in chain */ struct mbuf *mh_nextpkt;/* next chain in queue/record */ caddr_t mh_data; /* location of data */ - int mh_len;/* amount of data in this mbuf */ - int mh_flags; /* flags; see below */ - shortmh_type; /* type of data in this mbuf */ - uint8_t pad[M_HDR_PAD];/* word align */ + int32_t mh_len;/* amount of data in this mbuf */ + uint32_t mh_type:8, /* type of data in this mbuf */ +mh_flags:24; /* flags; see below */ }; /* @@ -206,7 +201,10 @@ struct mbuf { #definem_dat M_dat.M_databuf /* - * mbuf flags. + * mbuf flags of global significance and layer crossing. + * Those of only protocol/layer specific significance are to be mapped + * to M_PROTO[1-12] and cleared at layer handoff boundaries. + * NB: Limited to the lower 24 bits. */ #defineM_EXT 0x0001 /* has associated external storage */ #defineM_PKTHDR0x0002 /* start of record */ @@ -430,12 +428,24 @@ struct mbuf { #defineCSUM_FRAGMENT 0x0 /* Unused */ /* - * mbuf types. + * mbuf types describing the content of the mbuf (including external storage). */ #defineMT_NOTMBUF 0 /* USED INTERNALLY ONLY! Object is not mbuf */ #defineMT_DATA 1 /* dynamic (data) allocation */ #defineMT_HEADER MT_DATA /* packet header, use M_PKTHDR instead */ + +#defineMT_VENDOR1 4 /* for vendor-internal use */ +#defineMT_VENDOR2 5 /* for vendor-internal use */ +#defineMT_VENDOR3 6 /* for vendor-internal use */ +#defineMT_VENDOR4 7 /* for vendor-internal use */ + #defineMT_SONAME 8 /* socket name */ + +#defineMT_EXP1 9 /* for experimental use */ +#defineMT_EXP2 10 /* for experimental use */ +#defineMT_EXP3 11 /* for experimental use */ +#defineMT_EXP4 12 /* for experimental use */ + #defineMT_CONTROL 14 /* extra-data protocol message */ #defineMT_OOBDATA 15 /* expedited data */ #defineMT_NTYPES 16 /* number of mbuf types for mbtypes[] */ ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r254812 - in head/sys: kern sys
Author: andre Date: Sat Aug 24 21:09:57 2013 New Revision: 254812 URL: http://svnweb.freebsd.org/changeset/base/254812 Log: Remove unused m_free_fast(). The difference to m_free() is only 2 predictable branches nowadays. However as a pre-condition the caller had to ensure that the mbuf pkthdr did not have any mtags attached to it, costing some potential branches again. Sponsored by: The FreeBSD Foundation Modified: head/sys/kern/kern_mbuf.c head/sys/sys/mbuf.h Modified: head/sys/kern/kern_mbuf.c == --- head/sys/kern/kern_mbuf.c Sat Aug 24 21:08:55 2013(r254811) +++ head/sys/kern/kern_mbuf.c Sat Aug 24 21:09:57 2013(r254812) @@ -447,7 +447,7 @@ mb_dtor_mbuf(void *mem, int size, void * m = (struct mbuf *)mem; flags = (unsigned long)arg; - if ((flags MB_NOTAGS) == 0 (m-m_flags M_PKTHDR) != 0) + if ((m-m_flags M_PKTHDR) !SLIST_EMPTY(m-m_pkthdr.tags)) m_tag_delete_chain(m, NULL); KASSERT((m-m_flags M_EXT) == 0, (%s: M_EXT set, __func__)); KASSERT((m-m_flags M_NOFREE) == 0, (%s: M_NOFREE set, __func__)); Modified: head/sys/sys/mbuf.h == --- head/sys/sys/mbuf.h Sat Aug 24 21:08:55 2013(r254811) +++ head/sys/sys/mbuf.h Sat Aug 24 21:09:57 2013(r254812) @@ -453,8 +453,6 @@ struct mbuf { #defineMT_NOINIT 255 /* Not a type but a flag to allocate a non-initialized mbuf */ -#define MB_NOTAGS 0x1UL /* no tags attached to mbuf */ - /* * Compatibility with historic mbuf allocator. */ @@ -636,17 +634,6 @@ m_getcl(int how, short type, int flags) return (uma_zalloc_arg(zone_pack, args, how)); } -static __inline void -m_free_fast(struct mbuf *m) -{ -#ifdef INVARIANTS - if (m-m_flags M_PKTHDR) - KASSERT(SLIST_EMPTY(m-m_pkthdr.tags), (doing fast free of mbuf with tags)); -#endif - - uma_zfree_arg(zone_mbuf, m, (void *)MB_NOTAGS); -} - static __inline struct mbuf * m_free(struct mbuf *m) { ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r254814 - head/sys/kern
Author: andre Date: Sat Aug 24 21:25:53 2013 New Revision: 254814 URL: http://svnweb.freebsd.org/changeset/base/254814 Log: After r254779 error must always be present in mb_ctor_pack(), not only when MAC is defined. Reported by: gjb / tinderbox Sponsored by: The FreeBSD Foundation Modified: head/sys/kern/kern_mbuf.c Modified: head/sys/kern/kern_mbuf.c == --- head/sys/kern/kern_mbuf.c Sat Aug 24 21:13:38 2013(r254813) +++ head/sys/kern/kern_mbuf.c Sat Aug 24 21:25:53 2013(r254814) @@ -619,10 +619,7 @@ mb_ctor_pack(void *mem, int size, void * { struct mbuf *m; struct mb_args *args; -#ifdef MAC - int error; -#endif - int flags; + int error, flags; short type; m = (struct mbuf *)mem; ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
Re: svn commit: r254779 - head/sys/kern
On 24.08.2013 23:13, Glen Barber wrote: On Sat, Aug 24, 2013 at 12:24:59PM +, Andre Oppermann wrote: Author: andre Date: Sat Aug 24 12:24:58 2013 New Revision: 254779 URL: http://svnweb.freebsd.org/changeset/base/254779 Log: Avoid code duplication for mbuf initialization and use m_init() instead in mb_ctor_mbuf() and mb_ctor_pack(). Modified: head/sys/kern/kern_mbuf.c [...] -#ifdef MAC - /* If the label init fails, fail the alloc */ - error = mac_mbuf_init(m, how); - if (error) - return (error); -#endif - } else - m-m_data = m-m_dat; - return (0); + m = (struct mbuf *)mem; + flags = args-flags; + + error = m_init(m, NULL, size, how, type, flags); + + return (error); } This breaks head/. cc -c -O -pipe -std=c99 -g -Wall -Wredundant-decls -Wnested-externs -Wstrict-prototypes -Wmissing-prototypes -Wpointer-arith -Winline -Wcast-qual -Wundef -Wno-pointer-sign -fformat-extensions -Wmissing-include-dirs -fdiagnostics-show-option -Wno-error-tautological-compare -Wno-error-empty-body -Wno-error-parentheses-equality -nostdinc -I. -I/src/sys -I/src/sys/contrib/altq -I/src/sys/contrib/libfdt -D_KERNEL -DHAVE_KERNEL_OPTION_HEADERS -include opt_global.h -funwind-tables -mllvm -arm-enable-ehabi -ffreestanding -Werror /src/sys/kern/kern_mbuf.c /src/sys/kern/kern_mbuf.c:637:2: error: use of undeclared identifier 'error' error = m_init(m, NULL, size, how, type, flags); ^ /src/sys/kern/kern_mbuf.c:643:10: error: use of undeclared identifier 'error' return (error); ^ http://tinderbox.freebsd.org/tinderbox-head-build-HEAD-armv6-arm.full Sorry, and thanks for the report. Fixed in r254814. -- Andre ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r254815 - head/share/man/man9
Author: andre Date: Sat Aug 24 21:30:35 2013 New Revision: 254815 URL: http://svnweb.freebsd.org/changeset/base/254815 Log: Fix BUGS section after botched modify in r254772. Reported by: bjk Modified: head/share/man/man9/pfil.9 Modified: head/share/man/man9/pfil.9 == --- head/share/man/man9/pfil.9 Sat Aug 24 21:25:53 2013(r254814) +++ head/share/man/man9/pfil.9 Sat Aug 24 21:30:35 2013(r254815) @@ -241,8 +241,7 @@ Fine-grained locking was added in lock export was added in .Fx 10.0 . .Sh BUGS -The -.Fn When a +.Pp When a .Vt pfil_head is being modified, no traffic is diverted (to avoid deadlock). ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
Re: svn commit: r254772 - head/share/man/man9
On 24.08.2013 23:25, Benjamin Kaduk wrote: On Sat, Aug 24, 2013 at 6:38 AM, Andre Oppermann an...@freebsd.org mailto:an...@freebsd.org wrote: Author: andre Date: Sat Aug 24 10:38:02 2013 New Revision: 254772 URL: http://svnweb.freebsd.org/changeset/base/254772 Log: pfil_hook_get() has been internalized in r254771 and is no longer part of the API. It wasn't safe for external use in any case. Modified: head/share/man/man9/pfil.9 == --- head/share/man/man9/pfil.9 Sat Aug 24 10:36:33 2013(r254771) +++ head/share/man/man9/pfil.9 Sat Aug 24 10:38:02 2013(r254772) @@ -245,11 +242,7 @@ lock export was added in .Fx 10.0 . .Sh BUGS The -.Fn pfil_hook_get -function -is only safe for internal use. -.Pp -When a +.Fn When a This hunk looks pretty bogus. Thanks for the report. I'm a total mdoc noob. (Hopefully) fixed in r254815. -- Andre -Ben .Vt pfil_head is being modified, no traffic is diverted (to avoid deadlock). ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
Re: svn commit: r254520 - in head/sys: kern sys
On 19.08.2013 23:45, Navdeep Parhar wrote: On 08/19/13 13:58, Andre Oppermann wrote: On 19.08.2013 19:33, Navdeep Parhar wrote: On 08/19/13 04:16, Andre Oppermann wrote: Author: andre Date: Mon Aug 19 11:16:53 2013 New Revision: 254520 URL: http://svnweb.freebsd.org/changeset/base/254520 Log: Remove the unused M_NOFREE mbuf flag. It didn't have any in-tree users for a very long time, if ever. Should such a functionality ever be needed again the appropriate and much better way to do it is through a custom EXT_SOMETHING external mbuf type together with a dedicated *ext_free function. Discussed with:trociny, glebius Modified: head/sys/kern/kern_mbuf.c head/sys/kern/uipc_mbuf.c head/sys/sys/mbuf.h Hello Andre, Is this just garbage collection or is there some other reason for this? This is garbage collection and removal of not quite right, rotten, functionality. I recently tried some experiments to reduce the number of mbuf and cluster allocations in a 40G NIC driver. M_NOFREE and EXT_EXTREF proved very useful and the code changes to the kernel were minimal. See user/np/cxl_tuning. The experiment was quite successful and I was planning to bring in most of those changes to HEAD. I was hoping to get some runtime mileage on the approach in general before tweaking the ctors/dtors for jumpbo, jumbo9, jumbo16 to allow for an mbuf+refcnt within the cluster. But now M_NOFREE has vanished without a warning... I'm looking through your experimental code and that is some really good numbers you're achieving there! However a couple things don't feel quite right, hackish even, and not fit for HEAD. This is a bit the same situation we had with some of the first 1GigE cards quite a number of years back (mostly ti(4)). There we ended up with a couple of just good enough hacks to make it fast. Most of the remains I've collected today. If M_NOFREE and EXT_EXTREF are properly supported in the tree (and I'm arguing that they were, before r254520) then the changes are perfectly legitimate. The only hackish part was that I was getting the cluster from the jumbop zone while bypassing its normal refcnt mechanism. This I did so as to use the same zone as m_uiotombuf to keep it hot for all consumers (driver + network stack). If you insist I'll revert the commit removing M_NOFREE. EXT_EXTREF isn't touched yet, but should get better support. The hackish part for me is that the driver again manages its own memory pool. Windows works that way, NetBSD is moving towards it while FreeBSD has and remains at a central network memory pool. The latter (our current) way of doing it seems more efficient overall especially on heavily loaded networked machines. There may be significant queues building (think app blocked having many sockets buffer fill up) up delaying the freeing and returning of network memory resources. Together with fragmentation this can lead to bad very outcomes. Router applications with many interfaces also greatly benefit from central memory pools. So I'm really not sure that we should move back in the driver owned pool direction with lots of code duplication and copy-pasting (see NetBSD). Also it is kinda weird to have a kernel based pool for data going down the stack and another one in each driver for those going up. Actually I'm of the opinion that we should stay with the central memory pool and fix so that it works just as well for those cases a driver pool currently performs better. I believe most of the improvements you've shown can be implemented in a more generic and safe way into the mbuf system. Also a number of things in your experimental code may have side-effects in situations other than netperf runs. Agreed. As I mentioned my long term plan was to tweak the jumboX zones to allow them to allocate cluster with embedded mbuf + refcount. The M_NOFREE+EXT_EXTREF approach is the perfect bridge from here to there. It is non-intrusive and lends itself well to experimentation. Agreed, full support on fixing the refcount issue. To summarize what I get from your experimental branch commits: - the Chelsio T4/T5 card can DMA multiple ethernet frames (packets) into a single memory buffer, provided it is large enough. - you make use of that feature and point multiple m_ext mbufs into that buffer to separate the packets and send them up the stack. - you embed the m_ext refcount into the single memory buffer as well. yes, yes, and yes. - you recycle mbufs? (I'm not entirely clear on that as I'm not familiar with the cxgbe code) I recycle the cluster (and the embedded mbuf in it) when possible. All depends on whether it's still in use by the time the rx queue needs it back. There's always a couple of problems with driver managed pools. Driver shutdown/ unloading requires all such managed mbufs to have returned before it can proceed. This may take a undetermined long time as mbufs are sitting in socket buffers or other
Re: svn commit: r254520 - in head/sys: kern sys
On 20.08.2013 00:03, Navdeep Parhar wrote: On 08/19/13 14:08, Andre Oppermann wrote: On 19.08.2013 19:40, Peter Grehan wrote: I recently tried some experiments to reduce the number of mbuf and cluster allocations in a 40G NIC driver. M_NOFREE and EXT_EXTREF proved very useful and the code changes to the kernel were minimal. See user/np/cxl_tuning. The experiment was quite successful and I was planning to bring in most of those changes to HEAD. I was hoping to get some runtime mileage on the approach in general before tweaking the ctors/dtors for jumpbo, jumbo9, jumbo16 to allow for an mbuf+refcnt within the cluster. But now M_NOFREE has vanished without a warning... I also had a virtualization work-in-progress where static mbufs were allocated in the kernel and M_NOFREE set. Might be worth sending a prior heads-up for these type of changes. I'm sorry for ambushing but this stuff has to be done. I have provided an alternative way of handling it and I'm happy to help you with your use case to make it good for you and to prevent the mbuf system from getting bloated and hackish again. I don't know what Peter's use case is but I'm curious about the already-available alternative to M_NOFREE, if that's what you meant. Can you please elaborate? When you supply your own (*ext_free) function you can simply omit freeing the mbuf itself. Should make sure not to leak it though. -- Andre ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
Re: M_NOFREE removal (was Re: svn commit: r254520 - in head/sys: kern sys)
On 20.08.2013 00:38, Peter Grehan wrote: Hi Andre, (moving to the more appropriate freebsd-net) I'm sorry for ambushing but this stuff has to be done. I have provided an alternative way of handling it and I'm happy to help you with your use case to make it good for you and to prevent the mbuf system from getting bloated and hackish again. Sure. I'm not really upset since my code wasn't too far along, but with any API, you never know who consumers might be so it's always worth being proactive about announcing it's removal. Agreed. OTOH there wasn't any in-tree user of it and posting before such removals always yields at least one obscure hand wavy use or potential use of it. So nothing ever happens. Can you please describe your intended use of M_NOFREE to better understand the shortcomings of the current mbuf systems and the additional advantages of the M_NOFREE case? I was looking at something similar to Linux's vhost-net, where a guest's virtio ring would be processed in-kernel. An mbuf chain with external buffers would be used to pass guest tx buffer/len segments directly into FreeBSD drivers. Is that like page flipping? The intent of M_NOFREE was to avoid small mbuf allocations/frees in what is a hot path. This code was intended to run at 10/40G. Note this code isn't really generic - it would require interfaces to be 'owned' by the guest, except that direct PCI-level pass-through wouldn't be needed. Do you have some example code showing how that is or can be done? If there's an alternative to M_NOFREE, I'd be more than happy to use that. Set up your own (*ext_free) function and omit freeing of the mbuf itself. Make sure to properly track your mbufs to avoid leaking them. -- Andre ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
Re: svn commit: r254519 - in head/sys: netinet netinet6 sys
On 20.08.2013 05:04, Julian Elischer wrote: On 8/19/13 7:08 PM, Andre Oppermann wrote: Author: andre Date: Mon Aug 19 11:08:36 2013 New Revision: 254519 URL: http://svnweb.freebsd.org/changeset/base/254519 Log: Move the global M_SKIP_FIREWALL mbuf flags to a protocol layer specific flag instead. The flag is only used within the IP and IPv6 layer 3 protocols. wel, maybe Layer 2 usage of ipfw could make use of this flag as well. Cisco were using L2 ipfw some years back. I don't know if this affects them at all. L2 firewalls are not affected directly. -- Andre ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
Re: M_NOFREE removal (was Re: svn commit: r254520 - in head/sys: kern sys)
On 20.08.2013 05:13, Julian Elischer wrote: On 8/20/13 6:38 AM, Peter Grehan wrote: Hi Andre, (moving to the more appropriate freebsd-net) I'm sorry for ambushing but this stuff has to be done. I have provided an alternative way of handling it and I'm happy to help you with your use case to make it good for you and to prevent the mbuf system from getting bloated and hackish again. Sure. I'm not really upset since my code wasn't too far along, but with any API, you never know who consumers might be so it's always worth being proactive about announcing it's removal. Can you please describe your intended use of M_NOFREE to better understand the shortcomings of the current mbuf systems and the additional advantages of the M_NOFREE case? I was looking at something similar to Linux's vhost-net, where a guest's virtio ring would be processed in-kernel. An mbuf chain with external buffers would be used to pass guest tx buffer/len segments directly into FreeBSD drivers. The intent of M_NOFREE was to avoid small mbuf allocations/frees in what is a hot path. This code was intended to run at 10/40G. Note this code isn't really generic - it would require interfaces to be 'owned' by the guest, except that direct PCI-level pass-through wouldn't be needed. If there's an alternative to M_NOFREE, I'd be more than happy to use that. I think an alternative would be a reference counted version. we used to have that and NetBSD had a quite sophisticated mbuf system where there were multiple owners of mbufs.. they wouldn't be freed until the last one freed it but I don't remember the details. I just had a glance at the NetBSD mbuf system and it doesn't look convincing either. There may be some serious scalability issues with this ownership thing. -- Andre ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
Re: svn commit: r254520 - in head/sys: kern sys
On 21.08.2013 17:03, Peter Grehan wrote: The way to go should be 4K clusters as they are native to the architecture. IIRC a PCIe DMA can't cross a 4K boundary anyway That's a 4G boundary, for some devices. I meant a single PCIe DMA transaction can be at most 4K before it has to set up another one? -- Andre ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
Re: svn commit: r254524 - head/sys/sys
On 20.08.2013 20:13, Davide Italiano wrote: On Tue, Aug 20, 2013 at 7:42 PM, Andre Oppermann an...@freebsd.org wrote: On 19.08.2013 19:37, Navdeep Parhar wrote: Why reuse the freed up bits so soon (at least one of which I think was prematurely GC'ed -- see my other email on M_NOFREE). There was room beyond M_HASHTYPEBITS, no? This is HEAD where kernel and modules have to be (re)compiled together at any point in time. On stable this reuse would not have been possible. In a subsequent commit I compacted and ordered the flags. There's a couple of free ones remaining. I have some additional mbuf changes coming up to be posted for possible objections later today. The close HEAD freeze deadline has got me rushed a bit to allow 10.x backporting of the checksum/offload overhaul. -- Andre In my opinion the possibility we have about breaking KPI/KBI should not be abused, even if it's allowed in HEAD. In other words,people should go for preserving it when (as in this case) it's easy and without additional costs. Your point about this is HEAD, it can be broken at any time makes relatively little sense to me. Note that in the worst case such KPI/KBI breakages are annoying for $VENDORS who maintain out-of-tree code, which need to rebuild/change their code, or, if they get pissed off, drop FreeBSD support. In your case, it's just a matter of code cleaness about few lines, which, IMHO and always IMHO, doesn't justify the breakage. Preserving the API but having to recompile is always fair game in HEAD. In fact it happens all the time and the only supported way to track HEAD is to recompile kernel and modules together. For removing (crufty) functionality I agree that it shouldn't be done just for the sake of it and also shouldn't be done often. Sometimes it is necessary in the name of progress. Having to support old, crufty or broken ways of doing something is a waste of developer time too. It's always a judgement call. In this case there is a perfectly valid alternative way of doing that also is less dangerous to leak mbufs. -- Andre ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
Re: svn commit: r254524 - head/sys/sys
On 21.08.2013 17:59, Davide Italiano wrote: On Wed, Aug 21, 2013 at 5:30 PM, Andre Oppermann an...@freebsd.org wrote: On 20.08.2013 20:13, Davide Italiano wrote: On Tue, Aug 20, 2013 at 7:42 PM, Andre Oppermann an...@freebsd.org wrote: On 19.08.2013 19:37, Navdeep Parhar wrote: Why reuse the freed up bits so soon (at least one of which I think was prematurely GC'ed -- see my other email on M_NOFREE). There was room beyond M_HASHTYPEBITS, no? This is HEAD where kernel and modules have to be (re)compiled together at any point in time. On stable this reuse would not have been possible. In a subsequent commit I compacted and ordered the flags. There's a couple of free ones remaining. I have some additional mbuf changes coming up to be posted for possible objections later today. The close HEAD freeze deadline has got me rushed a bit to allow 10.x backporting of the checksum/offload overhaul. -- Andre In my opinion the possibility we have about breaking KPI/KBI should not be abused, even if it's allowed in HEAD. In other words,people should go for preserving it when (as in this case) it's easy and without additional costs. Your point about this is HEAD, it can be broken at any time makes relatively little sense to me. Note that in the worst case such KPI/KBI breakages are annoying for $VENDORS who maintain out-of-tree code, which need to rebuild/change their code, or, if they get pissed off, drop FreeBSD support. In your case, it's just a matter of code cleaness about few lines, which, IMHO and always IMHO, doesn't justify the breakage. Preserving the API but having to recompile is always fair game in HEAD. In fact it happens all the time and the only supported way to track HEAD is to recompile kernel and modules together. For removing (crufty) functionality I agree that it shouldn't be done just for the sake of it and also shouldn't be done often. Sometimes it is necessary in the name of progress. Having to support old, crufty or broken ways of doing something is a waste of developer time too. It's always a judgement call. In this case there is a perfectly valid alternative way of doing that also is less dangerous to leak mbufs. -- Andre I don't see in any way how flags reordering might be in any way connected to mbufs leaks, alas. There's a similar recent('ish) discussion and it was decided to not compact/reorder flags. See r253662/r253775 for references. So, I'm not sure what's the de-facto policy, but still, as a community FreeBSD should decide one strategy and be stuck with that. It's a matter of being consistent, which, IMHO, is something that should not be undervaluated. I fail to see what problem you have with the flag reordering. Recompile and done. That's why we work with #defines instead of magic 0x1234 values. If someone outside the tree was using the spare bits for their own private purposes, yes, they would quickly have to check and possibly adjust them. That's trivial however and should be expected when tracking an OpenSource operating system. If you want total ABI stability then build on Windows but even they made a big break somewhere around NIDS5 or NDIS6. We can't get stuck on every bit because there may be someone somewhere out there we don't know about using it. On any x-stable such a reorder wouldn't be possible obviously because of the ABI preservation guarantee. -- Andre ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r254605 - in head/sys: kern sys
Author: andre Date: Wed Aug 21 18:12:04 2013 New Revision: 254605 URL: http://svnweb.freebsd.org/changeset/base/254605 Log: Revert r254520 and resurrect the M_NOFREE mbuf flag and functionality. Requested by: np, grehan Modified: head/sys/kern/kern_mbuf.c head/sys/kern/uipc_mbuf.c head/sys/sys/mbuf.h Modified: head/sys/kern/kern_mbuf.c == --- head/sys/kern/kern_mbuf.c Wed Aug 21 17:47:11 2013(r254604) +++ head/sys/kern/kern_mbuf.c Wed Aug 21 18:12:04 2013(r254605) @@ -474,6 +474,7 @@ mb_dtor_mbuf(void *mem, int size, void * if ((flags MB_NOTAGS) == 0 (m-m_flags M_PKTHDR) != 0) m_tag_delete_chain(m, NULL); KASSERT((m-m_flags M_EXT) == 0, (%s: M_EXT set, __func__)); + KASSERT((m-m_flags M_NOFREE) == 0, (%s: M_NOFREE set, __func__)); #ifdef INVARIANTS trash_dtor(mem, size, arg); #endif Modified: head/sys/kern/uipc_mbuf.c == --- head/sys/kern/uipc_mbuf.c Wed Aug 21 17:47:11 2013(r254604) +++ head/sys/kern/uipc_mbuf.c Wed Aug 21 18:12:04 2013(r254605) @@ -278,10 +278,16 @@ m_extadd(struct mbuf *mb, caddr_t buf, u void mb_free_ext(struct mbuf *m) { + int skipmbuf; KASSERT((m-m_flags M_EXT) == M_EXT, (%s: M_EXT not set, __func__)); KASSERT(m-m_ext.ref_cnt != NULL, (%s: ref_cnt not set, __func__)); + /* +* check if the header is embedded in the cluster +*/ + skipmbuf = (m-m_flags M_NOFREE); + /* Free attached storage if this mbuf is the only reference to it. */ if (*(m-m_ext.ref_cnt) == 1 || atomic_fetchadd_int(m-m_ext.ref_cnt, -1) == 1) { @@ -322,6 +328,8 @@ mb_free_ext(struct mbuf *m) (%s: unknown ext_type, __func__)); } } + if (skipmbuf) + return; /* * Free this mbuf back to the mbuf zone with all m_ext @@ -386,7 +394,7 @@ m_demote(struct mbuf *m0, int all) m_freem(m-m_nextpkt); m-m_nextpkt = NULL; } - m-m_flags = m-m_flags (M_EXT|M_RDONLY); + m-m_flags = m-m_flags (M_EXT|M_RDONLY|M_NOFREE); } } Modified: head/sys/sys/mbuf.h == --- head/sys/sys/mbuf.h Wed Aug 21 17:47:11 2013(r254604) +++ head/sys/sys/mbuf.h Wed Aug 21 18:12:04 2013(r254605) @@ -191,6 +191,7 @@ struct mbuf { #defineM_PROMISC 0x0040 /* packet was not for us */ #defineM_VLANTAG 0x0080 /* ether_vtag is valid */ #defineM_FLOWID0x0100 /* deprecated: flowid is valid */ +#defineM_NOFREE0x0200 /* do not free mbuf, embedded in cluster */ #defineM_PROTO10x1000 /* protocol-specific */ #defineM_PROTO20x2000 /* protocol-specific */ @@ -526,7 +527,7 @@ m_free(struct mbuf *m) if (m-m_flags M_EXT) mb_free_ext(m); - else + else if ((m-m_flags M_NOFREE) == 0) uma_zfree(zone_mbuf, m); return (n); } ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
Re: M_NOFREE removal (was Re: svn commit: r254520 - in head/sys: kern sys)
On 21.08.2013 18:38, Navdeep Parhar wrote: On 08/21/13 08:08, Andre Oppermann wrote: On 20.08.2013 00:38, Peter Grehan wrote: snip If there's an alternative to M_NOFREE, I'd be more than happy to use that. Set up your own (*ext_free) function and omit freeing of the mbuf itself. Make sure to properly track your mbufs to avoid leaking them. How is this an alternate to M_NOFREE? Your suggestion indicates that you may have removed M_NOFREE thinking it did something other than what it actually did. And this suggestion doesn't even work -- note the uma_zfree(zone_mbuf, m) at the end of mb_free_ext that would run after any custom ext_free. To recap: M_NOFREE was the way to tell the mbuf subsystem that the mbuf does not come from zone_mbuf. Nothing more and nothing less. The mbuf was in other ways like any other mbuf and could have a pkthdr (or not), internal storage (or not), external cluster (or not), etc. etc. Mea culpa. You're totally right. I have mixed up my mental model with another working tree I carry along for quite some time. In it (*ext_free) completely replaces mb_free_ext() making it very easy to keep the mbuf. We should move that way hopefully sooner than later, simplifying a couple of things including externally managed refcounts. Sorry for the chaos and not getting it. M_NOFREE is back with r254605. -- Andre ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
Re: M_NOFREE removal (was Re: svn commit: r254520 - in head/sys: kern sys)
On 21.08.2013 20:23, Navdeep Parhar wrote: I believe we need an extra patch to get M_NOFREE correct. I've had it forever in some of my internal repos but never committed it upstream (just plain forgot). Since this stuff is fresh in your mind, can you review this: diff -r cd78031b7885 sys/sys/mbuf.h --- a/sys/sys/mbuf.hFri Aug 16 13:35:26 2013 -0700 +++ b/sys/sys/mbuf.hWed Aug 21 10:55:57 2013 -0700 @@ -535,6 +535,8 @@ m_free(struct mbuf *m) { struct mbuf *n = m-m_next; + if ((m-m_flags (M_PKTHDR|M_NOFREE)) == (M_PKTHDR|M_NOFREE)) + m_tag_delete_chain(m, NULL); if (m-m_flags M_EXT) mb_free_ext(m); else if ((m-m_flags M_NOFREE) == 0) It prevents leaks of tags from M_NOFREE+pkthdr mbufs. Good point. Looks correct. But then I wonder if it is really a smart thing to allow single mbufs (w/o M_EXT) to be M_NOFREE at the same time. They easily get lost. If it doesn't have an external buffer attached there is no way to know when and if it was freed. If M_NOFREE is only allowed together with M_EXT then the tag chain delete should happen in mb_ext_free() next to 'skipmbuf' instead. Index: kern/uipc_mbuf.c === --- kern/uipc_mbuf.c(revision 254605) +++ kern/uipc_mbuf.c(working copy) @@ -283,11 +283,6 @@ KASSERT((m-m_flags M_EXT) == M_EXT, (%s: M_EXT not set, __func__)); KASSERT(m-m_ext.ref_cnt != NULL, (%s: ref_cnt not set, __func__)); - /* -* check if the header is embedded in the cluster -*/ - skipmbuf = (m-m_flags M_NOFREE); - /* Free attached storage if this mbuf is the only reference to it. */ if (*(m-m_ext.ref_cnt) == 1 || atomic_fetchadd_int(m-m_ext.ref_cnt, -1) == 1) { @@ -328,8 +323,14 @@ (%s: unknown ext_type, __func__)); } } - if (skipmbuf) + + /* +* Do not free if the mbuf is embedded in the cluster. +*/ + if (m-m_flags M_NOFREE) { + m_tag_delete_chain(m, NULL); return; + } /* * Free this mbuf back to the mbuf zone with all m_ext -- Andre ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
Re: svn commit: r254520 - in head/sys: kern sys
On 21.08.2013 21:59, Navdeep Parhar wrote: On 08/21/13 12:41, Scott Long wrote: On Aug 21, 2013, at 8:59 AM, Andre Oppermann an...@freebsd.org wrote: On 19.08.2013 23:45, Navdeep Parhar wrote: On 08/19/13 13:58, Andre Oppermann wrote: On 19.08.2013 19:33, Navdeep Parhar wrote: On 08/19/13 04:16, Andre Oppermann wrote: Author: andre Date: Mon Aug 19 11:16:53 2013 New Revision: 254520 URL: http://svnweb.freebsd.org/changeset/base/254520 Log: Remove the unused M_NOFREE mbuf flag. It didn't have any in-tree users for a very long time, if ever. Should such a functionality ever be needed again the appropriate and much better way to do it is through a custom EXT_SOMETHING external mbuf type together with a dedicated *ext_free function. Discussed with:trociny, glebius Modified: head/sys/kern/kern_mbuf.c head/sys/kern/uipc_mbuf.c head/sys/sys/mbuf.h Hello Andre, Is this just garbage collection or is there some other reason for this? This is garbage collection and removal of not quite right, rotten, functionality. I recently tried some experiments to reduce the number of mbuf and cluster allocations in a 40G NIC driver. M_NOFREE and EXT_EXTREF proved very useful and the code changes to the kernel were minimal. See user/np/cxl_tuning. The experiment was quite successful and I was planning to bring in most of those changes to HEAD. I was hoping to get some runtime mileage on the approach in general before tweaking the ctors/dtors for jumpbo, jumbo9, jumbo16 to allow for an mbuf+refcnt within the cluster. But now M_NOFREE has vanished without a warning... I'm looking through your experimental code and that is some really good numbers you're achieving there! However a couple things don't feel quite right, hackish even, and not fit for HEAD. This is a bit the same situation we had with some of the first 1GigE cards quite a number of years back (mostly ti(4)). There we ended up with a couple of just good enough hacks to make it fast. Most of the remains I've collected today. If M_NOFREE and EXT_EXTREF are properly supported in the tree (and I'm arguing that they were, before r254520) then the changes are perfectly legitimate. The only hackish part was that I was getting the cluster from the jumbop zone while bypassing its normal refcnt mechanism. This I did so as to use the same zone as m_uiotombuf to keep it hot for all consumers (driver + network stack). If you insist I'll revert the commit removing M_NOFREE. EXT_EXTREF isn't touched yet, but should get better support. The hackish part for me is that the driver again manages its own memory pool. Windows works that way, NetBSD is moving towards it while FreeBSD has and remains at a central network memory pool. The latter (our current) way of doing it seems more efficient overall especially on heavily loaded networked machines. There may be significant queues building (think app blocked having many sockets buffer fill up) up delaying the freeing and returning of network memory resources. Together with fragmentation this can lead to bad very outcomes. Router applications with many interfaces also greatly benefit from central memory pools. So I'm really not sure that we should move back in the driver owned pool direction with lots of code duplication and copy-pasting (see NetBSD). Also it is kinda weird to have a kernel based pool for data going down the stack and another one in each driver for those going up. Actually I'm of the opinion that we should stay with the central memory pool and fix so that it works just as well for those cases a driver pool currently performs better. The central memory pool approach is too slow, unfortunately. There's a reason that other OS's are moving to them. At Netflix we are currently working on some approaches to private memory pools in order to achieve better efficiency, and we're closely watching and anticipating Navdeep's work. I should point out that I went to great lengths to use the jumbop zone in my experiments, and not create my own pool of memory for the rx buffers. The hope was to share cache warmth (sounds very cosy :-) with the likes of m_uiotombuf (which uses jumbop too) etc. So I'm actually in the camp that prefers central pools. I'm just trying out ways to reduce the trips we have to make to the pool(s) involved. Laying down mbufs within clusters, and packing multiple frames per cluster clearly helps. Careful cluster recycling within the NIC seems to work too. What you describe does make a lot of sense. Jumbop is the optimal size for the VM. We should really look at and pushing forward to have a nicer M_NOFREE+M_EXTREF API for 10 and new HEAD going forward. As always it seems to depend on the use case and what is being measured too. Is it single stream performance? Concurrent streams? Both ways or only one way, in or out? Each makes very different use of many parts of the stack and driver leading to different
Re: M_NOFREE removal (was Re: svn commit: r254520 - in head/sys: kern sys)
On 21.08.2013 21:40, Navdeep Parhar wrote: On 08/21/13 12:22, Andre Oppermann wrote: On 21.08.2013 20:23, Navdeep Parhar wrote: I believe we need an extra patch to get M_NOFREE correct. I've had it forever in some of my internal repos but never committed it upstream (just plain forgot). Since this stuff is fresh in your mind, can you review this: diff -r cd78031b7885 sys/sys/mbuf.h --- a/sys/sys/mbuf.hFri Aug 16 13:35:26 2013 -0700 +++ b/sys/sys/mbuf.hWed Aug 21 10:55:57 2013 -0700 @@ -535,6 +535,8 @@ m_free(struct mbuf *m) { struct mbuf *n = m-m_next; +if ((m-m_flags (M_PKTHDR|M_NOFREE)) == (M_PKTHDR|M_NOFREE)) +m_tag_delete_chain(m, NULL); if (m-m_flags M_EXT) mb_free_ext(m); else if ((m-m_flags M_NOFREE) == 0) It prevents leaks of tags from M_NOFREE+pkthdr mbufs. Good point. Looks correct. But then I wonder if it is really a smart thing to allow single mbufs (w/o M_EXT) to be M_NOFREE at the same time. They easily get lost. If it doesn't have an external buffer attached there is no way to know when and if it was freed. If M_NOFREE is only allowed together with M_EXT then the tag chain delete should happen in mb_ext_free() next to 'skipmbuf' instead. Index: kern/uipc_mbuf.c === --- kern/uipc_mbuf.c(revision 254605) +++ kern/uipc_mbuf.c(working copy) @@ -283,11 +283,6 @@ KASSERT((m-m_flags M_EXT) == M_EXT, (%s: M_EXT not set, __func__)); KASSERT(m-m_ext.ref_cnt != NULL, (%s: ref_cnt not set, __func__)); - /* -* check if the header is embedded in the cluster -*/ - skipmbuf = (m-m_flags M_NOFREE); - /* Free attached storage if this mbuf is the only reference to it. */ if (*(m-m_ext.ref_cnt) == 1 || atomic_fetchadd_int(m-m_ext.ref_cnt, -1) == 1) { @@ -328,8 +323,14 @@ (%s: unknown ext_type, __func__)); } } - if (skipmbuf) + + /* +* Do not free if the mbuf is embedded in the cluster. +*/ + if (m-m_flags M_NOFREE) { + m_tag_delete_chain(m, NULL); return; + } The problem with this is that the mbuf may already be gone if it was embedded in the cluster that was just freed by the ext free. That's why skipmbuf is used to cache the M_NOFREE bit. Next try: ;) Index: kern/uipc_mbuf.c === --- kern/uipc_mbuf.c(revision 254605) +++ kern/uipc_mbuf.c(working copy) @@ -284,9 +284,12 @@ KASSERT(m-m_ext.ref_cnt != NULL, (%s: ref_cnt not set, __func__)); /* -* check if the header is embedded in the cluster +* Do not free if the mbuf is embedded in the cluster +* but make sure to get rid of any tag chains first. */ skipmbuf = (m-m_flags M_NOFREE); + if (skipmbuf (m-m_flags M_PKTHDR)) + m_tag_delete_chain(m, NULL); /* Free attached storage if this mbuf is the only reference to it. */ if (*(m-m_ext.ref_cnt) == 1 || -- Andre ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
Re: M_NOFREE removal (was Re: svn commit: r254520 - in head/sys: kern sys)
On 21.08.2013 22:52, Navdeep Parhar wrote: On 08/21/13 13:44, Andre Oppermann wrote: On 21.08.2013 21:40, Navdeep Parhar wrote: On 08/21/13 12:22, Andre Oppermann wrote: On 21.08.2013 20:23, Navdeep Parhar wrote: I believe we need an extra patch to get M_NOFREE correct. I've had it forever in some of my internal repos but never committed it upstream (just plain forgot). Since this stuff is fresh in your mind, can you review this: diff -r cd78031b7885 sys/sys/mbuf.h --- a/sys/sys/mbuf.hFri Aug 16 13:35:26 2013 -0700 +++ b/sys/sys/mbuf.hWed Aug 21 10:55:57 2013 -0700 @@ -535,6 +535,8 @@ m_free(struct mbuf *m) { struct mbuf *n = m-m_next; +if ((m-m_flags (M_PKTHDR|M_NOFREE)) == (M_PKTHDR|M_NOFREE)) +m_tag_delete_chain(m, NULL); if (m-m_flags M_EXT) mb_free_ext(m); else if ((m-m_flags M_NOFREE) == 0) It prevents leaks of tags from M_NOFREE+pkthdr mbufs. Good point. Looks correct. But then I wonder if it is really a smart thing to allow single mbufs (w/o M_EXT) to be M_NOFREE at the same time. They easily get lost. If it doesn't have an external buffer attached there is no way to know when and if it was freed. If M_NOFREE is only allowed together with M_EXT then the tag chain delete should happen in mb_ext_free() next to 'skipmbuf' instead. Index: kern/uipc_mbuf.c === --- kern/uipc_mbuf.c(revision 254605) +++ kern/uipc_mbuf.c(working copy) @@ -283,11 +283,6 @@ KASSERT((m-m_flags M_EXT) == M_EXT, (%s: M_EXT not set, __func__)); KASSERT(m-m_ext.ref_cnt != NULL, (%s: ref_cnt not set, __func__)); - /* -* check if the header is embedded in the cluster -*/ - skipmbuf = (m-m_flags M_NOFREE); - /* Free attached storage if this mbuf is the only reference to it. */ if (*(m-m_ext.ref_cnt) == 1 || atomic_fetchadd_int(m-m_ext.ref_cnt, -1) == 1) { @@ -328,8 +323,14 @@ (%s: unknown ext_type, __func__)); } } - if (skipmbuf) + + /* +* Do not free if the mbuf is embedded in the cluster. +*/ + if (m-m_flags M_NOFREE) { + m_tag_delete_chain(m, NULL); return; + } The problem with this is that the mbuf may already be gone if it was embedded in the cluster that was just freed by the ext free. That's why skipmbuf is used to cache the M_NOFREE bit. Next try: ;) It is most flexible to let M_NOFREE work without any assumptions attached (must be M_EXT, etc.) So I still prefer my patch to this. If you don't have any strong preferences may I commit that one instead? I don't mind having your patch. Though I don't see how it possibly can't leak mbufs if they are not M_EXT. -- Andre ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
Re: svn commit: r254524 - head/sys/sys
On 19.08.2013 19:37, Navdeep Parhar wrote: On 08/19/13 06:56, Andre Oppermann wrote: Author: andre Date: Mon Aug 19 13:56:14 2013 New Revision: 254524 URL: http://svnweb.freebsd.org/changeset/base/254524 Log: Add four additional M_PROTOFLAGS[9-12] for protocol specific use. Discussed with: trociny, glebius, adrian Modified: head/sys/sys/mbuf.h Modified: head/sys/sys/mbuf.h == --- head/sys/sys/mbuf.h Mon Aug 19 13:27:32 2013(r254523) +++ head/sys/sys/mbuf.h Mon Aug 19 13:56:14 2013(r254524) @@ -196,22 +196,24 @@ struct mbuf { #define M_FRAG 0x0800 /* packet is a fragment of a larger packet */ #define M_FIRSTFRAG 0x1000 /* packet is first fragment */ #define M_LASTFRAG 0x2000 /* packet is last fragment */ -/* 0x4000free */ -/* 0x8000free */ +#defineM_PROTO90x4000 /* protocol-specific */ +#defineM_PROTO10 0x8000 /* protocol-specific */ #define M_VLANTAG 0x0001 /* ether_vtag is valid */ #define M_PROMISC 0x0002 /* packet was not for us */ -/* 0x0004free */ +#defineM_PROTO11 0x0004 /* protocol-specific */ #define M_PROTO60x0008 /* protocol-specific */ #define M_PROTO70x0010 /* protocol-specific */ #define M_PROTO80x0020 /* protocol-specific */ #define M_FLOWID0x0040 /* deprecated: flowid is valid */ +#defineM_PROTO12 0x0080 /* protocol-specific */ #define M_HASHTYPEBITS 0x0F00 /* mask of bits holding flowid hash type */ Why reuse the freed up bits so soon (at least one of which I think was prematurely GC'ed -- see my other email on M_NOFREE). There was room beyond M_HASHTYPEBITS, no? This is HEAD where kernel and modules have to be (re)compiled together at any point in time. On stable this reuse would not have been possible. In a subsequent commit I compacted and ordered the flags. There's a couple of free ones remaining. I have some additional mbuf changes coming up to be posted for possible objections later today. The close HEAD freeze deadline has got me rushed a bit to allow 10.x backporting of the checksum/offload overhaul. -- Andre ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r254516 - in head/sys/dev: bce bxe mge ti
Author: andre Date: Mon Aug 19 10:20:20 2013 New Revision: 254516 URL: http://svnweb.freebsd.org/changeset/base/254516 Log: Remove unused and incomplete support for delayed fragment checksums from bce(4), bxe(4), mge(4) and ti(4) drivers. Modified: head/sys/dev/bce/if_bce.c head/sys/dev/bxe/if_bxe.c head/sys/dev/mge/if_mge.c head/sys/dev/ti/if_ti.c Modified: head/sys/dev/bce/if_bce.c == --- head/sys/dev/bce/if_bce.c Mon Aug 19 09:49:51 2013(r254515) +++ head/sys/dev/bce/if_bce.c Mon Aug 19 10:20:20 2013(r254516) @@ -9821,9 +9821,7 @@ bce_dump_mbuf(struct bce_softc *sc, stru if (mp-m_flags M_PKTHDR) { BCE_PRINTF(- m_pkthdr: len = %d, flags = 0x%b, csum_flags = %b\n, mp-m_pkthdr.len, - mp-m_flags, \20\12M_BCAST\13M_MCAST\14M_FRAG - \15M_FIRSTFRAG\16M_LASTFRAG\21M_VLANTAG - \22M_PROMISC\23M_NOFREE, + mp-m_flags, M_FLAG_PRINTF, mp-m_pkthdr.csum_flags, \20\1CSUM_IP\2CSUM_TCP\3CSUM_UDP \5CSUM_FRAGMENT\6CSUM_TSO\11CSUM_IP_CHECKED Modified: head/sys/dev/bxe/if_bxe.c == --- head/sys/dev/bxe/if_bxe.c Mon Aug 19 09:49:51 2013(r254515) +++ head/sys/dev/bxe/if_bxe.c Mon Aug 19 10:20:20 2013(r254516) @@ -16265,9 +16265,7 @@ void bxe_dump_mbuf(struct bxe_softc *sc, if (m-m_flags M_PKTHDR) { BXE_PRINTF(- m_pkthdr: len = %d, flags = 0x%b, csum_flags = %b\n, m-m_pkthdr.len, - m-m_flags, \20\12M_BCAST\13M_MCAST\14M_FRAG - \15M_FIRSTFRAG\16M_LASTFRAG\21M_VLANTAG - \22M_PROMISC\23M_NOFREE, + m-m_flags, M_FLAG_PRINTF, m-m_pkthdr.csum_flags, \20\1CSUM_IP\2CSUM_TCP\3CSUM_UDP \5CSUM_FRAGMENT\6CSUM_TSO\11CSUM_IP_CHECKED Modified: head/sys/dev/mge/if_mge.c == --- head/sys/dev/mge/if_mge.c Mon Aug 19 09:49:51 2013(r254515) +++ head/sys/dev/mge/if_mge.c Mon Aug 19 10:20:20 2013(r254516) @@ -1703,9 +1703,7 @@ mge_offload_setup_descriptor(struct mge_ ip = (struct ip *)(m0-m_data + ehlen); cmd_status |= MGE_TX_IP_HDR_SIZE(ip-ip_hl); - - if ((m0-m_flags M_FRAG) == 0) - cmd_status |= MGE_TX_NOT_FRAGMENT; + cmd_status |= MGE_TX_NOT_FRAGMENT; } if (csum_flags CSUM_IP) Modified: head/sys/dev/ti/if_ti.c == --- head/sys/dev/ti/if_ti.c Mon Aug 19 09:49:51 2013(r254515) +++ head/sys/dev/ti/if_ti.c Mon Aug 19 10:20:20 2013(r254516) @@ -3159,24 +3159,6 @@ ti_start_locked(struct ifnet *ifp) break; /* -* XXX -* safety overkill. If this is a fragmented packet chain -* with delayed TCP/UDP checksums, then only encapsulate -* it if we have enough descriptors to handle the entire -* chain at once. -* (paranoia -- may not actually be needed) -*/ - if (m_head-m_flags M_FIRSTFRAG - m_head-m_pkthdr.csum_flags (CSUM_DELAY_DATA)) { - if ((TI_TX_RING_CNT - sc-ti_txcnt) - m_head-m_pkthdr.csum_data + 16) { - IFQ_DRV_PREPEND(ifp-if_snd, m_head); - ifp-if_drv_flags |= IFF_DRV_OACTIVE; - break; - } - } - - /* * Pack the data into the transmit ring. If we * don't have room, set the OACTIVE flag and wait * for the NIC to drain the ring. ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r254517 - head/sys/netinet
Author: andre Date: Mon Aug 19 10:30:15 2013 New Revision: 254517 URL: http://svnweb.freebsd.org/changeset/base/254517 Log: Remove unused M_FRAG, M_FIRSTFRAG and M_LASTFRAG tagging from ip_fragment(). There wasn't any real driver (and hardware) support for it. Modern hardware does full fragmentation/segmentation offload instead. Modified: head/sys/netinet/ip_output.c Modified: head/sys/netinet/ip_output.c == --- head/sys/netinet/ip_output.cMon Aug 19 10:20:20 2013 (r254516) +++ head/sys/netinet/ip_output.cMon Aug 19 10:30:15 2013 (r254517) @@ -784,7 +784,7 @@ smart_frag_failure: IPSTAT_INC(ips_odropped); goto done; } - m-m_flags |= (m0-m_flags M_MCAST) | M_FRAG; + m-m_flags |= (m0-m_flags M_MCAST); /* * In the first mbuf, leave room for the link header, then * copy the original IP header including options. The payload @@ -801,10 +801,9 @@ smart_frag_failure: m-m_len = mhlen; /* XXX do we need to add ip_off below ? */ mhip-ip_off = ((off - hlen) 3) + ip_off; - if (off + len = ip_len) { /* last fragment */ + if (off + len = ip_len) len = ip_len - off; - m-m_flags |= M_LASTFRAG; - } else + else mhip-ip_off |= IP_MF; mhip-ip_len = htons((u_short)(len + mhlen)); m-m_next = m_copym(m0, off, len, M_NOWAIT); @@ -831,10 +830,6 @@ smart_frag_failure: } IPSTAT_ADD(ips_ofragments, nfrags); - /* set first marker for fragment chain */ - m0-m_flags |= M_FIRSTFRAG | M_FRAG; - m0-m_pkthdr.csum_data = nfrags; - /* * Update first fragment by trimming what's been copied out * and updating header. ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r254518 - head/sys/netinet
Author: andre Date: Mon Aug 19 10:34:10 2013 New Revision: 254518 URL: http://svnweb.freebsd.org/changeset/base/254518 Log: Move ip_reassemble()'s use of the global M_FRAG mbuf flag to a protocol layer specific flag instead. The flag is only relevant while the packet stays in the IP reassembly queue. Discussed with: trociny, glebius Modified: head/sys/netinet/ip_input.c head/sys/netinet/ip_var.h Modified: head/sys/netinet/ip_input.c == --- head/sys/netinet/ip_input.c Mon Aug 19 10:30:15 2013(r254517) +++ head/sys/netinet/ip_input.c Mon Aug 19 10:34:10 2013(r254518) @@ -911,9 +911,9 @@ found: IPSTAT_INC(ips_toosmall); /* XXX */ goto dropfrag; } - m-m_flags |= M_FRAG; + m-m_flags |= M_IP_FRAG; } else - m-m_flags = ~M_FRAG; + m-m_flags = ~M_IP_FRAG; ip-ip_off = htons(ntohs(ip-ip_off) 3); /* @@ -1060,7 +1060,7 @@ found: next += ntohs(GETIP(q)-ip_len); } /* Make sure the last packet didn't have the IP_MF flag */ - if (p-m_flags M_FRAG) { + if (p-m_flags M_IP_FRAG) { if (fp-ipq_nfrags V_maxfragsperpacket) { IPSTAT_ADD(ips_fragdropped, fp-ipq_nfrags); ip_freef(head, fp); Modified: head/sys/netinet/ip_var.h == --- head/sys/netinet/ip_var.h Mon Aug 19 10:30:15 2013(r254517) +++ head/sys/netinet/ip_var.h Mon Aug 19 10:34:10 2013(r254518) @@ -167,6 +167,7 @@ voidkmod_ipstat_dec(int statnum); */ #defineM_FASTFWD_OURS M_PROTO1/* changed dst to local */ #defineM_IP_NEXTHOPM_PROTO2/* explicit ip nexthop */ +#defineM_IP_FRAG M_PROTO4/* fragment reassembly */ #ifdef __NO_STRICT_ALIGNMENT #define IP_HDR_ALIGNED_P(ip) 1 ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r254519 - in head/sys: netinet netinet6 sys
Author: andre Date: Mon Aug 19 11:08:36 2013 New Revision: 254519 URL: http://svnweb.freebsd.org/changeset/base/254519 Log: Move the global M_SKIP_FIREWALL mbuf flags to a protocol layer specific flag instead. The flag is only used within the IP and IPv6 layer 3 protocols. Because some firewall packages treat IPv4 and IPv6 packets the same the flag should have the same value for both. Discussed with: trociny, glebius Modified: head/sys/netinet/ip_var.h head/sys/netinet6/ip6_var.h head/sys/sys/mbuf.h Modified: head/sys/netinet/ip_var.h == --- head/sys/netinet/ip_var.h Mon Aug 19 10:34:10 2013(r254518) +++ head/sys/netinet/ip_var.h Mon Aug 19 11:08:36 2013(r254519) @@ -163,10 +163,12 @@ void kmod_ipstat_dec(int statnum); #define IP_ALLOWBROADCAST SO_BROADCAST/* 0x20 can send broadcast packets */ /* - * mbuf flag used by ip_fastfwd + * IPv4 protocol layer specific mbuf flags. */ #defineM_FASTFWD_OURS M_PROTO1/* changed dst to local */ #defineM_IP_NEXTHOPM_PROTO2/* explicit ip nexthop */ +#defineM_SKIP_FIREWALL M_PROTO3/* skip firewall processing, + keep in sync with IP6 */ #defineM_IP_FRAG M_PROTO4/* fragment reassembly */ #ifdef __NO_STRICT_ALIGNMENT Modified: head/sys/netinet6/ip6_var.h == --- head/sys/netinet6/ip6_var.h Mon Aug 19 10:34:10 2013(r254518) +++ head/sys/netinet6/ip6_var.h Mon Aug 19 11:08:36 2013(r254519) @@ -293,7 +293,12 @@ struct ip6aux { #defineIPV6_FORWARDING 0x02/* most of IPv6 header exists */ #defineIPV6_MINMTU 0x04/* use minimum MTU (IPV6_USE_MIN_MTU) */ -#defineM_IP6_NEXTHOP M_PROTO7/* explicit ip nexthop */ +/* + * IPv6 protocol layer specific mbuf flags. + */ +#defineM_IP6_NEXTHOP M_PROTO2/* explicit ip nexthop */ +#defineM_SKIP_FIREWALL M_PROTO3/* skip firewall processing, + keep in sync with IPv4 */ #ifdef __NO_STRICT_ALIGNMENT #define IP6_HDR_ALIGNED_P(ip) 1 Modified: head/sys/sys/mbuf.h == --- head/sys/sys/mbuf.h Mon Aug 19 10:34:10 2013(r254518) +++ head/sys/sys/mbuf.h Mon Aug 19 11:08:36 2013(r254519) @@ -196,7 +196,7 @@ struct mbuf { #defineM_FRAG 0x0800 /* packet is a fragment of a larger packet */ #defineM_FIRSTFRAG 0x1000 /* packet is first fragment */ #defineM_LASTFRAG 0x2000 /* packet is last fragment */ -#defineM_SKIP_FIREWALL 0x4000 /* skip firewall processing */ +/* 0x4000free */ /* 0x8000free */ #defineM_VLANTAG 0x0001 /* ether_vtag is valid */ #defineM_PROMISC 0x0002 /* packet was not for us */ @@ -253,7 +253,7 @@ struct mbuf { * Flags preserved when copying m_pkthdr. */ #defineM_COPYFLAGS \ -(M_PKTHDR|M_EOR|M_RDONLY|M_PROTOFLAGS|M_SKIP_FIREWALL|M_BCAST|M_MCAST|\ +(M_PKTHDR|M_EOR|M_RDONLY|M_PROTOFLAGS|M_BCAST|M_MCAST|\ M_FRAG|M_FIRSTFRAG|M_LASTFRAG|M_VLANTAG|M_PROMISC|M_HASHTYPEBITS) /* ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r254520 - in head/sys: kern sys
Author: andre Date: Mon Aug 19 11:16:53 2013 New Revision: 254520 URL: http://svnweb.freebsd.org/changeset/base/254520 Log: Remove the unused M_NOFREE mbuf flag. It didn't have any in-tree users for a very long time, if ever. Should such a functionality ever be needed again the appropriate and much better way to do it is through a custom EXT_SOMETHING external mbuf type together with a dedicated *ext_free function. Discussed with: trociny, glebius Modified: head/sys/kern/kern_mbuf.c head/sys/kern/uipc_mbuf.c head/sys/sys/mbuf.h Modified: head/sys/kern/kern_mbuf.c == --- head/sys/kern/kern_mbuf.c Mon Aug 19 11:08:36 2013(r254519) +++ head/sys/kern/kern_mbuf.c Mon Aug 19 11:16:53 2013(r254520) @@ -474,7 +474,6 @@ mb_dtor_mbuf(void *mem, int size, void * if ((flags MB_NOTAGS) == 0 (m-m_flags M_PKTHDR) != 0) m_tag_delete_chain(m, NULL); KASSERT((m-m_flags M_EXT) == 0, (%s: M_EXT set, __func__)); - KASSERT((m-m_flags M_NOFREE) == 0, (%s: M_NOFREE set, __func__)); #ifdef INVARIANTS trash_dtor(mem, size, arg); #endif Modified: head/sys/kern/uipc_mbuf.c == --- head/sys/kern/uipc_mbuf.c Mon Aug 19 11:08:36 2013(r254519) +++ head/sys/kern/uipc_mbuf.c Mon Aug 19 11:16:53 2013(r254520) @@ -278,17 +278,10 @@ m_extadd(struct mbuf *mb, caddr_t buf, u void mb_free_ext(struct mbuf *m) { - int skipmbuf; KASSERT((m-m_flags M_EXT) == M_EXT, (%s: M_EXT not set, __func__)); KASSERT(m-m_ext.ref_cnt != NULL, (%s: ref_cnt not set, __func__)); - - /* -* check if the header is embedded in the cluster -*/ - skipmbuf = (m-m_flags M_NOFREE); - /* Free attached storage if this mbuf is the only reference to it. */ if (*(m-m_ext.ref_cnt) == 1 || atomic_fetchadd_int(m-m_ext.ref_cnt, -1) == 1) { @@ -329,8 +322,6 @@ mb_free_ext(struct mbuf *m) (%s: unknown ext_type, __func__)); } } - if (skipmbuf) - return; /* * Free this mbuf back to the mbuf zone with all m_ext @@ -395,7 +386,7 @@ m_demote(struct mbuf *m0, int all) m_freem(m-m_nextpkt); m-m_nextpkt = NULL; } - m-m_flags = m-m_flags (M_EXT|M_RDONLY|M_NOFREE); + m-m_flags = m-m_flags (M_EXT|M_RDONLY); } } Modified: head/sys/sys/mbuf.h == --- head/sys/sys/mbuf.h Mon Aug 19 11:08:36 2013(r254519) +++ head/sys/sys/mbuf.h Mon Aug 19 11:16:53 2013(r254520) @@ -200,7 +200,7 @@ struct mbuf { /* 0x8000free */ #defineM_VLANTAG 0x0001 /* ether_vtag is valid */ #defineM_PROMISC 0x0002 /* packet was not for us */ -#defineM_NOFREE0x0004 /* do not free mbuf, embedded in cluster */ +/* 0x0004free */ #defineM_PROTO60x0008 /* protocol-specific */ #defineM_PROTO70x0010 /* protocol-specific */ #defineM_PROTO80x0020 /* protocol-specific */ @@ -515,7 +515,7 @@ m_free(struct mbuf *m) if (m-m_flags M_EXT) mb_free_ext(m); - else if ((m-m_flags M_NOFREE) == 0) + else uma_zfree(zone_mbuf, m); return (n); } ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
Re: svn commit: r254520 - in head/sys: kern sys
On 19.08.2013 13:23, Davide Italiano wrote: On Mon, Aug 19, 2013 at 1:16 PM, Andre Oppermann an...@freebsd.org wrote: Modified: head/sys/sys/mbuf.h == --- head/sys/sys/mbuf.h Mon Aug 19 11:08:36 2013(r254519) +++ head/sys/sys/mbuf.h Mon Aug 19 11:16:53 2013(r254520) @@ -200,7 +200,7 @@ struct mbuf { /* 0x8000free */ #defineM_VLANTAG 0x0001 /* ether_vtag is valid */ #defineM_PROMISC 0x0002 /* packet was not for us */ -#defineM_NOFREE0x0004 /* do not free mbuf, embedded in cluster */ +/* 0x0004free */ I think you should just use M_UNUSED or something similar here for consistency, like it's happening in td_pflags (sys/sys/proc.h), e.g.: #define TDP_UNUSED9 0x0100 /* --available-- */ There's a couple of more changes upcoming that will take care of it. -- Andre ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r254521 - in head/sys: netinet sys
Author: andre Date: Mon Aug 19 12:30:18 2013 New Revision: 254521 URL: http://svnweb.freebsd.org/changeset/base/254521 Log: Move the SCTP specific definition of M_NOTIFICATION onto a protocol specific mbuf flag from sys/mbuf.h to netinet/sctp_os_bsd.h. It is only relevant within SCTP. Discussed with: tuexen Modified: head/sys/netinet/sctp_os_bsd.h head/sys/sys/mbuf.h Modified: head/sys/netinet/sctp_os_bsd.h == --- head/sys/netinet/sctp_os_bsd.h Mon Aug 19 11:16:53 2013 (r254520) +++ head/sys/netinet/sctp_os_bsd.h Mon Aug 19 12:30:18 2013 (r254521) @@ -429,6 +429,11 @@ typedef struct rtentry sctp_rtentry_t; #define SCTP_ZERO_COPY_SENDQ_EVENT(inp, so) /* + * SCTP protocol specific mbuf flags. + */ +#defineM_NOTIFICATION M_PROTO5/* SCTP notification */ + +/* * IP output routines */ #define SCTP_IP_OUTPUT(result, o_pak, ro, stcb, vrf_id) \ Modified: head/sys/sys/mbuf.h == --- head/sys/sys/mbuf.h Mon Aug 19 11:16:53 2013(r254520) +++ head/sys/sys/mbuf.h Mon Aug 19 12:30:18 2013(r254521) @@ -207,8 +207,6 @@ struct mbuf { #defineM_FLOWID0x0040 /* deprecated: flowid is valid */ #defineM_HASHTYPEBITS 0x0F00 /* mask of bits holding flowid hash type */ -#defineM_NOTIFICATION M_PROTO5/* SCTP notification */ - /* * Flags to purge when crossing layers. */ ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r254523 - in head/sys: net net80211 netinet netinet6 netpfil/pf ofed/drivers/infiniband/ulp/ipoib sys
Author: andre Date: Mon Aug 19 13:27:32 2013 New Revision: 254523 URL: http://svnweb.freebsd.org/changeset/base/254523 Log: Add m_clrprotoflags() to clear protocol specific mbuf flags at up and downwards layer crossings. Consistently use it within IP, IPv6 and ethernet protocols. Discussed with: trociny, glebius Modified: head/sys/net/if_ethersubr.c head/sys/net80211/ieee80211_hostap.c head/sys/net80211/ieee80211_input.c head/sys/netinet/if_ether.c head/sys/netinet/igmp.c head/sys/netinet/ip_fastfwd.c head/sys/netinet/ip_output.c head/sys/netinet/sctp_os_bsd.h head/sys/netinet6/ip6_mroute.c head/sys/netinet6/mld6.c head/sys/netinet6/nd6.c head/sys/netinet6/send.c head/sys/netpfil/pf/pf.c head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c head/sys/sys/mbuf.h Modified: head/sys/net/if_ethersubr.c == --- head/sys/net/if_ethersubr.c Mon Aug 19 12:37:13 2013(r254522) +++ head/sys/net/if_ethersubr.c Mon Aug 19 13:27:32 2013(r254523) @@ -774,7 +774,7 @@ ether_demux(struct ifnet *ifp, struct mb * Strip off Ethernet header. */ m-m_flags = ~M_VLANTAG; - m-m_flags = ~(M_PROTOFLAGS); + m_clrprotoflags(m); m_adj(m, ETHER_HDR_LEN); /* Modified: head/sys/net80211/ieee80211_hostap.c == --- head/sys/net80211/ieee80211_hostap.cMon Aug 19 12:37:13 2013 (r254522) +++ head/sys/net80211/ieee80211_hostap.cMon Aug 19 13:27:32 2013 (r254523) @@ -355,7 +355,8 @@ hostap_deliver_data(struct ieee80211vap struct ifnet *ifp = vap-iv_ifp; /* clear driver/net80211 flags before passing up */ - m-m_flags = ~(M_80211_RX | M_MCAST | M_BCAST); + m-m_flags = ~(M_MCAST | M_BCAST); + m_clrprotoflags(m); KASSERT(vap-iv_opmode == IEEE80211_M_HOSTAP, (gack, opmode %d, vap-iv_opmode)); Modified: head/sys/net80211/ieee80211_input.c == --- head/sys/net80211/ieee80211_input.c Mon Aug 19 12:37:13 2013 (r254522) +++ head/sys/net80211/ieee80211_input.c Mon Aug 19 13:27:32 2013 (r254523) @@ -250,7 +250,8 @@ ieee80211_deliver_data(struct ieee80211v struct ifnet *ifp = vap-iv_ifp; /* clear driver/net80211 flags before passing up */ - m-m_flags = ~(M_80211_RX | M_MCAST | M_BCAST); + m-m_flags = ~(M_MCAST | M_BCAST); + m_clrprotoflags(m); /* NB: see hostap_deliver_data, this path doesn't handle hostap */ KASSERT(vap-iv_opmode != IEEE80211_M_HOSTAP, (gack, hostap)); Modified: head/sys/netinet/if_ether.c == --- head/sys/netinet/if_ether.c Mon Aug 19 12:37:13 2013(r254522) +++ head/sys/netinet/if_ether.c Mon Aug 19 13:27:32 2013(r254523) @@ -281,6 +281,7 @@ arprequest(struct ifnet *ifp, const stru sa.sa_family = AF_ARP; sa.sa_len = 2; m-m_flags |= M_BCAST; + m_clrprotoflags(m); /* Avoid confusing lower layers. */ (*ifp-if_output)(ifp, m, sa, NULL); ARPSTAT_INC(txrequests); } @@ -784,6 +785,8 @@ match: for (; m_hold != NULL; m_hold = m_hold_next) { m_hold_next = m_hold-m_nextpkt; m_hold-m_nextpkt = NULL; + /* Avoid confusing lower layers. */ + m_clrprotoflags(m_hold); (*ifp-if_output)(ifp, m_hold, sa, NULL); } } else @@ -888,6 +891,7 @@ reply: m-m_pkthdr.rcvif = NULL; sa.sa_family = AF_ARP; sa.sa_len = 2; + m_clrprotoflags(m); /* Avoid confusing lower layers. */ (*ifp-if_output)(ifp, m, sa, NULL); ARPSTAT_INC(txreplies); return; Modified: head/sys/netinet/igmp.c == --- head/sys/netinet/igmp.c Mon Aug 19 12:37:13 2013(r254522) +++ head/sys/netinet/igmp.c Mon Aug 19 13:27:32 2013(r254523) @@ -3450,7 +3450,7 @@ igmp_intr(struct mbuf *m) } igmp_scrub_context(m0); - m-m_flags = ~(M_PROTOFLAGS); + m_clrprotoflags(m); m0-m_pkthdr.rcvif = V_loif; #ifdef MAC mac_netinet_igmp_send(ifp, m0); Modified: head/sys/netinet/ip_fastfwd.c == --- head/sys/netinet/ip_fastfwd.c Mon Aug 19 12:37:13 2013 (r254522) +++ head/sys/netinet/ip_fastfwd.c Mon Aug 19 13:27:32 2013 (r254523) @@ -525,6 +525,10 @@ passout: if (ip_len = mtu || (ifp-if_hwassist CSUM_FRAGMENT (ip_off IP_DF) == 0)) {
svn commit: r254524 - head/sys/sys
Author: andre Date: Mon Aug 19 13:56:14 2013 New Revision: 254524 URL: http://svnweb.freebsd.org/changeset/base/254524 Log: Add four additional M_PROTOFLAGS[9-12] for protocol specific use. Discussed with: trociny, glebius, adrian Modified: head/sys/sys/mbuf.h Modified: head/sys/sys/mbuf.h == --- head/sys/sys/mbuf.h Mon Aug 19 13:27:32 2013(r254523) +++ head/sys/sys/mbuf.h Mon Aug 19 13:56:14 2013(r254524) @@ -196,22 +196,24 @@ struct mbuf { #defineM_FRAG 0x0800 /* packet is a fragment of a larger packet */ #defineM_FIRSTFRAG 0x1000 /* packet is first fragment */ #defineM_LASTFRAG 0x2000 /* packet is last fragment */ -/* 0x4000free */ -/* 0x8000free */ +#defineM_PROTO90x4000 /* protocol-specific */ +#defineM_PROTO10 0x8000 /* protocol-specific */ #defineM_VLANTAG 0x0001 /* ether_vtag is valid */ #defineM_PROMISC 0x0002 /* packet was not for us */ -/* 0x0004free */ +#defineM_PROTO11 0x0004 /* protocol-specific */ #defineM_PROTO60x0008 /* protocol-specific */ #defineM_PROTO70x0010 /* protocol-specific */ #defineM_PROTO80x0020 /* protocol-specific */ #defineM_FLOWID0x0040 /* deprecated: flowid is valid */ +#defineM_PROTO12 0x0080 /* protocol-specific */ #defineM_HASHTYPEBITS 0x0F00 /* mask of bits holding flowid hash type */ /* * Flags to purge when crossing layers. */ #defineM_PROTOFLAGS \ -(M_PROTO1|M_PROTO2|M_PROTO3|M_PROTO4|M_PROTO5|M_PROTO6|M_PROTO7|M_PROTO8) +(M_PROTO1|M_PROTO2|M_PROTO3|M_PROTO4|M_PROTO5|M_PROTO6|M_PROTO7|M_PROTO8|\ + M_PROTO9|M_PROTO10|M_PROTO11|M_PROTO12) /* * Network interface cards are able to hash protocol fields (such as IPv4 ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r254526 - in head/sys: net80211 sys
Author: andre Date: Mon Aug 19 14:07:31 2013 New Revision: 254526 URL: http://svnweb.freebsd.org/changeset/base/254526 Log: Migrate the net80211 protocol specific use of M_FRAG, M_FIRSTFRAG and M_LASTFRAG flags to protocol specific flags. Remove the now unused M_FRAG, M_FIRSTFRAG and M_LASTFRAG mbuf flags. Discussed with: trociny, glebius, adrian Modified: head/sys/net80211/ieee80211_freebsd.h head/sys/sys/mbuf.h Modified: head/sys/net80211/ieee80211_freebsd.h == --- head/sys/net80211/ieee80211_freebsd.h Mon Aug 19 14:04:35 2013 (r254525) +++ head/sys/net80211/ieee80211_freebsd.h Mon Aug 19 14:07:31 2013 (r254526) @@ -234,9 +234,12 @@ struct mbuf *ieee80211_getmgtframe(uint8 #defineM_FFM_PROTO6/* fast frame */ #defineM_TXCB M_PROTO7/* do tx complete callback */ #defineM_AMPDU_MPDUM_PROTO8/* ok for A-MPDU aggregation */ +#defineM_FRAG M_PROTO9/* frame fragmentation */ +#defineM_FIRSTFRAG M_PROTO10 /* first frame fragment */ +#defineM_LASTFRAG M_PROTO11 /* last frame fragment */ #defineM_80211_TX \ - (M_FRAG|M_FIRSTFRAG|M_LASTFRAG|M_ENCAP|M_EAPOL|M_PWR_SAV|\ -M_MORE_DATA|M_FF|M_TXCB|M_AMPDU_MPDU) + (M_ENCAP|M_EAPOL|M_PWR_SAV|M_MORE_DATA|M_FF|M_TXCB| \ +M_AMPDU_MPDU|M_FRAG|M_FIRSTFRAG|M_LASTFRAG) /* rx path usage */ #defineM_AMPDU M_PROTO1/* A-MPDU subframe */ Modified: head/sys/sys/mbuf.h == --- head/sys/sys/mbuf.h Mon Aug 19 14:04:35 2013(r254525) +++ head/sys/sys/mbuf.h Mon Aug 19 14:07:31 2013(r254526) @@ -193,9 +193,9 @@ struct mbuf { #defineM_PROTO50x0100 /* protocol-specific */ #defineM_BCAST 0x0200 /* send/received as link-level broadcast */ #defineM_MCAST 0x0400 /* send/received as link-level multicast */ -#defineM_FRAG 0x0800 /* packet is a fragment of a larger packet */ -#defineM_FIRSTFRAG 0x1000 /* packet is first fragment */ -#defineM_LASTFRAG 0x2000 /* packet is last fragment */ +/* 0x0800free */ +/* 0x1000free */ +/* 0x2000free */ #defineM_PROTO90x4000 /* protocol-specific */ #defineM_PROTO10 0x8000 /* protocol-specific */ #defineM_VLANTAG 0x0001 /* ether_vtag is valid */ @@ -254,7 +254,7 @@ struct mbuf { */ #defineM_COPYFLAGS \ (M_PKTHDR|M_EOR|M_RDONLY|M_PROTOFLAGS|M_BCAST|M_MCAST|\ - M_FRAG|M_FIRSTFRAG|M_LASTFRAG|M_VLANTAG|M_PROMISC|M_HASHTYPEBITS) + M_VLANTAG|M_PROMISC|M_HASHTYPEBITS) /* * External buffer types: identify ext_buf type. ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r254527 - in head/sys: net80211 netinet sys
Author: andre Date: Mon Aug 19 14:25:11 2013 New Revision: 254527 URL: http://svnweb.freebsd.org/changeset/base/254527 Log: Reorder the mbuf defines to make more sense and group related flags together. Add M_FLAG_PRINTF for use with printf(9) %b indentifier. Use the generic mbuf flags print names in the net80211 code and adjust the protocol specific bits for their new positions. Change SCTP M_PROTO mapping from 5 to 1 to fit within the 16bit field they use internally to store some additional information. Discussed with: trociny, glebius Modified: head/sys/net80211/ieee80211_freebsd.h head/sys/netinet/sctp_os_bsd.h head/sys/sys/mbuf.h Modified: head/sys/net80211/ieee80211_freebsd.h == --- head/sys/net80211/ieee80211_freebsd.h Mon Aug 19 14:07:31 2013 (r254526) +++ head/sys/net80211/ieee80211_freebsd.h Mon Aug 19 14:25:11 2013 (r254527) @@ -250,16 +250,13 @@ struct mbuf *ieee80211_getmgtframe(uint8 #defineM_80211_RX (M_AMPDU|M_WEP|M_AMPDU_MPDU) #defineIEEE80211_MBUF_TX_FLAG_BITS \ - \20\1M_EXT\2M_PKTHDR\3M_EOR\4M_RDONLY\5M_ENCAP\6M_WEP\7M_EAPOL \ - \10M_PWR_SAV\11M_MORE_DATA\12M_BCAST\13M_MCAST\14M_FRAG\15M_FIRSTFRAG \ - \16M_LASTFRAG\17M_SKIP_FIREWALL\20M_FREELIST\21M_VLANTAG\22M_PROMISC \ - \23M_NOFREE\24M_FF\25M_TXCB\26M_AMPDU_MPDU\27M_FLOWID + M_FLAG_BITS \ + \15M_ENCAP\17M_EAPOL\20M_PWR_SAV\21M_MORE_DATA\22M_FF\23M_TXCB \ + \24M_AMPDU_MPDU\25M_FRAG\26M_FIRSTFRAG\27M_LASTFRAG #defineIEEE80211_MBUF_RX_FLAG_BITS \ - \20\1M_EXT\2M_PKTHDR\3M_EOR\4M_RDONLY\5M_AMPDU\6M_WEP\7M_PROTO3 \ - \10M_PROTO4\11M_PROTO5\12M_BCAST\13M_MCAST\14M_FRAG\15M_FIRSTFRAG \ - \16M_LASTFRAG\17M_SKIP_FIREWALL\20M_FREELIST\21M_VLANTAG\22M_PROMISC \ - \23M_NOFREE\24M_PROTO6\25M_PROTO7\26M_AMPDU_MPDU\27M_FLOWID + M_FLAG_BITS \ + \15M_AMPDU\16M_WEP\24M_AMPDU_MPDU /* * Store WME access control bits in the vlan tag. Modified: head/sys/netinet/sctp_os_bsd.h == --- head/sys/netinet/sctp_os_bsd.h Mon Aug 19 14:07:31 2013 (r254526) +++ head/sys/netinet/sctp_os_bsd.h Mon Aug 19 14:25:11 2013 (r254527) @@ -431,7 +431,7 @@ typedef struct rtentry sctp_rtentry_t; /* * SCTP protocol specific mbuf flags. */ -#defineM_NOTIFICATION M_PROTO5/* SCTP notification */ +#defineM_NOTIFICATION M_PROTO1/* SCTP notification */ /* * IP output routines Modified: head/sys/sys/mbuf.h == --- head/sys/sys/mbuf.h Mon Aug 19 14:07:31 2013(r254526) +++ head/sys/sys/mbuf.h Mon Aug 19 14:25:11 2013(r254527) @@ -186,26 +186,25 @@ struct mbuf { #defineM_PKTHDR0x0002 /* start of record */ #defineM_EOR 0x0004 /* end of record */ #defineM_RDONLY0x0008 /* associated data is marked read-only */ -#defineM_PROTO10x0010 /* protocol-specific */ -#defineM_PROTO20x0020 /* protocol-specific */ -#defineM_PROTO30x0040 /* protocol-specific */ -#defineM_PROTO40x0080 /* protocol-specific */ -#defineM_PROTO50x0100 /* protocol-specific */ -#defineM_BCAST 0x0200 /* send/received as link-level broadcast */ -#defineM_MCAST 0x0400 /* send/received as link-level multicast */ -/* 0x0800free */ -/* 0x1000free */ -/* 0x2000free */ -#defineM_PROTO90x4000 /* protocol-specific */ -#defineM_PROTO10 0x8000 /* protocol-specific */ -#defineM_VLANTAG 0x0001 /* ether_vtag is valid */ -#defineM_PROMISC 0x0002 /* packet was not for us */ -#defineM_PROTO11 0x0004 /* protocol-specific */ -#defineM_PROTO60x0008 /* protocol-specific */ -#defineM_PROTO70x0010 /* protocol-specific */ -#defineM_PROTO80x0020 /* protocol-specific */ -#defineM_FLOWID0x0040 /* deprecated: flowid is valid */ +#defineM_BCAST 0x0010 /* send/received as link-level broadcast */ +#defineM_MCAST 0x0020 /* send/received as link-level multicast */ +#defineM_PROMISC 0x0040 /* packet was not for us */ +#defineM_VLANTAG 0x0080 /* ether_vtag is valid */ +#defineM_FLOWID0x0100 /* deprecated: flowid is valid */ + +#defineM_PROTO10x1000 /* protocol-specific */ +#defineM_PROTO20x2000 /* protocol-specific */ +#defineM_PROTO30x4000
svn commit: r254537 - head/sys/sys
Author: andre Date: Mon Aug 19 16:47:06 2013 New Revision: 254537 URL: http://svnweb.freebsd.org/changeset/base/254537 Log: Bump __FreeBSD_version to 146 after the addition of M_PROTO[9-12] and removal of M_NOFREE|M_FRAG|M_FIRSTFRAG|M_LASTFRAG mbuf flags. Modified: head/sys/sys/param.h Modified: head/sys/sys/param.h == --- head/sys/sys/param.hMon Aug 19 16:16:49 2013(r254536) +++ head/sys/sys/param.hMon Aug 19 16:47:06 2013(r254537) @@ -58,7 +58,7 @@ * in the range 5 to 9. */ #undef __FreeBSD_version -#define __FreeBSD_version 145 /* Master, propagated to newvers */ +#define __FreeBSD_version 146 /* Master, propagated to newvers */ /* * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD, ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
Re: svn commit: r254527 - in head/sys: net80211 netinet sys
On 19.08.2013 17:12, Adrian Chadd wrote: Hi, Would you please bump FreeBSD_version ? Done: New Revision: 254537 URL: http://svnweb.freebsd.org/changeset/base/254537 Log: Bump __FreeBSD_version to 146 after the addition of M_PROTO[9-12] and removal of M_NOFREE|M_FRAG|M_FIRSTFRAG|M_LASTFRAG mbuf flags. -- Andre ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
Re: svn commit: r254520 - in head/sys: kern sys
On 19.08.2013 19:33, Navdeep Parhar wrote: On 08/19/13 04:16, Andre Oppermann wrote: Author: andre Date: Mon Aug 19 11:16:53 2013 New Revision: 254520 URL: http://svnweb.freebsd.org/changeset/base/254520 Log: Remove the unused M_NOFREE mbuf flag. It didn't have any in-tree users for a very long time, if ever. Should such a functionality ever be needed again the appropriate and much better way to do it is through a custom EXT_SOMETHING external mbuf type together with a dedicated *ext_free function. Discussed with: trociny, glebius Modified: head/sys/kern/kern_mbuf.c head/sys/kern/uipc_mbuf.c head/sys/sys/mbuf.h Hello Andre, Is this just garbage collection or is there some other reason for this? This is garbage collection and removal of not quite right, rotten, functionality. I recently tried some experiments to reduce the number of mbuf and cluster allocations in a 40G NIC driver. M_NOFREE and EXT_EXTREF proved very useful and the code changes to the kernel were minimal. See user/np/cxl_tuning. The experiment was quite successful and I was planning to bring in most of those changes to HEAD. I was hoping to get some runtime mileage on the approach in general before tweaking the ctors/dtors for jumpbo, jumbo9, jumbo16 to allow for an mbuf+refcnt within the cluster. But now M_NOFREE has vanished without a warning... I'm looking through your experimental code and that is some really good numbers you're achieving there! However a couple things don't feel quite right, hackish even, and not fit for HEAD. This is a bit the same situation we had with some of the first 1GigE cards quite a number of years back (mostly ti(4)). There we ended up with a couple of just good enough hacks to make it fast. Most of the remains I've collected today. I believe most of the improvements you've shown can be implemented in a more generic and safe way into the mbuf system. Also a number of things in your experimental code may have side-effects in situations other than netperf runs. To summarize what I get from your experimental branch commits: - the Chelsio T4/T5 card can DMA multiple ethernet frames (packets) into a single memory buffer, provided it is large enough. - you make use of that feature and point multiple m_ext mbufs into that buffer to separate the packets and send them up the stack. - you embed the m_ext refcount into the single memory buffer as well. - you recycle mbufs? (I'm not entirely clear on that as I'm not familiar with the cxgbe code) Lets examine and discuss these parts: - M_NOFREE wasn't really safe with bpf anyway at least for packets going down the stack. - Instead of M_NOFREE a custom *ext_free should be used that has the same and even more functionality. - Recycling mbufs may cause imbalances to the mbuf pool with multiple cores and possibly NUMA in the future. Not that we are very good with it at the moment but bypassing the mbuf allocator shouldn't become the norm. If it is a problem/bottleneck again it should be fixed, not worked around and copy-pasted n-times in so many drivers. - jumbo9 and jumbo16 mbufs should not be used because they are more special than necessary with being KVM and physically contiguous. This probably isn't necessary for the T4/T5 cards and any other modern DMA engine. Under heavy diverse network the kernel memory becomes fragmented and can't find memory fulfilling both criteria anymore. In fact both are an artifact of the early GigE hacks when high speed DMA engines were in their infancy. Both jumbo9 and jumbo16 should go away without direct replacement. In your T4/T5 case the alternative would be either to a) allocate your own memory directly from KVM with the necessary properties (KVM and/or phys contig); b) have such a generic kernel mbuf type fulfilling the same role. There may be some cache line issues on non-x86 systems that have to be though and taken care of. - Refcounts should have the option of being separately allocated. It was mandatory to use the refcount zone so far because there wasn't any other type of refcount. Now that we have one we should revisit the issue. Actually the entire mbuf allocation and initialization could be streamlined as a whole. On a side note a different mbuf handling for the RX DMA rings may give some significant improvements as well: allocate just a cluster without a mbuf through m_cljget() and put it into the RX ring. Only when the DMA has put a packet into it allocated attach the mbuf (in the drivers RX function). This avoids the cache pollution from touching the mbuf fields during initial allocation, including the refcount. Another nice trick would be to shorten the mbuf by 4 bytes (in ext_size) and put the refcount there. Lets work on these together. -- Andre ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe
Re: svn commit: r254520 - in head/sys: kern sys
On 19.08.2013 19:40, Peter Grehan wrote: I recently tried some experiments to reduce the number of mbuf and cluster allocations in a 40G NIC driver. M_NOFREE and EXT_EXTREF proved very useful and the code changes to the kernel were minimal. See user/np/cxl_tuning. The experiment was quite successful and I was planning to bring in most of those changes to HEAD. I was hoping to get some runtime mileage on the approach in general before tweaking the ctors/dtors for jumpbo, jumbo9, jumbo16 to allow for an mbuf+refcnt within the cluster. But now M_NOFREE has vanished without a warning... I also had a virtualization work-in-progress where static mbufs were allocated in the kernel and M_NOFREE set. Might be worth sending a prior heads-up for these type of changes. I'm sorry for ambushing but this stuff has to be done. I have provided an alternative way of handling it and I'm happy to help you with your use case to make it good for you and to prevent the mbuf system from getting bloated and hackish again. Mbuf is a core system for the kernel and we should avoid to kitchen-sink it again while at the same time to keep speedy enough to keep up with the speed requirements. I believe it would be bad to have Navdeep, you and others invent their own network buffer management routines over again in slightly different ways tailored to each immediate use case. Can you please describe your intended use of M_NOFREE to better understand the shortcomings of the current mbuf systems and the additional advantages of the M_NOFREE case? -- Andre ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r253395 - head/sys/netinet
Author: andre Date: Tue Jul 16 16:37:08 2013 New Revision: 253395 URL: http://svnweb.freebsd.org/changeset/base/253395 Log: Free the non-fatal timestamp missing debug string manually as it is not covered by the catch-all free for the error cases. Found by: Coverity Modified: head/sys/netinet/tcp_syncache.c Modified: head/sys/netinet/tcp_syncache.c == --- head/sys/netinet/tcp_syncache.c Tue Jul 16 15:51:32 2013 (r253394) +++ head/sys/netinet/tcp_syncache.c Tue Jul 16 16:37:08 2013 (r253395) @@ -1041,9 +1041,12 @@ syncache_expand(struct in_conninfo *inc, * reports of non-compliants stacks. */ if ((sc-sc_flags SCF_TIMESTAMP) !(to-to_flags TOF_TS)) { - if ((s = tcp_log_addrs(inc, th, NULL, NULL))) + if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { log(LOG_DEBUG, %s; %s: Timestamp missing, no action\n, s, __func__); + free(s, M_TCPLOG); + s = NULL; + } } /* ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
Re: svn commit: r253210 - in head/sys: conf netinet
On 15.07.2013 20:38, Mikolaj Golub wrote: On Mon, Jul 15, 2013 at 11:36:16AM +0200, Ulrich Spörlein wrote: Hey Andre, I don't see why this commit triggers it, but Coverity Scan found a new resource leak in this file. syncache_expand() will leak *s when line 1071 is reached. The failed: case below correctly frees the resources. 1068/* how do we find the inp for the new socket? */ 22. Condition sc != scs, taking true branch 1069if (sc != scs) 1070syncache_free(sc); CID null (#1 of 1): Resource leak (RESOURCE_LEAK) 23. leaked_storage: Variable s going out of scope leaks the storage it points to. 1071return (1); 1072failed: 1073if (sc != NULL sc != scs) 1074syncache_free(sc); 1075if (s != NULL) 1076free(s, M_TCPLOG); 1077*lsop = NULL; 1078return (0); 1079} It looks like free(s, M_TCPLOG) is missed in this branch: 1043 if ((sc-sc_flags SCF_TIMESTAMP) !(to-to_flags TOF_TS)) { 1044 if ((s = tcp_log_addrs(inc, th, NULL, NULL))) 1045 log(LOG_DEBUG, %s; %s: Timestamp missing, 1046 no action\n, s, __func__); 1047 } Yes, I just figured that out and prepared a patch. -- Andre ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r253254 - head/sys/netinet
Author: andre Date: Fri Jul 12 07:43:56 2013 New Revision: 253254 URL: http://svnweb.freebsd.org/changeset/base/253254 Log: Unbreak VIMAGE by correctly naming the vnet pointer in struct tcp_syncache. Reported by: trociny, rodrigc Modified: head/sys/netinet/tcp_syncache.h Modified: head/sys/netinet/tcp_syncache.h == --- head/sys/netinet/tcp_syncache.h Fri Jul 12 06:54:29 2013 (r253253) +++ head/sys/netinet/tcp_syncache.h Fri Jul 12 07:43:56 2013 (r253254) @@ -118,7 +118,7 @@ struct tcp_syncache { u_int cache_limit; u_int rexmt_limit; u_int hash_secret; - struct vnet *sch_vnet; + struct vnet *vnet; struct syncookie_secret secret; }; ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
Re: svn commit: r253254 - head/sys/netinet
On 12.07.2013 09:43, Andre Oppermann wrote: Author: andre Date: Fri Jul 12 07:43:56 2013 New Revision: 253254 URL: http://svnweb.freebsd.org/changeset/base/253254 Log: Unbreak VIMAGE by correctly naming the vnet pointer in struct tcp_syncache. Reported by: trociny, rodrigc Sorry for the breakage. I'm extending my test protocol to include VIMAGE, INET-only and INET6-only builds. -- Andre ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r253204 - head/sys/kern
Author: andre Date: Thu Jul 11 12:46:35 2013 New Revision: 253204 URL: http://svnweb.freebsd.org/changeset/base/253204 Log: Fix style issues, a typo in kern.ipc.nmbufs and correctly plave and expose the value of the tunable maxmbufmem as kern.ipc.maxmbufmem through sysctl. Reported by: smh MFC after:1 day Modified: head/sys/kern/kern_mbuf.c Modified: head/sys/kern/kern_mbuf.c == --- head/sys/kern/kern_mbuf.c Thu Jul 11 12:42:46 2013(r253203) +++ head/sys/kern/kern_mbuf.c Thu Jul 11 12:46:35 2013(r253204) @@ -104,13 +104,18 @@ int nmbjumbo9;/* limits number of 9k int nmbjumbo16;/* limits number of 16k jumbo clusters */ struct mbstat mbstat; +static quad_t maxmbufmem; /* overall real memory limit for all mbufs */ + +SYSCTL_QUAD(_kern_ipc, OID_AUTO, maxmbufmem, CTLFLAG_RDTUN, maxmbufmem, 0, +Maximum real memory allocateable to various mbuf types); + /* * tunable_mbinit() has to be run before any mbuf allocations are done. */ static void tunable_mbinit(void *dummy) { - quad_t realmem, maxmbufmem; + quad_t realmem; /* * The default limit for all mbuf related memory is 1/2 of all @@ -120,7 +125,7 @@ tunable_mbinit(void *dummy) realmem = qmin((quad_t)physmem * PAGE_SIZE, vm_map_max(kmem_map) - vm_map_min(kmem_map)); maxmbufmem = realmem / 2; - TUNABLE_QUAD_FETCH(kern.maxmbufmem, maxmbufmem); + TUNABLE_QUAD_FETCH(kern.ipc.maxmbufmem, maxmbufmem); if (maxmbufmem realmem / 4 * 3) maxmbufmem = realmem / 4 * 3; @@ -204,7 +209,7 @@ sysctl_nmbjumbo9(SYSCTL_HANDLER_ARGS) newnmbjumbo9 = nmbjumbo9; error = sysctl_handle_int(oidp, newnmbjumbo9, 0, req); if (error == 0 req-newptr) { - if (newnmbjumbo9 nmbjumbo9 + if (newnmbjumbo9 nmbjumbo9 nmbufs = nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { nmbjumbo9 = newnmbjumbo9; uma_zone_set_max(zone_jumbo9, nmbjumbo9); @@ -258,7 +263,7 @@ sysctl_nmbufs(SYSCTL_HANDLER_ARGS) } return (error); } -SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbuf, CTLTYPE_INT|CTLFLAG_RW, +SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbufs, CTLTYPE_INT|CTLFLAG_RW, nmbufs, 0, sysctl_nmbufs, IU, Maximum number of mbufs allowed); ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r253207 - head/sys/kern
Author: andre Date: Thu Jul 11 12:53:13 2013 New Revision: 253207 URL: http://svnweb.freebsd.org/changeset/base/253207 Log: Make use of the fact that uma_zone_set_max(9) already returns the rounded limit making a call to uma_zone_get_max(9) unnecessary. MFC after:1 day Modified: head/sys/kern/kern_mbuf.c Modified: head/sys/kern/kern_mbuf.c == --- head/sys/kern/kern_mbuf.c Thu Jul 11 12:49:12 2013(r253206) +++ head/sys/kern/kern_mbuf.c Thu Jul 11 12:53:13 2013(r253207) @@ -167,8 +167,7 @@ sysctl_nmbclusters(SYSCTL_HANDLER_ARGS) if (newnmbclusters nmbclusters nmbufs = nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { nmbclusters = newnmbclusters; - uma_zone_set_max(zone_clust, nmbclusters); - nmbclusters = uma_zone_get_max(zone_clust); + nmbclusters = uma_zone_set_max(zone_clust, nmbclusters); EVENTHANDLER_INVOKE(nmbclusters_change); } else error = EINVAL; @@ -190,8 +189,7 @@ sysctl_nmbjumbop(SYSCTL_HANDLER_ARGS) if (newnmbjumbop nmbjumbop nmbufs = nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { nmbjumbop = newnmbjumbop; - uma_zone_set_max(zone_jumbop, nmbjumbop); - nmbjumbop = uma_zone_get_max(zone_jumbop); + nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop); } else error = EINVAL; } @@ -212,8 +210,7 @@ sysctl_nmbjumbo9(SYSCTL_HANDLER_ARGS) if (newnmbjumbo9 nmbjumbo9 nmbufs = nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { nmbjumbo9 = newnmbjumbo9; - uma_zone_set_max(zone_jumbo9, nmbjumbo9); - nmbjumbo9 = uma_zone_get_max(zone_jumbo9); + nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9); } else error = EINVAL; } @@ -234,8 +231,7 @@ sysctl_nmbjumbo16(SYSCTL_HANDLER_ARGS) if (newnmbjumbo16 nmbjumbo16 nmbufs = nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) { nmbjumbo16 = newnmbjumbo16; - uma_zone_set_max(zone_jumbo16, nmbjumbo16); - nmbjumbo16 = uma_zone_get_max(zone_jumbo16); + nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16); } else error = EINVAL; } @@ -255,8 +251,7 @@ sysctl_nmbufs(SYSCTL_HANDLER_ARGS) if (error == 0 req-newptr) { if (newnmbufs nmbufs) { nmbufs = newnmbufs; - uma_zone_set_max(zone_mbuf, nmbufs); - nmbufs = uma_zone_get_max(zone_mbuf); + nmbufs = uma_zone_set_max(zone_mbuf, nmbufs); EVENTHANDLER_INVOKE(nmbufs_change); } else error = EINVAL; ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r253208 - head/sys/crypto/siphash
Author: andre Date: Thu Jul 11 14:18:38 2013 New Revision: 253208 URL: http://svnweb.freebsd.org/changeset/base/253208 Log: SipHash is a cryptographically strong pseudo-random function (a.k.a. keyed hash function) optimized for speed on short messages returning a 64bit hash/ digest value. SipHash is simpler and much faster than other secure MACs and competitive in speed with popular non-cryptographic hash functions. It uses a 128-bit key without the hidden cost of a key expansion step. SipHash iterates a simple round function consisting of four additions, four xors, and six rotations, interleaved with xors of message blocks for a pre-defined number of compression and finalization rounds. The absence of secret load/store addresses or secret branch conditions avoid timing attacks. No state is shared between messages. Hashing is deterministic and doesn't use nonces. It is not susceptible to length extension attacks. Target applications include network traffic authentication, message authentication (MAC) and hash-tables protection against hash-flooding denial-of-service attacks. The number of update/finalization rounds is defined during initialization: SipHash24_Init() for the fast and reasonable strong version. SipHash48_Init() for the strong version (half as fast). SipHash usage is similar to other hash functions: struct SIPHASH_CTX ctx; char *k = 16bytes long key char *s = string; uint64_t h = 0; SipHash24_Init(ctx); SipHash_SetKey(ctx, k); SipHash_Update(ctx, s, strlen(s)); SipHash_Final(h, ctx); /* or */ h = SipHash_End(ctx);/* or */ h = SipHash24(ctx, k, s, strlen(s)); It was designed by Jean-Philippe Aumasson and Daniel J. Bernstein and is described in the paper SipHash: a fast short-input PRF, 2012.09.18: https://131002.net/siphash/siphash.pdf Permanent ID: b9a943a805fbfc6fde808af9fc0ecdfa Implemented by: andre (based on the paper) Reviewed by: cperciva Added: head/sys/crypto/siphash/ head/sys/crypto/siphash/siphash.c (contents, props changed) head/sys/crypto/siphash/siphash.h (contents, props changed) head/sys/crypto/siphash/siphash_test.c (contents, props changed) Added: head/sys/crypto/siphash/siphash.c == --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/sys/crypto/siphash/siphash.c Thu Jul 11 14:18:38 2013 (r253208) @@ -0,0 +1,241 @@ +/*- + * Copyright (c) 2013 Andre Oppermann an...@freebsd.org + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + *notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + *notice, this list of conditions and the following disclaimer in the + *documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote + *products derived from this software without specific prior written + *permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +/* + * SipHash is a family of PRFs SipHash-c-d where the integer parameters c and d + * are the number of compression rounds and the number of finalization rounds. + * A compression round is identical to a finalization round and this round + * function is called SipRound. Given a 128-bit key k and a (possibly empty) + * byte string m, SipHash-c-d returns a 64-bit value SipHash-c-d(k; m). + * + * Implemented from the paper SipHash: a fast short-input PRF, 2012.09.18, + * by Jean-Philippe Aumasson and Daniel J. Bernstein, + * Permanent Document ID b9a943a805fbfc6fde808af9fc0ecdfa + * https://131002.net/siphash/siphash.pdf + * https://131002.net/siphash/ + */ + +#include sys/cdefs.h +__FBSDID($FreeBSD$); + +#include sys/param.h +#include sys/types.h +#include sys/systm.h +#include sys/libkern.h +#include sys/endian.h + +#include crypto/siphash/siphash.h + +static voidSipRounds
svn commit: r253210 - in head/sys: conf netinet
Author: andre Date: Thu Jul 11 15:29:25 2013 New Revision: 253210 URL: http://svnweb.freebsd.org/changeset/base/253210 Log: Improve SYN cookies by encoding the MSS, WSCALE (window scaling) and SACK information into the ISN (initial sequence number) without the additional use of timestamp bits and switching to the very fast and cryptographically strong SipHash-2-4 MAC hash algorithm to protect the SYN cookie against forgeries. The purpose of SYN cookies is to encode all necessary session state in the 32 bits of our initial sequence number to avoid storing any information locally in memory. This is especially important when under heavy spoofed SYN attacks where we would either run out of memory or the syncache would fill with bogus connection attempts swamping out legitimate connections. The original SYN cookies method only stored an indexed MSS values in the cookie. This isn't sufficient anymore and breaks down in the presence of WSCALE information which is only exchanged during SYN and SYN-ACK. If we can't keep track of it then we may severely underestimate the available send or receive window. This is compounded with large windows whose size information on the TCP segment header is even lower numerically. A number of years back SYN cookies were extended to store the additional state in the TCP timestamp fields, if available on a connection. While timestamps are common among the BSD, Linux and other *nix systems Windows never enabled them by default and thus are not present for the vast majority of clients seen on the Internet. The common parameters used on TCP sessions have changed quite a bit since SYN cookies very invented some 17 years ago. Today we have a lot more bandwidth available making the use window scaling almost mandatory. Also SACK has become standard making recovering from packet loss much more efficient. This change moves all necessary information into the ISS removing the need for timestamps. Both the MSS (16 bits) and send WSCALE (4 bits) are stored in 3 bit indexed form together with a single bit for SACK. While this is significantly less than the original range, it is sufficient to encode all common values with minimal rounding. The MSS depends on the MTU of the path and with the dominance of ethernet the main value seen is around 1460 bytes. Encapsulations for DSL lines and some other overheads reduce it by a few more bytes for many connections seen. Rounding down to the next lower value in some cases isn't a problem as we send only slightly more packets for the same amount of data. The send WSCALE index is bit more tricky as rounding down under-estimates the available send space available towards the remote host, however a small number values dominate and are carefully selected again. The receive WSCALE isn't encoded at all but recalculated based on the local receive socket buffer size when a valid SYN cookie returns. A listen socket buffer size is unlikely to change while active. The index values for MSS and WSCALE are selected for minimal rounding errors based on large traffic surveys. These values have to be periodically validated against newer traffic surveys adjusting the arrays tcp_sc_msstab[] and tcp_sc_wstab[] if necessary. In addition the hash MAC to protect the SYN cookies is changed from MD5 to SipHash-2-4, a much faster and cryptographically secure algorithm. Reviewed by: dwmalone Tested by:Fabian Keil f...@fabiankeil.de Modified: head/sys/conf/files head/sys/netinet/tcp_syncache.c head/sys/netinet/tcp_syncache.h Modified: head/sys/conf/files == --- head/sys/conf/files Thu Jul 11 15:02:38 2013(r253209) +++ head/sys/conf/files Thu Jul 11 15:29:25 2013(r253210) @@ -547,6 +547,8 @@ crypto/sha1.c optional carp | crypto | netgraph_mppc_encryption | sctp crypto/sha2/sha2.c optional crypto | geom_bde | ipsec | random | \ sctp | zfs +crypto/siphash/siphash.c optional inet | inet6 +crypto/siphash/siphash_test.c optional inet | inet6 ddb/db_access.coptional ddb ddb/db_break.c optional ddb ddb/db_capture.c optional ddb Modified: head/sys/netinet/tcp_syncache.c == --- head/sys/netinet/tcp_syncache.c Thu Jul 11 15:02:38 2013 (r253209) +++ head/sys/netinet/tcp_syncache.c Thu Jul 11 15:29:25 2013 (r253210) @@ -1,12 +1,12 @@ /*- * Copyright (c) 2001 McAfee, Inc. - * Copyright (c) 2006 Andre Oppermann, Internet Business Solutions AG + * Copyright (c) 2006,2013 Andre Oppermann, Internet Business Solutions AG * All rights reserved. * * This software
Re: svn commit: r253208 - head/sys/crypto/siphash
On 11.07.2013 16:18, Andre Oppermann wrote: Author: andre Date: Thu Jul 11 14:18:38 2013 New Revision: 253208 URL: http://svnweb.freebsd.org/changeset/base/253208 Log: SipHash is a cryptographically strong pseudo-random function (a.k.a. keyed hash function) optimized for speed on short messages returning a 64bit hash/ digest value. ... The number of update/finalization rounds is defined during initialization: SipHash24_Init() for the fast and reasonable strong version. SipHash48_Init() for the strong version (half as fast). SipHash usage is similar to other hash functions: struct SIPHASH_CTX ctx; char *k = 16bytes long key char *s = string; uint64_t h = 0; SipHash24_Init(ctx); SipHash_SetKey(ctx, k); SipHash_Update(ctx, s, strlen(s)); SipHash_Final(h, ctx); /* or */ h = SipHash_End(ctx);/* or */ h = SipHash24(ctx, k, s, strlen(s)); Looking for a man page whiz to hack one up. :) -- Andre ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r253214 - head/sys/crypto/siphash
Author: andre Date: Thu Jul 11 16:27:11 2013 New Revision: 253214 URL: http://svnweb.freebsd.org/changeset/base/253214 Log: Fix const propagation issues to make GCC happy. Submitted by: Michael Butler i...@protected-networks.net Modified: head/sys/crypto/siphash/siphash.c Modified: head/sys/crypto/siphash/siphash.c == --- head/sys/crypto/siphash/siphash.c Thu Jul 11 15:55:57 2013 (r253213) +++ head/sys/crypto/siphash/siphash.c Thu Jul 11 16:27:11 2013 (r253214) @@ -119,7 +119,8 @@ SipBuf(SIPHASH_CTX *ctx, const uint8_t * void SipHash_Update(SIPHASH_CTX *ctx, const void *src, size_t len) { - uint64_t m, *p; + uint64_t m; + const uint64_t *p; const uint8_t *s; size_t rem; @@ -144,13 +145,13 @@ SipHash_Update(SIPHASH_CTX *ctx, const v /* Optimze for 64bit aligned/unaligned access. */ if (((uintptr_t)s 0x7) == 0) { - for (p = (uint64_t *)s; len 0; len--, p++) { + for (p = (const uint64_t *)s; len 0; len--, p++) { m = le64toh(*p); ctx-v[3] ^= m; SipRounds(ctx, 0); ctx-v[0] ^= m; } - s = (uint8_t *)p; + s = (const uint8_t *)p; } else { for (; len 0; len--, s += 8) { m = le64dec(s); ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r253150 - head/sys/netinet
Author: andre Date: Wed Jul 10 12:06:01 2013 New Revision: 253150 URL: http://svnweb.freebsd.org/changeset/base/253150 Log: Extend debug logging of TCP timestamp related specification violations. Update related comments and style. Modified: head/sys/netinet/tcp_input.c head/sys/netinet/tcp_syncache.c Modified: head/sys/netinet/tcp_input.c == --- head/sys/netinet/tcp_input.cWed Jul 10 10:57:09 2013 (r253149) +++ head/sys/netinet/tcp_input.cWed Jul 10 12:06:01 2013 (r253150) @@ -1446,6 +1446,8 @@ tcp_do_segment(struct mbuf *m, struct tc int thflags, acked, ourfinisacked, needoutput = 0; int rstreason, todrop, win; u_long tiwin; + char *s; + struct in_conninfo *inc; struct tcpopt to; #ifdef TCPDEBUG @@ -1458,6 +1460,7 @@ tcp_do_segment(struct mbuf *m, struct tc short ostate = 0; #endif thflags = th-th_flags; + inc = tp-t_inpcb-inp_inc; tp-sackhint.last_sack_ack = 0; /* @@ -1546,6 +1549,24 @@ tcp_do_segment(struct mbuf *m, struct tc if (TSTMP_GT(to.to_tsecr, tcp_ts_getticks())) to.to_tsecr = 0; } + /* +* If timestamps were negotiated during SYN/ACK they should +* appear on every segment during this session and vice versa. +*/ + if ((tp-t_flags TF_RCVD_TSTMP) !(to.to_flags TOF_TS)) { + if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { + log(LOG_DEBUG, %s; %s: Timestamp missing, + no action\n, s, __func__); + free(s, M_TCPLOG); + } + } + if (!(tp-t_flags TF_RCVD_TSTMP) (to.to_flags TOF_TS)) { + if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { + log(LOG_DEBUG, %s; %s: Timestamp not expected, + no action\n, s, __func__); + free(s, M_TCPLOG); + } + } /* * Process options only when we get SYN/ACK back. The SYN case @@ -2213,15 +2234,14 @@ tcp_do_segment(struct mbuf *m, struct tc */ if ((so-so_state SS_NOFDREF) tp-t_state TCPS_CLOSE_WAIT tlen) { - char *s; - KASSERT(ti_locked == TI_WLOCKED, (%s: SS_NOFDEREF CLOSE_WAIT tlen ti_locked %d, __func__, ti_locked)); INP_INFO_WLOCK_ASSERT(V_tcbinfo); - if ((s = tcp_log_addrs(tp-t_inpcb-inp_inc, th, NULL, NULL))) { - log(LOG_DEBUG, %s; %s: %s: Received %d bytes of data after socket - was closed, sending RST and removing tcpcb\n, + if ((s = tcp_log_addrs(inc, th, NULL, NULL))) { + log(LOG_DEBUG, %s; %s: %s: Received %d bytes of data + after socket was closed, + sending RST and removing tcpcb\n, s, __func__, tcpstates[tp-t_state], tlen); free(s, M_TCPLOG); } Modified: head/sys/netinet/tcp_syncache.c == --- head/sys/netinet/tcp_syncache.c Wed Jul 10 10:57:09 2013 (r253149) +++ head/sys/netinet/tcp_syncache.c Wed Jul 10 12:06:01 2013 (r253150) @@ -992,12 +992,29 @@ syncache_expand(struct in_conninfo *inc, goto failed; } + /* +* If timestamps were not negotiated during SYN/ACK they +* must not appear on any segment during this session. +*/ if (!(sc-sc_flags SCF_TIMESTAMP) (to-to_flags TOF_TS)) { if ((s = tcp_log_addrs(inc, th, NULL, NULL))) log(LOG_DEBUG, %s; %s: Timestamp not expected, segment rejected\n, s, __func__); goto failed; } + + /* +* If timestamps were negotiated during SYN/ACK they should +* appear on every segment during this session. +* XXXAO: This is only informal as there have been unverified +* reports of non-compliants stacks. +*/ + if ((sc-sc_flags SCF_TIMESTAMP) !(to-to_flags TOF_TS)) { + if ((s = tcp_log_addrs(inc, th, NULL, NULL))) + log(LOG_DEBUG, %s; %s: Timestamp missing, + no action\n, s, __func__); + } + /* * If timestamps were negotiated the reflected timestamp * must be equal to what we actually sent in the SYN|ACK. ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
Re: svn commit: r253081 - in head: sys/net sys/netinet sys/netinet6 sys/netipsec tools/tools/crypto usr.bin/netstat
On 09.07.2013 11:32, Andrey V. Elsukov wrote: Author: ae Date: Tue Jul 9 09:32:06 2013 New Revision: 253081 URL: http://svnweb.freebsd.org/changeset/base/253081 Log: Prepare network statistics structures for migration to PCPU counters. Use uint64_t as type for all fields of structures. Yes! Finally 64 bit counters! :) -- Andre ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
Re: svn commit: r252209 - in head: share/man/man9 sys/kern sys/sys
On 25.06.2013 20:44, John Baldwin wrote: Author: jhb Date: Tue Jun 25 18:44:15 2013 New Revision: 252209 URL: http://svnweb.freebsd.org/changeset/base/252209 Log: Several improvements to rmlock(9). Many of these are based on patches provided by Isilon. - Add an rm_assert() supporting various lock assertions similar to other locking primitives. Because rmlocks track readers the assertions are always fully accurate unlike rw_assert() and sx_assert(). - Flesh out the lock class methods for rmlocks to support sleeping via condvars and rm_sleep() (but only while holding write locks), rmlock details in 'show lock' in DDB, and the lc_owner method used by dtrace. - Add an internal destroyed cookie so that API functions can assert that an rmlock is not destroyed. - Make use of rm_assert() to add various assertions to the API (e.g. to assert locks are held when an unlock routine is called). - Give RM_SLEEPABLE locks their own lock class and always use the rmlock's own lock_object with WITNESS. - Use THREAD_NO_SLEEPING() / THREAD_SLEEPING_OK() to disallow sleeping while holding a read lock on an rmlock. Thanks! Would it make sense to move struct rm_queue from struct pcpu itself to using DPCPU as a next step? Submitted by:andre Actually these were only relayed by me and came from Max Laier / Stephan Uphoff. So all fame to them. Obtained from: EMC/Isilon -- Andre ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
Re: svn commit: r251894 - in head: lib/libmemstat sys/vm
On 18.06.2013 06:50, Jeff Roberson wrote: Author: jeff Date: Tue Jun 18 04:50:20 2013 New Revision: 251894 URL: http://svnweb.freebsd.org/changeset/base/251894 Log: Refine UMA bucket allocation to reduce space consumption and improve performance. - Always free to the alloc bucket if there is space. This gives LIFO allocation order to improve hot-cache performance. This also allows for zones with a single bucket per-cpu rather than a pair if the entire working set fits in one bucket. - Enable per-cpu caches of buckets. To prevent recursive bucket allocation one bucket zone still has per-cpu caches disabled. - Pick the initial bucket size based on a table driven maximum size per-bucket rather than the number of items per-page. This gives more sane initial sizes. - Only grow the bucket size when we face contention on the zone lock, this causes bucket sizes to grow more slowly. - Adjust the number of items per-bucket to account for the header space. This packs the buckets more efficiently per-page while making them not quite powers of two. - Eliminate the per-zone free bucket list. Always return buckets back to the bucket zone. This ensures that as zones grow into larger bucket sizes they eventually discard the smaller sizes. It persists fewer buckets in the system. The locking is slightly trickier. - Only switch buckets in zalloc, not zfree, this eliminates pathological cases where we ping-pong between two buckets. - Ensure that the thread that fills a new bucket gets to allocate from it to give a better upper bound on allocation time. There used to be a problem with per CPU caches accumulating large amounts of items without freeing back to the global (or socket) pool. Do these updates to UMA change this situation and/or do you have further improvements coming up? -- Andre ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
Re: svn commit: r251886 - in head: contrib/apr contrib/apr-util contrib/serf contrib/sqlite3 contrib/subversion share/mk usr.bin usr.bin/svn usr.bin/svn/lib usr.bin/svn/lib/libapr usr.bin/svn/lib/liba
On 18.06.2013 18:40, Tijl Coosemans wrote: On 2013-06-18 04:53, Peter Wemm wrote: Author: peter Date: Tue Jun 18 02:53:45 2013 New Revision: 251886 URL: http://svnweb.freebsd.org/changeset/base/251886 Log: Introduce svnlite so that we can check out our source code again. This is actually a fully functional build except: * All internal shared libraries are static linked to make sure there is no interference with ports (and to reduce build time). * It does not have the python/perl/etc plugin or API support. * By default, it installs as svnlite rather than svn. * If WITH_SVN added in make.conf, you get svn. * If WITHOUT_SVNLITE is in make.conf, this is completely disabled. To be absolutely clear, this is not intended for any use other than checking out freebsd source and committing, like we once did with cvs. It should be usable for small scale local repositories that don't need the python/perl plugin architecture. This ties the repo to the oldest supported release, meaning that years from now we won't be able to use some new subversion feature because an old FreeBSD release doesn't support it. AFAIK there is a checkout-only SVN client available, as in cvsup, but I don't remember the name. I don't find it unreasonable to ask developers to install the port. And for users it seems all they need is something like portsnap for base. Portsnap already distributes ports svn so it shouldn't be too hard to adapt it for base. And the extra layer it adds is very convenient. Apart from a bigger than usual update maybe, portsnap users never even noticed it was switched from cvs to svn at some point. Installing SVN from ports is very painful because of the huge dependency chain it carries, with the largest being Python and Perl IIRC. -- Andre ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
Re: svn commit: r251886 - in head: contrib/apr contrib/apr-util contrib/serf contrib/sqlite3 contrib/subversion share/mk usr.bin usr.bin/svn usr.bin/svn/lib usr.bin/svn/lib/libapr usr.bin/svn/lib/liba
On 18.06.2013 19:04, Alexey Dokuchaev wrote: Being able to checkout the sources is very desirable, but not at the cost of importing another heavy 3rd-party tool, which Subversion is. Just wanted to note that applaud Peter for actually doing something (tm) even though it came as a surprise to many it seems. Now that we're having the discussion we can converge towards the best or least controversial option. -- Andre ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r251296 - in head/sys: net netinet
Author: andre Date: Mon Jun 3 12:55:13 2013 New Revision: 251296 URL: http://svnweb.freebsd.org/changeset/base/251296 Log: Allow drivers to specify a maximum TSO length in bytes if they are limited in the amount of data they can handle at once. Drivers can set ifp-if_hw_tsomax before calling ether_ifattach() to change the limit. The lowest allowable size is IP_MAXPACKET / 8 (8192 bytes) as anything less wouldn't be very useful anymore. The upper limit is still at IP_MAXPACKET (65536 bytes). Raising it requires further auditing of the IPv4/v6 code path's as the length field in the IP header would overflow leading to confusion in firewalls and others packet handler on the real size of the packet. The placement into struct ifnet is a bit hackish but the best place that was found. When the stack/driver boundary is updated it should be handled in a better way. Submitted by: cperciva (earlier version) Reviewed by: cperciva Tested by:cperciva MFC after:1 week (using spare struct members to preserve ABI) Modified: head/sys/net/if.c head/sys/net/if_var.h head/sys/netinet/tcp_input.c head/sys/netinet/tcp_output.c head/sys/netinet/tcp_subr.c head/sys/netinet/tcp_var.h Modified: head/sys/net/if.c == --- head/sys/net/if.c Mon Jun 3 12:43:09 2013(r251295) +++ head/sys/net/if.c Mon Jun 3 12:55:13 2013(r251296) @@ -74,18 +74,18 @@ #include net/vnet.h #if defined(INET) || defined(INET6) -/*XXX*/ #include netinet/in.h #include netinet/in_var.h +#include netinet/ip.h #include netinet/ip_carp.h +#ifdef INET +#include netinet/if_ether.h +#endif /* INET */ #ifdef INET6 #include netinet6/in6_var.h #include netinet6/in6_ifattach.h -#endif -#endif -#ifdef INET -#include netinet/if_ether.h -#endif +#endif /* INET6 */ +#endif /* INET || INET6 */ #include security/mac/mac_framework.h @@ -653,6 +653,13 @@ if_attach_internal(struct ifnet *ifp, in TAILQ_INSERT_HEAD(ifp-if_addrhead, ifa, ifa_link); /* Reliably crash if used uninitialized. */ ifp-if_broadcastaddr = NULL; + + /* Initialize to max value. */ + if (ifp-if_hw_tsomax == 0) + ifp-if_hw_tsomax = IP_MAXPACKET; + KASSERT(ifp-if_hw_tsomax = IP_MAXPACKET + ifp-if_hw_tsomax = IP_MAXPACKET / 8, + (%s: tsomax outside of range, __func__)); } #ifdef VIMAGE else { Modified: head/sys/net/if_var.h == --- head/sys/net/if_var.h Mon Jun 3 12:43:09 2013(r251295) +++ head/sys/net/if_var.h Mon Jun 3 12:55:13 2013(r251296) @@ -204,6 +204,11 @@ struct ifnet { u_int if_fib; /* interface FIB */ u_char if_alloctype; /* if_type at time of allocation */ + u_int if_hw_tsomax; /* tso burst length limit, the minmum +* is (IP_MAXPACKET / 8). +* XXXAO: Have to find a better place +* for it eventually. */ + /* * Spare fields are added so that we can modify sensitive data * structures without changing the kernel binary interface, and must Modified: head/sys/netinet/tcp_input.c == --- head/sys/netinet/tcp_input.cMon Jun 3 12:43:09 2013 (r251295) +++ head/sys/netinet/tcp_input.cMon Jun 3 12:55:13 2013 (r251296) @@ -3434,7 +3434,7 @@ tcp_xmit_timer(struct tcpcb *tp, int rtt */ void tcp_mss_update(struct tcpcb *tp, int offer, int mtuoffer, -struct hc_metrics_lite *metricptr, int *mtuflags) +struct hc_metrics_lite *metricptr, struct tcp_ifcap *cap) { int mss = 0; u_long maxmtu = 0; @@ -3461,7 +3461,7 @@ tcp_mss_update(struct tcpcb *tp, int off /* Initialize. */ #ifdef INET6 if (isipv6) { - maxmtu = tcp_maxmtu6(inp-inp_inc, mtuflags); + maxmtu = tcp_maxmtu6(inp-inp_inc, cap); tp-t_maxopd = tp-t_maxseg = V_tcp_v6mssdflt; } #endif @@ -3470,7 +3470,7 @@ tcp_mss_update(struct tcpcb *tp, int off #endif #ifdef INET { - maxmtu = tcp_maxmtu(inp-inp_inc, mtuflags); + maxmtu = tcp_maxmtu(inp-inp_inc, cap); tp-t_maxopd = tp-t_maxseg = V_tcp_mssdflt; } #endif @@ -3605,11 +3605,12 @@ tcp_mss(struct tcpcb *tp, int offer) struct inpcb *inp; struct socket *so; struct hc_metrics_lite metrics; - int mtuflags = 0; + struct tcp_ifcap cap; KASSERT(tp != NULL, (%s: tp == NULL, __func__)); - - tcp_mss_update(tp, offer, -1, metrics, mtuflags); + +
svn commit: r251297 - head/sys/dev/xen/netfront
Author: andre Date: Mon Jun 3 13:00:33 2013 New Revision: 251297 URL: http://svnweb.freebsd.org/changeset/base/251297 Log: Specify a maximum TSO length limiting the segment chain to what the Xen host side can handle after defragmentation. This prevents the driver from throwing away too long TSO chains and improves the performance on Amazon AWS instances with 10GigE virtual interfaces to the normally expected throughput. Submitted by: cperciva (earlier version) Reviewed by: cperciva Tested by:cperciva MFC after:1 week Modified: head/sys/dev/xen/netfront/netfront.c Modified: head/sys/dev/xen/netfront/netfront.c == --- head/sys/dev/xen/netfront/netfront.cMon Jun 3 12:55:13 2013 (r251296) +++ head/sys/dev/xen/netfront/netfront.cMon Jun 3 13:00:33 2013 (r251297) @@ -134,6 +134,7 @@ static const int MODPARM_rx_flip = 0; * to mirror the Linux MAX_SKB_FRAGS constant. */ #defineMAX_TX_REQ_FRAGS (65536 / PAGE_SIZE + 2) +#defineNF_TSO_MAXBURST ((IP_MAXPACKET / PAGE_SIZE) * MCLBYTES) #define RX_COPY_THRESHOLD 256 @@ -2122,6 +2123,7 @@ create_netdev(device_t dev) ifp-if_hwassist = XN_CSUM_FEATURES; ifp-if_capabilities = IFCAP_HWCSUM; + ifp-if_hw_tsomax = NF_TSO_MAXBURST; ether_ifattach(ifp, np-mac); callout_init(np-xn_stat_ch, CALLOUT_MPSAFE); ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
Re: svn commit: r250658 - in head: share/mk sys/conf tools/build/options
On 15.05.2013 15:04, Brooks Davis wrote: Author: brooks Date: Wed May 15 13:04:10 2013 New Revision: 250658 URL: http://svnweb.freebsd.org/changeset/base/250658 Log: Add a new option WITHOUT_FORMAT_EXTENSIONS to disable flags related to checking our kernel printf extensions. This is useful to allow compilers without these extensions to build kernels. Sponsored by:DARPA, AFRL This breaks make depend at least on amd64: ../../../conf/kern.mk, line 37: Malformed conditional (${MK_FORMAT_EXTENSIONS} == no) ../../../conf/kern.mk, line 39: if-less else ../../../conf/kern.mk, line 41: if-less endif make: fatal errors encountered -- cannot continue -- Andre Added: head/tools/build/options/WITHOUT_FORMAT_EXTENSIONS (contents, props changed) Modified: head/share/mk/bsd.own.mk head/sys/conf/kern.mk Modified: head/share/mk/bsd.own.mk == --- head/share/mk/bsd.own.mkWed May 15 08:38:49 2013(r250657) +++ head/share/mk/bsd.own.mkWed May 15 13:04:10 2013(r250658) @@ -268,6 +268,7 @@ __DEFAULT_YES_OPTIONS = \ ED_CRYPTO \ EXAMPLES \ FLOPPY \ +FORMAT_EXTENSIONS \ FORTH \ FP_LIBC \ FREEBSD_UPDATE \ Modified: head/sys/conf/kern.mk == --- head/sys/conf/kern.mk Wed May 15 08:38:49 2013(r250657) +++ head/sys/conf/kern.mk Wed May 15 13:04:10 2013(r250658) @@ -5,7 +5,7 @@ # CWARNFLAGS?= -Wall -Wredundant-decls -Wnested-externs -Wstrict-prototypes \ -Wmissing-prototypes -Wpointer-arith -Winline -Wcast-qual \ - -Wundef -Wno-pointer-sign -fformat-extensions \ + -Wundef -Wno-pointer-sign ${FORMAT_EXTENTIONS} \ -Wmissing-include-dirs -fdiagnostics-show-option \ ${CWARNEXTRA} # @@ -29,7 +29,15 @@ NO_WSOMETIMES_UNINITIALIZED= -Wno-error- # enough to error out the whole kernel build. Display them anyway, so there is # some incentive to fix them eventually. CWARNEXTRA?= -Wno-error-tautological-compare -Wno-error-empty-body \ - -Wno-error-parentheses-equality + -Wno-error-parentheses-equality ${NO_WFORMAT} +.endif + +# External compilers may not support our format extensions. Allow them +# to be disabled. WARNING: format checking is disabled in this case. +.if ${MK_FORMAT_EXTENSIONS} == no +NO_WFORMAT=-Wno-format +.else +FORMAT_EXTENTIONS= -fformat-extensions .endif # Added: head/tools/build/options/WITHOUT_FORMAT_EXTENSIONS == --- /dev/null 00:00:00 1970 (empty, because file is newly added) +++ head/tools/build/options/WITHOUT_FORMAT_EXTENSIONS Wed May 15 13:04:10 2013(r250658) @@ -0,0 +1,5 @@ +.\ $FreeBSD$ +Set to not enable +.Fl fformat-extensions +when compiling the kernel. +Also disables all format checking. ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r250365 - head/sys/kern
Author: andre Date: Wed May 8 14:13:14 2013 New Revision: 250365 URL: http://svnweb.freebsd.org/changeset/base/250365 Log: When the accept queue is full print the number of already pending new connections instead of by how many we're over the limit, which is always 1. Noticed by: jmallet MFC after:1 week Modified: head/sys/kern/uipc_socket.c Modified: head/sys/kern/uipc_socket.c == --- head/sys/kern/uipc_socket.c Wed May 8 13:26:17 2013(r250364) +++ head/sys/kern/uipc_socket.c Wed May 8 14:13:14 2013(r250365) @@ -515,7 +515,7 @@ sonewconn(struct socket *head, int conns #endif log(LOG_DEBUG, %s: pcb %p: Listen queue overflow: %i already in queue awaiting acceptance\n, - __func__, head-so_pcb, over); + __func__, head-so_pcb, head-so_qlen); return (NULL); } VNET_ASSERT(head-so_vnet != NULL, (%s:%d so_vnet is NULL, head=%p, ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r250300 - in head/sys: kern net netinet sys
Author: andre Date: Mon May 6 16:42:18 2013 New Revision: 250300 URL: http://svnweb.freebsd.org/changeset/base/250300 Log: Back out r249318, r249320 and r249327 due to a heisenbug most likely related to a race condition in the ipi_hash_lock with the exact cause currently unknown but under investigation. Modified: head/sys/kern/uipc_socket.c head/sys/net/if.c head/sys/net/if_llatbl.c head/sys/net/if_llatbl.h head/sys/net/if_var.h head/sys/netinet/in_pcb.h head/sys/netinet/in_var.h head/sys/netinet/ip_id.c head/sys/netinet/ip_input.c head/sys/netinet/tcp_subr.c head/sys/sys/socketvar.h Modified: head/sys/kern/uipc_socket.c == --- head/sys/kern/uipc_socket.c Mon May 6 16:11:53 2013(r250299) +++ head/sys/kern/uipc_socket.c Mon May 6 16:42:18 2013(r250300) @@ -240,14 +240,14 @@ SYSCTL_INT(_kern_ipc_zero_copy, OID_AUTO * accept_mtx locks down per-socket fields relating to accept queues. See * socketvar.h for an annotation of the protected fields of struct socket. */ -struct mtx_padalign accept_mtx; +struct mtx accept_mtx; MTX_SYSINIT(accept_mtx, accept_mtx, accept, MTX_DEF); /* * so_global_mtx protects so_gencnt, numopensockets, and the per-socket * so_gencnt field. */ -static struct mtx_padalign so_global_mtx; +static struct mtx so_global_mtx; MTX_SYSINIT(so_global_mtx, so_global_mtx, so_glabel, MTX_DEF); /* Modified: head/sys/net/if.c == --- head/sys/net/if.c Mon May 6 16:11:53 2013(r250299) +++ head/sys/net/if.c Mon May 6 16:42:18 2013(r250300) @@ -206,7 +206,7 @@ VNET_DEFINE(struct ifindex_entry *, ifin * also to stablize it over long-running ioctls, without introducing priority * inversions and deadlocks. */ -struct rwlock_padalign ifnet_rwlock; +struct rwlock ifnet_rwlock; struct sx ifnet_sxlock; /* Modified: head/sys/net/if_llatbl.c == --- head/sys/net/if_llatbl.cMon May 6 16:11:53 2013(r250299) +++ head/sys/net/if_llatbl.cMon May 6 16:42:18 2013(r250300) @@ -67,7 +67,7 @@ static VNET_DEFINE(SLIST_HEAD(, lltable) static void vnet_lltable_init(void); -struct rwlock_padalign lltable_rwlock; +struct rwlock lltable_rwlock; RW_SYSINIT(lltable_rwlock, lltable_rwlock, lltable_rwlock); /* Modified: head/sys/net/if_llatbl.h == --- head/sys/net/if_llatbl.hMon May 6 16:11:53 2013(r250299) +++ head/sys/net/if_llatbl.hMon May 6 16:42:18 2013(r250300) @@ -43,7 +43,7 @@ struct rt_addrinfo; struct llentry; LIST_HEAD(llentries, llentry); -extern struct rwlock_padalign lltable_rwlock; +extern struct rwlock lltable_rwlock; #defineLLTABLE_RLOCK() rw_rlock(lltable_rwlock) #defineLLTABLE_RUNLOCK() rw_runlock(lltable_rwlock) #defineLLTABLE_WLOCK() rw_wlock(lltable_rwlock) Modified: head/sys/net/if_var.h == --- head/sys/net/if_var.h Mon May 6 16:11:53 2013(r250299) +++ head/sys/net/if_var.h Mon May 6 16:42:18 2013(r250300) @@ -191,9 +191,9 @@ struct ifnet { void*if_unused[2]; void*if_afdata[AF_MAX]; int if_afdata_initialized; + struct rwlock if_afdata_lock; struct task if_linktask; /* task for link change events */ - struct rwlock_padalign if_afdata_lock; - struct rwlock_padalign if_addr_lock; /* lock to protect address lists */ + struct rwlock if_addr_lock;/* lock to protect address lists */ LIST_ENTRY(ifnet) if_clones;/* interfaces of a cloner */ TAILQ_HEAD(, ifg_list) if_groups; /* linked list of groups per if */ @@ -832,7 +832,7 @@ struct ifmultiaddr { #ifdef _KERNEL -extern struct rwlock_padalign ifnet_rwlock; +extern struct rwlock ifnet_rwlock; extern struct sx ifnet_sxlock; #defineIFNET_LOCK_INIT() do { \ Modified: head/sys/netinet/in_pcb.h == --- head/sys/netinet/in_pcb.h Mon May 6 16:11:53 2013(r250299) +++ head/sys/netinet/in_pcb.h Mon May 6 16:42:18 2013(r250300) @@ -330,7 +330,7 @@ struct inpcbinfo { /* * Global lock protecting non-pcbgroup hash lookup tables. */ - struct rwlock_padalign ipi_hash_lock; + struct rwlockipi_hash_lock; /* * Global hash of inpcbs, hashed by local and foreign addresses and Modified: head/sys/netinet/in_var.h == --- head/sys/netinet/in_var.h Mon May 6 16:11:53
svn commit: r249843 - head/sys/kern
Author: andre Date: Wed Apr 24 13:54:55 2013 New Revision: 249843 URL: http://svnweb.freebsd.org/changeset/base/249843 Log: Base the calculation of maxmbufmem in part on kmem_map size instead of kernel_map size to prevent kernel memory exhaustion by mbufs and a subsequent panic on physical page allocation failure. On architectures without a direct map all mbuf memory (except for jumbo mbufs larger than PAGE_SIZE) comes from kmem_map. It is the limiting factor hence. For architectures with a direct map using the size of kmem_map is a good proxy of available kernel memory as well. If it is much smaller the mbuf limit may be sub-optimal but remains reasonable, while avoiding panics under exhaustion. The overall mbuf memory limit calculation may be reconsidered again later, however due to the many different mbuf sizes and different backing KVM maps it is a tricky subject. Found by: pho's new network stress test Pointed out by: alc (kmem_map instead of kernel_map) Tested by:pho Modified: head/sys/kern/kern_mbuf.c Modified: head/sys/kern/kern_mbuf.c == --- head/sys/kern/kern_mbuf.c Wed Apr 24 13:19:48 2013(r249842) +++ head/sys/kern/kern_mbuf.c Wed Apr 24 13:54:55 2013(r249843) @@ -118,7 +118,7 @@ tunable_mbinit(void *dummy) * At most it can be 3/4 of available kernel memory. */ realmem = qmin((quad_t)physmem * PAGE_SIZE, - vm_map_max(kernel_map) - vm_map_min(kernel_map)); + vm_map_max(kmem_map) - vm_map_min(kmem_map)); maxmbufmem = realmem / 2; TUNABLE_QUAD_FETCH(kern.maxmbufmem, maxmbufmem); if (maxmbufmem realmem / 4 * 3) ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r249317 - head/sys/netinet
Author: andre Date: Tue Apr 9 20:52:26 2013 New Revision: 249317 URL: http://svnweb.freebsd.org/changeset/base/249317 Log: Fix a race condition on tcp listen socket teardown with pending connections in the accept queue and contiguous new incoming SYNs. Compared to the original submitters patch I've moved the test next to the SYN handling to have it together in a logical unit and reworded the comment explaining the issue. Submitted by: Matt Miller m...@matthewjmiller.net Submitted by: Juan Mojica jmoj...@gmail.com Reviewed by: Matt Miller (changes) Tested by:pho MFC after:1 week Modified: head/sys/netinet/tcp_input.c Modified: head/sys/netinet/tcp_input.c == --- head/sys/netinet/tcp_input.cTue Apr 9 20:21:35 2013 (r249316) +++ head/sys/netinet/tcp_input.cTue Apr 9 20:52:26 2013 (r249317) @@ -1405,6 +1405,15 @@ relocked: */ INP_INFO_UNLOCK_ASSERT(V_tcbinfo); return; + } else if (tp-t_state == TCPS_LISTEN) { + /* +* When a listen socket is torn down the SO_ACCEPTCONN +* flag is removed first while connections are drained +* from the accept queue in a unlock/lock cycle of the +* ACCEPT_LOCK, opening a race condition allowing a SYN +* attempt go through unhandled. +*/ + goto dropunlock; } #ifdef TCP_SIGNATURE ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
svn commit: r249318 - in head/sys: kern net netinet
Author: andre Date: Tue Apr 9 21:02:20 2013 New Revision: 249318 URL: http://svnweb.freebsd.org/changeset/base/249318 Log: Change certain heavily used network related mutexes and rwlocks to reside on their own cache line to prevent false sharing with other nearby structures, especially for those in the .bss segment. NB: Those mutexes and rwlocks with variables next to them that get changed on every invocation do not benefit from their own cache line. Actually it may be net negative because two cache misses would be incurred in those cases. Modified: head/sys/kern/uipc_socket.c head/sys/net/if.c head/sys/net/if_llatbl.c head/sys/net/if_var.h head/sys/netinet/in_pcb.h head/sys/netinet/ip_id.c head/sys/netinet/ip_input.c head/sys/netinet/tcp_subr.c Modified: head/sys/kern/uipc_socket.c == --- head/sys/kern/uipc_socket.c Tue Apr 9 20:52:26 2013(r249317) +++ head/sys/kern/uipc_socket.c Tue Apr 9 21:02:20 2013(r249318) @@ -240,14 +240,14 @@ SYSCTL_INT(_kern_ipc_zero_copy, OID_AUTO * accept_mtx locks down per-socket fields relating to accept queues. See * socketvar.h for an annotation of the protected fields of struct socket. */ -struct mtx accept_mtx; +struct mtx_padalign accept_mtx; MTX_SYSINIT(accept_mtx, accept_mtx, accept, MTX_DEF); /* * so_global_mtx protects so_gencnt, numopensockets, and the per-socket * so_gencnt field. */ -static struct mtx so_global_mtx; +static struct so_global_mtx so_global_mtx; MTX_SYSINIT(so_global_mtx, so_global_mtx, so_glabel, MTX_DEF); /* Modified: head/sys/net/if.c == --- head/sys/net/if.c Tue Apr 9 20:52:26 2013(r249317) +++ head/sys/net/if.c Tue Apr 9 21:02:20 2013(r249318) @@ -206,7 +206,7 @@ VNET_DEFINE(struct ifindex_entry *, ifin * also to stablize it over long-running ioctls, without introducing priority * inversions and deadlocks. */ -struct rwlock ifnet_rwlock; +struct rwlock_padalign ifnet_rwlock; struct sx ifnet_sxlock; /* Modified: head/sys/net/if_llatbl.c == --- head/sys/net/if_llatbl.cTue Apr 9 20:52:26 2013(r249317) +++ head/sys/net/if_llatbl.cTue Apr 9 21:02:20 2013(r249318) @@ -67,7 +67,7 @@ static VNET_DEFINE(SLIST_HEAD(, lltable) static void vnet_lltable_init(void); -struct rwlock lltable_rwlock; +struct rwlock_padalign lltable_rwlock; RW_SYSINIT(lltable_rwlock, lltable_rwlock, lltable_rwlock); /* Modified: head/sys/net/if_var.h == --- head/sys/net/if_var.h Tue Apr 9 20:52:26 2013(r249317) +++ head/sys/net/if_var.h Tue Apr 9 21:02:20 2013(r249318) @@ -191,9 +191,9 @@ struct ifnet { void*if_unused[2]; void*if_afdata[AF_MAX]; int if_afdata_initialized; - struct rwlock if_afdata_lock; struct task if_linktask; /* task for link change events */ - struct rwlock if_addr_lock;/* lock to protect address lists */ + struct rwlock_padalign if_afdata_lock; + struct rwlock_padalign if_addr_lock; /* lock to protect address lists */ LIST_ENTRY(ifnet) if_clones;/* interfaces of a cloner */ TAILQ_HEAD(, ifg_list) if_groups; /* linked list of groups per if */ Modified: head/sys/netinet/in_pcb.h == --- head/sys/netinet/in_pcb.h Tue Apr 9 20:52:26 2013(r249317) +++ head/sys/netinet/in_pcb.h Tue Apr 9 21:02:20 2013(r249318) @@ -330,7 +330,7 @@ struct inpcbinfo { /* * Global lock protecting non-pcbgroup hash lookup tables. */ - struct rwlockipi_hash_lock; + struct rwlock_padalign ipi_hash_lock; /* * Global hash of inpcbs, hashed by local and foreign addresses and Modified: head/sys/netinet/ip_id.c == --- head/sys/netinet/ip_id.cTue Apr 9 20:52:26 2013(r249317) +++ head/sys/netinet/ip_id.cTue Apr 9 21:02:20 2013(r249318) @@ -97,7 +97,7 @@ static int array_ptr = 0; static int array_size = 8192; static int random_id_collisions = 0; static int random_id_total = 0; -static struct mtx ip_id_mtx; +static struct mtx_padalign ip_id_mtx; static voidip_initid(void); static int sysctl_ip_id_change(SYSCTL_HANDLER_ARGS); Modified: head/sys/netinet/ip_input.c == --- head/sys/netinet/ip_input.c Tue Apr 9 20:52:26 2013(r249317) +++ head/sys/netinet/ip_input.c Tue Apr 9 21:02:20 2013
Re: svn commit: r248417 - head/sys/sys
On 17.03.2013 10:33, Gleb Smirnoff wrote: On Sun, Mar 17, 2013 at 10:02:09AM +0100, Andre Oppermann wrote: A On 17.03.2013 08:39, Gleb Smirnoff wrote: A Author: glebius A Date: Sun Mar 17 07:39:45 2013 A New Revision: 248417 A URL: http://svnweb.freebsd.org/changeset/base/248417 A A Log: A Add MEXT_ALIGN() macro, similar to M_ALIGN() and MH_ALIGN(), but for A mbufs with external buffer. A A While you are cleaning up the mbuf usage wouldn't it make sense to remove A these macros, instead of adding new ones, and use m_align() which handles A all these cases internally? I'm thinking about this. Maybe it is worth to request tail alignment as a flag to the allocating function itself? IMHO that would overload the allocation function(s). The explicit step of doing m_align() for those who need it is fine and alerts the reader of what is going on. I'm all for simplification and unification, on the other hand it shouldn't be taken too far creating new complexity on the other side. -- Andre ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org
Re: svn commit: r248323 - head/sys/netinet
On 15.03.2013 13:53, Gleb Smirnoff wrote: Author: glebius Date: Fri Mar 15 12:53:53 2013 New Revision: 248323 URL: http://svnweb.freebsd.org/changeset/base/248323 Log: - Use m_getcl() instead of hand allocating. Sponsored by:Nginx, Inc. Modified: head/sys/netinet/tcp_output.c Modified: head/sys/netinet/tcp_output.c == --- head/sys/netinet/tcp_output.c Fri Mar 15 12:52:59 2013 (r248322) +++ head/sys/netinet/tcp_output.c Fri Mar 15 12:53:53 2013 (r248323) @@ -842,23 +842,19 @@ send: TCPSTAT_INC(tcps_sndpack); TCPSTAT_ADD(tcps_sndbyte, len); } - MGETHDR(m, M_NOWAIT, MT_DATA); +#ifdef INET6 + if (MHLEN hdrlen + max_linkhdr) + m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR); + else +#endif + m = m_gethdr(M_NOWAIT, MT_DATA); + m = m_getm2(hdrlen + max_linkhdr, M_NOWAIT, MT_DATA, M_PKTHDR); would be even more compact. Since max_linkhdr could be large as well, the possibility of a cluster applies to IPv4 too. -- Andre if (m == NULL) { SOCKBUF_UNLOCK(so-so_snd); error = ENOBUFS; goto out; } -#ifdef INET6 - if (MHLEN hdrlen + max_linkhdr) { - MCLGET(m, M_NOWAIT); - if ((m-m_flags M_EXT) == 0) { - SOCKBUF_UNLOCK(so-so_snd); - m_freem(m); - error = ENOBUFS; - goto out; - } - } -#endif + m-m_data += max_linkhdr; m-m_len = hdrlen; ___ svn-src-head@freebsd.org mailing list http://lists.freebsd.org/mailman/listinfo/svn-src-head To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org