svn commit: r257455 - head/sys/net

2013-10-31 Thread Andre Oppermann
Author: andre
Date: Thu Oct 31 15:46:10 2013
New Revision: 257455
URL: http://svnweb.freebsd.org/changeset/base/257455

Log:
  Make struct ifnet readable and comprehensible again by grouping
  and ordering related variables, fields and locks next to each
  other.  Add more comments to variables.
  
  Over time 'ifnet' has accumlated a lot of additional pointers and
  functionality in an unstructured way making it quite hard to read
  and understand while obfuscating relationships between fields and
  variables.
  
  Quantify the structure size and how bloated it has become.
  
  This is only a mechanical change in preparation for upcoming
  work to make ifnet opaque to drivers and to separate out the
  interface queuing.
  
  Sponsored by: The FreeBSD Foundation

Modified:
  head/sys/net/if_var.h

Modified: head/sys/net/if_var.h
==
--- head/sys/net/if_var.h   Thu Oct 31 15:27:39 2013(r257454)
+++ head/sys/net/if_var.h   Thu Oct 31 15:46:10 2013(r257455)
@@ -96,19 +96,42 @@ VNET_DECLARE(struct pfil_head, link_pfil
 /*
  * Structure defining a network interface.
  *
- * (Would like to call this struct ``if'', but C isn't PL/1.)
+ * Size ILP32:  592 (approx)
+ *  LP64: 1048 (approx)
  */
-
 struct ifnet {
+   /* General book keeping of interface lists. */
+   TAILQ_ENTRY(ifnet) if_link; /* all struct ifnets are chained */
+   LIST_ENTRY(ifnet) if_clones;/* interfaces of a cloner */
+   TAILQ_HEAD(, ifg_list) if_groups; /* linked list of groups per if */
+   /* protected by if_addr_lock */
+   u_char  if_alloctype;   /* if_type at time of allocation */
+
+   /* Driver and protocol specific information that remains stable. */
void*if_softc;  /* pointer to driver state */
+   void*if_llsoftc;/* link layer softc */
void*if_l2com;  /* pointer to protocol bits */
-   struct vnet *if_vnet;   /* pointer to network stack instance */
-   TAILQ_ENTRY(ifnet) if_link; /* all struct ifnets are chained */
-   charif_xname[IFNAMSIZ]; /* external name (name + unit) */
const char *if_dname;   /* driver name */
int if_dunit;   /* unit or IF_DUNIT_NONE */
+   u_short if_index;   /* numeric abbreviation for this if  */
+   short   if_index_reserved;  /* spare space to grow if_index */
+   charif_xname[IFNAMSIZ]; /* external name (name + unit) */
+   char*if_description;/* interface description */
+
+   /* Variable fields that are touched by the stack and drivers. */
+   int if_flags;   /* up/down, broadcast, etc. */
+   int if_capabilities;/* interface features  capabilities */
+   int if_capenable;   /* enabled features  capabilities */
+   void*if_linkmib;/* link-type-specific MIB data */
+   size_t  if_linkmiblen;  /* length of above data */
+   int if_drv_flags;   /* driver-managed status flags */
u_int   if_refcount;/* reference count */
-   struct  ifaddrhead if_addrhead; /* linked list of addresses per if */
+   struct  ifaltq if_snd;  /* output queue (includes altq) */
+   struct  if_data if_data;/* type information and statistics */
+   struct  task if_linktask;   /* task for link change events */
+
+   /* Addresses of different protocol families assigned to this if. */
+   struct  rwlock if_addr_lock;/* lock to protect address lists */
/*
 * if_addrhead is the list of all addresses associated to
 * an interface.
@@ -119,21 +142,29 @@ struct ifnet {
 * However, access to the AF_LINK address through this
 * field is deprecated. Use if_addr or ifaddr_byindex() instead.
 */
-   int if_pcount;  /* number of promiscuous listeners */
-   struct  carp_if *if_carp;   /* carp interface structure */
-   struct  bpf_if *if_bpf; /* packet filter structure */
-   u_short if_index;   /* numeric abbreviation for this if  */
-   short   if_index_reserved;  /* spare space to grow if_index */
-   struct  ifvlantrunk *if_vlantrunk; /* pointer to 802.1q data */
-   int if_flags;   /* up/down, broadcast, etc. */
-   int if_capabilities;/* interface features  capabilities */
-   int if_capenable;   /* enabled features  capabilities */
-   void*if_linkmib;/* link-type-specific MIB data */
-   size_t  if_linkmiblen;  /* length of above data */
-   struct  if_data if_data;
+   struct  ifaddrhead if_addrhead; /* linked list of addresses per if */
struct 

Re: svn commit: r257455 - head/sys/net

2013-10-31 Thread Andre Oppermann

On 31.10.2013 19:03, Luigi Rizzo wrote:

On Thu, Oct 31, 2013 at 03:46:10PM +, Andre Oppermann wrote:

Author: andre
Date: Thu Oct 31 15:46:10 2013
New Revision: 257455
URL: http://svnweb.freebsd.org/changeset/base/257455

Log:
   Make struct ifnet readable and comprehensible again by grouping
   and ordering related variables, fields and locks next to each
   other.  Add more comments to variables.




   Over time 'ifnet' has accumlated a lot of additional pointers and
   functionality in an unstructured way making it quite hard to read
   and understand while obfuscating relationships between fields and
   variables.

   Quantify the structure size and how bloated it has become.

   This is only a mechanical change in preparation for upcoming
   work to make ifnet opaque to drivers and to separate out the
   interface queuing.


as you do the above I think it would make sense to replace
all int/short/long with fixed-size fields as appropriate
(and large enough) to make it easier to reason about things
such as 'how many flags can i stuff into a field'.

The large enough part refers to two things:
- bitfields containing flags or capabilities have a tendency
   to overflow (not just in freebsd, linux has the same)
   requiring KBI changes. We should probably go for 64 bits
   unless there are compelling space reasons (not for ifnet).


Gleb will handle most of that and it is going to be part of the making
ifnet opaque to drivers.


- it is useful if certain opaque fields (flow ids, cookies...)
   can store pointers. Once again, make them at least 64 bit helps


A number of mbuf header fields have this property now. :)

--
Andre

___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


Re: svn commit: r257455 - head/sys/net

2013-10-31 Thread Andre Oppermann

On 31.10.2013 20:27, Ian Lepore wrote:

On Thu, 2013-10-31 at 20:08 +0100, Andre Oppermann wrote:

On 31.10.2013 19:03, Luigi Rizzo wrote:

On Thu, Oct 31, 2013 at 03:46:10PM +, Andre Oppermann wrote:

Author: andre
Date: Thu Oct 31 15:46:10 2013
New Revision: 257455
URL: http://svnweb.freebsd.org/changeset/base/257455

Log:
Make struct ifnet readable and comprehensible again by grouping
and ordering related variables, fields and locks next to each
other.  Add more comments to variables.




Over time 'ifnet' has accumlated a lot of additional pointers and
functionality in an unstructured way making it quite hard to read
and understand while obfuscating relationships between fields and
variables.

Quantify the structure size and how bloated it has become.

This is only a mechanical change in preparation for upcoming
work to make ifnet opaque to drivers and to separate out the
interface queuing.


as you do the above I think it would make sense to replace
all int/short/long with fixed-size fields as appropriate
(and large enough) to make it easier to reason about things
such as 'how many flags can i stuff into a field'.

The large enough part refers to two things:
- bitfields containing flags or capabilities have a tendency
to overflow (not just in freebsd, linux has the same)
requiring KBI changes. We should probably go for 64 bits
unless there are compelling space reasons (not for ifnet).


Gleb will handle most of that and it is going to be part of the making
ifnet opaque to drivers.


- it is useful if certain opaque fields (flow ids, cookies...)
can store pointers. Once again, make them at least 64 bit helps


A number of mbuf header fields have this property now. :)


Is there any chance all this reworking might get us to a position where
the protocol header in an mbuf doesn't have to be 32-bit aligned
anymore?  We pay a pretty heavy price for that requirement in the
drivers of the least capable hardware we support, the systems that have
the least horsepower to spare to make an extra copy of each packet to
realign it.


That's a totally different issue.  Here we're talking about kernel
structures including the fields in struct mbuf.  It is totally not
related to any payload packet headers inside the mbuf data section.

The problem you're complaining about is the epic design mistake of
the ethernet header neither being a power of two nor divisible by
4 or 8 bytes.  It is a ridiculous 14 bytes.

Many DMA engines are capable of starting on any byte.  Some (popular)
are not.  If they are, the start of the payload can be by shifted by
two so that the IP headers are 32bit aligned in memory.  If that is
not possible either the payload has to be copy-aligned or direct
payload to structure casting must be disallowed, or indirected through
a macro.

--
Andre

___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


Re: svn commit: r257455 - head/sys/net

2013-10-31 Thread Andre Oppermann

On 31.10.2013 21:15, Ian Lepore wrote:

On Thu, 2013-10-31 at 21:05 +0100, Luigi Rizzo wrote:

On Thu, Oct 31, 2013 at 01:27:25PM -0600, Ian Lepore wrote:
...

Is there any chance all this reworking might get us to a position where
the protocol header in an mbuf doesn't have to be 32-bit aligned
anymore?  We pay a pretty heavy price for that requirement in the
drivers of the least capable hardware we support, the systems that have
the least horsepower to spare to make an extra copy of each packet to
realign it.


So are you suggesting to use some 'copy_unaligned_32()' function/macro to
access 32-bit protocol fields in the network stack ?
(16-bit entries should not be an issue)

cheers
luigi


Or if not that (I have no idea how frequently those fields are accessed
and what the performance penalty would be on systems moving a LOT of
packets), then a compromise might be a flag the driver can set to say it
may provide unaligned incoming packets.  Then we could have common code
at just one place in the network stack to split the mbuf and
realign-copy just the protocol header part to a new mbuf and attach it
to the front of the chain.  That would minimize both duplicated code and
the amount of data that has to be copied, and would put knowledge like
how big is the protocol header that needs copying? in a place that
knows more about protocols.


This is a sensible idea and in fact how some of the drivers already do
it internally for strict alignment architectures.


I know pretty much nothing about what happens in the network stack after
I feed a packet to if_input(), so maybe this is all impractical.  But I
also know that ARM systems with strict alignment constraints spend a lot
of time copying the entire contents of every incoming packet to realign
it, and it would be a lot better if they were able to DMA directly into
an mbuf cluster that can handed up the stack.  Even if copy the entire
packet is the only thing if_input() could do, there might still be some
benefit in having one common piece of code for it.


We have tons of protocols using direct structure casting into the mbuf
payload.  Changing them all would be a huge task and in many cases also
change the way the code is structured.

--
Andre

___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


Re: svn commit: r257455 - head/sys/net

2013-10-31 Thread Andre Oppermann

On 31.10.2013 23:32, Ian Lepore wrote:

On Thu, 2013-10-31 at 23:17 +0100, Andre Oppermann wrote:

On 31.10.2013 20:27, Ian Lepore wrote:

On Thu, 2013-10-31 at 20:08 +0100, Andre Oppermann wrote:

On 31.10.2013 19:03, Luigi Rizzo wrote:

On Thu, Oct 31, 2013 at 03:46:10PM +, Andre Oppermann wrote:

Author: andre
Date: Thu Oct 31 15:46:10 2013
New Revision: 257455
URL: http://svnweb.freebsd.org/changeset/base/257455

Log:
 Make struct ifnet readable and comprehensible again by grouping
 and ordering related variables, fields and locks next to each
 other.  Add more comments to variables.




 Over time 'ifnet' has accumlated a lot of additional pointers and
 functionality in an unstructured way making it quite hard to read
 and understand while obfuscating relationships between fields and
 variables.

 Quantify the structure size and how bloated it has become.

 This is only a mechanical change in preparation for upcoming
 work to make ifnet opaque to drivers and to separate out the
 interface queuing.


as you do the above I think it would make sense to replace
all int/short/long with fixed-size fields as appropriate
(and large enough) to make it easier to reason about things
such as 'how many flags can i stuff into a field'.

The large enough part refers to two things:
- bitfields containing flags or capabilities have a tendency
 to overflow (not just in freebsd, linux has the same)
 requiring KBI changes. We should probably go for 64 bits
 unless there are compelling space reasons (not for ifnet).


Gleb will handle most of that and it is going to be part of the making
ifnet opaque to drivers.


- it is useful if certain opaque fields (flow ids, cookies...)
 can store pointers. Once again, make them at least 64 bit helps


A number of mbuf header fields have this property now. :)


Is there any chance all this reworking might get us to a position where
the protocol header in an mbuf doesn't have to be 32-bit aligned
anymore?  We pay a pretty heavy price for that requirement in the
drivers of the least capable hardware we support, the systems that have
the least horsepower to spare to make an extra copy of each packet to
realign it.


That's a totally different issue.  Here we're talking about kernel
structures including the fields in struct mbuf.  It is totally not
related to any payload packet headers inside the mbuf data section.



Of course it is.  The nature of my message was purely As long as we're
doing big changes


It's in a totally different area, but agreed on the big changes thing. ;)


The problem you're complaining about is the epic design mistake of
the ethernet header neither being a power of two nor divisible by
4 or 8 bytes.  It is a ridiculous 14 bytes.



Indeed.


Many DMA engines are capable of starting on any byte.  Some (popular)
are not.  If they are, the start of the payload can be by shifted by
two so that the IP headers are 32bit aligned in memory.  If that is
not possible either the payload has to be copy-aligned or direct
payload to structure casting must be disallowed, or indirected through
a macro.



I know of only one modern ARM SoC that's able to DMA network packets on
a 2-byte boundary (actually even it requires a 4-byte boundary, but it's
willing to stuff the first 16 bits with zeroes and offset everything
that follows accordingly).  I'm sure there are others, but it's the
exception rather than the rule.


Talk about foot shooting.  I'm afraid that packet copying may still be
the least evil option in the grand scheme of things.  IIRC ARM64 will
be able to do misaligned accesses with only a small performance penalty,
like x86/AMD64.

--
Andre

___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r257391 - in head/sys: dev/snc mips/nlm/dev/net

2013-10-30 Thread Andre Oppermann
Author: andre
Date: Wed Oct 30 16:56:46 2013
New Revision: 257391
URL: http://svnweb.freebsd.org/changeset/base/257391

Log:
  nclude missing net/if_var.h.
  
  Due to header pollution it wasn't noticed before.

Modified:
  head/sys/dev/snc/dp83932.c
  head/sys/mips/nlm/dev/net/xlpge.c

Modified: head/sys/dev/snc/dp83932.c
==
--- head/sys/dev/snc/dp83932.c  Wed Oct 30 16:34:26 2013(r257390)
+++ head/sys/dev/snc/dp83932.c  Wed Oct 30 16:56:46 2013(r257391)
@@ -74,6 +74,7 @@
 
 #include net/ethernet.h
 #include net/if.h
+#include net/if_var.h
 #include net/if_arp.h
 #include net/if_dl.h
 #include net/if_media.h

Modified: head/sys/mips/nlm/dev/net/xlpge.c
==
--- head/sys/mips/nlm/dev/net/xlpge.c   Wed Oct 30 16:34:26 2013
(r257390)
+++ head/sys/mips/nlm/dev/net/xlpge.c   Wed Oct 30 16:56:46 2013
(r257391)
@@ -47,6 +47,7 @@ __FBSDID($FreeBSD$);
 #include sys/taskqueue.h
 
 #include net/if.h
+#include net/if_var.h
 #include net/if_arp.h
 #include net/ethernet.h
 #include net/if_dl.h
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


Re: svn commit: r257391 - in head/sys: dev/snc mips/nlm/dev/net

2013-10-30 Thread Andre Oppermann

On 30.10.2013 18:40, Sergey Kandaurov wrote:

On 30 October 2013 20:56, Andre Oppermann an...@freebsd.org wrote:

Author: andre
Date: Wed Oct 30 16:56:46 2013
New Revision: 257391
URL: http://svnweb.freebsd.org/changeset/base/257391

Log:
   nclude missing net/if_var.h.

   Due to header pollution it wasn't noticed before.

Modified:
   head/sys/dev/snc/dp83932.c
   head/sys/mips/nlm/dev/net/xlpge.c

Modified: head/sys/dev/snc/dp83932.c
==
--- head/sys/dev/snc/dp83932.c  Wed Oct 30 16:34:26 2013(r257390)
+++ head/sys/dev/snc/dp83932.c  Wed Oct 30 16:56:46 2013(r257391)
@@ -74,6 +74,7 @@

  #include net/ethernet.h
  #include net/if.h
+#include net/if_var.h
  #include net/if_arp.h
  #include net/if_dl.h
  #include net/if_media.h

Modified: head/sys/mips/nlm/dev/net/xlpge.c
==
--- head/sys/mips/nlm/dev/net/xlpge.c   Wed Oct 30 16:34:26 2013
(r257390)
+++ head/sys/mips/nlm/dev/net/xlpge.c   Wed Oct 30 16:56:46 2013
(r257391)
@@ -47,6 +47,7 @@ __FBSDID($FreeBSD$);
  #include sys/taskqueue.h

  #include net/if.h
+#include net/if_var.h
  #include net/if_arp.h
  #include net/ethernet.h
  #include net/if_dl.h


To complete TARGET=pc98 KERNCONF=GENERIC crossbuild,
I had to apply also this change:


Please commit. :)

--
Andre


Index: sys/dev/snc/if_snc.c
===
--- sys/dev/snc/if_snc.c(revision 257391)
+++ sys/dev/snc/if_snc.c(working copy)
@@ -48,6 +48,7 @@ __FBSDID($FreeBSD$);
  #include net/if.h
  #include net/if_arp.h
  #include net/if_media.h
+#include net/if_var.h

  #include dev/snc/dp83932reg.h
  #include dev/snc/dp83932var.h
Index: sys/dev/snc/if_snc_pccard.c
===
--- sys/dev/snc/if_snc_pccard.c(revision 257391)
+++ sys/dev/snc/if_snc_pccard.c(working copy)
@@ -41,6 +41,8 @@ __FBSDID($FreeBSD$);
  #include sys/systm.h
  #include sys/socket.h
  #include sys/kernel.h
+#include sys/lock.h
+#include sys/mutex.h

  #include sys/module.h
  #include sys/bus.h



___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r257351 - head/sys/net

2013-10-29 Thread Andre Oppermann
Author: andre
Date: Tue Oct 29 17:48:08 2013
New Revision: 257351
URL: http://svnweb.freebsd.org/changeset/base/257351

Log:
  Move all interface queue related structures, macros and definitions
  from net/if_var to it own new net/ifq.h.
  
  For now net/ifq.h is unconditionally included through net/if_var.h.
  
  This is a mechanical change in preparation to make struct ifnet and
  the individual interface queue mechanisms opaque.
  
  Discussed with:   glebius
  Sponsored by: The FreeBSD Foundation

Added:
  head/sys/net/ifq.h
 - copied, changed from r257282, head/sys/net/if_var.h
Modified:
  head/sys/net/if_var.h

Modified: head/sys/net/if_var.h
==
--- head/sys/net/if_var.h   Tue Oct 29 17:46:26 2013(r257350)
+++ head/sys/net/if_var.h   Tue Oct 29 17:48:08 2013(r257351)
@@ -94,18 +94,6 @@ VNET_DECLARE(struct pfil_head, link_pfil
 #endif /* _KERNEL */
 
 /*
- * Structure defining a queue for a network interface.
- */
-struct ifqueue {
-   struct  mbuf *ifq_head;
-   struct  mbuf *ifq_tail;
-   int ifq_len;
-   int ifq_maxlen;
-   int ifq_drops;
-   struct  mtx ifq_mtx;
-};
-
-/*
  * Structure defining a network interface.
  *
  * (Would like to call this struct ``if'', but C isn't PL/1.)
@@ -208,6 +196,8 @@ struct ifnet {
void*if_pspare[8];  /* 1 netmap, 7 TDB */
 };
 
+#include net/ifq.h   /* XXXAO: temporary unconditional include */
+
 /*
  * XXX These aliases are terribly dangerous because they could apply
  * to anything.
@@ -262,96 +252,6 @@ void   if_addr_runlock(struct ifnet *ifp);
 void   if_maddr_rlock(struct ifnet *ifp);  /* if_multiaddrs */
 void   if_maddr_runlock(struct ifnet *ifp);/* if_multiaddrs */
 
-/*
- * Output queues (ifp-if_snd) and slow device input queues (*ifp-if_slowq)
- * are queues of messages stored on ifqueue structures
- * (defined above).  Entries are added to and deleted from these structures
- * by these macros.
- */
-#define IF_LOCK(ifq)   mtx_lock((ifq)-ifq_mtx)
-#define IF_UNLOCK(ifq) mtx_unlock((ifq)-ifq_mtx)
-#defineIF_LOCK_ASSERT(ifq) mtx_assert((ifq)-ifq_mtx, MA_OWNED)
-#define_IF_QFULL(ifq)  ((ifq)-ifq_len = (ifq)-ifq_maxlen)
-#define_IF_DROP(ifq)   ((ifq)-ifq_drops++)
-#define_IF_QLEN(ifq)   ((ifq)-ifq_len)
-
-#define_IF_ENQUEUE(ifq, m) do {\
-   (m)-m_nextpkt = NULL;  \
-   if ((ifq)-ifq_tail == NULL)\
-   (ifq)-ifq_head = m;\
-   else\
-   (ifq)-ifq_tail-m_nextpkt = m; \
-   (ifq)-ifq_tail = m;\
-   (ifq)-ifq_len++;   \
-} while (0)
-
-#define IF_ENQUEUE(ifq, m) do {\
-   IF_LOCK(ifq);   \
-   _IF_ENQUEUE(ifq, m);\
-   IF_UNLOCK(ifq); \
-} while (0)
-
-#define_IF_PREPEND(ifq, m) do {\
-   (m)-m_nextpkt = (ifq)-ifq_head;   \
-   if ((ifq)-ifq_tail == NULL)\
-   (ifq)-ifq_tail = (m);  \
-   (ifq)-ifq_head = (m);  \
-   (ifq)-ifq_len++;   \
-} while (0)
-
-#define IF_PREPEND(ifq, m) do {\
-   IF_LOCK(ifq);   \
-   _IF_PREPEND(ifq, m);\
-   IF_UNLOCK(ifq); \
-} while (0)
-
-#define_IF_DEQUEUE(ifq, m) do {\
-   (m) = (ifq)-ifq_head;  \
-   if (m) {\
-   if (((ifq)-ifq_head = (m)-m_nextpkt) == NULL) \
-   (ifq)-ifq_tail = NULL; \
-   (m)-m_nextpkt = NULL;  \
-   (ifq)-ifq_len--;   \
-   }   \
-} while (0)
-
-#define IF_DEQUEUE(ifq, m) do {\
-   IF_LOCK(ifq);   \
-   _IF_DEQUEUE(ifq, m);\
-   IF_UNLOCK(ifq); \
-} while (0)
-
-#define_IF_DEQUEUE_ALL(ifq, m) do {\
-   (m) = (ifq)-ifq_head;  \
-   (ifq)-ifq_head = (ifq)-ifq_tail = NULL;  

svn commit: r256920 - head/sys/netinet

2013-10-22 Thread Andre Oppermann
Author: andre
Date: Tue Oct 22 18:24:34 2013
New Revision: 256920
URL: http://svnweb.freebsd.org/changeset/base/256920

Log:
  The TCP delayed ACK logic isn't aware of LRO passing up large aggregated
  segments thinking it received only one segment. This causes it to enable
  the delay the ACK for 100ms to wait for another segment which may never
  come because all the data was received already.
  
  Doing delayed ACK for LRO segments is bogus for two reasons: a) it pushes
  us further away from acking every other packet; b) it introduces additional
  delay in responding to the sender.  The latter is especially bad because it
  is in the nature of LRO to aggregated all segments of a burst with no more
  coming until an ACK is sent back.
  
  Change the delayed ACK logic to detect LRO segments by being larger than
  the MSS for this connection and issuing an immediate ACK for them to keep
  the ACK clock ticking without interruption.
  
  Reported by:  julian, cperciva
  Tested by:cperciva
  Reviewed by:  lstewart
  MFC after:3 days

Modified:
  head/sys/netinet/tcp_input.c

Modified: head/sys/netinet/tcp_input.c
==
--- head/sys/netinet/tcp_input.cTue Oct 22 18:14:06 2013
(r256919)
+++ head/sys/netinet/tcp_input.cTue Oct 22 18:24:34 2013
(r256920)
@@ -508,10 +508,13 @@ do { \
  *   the ack that opens up a 0-sized window and
  * - delayed acks are enabled or
  * - this is a half-synchronized T/TCP connection.
+ * - the segment size is not larger than the MSS and LRO wasn't used
+ *   for this segment.
  */
-#define DELAY_ACK(tp)  \
+#define DELAY_ACK(tp, tlen)\
((!tcp_timer_active(tp, TT_DELACK)\
(tp-t_flags  TF_RXWIN0SENT) == 0)   \
+   (tlen = tp-t_maxopd)\
(V_tcp_delack_enabled || (tp-t_flags  TF_NEEDSYN)))
 
 /*
@@ -1863,7 +1866,7 @@ tcp_do_segment(struct mbuf *m, struct tc
}
/* NB: sorwakeup_locked() does an implicit unlock. */
sorwakeup_locked(so);
-   if (DELAY_ACK(tp)) {
+   if (DELAY_ACK(tp, tlen)) {
tp-t_flags |= TF_DELACK;
} else {
tp-t_flags |= TF_ACKNOW;
@@ -1954,7 +1957,7 @@ tcp_do_segment(struct mbuf *m, struct tc
 * If there's data, delay ACK; if there's also a FIN
 * ACKNOW will be turned on later.
 */
-   if (DELAY_ACK(tp)  tlen != 0)
+   if (DELAY_ACK(tp, tlen)  tlen != 0)
tcp_timer_activate(tp, TT_DELACK,
tcp_delacktime);
else
@@ -2926,7 +2929,7 @@ dodata:   
/* XXX */
if (th-th_seq == tp-rcv_nxt 
LIST_EMPTY(tp-t_segq) 
TCPS_HAVEESTABLISHED(tp-t_state)) {
-   if (DELAY_ACK(tp))
+   if (DELAY_ACK(tp, tlen))
tp-t_flags |= TF_DELACK;
else
tp-t_flags |= TF_ACKNOW;
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


Re: svn commit: r255608 - in head/sys: conf kern modules/cxgb/cxgb modules/sfxge modules/ti sys vm

2013-09-16 Thread Andre Oppermann

On 16.09.2013 08:25, Konstantin Belousov wrote:

Author: kib
Date: Mon Sep 16 06:25:54 2013
New Revision: 255608
URL: http://svnweb.freebsd.org/changeset/base/255608

Log:
   Remove zero-copy sockets code.  It only worked for anonymous memory,
   and the equivalent functionality is now provided by sendfile(2) over
   posix shared memory filedescriptor.

   Remove the cow member of struct vm_page, and rearrange the remaining
   members.  While there, make hold_count unsigned.


Thanks, it's good to see it gone. :)

--
Andre

___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


Re: svn commit: r254773 - head/sys/net

2013-09-08 Thread Andre Oppermann

On 07.09.2013 17:30, Mikolaj Golub wrote:

Hi,

On Sat, Aug 24, 2013 at 11:17:25AM +, Andre Oppermann wrote:

Author: andre
Date: Sat Aug 24 11:17:25 2013
New Revision: 254773
URL: http://svnweb.freebsd.org/changeset/base/254773

Log:
   Resolve the confusion between the head_list and the hook list.

   The linked list of pfil hooks is changed to chain and this term
   is applied consistently.  The head_list remains with list term.

   Add KASSERT to vnet_pfil_uninit().


...


  vnet_pfil_uninit(const void *unused)
  {

-   /*  XXX should panic if list is not empty */
+   KASSERT(LIST_EMPTY(V_pfil_head_list),
+   (%s: pfil_head_list %p not empty, __func__, V_pfil_head_list));
PFIL_LOCK_DESTROY_REAL(V_pfil_lock);
return (0);
  }



It is triggered when destroying a vnet, due to inet/inet6 pfil hooks
are not being unregistered.

The attached patch fixes the issue for me. I am going to commit it if
there are no objections -- might the unregistration has been skipped
intentionally due to some unresolved issue?


There's no reason that I know of.  And if there were then unregistering
would be unsafe in any case.

--
Andre

___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r254973 - in head/sys: kern sys

2013-08-27 Thread Andre Oppermann
Author: andre
Date: Tue Aug 27 20:52:02 2013
New Revision: 254973
URL: http://svnweb.freebsd.org/changeset/base/254973

Log:
  Pad m_hdr on 32bit architectures to to prevent alignment and padding
  problems with the way MLEN, MHLEN, and struct mbuf are set up.
  
  CTASSERT's are provided to detect such issues at compile time in the
  future.
  
  The #define MLEN and MHLEN calculation do not take actual compiler-
  induced alignment and padding inside the complete struct mbuf into
  account.  Accordingly appropriate attention is required when changing
  members of struct mbuf.
  
  Ideally one would calculate MLEN as (MSIZE - sizeof(((struct mbuf *)0)-m_hdr)
  but that doesn't work as the compiler refuses to operate on an as of
  yet incomplete structure.
  
  In particular ARM 32bit has more strict alignment requirements which
  caused 4 bytes of padding between m_hdr and pkthdr in struct mbuf
  because of the 64bit members in pkthdr.  This wasn't picked up by MLEN
  and MHLEN causing an overflow of the mbuf provided data storage by
  overestimating its size.
  
  I386 didn't show this problem because it handles unaligned access just
  fine, albeit at a small performance penalty.
  
  On 64bit architectures the struct mbuf layout is 64bit aligned in all
  places.
  
  Reported by:  Thomas Skibo ThomasSkibo-at-sbcglobal-dot-net
  Tested by:tuexen, ian, Thomas Skibo (extended patch)
  Sponsored by: The FreeBSD Foundation

Modified:
  head/sys/kern/uipc_mbuf.c
  head/sys/sys/mbuf.h

Modified: head/sys/kern/uipc_mbuf.c
==
--- head/sys/kern/uipc_mbuf.c   Tue Aug 27 20:43:27 2013(r254972)
+++ head/sys/kern/uipc_mbuf.c   Tue Aug 27 20:52:02 2013(r254973)
@@ -85,6 +85,14 @@ SYSCTL_INT(_kern_ipc, OID_AUTO, m_defrag
 #endif
 
 /*
+ * Ensure the correct size of various mbuf parameters.  It could be off due
+ * to compiler-induced padding and alignment artifacts.
+ */
+CTASSERT(sizeof(struct mbuf) == MSIZE);
+CTASSERT(MSIZE - offsetof(struct mbuf, m_dat) == MLEN);
+CTASSERT(MSIZE - offsetof(struct mbuf, m_pktdat) == MHLEN);
+
+/*
  * m_get2() allocates minimum mbuf that would fit size argument.
  */
 struct mbuf *

Modified: head/sys/sys/mbuf.h
==
--- head/sys/sys/mbuf.h Tue Aug 27 20:43:27 2013(r254972)
+++ head/sys/sys/mbuf.h Tue Aug 27 20:52:02 2013(r254973)
@@ -53,6 +53,10 @@
  * externally and attach it to the mbuf in a way similar to that of mbuf
  * clusters.
  *
+ * NB: These calculation do not take actual compiler-induced alignment and
+ * padding inside the complete struct mbuf into account.  Appropriate
+ * attention is required when changing members of struct mbuf.
+ *
  * MLEN is data length in a normal mbuf.
  * MHLEN is data length in an mbuf with pktheader.
  * MINCLSIZE is a smallest amount of data that should be put into cluster.
@@ -84,7 +88,7 @@ struct mb_args {
 
 /*
  * Header present at the beginning of every mbuf.
- * Size ILP32: 20
+ * Size ILP32: 24
  *  LP64: 32
  */
 struct m_hdr {
@@ -94,6 +98,9 @@ struct m_hdr {
int32_t  mh_len;/* amount of data in this mbuf */
uint32_t mh_type:8, /* type of data in this mbuf */
 mh_flags:24;   /* flags; see below */
+#if !defined(__LP64__)
+   uint32_t mh_pad;/* pad for 64bit alignment */
+#endif
 };
 
 /*
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r254910 - head/sys/dev/xen/netback

2013-08-26 Thread Andre Oppermann
Author: andre
Date: Mon Aug 26 13:17:37 2013
New Revision: 254910
URL: http://svnweb.freebsd.org/changeset/base/254910

Log:
  Fix mbuf debugging printf()'s after the recent mbuf header changes.

Modified:
  head/sys/dev/xen/netback/netback.c

Modified: head/sys/dev/xen/netback/netback.c
==
--- head/sys/dev/xen/netback/netback.c  Mon Aug 26 13:00:25 2013
(r254909)
+++ head/sys/dev/xen/netback/netback.c  Mon Aug 26 13:17:37 2013
(r254910)
@@ -587,14 +587,14 @@ xnb_dump_mbuf(const struct mbuf *m)
if (m-m_flags  M_PKTHDR) {
printf(flowid=%10d, csum_flags=%#8x, csum_data=%#8x, 
   tso_segsz=%5hd\n,
-  m-m_pkthdr.flowid, m-m_pkthdr.csum_flags,
+  m-m_pkthdr.flowid, (int)m-m_pkthdr.csum_flags,
   m-m_pkthdr.csum_data, m-m_pkthdr.tso_segsz);
-   printf(rcvif=%16p,  header=%18p, len=%19d\n,
-  m-m_pkthdr.rcvif, m-m_pkthdr.header, m-m_pkthdr.len);
+   printf(rcvif=%16p,  len=%19d\n,
+  m-m_pkthdr.rcvif, m-m_pkthdr.len);
}
printf(m_next=%16p, m_nextpk=%16p, m_data=%16p\n,
   m-m_next, m-m_nextpkt, m-m_data);
-   printf(m_len=%17d, m_flags=%#15x, m_type=%18hd\n,
+   printf(m_len=%17d, m_flags=%#15x, m_type=%18u\n,
   m-m_len, m-m_flags, m-m_type);
 
len = m-m_len;
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


Re: svn commit: r254527 - in head/sys: net80211 netinet sys

2013-08-26 Thread Andre Oppermann

On 26.08.2013 11:43, Gleb Smirnoff wrote:

A Author: andre
A Date: Mon Aug 19 14:25:11 2013
A New Revision: 254527
A URL: http://svnweb.freebsd.org/changeset/base/254527
A
A Log:
A   Reorder the mbuf defines to make more sense and group related flags
A   together.
A
A   Add M_FLAG_PRINTF for use with printf(9) %b indentifier.
A
A   Use the generic mbuf flags print names in the net80211 code and adjust
A   the protocol specific bits for their new positions.
A
A   Change SCTP M_PROTO mapping from 5 to 1 to fit within the 16bit field
A   they use internally to store some additional information.
A
A   Discussed with: trociny, glebius

The first part: reorder the mbuf flag defines wasn't actually discussed.


It was part of the patch to the email I sent you, and a couple of others,
to which you replied with no objection to the M_FLAGS cleanup.  It wasn't
explicitly spelled out but very visible in the patch.


Tossing values of M_BCAST, M_MCAST, M_VLANTAG for no value except code
tidyness isn't a safe idea. These are the flags that device drivers use.
After this change binary drivers are still loadable but would be buggy.


As I already explained elsewhere, on -current only KLD's compiled at the same
time/revision as the kernel are supported.  It has been this way as long as
I can remember.  There never was any guarantee, not even implicit, that a KLD
from r254000 will continue to work with a kernel from r254527.  In fact, if
it were, it would be almost impossible to develop anything on -current.

On -stable we do have this ABI+API guarantee, so a KLD from 9.0 is supposed
to work on 9.2 as well and we generally keep that promise.


We allow ourselves ABI changes in head, but this doesn't mean that we
should do them for no important reason.


The important reason are the later updates to the mbuf headers and that
it simply doesn't make sense to keep the defines spaghetti while the whole
thing is being overhauled.

It seems from a technical point of view both arguments, maintain bits vs.
reordering, are rather weak and prone to bike-shedding. ;)  It certainly
doesn't make sense to change again and revert it just for the sake of it.

--
Andre

___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


Re: svn commit: r254779 - head/sys/kern

2013-08-26 Thread Andre Oppermann

On 26.08.2013 12:50, Gleb Smirnoff wrote:

On Sat, Aug 24, 2013 at 12:24:59PM +, Andre Oppermann wrote:
A Author: andre
A Date: Sat Aug 24 12:24:58 2013
A New Revision: 254779
A URL: http://svnweb.freebsd.org/changeset/base/254779
A
A Log:
A   Avoid code duplication for mbuf initialization and use m_init() instead
A   in mb_ctor_mbuf() and mb_ctor_pack().

m_init() is inline, but it calls m_pkthdr_init() which isn't inline. It
might be that compiler inline it due to m_pkthdr_init() living in the same
file as mb_ctor_mbuf() and mb_ctor_pack(), but not sure.


It depends on the optimization level.


m_pkthdr_init() zeroes much more than deleted code did. Some zeroing
operations are definitely superfluous job.


It's exactly the same for mb_ctor_mbuf() and one more for mb_ctor_pack().

Overall it has become more because we've got a couple more (small) fields
in the mbuf headers now.  Looking at the size of the headers it may be
faster to just bzero() it instead of doing it one by one.

The overall problem is that replicating the same operations in multiple
places is dangerous from a consistency point of view.


The change is definitely not an no-op change. It might have pessimized
the mbuf allocation performance.


Heavy inlining is not always an advantage and cause i-cache bloat.  But
you're right I haven't benchmarked it (yet) and thus we don't know.

I've been setting up my 10G benchmark environment but had to re-prioritize
due to the impeding freeze on -current.  If the benchmarking shows a conclusive
pessimization I'm more than happy to compensate for it, either by inlining
m_pkthdr_init() as well or some form of macro.  Such changes can still be
done during API freeze, but before BETA1.

--
Andre

___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


Re: svn commit: r254805 - head/sys/sys

2013-08-26 Thread Andre Oppermann

On 26.08.2013 13:00, Gleb Smirnoff wrote:

On Sat, Aug 24, 2013 at 07:58:36PM +, Andre Oppermann wrote:
A Author: andre
A Date: Sat Aug 24 19:58:36 2013
A New Revision: 254805
A URL: http://svnweb.freebsd.org/changeset/base/254805
A
A Log:
A   Add mtodo(m, o) macro taking an additional offset into the mbuf data 
section.
A
A   Sponsored by:   The FreeBSD Foundation
A
A Modified:
A   head/sys/sys/mbuf.h
A
A Modified: head/sys/sys/mbuf.h
A 
==
A --- head/sys/sys/mbuf.h   Sat Aug 24 19:51:18 2013(r254804)
A +++ head/sys/sys/mbuf.h   Sat Aug 24 19:58:36 2013(r254805)
A @@ -67,8 +67,10 @@
A   * type:
A   *
A   * mtod(m, t)-- Convert mbuf pointer to data pointer of correct type.
A + * mtodo(m, o) -- Same as above but with offset 'o' into data.
A   */
A  #define  mtod(m, t)  ((t)((m)-m_data))
A +#define  mtodo(m, o) ((void *)(((m)-m_data) + (o)))

IMO, having a typecast would be better. Then mtodo() would be really same as
mtod(), as stated in comment.


There was a big discussion about 10 month back when I did this change in my
tcp_workqueue branch with the typecast in.  The conclusion was that a typecast
is really not necessary and only causes one to type a lot more for the compiler
to throw away anyway.  But yes, the comment isn't perfect in that sense.

--
Andre

___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r254830 - head/sys/kern

2013-08-25 Thread Andre Oppermann
Author: andre
Date: Sun Aug 25 09:40:15 2013
New Revision: 254830
URL: http://svnweb.freebsd.org/changeset/base/254830

Log:
  Adjust socow_iodone() after r254799.
  
  Sponsored by: The FreeBSD Foundation

Modified:
  head/sys/kern/uipc_cow.c

Modified: head/sys/kern/uipc_cow.c
==
--- head/sys/kern/uipc_cow.cSun Aug 25 08:56:09 2013(r254829)
+++ head/sys/kern/uipc_cow.cSun Aug 25 09:40:15 2013(r254830)
@@ -70,10 +70,10 @@ struct netsend_cow_stats {
 
 static struct netsend_cow_stats socow_stats;
 
-static void socow_iodone(void *addr, void *args);
+static void socow_iodone(struct mbuf *m, void *addr, void *args);
 
 static void
-socow_iodone(void *addr, void *args)
+socow_iodone(struct mbuf *m, void *addr, void *args)
 {  
struct sf_buf *sf;
vm_page_t pp;
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r254831 - head/sys/net

2013-08-25 Thread Andre Oppermann
Author: andre
Date: Sun Aug 25 09:41:37 2013
New Revision: 254831
URL: http://svnweb.freebsd.org/changeset/base/254831

Log:
  Remove unnecessary setup of the m-pkthdr.header pointer.
  
  Sponsored by: The FreeBSD Foundation

Modified:
  head/sys/net/if_fddisubr.c
  head/sys/net/if_iso88025subr.c

Modified: head/sys/net/if_fddisubr.c
==
--- head/sys/net/if_fddisubr.c  Sun Aug 25 09:40:15 2013(r254830)
+++ head/sys/net/if_fddisubr.c  Sun Aug 25 09:41:37 2013(r254831)
@@ -390,7 +390,6 @@ fddi_input(ifp, m)
goto dropanyway;
}
fh = mtod(m, struct fddi_header *);
-   m-m_pkthdr.header = (void *)fh;
 
/*
 * Discard packet if interface is not up.

Modified: head/sys/net/if_iso88025subr.c
==
--- head/sys/net/if_iso88025subr.c  Sun Aug 25 09:40:15 2013
(r254830)
+++ head/sys/net/if_iso88025subr.c  Sun Aug 25 09:41:37 2013
(r254831)
@@ -476,7 +476,6 @@ iso88025_input(ifp, m)
goto dropanyway;
}
th = mtod(m, struct iso88025_header *);
-   m-m_pkthdr.header = (void *)th;
 
/*
 * Discard packet if interface is not up.
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r254832 - head/sys/ofed/drivers/net/mlx4

2013-08-25 Thread Andre Oppermann
Author: andre
Date: Sun Aug 25 09:45:26 2013
New Revision: 254832
URL: http://svnweb.freebsd.org/changeset/base/254832

Log:
  Change m-pkthdr.header to m-pkthdr.PH_loc.ptr after r254804
  to transiently store pointers to packet headers.
  
  Sponsored by: The FreeBSD Foundation

Modified:
  head/sys/ofed/drivers/net/mlx4/en_frag.c

Modified: head/sys/ofed/drivers/net/mlx4/en_frag.c
==
--- head/sys/ofed/drivers/net/mlx4/en_frag.cSun Aug 25 09:41:37 2013
(r254831)
+++ head/sys/ofed/drivers/net/mlx4/en_frag.cSun Aug 25 09:45:26 2013
(r254832)
@@ -87,7 +87,7 @@ static void flush_session(struct mlx4_en
  u16 more)
 {
struct mbuf *mb = session-fragments;
-   struct ip *iph = mb-m_pkthdr.header;
+   struct ip *iph = mb-m_pkthdr.PH_loc.ptr;
struct net_device *dev = mb-m_pkthdr.rcvif;
 
/* Update IP length and checksum */
@@ -132,7 +132,7 @@ int mlx4_en_rx_frags(struct mlx4_en_priv
u16 offset;
 
iph = (struct ip *)(mtod(mb, char *) + ETHER_HDR_LEN);
-   mb-m_pkthdr.header = iph;
+   mb-m_pkthdr.PH_loc.ptr = iph;
ip_len = ntohs(iph-ip_len);
ip_hlen = iph-ip_hl * 4;
data_len = ip_len - ip_hlen;
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r254834 - in head/sys: netinet netinet6

2013-08-25 Thread Andre Oppermann
Author: andre
Date: Sun Aug 25 09:49:00 2013
New Revision: 254834
URL: http://svnweb.freebsd.org/changeset/base/254834

Log:
  For now limit printf(9) %x of the 64bit pkthdr.csum_flags field to 32bits.
  The upper 32bits are not occupied for now.
  
  Sponsored by: The FreeBSD Foundation

Modified:
  head/sys/netinet/sctp_input.c
  head/sys/netinet6/sctp6_usrreq.c

Modified: head/sys/netinet/sctp_input.c
==
--- head/sys/netinet/sctp_input.c   Sun Aug 25 09:46:03 2013
(r254833)
+++ head/sys/netinet/sctp_input.c   Sun Aug 25 09:49:00 2013
(r254834)
@@ -6026,7 +6026,7 @@ sctp_input_with_port(struct mbuf *i_pak,
sctp_input(): Packet of length %d received on %s with csum_flags 
0x%x.\n,
m-m_pkthdr.len,
if_name(m-m_pkthdr.rcvif),
-   m-m_pkthdr.csum_flags);
+   (int)m-m_pkthdr.csum_flags);
if (m-m_flags  M_FLOWID) {
mflowid = m-m_pkthdr.flowid;
use_mflowid = 1;

Modified: head/sys/netinet6/sctp6_usrreq.c
==
--- head/sys/netinet6/sctp6_usrreq.cSun Aug 25 09:46:03 2013
(r254833)
+++ head/sys/netinet6/sctp6_usrreq.cSun Aug 25 09:49:00 2013
(r254834)
@@ -112,7 +112,7 @@ sctp6_input_with_port(struct mbuf **i_pa
sctp6_input(): Packet of length %d received on %s with csum_flags 
0x%x.\n,
m-m_pkthdr.len,
if_name(m-m_pkthdr.rcvif),
-   m-m_pkthdr.csum_flags);
+   (int)m-m_pkthdr.csum_flags);
if (m-m_flags  M_FLOWID) {
mflowid = m-m_pkthdr.flowid;
use_mflowid = 1;
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r254842 - in head/sys: compat/ndis dev/cas dev/hatm dev/if_ndis dev/iscsi_initiator dev/lge dev/mwl dev/wb kern sys

2013-08-25 Thread Andre Oppermann
Author: andre
Date: Sun Aug 25 10:57:09 2013
New Revision: 254842
URL: http://svnweb.freebsd.org/changeset/base/254842

Log:
  Give (*ext_free) an int return value allowing for very sophisticated
  external mbuf buffer management capabilities in the future.
  
  For now only EXT_FREE_OK is defined with current legacy behavior.
  
  Sponsored by: The FreeBSD Foundation

Modified:
  head/sys/compat/ndis/kern_ndis.c
  head/sys/compat/ndis/ndis_var.h
  head/sys/dev/cas/if_cas.c
  head/sys/dev/hatm/if_hatm_intr.c
  head/sys/dev/if_ndis/if_ndis.c
  head/sys/dev/iscsi_initiator/isc_soc.c
  head/sys/dev/lge/if_lge.c
  head/sys/dev/mwl/if_mwl.c
  head/sys/dev/wb/if_wb.c
  head/sys/kern/subr_mbpool.c
  head/sys/kern/uipc_cow.c
  head/sys/kern/uipc_mbuf.c
  head/sys/kern/uipc_syscalls.c
  head/sys/sys/mbpool.h
  head/sys/sys/mbuf.h
  head/sys/sys/sf_buf.h

Modified: head/sys/compat/ndis/kern_ndis.c
==
--- head/sys/compat/ndis/kern_ndis.cSun Aug 25 10:28:02 2013
(r254841)
+++ head/sys/compat/ndis/kern_ndis.cSun Aug 25 10:57:09 2013
(r254842)
@@ -482,16 +482,14 @@ ndis_return(dobj, arg)
KeReleaseSpinLock(block-nmb_returnlock, irql);
 }
 
-void
-ndis_return_packet(buf, arg)
-   void*buf;   /* not used */
-   void*arg;
+int
+ndis_return_packet(struct mbuf *m, void *buf, void *arg)
 {
ndis_packet *p;
ndis_miniport_block *block;
 
if (arg == NULL)
-   return;
+   return (EXT_FREE_OK);
 
p = arg;
 
@@ -500,7 +498,7 @@ ndis_return_packet(buf, arg)
 
/* Release packet when refcount hits zero, otherwise return. */
if (p-np_refcnt)
-   return;
+   return (EXT_FREE_OK);
 
block = ((struct ndis_softc *)p-np_softc)-ndis_block;
 
@@ -512,6 +510,8 @@ ndis_return_packet(buf, arg)
IoQueueWorkItem(block-nmb_returnitem,
(io_workitem_func)kernndis_functbl[7].ipt_wrap,
WORKQUEUE_CRITICAL, block);
+
+   return (EXT_FREE_OK);
 }
 
 void

Modified: head/sys/compat/ndis/ndis_var.h
==
--- head/sys/compat/ndis/ndis_var.h Sun Aug 25 10:28:02 2013
(r254841)
+++ head/sys/compat/ndis/ndis_var.h Sun Aug 25 10:57:09 2013
(r254842)
@@ -1743,7 +1743,7 @@ extern int ndis_halt_nic(void *);
 extern int ndis_shutdown_nic(void *);
 extern int ndis_pnpevent_nic(void *, int);
 extern int ndis_init_nic(void *);
-extern void ndis_return_packet(void *, void *);
+extern int ndis_return_packet(struct mbuf *, void *, void *);
 extern int ndis_init_dma(void *);
 extern int ndis_destroy_dma(void *);
 extern int ndis_create_sysctls(void *);

Modified: head/sys/dev/cas/if_cas.c
==
--- head/sys/dev/cas/if_cas.c   Sun Aug 25 10:28:02 2013(r254841)
+++ head/sys/dev/cas/if_cas.c   Sun Aug 25 10:57:09 2013(r254842)
@@ -132,7 +132,7 @@ static void cas_detach(struct cas_softc 
 static int cas_disable_rx(struct cas_softc *sc);
 static int cas_disable_tx(struct cas_softc *sc);
 static voidcas_eint(struct cas_softc *sc, u_int status);
-static voidcas_free(struct mbuf *m, void *arg1, void* arg2);
+static int cas_free(struct mbuf *m, void *arg1, void* arg2);
 static voidcas_init(void *xsc);
 static voidcas_init_locked(struct cas_softc *sc);
 static voidcas_init_regs(struct cas_softc *sc);
@@ -1887,7 +1887,7 @@ cas_rint(struct cas_softc *sc)
 #endif
 }
 
-static void
+static int
 cas_free(struct mbuf *m, void *arg1, void *arg2)
 {
struct cas_rxdsoft *rxds;
@@ -1904,7 +1904,7 @@ cas_free(struct mbuf *m, void *arg1, voi
rxds = sc-sc_rxdsoft[idx];
 #endif
if (refcount_release(rxds-rxds_refcount) == 0)
-   return;
+   return (EXT_FREE_OK);
 
/*
 * NB: this function can be called via m_freem(9) within
@@ -1915,6 +1915,7 @@ cas_free(struct mbuf *m, void *arg1, voi
cas_add_rxdesc(sc, idx);
if (locked == 0)
CAS_UNLOCK(sc);
+   return (EXT_FREE_OK);
 }
 
 static inline void

Modified: head/sys/dev/hatm/if_hatm_intr.c
==
--- head/sys/dev/hatm/if_hatm_intr.cSun Aug 25 10:28:02 2013
(r254841)
+++ head/sys/dev/hatm/if_hatm_intr.cSun Aug 25 10:57:09 2013
(r254842)
@@ -260,7 +260,7 @@ hatm_mbuf_page_alloc(struct hatm_softc *
 /*
  * Free an mbuf and put it onto the free list.
  */
-static void
+static int
 hatm_mbuf0_free(struct mbuf *m, void *buf, void *args)
 {
struct hatm_softc *sc = args;
@@ -270,8 +270,9 @@ hatm_mbuf0_free(struct mbuf *m, void *bu
(freeing unused mbuf %x, c-hdr.flags));
c-hdr.flags = ~MBUF_USED;

svn commit: r254857 - head/sys/sys

2013-08-25 Thread Andre Oppermann
Author: andre
Date: Sun Aug 25 13:30:37 2013
New Revision: 254857
URL: http://svnweb.freebsd.org/changeset/base/254857

Log:
  Fix CSUM compatibility mapping.  SCTP is a layer 4 protocol.
  
  Submitted by: tuexen

Modified:
  head/sys/sys/mbuf.h

Modified: head/sys/sys/mbuf.h
==
--- head/sys/sys/mbuf.h Sun Aug 25 13:10:03 2013(r254856)
+++ head/sys/sys/mbuf.h Sun Aug 25 13:30:37 2013(r254857)
@@ -418,7 +418,7 @@ struct mbuf {
 #defineCSUM_IP_VALID   CSUM_L3_VALID
 #defineCSUM_DATA_VALID CSUM_L4_VALID
 #defineCSUM_PSEUDO_HDR CSUM_L4_CALC
-#defineCSUM_SCTP_VALID CSUM_L3_VALID
+#defineCSUM_SCTP_VALID CSUM_L4_VALID
 #defineCSUM_DELAY_DATA (CSUM_TCP|CSUM_UDP)
 #defineCSUM_DELAY_IP   CSUM_IP /* Only v4, no v6 IP 
hdr csum */
 #defineCSUM_DELAY_DATA_IPV6(CSUM_TCP_IPV6|CSUM_UDP_IPV6)
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r254769 - head/sys/net

2013-08-24 Thread Andre Oppermann
Author: andre
Date: Sat Aug 24 10:13:59 2013
New Revision: 254769
URL: http://svnweb.freebsd.org/changeset/base/254769

Log:
  Introduce typedef for pfil hook callback function and replace all
  spelled out occurrences with it.
  
  Reviewed by:  eri

Modified:
  head/sys/net/pfil.c
  head/sys/net/pfil.h

Modified: head/sys/net/pfil.c
==
--- head/sys/net/pfil.c Sat Aug 24 10:06:51 2013(r254768)
+++ head/sys/net/pfil.c Sat Aug 24 10:13:59 2013(r254769)
@@ -225,8 +225,7 @@ pfil_head_get(int type, u_long val)
  * PFIL_WAITOK OK to call malloc with M_WAITOK.
  */
 int
-pfil_add_hook(int (*func)(void *, struct mbuf **, struct ifnet *, int,
-  struct inpcb *), void *arg, int flags, struct pfil_head *ph)
+pfil_add_hook(pfil_func_t func, void *arg, int flags, struct pfil_head *ph)
 {
struct packet_filter_hook *pfh1 = NULL;
struct packet_filter_hook *pfh2 = NULL;
@@ -285,8 +284,7 @@ error:
  * list.
  */
 int
-pfil_remove_hook(int (*func)(void *, struct mbuf **, struct ifnet *, int,
-struct inpcb *), void *arg, int flags, struct pfil_head *ph)
+pfil_remove_hook(pfil_func_t func, void *arg, int flags, struct pfil_head *ph)
 {
int err = 0;
 
@@ -334,9 +332,7 @@ pfil_list_add(pfil_list_t *list, struct 
  * specified list.
  */
 static int
-pfil_list_remove(pfil_list_t *list,
-int (*func)(void *, struct mbuf **, struct ifnet *, int, struct inpcb *),
-void *arg)
+pfil_list_remove(pfil_list_t *list, pfil_func_t func, void *arg)
 {
struct packet_filter_hook *pfh;
 

Modified: head/sys/net/pfil.h
==
--- head/sys/net/pfil.h Sat Aug 24 10:06:51 2013(r254768)
+++ head/sys/net/pfil.h Sat Aug 24 10:13:59 2013(r254769)
@@ -43,14 +43,16 @@ struct mbuf;
 struct ifnet;
 struct inpcb;
 
+typedefint (*pfil_func_t)(void *, struct mbuf **, struct ifnet *, 
int,
+   struct inpcb *);
+
 /*
  * The packet filter hooks are designed for anything to call them to
  * possibly intercept the packet.
  */
 struct packet_filter_hook {
 TAILQ_ENTRY(packet_filter_hook) pfil_link;
-   int (*pfil_func)(void *, struct mbuf **, struct ifnet *, int,
-   struct inpcb *);
+   pfil_func_t pfil_func;
void*pfil_arg;
 };
 
@@ -87,10 +89,8 @@ struct pfil_head {
LIST_ENTRY(pfil_head) ph_list;
 };
 
-intpfil_add_hook(int (*func)(void *, struct mbuf **, struct ifnet *,
-   int, struct inpcb *), void *, int, struct pfil_head *);
-intpfil_remove_hook(int (*func)(void *, struct mbuf **, struct ifnet *,
-   int, struct inpcb *), void *, int, struct pfil_head *);
+intpfil_add_hook(pfil_func_t, void *, int, struct pfil_head *);
+intpfil_remove_hook(pfil_func_t, void *, int, struct pfil_head *);
 intpfil_run_hooks(struct pfil_head *, struct mbuf **, struct ifnet *,
int, struct inpcb *inp);
 
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r254770 - head/sys/net

2013-08-24 Thread Andre Oppermann
Author: andre
Date: Sat Aug 24 10:30:20 2013
New Revision: 254770
URL: http://svnweb.freebsd.org/changeset/base/254770

Log:
  Convert one instance of pfil hook callback missed in r254769.

Modified:
  head/sys/net/pfil.c

Modified: head/sys/net/pfil.c
==
--- head/sys/net/pfil.c Sat Aug 24 10:13:59 2013(r254769)
+++ head/sys/net/pfil.c Sat Aug 24 10:30:20 2013(r254770)
@@ -53,10 +53,7 @@ MTX_SYSINIT(pfil_heads_lock, pfil_globa
   MTX_DEF);
 
 static int pfil_list_add(pfil_list_t *, struct packet_filter_hook *, int);
-
-static int pfil_list_remove(pfil_list_t *,
-int (*)(void *, struct mbuf **, struct ifnet *, int, struct inpcb *),
-void *);
+static int pfil_list_remove(pfil_list_t *, pfil_func_t, void *);
 
 LIST_HEAD(pfilheadhead, pfil_head);
 VNET_DEFINE(struct pfilheadhead, pfil_head_list);
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r254771 - head/sys/net

2013-08-24 Thread Andre Oppermann
Author: andre
Date: Sat Aug 24 10:36:33 2013
New Revision: 254771
URL: http://svnweb.freebsd.org/changeset/base/254771

Log:
  Internalize pfil_hook_get().  There are no outside consumers of
  this API, it is only safe for internal use and even the pfil(9)
  man page says so in the BUGS section.
  
  Reviewed by:  eri

Modified:
  head/sys/net/pfil.c
  head/sys/net/pfil.h

Modified: head/sys/net/pfil.c
==
--- head/sys/net/pfil.c Sat Aug 24 10:30:20 2013(r254770)
+++ head/sys/net/pfil.c Sat Aug 24 10:36:33 2013(r254771)
@@ -52,6 +52,7 @@ static struct mtx pfil_global_lock;
 MTX_SYSINIT(pfil_heads_lock, pfil_global_lock, pfil_head_list lock,
   MTX_DEF);
 
+static struct packet_filter_hook *pfil_hook_get(int, struct pfil_head *);
 static int pfil_list_add(pfil_list_t *, struct packet_filter_hook *, int);
 static int pfil_list_remove(pfil_list_t *, pfil_func_t, void *);
 
@@ -89,6 +90,18 @@ pfil_run_hooks(struct pfil_head *ph, str
return (rv);
 }
 
+static struct packet_filter_hook *
+pfil_hook_get(int dir, struct pfil_head *ph)
+{
+
+   if (dir == PFIL_IN)
+   return (TAILQ_FIRST(ph-ph_in));
+   else if (dir == PFIL_OUT)
+   return (TAILQ_FIRST(ph-ph_out));
+   else
+   return (NULL);
+}
+
 /*
  * pfil_try_rlock() acquires rm reader lock for specified head
  * if this is immediately possible.

Modified: head/sys/net/pfil.h
==
--- head/sys/net/pfil.h Sat Aug 24 10:30:20 2013(r254770)
+++ head/sys/net/pfil.h Sat Aug 24 10:36:33 2013(r254771)
@@ -132,16 +132,4 @@ struct pfil_head *pfil_head_get(int, u_l
 #definePFIL_LIST_LOCK()mtx_lock(pfil_global_lock)
 #definePFIL_LIST_UNLOCK()  mtx_unlock(pfil_global_lock)
 
-static __inline struct packet_filter_hook *
-pfil_hook_get(int dir, struct pfil_head *ph)
-{
-
-   if (dir == PFIL_IN)
-   return (TAILQ_FIRST(ph-ph_in));
-   else if (dir == PFIL_OUT)
-   return (TAILQ_FIRST(ph-ph_out));
-   else
-   return (NULL);
-}
-
 #endif /* _NET_PFIL_H_ */
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r254772 - head/share/man/man9

2013-08-24 Thread Andre Oppermann
Author: andre
Date: Sat Aug 24 10:38:02 2013
New Revision: 254772
URL: http://svnweb.freebsd.org/changeset/base/254772

Log:
  pfil_hook_get() has been internalized in r254771 and is no longer
  part of the API.  It wasn't safe for external use in any case.

Modified:
  head/share/man/man9/pfil.9

Modified: head/share/man/man9/pfil.9
==
--- head/share/man/man9/pfil.9  Sat Aug 24 10:36:33 2013(r254771)
+++ head/share/man/man9/pfil.9  Sat Aug 24 10:38:02 2013(r254772)
@@ -28,7 +28,7 @@
 .\
 .\ $FreeBSD$
 .\
-.Dd October 22, 2012
+.Dd August 23, 2013
 .Dt PFIL 9
 .Os
 .Sh NAME
@@ -36,7 +36,6 @@
 .Nm pfil_head_register ,
 .Nm pfil_head_unregister ,
 .Nm pfil_head_get ,
-.Nm pfil_hook_get ,
 .Nm pfil_add_hook ,
 .Nm pfil_remove_hook ,
 .Nm pfil_run_hooks ,
@@ -56,8 +55,6 @@
 .Fn pfil_head_unregister struct pfil_head *head
 .Ft struct pfil_head *
 .Fn pfil_head_get int af u_long dlt
-.Ft struct packet_filter_hook *
-.Fn pfil_hook_get int dir struct pfil_head *head
 .Ft void
 .Fn pfil_add_hook int (*func)() void *arg int flags struct pfil_head *
 .Ft void
@@ -245,11 +242,7 @@ lock export was added in
 .Fx 10.0 .
 .Sh BUGS
 The
-.Fn pfil_hook_get
-function
-is only safe for internal use.
-.Pp
-When a
+.Fn When a
 .Vt pfil_head
 is being modified, no traffic is diverted
 (to avoid deadlock).
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r254773 - head/sys/net

2013-08-24 Thread Andre Oppermann
Author: andre
Date: Sat Aug 24 11:17:25 2013
New Revision: 254773
URL: http://svnweb.freebsd.org/changeset/base/254773

Log:
  Resolve the confusion between the head_list and the hook list.
  
  The linked list of pfil hooks is changed to chain and this term
  is applied consistently.  The head_list remains with list term.
  
  Add KASSERT to vnet_pfil_uninit().
  
  Update and extend comments.
  
  Reviewed by:  eri (previous version)

Modified:
  head/sys/net/pfil.c
  head/sys/net/pfil.h

Modified: head/sys/net/pfil.c
==
--- head/sys/net/pfil.c Sat Aug 24 10:38:02 2013(r254772)
+++ head/sys/net/pfil.c Sat Aug 24 11:17:25 2013(r254773)
@@ -52,9 +52,9 @@ static struct mtx pfil_global_lock;
 MTX_SYSINIT(pfil_heads_lock, pfil_global_lock, pfil_head_list lock,
   MTX_DEF);
 
-static struct packet_filter_hook *pfil_hook_get(int, struct pfil_head *);
-static int pfil_list_add(pfil_list_t *, struct packet_filter_hook *, int);
-static int pfil_list_remove(pfil_list_t *, pfil_func_t, void *);
+static struct packet_filter_hook *pfil_chain_get(int, struct pfil_head *);
+static int pfil_chain_add(pfil_chain_t *, struct packet_filter_hook *, int);
+static int pfil_chain_remove(pfil_chain_t *, pfil_func_t, void *);
 
 LIST_HEAD(pfilheadhead, pfil_head);
 VNET_DEFINE(struct pfilheadhead, pfil_head_list);
@@ -63,7 +63,7 @@ VNET_DEFINE(struct rmlock, pfil_lock);
 #defineV_pfil_lock VNET(pfil_lock)
 
 /*
- * pfil_run_hooks() runs the specified packet filter hooks.
+ * pfil_run_hooks() runs the specified packet filter hook chain.
  */
 int
 pfil_run_hooks(struct pfil_head *ph, struct mbuf **mp, struct ifnet *ifp,
@@ -76,8 +76,8 @@ pfil_run_hooks(struct pfil_head *ph, str
 
PFIL_RLOCK(ph, rmpt);
KASSERT(ph-ph_nhooks = 0, (Pfil hook count dropped  0));
-   for (pfh = pfil_hook_get(dir, ph); pfh != NULL;
-pfh = TAILQ_NEXT(pfh, pfil_link)) {
+   for (pfh = pfil_chain_get(dir, ph); pfh != NULL;
+pfh = TAILQ_NEXT(pfh, pfil_chain)) {
if (pfh-pfil_func != NULL) {
rv = (*pfh-pfil_func)(pfh-pfil_arg, m, ifp, dir,
inp);
@@ -91,7 +91,7 @@ pfil_run_hooks(struct pfil_head *ph, str
 }
 
 static struct packet_filter_hook *
-pfil_hook_get(int dir, struct pfil_head *ph)
+pfil_chain_get(int dir, struct pfil_head *ph)
 {
 
if (dir == PFIL_IN)
@@ -163,6 +163,7 @@ pfil_wowned(struct pfil_head *ph)
 
return (PFIL_WOWNED(ph));
 }
+
 /*
  * pfil_head_register() registers a pfil_head with the packet filter hook
  * mechanism.
@@ -202,9 +203,9 @@ pfil_head_unregister(struct pfil_head *p
PFIL_LIST_LOCK();
LIST_REMOVE(ph, ph_list);
PFIL_LIST_UNLOCK();
-   TAILQ_FOREACH_SAFE(pfh, ph-ph_in, pfil_link, pfnext)
+   TAILQ_FOREACH_SAFE(pfh, ph-ph_in, pfil_chain, pfnext)
free(pfh, M_IFADDR);
-   TAILQ_FOREACH_SAFE(pfh, ph-ph_out, pfil_link, pfnext)
+   TAILQ_FOREACH_SAFE(pfh, ph-ph_out, pfil_chain, pfnext)
free(pfh, M_IFADDR);
PFIL_LOCK_DESTROY(ph);
return (0);
@@ -261,7 +262,7 @@ pfil_add_hook(pfil_func_t func, void *ar
if (flags  PFIL_IN) {
pfh1-pfil_func = func;
pfh1-pfil_arg = arg;
-   err = pfil_list_add(ph-ph_in, pfh1, flags  ~PFIL_OUT);
+   err = pfil_chain_add(ph-ph_in, pfh1, flags  ~PFIL_OUT);
if (err)
goto locked_error;
ph-ph_nhooks++;
@@ -269,10 +270,10 @@ pfil_add_hook(pfil_func_t func, void *ar
if (flags  PFIL_OUT) {
pfh2-pfil_func = func;
pfh2-pfil_arg = arg;
-   err = pfil_list_add(ph-ph_out, pfh2, flags  ~PFIL_IN);
+   err = pfil_chain_add(ph-ph_out, pfh2, flags  ~PFIL_IN);
if (err) {
if (flags  PFIL_IN)
-   pfil_list_remove(ph-ph_in, func, arg);
+   pfil_chain_remove(ph-ph_in, func, arg);
goto locked_error;
}
ph-ph_nhooks++;
@@ -291,7 +292,7 @@ error:
 
 /*
  * pfil_remove_hook removes a specific function from the packet filter hook
- * list.
+ * chain.
  */
 int
 pfil_remove_hook(pfil_func_t func, void *arg, int flags, struct pfil_head *ph)
@@ -300,12 +301,12 @@ pfil_remove_hook(pfil_func_t func, void 
 
PFIL_WLOCK(ph);
if (flags  PFIL_IN) {
-   err = pfil_list_remove(ph-ph_in, func, arg);
+   err = pfil_chain_remove(ph-ph_in, func, arg);
if (err == 0)
ph-ph_nhooks--;
}
if ((err == 0)  (flags  PFIL_OUT)) {
-   err = pfil_list_remove(ph-ph_out, func, arg);
+   err = pfil_chain_remove(ph-ph_out, func, arg);
if (err == 0)
ph-ph_nhooks--;
  

svn commit: r254774 - head/sys/net

2013-08-24 Thread Andre Oppermann
Author: andre
Date: Sat Aug 24 11:24:15 2013
New Revision: 254774
URL: http://svnweb.freebsd.org/changeset/base/254774

Log:
  ename PFIL_LIST_[UN]LOCK() to PFIL_HEADLIST_[UN]LOCK() to avoid
  confusion with the pfil_head chain locking macros.

Modified:
  head/sys/net/pfil.c
  head/sys/net/pfil.h

Modified: head/sys/net/pfil.c
==
--- head/sys/net/pfil.c Sat Aug 24 11:17:25 2013(r254773)
+++ head/sys/net/pfil.c Sat Aug 24 11:24:15 2013(r254774)
@@ -173,11 +173,11 @@ pfil_head_register(struct pfil_head *ph)
 {
struct pfil_head *lph;
 
-   PFIL_LIST_LOCK();
+   PFIL_HEADLIST_LOCK();
LIST_FOREACH(lph, V_pfil_head_list, ph_list) {
if (ph-ph_type == lph-ph_type 
ph-ph_un.phu_val == lph-ph_un.phu_val) {
-   PFIL_LIST_UNLOCK();
+   PFIL_HEADLIST_UNLOCK();
return (EEXIST);
}
}
@@ -186,7 +186,7 @@ pfil_head_register(struct pfil_head *ph)
TAILQ_INIT(ph-ph_in);
TAILQ_INIT(ph-ph_out);
LIST_INSERT_HEAD(V_pfil_head_list, ph, ph_list);
-   PFIL_LIST_UNLOCK();
+   PFIL_HEADLIST_UNLOCK();
return (0);
 }
 
@@ -200,9 +200,9 @@ pfil_head_unregister(struct pfil_head *p
 {
struct packet_filter_hook *pfh, *pfnext;

-   PFIL_LIST_LOCK();
+   PFIL_HEADLIST_LOCK();
LIST_REMOVE(ph, ph_list);
-   PFIL_LIST_UNLOCK();
+   PFIL_HEADLIST_UNLOCK();
TAILQ_FOREACH_SAFE(pfh, ph-ph_in, pfil_chain, pfnext)
free(pfh, M_IFADDR);
TAILQ_FOREACH_SAFE(pfh, ph-ph_out, pfil_chain, pfnext)
@@ -219,11 +219,11 @@ pfil_head_get(int type, u_long val)
 {
struct pfil_head *ph;
 
-   PFIL_LIST_LOCK();
+   PFIL_HEADLIST_LOCK();
LIST_FOREACH(ph, V_pfil_head_list, ph_list)
if (ph-ph_type == type  ph-ph_un.phu_val == val)
break;
-   PFIL_LIST_UNLOCK();
+   PFIL_HEADLIST_UNLOCK();
return (ph);
 }
 

Modified: head/sys/net/pfil.h
==
--- head/sys/net/pfil.h Sat Aug 24 11:17:25 2013(r254773)
+++ head/sys/net/pfil.h Sat Aug 24 11:24:15 2013(r254774)
@@ -138,7 +138,9 @@ int pfil_wowned(struct pfil_head *ph);
 #definePFIL_RUNLOCK(p, t)  rm_runlock((p)-ph_plock, (t))
 #definePFIL_WUNLOCK(p) rm_wunlock((p)-ph_plock)
 #definePFIL_WOWNED(p)  rm_wowned((p)-ph_plock)
-#definePFIL_LIST_LOCK()mtx_lock(pfil_global_lock)
-#definePFIL_LIST_UNLOCK()  mtx_unlock(pfil_global_lock)
+
+/* Internal locking macros for global/vnet pfil_head_list. */
+#definePFIL_HEADLIST_LOCK()mtx_lock(pfil_global_lock)
+#definePFIL_HEADLIST_UNLOCK()  mtx_unlock(pfil_global_lock)
 
 #endif /* _NET_PFIL_H_ */
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r254775 - head/share/man/man9

2013-08-24 Thread Andre Oppermann
Author: andre
Date: Sat Aug 24 11:57:02 2013
New Revision: 254775
URL: http://svnweb.freebsd.org/changeset/base/254775

Log:
  Adjust for the pfil_func_t typedef added in r254769.

Modified:
  head/share/man/man9/pfil.9

Modified: head/share/man/man9/pfil.9
==
--- head/share/man/man9/pfil.9  Sat Aug 24 11:24:15 2013(r254774)
+++ head/share/man/man9/pfil.9  Sat Aug 24 11:57:02 2013(r254775)
@@ -49,6 +49,8 @@
 .In sys/mbuf.h
 .In net/if.h
 .In net/pfil.h
+.Bd -literal
+typedef int (*pfil_func_t)(void *arg, struct mbuf **mp, struct ifnet *, int 
dir, struct inpcb);
 .Ft int
 .Fn pfil_head_register struct pfil_head *head
 .Ft int
@@ -56,11 +58,9 @@
 .Ft struct pfil_head *
 .Fn pfil_head_get int af u_long dlt
 .Ft void
-.Fn pfil_add_hook int (*func)() void *arg int flags struct pfil_head *
+.Fn pfil_add_hook pfil_func_t void *arg int flags struct pfil_head *
 .Ft void
-.Fn pfil_remove_hook int (*func)() void *arg int flags struct pfil_head 
*
-.Ft int
-.Fn (*func) void *arg struct mbuf **mp struct ifnet * int dir struct 
inpcb *
+.Fn pfil_remove_hook pfil_func_t void *arg int flags struct pfil_head *
 .Ft int
 .Fn pfil_run_hooks struct pfil_head *head struct mbuf **mp struct ifnet 
* int dir struct inpcb *
 .Ft void
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r254777 - head/sys/net

2013-08-24 Thread Andre Oppermann
Author: andre
Date: Sat Aug 24 12:03:24 2013
New Revision: 254777
URL: http://svnweb.freebsd.org/changeset/base/254777

Log:
  Whitespace, style cleanups, and improved comments.

Modified:
  head/sys/net/pfil.h

Modified: head/sys/net/pfil.h
==
--- head/sys/net/pfil.h Sat Aug 24 11:59:51 2013(r254776)
+++ head/sys/net/pfil.h Sat Aug 24 12:03:24 2013(r254777)
@@ -52,9 +52,9 @@ typedef   int (*pfil_func_t)(void *, struc
  * together and after each other in the specified order.
  */
 struct packet_filter_hook {
-TAILQ_ENTRY(packet_filter_hook) pfil_chain;
-   pfil_func_t pfil_func;
-   void*pfil_arg;
+   TAILQ_ENTRY(packet_filter_hook) pfil_chain;
+   pfil_func_t  pfil_func;
+   void*pfil_arg;
 };
 
 #define PFIL_IN0x0001
@@ -74,23 +74,23 @@ typedef TAILQ_HEAD(pfil_chain, packet_fi
  * For packet is then run through the hook chain for inspection.
  */
 struct pfil_head {
-   pfil_chain_tph_in;
-   pfil_chain_tph_out;
-   int ph_type;
-   int ph_nhooks;
+   pfil_chain_t ph_in;
+   pfil_chain_t ph_out;
+   int  ph_type;
+   int  ph_nhooks;
 #if defined( __linux__ ) || defined( _WIN32 )
-   rwlock_tph_mtx;
+   rwlock_t ph_mtx;
 #else
struct rmlock   *ph_plock;  /* Pointer to the used lock */
-   struct rmlock   ph_lock;/* Private lock storage */
-   int flags;
+   struct rmlockph_lock;   /* Private lock storage */
+   int  flags;
 #endif
union {
-   u_long  phu_val;
-   void*phu_ptr;
+   u_long   phu_val;
+   void*phu_ptr;
} ph_un;
-#defineph_af   ph_un.phu_val
-#defineph_ifnetph_un.phu_ptr
+#defineph_afph_un.phu_val
+#defineph_ifnet ph_un.phu_ptr
LIST_ENTRY(pfil_head) ph_list;
 };
 
@@ -98,6 +98,7 @@ struct pfil_head {
 struct pfil_head *pfil_head_get(int, u_long);
 intpfil_add_hook(pfil_func_t, void *, int, struct pfil_head *);
 intpfil_remove_hook(pfil_func_t, void *, int, struct pfil_head *);
+#definePFIL_HOOKED(p) ((p)-ph_nhooks  0)
 
 /* Public functions to run the packet inspection by protocols. */
 intpfil_run_hooks(struct pfil_head *, struct mbuf **, struct ifnet *,
@@ -107,16 +108,16 @@ int   pfil_run_hooks(struct pfil_head *, s
 intpfil_head_register(struct pfil_head *);
 intpfil_head_unregister(struct pfil_head *);
 
-/* Internal pfil locking functions. */
+/* Public pfil locking functions for self managed locks by packet filters. */
 struct rm_priotracker; /* Do not require including rmlock header */
-int pfil_try_rlock(struct pfil_head *, struct rm_priotracker *);
-void pfil_rlock(struct pfil_head *, struct rm_priotracker *);
-void pfil_runlock(struct pfil_head *, struct rm_priotracker *);
-void pfil_wlock(struct pfil_head *);
-void pfil_wunlock(struct pfil_head *);
-int pfil_wowned(struct pfil_head *ph);
+intpfil_try_rlock(struct pfil_head *, struct rm_priotracker *);
+void   pfil_rlock(struct pfil_head *, struct rm_priotracker *);
+void   pfil_runlock(struct pfil_head *, struct rm_priotracker *);
+void   pfil_wlock(struct pfil_head *);
+void   pfil_wunlock(struct pfil_head *);
+intpfil_wowned(struct pfil_head *ph);
 
-#definePFIL_HOOKED(p) ((p)-ph_nhooks  0)
+/* Internal pfil locking functions. */
 #definePFIL_LOCK_INIT_REAL(l, t)   \
rm_init_flags(l, PFil  t  rmlock, RM_RECURSE)
 #definePFIL_LOCK_DESTROY_REAL(l)   \
@@ -132,6 +133,7 @@ int pfil_wowned(struct pfil_head *ph);
if ((p)-flags  PFIL_FLAG_PRIVATE_LOCK)\
PFIL_LOCK_DESTROY_REAL((p)-ph_plock);  \
 } while (0)
+
 #definePFIL_TRY_RLOCK(p, t)rm_try_rlock((p)-ph_plock, (t))
 #definePFIL_RLOCK(p, t)rm_rlock((p)-ph_plock, (t))
 #definePFIL_WLOCK(p)   rm_wlock((p)-ph_plock)
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r254779 - head/sys/kern

2013-08-24 Thread Andre Oppermann
Author: andre
Date: Sat Aug 24 12:24:58 2013
New Revision: 254779
URL: http://svnweb.freebsd.org/changeset/base/254779

Log:
  Avoid code duplication for mbuf initialization and use m_init() instead
  in mb_ctor_mbuf() and mb_ctor_pack().

Modified:
  head/sys/kern/kern_mbuf.c

Modified: head/sys/kern/kern_mbuf.c
==
--- head/sys/kern/kern_mbuf.c   Sat Aug 24 12:10:11 2013(r254778)
+++ head/sys/kern/kern_mbuf.c   Sat Aug 24 12:24:58 2013(r254779)
@@ -410,18 +410,14 @@ mb_ctor_mbuf(void *mem, int size, void *
 {
struct mbuf *m;
struct mb_args *args;
-#ifdef MAC
int error;
-#endif
int flags;
short type;
 
 #ifdef INVARIANTS
trash_ctor(mem, size, arg, how);
 #endif
-   m = (struct mbuf *)mem;
args = (struct mb_args *)arg;
-   flags = args-flags;
type = args-type;
 
/*
@@ -431,32 +427,12 @@ mb_ctor_mbuf(void *mem, int size, void *
if (type == MT_NOINIT)
return (0);
 
-   m-m_next = NULL;
-   m-m_nextpkt = NULL;
-   m-m_len = 0;
-   m-m_flags = flags;
-   m-m_type = type;
-   if (flags  M_PKTHDR) {
-   m-m_data = m-m_pktdat;
-   m-m_pkthdr.rcvif = NULL;
-   m-m_pkthdr.header = NULL;
-   m-m_pkthdr.len = 0;
-   m-m_pkthdr.csum_flags = 0;
-   m-m_pkthdr.csum_data = 0;
-   m-m_pkthdr.tso_segsz = 0;
-   m-m_pkthdr.ether_vtag = 0;
-   m-m_pkthdr.flowid = 0;
-   m-m_pkthdr.fibnum = 0;
-   SLIST_INIT(m-m_pkthdr.tags);
-#ifdef MAC
-   /* If the label init fails, fail the alloc */
-   error = mac_mbuf_init(m, how);
-   if (error)
-   return (error);
-#endif
-   } else
-   m-m_data = m-m_dat;
-   return (0);
+   m = (struct mbuf *)mem;
+   flags = args-flags;
+
+   error = m_init(m, NULL, size, how, type, flags);
+
+   return (error);
 }
 
 /*
@@ -656,34 +632,14 @@ mb_ctor_pack(void *mem, int size, void *
 #ifdef INVARIANTS
trash_ctor(m-m_ext.ext_buf, MCLBYTES, arg, how);
 #endif
-   m-m_next = NULL;
-   m-m_nextpkt = NULL;
-   m-m_data = m-m_ext.ext_buf;
-   m-m_len = 0;
-   m-m_flags = (flags | M_EXT);
-   m-m_type = type;
-
-   if (flags  M_PKTHDR) {
-   m-m_pkthdr.rcvif = NULL;
-   m-m_pkthdr.len = 0;
-   m-m_pkthdr.header = NULL;
-   m-m_pkthdr.csum_flags = 0;
-   m-m_pkthdr.csum_data = 0;
-   m-m_pkthdr.tso_segsz = 0;
-   m-m_pkthdr.ether_vtag = 0;
-   m-m_pkthdr.flowid = 0;
-   m-m_pkthdr.fibnum = 0;
-   SLIST_INIT(m-m_pkthdr.tags);
-#ifdef MAC
-   /* If the label init fails, fail the alloc */
-   error = mac_mbuf_init(m, how);
-   if (error)
-   return (error);
-#endif
-   }
+
+   error = m_init(m, NULL, size, how, type, flags);
+
/* m_ext is already initialized. */
+   m-m_data = m-m_ext.ext_buf;
+   m-m_flags = (flags | M_EXT);
 
-   return (0);
+   return (error);
 }
 
 int
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r254780 - in head/sys: kern sys

2013-08-24 Thread Andre Oppermann
Author: andre
Date: Sat Aug 24 13:15:42 2013
New Revision: 254780
URL: http://svnweb.freebsd.org/changeset/base/254780

Log:
  dd a 24 bits wide ext_flags field to m_ext by reducing ext_type
  to 8 bits.  ext_type is an enumerator and the number of types we
  have is a mere dozen.
  
  A couple of ext_types are renumbered to fit within 8 bits.
  
  EXT_VENDOR[1-4] and EXT_EXP[1-4] types for vendor-internal and
  experimental local mapping.
  
  The ext_flags field is currently unused but has a couple of flags
  already defined for future use.  Again vendor and experimental
  flags are provided for local mapping.
  
  EXT_FLAG_BITS is provided for the printf(9) %b identifier.
  
  Initialize and copy ext_flags in the relevant mbuf functions.
  
  Improve alignment and packing of struct m_ext on 32 and 64 archs
  by carefully sorting the fields.

Modified:
  head/sys/kern/kern_mbuf.c
  head/sys/kern/uipc_mbuf.c
  head/sys/sys/mbuf.h

Modified: head/sys/kern/kern_mbuf.c
==
--- head/sys/kern/kern_mbuf.c   Sat Aug 24 12:24:58 2013(r254779)
+++ head/sys/kern/kern_mbuf.c   Sat Aug 24 13:15:42 2013(r254780)
@@ -547,6 +547,7 @@ mb_ctor_clust(void *mem, int size, void 
m-m_ext.ext_arg2 = NULL;
m-m_ext.ext_size = size;
m-m_ext.ext_type = type;
+   m-m_ext.ext_flags = 0;
m-m_ext.ref_cnt = refcnt;
}
 

Modified: head/sys/kern/uipc_mbuf.c
==
--- head/sys/kern/uipc_mbuf.c   Sat Aug 24 12:24:58 2013(r254779)
+++ head/sys/kern/uipc_mbuf.c   Sat Aug 24 13:15:42 2013(r254780)
@@ -267,6 +267,7 @@ m_extadd(struct mbuf *mb, caddr_t buf, u
mb-m_ext.ext_arg1 = arg1;
mb-m_ext.ext_arg2 = arg2;
mb-m_ext.ext_type = type;
+   mb-m_ext.ext_flags = 0;
 
return (0);
 }
@@ -342,6 +343,7 @@ mb_free_ext(struct mbuf *m)
m-m_ext.ref_cnt = NULL;
m-m_ext.ext_size = 0;
m-m_ext.ext_type = 0;
+   m-m_ext.ext_flags = 0;
m-m_flags = ~M_EXT;
uma_zfree(zone_mbuf, m);
 }
@@ -368,6 +370,7 @@ mb_dupcl(struct mbuf *n, struct mbuf *m)
n-m_ext.ext_size = m-m_ext.ext_size;
n-m_ext.ref_cnt = m-m_ext.ref_cnt;
n-m_ext.ext_type = m-m_ext.ext_type;
+   n-m_ext.ext_flags = m-m_ext.ext_flags;
n-m_flags |= M_EXT;
n-m_flags |= m-m_flags  M_RDONLY;
 }

Modified: head/sys/sys/mbuf.h
==
--- head/sys/sys/mbuf.h Sat Aug 24 12:24:58 2013(r254779)
+++ head/sys/sys/mbuf.h Sat Aug 24 13:15:42 2013(r254780)
@@ -138,16 +138,19 @@ struct pkthdr {
 /*
  * Description of external storage mapped into mbuf; valid only if M_EXT is
  * set.
+ * Size ILP32: 28
+ *  LP64: 48
  */
 struct m_ext {
+   volatile u_int  *ref_cnt;   /* pointer to ref count info */
caddr_t  ext_buf;   /* start of buffer */
+   uint32_t ext_size;  /* size of buffer, for ext_free */
+   uint32_t ext_type:8,/* type of external storage */
+ext_flags:24;  /* external storage mbuf flags */
void(*ext_free) /* free routine if not the usual */
(void *, void *);
void*ext_arg1;  /* optional argument pointer */
void*ext_arg2;  /* optional argument pointer */
-   u_intext_size;  /* size of buffer, for ext_free */
-   volatile u_int  *ref_cnt;   /* pointer to ref count info */
-   int  ext_type;  /* type of external storage */
 };
 
 /*
@@ -269,7 +272,7 @@ struct mbuf {
  M_PROTOFLAGS|M_HASHTYPEBITS)
 
 /*
- * External buffer types: identify ext_buf type.
+ * External mbuf storage buffer types.
  */
 #defineEXT_CLUSTER 1   /* mbuf cluster */
 #defineEXT_SFBUF   2   /* sendfile(2)'s sf_bufs */
@@ -278,10 +281,48 @@ struct mbuf {
 #defineEXT_JUMBO16 5   /* jumbo cluster 16184 bytes */
 #defineEXT_PACKET  6   /* mbuf+cluster from packet zone */
 #defineEXT_MBUF7   /* external mbuf reference (M_IOVEC) */
-#defineEXT_NET_DRV 100 /* custom ext_buf provided by net 
driver(s) */
-#defineEXT_MOD_TYPE200 /* custom module's ext_buf type */
-#defineEXT_DISPOSABLE  300 /* can throw this buffer away w/page 
flipping */
-#defineEXT_EXTREF  400 /* has externally maintained ref_cnt 
ptr */
+
+#defineEXT_VENDOR1 224 /* for vendor-internal use */
+#defineEXT_VENDOR2 225 /* for vendor-internal use */
+#defineEXT_VENDOR3 226 /* for vendor-internal use */
+#defineEXT_VENDOR4 227 /* for vendor-internal use */
+
+#define

Re: svn commit: r254780 - in head/sys: kern sys

2013-08-24 Thread Andre Oppermann

On 24.08.2013 15:15, Andre Oppermann wrote:

Author: andre
Date: Sat Aug 24 13:15:42 2013
New Revision: 254780
URL: http://svnweb.freebsd.org/changeset/base/254780

Log:
   dd a 24 bits wide ext_flags field to m_ext by reducing ext_type
   to 8 bits.  ext_type is an enumerator and the number of types we
   have is a mere dozen.

   A couple of ext_types are renumbered to fit within 8 bits.

   EXT_VENDOR[1-4] and EXT_EXP[1-4] types for vendor-internal and
   experimental local mapping.

   The ext_flags field is currently unused but has a couple of flags
   already defined for future use.  Again vendor and experimental
   flags are provided for local mapping.

   EXT_FLAG_BITS is provided for the printf(9) %b identifier.

   Initialize and copy ext_flags in the relevant mbuf functions.

   Improve alignment and packing of struct m_ext on 32 and 64 archs
   by carefully sorting the fields.


Sponsored by:   The FreeBSD Foundation

--
Andre

___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r254799 - in head/sys: dev/cas dev/hatm dev/iscsi_initiator dev/lge dev/mwl kern sys

2013-08-24 Thread Andre Oppermann
Author: andre
Date: Sat Aug 24 16:57:44 2013
New Revision: 254799
URL: http://svnweb.freebsd.org/changeset/base/254799

Log:
  Add an mbuf pointer parameter to (*ext_free) to give the external
  free function access to the mbuf the external memory was attached
  to.
  
  Mechanically adjust all users to include the mbuf parameter.
  
  This fixes a long standing annoyance for external free functions.
  Before one had to sacrifice one of the argument pointers for this.
  
  Sponsored by: The FreeBSD Foundation

Modified:
  head/sys/dev/cas/if_cas.c
  head/sys/dev/hatm/if_hatm_intr.c
  head/sys/dev/iscsi_initiator/isc_soc.c
  head/sys/dev/lge/if_lge.c
  head/sys/dev/mwl/if_mwl.c
  head/sys/kern/subr_mbpool.c
  head/sys/kern/uipc_mbuf.c
  head/sys/kern/uipc_syscalls.c
  head/sys/sys/mbpool.h
  head/sys/sys/mbuf.h
  head/sys/sys/sf_buf.h

Modified: head/sys/dev/cas/if_cas.c
==
--- head/sys/dev/cas/if_cas.c   Sat Aug 24 16:55:53 2013(r254798)
+++ head/sys/dev/cas/if_cas.c   Sat Aug 24 16:57:44 2013(r254799)
@@ -132,7 +132,7 @@ static void cas_detach(struct cas_softc 
 static int cas_disable_rx(struct cas_softc *sc);
 static int cas_disable_tx(struct cas_softc *sc);
 static voidcas_eint(struct cas_softc *sc, u_int status);
-static voidcas_free(void *arg1, void* arg2);
+static voidcas_free(struct mbuf *m, void *arg1, void* arg2);
 static voidcas_init(void *xsc);
 static voidcas_init_locked(struct cas_softc *sc);
 static voidcas_init_regs(struct cas_softc *sc);
@@ -1888,7 +1888,7 @@ cas_rint(struct cas_softc *sc)
 }
 
 static void
-cas_free(void *arg1, void *arg2)
+cas_free(struct mbuf *m, void *arg1, void *arg2)
 {
struct cas_rxdsoft *rxds;
struct cas_softc *sc;

Modified: head/sys/dev/hatm/if_hatm_intr.c
==
--- head/sys/dev/hatm/if_hatm_intr.cSat Aug 24 16:55:53 2013
(r254798)
+++ head/sys/dev/hatm/if_hatm_intr.cSat Aug 24 16:57:44 2013
(r254799)
@@ -261,7 +261,7 @@ hatm_mbuf_page_alloc(struct hatm_softc *
  * Free an mbuf and put it onto the free list.
  */
 static void
-hatm_mbuf0_free(void *buf, void *args)
+hatm_mbuf0_free(struct mbuf *m, void *buf, void *args)
 {
struct hatm_softc *sc = args;
struct mbuf0_chunk *c = buf;
@@ -272,7 +272,7 @@ hatm_mbuf0_free(void *buf, void *args)
hatm_ext_free(sc-mbuf_list[0], (struct mbufx_free *)c);
 }
 static void
-hatm_mbuf1_free(void *buf, void *args)
+hatm_mbuf1_free(struct mbuf *m, void *buf, void *args)
 {
struct hatm_softc *sc = args;
struct mbuf1_chunk *c = buf;
@@ -461,7 +461,7 @@ hatm_rx_buffer(struct hatm_softc *sc, u_
hatm_mbuf0_free, c0, sc, M_PKTHDR, EXT_EXTREF);
m-m_data += MBUF0_OFFSET;
} else
-   hatm_mbuf0_free(c0, sc);
+   hatm_mbuf0_free(NULL, c0, sc);
 
} else {
struct mbuf1_chunk *c1;
@@ -485,7 +485,7 @@ hatm_rx_buffer(struct hatm_softc *sc, u_
hatm_mbuf1_free, c1, sc, M_PKTHDR, EXT_EXTREF);
m-m_data += MBUF1_OFFSET;
} else
-   hatm_mbuf1_free(c1, sc);
+   hatm_mbuf1_free(NULL, c1, sc);
}
 
return (m);

Modified: head/sys/dev/iscsi_initiator/isc_soc.c
==
--- head/sys/dev/iscsi_initiator/isc_soc.c  Sat Aug 24 16:55:53 2013
(r254798)
+++ head/sys/dev/iscsi_initiator/isc_soc.c  Sat Aug 24 16:57:44 2013
(r254799)
@@ -69,7 +69,7 @@ static int ou_refcnt = 0;
  | function for freeing external storage for mbuf
  */
 static void
-ext_free(void *a, void *b)
+ext_free(struct mbuf *m, void *a, void *b)
 {
  pduq_t *pq = b;
 

Modified: head/sys/dev/lge/if_lge.c
==
--- head/sys/dev/lge/if_lge.c   Sat Aug 24 16:55:53 2013(r254798)
+++ head/sys/dev/lge/if_lge.c   Sat Aug 24 16:57:44 2013(r254799)
@@ -122,7 +122,7 @@ static int lge_detach(device_t);
 static int lge_alloc_jumbo_mem(struct lge_softc *);
 static void lge_free_jumbo_mem(struct lge_softc *);
 static void *lge_jalloc(struct lge_softc *);
-static void lge_jfree(void *, void *);
+static void lge_jfree(struct mbuf *, void *, void *);
 
 static int lge_newbuf(struct lge_softc *, struct lge_rx_desc *, struct mbuf *);
 static int lge_encap(struct lge_softc *, struct mbuf *, u_int32_t *);
@@ -847,9 +847,7 @@ lge_jalloc(sc)
  * Release a jumbo buffer.
  */
 static void
-lge_jfree(buf, args)
-   void*buf;
-   void*args;
+lge_jfree(struct mbuf *m, void *buf, void *args)
 {
struct lge_softc*sc;
int i;


svn commit: r254800 - in head/sys/dev: cxgb sfxge

2013-08-24 Thread Andre Oppermann
Author: andre
Date: Sat Aug 24 17:14:14 2013
New Revision: 254800
URL: http://svnweb.freebsd.org/changeset/base/254800

Log:
  Remove unnecessary setup of the m-pkthdr.header pointer.
  
  Sponsored by: The FreeBSD Foundation

Modified:
  head/sys/dev/cxgb/cxgb_sge.c
  head/sys/dev/sfxge/sfxge_rx.c

Modified: head/sys/dev/cxgb/cxgb_sge.c
==
--- head/sys/dev/cxgb/cxgb_sge.cSat Aug 24 16:57:44 2013
(r254799)
+++ head/sys/dev/cxgb/cxgb_sge.cSat Aug 24 17:14:14 2013
(r254800)
@@ -2634,7 +2634,6 @@ t3_rx_eth(struct adapter *adap, struct m
} 
 
m-m_pkthdr.rcvif = ifp;
-   m-m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad;
/*
 * adjust after conversion to mbuf chain
 */

Modified: head/sys/dev/sfxge/sfxge_rx.c
==
--- head/sys/dev/sfxge/sfxge_rx.c   Sat Aug 24 16:57:44 2013
(r254799)
+++ head/sys/dev/sfxge/sfxge_rx.c   Sat Aug 24 17:14:14 2013
(r254800)
@@ -282,7 +282,6 @@ static void __sfxge_rx_deliver(struct sf
struct ifnet *ifp = sc-ifnet;
 
m-m_pkthdr.rcvif = ifp;
-   m-m_pkthdr.header = m-m_data;
m-m_pkthdr.csum_data = 0x;
ifp-if_input(ifp, m);
 }
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r254803 - in head/sys/dev: jme nfe

2013-08-24 Thread Andre Oppermann
Author: andre
Date: Sat Aug 24 19:38:36 2013
New Revision: 254803
URL: http://svnweb.freebsd.org/changeset/base/254803

Log:
  Change local variable tso_segsz to tsosegsz to avoid mbuf.h macro conflicts.
  
  Sponsored by: The FreeBSD Foundation

Modified:
  head/sys/dev/jme/if_jme.c
  head/sys/dev/nfe/if_nfe.c

Modified: head/sys/dev/jme/if_jme.c
==
--- head/sys/dev/jme/if_jme.c   Sat Aug 24 19:02:36 2013(r254802)
+++ head/sys/dev/jme/if_jme.c   Sat Aug 24 19:38:36 2013(r254803)
@@ -1690,7 +1690,7 @@ jme_encap(struct jme_softc *sc, struct m
struct mbuf *m;
bus_dma_segment_t txsegs[JME_MAXTXSEGS];
int error, i, nsegs, prod;
-   uint32_t cflags, tso_segsz;
+   uint32_t cflags, tsosegsz;
 
JME_LOCK_ASSERT(sc);
 
@@ -1808,10 +1808,10 @@ jme_encap(struct jme_softc *sc, struct m
 
m = *m_head;
cflags = 0;
-   tso_segsz = 0;
+   tsosegsz = 0;
/* Configure checksum offload and TSO. */
if ((m-m_pkthdr.csum_flags  CSUM_TSO) != 0) {
-   tso_segsz = (uint32_t)m-m_pkthdr.tso_segsz 
+   tsosegsz = (uint32_t)m-m_pkthdr.tso_segsz 
JME_TD_MSS_SHIFT;
cflags |= JME_TD_TSO;
} else {
@@ -1830,7 +1830,7 @@ jme_encap(struct jme_softc *sc, struct m
 
desc = sc-jme_rdata.jme_tx_ring[prod];
desc-flags = htole32(cflags);
-   desc-buflen = htole32(tso_segsz);
+   desc-buflen = htole32(tsosegsz);
desc-addr_hi = htole32(m-m_pkthdr.len);
desc-addr_lo = 0;
sc-jme_cdata.jme_tx_cnt++;

Modified: head/sys/dev/nfe/if_nfe.c
==
--- head/sys/dev/nfe/if_nfe.c   Sat Aug 24 19:02:36 2013(r254802)
+++ head/sys/dev/nfe/if_nfe.c   Sat Aug 24 19:38:36 2013(r254803)
@@ -2390,7 +2390,7 @@ nfe_encap(struct nfe_softc *sc, struct m
bus_dmamap_t map;
bus_dma_segment_t segs[NFE_MAX_SCATTER];
int error, i, nsegs, prod, si;
-   uint32_t tso_segsz;
+   uint32_t tsosegsz;
uint16_t cflags, flags;
struct mbuf *m;
 
@@ -2429,9 +2429,9 @@ nfe_encap(struct nfe_softc *sc, struct m
 
m = *m_head;
cflags = flags = 0;
-   tso_segsz = 0;
+   tsosegsz = 0;
if ((m-m_pkthdr.csum_flags  CSUM_TSO) != 0) {
-   tso_segsz = (uint32_t)m-m_pkthdr.tso_segsz 
+   tsosegsz = (uint32_t)m-m_pkthdr.tso_segsz 
NFE_TX_TSO_SHIFT;
cflags = ~(NFE_TX_IP_CSUM | NFE_TX_TCP_UDP_CSUM);
cflags |= NFE_TX_TSO;
@@ -2482,14 +2482,14 @@ nfe_encap(struct nfe_softc *sc, struct m
if ((m-m_flags  M_VLANTAG) != 0)
desc64-vtag = htole32(NFE_TX_VTAG |
m-m_pkthdr.ether_vtag);
-   if (tso_segsz != 0) {
+   if (tsosegsz != 0) {
/*
 * XXX
 * The following indicates the descriptor element
 * is a 32bit quantity.
 */
-   desc64-length |= htole16((uint16_t)tso_segsz);
-   desc64-flags |= htole16(tso_segsz  16);
+   desc64-length |= htole16((uint16_t)tsosegsz);
+   desc64-flags |= htole16(tsosegsz  16);
}
/*
 * finally, set the valid/checksum/TSO bit in the first
@@ -2502,14 +2502,14 @@ nfe_encap(struct nfe_softc *sc, struct m
else
desc32-flags |= htole16(NFE_TX_LASTFRAG_V1);
desc32 = sc-txq.desc32[si];
-   if (tso_segsz != 0) {
+   if (tsosegsz != 0) {
/*
 * XXX
 * The following indicates the descriptor element
 * is a 32bit quantity.
 */
-   desc32-length |= htole16((uint16_t)tso_segsz);
-   desc32-flags |= htole16(tso_segsz  16);
+   desc32-length |= htole16((uint16_t)tsosegsz);
+   desc32-flags |= htole16(tsosegsz  16);
}
/*
 * finally, set the valid/checksum/TSO bit in the first
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r254804 - in head/sys: dev/cxgb dev/e1000 dev/ixgbe dev/patm dev/qlxgb kern net netinet netinet6 sys

2013-08-24 Thread Andre Oppermann
Author: andre
Date: Sat Aug 24 19:51:18 2013
New Revision: 254804
URL: http://svnweb.freebsd.org/changeset/base/254804

Log:
  Restructure the mbuf pkthdr to make it fit for upcoming capabilities and
  features.  The changes in particular are:
  
  o Remove rarely used header pointer and replace it with a 64bit protocol/
layer specific union PH_loc for local use.  Protocols can flexibly overlay
their own 8 to 64 bit fields to store information while the packet is
worked on.
  
  o Mechanically convert IP reassembly, IGMP/MLD and ATM to use pkthdr.PH_loc
instead of pkthdr.header.
  
  o Extend csum_flags to 64bits to allow for additional future offload
information to be carried (e.g. iSCSI, IPsec offload, and others).
  
  o Move the RSS hash type enumerator from abusing m_flags to its own 8bit
rsstype field.  Adjust accessor macros.
  
  o Add cosqos field to store Class of Service / Quality of Service information
with the packet.  It is not yet supported in any drivers but allows us to
get on par with Cisco/Juniper in routing applications (plus MPLS QoS) with
a modernized ALTQ.
  
  o Add four 8 bit fields l[2-5]hlen to store the relative header offsets
from the start of the packet.  This is important for various offload
capabilities and to relieve the drivers from having to parse the packet
and protocol headers to find out location of checksums and other
information.  Header parsing in drivers is a lot of copy-paste and
unhandled corner cases which we want to avoid.
  
  o Add another flexible 64bit union to map various additional persistent
packet information, like ether_vtag, tso_segsz and csum fields.
Depending on the csum_flags settings some fields may have different usage
making it very flexible and adaptable to future capabilities.
  
  o Restructure the CSUM flags to better signify their outbound (down the
stack) and inbound (up the stack) use.  The CSUM flags used to be a bit
chaotic and rather poorly documented leading to incorrect use in many
places.  Bring clarity into their use through better naming.
Compatibility mappings are provided to preserve the API.  The drivers
can be corrected one by one and MFC'd without issue.
  
  o The size of pkthdr stays the same at 48/56bytes (32/64bit architectures).
  
  Sponsored by: The FreeBSD Foundation

Modified:
  head/sys/dev/cxgb/cxgb_sge.c
  head/sys/dev/e1000/if_igb.c
  head/sys/dev/ixgbe/ixgbe.c
  head/sys/dev/ixgbe/ixv.c
  head/sys/dev/patm/if_patm.c
  head/sys/dev/patm/if_patm_tx.c
  head/sys/dev/qlxgb/qla_hw.c
  head/sys/kern/kern_mbuf.c
  head/sys/kern/uipc_mbuf.c
  head/sys/net/if.h
  head/sys/netinet/igmp.c
  head/sys/netinet/ip_input.c
  head/sys/netinet6/ip6_output.c
  head/sys/netinet6/mld6.c
  head/sys/sys/mbuf.h

Modified: head/sys/dev/cxgb/cxgb_sge.c
==
--- head/sys/dev/cxgb/cxgb_sge.cSat Aug 24 19:38:36 2013
(r254803)
+++ head/sys/dev/cxgb/cxgb_sge.cSat Aug 24 19:51:18 2013
(r254804)
@@ -1470,9 +1470,9 @@ t3_encap(struct sge_qset *qs, struct mbu
hdr-len = htonl(mlen | 0x8000);
 
if (__predict_false(mlen  TCPPKTHDRSIZE)) {
-   
printf(mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x,
+   
printf(mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%b,flags=%#x,
m0, mlen, m0-m_pkthdr.tso_segsz,
-   m0-m_pkthdr.csum_flags, m0-m_flags);
+   (int)m0-m_pkthdr.csum_flags, CSUM_BITS, 
m0-m_flags);
panic(tx tso packet too small);
}
 

Modified: head/sys/dev/e1000/if_igb.c
==
--- head/sys/dev/e1000/if_igb.c Sat Aug 24 19:38:36 2013(r254803)
+++ head/sys/dev/e1000/if_igb.c Sat Aug 24 19:51:18 2013(r254804)
@@ -4981,7 +4981,7 @@ igb_rx_checksum(u32 staterr, struct mbuf
}
 
if (status  (E1000_RXD_STAT_TCPCS | E1000_RXD_STAT_UDPCS)) {
-   u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
+   u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
 #if __FreeBSD_version = 80
if (sctp) /* reassign */
type = CSUM_SCTP_VALID;

Modified: head/sys/dev/ixgbe/ixgbe.c
==
--- head/sys/dev/ixgbe/ixgbe.c  Sat Aug 24 19:38:36 2013(r254803)
+++ head/sys/dev/ixgbe/ixgbe.c  Sat Aug 24 19:51:18 2013(r254804)
@@ -4625,7 +4625,7 @@ ixgbe_rx_checksum(u32 staterr, struct mb
mp-m_pkthdr.csum_flags = 0;
}
if (status  IXGBE_RXD_STAT_L4CS) {
-   u16 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
+   u64 type = (CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
 #if __FreeBSD_version = 80
   

svn commit: r254805 - head/sys/sys

2013-08-24 Thread Andre Oppermann
Author: andre
Date: Sat Aug 24 19:58:36 2013
New Revision: 254805
URL: http://svnweb.freebsd.org/changeset/base/254805

Log:
  Add mtodo(m, o) macro taking an additional offset into the mbuf data section.
  
  Sponsored by: The FreeBSD Foundation

Modified:
  head/sys/sys/mbuf.h

Modified: head/sys/sys/mbuf.h
==
--- head/sys/sys/mbuf.h Sat Aug 24 19:51:18 2013(r254804)
+++ head/sys/sys/mbuf.h Sat Aug 24 19:58:36 2013(r254805)
@@ -67,8 +67,10 @@
  * type:
  *
  * mtod(m, t)  -- Convert mbuf pointer to data pointer of correct type.
+ * mtodo(m, o) -- Same as above but with offset 'o' into data.
  */
 #definemtod(m, t)  ((t)((m)-m_data))
+#definemtodo(m, o) ((void *)(((m)-m_data) + (o)))
 
 /*
  * Argument structure passed to UMA routines during mbuf and packet
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r254807 - head/sys/sys

2013-08-24 Thread Andre Oppermann
Author: andre
Date: Sat Aug 24 20:26:41 2013
New Revision: 254807
URL: http://svnweb.freebsd.org/changeset/base/254807

Log:
  Compact m_hdr by packing the type and flags fields into one uint32_t.
  
  The mbuf type is an enumerator with only a handful of types in use and
  thus reduced from int to 8bits allowing for 255 types to be specified.
  Only 5 types have been in use for a long time.
  
  The flags field gets the remaining 24 bits with 12 bits for global
  persistent flags and 12 bits for protocol/layer specific overlays.
  Some of the global flags/functionality can be moved to the csum_flags
  or ext_flags bits in the future.
  
  MT_VENDOR[1-4] and MT_EXP[1-4] types for vendor-internal and
  experimental local mapping are added.
  
  The size of m_hdr shrinks from 24/40 to 20/32bytes (32/64bit architectures).
  
  Sponsored by: The FreeBSD Foundation

Modified:
  head/sys/sys/mbuf.h

Modified: head/sys/sys/mbuf.h
==
--- head/sys/sys/mbuf.h Sat Aug 24 20:06:00 2013(r254806)
+++ head/sys/sys/mbuf.h Sat Aug 24 20:26:41 2013(r254807)
@@ -82,23 +82,18 @@ struct mb_args {
 };
 #endif /* _KERNEL */
 
-#if defined(__LP64__)
-#define M_HDR_PAD6
-#else
-#define M_HDR_PAD2
-#endif
-
 /*
  * Header present at the beginning of every mbuf.
+ * Size ILP32: 20
+ *  LP64: 32
  */
 struct m_hdr {
struct mbuf *mh_next;   /* next buffer in chain */
struct mbuf *mh_nextpkt;/* next chain in queue/record */
caddr_t  mh_data;   /* location of data */
-   int  mh_len;/* amount of data in this mbuf */
-   int  mh_flags;  /* flags; see below */
-   shortmh_type;   /* type of data in this mbuf */
-   uint8_t  pad[M_HDR_PAD];/* word align  */
+   int32_t  mh_len;/* amount of data in this mbuf */
+   uint32_t mh_type:8, /* type of data in this mbuf */
+mh_flags:24;   /* flags; see below */
 };
 
 /*
@@ -206,7 +201,10 @@ struct mbuf {
 #definem_dat   M_dat.M_databuf
 
 /*
- * mbuf flags.
+ * mbuf flags of global significance and layer crossing.
+ * Those of only protocol/layer specific significance are to be mapped
+ * to M_PROTO[1-12] and cleared at layer handoff boundaries.
+ * NB: Limited to the lower 24 bits.
  */
 #defineM_EXT   0x0001 /* has associated external storage */
 #defineM_PKTHDR0x0002 /* start of record */
@@ -430,12 +428,24 @@ struct mbuf {
 #defineCSUM_FRAGMENT   0x0 /* Unused */
 
 /*
- * mbuf types.
+ * mbuf types describing the content of the mbuf (including external storage).
  */
 #defineMT_NOTMBUF  0   /* USED INTERNALLY ONLY! Object is not 
mbuf */
 #defineMT_DATA 1   /* dynamic (data) allocation */
 #defineMT_HEADER   MT_DATA /* packet header, use M_PKTHDR instead 
*/
+
+#defineMT_VENDOR1  4   /* for vendor-internal use */
+#defineMT_VENDOR2  5   /* for vendor-internal use */
+#defineMT_VENDOR3  6   /* for vendor-internal use */
+#defineMT_VENDOR4  7   /* for vendor-internal use */
+
 #defineMT_SONAME   8   /* socket name */
+
+#defineMT_EXP1 9   /* for experimental use */
+#defineMT_EXP2 10  /* for experimental use */
+#defineMT_EXP3 11  /* for experimental use */
+#defineMT_EXP4 12  /* for experimental use */
+
 #defineMT_CONTROL  14  /* extra-data protocol message */
 #defineMT_OOBDATA  15  /* expedited data  */
 #defineMT_NTYPES   16  /* number of mbuf types for mbtypes[] */
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r254812 - in head/sys: kern sys

2013-08-24 Thread Andre Oppermann
Author: andre
Date: Sat Aug 24 21:09:57 2013
New Revision: 254812
URL: http://svnweb.freebsd.org/changeset/base/254812

Log:
  Remove unused m_free_fast().  The difference to m_free() is only
  2 predictable branches nowadays.  However as a pre-condition the
  caller had to ensure that the mbuf pkthdr did not have any mtags
  attached to it, costing some potential branches again.
  
  Sponsored by: The FreeBSD Foundation

Modified:
  head/sys/kern/kern_mbuf.c
  head/sys/sys/mbuf.h

Modified: head/sys/kern/kern_mbuf.c
==
--- head/sys/kern/kern_mbuf.c   Sat Aug 24 21:08:55 2013(r254811)
+++ head/sys/kern/kern_mbuf.c   Sat Aug 24 21:09:57 2013(r254812)
@@ -447,7 +447,7 @@ mb_dtor_mbuf(void *mem, int size, void *
m = (struct mbuf *)mem;
flags = (unsigned long)arg;
 
-   if ((flags  MB_NOTAGS) == 0  (m-m_flags  M_PKTHDR) != 0)
+   if ((m-m_flags  M_PKTHDR)  !SLIST_EMPTY(m-m_pkthdr.tags))
m_tag_delete_chain(m, NULL);
KASSERT((m-m_flags  M_EXT) == 0, (%s: M_EXT set, __func__));
KASSERT((m-m_flags  M_NOFREE) == 0, (%s: M_NOFREE set, __func__));

Modified: head/sys/sys/mbuf.h
==
--- head/sys/sys/mbuf.h Sat Aug 24 21:08:55 2013(r254811)
+++ head/sys/sys/mbuf.h Sat Aug 24 21:09:57 2013(r254812)
@@ -453,8 +453,6 @@ struct mbuf {
 #defineMT_NOINIT   255 /* Not a type but a flag to allocate
   a non-initialized mbuf */
 
-#define MB_NOTAGS  0x1UL   /* no tags attached to mbuf */
-
 /*
  * Compatibility with historic mbuf allocator.
  */
@@ -636,17 +634,6 @@ m_getcl(int how, short type, int flags)
return (uma_zalloc_arg(zone_pack, args, how));
 }
 
-static __inline void
-m_free_fast(struct mbuf *m)
-{
-#ifdef INVARIANTS
-   if (m-m_flags  M_PKTHDR)
-   KASSERT(SLIST_EMPTY(m-m_pkthdr.tags), (doing fast free of 
mbuf with tags));
-#endif
-
-   uma_zfree_arg(zone_mbuf, m, (void *)MB_NOTAGS);
-}
-
 static __inline struct mbuf *
 m_free(struct mbuf *m)
 {
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r254814 - head/sys/kern

2013-08-24 Thread Andre Oppermann
Author: andre
Date: Sat Aug 24 21:25:53 2013
New Revision: 254814
URL: http://svnweb.freebsd.org/changeset/base/254814

Log:
  After r254779 error must always be present in mb_ctor_pack(),
  not only when MAC is defined.
  
  Reported by:  gjb / tinderbox
  Sponsored by: The FreeBSD Foundation

Modified:
  head/sys/kern/kern_mbuf.c

Modified: head/sys/kern/kern_mbuf.c
==
--- head/sys/kern/kern_mbuf.c   Sat Aug 24 21:13:38 2013(r254813)
+++ head/sys/kern/kern_mbuf.c   Sat Aug 24 21:25:53 2013(r254814)
@@ -619,10 +619,7 @@ mb_ctor_pack(void *mem, int size, void *
 {
struct mbuf *m;
struct mb_args *args;
-#ifdef MAC
-   int error;
-#endif
-   int flags;
+   int error, flags;
short type;
 
m = (struct mbuf *)mem;
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


Re: svn commit: r254779 - head/sys/kern

2013-08-24 Thread Andre Oppermann

On 24.08.2013 23:13, Glen Barber wrote:

On Sat, Aug 24, 2013 at 12:24:59PM +, Andre Oppermann wrote:

Author: andre
Date: Sat Aug 24 12:24:58 2013
New Revision: 254779
URL: http://svnweb.freebsd.org/changeset/base/254779

Log:
   Avoid code duplication for mbuf initialization and use m_init() instead
   in mb_ctor_mbuf() and mb_ctor_pack().

Modified:
   head/sys/kern/kern_mbuf.c




[...]



-#ifdef MAC
-   /* If the label init fails, fail the alloc */
-   error = mac_mbuf_init(m, how);
-   if (error)
-   return (error);
-#endif
-   } else
-   m-m_data = m-m_dat;
-   return (0);
+   m = (struct mbuf *)mem;
+   flags = args-flags;
+
+   error = m_init(m, NULL, size, how, type, flags);
+
+   return (error);
  }




This breaks head/.

cc  -c -O -pipe  -std=c99 -g -Wall -Wredundant-decls -Wnested-externs
-Wstrict-prototypes  -Wmissing-prototypes -Wpointer-arith
-Winline -Wcast-qual  -Wundef -Wno-pointer-sign -fformat-extensions
-Wmissing-include-dirs -fdiagnostics-show-option
-Wno-error-tautological-compare -Wno-error-empty-body
-Wno-error-parentheses-equality  -nostdinc  -I. -I/src/sys
-I/src/sys/contrib/altq -I/src/sys/contrib/libfdt -D_KERNEL
-DHAVE_KERNEL_OPTION_HEADERS -include opt_global.h  -funwind-tables -mllvm
-arm-enable-ehabi -ffreestanding -Werror  /src/sys/kern/kern_mbuf.c
/src/sys/kern/kern_mbuf.c:637:2: error: use of undeclared identifier 'error'
 error = m_init(m, NULL, size, how, type, flags);
^
/src/sys/kern/kern_mbuf.c:643:10: error: use of undeclared identifier 'error'
return (error);
^

http://tinderbox.freebsd.org/tinderbox-head-build-HEAD-armv6-arm.full


Sorry, and thanks for the report.  Fixed in r254814.

--
Andre

___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r254815 - head/share/man/man9

2013-08-24 Thread Andre Oppermann
Author: andre
Date: Sat Aug 24 21:30:35 2013
New Revision: 254815
URL: http://svnweb.freebsd.org/changeset/base/254815

Log:
  Fix BUGS section after botched modify in r254772.
  
  Reported by:  bjk

Modified:
  head/share/man/man9/pfil.9

Modified: head/share/man/man9/pfil.9
==
--- head/share/man/man9/pfil.9  Sat Aug 24 21:25:53 2013(r254814)
+++ head/share/man/man9/pfil.9  Sat Aug 24 21:30:35 2013(r254815)
@@ -241,8 +241,7 @@ Fine-grained locking was added in
 lock export was added in
 .Fx 10.0 .
 .Sh BUGS
-The
-.Fn When a
+.Pp When a
 .Vt pfil_head
 is being modified, no traffic is diverted
 (to avoid deadlock).
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


Re: svn commit: r254772 - head/share/man/man9

2013-08-24 Thread Andre Oppermann

On 24.08.2013 23:25, Benjamin Kaduk wrote:

On Sat, Aug 24, 2013 at 6:38 AM, Andre Oppermann an...@freebsd.org 
mailto:an...@freebsd.org wrote:

Author: andre
Date: Sat Aug 24 10:38:02 2013
New Revision: 254772
URL: http://svnweb.freebsd.org/changeset/base/254772

Log:
   pfil_hook_get() has been internalized in r254771 and is no longer
   part of the API.  It wasn't safe for external use in any case.

Modified: head/share/man/man9/pfil.9

==
--- head/share/man/man9/pfil.9  Sat Aug 24 10:36:33 2013(r254771)
+++ head/share/man/man9/pfil.9  Sat Aug 24 10:38:02 2013(r254772)
@@ -245,11 +242,7 @@ lock export was added in
  .Fx 10.0 .
  .Sh BUGS
  The
-.Fn pfil_hook_get
-function
-is only safe for internal use.
-.Pp
-When a
+.Fn When a


This hunk looks pretty bogus.


Thanks for the report.  I'm a total mdoc noob.  (Hopefully) fixed in r254815.

--
Andre


-Ben

  .Vt pfil_head
  is being modified, no traffic is diverted
  (to avoid deadlock).



___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


Re: svn commit: r254520 - in head/sys: kern sys

2013-08-21 Thread Andre Oppermann

On 19.08.2013 23:45, Navdeep Parhar wrote:

On 08/19/13 13:58, Andre Oppermann wrote:

On 19.08.2013 19:33, Navdeep Parhar wrote:

On 08/19/13 04:16, Andre Oppermann wrote:

Author: andre
Date: Mon Aug 19 11:16:53 2013
New Revision: 254520
URL: http://svnweb.freebsd.org/changeset/base/254520

Log:
Remove the unused M_NOFREE mbuf flag.  It didn't have any in-tree
users
for a very long time, if ever.

Should such a functionality ever be needed again the appropriate and
much better way to do it is through a custom EXT_SOMETHING
external mbuf
type together with a dedicated *ext_free function.

Discussed with:trociny, glebius

Modified:
head/sys/kern/kern_mbuf.c
head/sys/kern/uipc_mbuf.c
head/sys/sys/mbuf.h



Hello Andre,

Is this just garbage collection or is there some other reason for this?


This is garbage collection and removal of not quite right, rotten,
functionality.


I recently tried some experiments to reduce the number of mbuf and
cluster allocations in a 40G NIC driver.  M_NOFREE and EXT_EXTREF proved
very useful and the code changes to the kernel were minimal.  See
user/np/cxl_tuning.  The experiment was quite successful and I was
planning to bring in most of those changes to HEAD.  I was hoping to get
some runtime mileage on the approach in general before tweaking the
ctors/dtors for jumpbo, jumbo9, jumbo16 to allow for an mbuf+refcnt
within the cluster.  But now M_NOFREE has vanished without a warning...


I'm looking through your experimental code and that is some really good
numbers you're achieving there!

However a couple things don't feel quite right, hackish even, and not
fit for HEAD.  This is a bit the same situation we had with some of the
first 1GigE cards quite a number of years back (mostly ti(4)).  There
we ended up with a couple of just good enough hacks to make it fast.
Most of the remains I've collected today.


If M_NOFREE and EXT_EXTREF are properly supported in the tree (and I'm
arguing that they were, before r254520) then the changes are perfectly
legitimate.  The only hackish part was that I was getting the cluster
from the jumbop zone while bypassing its normal refcnt mechanism.  This
I did so as to use the same zone as m_uiotombuf to keep it hot for all
consumers (driver + network stack).


If you insist I'll revert the commit removing M_NOFREE.  EXT_EXTREF isn't
touched yet, but should get better support.

The hackish part for me is that the driver again manages its own memory
pool.  Windows works that way, NetBSD is moving towards it while FreeBSD
has and remains at a central network memory pool.  The latter (our current)
way of doing it seems more efficient overall especially on heavily loaded
networked machines.  There may be significant queues building (think app
blocked having many sockets buffer fill up) up delaying the freeing and
returning of network memory resources.  Together with fragmentation this
can lead to bad very outcomes.  Router applications with many interfaces
also greatly benefit from central memory pools.

So I'm really not sure that we should move back in the driver owned pool
direction with lots of code duplication and copy-pasting (see NetBSD).
Also it is kinda weird to have a kernel based pool for data going down
the stack and another one in each driver for those going up.

Actually I'm of the opinion that we should stay with the central memory
pool and fix so that it works just as well for those cases a driver pool
currently performs better.


I believe most of the improvements you've shown can be implemented in
a more generic and safe way into the mbuf system.  Also a number of things
in your experimental code may have side-effects in situations other than
netperf runs.


Agreed.  As I mentioned my long term plan was to tweak the jumboX zones
to allow them to allocate cluster with embedded mbuf + refcount.  The
M_NOFREE+EXT_EXTREF approach is the perfect bridge from here to there.
It is non-intrusive and lends itself well to experimentation.


Agreed, full support on fixing the refcount issue.


To summarize what I get from your experimental branch commits:
- the Chelsio T4/T5 card can DMA multiple ethernet frames (packets) into
   a single memory buffer, provided it is large enough.
- you make use of that feature and point multiple m_ext mbufs into that
   buffer to separate the packets and send them up the stack.
- you embed the m_ext refcount into the single memory buffer as well.


yes, yes, and yes.


- you recycle mbufs? (I'm not entirely clear on that as I'm not familiar
   with the cxgbe code)


I recycle the cluster (and the embedded mbuf in it) when possible.  All
depends on whether it's still in use by the time the rx queue needs it back.


There's always a couple of problems with driver managed pools.  Driver shutdown/
unloading requires all such managed mbufs to have returned before it can 
proceed.
This may take a undetermined long time as mbufs are sitting in socket buffers or
other

Re: svn commit: r254520 - in head/sys: kern sys

2013-08-21 Thread Andre Oppermann

On 20.08.2013 00:03, Navdeep Parhar wrote:

On 08/19/13 14:08, Andre Oppermann wrote:

On 19.08.2013 19:40, Peter Grehan wrote:

I recently tried some experiments to reduce the number of mbuf and
cluster allocations in a 40G NIC driver.  M_NOFREE and EXT_EXTREF proved
very useful and the code changes to the kernel were minimal.  See
user/np/cxl_tuning.  The experiment was quite successful and I was
planning to bring in most of those changes to HEAD.  I was hoping to get
some runtime mileage on the approach in general before tweaking the
ctors/dtors for jumpbo, jumbo9, jumbo16 to allow for an mbuf+refcnt
within the cluster.  But now M_NOFREE has vanished without a warning...


   I also had a virtualization work-in-progress where static mbufs were
allocated in the kernel and
M_NOFREE set.

   Might be worth sending a prior heads-up for these type of changes.


I'm sorry for ambushing but this stuff has to be done.  I have provided
an alternative way of handling it and I'm happy to help you with your
use case to make it good for you and to prevent the mbuf system from
getting bloated and hackish again.


I don't know what Peter's use case is but I'm curious about the
already-available alternative to M_NOFREE, if that's what you meant.
Can you please elaborate?


When you supply your own (*ext_free) function you can simply omit freeing
the mbuf itself.  Should make sure not to leak it though.

--
Andre

___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


Re: M_NOFREE removal (was Re: svn commit: r254520 - in head/sys: kern sys)

2013-08-21 Thread Andre Oppermann

On 20.08.2013 00:38, Peter Grehan wrote:

Hi Andre,

  (moving to the more appropriate freebsd-net)


I'm sorry for ambushing but this stuff has to be done.  I have provided
an alternative way of handling it and I'm happy to help you with your
use case to make it good for you and to prevent the mbuf system from
getting bloated and hackish again.


  Sure. I'm not really upset since my code wasn't too far along, but with any 
API, you never know
who consumers might be so it's always worth being proactive about announcing 
it's removal.


Agreed.  OTOH there wasn't any in-tree user of it and posting before such 
removals
always yields at least one obscure hand wavy use or potential use of it.  So 
nothing
ever happens.


Can you please describe your intended use of M_NOFREE to better understand
the shortcomings of the current mbuf systems and the additional advantages
of the M_NOFREE case?


  I was looking at something similar to Linux's vhost-net, where a guest's 
virtio ring would be
processed in-kernel. An mbuf chain with external buffers would be used to pass 
guest tx buffer/len
segments directly into FreeBSD drivers.


Is that like page flipping?


  The intent of M_NOFREE was to avoid small mbuf allocations/frees in what is a 
hot path. This code
was intended to run at 10/40G.

  Note this code isn't really generic - it would require interfaces to be 
'owned' by the guest,
except that direct PCI-level pass-through wouldn't be needed.


Do you have some example code showing how that is or can be done?


  If there's an alternative to M_NOFREE, I'd be more than happy to use that.


Set up your own (*ext_free) function and omit freeing of the mbuf itself.  Make
sure to properly track your mbufs to avoid leaking them.

--
Andre

___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


Re: svn commit: r254519 - in head/sys: netinet netinet6 sys

2013-08-21 Thread Andre Oppermann

On 20.08.2013 05:04, Julian Elischer wrote:

On 8/19/13 7:08 PM, Andre Oppermann wrote:

Author: andre
Date: Mon Aug 19 11:08:36 2013
New Revision: 254519
URL: http://svnweb.freebsd.org/changeset/base/254519

Log:
   Move the global M_SKIP_FIREWALL mbuf flags to a protocol layer specific
   flag instead.  The flag is only used within the IP and IPv6 layer 3
   protocols.


wel, maybe
Layer 2 usage of ipfw could make use of this flag as well.
Cisco were using L2 ipfw some years back. I don't know if this affects them at 
all.


L2 firewalls are not affected directly.

--
Andre

___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


Re: M_NOFREE removal (was Re: svn commit: r254520 - in head/sys: kern sys)

2013-08-21 Thread Andre Oppermann

On 20.08.2013 05:13, Julian Elischer wrote:

On 8/20/13 6:38 AM, Peter Grehan wrote:

Hi Andre,

 (moving to the more appropriate freebsd-net)


I'm sorry for ambushing but this stuff has to be done.  I have provided
an alternative way of handling it and I'm happy to help you with your
use case to make it good for you and to prevent the mbuf system from
getting bloated and hackish again.


 Sure. I'm not really upset since my code wasn't too far along, but with any 
API, you never know
who consumers might be so it's always worth being proactive about announcing 
it's removal.


Can you please describe your intended use of M_NOFREE to better understand
the shortcomings of the current mbuf systems and the additional advantages
of the M_NOFREE case?


 I was looking at something similar to Linux's vhost-net, where a guest's 
virtio ring would be
processed in-kernel. An mbuf chain with external buffers would be used to pass 
guest tx buffer/len
segments directly into FreeBSD drivers.

 The intent of M_NOFREE was to avoid small mbuf allocations/frees in what is a 
hot path. This code
was intended to run at 10/40G.

 Note this code isn't really generic - it would require interfaces to be 
'owned' by the guest,
except that direct PCI-level pass-through wouldn't be needed.

 If there's an alternative to M_NOFREE, I'd be more than happy to use that.


I think an alternative would be a reference counted version.  we used to have 
that and NetBSD had a
quite sophisticated mbuf system where there were multiple owners of mbufs..
they wouldn't be freed until the last one freed it but I don't remember the 
details.


I just had a glance at the NetBSD mbuf system and it doesn't look convincing
either.  There may be some serious scalability issues with this ownership
thing.

--
Andre

___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


Re: svn commit: r254520 - in head/sys: kern sys

2013-08-21 Thread Andre Oppermann

On 21.08.2013 17:03, Peter Grehan wrote:

The way to go should be 4K clusters as they are native to the architecture.
IIRC a PCIe DMA can't cross a 4K boundary anyway


  That's a 4G boundary, for some devices.


I meant a single PCIe DMA transaction can be at most 4K before it has to set
up another one?

--
Andre

___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


Re: svn commit: r254524 - head/sys/sys

2013-08-21 Thread Andre Oppermann

On 20.08.2013 20:13, Davide Italiano wrote:

On Tue, Aug 20, 2013 at 7:42 PM, Andre Oppermann an...@freebsd.org wrote:

On 19.08.2013 19:37, Navdeep Parhar wrote:

Why reuse the freed up bits so soon (at least one of which I think was
prematurely GC'ed -- see my other email on M_NOFREE).  There was room
beyond M_HASHTYPEBITS, no?


This is HEAD where kernel and modules have to be (re)compiled together
at any point in time.  On stable this reuse would not have been possible.

In a subsequent commit I compacted and ordered the flags.  There's a couple
of free ones remaining.

I have some additional mbuf changes coming up to be posted for possible
objections later today.  The close HEAD freeze deadline has got me rushed
a bit to allow 10.x backporting of the checksum/offload overhaul.

--
Andre



In my opinion the possibility we have about breaking KPI/KBI should
not be abused, even if it's allowed in HEAD. In other words,people
should go for preserving it  when (as in this case) it's easy and
without additional costs. Your point about this is HEAD, it can be
broken at any time makes relatively little sense  to me. Note that in
the worst case such KPI/KBI breakages are annoying for $VENDORS who
maintain out-of-tree code, which need to rebuild/change their code,
or, if they get pissed off, drop FreeBSD support.
In your case, it's just a matter of code cleaness about few lines,
which, IMHO and always IMHO, doesn't justify the breakage.


Preserving the API but having to recompile is always fair game in HEAD.
In fact it happens all the time and the only supported way to track HEAD
is to recompile kernel and modules together.

For removing (crufty) functionality I agree that it shouldn't be done
just for the sake of it and also shouldn't be done often.  Sometimes it
is necessary in the name of progress.  Having to support old, crufty or
broken ways of doing something is a waste of developer time too.

It's always a judgement call.  In this case there is a perfectly valid
alternative way of doing that also is less dangerous to leak mbufs.

--
Andre

___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


Re: svn commit: r254524 - head/sys/sys

2013-08-21 Thread Andre Oppermann

On 21.08.2013 17:59, Davide Italiano wrote:

On Wed, Aug 21, 2013 at 5:30 PM, Andre Oppermann an...@freebsd.org wrote:

On 20.08.2013 20:13, Davide Italiano wrote:


On Tue, Aug 20, 2013 at 7:42 PM, Andre Oppermann an...@freebsd.org
wrote:


On 19.08.2013 19:37, Navdeep Parhar wrote:


Why reuse the freed up bits so soon (at least one of which I think was
prematurely GC'ed -- see my other email on M_NOFREE).  There was room
beyond M_HASHTYPEBITS, no?



This is HEAD where kernel and modules have to be (re)compiled together
at any point in time.  On stable this reuse would not have been possible.

In a subsequent commit I compacted and ordered the flags.  There's a
couple
of free ones remaining.

I have some additional mbuf changes coming up to be posted for possible
objections later today.  The close HEAD freeze deadline has got me rushed
a bit to allow 10.x backporting of the checksum/offload overhaul.

--
Andre



In my opinion the possibility we have about breaking KPI/KBI should
not be abused, even if it's allowed in HEAD. In other words,people
should go for preserving it  when (as in this case) it's easy and
without additional costs. Your point about this is HEAD, it can be
broken at any time makes relatively little sense  to me. Note that in
the worst case such KPI/KBI breakages are annoying for $VENDORS who
maintain out-of-tree code, which need to rebuild/change their code,
or, if they get pissed off, drop FreeBSD support.
In your case, it's just a matter of code cleaness about few lines,
which, IMHO and always IMHO, doesn't justify the breakage.



Preserving the API but having to recompile is always fair game in HEAD.
In fact it happens all the time and the only supported way to track HEAD
is to recompile kernel and modules together.

For removing (crufty) functionality I agree that it shouldn't be done
just for the sake of it and also shouldn't be done often.  Sometimes it
is necessary in the name of progress.  Having to support old, crufty or
broken ways of doing something is a waste of developer time too.

It's always a judgement call.  In this case there is a perfectly valid
alternative way of doing that also is less dangerous to leak mbufs.

--
Andre



I don't see in any way how flags reordering might be in any way
connected to mbufs leaks, alas.
There's a similar recent('ish) discussion and it was decided to not
compact/reorder flags. See r253662/r253775 for references.
So, I'm not sure what's the de-facto policy, but still, as a community
FreeBSD should decide one strategy and be stuck with that. It's a
matter of being consistent, which, IMHO, is something that should not
be undervaluated.


I fail to see what problem you have with the flag reordering.  Recompile
and done.  That's why we work with #defines instead of magic 0x1234 values.
If someone outside the tree was using the spare bits for their own private
purposes, yes, they would quickly have to check and possibly adjust them.
That's trivial however and should be expected when tracking an OpenSource
operating system.  If you want total ABI stability then build on Windows
but even they made a big break somewhere around NIDS5 or NDIS6.  We can't
get stuck on every bit because there may be someone somewhere out there
we don't know about using it.  On any x-stable such a reorder wouldn't
be possible obviously because of the ABI preservation guarantee.

--
Andre

___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r254605 - in head/sys: kern sys

2013-08-21 Thread Andre Oppermann
Author: andre
Date: Wed Aug 21 18:12:04 2013
New Revision: 254605
URL: http://svnweb.freebsd.org/changeset/base/254605

Log:
  Revert r254520 and resurrect the M_NOFREE mbuf flag and functionality.
  
  Requested by: np, grehan

Modified:
  head/sys/kern/kern_mbuf.c
  head/sys/kern/uipc_mbuf.c
  head/sys/sys/mbuf.h

Modified: head/sys/kern/kern_mbuf.c
==
--- head/sys/kern/kern_mbuf.c   Wed Aug 21 17:47:11 2013(r254604)
+++ head/sys/kern/kern_mbuf.c   Wed Aug 21 18:12:04 2013(r254605)
@@ -474,6 +474,7 @@ mb_dtor_mbuf(void *mem, int size, void *
if ((flags  MB_NOTAGS) == 0  (m-m_flags  M_PKTHDR) != 0)
m_tag_delete_chain(m, NULL);
KASSERT((m-m_flags  M_EXT) == 0, (%s: M_EXT set, __func__));
+   KASSERT((m-m_flags  M_NOFREE) == 0, (%s: M_NOFREE set, __func__));
 #ifdef INVARIANTS
trash_dtor(mem, size, arg);
 #endif

Modified: head/sys/kern/uipc_mbuf.c
==
--- head/sys/kern/uipc_mbuf.c   Wed Aug 21 17:47:11 2013(r254604)
+++ head/sys/kern/uipc_mbuf.c   Wed Aug 21 18:12:04 2013(r254605)
@@ -278,10 +278,16 @@ m_extadd(struct mbuf *mb, caddr_t buf, u
 void
 mb_free_ext(struct mbuf *m)
 {
+   int skipmbuf;

KASSERT((m-m_flags  M_EXT) == M_EXT, (%s: M_EXT not set, __func__));
KASSERT(m-m_ext.ref_cnt != NULL, (%s: ref_cnt not set, __func__));
 
+   /*
+* check if the header is embedded in the cluster
+*/
+   skipmbuf = (m-m_flags  M_NOFREE);
+
/* Free attached storage if this mbuf is the only reference to it. */
if (*(m-m_ext.ref_cnt) == 1 ||
atomic_fetchadd_int(m-m_ext.ref_cnt, -1) == 1) {
@@ -322,6 +328,8 @@ mb_free_ext(struct mbuf *m)
(%s: unknown ext_type, __func__));
}
}
+   if (skipmbuf)
+   return;

/*
 * Free this mbuf back to the mbuf zone with all m_ext
@@ -386,7 +394,7 @@ m_demote(struct mbuf *m0, int all)
m_freem(m-m_nextpkt);
m-m_nextpkt = NULL;
}
-   m-m_flags = m-m_flags  (M_EXT|M_RDONLY);
+   m-m_flags = m-m_flags  (M_EXT|M_RDONLY|M_NOFREE);
}
 }
 

Modified: head/sys/sys/mbuf.h
==
--- head/sys/sys/mbuf.h Wed Aug 21 17:47:11 2013(r254604)
+++ head/sys/sys/mbuf.h Wed Aug 21 18:12:04 2013(r254605)
@@ -191,6 +191,7 @@ struct mbuf {
 #defineM_PROMISC   0x0040 /* packet was not for us */
 #defineM_VLANTAG   0x0080 /* ether_vtag is valid */
 #defineM_FLOWID0x0100 /* deprecated: flowid is valid */
+#defineM_NOFREE0x0200 /* do not free mbuf, embedded in 
cluster */
 
 #defineM_PROTO10x1000 /* protocol-specific */
 #defineM_PROTO20x2000 /* protocol-specific */
@@ -526,7 +527,7 @@ m_free(struct mbuf *m)
 
if (m-m_flags  M_EXT)
mb_free_ext(m);
-   else
+   else if ((m-m_flags  M_NOFREE) == 0)
uma_zfree(zone_mbuf, m);
return (n);
 }
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


Re: M_NOFREE removal (was Re: svn commit: r254520 - in head/sys: kern sys)

2013-08-21 Thread Andre Oppermann

On 21.08.2013 18:38, Navdeep Parhar wrote:

On 08/21/13 08:08, Andre Oppermann wrote:

On 20.08.2013 00:38, Peter Grehan wrote:

snip



   If there's an alternative to M_NOFREE, I'd be more than happy to use
that.


Set up your own (*ext_free) function and omit freeing of the mbuf
itself.  Make
sure to properly track your mbufs to avoid leaking them.



How is this an alternate to M_NOFREE?  Your suggestion indicates that
you may have removed M_NOFREE thinking it did something other than what
it actually did.  And this suggestion doesn't even work -- note the
uma_zfree(zone_mbuf, m) at the end of mb_free_ext that would run after
any custom ext_free.

To recap: M_NOFREE was the way to tell the mbuf subsystem that the mbuf
does not come from zone_mbuf.  Nothing more and nothing less.  The mbuf
was in other ways like any other mbuf and could have a pkthdr (or not),
internal storage (or not), external cluster (or not), etc. etc.


Mea culpa.  You're totally right.  I have mixed up my mental model with
another working tree I carry along for quite some time.  In it (*ext_free)
completely replaces mb_free_ext() making it very easy to keep the mbuf.
We should move that way hopefully sooner than later, simplifying a couple
of things including externally managed refcounts.

Sorry for the chaos and not getting it.

M_NOFREE is back with r254605.

--
Andre

___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


Re: M_NOFREE removal (was Re: svn commit: r254520 - in head/sys: kern sys)

2013-08-21 Thread Andre Oppermann

On 21.08.2013 20:23, Navdeep Parhar wrote:

I believe we need an extra patch to get M_NOFREE correct.  I've had it
forever in some of my internal repos but never committed it upstream
(just plain forgot).  Since this stuff is fresh in your mind, can you
review this:

diff -r cd78031b7885 sys/sys/mbuf.h
--- a/sys/sys/mbuf.hFri Aug 16 13:35:26 2013 -0700
+++ b/sys/sys/mbuf.hWed Aug 21 10:55:57 2013 -0700
@@ -535,6 +535,8 @@ m_free(struct mbuf *m)
  {
struct mbuf *n = m-m_next;

+   if ((m-m_flags  (M_PKTHDR|M_NOFREE)) == (M_PKTHDR|M_NOFREE))
+   m_tag_delete_chain(m, NULL);
if (m-m_flags  M_EXT)
mb_free_ext(m);
else if ((m-m_flags  M_NOFREE) == 0)

It prevents leaks of tags from M_NOFREE+pkthdr mbufs.


Good point.  Looks correct.

But then I wonder if it is really a smart thing to allow single
mbufs (w/o M_EXT) to be M_NOFREE at the same time.  They easily
get lost.  If it doesn't have an external buffer attached there
is no way to know when and if it was freed.

If M_NOFREE is only allowed together with M_EXT then the tag chain
delete should happen in mb_ext_free() next to 'skipmbuf' instead.

Index: kern/uipc_mbuf.c
===
--- kern/uipc_mbuf.c(revision 254605)
+++ kern/uipc_mbuf.c(working copy)
@@ -283,11 +283,6 @@
KASSERT((m-m_flags  M_EXT) == M_EXT, (%s: M_EXT not set, __func__));
KASSERT(m-m_ext.ref_cnt != NULL, (%s: ref_cnt not set, __func__));

-   /*
-* check if the header is embedded in the cluster
-*/
-   skipmbuf = (m-m_flags  M_NOFREE);
-
/* Free attached storage if this mbuf is the only reference to it. */
if (*(m-m_ext.ref_cnt) == 1 ||
atomic_fetchadd_int(m-m_ext.ref_cnt, -1) == 1) {
@@ -328,8 +323,14 @@
(%s: unknown ext_type, __func__));
}
}
-   if (skipmbuf)
+
+   /*
+* Do not free if the mbuf is embedded in the cluster.
+*/
+   if (m-m_flags  M_NOFREE) {
+   m_tag_delete_chain(m, NULL);
return;
+   }

/*
 * Free this mbuf back to the mbuf zone with all m_ext

--
Andre

___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


Re: svn commit: r254520 - in head/sys: kern sys

2013-08-21 Thread Andre Oppermann

On 21.08.2013 21:59, Navdeep Parhar wrote:

On 08/21/13 12:41, Scott Long wrote:


On Aug 21, 2013, at 8:59 AM, Andre Oppermann an...@freebsd.org wrote:


On 19.08.2013 23:45, Navdeep Parhar wrote:

On 08/19/13 13:58, Andre Oppermann wrote:

On 19.08.2013 19:33, Navdeep Parhar wrote:

On 08/19/13 04:16, Andre Oppermann wrote:

Author: andre
Date: Mon Aug 19 11:16:53 2013
New Revision: 254520
URL: http://svnweb.freebsd.org/changeset/base/254520

Log:
Remove the unused M_NOFREE mbuf flag.  It didn't have any in-tree
users
for a very long time, if ever.

Should such a functionality ever be needed again the appropriate and
much better way to do it is through a custom EXT_SOMETHING
external mbuf
type together with a dedicated *ext_free function.

Discussed with:trociny, glebius

Modified:
head/sys/kern/kern_mbuf.c
head/sys/kern/uipc_mbuf.c
head/sys/sys/mbuf.h



Hello Andre,

Is this just garbage collection or is there some other reason for this?


This is garbage collection and removal of not quite right, rotten,
functionality.


I recently tried some experiments to reduce the number of mbuf and
cluster allocations in a 40G NIC driver.  M_NOFREE and EXT_EXTREF proved
very useful and the code changes to the kernel were minimal.  See
user/np/cxl_tuning.  The experiment was quite successful and I was
planning to bring in most of those changes to HEAD.  I was hoping to get
some runtime mileage on the approach in general before tweaking the
ctors/dtors for jumpbo, jumbo9, jumbo16 to allow for an mbuf+refcnt
within the cluster.  But now M_NOFREE has vanished without a warning...


I'm looking through your experimental code and that is some really good
numbers you're achieving there!

However a couple things don't feel quite right, hackish even, and not
fit for HEAD.  This is a bit the same situation we had with some of the
first 1GigE cards quite a number of years back (mostly ti(4)).  There
we ended up with a couple of just good enough hacks to make it fast.
Most of the remains I've collected today.


If M_NOFREE and EXT_EXTREF are properly supported in the tree (and I'm
arguing that they were, before r254520) then the changes are perfectly
legitimate.  The only hackish part was that I was getting the cluster
from the jumbop zone while bypassing its normal refcnt mechanism.  This
I did so as to use the same zone as m_uiotombuf to keep it hot for all
consumers (driver + network stack).


If you insist I'll revert the commit removing M_NOFREE.  EXT_EXTREF isn't
touched yet, but should get better support.

The hackish part for me is that the driver again manages its own memory
pool.  Windows works that way, NetBSD is moving towards it while FreeBSD
has and remains at a central network memory pool.  The latter (our current)
way of doing it seems more efficient overall especially on heavily loaded
networked machines.  There may be significant queues building (think app
blocked having many sockets buffer fill up) up delaying the freeing and
returning of network memory resources.  Together with fragmentation this
can lead to bad very outcomes.  Router applications with many interfaces
also greatly benefit from central memory pools.

So I'm really not sure that we should move back in the driver owned pool
direction with lots of code duplication and copy-pasting (see NetBSD).
Also it is kinda weird to have a kernel based pool for data going down
the stack and another one in each driver for those going up.

Actually I'm of the opinion that we should stay with the central memory
pool and fix so that it works just as well for those cases a driver pool
currently performs better.


The central memory pool approach is too slow, unfortunately.  There's a
reason that other OS's are moving to them.  At Netflix we are
currently working on some approaches to private memory pools in order to
achieve better efficiency, and we're closely watching and anticipating Navdeep's
work.


I should point out that I went to great lengths to use the jumbop zone
in my experiments, and not create my own pool of memory for the rx
buffers.  The hope was to share cache warmth (sounds very cosy :-) with
the likes of m_uiotombuf (which uses jumbop too) etc.  So I'm actually
in the camp that prefers central pools.  I'm just trying out ways to
reduce the trips we have to make to the pool(s) involved.  Laying down
mbufs within clusters, and packing multiple frames per cluster clearly
helps.  Careful cluster recycling within the NIC seems to work too.


What you describe does make a lot of sense.  Jumbop is the optimal size
for the VM.  We should really look at and pushing forward to have a nicer
M_NOFREE+M_EXTREF API for 10 and new HEAD going forward.

As always it seems to depend on the use case and what is being measured
too.  Is it single stream performance?  Concurrent streams?  Both ways
or only one way, in or out?  Each makes very different use of many parts
of the stack and driver leading to different

Re: M_NOFREE removal (was Re: svn commit: r254520 - in head/sys: kern sys)

2013-08-21 Thread Andre Oppermann

On 21.08.2013 21:40, Navdeep Parhar wrote:

On 08/21/13 12:22, Andre Oppermann wrote:

On 21.08.2013 20:23, Navdeep Parhar wrote:

I believe we need an extra patch to get M_NOFREE correct.  I've had it
forever in some of my internal repos but never committed it upstream
(just plain forgot).  Since this stuff is fresh in your mind, can you
review this:

diff -r cd78031b7885 sys/sys/mbuf.h
--- a/sys/sys/mbuf.hFri Aug 16 13:35:26 2013 -0700
+++ b/sys/sys/mbuf.hWed Aug 21 10:55:57 2013 -0700
@@ -535,6 +535,8 @@ m_free(struct mbuf *m)
   {
   struct mbuf *n = m-m_next;

+if ((m-m_flags  (M_PKTHDR|M_NOFREE)) == (M_PKTHDR|M_NOFREE))
+m_tag_delete_chain(m, NULL);
   if (m-m_flags  M_EXT)
   mb_free_ext(m);
   else if ((m-m_flags  M_NOFREE) == 0)

It prevents leaks of tags from M_NOFREE+pkthdr mbufs.


Good point.  Looks correct.

But then I wonder if it is really a smart thing to allow single
mbufs (w/o M_EXT) to be M_NOFREE at the same time.  They easily
get lost.  If it doesn't have an external buffer attached there
is no way to know when and if it was freed.

If M_NOFREE is only allowed together with M_EXT then the tag chain
delete should happen in mb_ext_free() next to 'skipmbuf' instead.

Index: kern/uipc_mbuf.c
===
--- kern/uipc_mbuf.c(revision 254605)
+++ kern/uipc_mbuf.c(working copy)
@@ -283,11 +283,6 @@
 KASSERT((m-m_flags  M_EXT) == M_EXT, (%s: M_EXT not set,
__func__));
 KASSERT(m-m_ext.ref_cnt != NULL, (%s: ref_cnt not set,
__func__));

-   /*
-* check if the header is embedded in the cluster
-*/
-   skipmbuf = (m-m_flags  M_NOFREE);
-
 /* Free attached storage if this mbuf is the only reference to
it. */
 if (*(m-m_ext.ref_cnt) == 1 ||
 atomic_fetchadd_int(m-m_ext.ref_cnt, -1) == 1) {
@@ -328,8 +323,14 @@
 (%s: unknown ext_type, __func__));
 }
 }
-   if (skipmbuf)
+
+   /*
+* Do not free if the mbuf is embedded in the cluster.
+*/
+   if (m-m_flags  M_NOFREE) {
+   m_tag_delete_chain(m, NULL);
 return;
+   }


The problem with this is that the mbuf may already be gone if it was
embedded in the cluster that was just freed by the ext free.  That's why
skipmbuf is used to cache the M_NOFREE bit.


Next try: ;)

Index: kern/uipc_mbuf.c
===
--- kern/uipc_mbuf.c(revision 254605)
+++ kern/uipc_mbuf.c(working copy)
@@ -284,9 +284,12 @@
KASSERT(m-m_ext.ref_cnt != NULL, (%s: ref_cnt not set, __func__));

/*
-* check if the header is embedded in the cluster
+* Do not free if the mbuf is embedded in the cluster
+* but make sure to get rid of any tag chains first.
 */
skipmbuf = (m-m_flags  M_NOFREE);
+   if (skipmbuf  (m-m_flags  M_PKTHDR))
+   m_tag_delete_chain(m, NULL);

/* Free attached storage if this mbuf is the only reference to it. */
if (*(m-m_ext.ref_cnt) == 1 ||

--
Andre

___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


Re: M_NOFREE removal (was Re: svn commit: r254520 - in head/sys: kern sys)

2013-08-21 Thread Andre Oppermann

On 21.08.2013 22:52, Navdeep Parhar wrote:

On 08/21/13 13:44, Andre Oppermann wrote:

On 21.08.2013 21:40, Navdeep Parhar wrote:

On 08/21/13 12:22, Andre Oppermann wrote:

On 21.08.2013 20:23, Navdeep Parhar wrote:

I believe we need an extra patch to get M_NOFREE correct.  I've had it
forever in some of my internal repos but never committed it upstream
(just plain forgot).  Since this stuff is fresh in your mind, can you
review this:

diff -r cd78031b7885 sys/sys/mbuf.h
--- a/sys/sys/mbuf.hFri Aug 16 13:35:26 2013 -0700
+++ b/sys/sys/mbuf.hWed Aug 21 10:55:57 2013 -0700
@@ -535,6 +535,8 @@ m_free(struct mbuf *m)
{
struct mbuf *n = m-m_next;

+if ((m-m_flags  (M_PKTHDR|M_NOFREE)) == (M_PKTHDR|M_NOFREE))
+m_tag_delete_chain(m, NULL);
if (m-m_flags  M_EXT)
mb_free_ext(m);
else if ((m-m_flags  M_NOFREE) == 0)

It prevents leaks of tags from M_NOFREE+pkthdr mbufs.


Good point.  Looks correct.

But then I wonder if it is really a smart thing to allow single
mbufs (w/o M_EXT) to be M_NOFREE at the same time.  They easily
get lost.  If it doesn't have an external buffer attached there
is no way to know when and if it was freed.

If M_NOFREE is only allowed together with M_EXT then the tag chain
delete should happen in mb_ext_free() next to 'skipmbuf' instead.

Index: kern/uipc_mbuf.c
===
--- kern/uipc_mbuf.c(revision 254605)
+++ kern/uipc_mbuf.c(working copy)
@@ -283,11 +283,6 @@
  KASSERT((m-m_flags  M_EXT) == M_EXT, (%s: M_EXT not set,
__func__));
  KASSERT(m-m_ext.ref_cnt != NULL, (%s: ref_cnt not set,
__func__));

-   /*
-* check if the header is embedded in the cluster
-*/
-   skipmbuf = (m-m_flags  M_NOFREE);
-
  /* Free attached storage if this mbuf is the only reference to
it. */
  if (*(m-m_ext.ref_cnt) == 1 ||
  atomic_fetchadd_int(m-m_ext.ref_cnt, -1) == 1) {
@@ -328,8 +323,14 @@
  (%s: unknown ext_type, __func__));
  }
  }
-   if (skipmbuf)
+
+   /*
+* Do not free if the mbuf is embedded in the cluster.
+*/
+   if (m-m_flags  M_NOFREE) {
+   m_tag_delete_chain(m, NULL);
  return;
+   }


The problem with this is that the mbuf may already be gone if it was
embedded in the cluster that was just freed by the ext free.  That's why
skipmbuf is used to cache the M_NOFREE bit.


Next try: ;)


It is most flexible to let M_NOFREE work without any assumptions
attached (must be M_EXT, etc.)  So I still prefer my patch to this.  If
you don't have any strong preferences may I commit that one instead?


I don't mind having your patch.  Though I don't see how it possibly can't leak
mbufs if they are not M_EXT.

--
Andre

___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


Re: svn commit: r254524 - head/sys/sys

2013-08-20 Thread Andre Oppermann

On 19.08.2013 19:37, Navdeep Parhar wrote:

On 08/19/13 06:56, Andre Oppermann wrote:

Author: andre
Date: Mon Aug 19 13:56:14 2013
New Revision: 254524
URL: http://svnweb.freebsd.org/changeset/base/254524

Log:
   Add four additional M_PROTOFLAGS[9-12] for protocol specific use.

   Discussed with:  trociny, glebius, adrian

Modified:
   head/sys/sys/mbuf.h

Modified: head/sys/sys/mbuf.h
==
--- head/sys/sys/mbuf.h Mon Aug 19 13:27:32 2013(r254523)
+++ head/sys/sys/mbuf.h Mon Aug 19 13:56:14 2013(r254524)
@@ -196,22 +196,24 @@ struct mbuf {
  #define   M_FRAG  0x0800 /* packet is a fragment of a larger 
packet */
  #define   M_FIRSTFRAG 0x1000 /* packet is first fragment */
  #define   M_LASTFRAG  0x2000 /* packet is last fragment */
-/* 0x4000free */
-/* 0x8000free */
+#defineM_PROTO90x4000 /* protocol-specific */
+#defineM_PROTO10   0x8000 /* protocol-specific */
  #define   M_VLANTAG   0x0001 /* ether_vtag is valid */
  #define   M_PROMISC   0x0002 /* packet was not for us */
-/* 0x0004free */
+#defineM_PROTO11   0x0004 /* protocol-specific */
  #define   M_PROTO60x0008 /* protocol-specific */
  #define   M_PROTO70x0010 /* protocol-specific */
  #define   M_PROTO80x0020 /* protocol-specific */
  #define   M_FLOWID0x0040 /* deprecated: flowid is valid */
+#defineM_PROTO12   0x0080 /* protocol-specific */
  #define   M_HASHTYPEBITS  0x0F00 /* mask of bits holding flowid hash 
type */


Why reuse the freed up bits so soon (at least one of which I think was
prematurely GC'ed -- see my other email on M_NOFREE).  There was room
beyond M_HASHTYPEBITS, no?


This is HEAD where kernel and modules have to be (re)compiled together
at any point in time.  On stable this reuse would not have been possible.

In a subsequent commit I compacted and ordered the flags.  There's a couple
of free ones remaining.

I have some additional mbuf changes coming up to be posted for possible
objections later today.  The close HEAD freeze deadline has got me rushed
a bit to allow 10.x backporting of the checksum/offload overhaul.

--
Andre

___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r254516 - in head/sys/dev: bce bxe mge ti

2013-08-19 Thread Andre Oppermann
Author: andre
Date: Mon Aug 19 10:20:20 2013
New Revision: 254516
URL: http://svnweb.freebsd.org/changeset/base/254516

Log:
  Remove unused and incomplete support for delayed fragment checksums
  from bce(4), bxe(4), mge(4) and ti(4) drivers.

Modified:
  head/sys/dev/bce/if_bce.c
  head/sys/dev/bxe/if_bxe.c
  head/sys/dev/mge/if_mge.c
  head/sys/dev/ti/if_ti.c

Modified: head/sys/dev/bce/if_bce.c
==
--- head/sys/dev/bce/if_bce.c   Mon Aug 19 09:49:51 2013(r254515)
+++ head/sys/dev/bce/if_bce.c   Mon Aug 19 10:20:20 2013(r254516)
@@ -9821,9 +9821,7 @@ bce_dump_mbuf(struct bce_softc *sc, stru
if (mp-m_flags  M_PKTHDR) {
BCE_PRINTF(- m_pkthdr: len = %d, flags = 0x%b, 
csum_flags = %b\n, mp-m_pkthdr.len,
-   mp-m_flags, \20\12M_BCAST\13M_MCAST\14M_FRAG
-   \15M_FIRSTFRAG\16M_LASTFRAG\21M_VLANTAG
-   \22M_PROMISC\23M_NOFREE,
+   mp-m_flags, M_FLAG_PRINTF,
mp-m_pkthdr.csum_flags,
\20\1CSUM_IP\2CSUM_TCP\3CSUM_UDP
\5CSUM_FRAGMENT\6CSUM_TSO\11CSUM_IP_CHECKED

Modified: head/sys/dev/bxe/if_bxe.c
==
--- head/sys/dev/bxe/if_bxe.c   Mon Aug 19 09:49:51 2013(r254515)
+++ head/sys/dev/bxe/if_bxe.c   Mon Aug 19 10:20:20 2013(r254516)
@@ -16265,9 +16265,7 @@ void bxe_dump_mbuf(struct bxe_softc *sc,
if (m-m_flags  M_PKTHDR) {
 BXE_PRINTF(- m_pkthdr: len = %d, flags = 0x%b, 
csum_flags = %b\n, m-m_pkthdr.len,
-   m-m_flags, \20\12M_BCAST\13M_MCAST\14M_FRAG
-   \15M_FIRSTFRAG\16M_LASTFRAG\21M_VLANTAG
-   \22M_PROMISC\23M_NOFREE,
+   m-m_flags, M_FLAG_PRINTF,
m-m_pkthdr.csum_flags,
\20\1CSUM_IP\2CSUM_TCP\3CSUM_UDP
\5CSUM_FRAGMENT\6CSUM_TSO\11CSUM_IP_CHECKED

Modified: head/sys/dev/mge/if_mge.c
==
--- head/sys/dev/mge/if_mge.c   Mon Aug 19 09:49:51 2013(r254515)
+++ head/sys/dev/mge/if_mge.c   Mon Aug 19 10:20:20 2013(r254516)
@@ -1703,9 +1703,7 @@ mge_offload_setup_descriptor(struct mge_
 
ip = (struct ip *)(m0-m_data + ehlen);
cmd_status |= MGE_TX_IP_HDR_SIZE(ip-ip_hl);
-
-   if ((m0-m_flags  M_FRAG) == 0)
-   cmd_status |= MGE_TX_NOT_FRAGMENT;
+   cmd_status |= MGE_TX_NOT_FRAGMENT;
}
 
if (csum_flags  CSUM_IP)

Modified: head/sys/dev/ti/if_ti.c
==
--- head/sys/dev/ti/if_ti.c Mon Aug 19 09:49:51 2013(r254515)
+++ head/sys/dev/ti/if_ti.c Mon Aug 19 10:20:20 2013(r254516)
@@ -3159,24 +3159,6 @@ ti_start_locked(struct ifnet *ifp)
break;
 
/*
-* XXX
-* safety overkill.  If this is a fragmented packet chain
-* with delayed TCP/UDP checksums, then only encapsulate
-* it if we have enough descriptors to handle the entire
-* chain at once.
-* (paranoia -- may not actually be needed)
-*/
-   if (m_head-m_flags  M_FIRSTFRAG 
-   m_head-m_pkthdr.csum_flags  (CSUM_DELAY_DATA)) {
-   if ((TI_TX_RING_CNT - sc-ti_txcnt) 
-   m_head-m_pkthdr.csum_data + 16) {
-   IFQ_DRV_PREPEND(ifp-if_snd, m_head);
-   ifp-if_drv_flags |= IFF_DRV_OACTIVE;
-   break;
-   }
-   }
-
-   /*
 * Pack the data into the transmit ring. If we
 * don't have room, set the OACTIVE flag and wait
 * for the NIC to drain the ring.
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r254517 - head/sys/netinet

2013-08-19 Thread Andre Oppermann
Author: andre
Date: Mon Aug 19 10:30:15 2013
New Revision: 254517
URL: http://svnweb.freebsd.org/changeset/base/254517

Log:
  Remove unused M_FRAG, M_FIRSTFRAG and M_LASTFRAG tagging from ip_fragment().
  There wasn't any real driver (and hardware) support for it.  Modern hardware
  does full fragmentation/segmentation offload instead.

Modified:
  head/sys/netinet/ip_output.c

Modified: head/sys/netinet/ip_output.c
==
--- head/sys/netinet/ip_output.cMon Aug 19 10:20:20 2013
(r254516)
+++ head/sys/netinet/ip_output.cMon Aug 19 10:30:15 2013
(r254517)
@@ -784,7 +784,7 @@ smart_frag_failure:
IPSTAT_INC(ips_odropped);
goto done;
}
-   m-m_flags |= (m0-m_flags  M_MCAST) | M_FRAG;
+   m-m_flags |= (m0-m_flags  M_MCAST);
/*
 * In the first mbuf, leave room for the link header, then
 * copy the original IP header including options. The payload
@@ -801,10 +801,9 @@ smart_frag_failure:
m-m_len = mhlen;
/* XXX do we need to add ip_off below ? */
mhip-ip_off = ((off - hlen)  3) + ip_off;
-   if (off + len = ip_len) {  /* last fragment */
+   if (off + len = ip_len)
len = ip_len - off;
-   m-m_flags |= M_LASTFRAG;
-   } else
+   else
mhip-ip_off |= IP_MF;
mhip-ip_len = htons((u_short)(len + mhlen));
m-m_next = m_copym(m0, off, len, M_NOWAIT);
@@ -831,10 +830,6 @@ smart_frag_failure:
}
IPSTAT_ADD(ips_ofragments, nfrags);
 
-   /* set first marker for fragment chain */
-   m0-m_flags |= M_FIRSTFRAG | M_FRAG;
-   m0-m_pkthdr.csum_data = nfrags;
-
/*
 * Update first fragment by trimming what's been copied out
 * and updating header.
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r254518 - head/sys/netinet

2013-08-19 Thread Andre Oppermann
Author: andre
Date: Mon Aug 19 10:34:10 2013
New Revision: 254518
URL: http://svnweb.freebsd.org/changeset/base/254518

Log:
  Move ip_reassemble()'s use of the global M_FRAG mbuf flag to a protocol layer
  specific flag instead.  The flag is only relevant while the packet stays in
  the IP reassembly queue.
  
  Discussed with:   trociny, glebius

Modified:
  head/sys/netinet/ip_input.c
  head/sys/netinet/ip_var.h

Modified: head/sys/netinet/ip_input.c
==
--- head/sys/netinet/ip_input.c Mon Aug 19 10:30:15 2013(r254517)
+++ head/sys/netinet/ip_input.c Mon Aug 19 10:34:10 2013(r254518)
@@ -911,9 +911,9 @@ found:
IPSTAT_INC(ips_toosmall); /* XXX */
goto dropfrag;
}
-   m-m_flags |= M_FRAG;
+   m-m_flags |= M_IP_FRAG;
} else
-   m-m_flags = ~M_FRAG;
+   m-m_flags = ~M_IP_FRAG;
ip-ip_off = htons(ntohs(ip-ip_off)  3);
 
/*
@@ -1060,7 +1060,7 @@ found:
next += ntohs(GETIP(q)-ip_len);
}
/* Make sure the last packet didn't have the IP_MF flag */
-   if (p-m_flags  M_FRAG) {
+   if (p-m_flags  M_IP_FRAG) {
if (fp-ipq_nfrags  V_maxfragsperpacket) {
IPSTAT_ADD(ips_fragdropped, fp-ipq_nfrags);
ip_freef(head, fp);

Modified: head/sys/netinet/ip_var.h
==
--- head/sys/netinet/ip_var.h   Mon Aug 19 10:30:15 2013(r254517)
+++ head/sys/netinet/ip_var.h   Mon Aug 19 10:34:10 2013(r254518)
@@ -167,6 +167,7 @@ voidkmod_ipstat_dec(int statnum);
  */
 #defineM_FASTFWD_OURS  M_PROTO1/* changed dst to local 
*/
 #defineM_IP_NEXTHOPM_PROTO2/* explicit ip nexthop 
*/
+#defineM_IP_FRAG   M_PROTO4/* fragment reassembly 
*/
 
 #ifdef __NO_STRICT_ALIGNMENT
 #define IP_HDR_ALIGNED_P(ip)   1
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r254519 - in head/sys: netinet netinet6 sys

2013-08-19 Thread Andre Oppermann
Author: andre
Date: Mon Aug 19 11:08:36 2013
New Revision: 254519
URL: http://svnweb.freebsd.org/changeset/base/254519

Log:
  Move the global M_SKIP_FIREWALL mbuf flags to a protocol layer specific
  flag instead.  The flag is only used within the IP and IPv6 layer 3
  protocols.
  
  Because some firewall packages treat IPv4 and IPv6 packets the same the
  flag should have the same value for both.
  
  Discussed with:   trociny, glebius

Modified:
  head/sys/netinet/ip_var.h
  head/sys/netinet6/ip6_var.h
  head/sys/sys/mbuf.h

Modified: head/sys/netinet/ip_var.h
==
--- head/sys/netinet/ip_var.h   Mon Aug 19 10:34:10 2013(r254518)
+++ head/sys/netinet/ip_var.h   Mon Aug 19 11:08:36 2013(r254519)
@@ -163,10 +163,12 @@ void  kmod_ipstat_dec(int statnum);
 #define IP_ALLOWBROADCAST  SO_BROADCAST/* 0x20 can send broadcast 
packets */
 
 /*
- * mbuf flag used by ip_fastfwd
+ * IPv4 protocol layer specific mbuf flags.
  */
 #defineM_FASTFWD_OURS  M_PROTO1/* changed dst to local 
*/
 #defineM_IP_NEXTHOPM_PROTO2/* explicit ip nexthop 
*/
+#defineM_SKIP_FIREWALL M_PROTO3/* skip firewall 
processing,
+  keep in sync with IP6 */
 #defineM_IP_FRAG   M_PROTO4/* fragment reassembly 
*/
 
 #ifdef __NO_STRICT_ALIGNMENT

Modified: head/sys/netinet6/ip6_var.h
==
--- head/sys/netinet6/ip6_var.h Mon Aug 19 10:34:10 2013(r254518)
+++ head/sys/netinet6/ip6_var.h Mon Aug 19 11:08:36 2013(r254519)
@@ -293,7 +293,12 @@ struct ip6aux {
 #defineIPV6_FORWARDING 0x02/* most of IPv6 header exists */
 #defineIPV6_MINMTU 0x04/* use minimum MTU 
(IPV6_USE_MIN_MTU) */
 
-#defineM_IP6_NEXTHOP   M_PROTO7/* explicit ip nexthop 
*/
+/*
+ * IPv6 protocol layer specific mbuf flags.
+ */
+#defineM_IP6_NEXTHOP   M_PROTO2/* explicit ip nexthop 
*/
+#defineM_SKIP_FIREWALL M_PROTO3/* skip firewall 
processing,
+  keep in sync with IPv4 */
 
 #ifdef __NO_STRICT_ALIGNMENT
 #define IP6_HDR_ALIGNED_P(ip)  1

Modified: head/sys/sys/mbuf.h
==
--- head/sys/sys/mbuf.h Mon Aug 19 10:34:10 2013(r254518)
+++ head/sys/sys/mbuf.h Mon Aug 19 11:08:36 2013(r254519)
@@ -196,7 +196,7 @@ struct mbuf {
 #defineM_FRAG  0x0800 /* packet is a fragment of a larger 
packet */
 #defineM_FIRSTFRAG 0x1000 /* packet is first fragment */
 #defineM_LASTFRAG  0x2000 /* packet is last fragment */
-#defineM_SKIP_FIREWALL 0x4000 /* skip firewall processing */
+/* 0x4000free */
 /* 0x8000free */
 #defineM_VLANTAG   0x0001 /* ether_vtag is valid */
 #defineM_PROMISC   0x0002 /* packet was not for us */
@@ -253,7 +253,7 @@ struct mbuf {
  * Flags preserved when copying m_pkthdr.
  */
 #defineM_COPYFLAGS \
-(M_PKTHDR|M_EOR|M_RDONLY|M_PROTOFLAGS|M_SKIP_FIREWALL|M_BCAST|M_MCAST|\
+(M_PKTHDR|M_EOR|M_RDONLY|M_PROTOFLAGS|M_BCAST|M_MCAST|\
  M_FRAG|M_FIRSTFRAG|M_LASTFRAG|M_VLANTAG|M_PROMISC|M_HASHTYPEBITS)
 
 /*
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r254520 - in head/sys: kern sys

2013-08-19 Thread Andre Oppermann
Author: andre
Date: Mon Aug 19 11:16:53 2013
New Revision: 254520
URL: http://svnweb.freebsd.org/changeset/base/254520

Log:
  Remove the unused M_NOFREE mbuf flag.  It didn't have any in-tree users
  for a very long time, if ever.
  
  Should such a functionality ever be needed again the appropriate and
  much better way to do it is through a custom EXT_SOMETHING external mbuf
  type together with a dedicated *ext_free function.
  
  Discussed with:   trociny, glebius

Modified:
  head/sys/kern/kern_mbuf.c
  head/sys/kern/uipc_mbuf.c
  head/sys/sys/mbuf.h

Modified: head/sys/kern/kern_mbuf.c
==
--- head/sys/kern/kern_mbuf.c   Mon Aug 19 11:08:36 2013(r254519)
+++ head/sys/kern/kern_mbuf.c   Mon Aug 19 11:16:53 2013(r254520)
@@ -474,7 +474,6 @@ mb_dtor_mbuf(void *mem, int size, void *
if ((flags  MB_NOTAGS) == 0  (m-m_flags  M_PKTHDR) != 0)
m_tag_delete_chain(m, NULL);
KASSERT((m-m_flags  M_EXT) == 0, (%s: M_EXT set, __func__));
-   KASSERT((m-m_flags  M_NOFREE) == 0, (%s: M_NOFREE set, __func__));
 #ifdef INVARIANTS
trash_dtor(mem, size, arg);
 #endif

Modified: head/sys/kern/uipc_mbuf.c
==
--- head/sys/kern/uipc_mbuf.c   Mon Aug 19 11:08:36 2013(r254519)
+++ head/sys/kern/uipc_mbuf.c   Mon Aug 19 11:16:53 2013(r254520)
@@ -278,17 +278,10 @@ m_extadd(struct mbuf *mb, caddr_t buf, u
 void
 mb_free_ext(struct mbuf *m)
 {
-   int skipmbuf;

KASSERT((m-m_flags  M_EXT) == M_EXT, (%s: M_EXT not set, __func__));
KASSERT(m-m_ext.ref_cnt != NULL, (%s: ref_cnt not set, __func__));
 
-
-   /*
-* check if the header is embedded in the cluster
-*/ 
-   skipmbuf = (m-m_flags  M_NOFREE);
-   
/* Free attached storage if this mbuf is the only reference to it. */
if (*(m-m_ext.ref_cnt) == 1 ||
atomic_fetchadd_int(m-m_ext.ref_cnt, -1) == 1) {
@@ -329,8 +322,6 @@ mb_free_ext(struct mbuf *m)
(%s: unknown ext_type, __func__));
}
}
-   if (skipmbuf)
-   return;

/*
 * Free this mbuf back to the mbuf zone with all m_ext
@@ -395,7 +386,7 @@ m_demote(struct mbuf *m0, int all)
m_freem(m-m_nextpkt);
m-m_nextpkt = NULL;
}
-   m-m_flags = m-m_flags  (M_EXT|M_RDONLY|M_NOFREE);
+   m-m_flags = m-m_flags  (M_EXT|M_RDONLY);
}
 }
 

Modified: head/sys/sys/mbuf.h
==
--- head/sys/sys/mbuf.h Mon Aug 19 11:08:36 2013(r254519)
+++ head/sys/sys/mbuf.h Mon Aug 19 11:16:53 2013(r254520)
@@ -200,7 +200,7 @@ struct mbuf {
 /* 0x8000free */
 #defineM_VLANTAG   0x0001 /* ether_vtag is valid */
 #defineM_PROMISC   0x0002 /* packet was not for us */
-#defineM_NOFREE0x0004 /* do not free mbuf, embedded in 
cluster */
+/* 0x0004free */
 #defineM_PROTO60x0008 /* protocol-specific */
 #defineM_PROTO70x0010 /* protocol-specific */
 #defineM_PROTO80x0020 /* protocol-specific */
@@ -515,7 +515,7 @@ m_free(struct mbuf *m)
 
if (m-m_flags  M_EXT)
mb_free_ext(m);
-   else if ((m-m_flags  M_NOFREE) == 0)
+   else
uma_zfree(zone_mbuf, m);
return (n);
 }
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


Re: svn commit: r254520 - in head/sys: kern sys

2013-08-19 Thread Andre Oppermann

On 19.08.2013 13:23, Davide Italiano wrote:

On Mon, Aug 19, 2013 at 1:16 PM, Andre Oppermann an...@freebsd.org wrote:

Modified: head/sys/sys/mbuf.h
==
--- head/sys/sys/mbuf.h Mon Aug 19 11:08:36 2013(r254519)
+++ head/sys/sys/mbuf.h Mon Aug 19 11:16:53 2013(r254520)
@@ -200,7 +200,7 @@ struct mbuf {
  /* 0x8000free */
  #defineM_VLANTAG   0x0001 /* ether_vtag is valid */
  #defineM_PROMISC   0x0002 /* packet was not for us */
-#defineM_NOFREE0x0004 /* do not free mbuf, embedded in 
cluster */
+/* 0x0004free */


I think you should just use M_UNUSED or something similar here for
consistency, like it's happening in td_pflags (sys/sys/proc.h), e.g.:

#define TDP_UNUSED9 0x0100 /* --available-- */


There's a couple of more changes upcoming that will take care of it.

--
Andre

___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r254521 - in head/sys: netinet sys

2013-08-19 Thread Andre Oppermann
Author: andre
Date: Mon Aug 19 12:30:18 2013
New Revision: 254521
URL: http://svnweb.freebsd.org/changeset/base/254521

Log:
  Move the SCTP specific definition of M_NOTIFICATION onto a protocol
  specific mbuf flag from sys/mbuf.h to netinet/sctp_os_bsd.h.  It is
  only relevant within SCTP.
  
  Discussed with:   tuexen

Modified:
  head/sys/netinet/sctp_os_bsd.h
  head/sys/sys/mbuf.h

Modified: head/sys/netinet/sctp_os_bsd.h
==
--- head/sys/netinet/sctp_os_bsd.h  Mon Aug 19 11:16:53 2013
(r254520)
+++ head/sys/netinet/sctp_os_bsd.h  Mon Aug 19 12:30:18 2013
(r254521)
@@ -429,6 +429,11 @@ typedef struct rtentry sctp_rtentry_t;
 #define SCTP_ZERO_COPY_SENDQ_EVENT(inp, so)
 
 /*
+ * SCTP protocol specific mbuf flags.
+ */
+#defineM_NOTIFICATION  M_PROTO5/* SCTP notification */
+
+/*
  * IP output routines
  */
 #define SCTP_IP_OUTPUT(result, o_pak, ro, stcb, vrf_id) \

Modified: head/sys/sys/mbuf.h
==
--- head/sys/sys/mbuf.h Mon Aug 19 11:16:53 2013(r254520)
+++ head/sys/sys/mbuf.h Mon Aug 19 12:30:18 2013(r254521)
@@ -207,8 +207,6 @@ struct mbuf {
 #defineM_FLOWID0x0040 /* deprecated: flowid is valid */
 #defineM_HASHTYPEBITS  0x0F00 /* mask of bits holding flowid hash 
type */
 
-#defineM_NOTIFICATION  M_PROTO5/* SCTP notification */
-
 /*
  * Flags to purge when crossing layers.
  */
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r254523 - in head/sys: net net80211 netinet netinet6 netpfil/pf ofed/drivers/infiniband/ulp/ipoib sys

2013-08-19 Thread Andre Oppermann
Author: andre
Date: Mon Aug 19 13:27:32 2013
New Revision: 254523
URL: http://svnweb.freebsd.org/changeset/base/254523

Log:
  Add m_clrprotoflags() to clear protocol specific mbuf flags at up and
  downwards layer crossings.
  
  Consistently use it within IP, IPv6 and ethernet protocols.
  
  Discussed with:   trociny, glebius

Modified:
  head/sys/net/if_ethersubr.c
  head/sys/net80211/ieee80211_hostap.c
  head/sys/net80211/ieee80211_input.c
  head/sys/netinet/if_ether.c
  head/sys/netinet/igmp.c
  head/sys/netinet/ip_fastfwd.c
  head/sys/netinet/ip_output.c
  head/sys/netinet/sctp_os_bsd.h
  head/sys/netinet6/ip6_mroute.c
  head/sys/netinet6/mld6.c
  head/sys/netinet6/nd6.c
  head/sys/netinet6/send.c
  head/sys/netpfil/pf/pf.c
  head/sys/ofed/drivers/infiniband/ulp/ipoib/ipoib_main.c
  head/sys/sys/mbuf.h

Modified: head/sys/net/if_ethersubr.c
==
--- head/sys/net/if_ethersubr.c Mon Aug 19 12:37:13 2013(r254522)
+++ head/sys/net/if_ethersubr.c Mon Aug 19 13:27:32 2013(r254523)
@@ -774,7 +774,7 @@ ether_demux(struct ifnet *ifp, struct mb
 * Strip off Ethernet header.
 */
m-m_flags = ~M_VLANTAG;
-   m-m_flags = ~(M_PROTOFLAGS);
+   m_clrprotoflags(m);
m_adj(m, ETHER_HDR_LEN);
 
/*

Modified: head/sys/net80211/ieee80211_hostap.c
==
--- head/sys/net80211/ieee80211_hostap.cMon Aug 19 12:37:13 2013
(r254522)
+++ head/sys/net80211/ieee80211_hostap.cMon Aug 19 13:27:32 2013
(r254523)
@@ -355,7 +355,8 @@ hostap_deliver_data(struct ieee80211vap 
struct ifnet *ifp = vap-iv_ifp;
 
/* clear driver/net80211 flags before passing up */
-   m-m_flags = ~(M_80211_RX | M_MCAST | M_BCAST);
+   m-m_flags = ~(M_MCAST | M_BCAST);
+   m_clrprotoflags(m);
 
KASSERT(vap-iv_opmode == IEEE80211_M_HOSTAP,
(gack, opmode %d, vap-iv_opmode));

Modified: head/sys/net80211/ieee80211_input.c
==
--- head/sys/net80211/ieee80211_input.c Mon Aug 19 12:37:13 2013
(r254522)
+++ head/sys/net80211/ieee80211_input.c Mon Aug 19 13:27:32 2013
(r254523)
@@ -250,7 +250,8 @@ ieee80211_deliver_data(struct ieee80211v
struct ifnet *ifp = vap-iv_ifp;
 
/* clear driver/net80211 flags before passing up */
-   m-m_flags = ~(M_80211_RX | M_MCAST | M_BCAST);
+   m-m_flags = ~(M_MCAST | M_BCAST);
+   m_clrprotoflags(m);
 
/* NB: see hostap_deliver_data, this path doesn't handle hostap */
KASSERT(vap-iv_opmode != IEEE80211_M_HOSTAP, (gack, hostap));

Modified: head/sys/netinet/if_ether.c
==
--- head/sys/netinet/if_ether.c Mon Aug 19 12:37:13 2013(r254522)
+++ head/sys/netinet/if_ether.c Mon Aug 19 13:27:32 2013(r254523)
@@ -281,6 +281,7 @@ arprequest(struct ifnet *ifp, const stru
sa.sa_family = AF_ARP;
sa.sa_len = 2;
m-m_flags |= M_BCAST;
+   m_clrprotoflags(m); /* Avoid confusing lower layers. */
(*ifp-if_output)(ifp, m, sa, NULL);
ARPSTAT_INC(txrequests);
 }
@@ -784,6 +785,8 @@ match:
for (; m_hold != NULL; m_hold = m_hold_next) {
m_hold_next = m_hold-m_nextpkt;
m_hold-m_nextpkt = NULL;
+   /* Avoid confusing lower layers. */
+   m_clrprotoflags(m_hold);
(*ifp-if_output)(ifp, m_hold, sa, NULL);
}
} else
@@ -888,6 +891,7 @@ reply:
m-m_pkthdr.rcvif = NULL;
sa.sa_family = AF_ARP;
sa.sa_len = 2;
+   m_clrprotoflags(m); /* Avoid confusing lower layers. */
(*ifp-if_output)(ifp, m, sa, NULL);
ARPSTAT_INC(txreplies);
return;

Modified: head/sys/netinet/igmp.c
==
--- head/sys/netinet/igmp.c Mon Aug 19 12:37:13 2013(r254522)
+++ head/sys/netinet/igmp.c Mon Aug 19 13:27:32 2013(r254523)
@@ -3450,7 +3450,7 @@ igmp_intr(struct mbuf *m)
}
 
igmp_scrub_context(m0);
-   m-m_flags = ~(M_PROTOFLAGS);
+   m_clrprotoflags(m);
m0-m_pkthdr.rcvif = V_loif;
 #ifdef MAC
mac_netinet_igmp_send(ifp, m0);

Modified: head/sys/netinet/ip_fastfwd.c
==
--- head/sys/netinet/ip_fastfwd.c   Mon Aug 19 12:37:13 2013
(r254522)
+++ head/sys/netinet/ip_fastfwd.c   Mon Aug 19 13:27:32 2013
(r254523)
@@ -525,6 +525,10 @@ passout:
if (ip_len = mtu ||
(ifp-if_hwassist  CSUM_FRAGMENT  (ip_off  IP_DF) == 0)) {

svn commit: r254524 - head/sys/sys

2013-08-19 Thread Andre Oppermann
Author: andre
Date: Mon Aug 19 13:56:14 2013
New Revision: 254524
URL: http://svnweb.freebsd.org/changeset/base/254524

Log:
  Add four additional M_PROTOFLAGS[9-12] for protocol specific use.
  
  Discussed with:   trociny, glebius, adrian

Modified:
  head/sys/sys/mbuf.h

Modified: head/sys/sys/mbuf.h
==
--- head/sys/sys/mbuf.h Mon Aug 19 13:27:32 2013(r254523)
+++ head/sys/sys/mbuf.h Mon Aug 19 13:56:14 2013(r254524)
@@ -196,22 +196,24 @@ struct mbuf {
 #defineM_FRAG  0x0800 /* packet is a fragment of a larger 
packet */
 #defineM_FIRSTFRAG 0x1000 /* packet is first fragment */
 #defineM_LASTFRAG  0x2000 /* packet is last fragment */
-/* 0x4000free */
-/* 0x8000free */
+#defineM_PROTO90x4000 /* protocol-specific */
+#defineM_PROTO10   0x8000 /* protocol-specific */
 #defineM_VLANTAG   0x0001 /* ether_vtag is valid */
 #defineM_PROMISC   0x0002 /* packet was not for us */
-/* 0x0004free */
+#defineM_PROTO11   0x0004 /* protocol-specific */
 #defineM_PROTO60x0008 /* protocol-specific */
 #defineM_PROTO70x0010 /* protocol-specific */
 #defineM_PROTO80x0020 /* protocol-specific */
 #defineM_FLOWID0x0040 /* deprecated: flowid is valid */
+#defineM_PROTO12   0x0080 /* protocol-specific */
 #defineM_HASHTYPEBITS  0x0F00 /* mask of bits holding flowid hash 
type */
 
 /*
  * Flags to purge when crossing layers.
  */
 #defineM_PROTOFLAGS \
-(M_PROTO1|M_PROTO2|M_PROTO3|M_PROTO4|M_PROTO5|M_PROTO6|M_PROTO7|M_PROTO8)
+(M_PROTO1|M_PROTO2|M_PROTO3|M_PROTO4|M_PROTO5|M_PROTO6|M_PROTO7|M_PROTO8|\
+ M_PROTO9|M_PROTO10|M_PROTO11|M_PROTO12)
 
 /*
  * Network interface cards are able to hash protocol fields (such as IPv4
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r254526 - in head/sys: net80211 sys

2013-08-19 Thread Andre Oppermann
Author: andre
Date: Mon Aug 19 14:07:31 2013
New Revision: 254526
URL: http://svnweb.freebsd.org/changeset/base/254526

Log:
  Migrate the net80211 protocol specific use of M_FRAG, M_FIRSTFRAG and
  M_LASTFRAG flags to protocol specific flags.
  
  Remove the now unused M_FRAG, M_FIRSTFRAG and M_LASTFRAG mbuf flags.
  
  Discussed with:   trociny, glebius, adrian

Modified:
  head/sys/net80211/ieee80211_freebsd.h
  head/sys/sys/mbuf.h

Modified: head/sys/net80211/ieee80211_freebsd.h
==
--- head/sys/net80211/ieee80211_freebsd.h   Mon Aug 19 14:04:35 2013
(r254525)
+++ head/sys/net80211/ieee80211_freebsd.h   Mon Aug 19 14:07:31 2013
(r254526)
@@ -234,9 +234,12 @@ struct mbuf *ieee80211_getmgtframe(uint8
 #defineM_FFM_PROTO6/* fast frame */
 #defineM_TXCB  M_PROTO7/* do tx complete 
callback */
 #defineM_AMPDU_MPDUM_PROTO8/* ok for A-MPDU 
aggregation */
+#defineM_FRAG  M_PROTO9/* frame fragmentation 
*/
+#defineM_FIRSTFRAG M_PROTO10   /* first frame fragment 
*/
+#defineM_LASTFRAG  M_PROTO11   /* last frame fragment 
*/
 #defineM_80211_TX \
-   (M_FRAG|M_FIRSTFRAG|M_LASTFRAG|M_ENCAP|M_EAPOL|M_PWR_SAV|\
-M_MORE_DATA|M_FF|M_TXCB|M_AMPDU_MPDU)
+   (M_ENCAP|M_EAPOL|M_PWR_SAV|M_MORE_DATA|M_FF|M_TXCB| \
+M_AMPDU_MPDU|M_FRAG|M_FIRSTFRAG|M_LASTFRAG)
 
 /* rx path usage */
 #defineM_AMPDU M_PROTO1/* A-MPDU subframe */

Modified: head/sys/sys/mbuf.h
==
--- head/sys/sys/mbuf.h Mon Aug 19 14:04:35 2013(r254525)
+++ head/sys/sys/mbuf.h Mon Aug 19 14:07:31 2013(r254526)
@@ -193,9 +193,9 @@ struct mbuf {
 #defineM_PROTO50x0100 /* protocol-specific */
 #defineM_BCAST 0x0200 /* send/received as link-level 
broadcast */
 #defineM_MCAST 0x0400 /* send/received as link-level 
multicast */
-#defineM_FRAG  0x0800 /* packet is a fragment of a larger 
packet */
-#defineM_FIRSTFRAG 0x1000 /* packet is first fragment */
-#defineM_LASTFRAG  0x2000 /* packet is last fragment */
+/* 0x0800free */
+/* 0x1000free */
+/* 0x2000free */
 #defineM_PROTO90x4000 /* protocol-specific */
 #defineM_PROTO10   0x8000 /* protocol-specific */
 #defineM_VLANTAG   0x0001 /* ether_vtag is valid */
@@ -254,7 +254,7 @@ struct mbuf {
  */
 #defineM_COPYFLAGS \
 (M_PKTHDR|M_EOR|M_RDONLY|M_PROTOFLAGS|M_BCAST|M_MCAST|\
- M_FRAG|M_FIRSTFRAG|M_LASTFRAG|M_VLANTAG|M_PROMISC|M_HASHTYPEBITS)
+ M_VLANTAG|M_PROMISC|M_HASHTYPEBITS)
 
 /*
  * External buffer types: identify ext_buf type.
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r254527 - in head/sys: net80211 netinet sys

2013-08-19 Thread Andre Oppermann
Author: andre
Date: Mon Aug 19 14:25:11 2013
New Revision: 254527
URL: http://svnweb.freebsd.org/changeset/base/254527

Log:
  Reorder the mbuf defines to make more sense and group related flags
  together.
  
  Add M_FLAG_PRINTF for use with printf(9) %b indentifier.
  
  Use the generic mbuf flags print names in the net80211 code and adjust
  the protocol specific bits for their new positions.
  
  Change SCTP M_PROTO mapping from 5 to 1 to fit within the 16bit field
  they use internally to store some additional information.
  
  Discussed with:   trociny, glebius

Modified:
  head/sys/net80211/ieee80211_freebsd.h
  head/sys/netinet/sctp_os_bsd.h
  head/sys/sys/mbuf.h

Modified: head/sys/net80211/ieee80211_freebsd.h
==
--- head/sys/net80211/ieee80211_freebsd.h   Mon Aug 19 14:07:31 2013
(r254526)
+++ head/sys/net80211/ieee80211_freebsd.h   Mon Aug 19 14:25:11 2013
(r254527)
@@ -250,16 +250,13 @@ struct mbuf *ieee80211_getmgtframe(uint8
 #defineM_80211_RX  (M_AMPDU|M_WEP|M_AMPDU_MPDU)
 
 #defineIEEE80211_MBUF_TX_FLAG_BITS \
-   \20\1M_EXT\2M_PKTHDR\3M_EOR\4M_RDONLY\5M_ENCAP\6M_WEP\7M_EAPOL \
-   \10M_PWR_SAV\11M_MORE_DATA\12M_BCAST\13M_MCAST\14M_FRAG\15M_FIRSTFRAG 
\
-   \16M_LASTFRAG\17M_SKIP_FIREWALL\20M_FREELIST\21M_VLANTAG\22M_PROMISC \
-   \23M_NOFREE\24M_FF\25M_TXCB\26M_AMPDU_MPDU\27M_FLOWID
+   M_FLAG_BITS \
+   \15M_ENCAP\17M_EAPOL\20M_PWR_SAV\21M_MORE_DATA\22M_FF\23M_TXCB \
+   \24M_AMPDU_MPDU\25M_FRAG\26M_FIRSTFRAG\27M_LASTFRAG
 
 #defineIEEE80211_MBUF_RX_FLAG_BITS \
-   \20\1M_EXT\2M_PKTHDR\3M_EOR\4M_RDONLY\5M_AMPDU\6M_WEP\7M_PROTO3 \
-   \10M_PROTO4\11M_PROTO5\12M_BCAST\13M_MCAST\14M_FRAG\15M_FIRSTFRAG \
-   \16M_LASTFRAG\17M_SKIP_FIREWALL\20M_FREELIST\21M_VLANTAG\22M_PROMISC \
-   \23M_NOFREE\24M_PROTO6\25M_PROTO7\26M_AMPDU_MPDU\27M_FLOWID
+   M_FLAG_BITS \
+   \15M_AMPDU\16M_WEP\24M_AMPDU_MPDU
 
 /*
  * Store WME access control bits in the vlan tag.

Modified: head/sys/netinet/sctp_os_bsd.h
==
--- head/sys/netinet/sctp_os_bsd.h  Mon Aug 19 14:07:31 2013
(r254526)
+++ head/sys/netinet/sctp_os_bsd.h  Mon Aug 19 14:25:11 2013
(r254527)
@@ -431,7 +431,7 @@ typedef struct rtentry sctp_rtentry_t;
 /*
  * SCTP protocol specific mbuf flags.
  */
-#defineM_NOTIFICATION  M_PROTO5/* SCTP notification */
+#defineM_NOTIFICATION  M_PROTO1/* SCTP notification */
 
 /*
  * IP output routines

Modified: head/sys/sys/mbuf.h
==
--- head/sys/sys/mbuf.h Mon Aug 19 14:07:31 2013(r254526)
+++ head/sys/sys/mbuf.h Mon Aug 19 14:25:11 2013(r254527)
@@ -186,26 +186,25 @@ struct mbuf {
 #defineM_PKTHDR0x0002 /* start of record */
 #defineM_EOR   0x0004 /* end of record */
 #defineM_RDONLY0x0008 /* associated data is marked 
read-only */
-#defineM_PROTO10x0010 /* protocol-specific */
-#defineM_PROTO20x0020 /* protocol-specific */
-#defineM_PROTO30x0040 /* protocol-specific */
-#defineM_PROTO40x0080 /* protocol-specific */
-#defineM_PROTO50x0100 /* protocol-specific */
-#defineM_BCAST 0x0200 /* send/received as link-level 
broadcast */
-#defineM_MCAST 0x0400 /* send/received as link-level 
multicast */
-/* 0x0800free */
-/* 0x1000free */
-/* 0x2000free */
-#defineM_PROTO90x4000 /* protocol-specific */
-#defineM_PROTO10   0x8000 /* protocol-specific */
-#defineM_VLANTAG   0x0001 /* ether_vtag is valid */
-#defineM_PROMISC   0x0002 /* packet was not for us */
-#defineM_PROTO11   0x0004 /* protocol-specific */
-#defineM_PROTO60x0008 /* protocol-specific */
-#defineM_PROTO70x0010 /* protocol-specific */
-#defineM_PROTO80x0020 /* protocol-specific */
-#defineM_FLOWID0x0040 /* deprecated: flowid is valid */
+#defineM_BCAST 0x0010 /* send/received as link-level 
broadcast */
+#defineM_MCAST 0x0020 /* send/received as link-level 
multicast */
+#defineM_PROMISC   0x0040 /* packet was not for us */
+#defineM_VLANTAG   0x0080 /* ether_vtag is valid */
+#defineM_FLOWID0x0100 /* deprecated: flowid is valid */
+
+#defineM_PROTO10x1000 /* protocol-specific */
+#defineM_PROTO20x2000 /* protocol-specific */
+#defineM_PROTO30x4000 

svn commit: r254537 - head/sys/sys

2013-08-19 Thread Andre Oppermann
Author: andre
Date: Mon Aug 19 16:47:06 2013
New Revision: 254537
URL: http://svnweb.freebsd.org/changeset/base/254537

Log:
  Bump __FreeBSD_version to 146 after the addition of M_PROTO[9-12]
  and removal of M_NOFREE|M_FRAG|M_FIRSTFRAG|M_LASTFRAG mbuf flags.

Modified:
  head/sys/sys/param.h

Modified: head/sys/sys/param.h
==
--- head/sys/sys/param.hMon Aug 19 16:16:49 2013(r254536)
+++ head/sys/sys/param.hMon Aug 19 16:47:06 2013(r254537)
@@ -58,7 +58,7 @@
  * in the range 5 to 9.
  */
 #undef __FreeBSD_version
-#define __FreeBSD_version 145  /* Master, propagated to newvers */
+#define __FreeBSD_version 146  /* Master, propagated to newvers */
 
 /*
  * __FreeBSD_kernel__ indicates that this system uses the kernel of FreeBSD,
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


Re: svn commit: r254527 - in head/sys: net80211 netinet sys

2013-08-19 Thread Andre Oppermann

On 19.08.2013 17:12, Adrian Chadd wrote:

Hi,

Would you please bump FreeBSD_version ?


Done:

New Revision: 254537
URL: http://svnweb.freebsd.org/changeset/base/254537

Log:
  Bump __FreeBSD_version to 146 after the addition of M_PROTO[9-12]
  and removal of M_NOFREE|M_FRAG|M_FIRSTFRAG|M_LASTFRAG mbuf flags.

--
Andre

___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


Re: svn commit: r254520 - in head/sys: kern sys

2013-08-19 Thread Andre Oppermann

On 19.08.2013 19:33, Navdeep Parhar wrote:

On 08/19/13 04:16, Andre Oppermann wrote:

Author: andre
Date: Mon Aug 19 11:16:53 2013
New Revision: 254520
URL: http://svnweb.freebsd.org/changeset/base/254520

Log:
   Remove the unused M_NOFREE mbuf flag.  It didn't have any in-tree users
   for a very long time, if ever.

   Should such a functionality ever be needed again the appropriate and
   much better way to do it is through a custom EXT_SOMETHING external mbuf
   type together with a dedicated *ext_free function.

   Discussed with:  trociny, glebius

Modified:
   head/sys/kern/kern_mbuf.c
   head/sys/kern/uipc_mbuf.c
   head/sys/sys/mbuf.h



Hello Andre,

Is this just garbage collection or is there some other reason for this?


This is garbage collection and removal of not quite right, rotten, 
functionality.


I recently tried some experiments to reduce the number of mbuf and
cluster allocations in a 40G NIC driver.  M_NOFREE and EXT_EXTREF proved
very useful and the code changes to the kernel were minimal.  See
user/np/cxl_tuning.  The experiment was quite successful and I was
planning to bring in most of those changes to HEAD.  I was hoping to get
some runtime mileage on the approach in general before tweaking the
ctors/dtors for jumpbo, jumbo9, jumbo16 to allow for an mbuf+refcnt
within the cluster.  But now M_NOFREE has vanished without a warning...


I'm looking through your experimental code and that is some really good
numbers you're achieving there!

However a couple things don't feel quite right, hackish even, and not
fit for HEAD.  This is a bit the same situation we had with some of the
first 1GigE cards quite a number of years back (mostly ti(4)).  There
we ended up with a couple of just good enough hacks to make it fast.
Most of the remains I've collected today.

I believe most of the improvements you've shown can be implemented in
a more generic and safe way into the mbuf system.  Also a number of things
in your experimental code may have side-effects in situations other than
netperf runs.

To summarize what I get from your experimental branch commits:
- the Chelsio T4/T5 card can DMA multiple ethernet frames (packets) into
  a single memory buffer, provided it is large enough.
- you make use of that feature and point multiple m_ext mbufs into that
  buffer to separate the packets and send them up the stack.
- you embed the m_ext refcount into the single memory buffer as well.
- you recycle mbufs? (I'm not entirely clear on that as I'm not familiar
  with the cxgbe code)

Lets examine and discuss these parts:
- M_NOFREE wasn't really safe with bpf anyway at least for packets going
  down the stack.
- Instead of M_NOFREE a custom *ext_free should be used that has the same
  and even more functionality.
- Recycling mbufs may cause imbalances to the mbuf pool with multiple cores
  and possibly NUMA in the future.  Not that we are very good with it at
  the moment but bypassing the mbuf allocator shouldn't become the norm.
  If it is a problem/bottleneck again it should be fixed, not worked around
  and copy-pasted n-times in so many drivers.
- jumbo9 and jumbo16 mbufs should not be used because they are more special
  than necessary with being KVM and physically contiguous.  This probably
  isn't necessary for the T4/T5 cards and any other modern DMA engine.
  Under heavy diverse network the kernel memory becomes fragmented and can't
  find memory fulfilling both criteria anymore.  In fact both are an artifact
  of the early GigE hacks when high speed DMA engines were in their infancy.
  Both jumbo9 and jumbo16 should go away without direct replacement.
  In your T4/T5 case the alternative would be either to a) allocate your
  own memory directly from KVM with the necessary properties (KVM and/or phys
  contig); b) have such a generic kernel mbuf type fulfilling the same role.
  There may be some cache line issues on non-x86 systems that have to be
  though and taken care of.
- Refcounts should have the option of being separately allocated.  It was
  mandatory to use the refcount zone so far because there wasn't any other
  type of refcount.  Now that we have one we should revisit the issue.
  Actually the entire mbuf allocation and initialization could be streamlined
  as a whole.

On a side note a different mbuf handling for the RX DMA rings may give some
significant improvements as well: allocate just a cluster without a mbuf through
m_cljget() and put it into the RX ring.  Only when the DMA has put a packet into
it allocated attach the mbuf (in the drivers RX function).  This avoids the 
cache
pollution from touching the mbuf fields during initial allocation, including the
refcount.

Another nice trick would be to shorten the mbuf by 4 bytes (in ext_size) and put
the refcount there.

Lets work on these together.

--
Andre

___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe

Re: svn commit: r254520 - in head/sys: kern sys

2013-08-19 Thread Andre Oppermann

On 19.08.2013 19:40, Peter Grehan wrote:

I recently tried some experiments to reduce the number of mbuf and
cluster allocations in a 40G NIC driver.  M_NOFREE and EXT_EXTREF proved
very useful and the code changes to the kernel were minimal.  See
user/np/cxl_tuning.  The experiment was quite successful and I was
planning to bring in most of those changes to HEAD.  I was hoping to get
some runtime mileage on the approach in general before tweaking the
ctors/dtors for jumpbo, jumbo9, jumbo16 to allow for an mbuf+refcnt
within the cluster.  But now M_NOFREE has vanished without a warning...


  I also had a virtualization work-in-progress where static mbufs were 
allocated in the kernel and
M_NOFREE set.

  Might be worth sending a prior heads-up for these type of changes.


I'm sorry for ambushing but this stuff has to be done.  I have provided
an alternative way of handling it and I'm happy to help you with your
use case to make it good for you and to prevent the mbuf system from
getting bloated and hackish again.

Mbuf is a core system for the kernel and we should avoid to kitchen-sink
it again while at the same time to keep speedy enough to keep up with
the speed requirements.

I believe it would be bad to have Navdeep, you and others invent their
own network buffer management routines over again in slightly different
ways tailored to each immediate use case.

Can you please describe your intended use of M_NOFREE to better understand
the shortcomings of the current mbuf systems and the additional advantages
of the M_NOFREE case?

--
Andre

___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r253395 - head/sys/netinet

2013-07-16 Thread Andre Oppermann
Author: andre
Date: Tue Jul 16 16:37:08 2013
New Revision: 253395
URL: http://svnweb.freebsd.org/changeset/base/253395

Log:
  Free the non-fatal timestamp missing debug string manually as it is
  not covered by the catch-all free for the error cases.
  
  Found by: Coverity

Modified:
  head/sys/netinet/tcp_syncache.c

Modified: head/sys/netinet/tcp_syncache.c
==
--- head/sys/netinet/tcp_syncache.c Tue Jul 16 15:51:32 2013
(r253394)
+++ head/sys/netinet/tcp_syncache.c Tue Jul 16 16:37:08 2013
(r253395)
@@ -1041,9 +1041,12 @@ syncache_expand(struct in_conninfo *inc,
 * reports of non-compliants stacks.
 */
if ((sc-sc_flags  SCF_TIMESTAMP)  !(to-to_flags  TOF_TS)) {
-   if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+   if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
log(LOG_DEBUG, %s; %s: Timestamp missing, 
no action\n, s, __func__);
+   free(s, M_TCPLOG);
+   s = NULL;
+   }
}
 
/*
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


Re: svn commit: r253210 - in head/sys: conf netinet

2013-07-15 Thread Andre Oppermann

On 15.07.2013 20:38, Mikolaj Golub wrote:

On Mon, Jul 15, 2013 at 11:36:16AM +0200, Ulrich Spörlein wrote:

Hey Andre,

I don't see why this commit triggers it, but Coverity Scan found a new
resource leak in this file. syncache_expand() will leak *s when line
1071 is reached. The failed: case below correctly frees the resources.

1068/* how do we find the inp for the new socket? */
 22. Condition sc != scs, taking true branch
1069if (sc != scs)
1070syncache_free(sc);

CID null (#1 of 1): Resource leak (RESOURCE_LEAK)
23. leaked_storage: Variable s going out of scope leaks the storage it points 
to.
1071return (1);
1072failed:
1073if (sc != NULL  sc != scs)
1074syncache_free(sc);
1075if (s != NULL)
1076free(s, M_TCPLOG);
1077*lsop = NULL;
1078return (0);
1079}


It looks like free(s, M_TCPLOG) is missed in this branch:

   1043 if ((sc-sc_flags  SCF_TIMESTAMP)  !(to-to_flags  TOF_TS)) 
{
   1044 if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
   1045 log(LOG_DEBUG, %s; %s: Timestamp missing, 
   1046 no action\n, s, __func__);
   1047 }


Yes, I just figured that out and prepared a patch.

--
Andre

___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org

svn commit: r253254 - head/sys/netinet

2013-07-12 Thread Andre Oppermann
Author: andre
Date: Fri Jul 12 07:43:56 2013
New Revision: 253254
URL: http://svnweb.freebsd.org/changeset/base/253254

Log:
  Unbreak VIMAGE by correctly naming the vnet pointer in struct tcp_syncache.
  
  Reported by:  trociny, rodrigc

Modified:
  head/sys/netinet/tcp_syncache.h

Modified: head/sys/netinet/tcp_syncache.h
==
--- head/sys/netinet/tcp_syncache.h Fri Jul 12 06:54:29 2013
(r253253)
+++ head/sys/netinet/tcp_syncache.h Fri Jul 12 07:43:56 2013
(r253254)
@@ -118,7 +118,7 @@ struct tcp_syncache {
u_int   cache_limit;
u_int   rexmt_limit;
u_int   hash_secret;
-   struct vnet *sch_vnet;
+   struct vnet *vnet;
struct syncookie_secret secret;
 };
 
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


Re: svn commit: r253254 - head/sys/netinet

2013-07-12 Thread Andre Oppermann

On 12.07.2013 09:43, Andre Oppermann wrote:

Author: andre
Date: Fri Jul 12 07:43:56 2013
New Revision: 253254
URL: http://svnweb.freebsd.org/changeset/base/253254

Log:
   Unbreak VIMAGE by correctly naming the vnet pointer in struct tcp_syncache.

   Reported by: trociny, rodrigc


Sorry for the breakage.  I'm extending my test protocol to include VIMAGE,
INET-only and INET6-only builds.

--
Andre

___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r253204 - head/sys/kern

2013-07-11 Thread Andre Oppermann
Author: andre
Date: Thu Jul 11 12:46:35 2013
New Revision: 253204
URL: http://svnweb.freebsd.org/changeset/base/253204

Log:
  Fix style issues, a typo in kern.ipc.nmbufs and correctly plave and
  expose the value of the tunable maxmbufmem as kern.ipc.maxmbufmem
  through sysctl.
  
  Reported by:  smh
  MFC after:1 day

Modified:
  head/sys/kern/kern_mbuf.c

Modified: head/sys/kern/kern_mbuf.c
==
--- head/sys/kern/kern_mbuf.c   Thu Jul 11 12:42:46 2013(r253203)
+++ head/sys/kern/kern_mbuf.c   Thu Jul 11 12:46:35 2013(r253204)
@@ -104,13 +104,18 @@ int nmbjumbo9;/* limits number of 9k 
 int nmbjumbo16;/* limits number of 16k jumbo clusters 
*/
 struct mbstat mbstat;
 
+static quad_t maxmbufmem;  /* overall real memory limit for all mbufs */
+
+SYSCTL_QUAD(_kern_ipc, OID_AUTO, maxmbufmem, CTLFLAG_RDTUN, maxmbufmem, 0,
+Maximum real memory allocateable to various mbuf types);
+
 /*
  * tunable_mbinit() has to be run before any mbuf allocations are done.
  */
 static void
 tunable_mbinit(void *dummy)
 {
-   quad_t realmem, maxmbufmem;
+   quad_t realmem;
 
/*
 * The default limit for all mbuf related memory is 1/2 of all
@@ -120,7 +125,7 @@ tunable_mbinit(void *dummy)
realmem = qmin((quad_t)physmem * PAGE_SIZE,
vm_map_max(kmem_map) - vm_map_min(kmem_map));
maxmbufmem = realmem / 2;
-   TUNABLE_QUAD_FETCH(kern.maxmbufmem, maxmbufmem);
+   TUNABLE_QUAD_FETCH(kern.ipc.maxmbufmem, maxmbufmem);
if (maxmbufmem  realmem / 4 * 3)
maxmbufmem = realmem / 4 * 3;
 
@@ -204,7 +209,7 @@ sysctl_nmbjumbo9(SYSCTL_HANDLER_ARGS)
newnmbjumbo9 = nmbjumbo9;
error = sysctl_handle_int(oidp, newnmbjumbo9, 0, req);
if (error == 0  req-newptr) {
-   if (newnmbjumbo9  nmbjumbo9
+   if (newnmbjumbo9  nmbjumbo9 
nmbufs = nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) 
{
nmbjumbo9 = newnmbjumbo9;
uma_zone_set_max(zone_jumbo9, nmbjumbo9);
@@ -258,7 +263,7 @@ sysctl_nmbufs(SYSCTL_HANDLER_ARGS)
}
return (error);
 }
-SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbuf, CTLTYPE_INT|CTLFLAG_RW,
+SYSCTL_PROC(_kern_ipc, OID_AUTO, nmbufs, CTLTYPE_INT|CTLFLAG_RW,
 nmbufs, 0, sysctl_nmbufs, IU,
 Maximum number of mbufs allowed);
 
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r253207 - head/sys/kern

2013-07-11 Thread Andre Oppermann
Author: andre
Date: Thu Jul 11 12:53:13 2013
New Revision: 253207
URL: http://svnweb.freebsd.org/changeset/base/253207

Log:
  Make use of the fact that uma_zone_set_max(9) already returns the
  rounded limit making a call to uma_zone_get_max(9) unnecessary.
  
  MFC after:1 day

Modified:
  head/sys/kern/kern_mbuf.c

Modified: head/sys/kern/kern_mbuf.c
==
--- head/sys/kern/kern_mbuf.c   Thu Jul 11 12:49:12 2013(r253206)
+++ head/sys/kern/kern_mbuf.c   Thu Jul 11 12:53:13 2013(r253207)
@@ -167,8 +167,7 @@ sysctl_nmbclusters(SYSCTL_HANDLER_ARGS)
if (newnmbclusters  nmbclusters 
nmbufs = nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) 
{
nmbclusters = newnmbclusters;
-   uma_zone_set_max(zone_clust, nmbclusters);
-   nmbclusters = uma_zone_get_max(zone_clust);
+   nmbclusters = uma_zone_set_max(zone_clust, nmbclusters);
EVENTHANDLER_INVOKE(nmbclusters_change);
} else
error = EINVAL;
@@ -190,8 +189,7 @@ sysctl_nmbjumbop(SYSCTL_HANDLER_ARGS)
if (newnmbjumbop  nmbjumbop 
nmbufs = nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) 
{
nmbjumbop = newnmbjumbop;
-   uma_zone_set_max(zone_jumbop, nmbjumbop);
-   nmbjumbop = uma_zone_get_max(zone_jumbop);
+   nmbjumbop = uma_zone_set_max(zone_jumbop, nmbjumbop);
} else
error = EINVAL;
}
@@ -212,8 +210,7 @@ sysctl_nmbjumbo9(SYSCTL_HANDLER_ARGS)
if (newnmbjumbo9  nmbjumbo9 
nmbufs = nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) 
{
nmbjumbo9 = newnmbjumbo9;
-   uma_zone_set_max(zone_jumbo9, nmbjumbo9);
-   nmbjumbo9 = uma_zone_get_max(zone_jumbo9);
+   nmbjumbo9 = uma_zone_set_max(zone_jumbo9, nmbjumbo9);
} else
error = EINVAL;
}
@@ -234,8 +231,7 @@ sysctl_nmbjumbo16(SYSCTL_HANDLER_ARGS)
if (newnmbjumbo16  nmbjumbo16 
nmbufs = nmbclusters + nmbjumbop + nmbjumbo9 + nmbjumbo16) 
{
nmbjumbo16 = newnmbjumbo16;
-   uma_zone_set_max(zone_jumbo16, nmbjumbo16);
-   nmbjumbo16 = uma_zone_get_max(zone_jumbo16);
+   nmbjumbo16 = uma_zone_set_max(zone_jumbo16, nmbjumbo16);
} else
error = EINVAL;
}
@@ -255,8 +251,7 @@ sysctl_nmbufs(SYSCTL_HANDLER_ARGS)
if (error == 0  req-newptr) {
if (newnmbufs  nmbufs) {
nmbufs = newnmbufs;
-   uma_zone_set_max(zone_mbuf, nmbufs);
-   nmbufs = uma_zone_get_max(zone_mbuf);
+   nmbufs = uma_zone_set_max(zone_mbuf, nmbufs);
EVENTHANDLER_INVOKE(nmbufs_change);
} else
error = EINVAL;
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r253208 - head/sys/crypto/siphash

2013-07-11 Thread Andre Oppermann
Author: andre
Date: Thu Jul 11 14:18:38 2013
New Revision: 253208
URL: http://svnweb.freebsd.org/changeset/base/253208

Log:
  SipHash is a cryptographically strong pseudo-random function (a.k.a. keyed
  hash function) optimized for speed on short messages returning a 64bit hash/
  digest value.
  
  SipHash is simpler and much faster than other secure MACs and competitive
  in speed with popular non-cryptographic hash functions.  It uses a 128-bit
  key without the hidden cost of a key expansion step.  SipHash iterates a
  simple round function consisting of four additions, four xors, and six
  rotations, interleaved with xors of message blocks for a pre-defined number
  of compression and finalization rounds.  The absence of  secret load/store
  addresses or secret branch conditions avoid timing attacks.  No state is
  shared between messages.  Hashing is deterministic and doesn't use nonces.
  It is not susceptible to length extension attacks.
  
  Target applications include network traffic authentication, message
  authentication (MAC) and hash-tables protection against hash-flooding
  denial-of-service attacks.
  
  The number of update/finalization rounds is defined during initialization:
  
   SipHash24_Init() for the fast and reasonable strong version.
   SipHash48_Init() for the strong version (half as fast).
  
  SipHash usage is similar to other hash functions:
  
   struct SIPHASH_CTX ctx;
   char *k = 16bytes long key
   char *s = string;
   uint64_t h = 0;
   SipHash24_Init(ctx);
   SipHash_SetKey(ctx, k);
   SipHash_Update(ctx, s, strlen(s));
   SipHash_Final(h, ctx);  /* or */
   h = SipHash_End(ctx);/* or */
   h = SipHash24(ctx, k, s, strlen(s));
  
  It was designed by Jean-Philippe Aumasson and Daniel J. Bernstein and
  is described in the paper SipHash: a fast short-input PRF, 2012.09.18:
   https://131002.net/siphash/siphash.pdf
   Permanent ID: b9a943a805fbfc6fde808af9fc0ecdfa
  
  Implemented by:   andre (based on the paper)
  Reviewed by:  cperciva

Added:
  head/sys/crypto/siphash/
  head/sys/crypto/siphash/siphash.c   (contents, props changed)
  head/sys/crypto/siphash/siphash.h   (contents, props changed)
  head/sys/crypto/siphash/siphash_test.c   (contents, props changed)

Added: head/sys/crypto/siphash/siphash.c
==
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/sys/crypto/siphash/siphash.c   Thu Jul 11 14:18:38 2013
(r253208)
@@ -0,0 +1,241 @@
+/*-
+ * Copyright (c) 2013 Andre Oppermann an...@freebsd.org
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ *notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ *notice, this list of conditions and the following disclaimer in the
+ *documentation and/or other materials provided with the distribution.
+ * 3. The name of the author may not be used to endorse or promote
+ *products derived from this software without specific prior written
+ *permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+/*
+ * SipHash is a family of PRFs SipHash-c-d where the integer parameters c and d
+ * are the number of compression rounds and the number of finalization rounds.
+ * A compression round is identical to a finalization round and this round
+ * function is called SipRound.  Given a 128-bit key k and a (possibly empty)
+ * byte string m, SipHash-c-d returns a 64-bit value SipHash-c-d(k; m).
+ *
+ * Implemented from the paper SipHash: a fast short-input PRF, 2012.09.18,
+ * by Jean-Philippe Aumasson and Daniel J. Bernstein,
+ * Permanent Document ID b9a943a805fbfc6fde808af9fc0ecdfa
+ * https://131002.net/siphash/siphash.pdf
+ * https://131002.net/siphash/
+ */
+
+#include sys/cdefs.h
+__FBSDID($FreeBSD$);
+
+#include sys/param.h
+#include sys/types.h
+#include sys/systm.h
+#include sys/libkern.h
+#include sys/endian.h
+
+#include crypto/siphash/siphash.h
+
+static voidSipRounds

svn commit: r253210 - in head/sys: conf netinet

2013-07-11 Thread Andre Oppermann
Author: andre
Date: Thu Jul 11 15:29:25 2013
New Revision: 253210
URL: http://svnweb.freebsd.org/changeset/base/253210

Log:
  Improve SYN cookies by encoding the MSS, WSCALE (window scaling) and SACK
  information into the ISN (initial sequence number) without the additional
  use of timestamp bits and switching to the very fast and cryptographically
  strong SipHash-2-4 MAC hash algorithm to protect the SYN cookie against
  forgeries.
  
  The purpose of SYN cookies is to encode all necessary session state in
  the 32 bits of our initial sequence number to avoid storing any information
  locally in memory.  This is especially important when under heavy spoofed
  SYN attacks where we would either run out of memory or the syncache would
  fill with bogus connection attempts swamping out legitimate connections.
  
  The original SYN cookies method only stored an indexed MSS values in the
  cookie.  This isn't sufficient anymore and breaks down in the presence of
  WSCALE information which is only exchanged during SYN and SYN-ACK.  If we
  can't keep track of it then we may severely underestimate the available
  send or receive window. This is compounded with large windows whose size
  information on the TCP segment header is even lower numerically.  A number
  of years back SYN cookies were extended to store the additional state in
  the TCP timestamp fields, if available on a connection.  While timestamps
  are common among the BSD, Linux and other *nix systems Windows never enabled
  them by default and thus are not present for the vast majority of clients
  seen on the Internet.
  
  The common parameters used on TCP sessions have changed quite a bit since
  SYN cookies very invented some 17 years ago.  Today we have a lot more
  bandwidth available making the use window scaling almost mandatory.  Also
  SACK has become standard making recovering from packet loss much more
  efficient.
  
  This change moves all necessary information into the ISS removing the need
  for timestamps.  Both the MSS (16 bits) and send WSCALE (4 bits) are stored
  in 3 bit indexed form together with a single bit for SACK.  While this is
  significantly less than the original range, it is sufficient to encode all
  common values with minimal rounding.
  
  The MSS depends on the MTU of the path and with the dominance of ethernet
  the main value seen is around 1460 bytes.  Encapsulations for DSL lines
  and some other overheads reduce it by a few more bytes for many connections
  seen.  Rounding down to the next lower value in some cases isn't a problem
  as we send only slightly more packets for the same amount of data.
  
  The send WSCALE index is bit more tricky as rounding down under-estimates
  the available send space available towards the remote host, however a small
  number values dominate and are carefully selected again.
  
  The receive WSCALE isn't encoded at all but recalculated based on the local
  receive socket buffer size when a valid SYN cookie returns.  A listen socket
  buffer size is unlikely to change while active.
  
  The index values for MSS and WSCALE are selected for minimal rounding errors
  based on large traffic surveys.  These values have to be periodically
  validated against newer traffic surveys adjusting the arrays tcp_sc_msstab[]
  and tcp_sc_wstab[] if necessary.
  
  In addition the hash MAC to protect the SYN cookies is changed from MD5
  to SipHash-2-4, a much faster and cryptographically secure algorithm.
  
  Reviewed by:  dwmalone
  Tested by:Fabian Keil f...@fabiankeil.de

Modified:
  head/sys/conf/files
  head/sys/netinet/tcp_syncache.c
  head/sys/netinet/tcp_syncache.h

Modified: head/sys/conf/files
==
--- head/sys/conf/files Thu Jul 11 15:02:38 2013(r253209)
+++ head/sys/conf/files Thu Jul 11 15:29:25 2013(r253210)
@@ -547,6 +547,8 @@ crypto/sha1.c   optional carp | crypto |
 netgraph_mppc_encryption | sctp
 crypto/sha2/sha2.c optional crypto | geom_bde | ipsec | random | \
 sctp | zfs
+crypto/siphash/siphash.c   optional inet | inet6
+crypto/siphash/siphash_test.c  optional inet | inet6
 ddb/db_access.coptional ddb
 ddb/db_break.c optional ddb
 ddb/db_capture.c   optional ddb

Modified: head/sys/netinet/tcp_syncache.c
==
--- head/sys/netinet/tcp_syncache.c Thu Jul 11 15:02:38 2013
(r253209)
+++ head/sys/netinet/tcp_syncache.c Thu Jul 11 15:29:25 2013
(r253210)
@@ -1,12 +1,12 @@
 /*-
  * Copyright (c) 2001 McAfee, Inc.
- * Copyright (c) 2006 Andre Oppermann, Internet Business Solutions AG
+ * Copyright (c) 2006,2013 Andre Oppermann, Internet Business Solutions AG
  * All rights reserved.
  *
  * This software

Re: svn commit: r253208 - head/sys/crypto/siphash

2013-07-11 Thread Andre Oppermann

On 11.07.2013 16:18, Andre Oppermann wrote:

Author: andre
Date: Thu Jul 11 14:18:38 2013
New Revision: 253208
URL: http://svnweb.freebsd.org/changeset/base/253208

Log:
   SipHash is a cryptographically strong pseudo-random function (a.k.a. keyed
   hash function) optimized for speed on short messages returning a 64bit hash/
   digest value.

...

   The number of update/finalization rounds is defined during initialization:

SipHash24_Init() for the fast and reasonable strong version.
SipHash48_Init() for the strong version (half as fast).

   SipHash usage is similar to other hash functions:

struct SIPHASH_CTX ctx;
char *k = 16bytes long key
char *s = string;
uint64_t h = 0;
SipHash24_Init(ctx);
SipHash_SetKey(ctx, k);
SipHash_Update(ctx, s, strlen(s));
SipHash_Final(h, ctx);  /* or */
h = SipHash_End(ctx);/* or */
h = SipHash24(ctx, k, s, strlen(s));


Looking for a man page whiz to hack one up. :)

--
Andre

___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r253214 - head/sys/crypto/siphash

2013-07-11 Thread Andre Oppermann
Author: andre
Date: Thu Jul 11 16:27:11 2013
New Revision: 253214
URL: http://svnweb.freebsd.org/changeset/base/253214

Log:
  Fix const propagation issues to make GCC happy.
  
  Submitted by: Michael Butler i...@protected-networks.net

Modified:
  head/sys/crypto/siphash/siphash.c

Modified: head/sys/crypto/siphash/siphash.c
==
--- head/sys/crypto/siphash/siphash.c   Thu Jul 11 15:55:57 2013
(r253213)
+++ head/sys/crypto/siphash/siphash.c   Thu Jul 11 16:27:11 2013
(r253214)
@@ -119,7 +119,8 @@ SipBuf(SIPHASH_CTX *ctx, const uint8_t *
 void
 SipHash_Update(SIPHASH_CTX *ctx, const void *src, size_t len)
 {
-   uint64_t m, *p;
+   uint64_t m;
+   const uint64_t *p;
const uint8_t *s;
size_t rem;
 
@@ -144,13 +145,13 @@ SipHash_Update(SIPHASH_CTX *ctx, const v
 
/* Optimze for 64bit aligned/unaligned access. */
if (((uintptr_t)s  0x7) == 0) {
-   for (p = (uint64_t *)s; len  0; len--, p++) {
+   for (p = (const uint64_t *)s; len  0; len--, p++) {
m = le64toh(*p);
ctx-v[3] ^= m;
SipRounds(ctx, 0);
ctx-v[0] ^= m;
}
-   s = (uint8_t *)p;
+   s = (const uint8_t *)p;
} else {
for (; len  0; len--, s += 8) {
m = le64dec(s);
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r253150 - head/sys/netinet

2013-07-10 Thread Andre Oppermann
Author: andre
Date: Wed Jul 10 12:06:01 2013
New Revision: 253150
URL: http://svnweb.freebsd.org/changeset/base/253150

Log:
  Extend debug logging of TCP timestamp related specification
  violations.
  
  Update related comments and style.

Modified:
  head/sys/netinet/tcp_input.c
  head/sys/netinet/tcp_syncache.c

Modified: head/sys/netinet/tcp_input.c
==
--- head/sys/netinet/tcp_input.cWed Jul 10 10:57:09 2013
(r253149)
+++ head/sys/netinet/tcp_input.cWed Jul 10 12:06:01 2013
(r253150)
@@ -1446,6 +1446,8 @@ tcp_do_segment(struct mbuf *m, struct tc
int thflags, acked, ourfinisacked, needoutput = 0;
int rstreason, todrop, win;
u_long tiwin;
+   char *s;
+   struct in_conninfo *inc;
struct tcpopt to;
 
 #ifdef TCPDEBUG
@@ -1458,6 +1460,7 @@ tcp_do_segment(struct mbuf *m, struct tc
short ostate = 0;
 #endif
thflags = th-th_flags;
+   inc = tp-t_inpcb-inp_inc;
tp-sackhint.last_sack_ack = 0;
 
/*
@@ -1546,6 +1549,24 @@ tcp_do_segment(struct mbuf *m, struct tc
if (TSTMP_GT(to.to_tsecr, tcp_ts_getticks()))
to.to_tsecr = 0;
}
+   /*
+* If timestamps were negotiated during SYN/ACK they should
+* appear on every segment during this session and vice versa.
+*/
+   if ((tp-t_flags  TF_RCVD_TSTMP)  !(to.to_flags  TOF_TS)) {
+   if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
+   log(LOG_DEBUG, %s; %s: Timestamp missing, 
+   no action\n, s, __func__);
+   free(s, M_TCPLOG);
+   }
+   }
+   if (!(tp-t_flags  TF_RCVD_TSTMP)  (to.to_flags  TOF_TS)) {
+   if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
+   log(LOG_DEBUG, %s; %s: Timestamp not expected, 
+   no action\n, s, __func__);
+   free(s, M_TCPLOG);
+   }
+   }
 
/*
 * Process options only when we get SYN/ACK back. The SYN case
@@ -2213,15 +2234,14 @@ tcp_do_segment(struct mbuf *m, struct tc
 */
if ((so-so_state  SS_NOFDREF) 
tp-t_state  TCPS_CLOSE_WAIT  tlen) {
-   char *s;
-
KASSERT(ti_locked == TI_WLOCKED, (%s: SS_NOFDEREF  
CLOSE_WAIT  tlen ti_locked %d, __func__, ti_locked));
INP_INFO_WLOCK_ASSERT(V_tcbinfo);
 
-   if ((s = tcp_log_addrs(tp-t_inpcb-inp_inc, th, NULL, NULL))) 
{
-   log(LOG_DEBUG, %s; %s: %s: Received %d bytes of data 
after socket 
-   was closed, sending RST and removing tcpcb\n,
+   if ((s = tcp_log_addrs(inc, th, NULL, NULL))) {
+   log(LOG_DEBUG, %s; %s: %s: Received %d bytes of data 
+   after socket was closed, 
+   sending RST and removing tcpcb\n,
s, __func__, tcpstates[tp-t_state], tlen);
free(s, M_TCPLOG);
}

Modified: head/sys/netinet/tcp_syncache.c
==
--- head/sys/netinet/tcp_syncache.c Wed Jul 10 10:57:09 2013
(r253149)
+++ head/sys/netinet/tcp_syncache.c Wed Jul 10 12:06:01 2013
(r253150)
@@ -992,12 +992,29 @@ syncache_expand(struct in_conninfo *inc,
goto failed;
}
 
+   /*
+* If timestamps were not negotiated during SYN/ACK they
+* must not appear on any segment during this session.
+*/
if (!(sc-sc_flags  SCF_TIMESTAMP)  (to-to_flags  TOF_TS)) {
if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
log(LOG_DEBUG, %s; %s: Timestamp not expected, 
segment rejected\n, s, __func__);
goto failed;
}
+
+   /*
+* If timestamps were negotiated during SYN/ACK they should
+* appear on every segment during this session.
+* XXXAO: This is only informal as there have been unverified
+* reports of non-compliants stacks.
+*/
+   if ((sc-sc_flags  SCF_TIMESTAMP)  !(to-to_flags  TOF_TS)) {
+   if ((s = tcp_log_addrs(inc, th, NULL, NULL)))
+   log(LOG_DEBUG, %s; %s: Timestamp missing, 
+   no action\n, s, __func__);
+   }
+
/*
 * If timestamps were negotiated the reflected timestamp
 * must be equal to what we actually sent in the SYN|ACK.
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


Re: svn commit: r253081 - in head: sys/net sys/netinet sys/netinet6 sys/netipsec tools/tools/crypto usr.bin/netstat

2013-07-09 Thread Andre Oppermann

On 09.07.2013 11:32, Andrey V. Elsukov wrote:

Author: ae
Date: Tue Jul  9 09:32:06 2013
New Revision: 253081
URL: http://svnweb.freebsd.org/changeset/base/253081

Log:
   Prepare network statistics structures for migration to PCPU counters.
   Use uint64_t as type for all fields of structures.


Yes!  Finally 64 bit counters! :)

--
Andre

___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


Re: svn commit: r252209 - in head: share/man/man9 sys/kern sys/sys

2013-06-26 Thread Andre Oppermann

On 25.06.2013 20:44, John Baldwin wrote:

Author: jhb
Date: Tue Jun 25 18:44:15 2013
New Revision: 252209
URL: http://svnweb.freebsd.org/changeset/base/252209

Log:
   Several improvements to rmlock(9).  Many of these are based on patches
   provided by Isilon.
   - Add an rm_assert() supporting various lock assertions similar to other
 locking primitives.  Because rmlocks track readers the assertions are
 always fully accurate unlike rw_assert() and sx_assert().
   - Flesh out the lock class methods for rmlocks to support sleeping via
 condvars and rm_sleep() (but only while holding write locks), rmlock
 details in 'show lock' in DDB, and the lc_owner method used by
 dtrace.
   - Add an internal destroyed cookie so that API functions can assert
 that an rmlock is not destroyed.
   - Make use of rm_assert() to add various assertions to the API (e.g.
 to assert locks are held when an unlock routine is called).
   - Give RM_SLEEPABLE locks their own lock class and always use the
 rmlock's own lock_object with WITNESS.
   - Use THREAD_NO_SLEEPING() / THREAD_SLEEPING_OK() to disallow sleeping
 while holding a read lock on an rmlock.


Thanks!

Would it make sense to move struct rm_queue from struct pcpu itself to
using DPCPU as a next step?


   Submitted by:andre


Actually these were only relayed by me and came from Max Laier / Stephan
Uphoff.  So all fame to them.


   Obtained from:   EMC/Isilon


--
Andre

___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


Re: svn commit: r251894 - in head: lib/libmemstat sys/vm

2013-06-18 Thread Andre Oppermann

On 18.06.2013 06:50, Jeff Roberson wrote:

Author: jeff
Date: Tue Jun 18 04:50:20 2013
New Revision: 251894
URL: http://svnweb.freebsd.org/changeset/base/251894

Log:
   Refine UMA bucket allocation to reduce space consumption and improve
   performance.

- Always free to the alloc bucket if there is space.  This gives LIFO
  allocation order to improve hot-cache performance.  This also allows
  for zones with a single bucket per-cpu rather than a pair if the entire
  working set fits in one bucket.
- Enable per-cpu caches of buckets.  To prevent recursive bucket
  allocation one bucket zone still has per-cpu caches disabled.
- Pick the initial bucket size based on a table driven maximum size
  per-bucket rather than the number of items per-page.  This gives
  more sane initial sizes.
- Only grow the bucket size when we face contention on the zone lock, this
  causes bucket sizes to grow more slowly.
- Adjust the number of items per-bucket to account for the header space.
  This packs the buckets more efficiently per-page while making them
  not quite powers of two.
- Eliminate the per-zone free bucket list.  Always return buckets back
  to the bucket zone.  This ensures that as zones grow into larger
  bucket sizes they eventually discard the smaller sizes.  It persists
  fewer buckets in the system.  The locking is slightly trickier.
- Only switch buckets in zalloc, not zfree, this eliminates pathological
  cases where we ping-pong between two buckets.
- Ensure that the thread that fills a new bucket gets to allocate from
  it to give a better upper bound on allocation time.


There used to be a problem with per CPU caches accumulating large amounts
of items without freeing back to the global (or socket) pool.

Do these updates to UMA change this situation and/or do you have further
improvements coming up?

--
Andre

___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


Re: svn commit: r251886 - in head: contrib/apr contrib/apr-util contrib/serf contrib/sqlite3 contrib/subversion share/mk usr.bin usr.bin/svn usr.bin/svn/lib usr.bin/svn/lib/libapr usr.bin/svn/lib/liba

2013-06-18 Thread Andre Oppermann

On 18.06.2013 18:40, Tijl Coosemans wrote:

On 2013-06-18 04:53, Peter Wemm wrote:

Author: peter
Date: Tue Jun 18 02:53:45 2013
New Revision: 251886
URL: http://svnweb.freebsd.org/changeset/base/251886

Log:
   Introduce svnlite so that we can check out our source code again.

   This is actually a fully functional build except:
   * All internal shared libraries are static linked to make sure there
 is no interference with ports (and to reduce build time).
   * It does not have the python/perl/etc plugin or API support.
   * By default, it installs as svnlite rather than svn.
   * If WITH_SVN added in make.conf, you get svn.
   * If WITHOUT_SVNLITE is in make.conf, this is completely disabled.

   To be absolutely clear, this is not intended for any use other than
   checking out freebsd source and committing, like we once did with cvs.

   It should be usable for small scale local repositories that don't
   need the python/perl plugin architecture.


This ties the repo to the oldest supported release, meaning that years
from now we won't be able to use some new subversion feature because
an old FreeBSD release doesn't support it.


AFAIK there is a checkout-only SVN client available, as in cvsup, but I don't
remember the name.


I don't find it unreasonable to ask developers to install the port.
And for users it seems all they need is something like portsnap for base.
Portsnap already distributes ports svn so it shouldn't be too hard to
adapt it for base. And the extra layer it adds is very convenient. Apart
from a bigger than usual update maybe, portsnap users never even noticed
it was switched from cvs to svn at some point.


Installing SVN from ports is very painful because of the huge dependency
chain it carries, with the largest being Python and Perl IIRC.

--
Andre

___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


Re: svn commit: r251886 - in head: contrib/apr contrib/apr-util contrib/serf contrib/sqlite3 contrib/subversion share/mk usr.bin usr.bin/svn usr.bin/svn/lib usr.bin/svn/lib/libapr usr.bin/svn/lib/liba

2013-06-18 Thread Andre Oppermann

On 18.06.2013 19:04, Alexey Dokuchaev wrote:


Being able to checkout the sources is very desirable, but not at the
cost of importing another heavy 3rd-party tool, which Subversion is.


Just wanted to note that applaud Peter for actually doing something (tm)
even though it came as a surprise to many it seems.

Now that we're having the discussion we can converge towards the best or
least controversial option.

--
Andre

___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r251296 - in head/sys: net netinet

2013-06-03 Thread Andre Oppermann
Author: andre
Date: Mon Jun  3 12:55:13 2013
New Revision: 251296
URL: http://svnweb.freebsd.org/changeset/base/251296

Log:
  Allow drivers to specify a maximum TSO length in bytes if they are
  limited in the amount of data they can handle at once.
  
  Drivers can set ifp-if_hw_tsomax before calling ether_ifattach() to
  change the limit.
  
  The lowest allowable size is IP_MAXPACKET / 8 (8192 bytes) as anything
  less wouldn't be very useful anymore.  The upper limit is still at
  IP_MAXPACKET (65536 bytes).  Raising it requires further auditing of
  the IPv4/v6 code path's as the length field in the IP header would
  overflow leading to confusion in firewalls and others packet handler on
  the real size of the packet.
  
  The placement into struct ifnet is a bit hackish but the best place
  that was found.  When the stack/driver boundary is updated it should
  be handled in a better way.
  
  Submitted by: cperciva (earlier version)
  Reviewed by:  cperciva
  Tested by:cperciva
  MFC after:1 week (using spare struct members to preserve ABI)

Modified:
  head/sys/net/if.c
  head/sys/net/if_var.h
  head/sys/netinet/tcp_input.c
  head/sys/netinet/tcp_output.c
  head/sys/netinet/tcp_subr.c
  head/sys/netinet/tcp_var.h

Modified: head/sys/net/if.c
==
--- head/sys/net/if.c   Mon Jun  3 12:43:09 2013(r251295)
+++ head/sys/net/if.c   Mon Jun  3 12:55:13 2013(r251296)
@@ -74,18 +74,18 @@
 #include net/vnet.h
 
 #if defined(INET) || defined(INET6)
-/*XXX*/
 #include netinet/in.h
 #include netinet/in_var.h
+#include netinet/ip.h
 #include netinet/ip_carp.h
+#ifdef INET
+#include netinet/if_ether.h
+#endif /* INET */
 #ifdef INET6
 #include netinet6/in6_var.h
 #include netinet6/in6_ifattach.h
-#endif
-#endif
-#ifdef INET
-#include netinet/if_ether.h
-#endif
+#endif /* INET6 */
+#endif /* INET || INET6 */
 
 #include security/mac/mac_framework.h
 
@@ -653,6 +653,13 @@ if_attach_internal(struct ifnet *ifp, in
TAILQ_INSERT_HEAD(ifp-if_addrhead, ifa, ifa_link);
/* Reliably crash if used uninitialized. */
ifp-if_broadcastaddr = NULL;
+
+   /* Initialize to max value. */
+   if (ifp-if_hw_tsomax == 0)
+   ifp-if_hw_tsomax = IP_MAXPACKET;
+   KASSERT(ifp-if_hw_tsomax = IP_MAXPACKET 
+   ifp-if_hw_tsomax = IP_MAXPACKET / 8,
+   (%s: tsomax outside of range, __func__));
}
 #ifdef VIMAGE
else {

Modified: head/sys/net/if_var.h
==
--- head/sys/net/if_var.h   Mon Jun  3 12:43:09 2013(r251295)
+++ head/sys/net/if_var.h   Mon Jun  3 12:55:13 2013(r251296)
@@ -204,6 +204,11 @@ struct ifnet {
u_int   if_fib; /* interface FIB */
u_char  if_alloctype;   /* if_type at time of allocation */
 
+   u_int   if_hw_tsomax;   /* tso burst length limit, the minmum
+* is (IP_MAXPACKET / 8).
+* XXXAO: Have to find a better place
+* for it eventually. */
+
/*
 * Spare fields are added so that we can modify sensitive data
 * structures without changing the kernel binary interface, and must

Modified: head/sys/netinet/tcp_input.c
==
--- head/sys/netinet/tcp_input.cMon Jun  3 12:43:09 2013
(r251295)
+++ head/sys/netinet/tcp_input.cMon Jun  3 12:55:13 2013
(r251296)
@@ -3434,7 +3434,7 @@ tcp_xmit_timer(struct tcpcb *tp, int rtt
  */
 void
 tcp_mss_update(struct tcpcb *tp, int offer, int mtuoffer,
-struct hc_metrics_lite *metricptr, int *mtuflags)
+struct hc_metrics_lite *metricptr, struct tcp_ifcap *cap)
 {
int mss = 0;
u_long maxmtu = 0;
@@ -3461,7 +3461,7 @@ tcp_mss_update(struct tcpcb *tp, int off
/* Initialize. */
 #ifdef INET6
if (isipv6) {
-   maxmtu = tcp_maxmtu6(inp-inp_inc, mtuflags);
+   maxmtu = tcp_maxmtu6(inp-inp_inc, cap);
tp-t_maxopd = tp-t_maxseg = V_tcp_v6mssdflt;
}
 #endif
@@ -3470,7 +3470,7 @@ tcp_mss_update(struct tcpcb *tp, int off
 #endif
 #ifdef INET
{
-   maxmtu = tcp_maxmtu(inp-inp_inc, mtuflags);
+   maxmtu = tcp_maxmtu(inp-inp_inc, cap);
tp-t_maxopd = tp-t_maxseg = V_tcp_mssdflt;
}
 #endif
@@ -3605,11 +3605,12 @@ tcp_mss(struct tcpcb *tp, int offer)
struct inpcb *inp;
struct socket *so;
struct hc_metrics_lite metrics;
-   int mtuflags = 0;
+   struct tcp_ifcap cap;
 
KASSERT(tp != NULL, (%s: tp == NULL, __func__));
-   
-   tcp_mss_update(tp, offer, -1, metrics, mtuflags);
+
+   

svn commit: r251297 - head/sys/dev/xen/netfront

2013-06-03 Thread Andre Oppermann
Author: andre
Date: Mon Jun  3 13:00:33 2013
New Revision: 251297
URL: http://svnweb.freebsd.org/changeset/base/251297

Log:
  Specify a maximum TSO length limiting the segment chain to what the
  Xen host side can handle after defragmentation.
  
  This prevents the driver from throwing away too long TSO chains and
  improves the performance on Amazon AWS instances with 10GigE virtual
  interfaces to the normally expected throughput.
  
  Submitted by: cperciva (earlier version)
  Reviewed by:  cperciva
  Tested by:cperciva
  MFC after:1 week

Modified:
  head/sys/dev/xen/netfront/netfront.c

Modified: head/sys/dev/xen/netfront/netfront.c
==
--- head/sys/dev/xen/netfront/netfront.cMon Jun  3 12:55:13 2013
(r251296)
+++ head/sys/dev/xen/netfront/netfront.cMon Jun  3 13:00:33 2013
(r251297)
@@ -134,6 +134,7 @@ static const int MODPARM_rx_flip = 0;
  * to mirror the Linux MAX_SKB_FRAGS constant.
  */
 #defineMAX_TX_REQ_FRAGS (65536 / PAGE_SIZE + 2)
+#defineNF_TSO_MAXBURST ((IP_MAXPACKET / PAGE_SIZE) * MCLBYTES)
 
 #define RX_COPY_THRESHOLD 256
 
@@ -2122,6 +2123,7 @@ create_netdev(device_t dev)

ifp-if_hwassist = XN_CSUM_FEATURES;
ifp-if_capabilities = IFCAP_HWCSUM;
+   ifp-if_hw_tsomax = NF_TSO_MAXBURST;

ether_ifattach(ifp, np-mac);
callout_init(np-xn_stat_ch, CALLOUT_MPSAFE);
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


Re: svn commit: r250658 - in head: share/mk sys/conf tools/build/options

2013-05-17 Thread Andre Oppermann

On 15.05.2013 15:04, Brooks Davis wrote:

Author: brooks
Date: Wed May 15 13:04:10 2013
New Revision: 250658
URL: http://svnweb.freebsd.org/changeset/base/250658

Log:
   Add a new option WITHOUT_FORMAT_EXTENSIONS to disable flags related to
   checking our kernel printf extensions.  This is useful to allow
   compilers without these extensions to build kernels.

   Sponsored by:DARPA, AFRL


This breaks make depend at least on amd64:

../../../conf/kern.mk, line 37: Malformed conditional (${MK_FORMAT_EXTENSIONS} == 
no)
../../../conf/kern.mk, line 39: if-less else
../../../conf/kern.mk, line 41: if-less endif
make: fatal errors encountered -- cannot continue

--
Andre


Added:
   head/tools/build/options/WITHOUT_FORMAT_EXTENSIONS   (contents, props 
changed)
Modified:
   head/share/mk/bsd.own.mk
   head/sys/conf/kern.mk

Modified: head/share/mk/bsd.own.mk
==
--- head/share/mk/bsd.own.mkWed May 15 08:38:49 2013(r250657)
+++ head/share/mk/bsd.own.mkWed May 15 13:04:10 2013(r250658)
@@ -268,6 +268,7 @@ __DEFAULT_YES_OPTIONS = \
  ED_CRYPTO \
  EXAMPLES \
  FLOPPY \
+FORMAT_EXTENSIONS \
  FORTH \
  FP_LIBC \
  FREEBSD_UPDATE \

Modified: head/sys/conf/kern.mk
==
--- head/sys/conf/kern.mk   Wed May 15 08:38:49 2013(r250657)
+++ head/sys/conf/kern.mk   Wed May 15 13:04:10 2013(r250658)
@@ -5,7 +5,7 @@
  #
  CWARNFLAGS?=  -Wall -Wredundant-decls -Wnested-externs -Wstrict-prototypes \
-Wmissing-prototypes -Wpointer-arith -Winline -Wcast-qual \
-   -Wundef -Wno-pointer-sign -fformat-extensions \
+   -Wundef -Wno-pointer-sign ${FORMAT_EXTENTIONS} \
-Wmissing-include-dirs -fdiagnostics-show-option \
${CWARNEXTRA}
  #
@@ -29,7 +29,15 @@ NO_WSOMETIMES_UNINITIALIZED= -Wno-error-
  # enough to error out the whole kernel build.  Display them anyway, so there 
is
  # some incentive to fix them eventually.
  CWARNEXTRA?=  -Wno-error-tautological-compare -Wno-error-empty-body \
-   -Wno-error-parentheses-equality
+   -Wno-error-parentheses-equality ${NO_WFORMAT}
+.endif
+
+# External compilers may not support our format extensions.  Allow them
+# to be disabled.  WARNING: format checking is disabled in this case.
+.if ${MK_FORMAT_EXTENSIONS} == no
+NO_WFORMAT=-Wno-format
+.else
+FORMAT_EXTENTIONS= -fformat-extensions
  .endif

  #

Added: head/tools/build/options/WITHOUT_FORMAT_EXTENSIONS
==
--- /dev/null   00:00:00 1970   (empty, because file is newly added)
+++ head/tools/build/options/WITHOUT_FORMAT_EXTENSIONS  Wed May 15 13:04:10 
2013(r250658)
@@ -0,0 +1,5 @@
+.\ $FreeBSD$
+Set to not enable
+.Fl fformat-extensions
+when compiling the kernel.
+Also disables all format checking.




___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r250365 - head/sys/kern

2013-05-08 Thread Andre Oppermann
Author: andre
Date: Wed May  8 14:13:14 2013
New Revision: 250365
URL: http://svnweb.freebsd.org/changeset/base/250365

Log:
  When the accept queue is full print the number of already pending
  new connections instead of by how many we're over the limit, which
  is always 1.
  
  Noticed by:   jmallet
  MFC after:1 week

Modified:
  head/sys/kern/uipc_socket.c

Modified: head/sys/kern/uipc_socket.c
==
--- head/sys/kern/uipc_socket.c Wed May  8 13:26:17 2013(r250364)
+++ head/sys/kern/uipc_socket.c Wed May  8 14:13:14 2013(r250365)
@@ -515,7 +515,7 @@ sonewconn(struct socket *head, int conns
 #endif
log(LOG_DEBUG, %s: pcb %p: Listen queue overflow: 
%i already in queue awaiting acceptance\n,
-   __func__, head-so_pcb, over);
+   __func__, head-so_pcb, head-so_qlen);
return (NULL);
}
VNET_ASSERT(head-so_vnet != NULL, (%s:%d so_vnet is NULL, head=%p,
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r250300 - in head/sys: kern net netinet sys

2013-05-06 Thread Andre Oppermann
Author: andre
Date: Mon May  6 16:42:18 2013
New Revision: 250300
URL: http://svnweb.freebsd.org/changeset/base/250300

Log:
  Back out r249318, r249320 and r249327 due to a heisenbug most
  likely related to a race condition in the ipi_hash_lock with
  the exact cause currently unknown but under investigation.

Modified:
  head/sys/kern/uipc_socket.c
  head/sys/net/if.c
  head/sys/net/if_llatbl.c
  head/sys/net/if_llatbl.h
  head/sys/net/if_var.h
  head/sys/netinet/in_pcb.h
  head/sys/netinet/in_var.h
  head/sys/netinet/ip_id.c
  head/sys/netinet/ip_input.c
  head/sys/netinet/tcp_subr.c
  head/sys/sys/socketvar.h

Modified: head/sys/kern/uipc_socket.c
==
--- head/sys/kern/uipc_socket.c Mon May  6 16:11:53 2013(r250299)
+++ head/sys/kern/uipc_socket.c Mon May  6 16:42:18 2013(r250300)
@@ -240,14 +240,14 @@ SYSCTL_INT(_kern_ipc_zero_copy, OID_AUTO
  * accept_mtx locks down per-socket fields relating to accept queues.  See
  * socketvar.h for an annotation of the protected fields of struct socket.
  */
-struct mtx_padalign accept_mtx;
+struct mtx accept_mtx;
 MTX_SYSINIT(accept_mtx, accept_mtx, accept, MTX_DEF);
 
 /*
  * so_global_mtx protects so_gencnt, numopensockets, and the per-socket
  * so_gencnt field.
  */
-static struct mtx_padalign so_global_mtx;
+static struct mtx so_global_mtx;
 MTX_SYSINIT(so_global_mtx, so_global_mtx, so_glabel, MTX_DEF);
 
 /*

Modified: head/sys/net/if.c
==
--- head/sys/net/if.c   Mon May  6 16:11:53 2013(r250299)
+++ head/sys/net/if.c   Mon May  6 16:42:18 2013(r250300)
@@ -206,7 +206,7 @@ VNET_DEFINE(struct ifindex_entry *, ifin
  * also to stablize it over long-running ioctls, without introducing priority
  * inversions and deadlocks.
  */
-struct rwlock_padalign ifnet_rwlock;
+struct rwlock ifnet_rwlock;
 struct sx ifnet_sxlock;
 
 /*

Modified: head/sys/net/if_llatbl.c
==
--- head/sys/net/if_llatbl.cMon May  6 16:11:53 2013(r250299)
+++ head/sys/net/if_llatbl.cMon May  6 16:42:18 2013(r250300)
@@ -67,7 +67,7 @@ static VNET_DEFINE(SLIST_HEAD(, lltable)
 
 static void vnet_lltable_init(void);
 
-struct rwlock_padalign lltable_rwlock;
+struct rwlock lltable_rwlock;
 RW_SYSINIT(lltable_rwlock, lltable_rwlock, lltable_rwlock);
 
 /*

Modified: head/sys/net/if_llatbl.h
==
--- head/sys/net/if_llatbl.hMon May  6 16:11:53 2013(r250299)
+++ head/sys/net/if_llatbl.hMon May  6 16:42:18 2013(r250300)
@@ -43,7 +43,7 @@ struct rt_addrinfo;
 struct llentry;
 LIST_HEAD(llentries, llentry);
 
-extern struct rwlock_padalign lltable_rwlock;
+extern struct rwlock lltable_rwlock;
 #defineLLTABLE_RLOCK() rw_rlock(lltable_rwlock)
 #defineLLTABLE_RUNLOCK()   rw_runlock(lltable_rwlock)
 #defineLLTABLE_WLOCK() rw_wlock(lltable_rwlock)

Modified: head/sys/net/if_var.h
==
--- head/sys/net/if_var.h   Mon May  6 16:11:53 2013(r250299)
+++ head/sys/net/if_var.h   Mon May  6 16:42:18 2013(r250300)
@@ -191,9 +191,9 @@ struct ifnet {
void*if_unused[2];
void*if_afdata[AF_MAX];
int if_afdata_initialized;
+   struct  rwlock if_afdata_lock;
struct  task if_linktask;   /* task for link change events */
-   struct  rwlock_padalign if_afdata_lock;
-   struct  rwlock_padalign if_addr_lock;   /* lock to protect address 
lists */
+   struct  rwlock if_addr_lock;/* lock to protect address lists */
 
LIST_ENTRY(ifnet) if_clones;/* interfaces of a cloner */
TAILQ_HEAD(, ifg_list) if_groups; /* linked list of groups per if */
@@ -832,7 +832,7 @@ struct ifmultiaddr {
 
 #ifdef _KERNEL
 
-extern struct rwlock_padalign ifnet_rwlock;
+extern struct rwlock ifnet_rwlock;
 extern struct sx ifnet_sxlock;
 
 #defineIFNET_LOCK_INIT() do {  
\

Modified: head/sys/netinet/in_pcb.h
==
--- head/sys/netinet/in_pcb.h   Mon May  6 16:11:53 2013(r250299)
+++ head/sys/netinet/in_pcb.h   Mon May  6 16:42:18 2013(r250300)
@@ -330,7 +330,7 @@ struct inpcbinfo {
/*
 * Global lock protecting non-pcbgroup hash lookup tables.
 */
-   struct rwlock_padalign   ipi_hash_lock;
+   struct rwlockipi_hash_lock;
 
/*
 * Global hash of inpcbs, hashed by local and foreign addresses and

Modified: head/sys/netinet/in_var.h
==
--- head/sys/netinet/in_var.h   Mon May  6 16:11:53 

svn commit: r249843 - head/sys/kern

2013-04-24 Thread Andre Oppermann
Author: andre
Date: Wed Apr 24 13:54:55 2013
New Revision: 249843
URL: http://svnweb.freebsd.org/changeset/base/249843

Log:
  Base the calculation of maxmbufmem in part on kmem_map size
  instead of kernel_map size to prevent kernel memory exhaustion
  by mbufs and a subsequent panic on physical page allocation
  failure.
  
  On architectures without a direct map all mbuf memory (except
  for jumbo mbufs larger than PAGE_SIZE) comes from kmem_map.
  It is the limiting factor hence.
  
  For architectures with a direct map using the size of kmem_map
  is a good proxy of available kernel memory as well.  If it is
  much smaller the mbuf limit may be sub-optimal but remains
  reasonable, while avoiding panics under exhaustion.
  
  The overall mbuf memory limit calculation may be reconsidered
  again later, however due to the many different mbuf sizes and
  different backing KVM maps it is a tricky subject.
  
  Found by: pho's new network stress test
  Pointed out by:   alc (kmem_map instead of kernel_map)
  Tested by:pho

Modified:
  head/sys/kern/kern_mbuf.c

Modified: head/sys/kern/kern_mbuf.c
==
--- head/sys/kern/kern_mbuf.c   Wed Apr 24 13:19:48 2013(r249842)
+++ head/sys/kern/kern_mbuf.c   Wed Apr 24 13:54:55 2013(r249843)
@@ -118,7 +118,7 @@ tunable_mbinit(void *dummy)
 * At most it can be 3/4 of available kernel memory.
 */
realmem = qmin((quad_t)physmem * PAGE_SIZE,
-   vm_map_max(kernel_map) - vm_map_min(kernel_map));
+   vm_map_max(kmem_map) - vm_map_min(kmem_map));
maxmbufmem = realmem / 2;
TUNABLE_QUAD_FETCH(kern.maxmbufmem, maxmbufmem);
if (maxmbufmem  realmem / 4 * 3)
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r249317 - head/sys/netinet

2013-04-09 Thread Andre Oppermann
Author: andre
Date: Tue Apr  9 20:52:26 2013
New Revision: 249317
URL: http://svnweb.freebsd.org/changeset/base/249317

Log:
  Fix a race condition on tcp listen socket teardown with pending
  connections in the accept queue and contiguous new incoming SYNs.
  
  Compared to the original submitters patch I've moved the test
  next to the SYN handling to have it together in a logical unit
  and reworded the comment explaining the issue.
  
  Submitted by: Matt Miller m...@matthewjmiller.net
  Submitted by: Juan Mojica jmoj...@gmail.com
  Reviewed by:  Matt Miller (changes)
  Tested by:pho
  MFC after:1 week

Modified:
  head/sys/netinet/tcp_input.c

Modified: head/sys/netinet/tcp_input.c
==
--- head/sys/netinet/tcp_input.cTue Apr  9 20:21:35 2013
(r249316)
+++ head/sys/netinet/tcp_input.cTue Apr  9 20:52:26 2013
(r249317)
@@ -1405,6 +1405,15 @@ relocked:
 */
INP_INFO_UNLOCK_ASSERT(V_tcbinfo);
return;
+   } else if (tp-t_state == TCPS_LISTEN) {
+   /*
+* When a listen socket is torn down the SO_ACCEPTCONN
+* flag is removed first while connections are drained
+* from the accept queue in a unlock/lock cycle of the
+* ACCEPT_LOCK, opening a race condition allowing a SYN
+* attempt go through unhandled.
+*/
+   goto dropunlock;
}
 
 #ifdef TCP_SIGNATURE
___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


svn commit: r249318 - in head/sys: kern net netinet

2013-04-09 Thread Andre Oppermann
Author: andre
Date: Tue Apr  9 21:02:20 2013
New Revision: 249318
URL: http://svnweb.freebsd.org/changeset/base/249318

Log:
  Change certain heavily used network related mutexes and rwlocks to
  reside on their own cache line to prevent false sharing with other
  nearby structures, especially for those in the .bss segment.
  
  NB: Those mutexes and rwlocks with variables next to them that get
  changed on every invocation do not benefit from their own cache line.
  Actually it may be net negative because two cache misses would be
  incurred in those cases.

Modified:
  head/sys/kern/uipc_socket.c
  head/sys/net/if.c
  head/sys/net/if_llatbl.c
  head/sys/net/if_var.h
  head/sys/netinet/in_pcb.h
  head/sys/netinet/ip_id.c
  head/sys/netinet/ip_input.c
  head/sys/netinet/tcp_subr.c

Modified: head/sys/kern/uipc_socket.c
==
--- head/sys/kern/uipc_socket.c Tue Apr  9 20:52:26 2013(r249317)
+++ head/sys/kern/uipc_socket.c Tue Apr  9 21:02:20 2013(r249318)
@@ -240,14 +240,14 @@ SYSCTL_INT(_kern_ipc_zero_copy, OID_AUTO
  * accept_mtx locks down per-socket fields relating to accept queues.  See
  * socketvar.h for an annotation of the protected fields of struct socket.
  */
-struct mtx accept_mtx;
+struct mtx_padalign accept_mtx;
 MTX_SYSINIT(accept_mtx, accept_mtx, accept, MTX_DEF);
 
 /*
  * so_global_mtx protects so_gencnt, numopensockets, and the per-socket
  * so_gencnt field.
  */
-static struct mtx so_global_mtx;
+static struct so_global_mtx so_global_mtx;
 MTX_SYSINIT(so_global_mtx, so_global_mtx, so_glabel, MTX_DEF);
 
 /*

Modified: head/sys/net/if.c
==
--- head/sys/net/if.c   Tue Apr  9 20:52:26 2013(r249317)
+++ head/sys/net/if.c   Tue Apr  9 21:02:20 2013(r249318)
@@ -206,7 +206,7 @@ VNET_DEFINE(struct ifindex_entry *, ifin
  * also to stablize it over long-running ioctls, without introducing priority
  * inversions and deadlocks.
  */
-struct rwlock ifnet_rwlock;
+struct rwlock_padalign ifnet_rwlock;
 struct sx ifnet_sxlock;
 
 /*

Modified: head/sys/net/if_llatbl.c
==
--- head/sys/net/if_llatbl.cTue Apr  9 20:52:26 2013(r249317)
+++ head/sys/net/if_llatbl.cTue Apr  9 21:02:20 2013(r249318)
@@ -67,7 +67,7 @@ static VNET_DEFINE(SLIST_HEAD(, lltable)
 
 static void vnet_lltable_init(void);
 
-struct rwlock lltable_rwlock;
+struct rwlock_padalign lltable_rwlock;
 RW_SYSINIT(lltable_rwlock, lltable_rwlock, lltable_rwlock);
 
 /*

Modified: head/sys/net/if_var.h
==
--- head/sys/net/if_var.h   Tue Apr  9 20:52:26 2013(r249317)
+++ head/sys/net/if_var.h   Tue Apr  9 21:02:20 2013(r249318)
@@ -191,9 +191,9 @@ struct ifnet {
void*if_unused[2];
void*if_afdata[AF_MAX];
int if_afdata_initialized;
-   struct  rwlock if_afdata_lock;
struct  task if_linktask;   /* task for link change events */
-   struct  rwlock if_addr_lock;/* lock to protect address lists */
+   struct  rwlock_padalign if_afdata_lock;
+   struct  rwlock_padalign if_addr_lock;   /* lock to protect address 
lists */
 
LIST_ENTRY(ifnet) if_clones;/* interfaces of a cloner */
TAILQ_HEAD(, ifg_list) if_groups; /* linked list of groups per if */

Modified: head/sys/netinet/in_pcb.h
==
--- head/sys/netinet/in_pcb.h   Tue Apr  9 20:52:26 2013(r249317)
+++ head/sys/netinet/in_pcb.h   Tue Apr  9 21:02:20 2013(r249318)
@@ -330,7 +330,7 @@ struct inpcbinfo {
/*
 * Global lock protecting non-pcbgroup hash lookup tables.
 */
-   struct rwlockipi_hash_lock;
+   struct rwlock_padalign   ipi_hash_lock;
 
/*
 * Global hash of inpcbs, hashed by local and foreign addresses and

Modified: head/sys/netinet/ip_id.c
==
--- head/sys/netinet/ip_id.cTue Apr  9 20:52:26 2013(r249317)
+++ head/sys/netinet/ip_id.cTue Apr  9 21:02:20 2013(r249318)
@@ -97,7 +97,7 @@ static int array_ptr = 0;
 static int  array_size = 8192;
 static int  random_id_collisions = 0;
 static int  random_id_total = 0;
-static struct mtx   ip_id_mtx;
+static struct mtx_padalign  ip_id_mtx;
 
 static voidip_initid(void);
 static int sysctl_ip_id_change(SYSCTL_HANDLER_ARGS);

Modified: head/sys/netinet/ip_input.c
==
--- head/sys/netinet/ip_input.c Tue Apr  9 20:52:26 2013(r249317)
+++ head/sys/netinet/ip_input.c Tue Apr  9 21:02:20 2013

Re: svn commit: r248417 - head/sys/sys

2013-03-17 Thread Andre Oppermann

On 17.03.2013 10:33, Gleb Smirnoff wrote:

On Sun, Mar 17, 2013 at 10:02:09AM +0100, Andre Oppermann wrote:
A On 17.03.2013 08:39, Gleb Smirnoff wrote:
A  Author: glebius
A  Date: Sun Mar 17 07:39:45 2013
A  New Revision: 248417
A  URL: http://svnweb.freebsd.org/changeset/base/248417
A 
A  Log:
A Add MEXT_ALIGN() macro, similar to M_ALIGN() and MH_ALIGN(), but for
A mbufs with external buffer.
A
A While you are cleaning up the mbuf usage wouldn't it make sense to remove
A these macros, instead of adding new ones, and use m_align() which handles
A all these cases internally?

I'm thinking about this. Maybe it is worth to request tail alignment as
a flag to the allocating function itself?


IMHO that would overload the allocation function(s).  The explicit step of
doing m_align() for those who need it is fine and alerts the reader of what
is going on.  I'm all for simplification and unification, on the other hand
it shouldn't be taken too far creating new complexity on the other side.

--
Andre

___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


Re: svn commit: r248323 - head/sys/netinet

2013-03-15 Thread Andre Oppermann

On 15.03.2013 13:53, Gleb Smirnoff wrote:

Author: glebius
Date: Fri Mar 15 12:53:53 2013
New Revision: 248323
URL: http://svnweb.freebsd.org/changeset/base/248323

Log:
   - Use m_getcl() instead of hand allocating.

   Sponsored by:Nginx, Inc.

Modified:
   head/sys/netinet/tcp_output.c

Modified: head/sys/netinet/tcp_output.c
==
--- head/sys/netinet/tcp_output.c   Fri Mar 15 12:52:59 2013
(r248322)
+++ head/sys/netinet/tcp_output.c   Fri Mar 15 12:53:53 2013
(r248323)
@@ -842,23 +842,19 @@ send:
TCPSTAT_INC(tcps_sndpack);
TCPSTAT_ADD(tcps_sndbyte, len);
}
-   MGETHDR(m, M_NOWAIT, MT_DATA);
+#ifdef INET6
+   if (MHLEN  hdrlen + max_linkhdr)
+   m = m_getcl(M_NOWAIT, MT_DATA, M_PKTHDR);
+   else
+#endif
+   m = m_gethdr(M_NOWAIT, MT_DATA);
+


m = m_getm2(hdrlen + max_linkhdr, M_NOWAIT, MT_DATA, M_PKTHDR);

would be even more compact.  Since max_linkhdr could be large as well,
the possibility of a cluster applies to IPv4 too.

--
Andre


if (m == NULL) {
SOCKBUF_UNLOCK(so-so_snd);
error = ENOBUFS;
goto out;
}
-#ifdef INET6
-   if (MHLEN  hdrlen + max_linkhdr) {
-   MCLGET(m, M_NOWAIT);
-   if ((m-m_flags  M_EXT) == 0) {
-   SOCKBUF_UNLOCK(so-so_snd);
-   m_freem(m);
-   error = ENOBUFS;
-   goto out;
-   }
-   }
-#endif
+
m-m_data += max_linkhdr;
m-m_len = hdrlen;





___
svn-src-head@freebsd.org mailing list
http://lists.freebsd.org/mailman/listinfo/svn-src-head
To unsubscribe, send any mail to svn-src-head-unsubscr...@freebsd.org


  1   2   3   >