On Tue, Oct 11, 2016 at 12:06:46AM +0200, Mike Belopuhov wrote:
> On Fri, Oct 07, 2016 at 20:48 +1000, David Gwynne wrote:
> > On Thu, Oct 06, 2016 at 12:13:20PM +0200, Mike Belopuhov wrote:
> > > m_pullup will always get a new mbuf and on
> > > strict alignment architectures you will always do a m_dup_pkt
> > > (verified by my -DTEST1).
> >
> > i didnt think m_pullup was that stupid, but yes, yes it is.
> >
> > im not sure we can do better than m_dup_pkt on strict alignment
> > archs, except to use memmove to realign simple mbufs. the alternative
> > is vxlan parses the packet to figure out the most conservative
> > realignment it can do, but itll have to allocate a new mbuf to
> > prefix it with, which would be about the same cost as blindly copying
> > it all.
> >
> > anyway, m_pullup could be simpler. while im there, it should maintain
> > the alignment of the payload.
> >
> > the diff below makes it return early if the current mbuf already
> > has the requested region, regardless of whether it has a cluster
> > or not.
> >
> > if that fails, but there is enough space in the first mbuf for the
> > requested region, itll memmove the data around to make the request
> > contig.
> >
> > if that fails, itll attempt to allocate an mbuf and maybe a cluster
> > to prefix the chain with.
> >
> > after shuffling the first mbuf data or prefixing an empty mbuf,
> > itll copy from the chain into the first mbuf.
> >
> > i reckon this would be comparable to m_pulldown now.
> >
> > ok?
> >
> > Index: uipc_mbuf.c
> > ===================================================================
> > RCS file: /cvs/src/sys/kern/uipc_mbuf.c,v
> > retrieving revision 1.231
> > diff -u -p -r1.231 uipc_mbuf.c
> > --- uipc_mbuf.c 15 Sep 2016 02:00:16 -0000 1.231
> > +++ uipc_mbuf.c 7 Oct 2016 10:47:54 -0000
> > @@ -838,64 +842,73 @@ struct mbuf *
> > m_pullup(struct mbuf *n, int len)
> > {
> > struct mbuf *m;
> > - int count;
> > + unsigned int align;
> > + unsigned int space;
> > +
> > + /* if n is already contig then don't do any work */
> > + if (len <= n->m_len)
> > + return (n);
> > +
> > + align = (unsigned long)n->m_data & ALIGNBYTES;
> > + space = M_LEADINGSPACE(n) - align;
> > +
>
> M_LEADINGSPACE for mbufs that have (m->m_len == 0) is actually zero
> so here you can have space underflow and become huge.
yeesh. why do we let Cthulhu craft mbufs?
> > + if (len <= space + n->m_len + M_TRAILINGSPACE(n)) {
> > + /* if there's enough space in the first mbuf memmove into it */
> > + memmove(mtod(n, caddr_t) - space, mtod(n, caddr_t), n->m_len);
> > + n->m_data -= space;
> > + len -= n->m_len;
> >
>
> You're not performing a m_getptr operation so you never optimize
> for a case where you can memcpy from the other mbuf into the first
> one which is what old code does (when m->m_next != NULL)... I think
> these should be preserved.
thats what this chunk does. in the diff below i made the memmove
conditional on if there's not enough space in the tail.
Index: uipc_mbuf.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_mbuf.c,v
retrieving revision 1.233
diff -u -p -r1.233 uipc_mbuf.c
--- uipc_mbuf.c 10 Oct 2016 00:41:17 -0000 1.233
+++ uipc_mbuf.c 11 Oct 2016 09:07:09 -0000
@@ -842,64 +842,79 @@ struct mbuf *
m_pullup(struct mbuf *n, int len)
{
struct mbuf *m;
- int count;
+ unsigned int adj;
+ caddr_t head, tail;
+ unsigned int space;
- /*
- * If first mbuf has no cluster, and has room for len bytes
- * without shifting current data, pullup into it,
- * otherwise allocate a new mbuf to prepend to the chain.
- */
- if ((n->m_flags & M_EXT) == 0 && n->m_next &&
- n->m_data + len < &n->m_dat[MLEN]) {
- if (n->m_len >= len)
- return (n);
- m = n;
- n = n->m_next;
- len -= m->m_len;
- } else if ((n->m_flags & M_EXT) != 0 && len > MHLEN && n->m_next &&
- n->m_data + len < &n->m_ext.ext_buf[n->m_ext.ext_size]) {
- if (n->m_len >= len)
- return (n);
+ /* if n is already contig then don't do any work */
+ if (len <= n->m_len)
+ return (n);
+
+ adj = (unsigned long)n->m_data & ALIGNBYTES;
+ head = (caddr_t)ALIGN(mtod(n, caddr_t) - M_LEADINGSPACE(n)) + adj;
+ tail = mtod(n, caddr_t) + n->m_len + M_TRAILINGSPACE(n);
+
+ if (head < tail && len <= tail - head) {
+ /* there's enough space in the first mbuf */
+
+ if (len > tail - mtod(n, caddr_t)) {
+ /* need to memmove to make space at the end */
+ memmove(head, mtod(n, caddr_t), n->m_len);
+ n->m_data = head;
+ }
+
+ len -= n->m_len;
m = n;
- n = n->m_next;
- len -= m->m_len;
+ n = m->m_next;
} else {
- if (len > MAXMCLBYTES)
+ /* the first mbuf is too small so prepend one with space */
+ space = adj + len;
+
+ if (space > MAXMCLBYTES)
goto bad;
+
MGET(m, M_DONTWAIT, n->m_type);
if (m == NULL)
goto bad;
- if (len > MHLEN) {
- MCLGETI(m, M_DONTWAIT, NULL, len);
+ if (space > MHLEN) {
+ MCLGETI(m, M_DONTWAIT, NULL, space);
if ((m->m_flags & M_EXT) == 0) {
m_free(m);
goto bad;
}
}
- m->m_len = 0;
+
if (n->m_flags & M_PKTHDR)
M_MOVE_PKTHDR(m, n);
+
+ m->m_len = 0;
+ m->m_data += adj;
}
+ KASSERT(M_TRAILINGSPACE(m) >= len);
+
do {
- count = min(len, n->m_len);
- memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
- count);
- len -= count;
- m->m_len += count;
- n->m_len -= count;
- if (n->m_len)
- n->m_data += count;
+ if (n == NULL) {
+ (void)m_free(m);
+ goto bad;
+ }
+
+ space = min(len, n->m_len);
+ memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t), space);
+ len -= space;
+ m->m_len += space;
+ n->m_len -= space;
+
+ if (n->m_len > 0)
+ n->m_data += space;
else
n = m_free(n);
- } while (len > 0 && n);
- if (len > 0) {
- (void)m_free(m);
- goto bad;
- }
+ } while (len > 0);
+
m->m_next = n;
return (m);
+
bad:
m_freem(n);
return (NULL);