On Tue, Oct 11, 2016 at 20:41 +1000, David Gwynne wrote: > On Tue, Oct 11, 2016 at 12:06:46AM +0200, Mike Belopuhov wrote: > > On Fri, Oct 07, 2016 at 20:48 +1000, David Gwynne wrote: > > > On Thu, Oct 06, 2016 at 12:13:20PM +0200, Mike Belopuhov wrote: > > > > m_pullup will always get a new mbuf and on > > > > strict alignment architectures you will always do a m_dup_pkt > > > > (verified by my -DTEST1). > > > > > > i didnt think m_pullup was that stupid, but yes, yes it is. > > > > > > im not sure we can do better than m_dup_pkt on strict alignment > > > archs, except to use memmove to realign simple mbufs. the alternative > > > is vxlan parses the packet to figure out the most conservative > > > realignment it can do, but itll have to allocate a new mbuf to > > > prefix it with, which would be about the same cost as blindly copying > > > it all. > > > > > > anyway, m_pullup could be simpler. while im there, it should maintain > > > the alignment of the payload. > > > > > > the diff below makes it return early if the current mbuf already > > > has the requested region, regardless of whether it has a cluster > > > or not. > > > > > > if that fails, but there is enough space in the first mbuf for the > > > requested region, itll memmove the data around to make the request > > > contig. > > > > > > if that fails, itll attempt to allocate an mbuf and maybe a cluster > > > to prefix the chain with. > > > > > > after shuffling the first mbuf data or prefixing an empty mbuf, > > > itll copy from the chain into the first mbuf. > > > > > > i reckon this would be comparable to m_pulldown now. > > > > > > ok? > > > > > > Index: uipc_mbuf.c > > > =================================================================== > > > RCS file: /cvs/src/sys/kern/uipc_mbuf.c,v > > > retrieving revision 1.231 > > > diff -u -p -r1.231 uipc_mbuf.c > > > --- uipc_mbuf.c 15 Sep 2016 02:00:16 -0000 1.231 > > > +++ uipc_mbuf.c 7 Oct 2016 10:47:54 -0000 > > > @@ -838,64 +842,73 @@ struct mbuf * > > > m_pullup(struct mbuf *n, int len) > > > { > > > struct mbuf *m; > > > - int count; > > > + unsigned int align; > > > + unsigned int space; > > > + > > > + /* if n is already contig then don't do any work */ > > > + if (len <= n->m_len) > > > + return (n); > > > + > > > + align = (unsigned long)n->m_data & ALIGNBYTES; > > > + space = M_LEADINGSPACE(n) - align; > > > + > > > > M_LEADINGSPACE for mbufs that have (m->m_len == 0) is actually zero > > so here you can have space underflow and become huge. > > yeesh. why do we let cthulu craft mbufs? > > > > + if (len <= space + n->m_len + M_TRAILINGSPACE(n)) { > > > + /* if there's enough space in the first mbuf memmove into it */ > > > + memmove(mtod(n, caddr_t) - space, mtod(n, caddr_t), n->m_len); > > > + n->m_data -= space; > > > + len -= n->m_len; > > > > > > > You're not performing a m_getptr operation so you never optimize > > for a case where you can memcpy from the other mbuf into the first > > one which is what old code does (when m->m_next != NULL)... I think > > these should be preserved. > > thats what this chunk does. in the diff below i made the the memmove > conditional on if there's not enough space in the tail. >
Right, I've looked at the wrong one. As far as I can tell and test this new diff is fine. I've double checked the alignment math and it looks good to me. OK mikeb > Index: uipc_mbuf.c > =================================================================== > RCS file: /cvs/src/sys/kern/uipc_mbuf.c,v > retrieving revision 1.233 > diff -u -p -r1.233 uipc_mbuf.c > --- uipc_mbuf.c 10 Oct 2016 00:41:17 -0000 1.233 > +++ uipc_mbuf.c 11 Oct 2016 09:07:09 -0000 > @@ -842,64 +842,79 @@ struct mbuf * > m_pullup(struct mbuf *n, int len) > { > struct mbuf *m; > - int count; > + unsigned int adj; > + caddr_t head, tail; > + unsigned int space; > > - /* > - * If first mbuf has no cluster, and has room for len bytes > - * without shifting current data, pullup into it, > - * otherwise allocate a new mbuf to prepend to the chain. > - */ > - if ((n->m_flags & M_EXT) == 0 && n->m_next && > - n->m_data + len < &n->m_dat[MLEN]) { > - if (n->m_len >= len) > - return (n); > - m = n; > - n = n->m_next; > - len -= m->m_len; > - } else if ((n->m_flags & M_EXT) != 0 && len > MHLEN && n->m_next && > - n->m_data + len < &n->m_ext.ext_buf[n->m_ext.ext_size]) { > - if (n->m_len >= len) > - return (n); > + /* if n is already contig then don't do any work */ > + if (len <= n->m_len) > + return (n); > + > + adj = (unsigned long)n->m_data & ALIGNBYTES; > + head = (caddr_t)ALIGN(mtod(n, caddr_t) - M_LEADINGSPACE(n)) + adj; > + tail = mtod(n, caddr_t) + n->m_len + M_TRAILINGSPACE(n); > + > + if (head < tail && len <= tail - head) { > + /* there's enough space in the first mbuf */ > + > + if (len > tail - mtod(n, caddr_t)) { > + /* need to memmove to make space at the end */ > + memmove(head, mtod(n, caddr_t), n->m_len); > + m->m_data = head; > + } > + > + len -= n->m_len; > m = n; > - n = n->m_next; > - len -= m->m_len; > + n = m->m_next; > } else { > - if (len > MAXMCLBYTES) > + /* the first mbuf is too small so prepend one with space */ > + space = adj + len; > + > + if (space > MAXMCLBYTES) > goto bad; > + > MGET(m, M_DONTWAIT, n->m_type); > if (m == NULL) > goto bad; > - if (len > MHLEN) { > - MCLGETI(m, M_DONTWAIT, NULL, len); > + if (space > MHLEN) { > + MCLGETI(m, M_DONTWAIT, NULL, space); > if ((m->m_flags & M_EXT) == 0) { > m_free(m); > goto bad; > } > } > - m->m_len = 0; > + > if (n->m_flags & M_PKTHDR) > M_MOVE_PKTHDR(m, n); > + > + m->m_len = 0; > + m->m_data += adj; > } > > + KASSERT(M_TRAILINGSPACE(m) >= len); > + > do { > - count = min(len, n->m_len); > - memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t), > - count); > - len -= count; > - m->m_len += count; > - n->m_len -= count; > - if (n->m_len) > - n->m_data += count; > + if (n == NULL) { > + (void)m_free(m); > + goto bad; > + } > + > + space = min(len, n->m_len); > + memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t), space); > + len -= space; > + m->m_len += space; > + n->m_len -= space; > + > + if (n->m_len > 0) > + n->m_data += space; > else > n = m_free(n); > - } while (len > 0 && n); > - if (len > 0) { > - (void)m_free(m); > - goto bad; > - } > + } while (len > 0); > + > m->m_next = n; > > return (m); > + > bad: > m_freem(n); > return (NULL);