On Tue, Oct 11, 2016 at 12:06:46AM +0200, Mike Belopuhov wrote:
> On Fri, Oct 07, 2016 at 20:48 +1000, David Gwynne wrote:
> > On Thu, Oct 06, 2016 at 12:13:20PM +0200, Mike Belopuhov wrote:
> > > m_pullup will always get a new mbuf and on
> > > strict alignment architectures you will always do a m_dup_pkt
> > > (verified by my -DTEST1).
> >
> > i didnt think m_pullup was that stupid, but yes, yes it is.
> >
> > im not sure we can do better than m_dup_pkt on strict alignment
> > archs, except to use memmove to realign simple mbufs. the alternative
> > is vxlan parses the packet to figure out the most conservative
> > realignment it can do, but itll have to allocate a new mbuf to
> > prefix it with, which would be about the same cost as blindly copying
> > it all.
> >
> > anyway, m_pullup could be simpler. while im there, it should maintain
> > the alignment of the payload.
> >
> > the diff below makes it return early if the current mbuf already
> > has the requested region, regardless of whether it has a cluster
> > or not.
> >
> > if that fails, but there is enough space in the first mbuf for the
> > requested region, itll memmove the data around to make the request
> > contig.
> >
> > if that fails, itll attempt to allocate an mbuf and maybe a cluster
> > to prefix the chain with.
> >
> > after shuffling the first mbuf data or prefixing an empty mbuf,
> > itll copy from the chain into the first mbuf.
> >
> > i reckon this would be comparable to m_pulldown now.
> >
> > ok?
> >
> > Index: uipc_mbuf.c
> > ===================================================================
> > RCS file: /cvs/src/sys/kern/uipc_mbuf.c,v
> > retrieving revision 1.231
> > diff -u -p -r1.231 uipc_mbuf.c
> > --- uipc_mbuf.c 15 Sep 2016 02:00:16 -0000 1.231
> > +++ uipc_mbuf.c 7 Oct 2016 10:47:54 -0000
> > @@ -838,64 +842,73 @@ struct mbuf *
> > m_pullup(struct mbuf *n, int len)
> > {
> > struct mbuf *m;
> > - int count;
> > + unsigned int align;
> > + unsigned int space;
> > +
> > + /* if n is already contig then don't do any work */
> > + if (len <= n->m_len)
> > + return (n);
> > +
> > + align = (unsigned long)n->m_data & ALIGNBYTES;
> > + space = M_LEADINGSPACE(n) - align;
> > +
>
> M_LEADINGSPACE for mbufs that have (m->m_len == 0) is actually zero
> so here you can have space underflow and become huge.
yeesh. why do we let Cthulhu craft mbufs?
> > + if (len <= space + n->m_len + M_TRAILINGSPACE(n)) {
> > + /* if there's enough space in the first mbuf memmove into it */
> > + memmove(mtod(n, caddr_t) - space, mtod(n, caddr_t), n->m_len);
> > + n->m_data -= space;
> > + len -= n->m_len;
> >
>
> You're not performing a m_getptr operation so you never optimize
> for a case where you can memcpy from the other mbuf into the first
> one which is what old code does (when m->m_next != NULL)... I think
> these should be preserved.
thats what this chunk does. in the diff below i made the memmove
conditional on if there's not enough space in the tail.
Index: uipc_mbuf.c
===================================================================
RCS file: /cvs/src/sys/kern/uipc_mbuf.c,v
retrieving revision 1.233
diff -u -p -r1.233 uipc_mbuf.c
--- uipc_mbuf.c 10 Oct 2016 00:41:17 -0000 1.233
+++ uipc_mbuf.c 11 Oct 2016 09:07:09 -0000
@@ -842,64 +842,79 @@ struct mbuf *
m_pullup(struct mbuf *n, int len)
{
struct mbuf *m;
- int count;
+ unsigned int adj;
+ caddr_t head, tail;
+ unsigned int space;
- /*
- * If first mbuf has no cluster, and has room for len bytes
- * without shifting current data, pullup into it,
- * otherwise allocate a new mbuf to prepend to the chain.
- */
- if ((n->m_flags & M_EXT) == 0 && n->m_next &&
- n->m_data + len < &n->m_dat[MLEN]) {
- if (n->m_len >= len)
- return (n);
- m = n;
- n = n->m_next;
- len -= m->m_len;
- } else if ((n->m_flags & M_EXT) != 0 && len > MHLEN && n->m_next &&
- n->m_data + len < &n->m_ext.ext_buf[n->m_ext.ext_size]) {
- if (n->m_len >= len)
- return (n);
+ /* if n is already contig then don't do any work */
+ if (len <= n->m_len)
+ return (n);
+
+ adj = (unsigned long)n->m_data & ALIGNBYTES;
+ head = (caddr_t)ALIGN(mtod(n, caddr_t) - M_LEADINGSPACE(n)) + adj;
+ tail = mtod(n, caddr_t) + n->m_len + M_TRAILINGSPACE(n);
+
+ if (head < tail && len <= tail - head) {
+ /* there's enough space in the first mbuf */
+
+ if (len > tail - mtod(n, caddr_t)) {
+ /* need to memmove to make space at the end */
+ memmove(head, mtod(n, caddr_t), n->m_len);
+ n->m_data = head;
+ }
+
+ len -= n->m_len;
m = n;
- n = n->m_next;
- len -= m->m_len;
+ n = m->m_next;
} else {
- if (len > MAXMCLBYTES)
+ /* the first mbuf is too small so prepend one with space */
+ space = adj + len;
+
+ if (space > MAXMCLBYTES)
goto bad;
+
MGET(m, M_DONTWAIT, n->m_type);
if (m == NULL)
goto bad;
- if (len > MHLEN) {
- MCLGETI(m, M_DONTWAIT, NULL, len);
+ if (space > MHLEN) {
+ MCLGETI(m, M_DONTWAIT, NULL, space);
if ((m->m_flags & M_EXT) == 0) {
m_free(m);
goto bad;
}
}
- m->m_len = 0;
+
if (n->m_flags & M_PKTHDR)
M_MOVE_PKTHDR(m, n);
+
+ m->m_len = 0;
+ m->m_data += adj;
}
+ KASSERT(M_TRAILINGSPACE(m) >= len);
+
do {
- count = min(len, n->m_len);
- memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t),
- count);
- len -= count;
- m->m_len += count;
- n->m_len -= count;
- if (n->m_len)
- n->m_data += count;
+ if (n == NULL) {
+ (void)m_free(m);
+ goto bad;
+ }
+
+ space = min(len, n->m_len);
+ memcpy(mtod(m, caddr_t) + m->m_len, mtod(n, caddr_t), space);
+ len -= space;
+ m->m_len += space;
+ n->m_len -= space;
+
+ if (n->m_len > 0)
+ n->m_data += space;
else
n = m_free(n);
- } while (len > 0 && n);
- if (len > 0) {
- (void)m_free(m);
- goto bad;
- }
+ } while (len > 0);
+
m->m_next = n;
return (m);
+
bad:
m_freem(n);
return (NULL);