On 02/01/2016 02:13 AM, w...@redhat.com wrote: > From: Wei Xu <w...@wei-thinkpad.nay.redhat.com> > > Upon a packet is arriving, a corresponding chain will be selected or created, > or be bypassed if it's not an IPv4 packets. > > The callback in the chain will be invoked to call the real coalescing. > > Since the coalescing is based on the TCP connection, so the packets will be > cached if there is no previous data within the same connection. > > The framework of IPv4 is also introduced. > > This patch depends on patch 2918cf2 (Detailed IPv4 and General TCP data > coalescing)
Then looks like the order needs to be changed? > > Signed-off-by: Wei Xu <w...@redhat.com> > --- > hw/net/virtio-net.c | 173 > +++++++++++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 172 insertions(+), 1 deletion(-) > > diff --git a/hw/net/virtio-net.c b/hw/net/virtio-net.c > index 4e9458e..cfbac6d 100644 > --- a/hw/net/virtio-net.c > +++ b/hw/net/virtio-net.c > @@ -14,10 +14,12 @@ > #include "qemu/iov.h" > #include "hw/virtio/virtio.h" > #include "net/net.h" > +#include "net/eth.h" > #include "net/checksum.h" > #include "net/tap.h" > #include "qemu/error-report.h" > #include "qemu/timer.h" > +#include "qemu/sockets.h" > #include "hw/virtio/virtio-net.h" > #include "net/vhost_net.h" > #include "hw/virtio/virtio-bus.h" > @@ -37,6 +39,21 @@ > #define endof(container, field) \ > (offsetof(container, field) + sizeof(((container *)0)->field)) > > +#define VIRTIO_HEADER 12 /* Virtio net header size */ This looks wrong if mrg_rxbuf (VIRTIO_NET_F_MRG_RXBUF) is off. > +#define IP_OFFSET (VIRTIO_HEADER + sizeof(struct eth_header)) > + > +#define MAX_VIRTIO_IP_PAYLOAD (65535 + IP_OFFSET) > + > +/* Global statistics */ > +static uint32_t rsc_chain_no_mem; This is meaningless, see below comments. > + > +/* Switcher to enable/disable rsc */ > +static bool virtio_net_rsc_bypass; > + > +/* Coalesce callback for ipv4/6 */ > +typedef int32_t (VirtioNetCoalesce) (NetRscChain *chain, NetRscSeg *seg, > + const uint8_t *buf, size_t size); > + > typedef struct VirtIOFeature { > uint32_t flags; > size_t end; > @@ -1019,7 +1036,8 @@ static int receive_filter(VirtIONet *n, const uint8_t > *buf, int size) > return 0; > } > > -static ssize_t virtio_net_receive(NetClientState *nc, const uint8_t *buf, > size_t size) > +static ssize_t virtio_net_do_receive(NetClientState *nc, > + const uint8_t *buf, size_t size) > { > VirtIONet *n = qemu_get_nic_opaque(nc); > VirtIONetQueue *q = virtio_net_get_subqueue(nc); > @@ -1623,6 +1641,159 @@ static void virtio_net_rsc_cleanup(VirtIONet *n) > } > } > > +static int virtio_net_rsc_cache_buf(NetRscChain *chain, NetClientState *nc, > + const uint8_t *buf, size_t size) > +{ > + NetRscSeg *seg; > + > + seg = g_malloc(sizeof(NetRscSeg)); > + if (!seg) { > + return 0; > + } g_malloc() can't fail, no need to check if it succeeded. > + > + seg->buf = g_malloc(MAX_VIRTIO_IP_PAYLOAD); > + if (!seg->buf) { > + goto out; > + } > + > + memmove(seg->buf, buf, size); > + seg->size = size; > + seg->dup_ack_count = 0; > + seg->is_coalesced = 0; > + seg->nc = nc; > + > + QTAILQ_INSERT_TAIL(&chain->buffers, seg, next); > + return size; > + > +out: > + g_free(seg); > + return 0; > +} > + > + > +static int32_t virtio_net_rsc_try_coalesce4(NetRscChain *chain, > + NetRscSeg *seg, const uint8_t *buf, size_t size) > +{ > + /* This real part of this function will be introduced in next patch, just > + * return a 'final' to feed the compilation. */ > + return RSC_FINAL; > +} > + > +static size_t virtio_net_rsc_callback(NetRscChain *chain, NetClientState *nc, > + const uint8_t *buf, size_t size, VirtioNetCoalesce *coalesce) > +{ Looks like this function was called directly, so "callback" suffix is not accurate. > + int ret; > + NetRscSeg *seg, *nseg; > + > + if (QTAILQ_EMPTY(&chain->buffers)) { > + if (!virtio_net_rsc_cache_buf(chain, nc, buf, size)) { > + return 0; > + } else { > + return size; > + } > + } > + > + QTAILQ_FOREACH_SAFE(seg, &chain->buffers, next, nseg) { > + ret = coalesce(chain, seg, buf, size); > + if (RSC_FINAL == ret) { Let's use "ret == RSC_FINAL" for a consistent coding style with other qemu codes. > + ret = virtio_net_do_receive(seg->nc, seg->buf, seg->size); > + QTAILQ_REMOVE(&chain->buffers, seg, next); > + g_free(seg->buf); > + g_free(seg); > + if (ret == 0) { > + /* Send failed */ > + return 0; > + } > + > + /* Send current packet */ > + return virtio_net_do_receive(nc, buf, size); > + } else if (RSC_NO_MATCH == ret) { > + continue; > + } else { > + /* Coalesced, mark coalesced flag to tell calc cksum for ipv4 */ > + seg->is_coalesced = 1; > + return size; > + } > + } > + > + return virtio_net_rsc_cache_buf(chain, nc, buf, size); Maybe you can move the seg traversing info coalesce() function? This can greatly simplify the codes above? (E.g no need to call virtio_net_rsc_cache_buf() in two places?) > +} > + > +static size_t virtio_net_rsc_receive4(void *opq, NetClientState* nc, > + const uint8_t *buf, size_t size) > +{ > + NetRscChain *chain; > + > + chain = (NetRscChain *)opq; > + return virtio_net_rsc_callback(chain, nc, buf, size, > + virtio_net_rsc_try_coalesce4); > +} > + > +static NetRscChain *virtio_net_rsc_lookup_chain(NetClientState *nc, > + uint16_t proto) > +{ > + VirtIONet *n; > + NetRscChain *chain; > + NICState *nic; > + > + /* Only handle IPv4/6 */ > + if (proto != (uint16_t)ETH_P_IP) { The comments is inconsistent with code, the code can only handle IPv4 currently. > + return NULL; > + } > + > + nic = (NICState *)nc; This cast is wrong for multiqueue backend. You can refer the exist virtio_net_receive() for how to vet VirtIONet structure. E.g: ... VirtIONet *n = qemu_get_nic_opaque(nc); ... > + n = container_of(&nic, VirtIONet, nic); > + QTAILQ_FOREACH(chain, &n->rsc_chains, next) { > + if (chain->proto == proto) { > + return chain; > + } > + } > + > + chain = g_malloc(sizeof(*chain)); > + if (!chain) { > + rsc_chain_no_mem++; Since g_malloc() can't fail, this is meaningless. > + return NULL; > + } > + > + chain->proto = proto; > + chain->do_receive = virtio_net_rsc_receive4; > + > + QTAILQ_INIT(&chain->buffers); > + QTAILQ_INSERT_TAIL(&n->rsc_chains, chain, next); > + return chain; > +} Better to split the chain initialization from lookup. And we can initialize ipv4 chain during initialization. > + > +static ssize_t virtio_net_rsc_receive(NetClientState *nc, > + const uint8_t *buf, size_t size) > +{ > + uint16_t proto; > + NetRscChain *chain; > + struct eth_header *eth; > + > + if (size < IP_OFFSET) { > + return virtio_net_do_receive(nc, buf, size); > + } > + > + eth = (struct eth_header *)(buf + VIRTIO_HEADER); > + proto = htons(eth->h_proto); > + chain = virtio_net_rsc_lookup_chain(nc, proto); > + if (!chain) { > + return virtio_net_do_receive(nc, buf, size); > + } else { > + return chain->do_receive(chain, nc, buf, size); > + } > +} > + > +static ssize_t virtio_net_receive(NetClientState *nc, > + const uint8_t *buf, size_t size) > +{ > + if (virtio_net_rsc_bypass) { > + return virtio_net_do_receive(nc, buf, size); > + } else { > + return virtio_net_rsc_receive(nc, buf, size); > + } > +} > + > static NetClientInfo net_virtio_info = { > .type = NET_CLIENT_OPTIONS_KIND_NIC, > .size = sizeof(NICState),