Signed-off-by: Mark McLoughlin <[EMAIL PROTECTED]>
---
qemu/hw/virtio-net.c | 86 +++++++++++++++++++++++++++++++++++++++++---------
qemu/net.h | 5 +++
qemu/vl.c | 73 +++++++++++++++++++++++++++++++++++++++---
3 files changed, 144 insertions(+), 20 deletions(-)
diff --git a/qemu/hw/virtio-net.c b/qemu/hw/virtio-net.c
index 419a2d7..81282c4 100644
--- a/qemu/hw/virtio-net.c
+++ b/qemu/hw/virtio-net.c
@@ -22,9 +22,18 @@
#define VIRTIO_ID_NET 1
/* The feature bitmap for virtio net */
-#define VIRTIO_NET_F_NO_CSUM 0
-#define VIRTIO_NET_F_MAC 5
-#define VIRTIO_NET_F_GS0 6
+#define VIRTIO_NET_F_CSUM 0 /* Host handles pkts w/ partial csum */
+#define VIRTIO_NET_F_GUEST_CSUM 1 /* Guest handles pkts w/ partial csum */
+#define VIRTIO_NET_F_MAC 5 /* Host has given MAC address. */
+#define VIRTIO_NET_F_GSO 6 /* Host handles pkts w/ any GSO type */
+#define VIRTIO_NET_F_GUEST_TSO4 7 /* Guest can handle TSOv4 in. */
+#define VIRTIO_NET_F_GUEST_TSO6 8 /* Guest can handle TSOv6 in. */
+#define VIRTIO_NET_F_GUEST_ECN 9 /* Guest can handle TSO[6] w/ ECN in. */
+#define VIRTIO_NET_F_GUEST_UFO 10 /* Guest can handle UFO in. */
+#define VIRTIO_NET_F_HOST_TSO4 11 /* Host can handle TSOv4 in. */
+#define VIRTIO_NET_F_HOST_TSO6 12 /* Host can handle TSOv6 in. */
+#define VIRTIO_NET_F_HOST_ECN 13 /* Host can handle TSO[6] w/ ECN in. */
+#define VIRTIO_NET_F_HOST_UFO 14 /* Host can handle UFO in. */
#define TX_TIMER_INTERVAL (150000) /* 150 us */
@@ -42,8 +51,6 @@ struct virtio_net_hdr
uint8_t flags;
#define VIRTIO_NET_HDR_GSO_NONE 0 // Not a GSO frame
#define VIRTIO_NET_HDR_GSO_TCPV4 1 // GSO frame, IPv4 TCP (TSO)
-/* FIXME: Do we need this? If they said they can handle ECN, do they care? */
-#define VIRTIO_NET_HDR_GSO_TCPV4_ECN 2 // GSO frame, IPv4 TCP w/ ECN
#define VIRTIO_NET_HDR_GSO_UDP 3 // GSO frame, IPv4 UDP (UFO)
#define VIRTIO_NET_HDR_GSO_TCPV6 4 // GSO frame, IPv6 TCP
#define VIRTIO_NET_HDR_GSO_ECN 0x80 // TCP has ECN set
@@ -85,7 +92,38 @@ static void virtio_net_update_config(VirtIODevice *vdev, uint8_t *config)
static uint32_t virtio_net_get_features(VirtIODevice *vdev)
{
- return (1 << VIRTIO_NET_F_MAC);
+ VirtIONet *n = to_virtio_net(vdev);
+ VLANClientState *host = n->vc->vlan->first_client;
+ uint32_t features = (1 << VIRTIO_NET_F_MAC);
+
+ if (tap_has_offload(host)) {
+ features |= (1 << VIRTIO_NET_F_CSUM);
+ features |= (1 << VIRTIO_NET_F_GUEST_CSUM);
+ features |= (1 << VIRTIO_NET_F_GUEST_TSO4);
+ features |= (1 << VIRTIO_NET_F_GUEST_TSO6);
+ features |= (1 << VIRTIO_NET_F_GUEST_ECN);
+ features |= (1 << VIRTIO_NET_F_HOST_TSO4);
+ features |= (1 << VIRTIO_NET_F_HOST_TSO6);
+ features |= (1 << VIRTIO_NET_F_HOST_ECN);
+ /* Kernel can't actually handle UFO in software currently. */
+ }
+
+ return features;
+}
+
+static void virtio_net_set_features(VirtIODevice *vdev, uint32_t features)
+{
+ VirtIONet *n = to_virtio_net(vdev);
+ VLANClientState *host = n->vc->vlan->first_client;
+
+ if (!tap_has_offload(host) || !host->set_offload)
+ return;
+
+ host->set_offload(host,
+ (features >> VIRTIO_NET_F_GUEST_CSUM) & 1,
+ (features >> VIRTIO_NET_F_GUEST_TSO4) & 1,
+ (features >> VIRTIO_NET_F_GUEST_TSO6) & 1,
+ (features >> VIRTIO_NET_F_GUEST_ECN) & 1);
}
/* RX */
@@ -121,6 +159,7 @@ static void virtio_net_receive(void *opaque, const uint8_t *buf, int size)
VirtQueueElement elem;
struct virtio_net_hdr *hdr;
int offset, i;
+ int total;
if (virtqueue_pop(n->rx_vq, &elem) == 0)
return;
@@ -134,18 +173,26 @@ static void virtio_net_receive(void *opaque, const uint8_t *buf, int size)
hdr->flags = 0;
hdr->gso_type = VIRTIO_NET_HDR_GSO_NONE;
- /* copy in packet. ugh */
offset = 0;
+ total = sizeof(*hdr);
+
+ if (tap_has_offload(n->vc->vlan->first_client)) {
+ memcpy(hdr, buf, sizeof(*hdr));
+ offset += total;
+ }
+
+ /* copy in packet. ugh */
i = 1;
while (offset < size && i < elem.in_num) {
int len = MIN(elem.in_sg[i].iov_len, size - offset);
memcpy(elem.in_sg[i].iov_base, buf + offset, len);
offset += len;
+ total += len;
i++;
}
/* signal other side */
- virtqueue_push(n->rx_vq, &elem, sizeof(*hdr) + offset);
+ virtqueue_push(n->rx_vq, &elem, total);
virtio_notify(&n->vdev, n->rx_vq);
}
@@ -153,23 +200,31 @@ static void virtio_net_receive(void *opaque, const uint8_t *buf, int size)
static void virtio_net_flush_tx(VirtIONet *n, VirtQueue *vq)
{
VirtQueueElement elem;
+ int has_offload = tap_has_offload(n->vc->vlan->first_client);
if (!(n->vdev.status & VIRTIO_CONFIG_S_DRIVER_OK))
return;
while (virtqueue_pop(vq, &elem)) {
ssize_t len = 0;
+ unsigned int out_num = elem.out_num;
+ struct iovec *out_sg = &elem.out_sg[0];
+
+ if (out_num < 1 || out_sg->iov_len != sizeof(struct virtio_net_hdr)) {
+ fprintf(stderr, "virtio-net header not in first element\n");
+ exit(1);
+ }
- if (elem.out_num < 1 ||
- elem.out_sg[0].iov_len != sizeof(struct virtio_net_hdr)) {
- fprintf(stderr, "virtio-net header not in first element\n");
- exit(1);
+ /* ignore the header if GSO is not supported */
+ if (!has_offload) {
+ out_num--;
+ out_sg++;
+ len += sizeof(struct virtio_net_hdr);
}
- /* ignore the header for now */
- len = qemu_sendv_packet(n->vc, &elem.out_sg[1], elem.out_num - 1);
+ len += qemu_sendv_packet(n->vc, out_sg, out_num);
- virtqueue_push(vq, &elem, sizeof(struct virtio_net_hdr) + len);
+ virtqueue_push(vq, &elem, len);
virtio_notify(&n->vdev, vq);
}
}
@@ -249,6 +304,7 @@ PCIDevice *virtio_net_init(PCIBus *bus, NICInfo *nd, int devfn)
n->vdev.update_config = virtio_net_update_config;
n->vdev.get_features = virtio_net_get_features;
+ n->vdev.set_features = virtio_net_set_features;
n->rx_vq = virtio_add_queue(&n->vdev, 128, virtio_net_handle_rx);
n->tx_vq = virtio_add_queue(&n->vdev, 128, virtio_net_handle_tx);
memcpy(n->mac, nd->macaddr, 6);
diff --git a/qemu/net.h b/qemu/net.h
index e8ee325..6cfd8ce 100644
--- a/qemu/net.h
+++ b/qemu/net.h
@@ -9,12 +9,15 @@ typedef ssize_t (IOReadvHandler)(void *, const struct iovec *, int);
typedef struct VLANClientState VLANClientState;
+typedef void (SetOffload)(VLANClientState *, int, int, int, int);
+
struct VLANClientState {
IOReadHandler *fd_read;
IOReadvHandler *fd_readv;
/* Packets may still be sent if this returns zero. It's used to
rate-limit the slirp code. */
IOCanRWHandler *fd_can_read;
+ SetOffload *set_offload;
void *opaque;
struct VLANClientState *next;
struct VLANState *vlan;
@@ -42,6 +45,8 @@ void qemu_handler_true(void *opaque);
void do_info_network(void);
+int tap_has_offload(void *opaque);
+
int net_client_init(const char *str);
void net_client_uninit(NICInfo *nd);
diff --git a/qemu/vl.c b/qemu/vl.c
index b7d3397..efdaafd 100644
--- a/qemu/vl.c
+++ b/qemu/vl.c
@@ -4186,12 +4186,24 @@ void do_info_slirp(void)
#if !defined(_WIN32)
+#ifndef IFF_VNET_HDR
+#define TAP_BUFSIZE 4096
+#else
+#include <linux/virtio_net.h>
+#define ETH_HLEN 14
+#define ETH_DATA_LEN 1500
+#define MAX_PACKET_LEN (ETH_HLEN + ETH_DATA_LEN)
+#define MAX_SKB_FRAGS ((65536/TARGET_PAGE_SIZE) + 2)
+#define TAP_BUFSIZE (sizeof(struct virtio_net_hdr) + MAX_PACKET_LEN + (MAX_SKB_FRAGS*TARGET_PAGE_SIZE))
+#endif
+
typedef struct TAPState {
VLANClientState *vc;
int fd;
char down_script[1024];
- char buf[4096];
+ char buf[TAP_BUFSIZE];
int size;
+ int offload;
} TAPState;
static void tap_receive(void *opaque, const uint8_t *buf, int size)
@@ -4286,6 +4298,37 @@ static void tap_send(void *opaque)
} while (s->size > 0);
}
+int tap_has_offload(void *opaque)
+{
+ VLANClientState *vc = opaque;
+ TAPState *ts = vc->opaque;
+
+ return ts ? ts->offload : 0;
+}
+
+#ifdef TUNSETOFFLOAD
+static void tap_set_offload(VLANClientState *vc, int csum, int tso4, int tso6,
+ int ecn)
+{
+ TAPState *s = vc->opaque;
+ unsigned int offload = 0;
+
+ if (csum) {
+ offload |= TUN_F_CSUM;
+ if (tso4)
+ offload |= TUN_F_TSO4;
+ if (tso6)
+ offload |= TUN_F_TSO6;
+ if ((tso4 || tso6) && ecn)
+ offload |= TUN_F_TSO_ECN;
+ }
+
+ if (ioctl(s->fd, TUNSETOFFLOAD, offload) != 0)
+ fprintf(stderr, "TUNSETOFFLOAD ioctl() failed: %s\n",
+ strerror(errno));
+}
+#endif /* TUNSETOFFLOAD */
+
/* fd support */
static TAPState *net_tap_fd_init(VLANState *vlan, int fd)
@@ -4298,13 +4341,16 @@ static TAPState *net_tap_fd_init(VLANState *vlan, int fd)
s->fd = fd;
s->vc = qemu_new_vlan_client(vlan, tap_receive, NULL, s);
s->vc->fd_readv = tap_readv;
+#ifdef TUNSETOFFLOAD
+ s->vc->set_offload = tap_set_offload;
+#endif
qemu_set_fd_handler2(s->fd, tap_can_send, tap_send, NULL, s);
snprintf(s->vc->info_str, sizeof(s->vc->info_str), "tap: fd=%d", fd);
return s;
}
#if defined (_BSD) || defined (__FreeBSD_kernel__)
-static int tap_open(char *ifname, int ifname_size)
+static int tap_open(char *ifname, int ifname_size, int *offload)
{
int fd;
char *dev;
@@ -4446,7 +4492,7 @@ int tap_alloc(char *dev)
return tap_fd;
}
-static int tap_open(char *ifname, int ifname_size)
+static int tap_open(char *ifname, int ifname_size, int *offload)
{
char dev[10]="";
int fd;
@@ -4459,18 +4505,31 @@ static int tap_open(char *ifname, int ifname_size)
return fd;
}
#else
-static int tap_open(char *ifname, int ifname_size)
+static int tap_open(char *ifname, int ifname_size, int *offload)
{
struct ifreq ifr;
int fd, ret;
+ unsigned int features;
TFR(fd = open("/dev/net/tun", O_RDWR));
if (fd < 0) {
fprintf(stderr, "warning: could not open /dev/net/tun: no virtual network emulation\n");
return -1;
}
+
+ if (ioctl(fd, TUNGETFEATURES, &features))
+ features = IFF_TUN | IFF_TAP | IFF_NO_PI | IFF_ONE_QUEUE;
+
memset(&ifr, 0, sizeof(ifr));
ifr.ifr_flags = IFF_TAP | IFF_NO_PI;
+
+#ifdef IFF_VNET_HDR
+ if (features & IFF_VNET_HDR) {
+ *offload = 1;
+ ifr.ifr_flags |= IFF_VNET_HDR;
+ }
+#endif
+
if (ifname[0] != '\0')
pstrcpy(ifr.ifr_name, IFNAMSIZ, ifname);
else
@@ -4528,13 +4587,15 @@ static int net_tap_init(VLANState *vlan, const char *ifname1,
{
TAPState *s;
int fd;
+ int offload;
char ifname[128];
if (ifname1 != NULL)
pstrcpy(ifname, sizeof(ifname), ifname1);
else
ifname[0] = '\0';
- TFR(fd = tap_open(ifname, sizeof(ifname)));
+ offload = 0;
+ TFR(fd = tap_open(ifname, sizeof(ifname), &offload));
if (fd < 0)
return -1;
@@ -4547,6 +4608,8 @@ static int net_tap_init(VLANState *vlan, const char *ifname1,
s = net_tap_fd_init(vlan, fd);
if (!s)
return -1;
+
+ s->offload = offload;
snprintf(s->vc->info_str, sizeof(s->vc->info_str),
"tap: ifname=%s setup_script=%s", ifname, setup_script);
if (down_script && strcmp(down_script, "no"))
--
1.5.4.1
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at http://vger.kernel.org/majordomo-info.html