ping....

Zhenyu Gao

2017-06-20 18:15 GMT+08:00 Zhenyu Gao <[email protected]>:

> Sendmmsg can reduce the CPU cycles spent sending packets to the kernel.
> Replace sendmsg with sendmmsg in netdev_linux_send so that packets are
> sent in batches when sendmmsg is available.
>
> If the kernel doesn't support sendmmsg, fall back to sendmsg.
>
>     netserver
> |------------|
> |            |
> |  container |
> |----veth----|
>           |
>           |        |------------|
>           |---veth-|   dpdk-ovs |      netperf
>                    |            |  |--------------|
>                    |----dpdk----|  | bare-metal   |
>                          |         |--------------|
>                          |              |
>                          |              |
>                         pnic-----------pnic
>
> Netperf was used to test the performance:
>
> 1)cmd:netperf -H remote-container -t UDP_STREAM -l 60 -- -m 1400
> result: netserver received 2383.21Mb(sendmsg)/2551.64Mb(sendmmsg)
>
> 2)cmd:netperf -H remote-container -t UDP_STREAM -l 60 -- -m 60
> result: netserver received 109.72Mb(sendmsg)/115.18Mb(sendmmsg)
>
> Sendmmsg shows about a 6% improvement in netperf UDP testing.
>
> Signed-off-by: Zhenyu Gao <[email protected]>
> ---
>  configure.ac       |  2 +-
>  lib/netdev-linux.c | 71 ++++++++++++++++++++++++++++++
> ++++++++++++++++++++++--
>  2 files changed, 70 insertions(+), 3 deletions(-)
>
> diff --git a/configure.ac b/configure.ac
> index 6404b5f..b02c7c4 100644
> --- a/configure.ac
> +++ b/configure.ac
> @@ -106,7 +106,7 @@ AC_CHECK_DECLS([sys_siglist], [], [], [[#include
> <signal.h>]])
>  AC_CHECK_MEMBERS([struct stat.st_mtim.tv_nsec, struct stat.st_mtimensec],
>    [], [], [[#include <sys/stat.h>]])
>  AC_CHECK_MEMBERS([struct ifreq.ifr_flagshigh], [], [], [[#include
> <net/if.h>]])
> -AC_CHECK_FUNCS([mlockall strnlen getloadavg statvfs getmntent_r])
> +AC_CHECK_FUNCS([mlockall strnlen getloadavg statvfs getmntent_r sendmmsg])
>  AC_CHECK_HEADERS([mntent.h sys/statvfs.h linux/types.h linux/if_ether.h
> stdatomic.h])
>  AC_CHECK_HEADERS([net/if_mib.h], [], [], [[#include <sys/types.h>
>  #include <net/if.h>]])
> diff --git a/lib/netdev-linux.c b/lib/netdev-linux.c
> index 1b88775..b90a22a 100644
> --- a/lib/netdev-linux.c
> +++ b/lib/netdev-linux.c
> @@ -1187,6 +1187,54 @@ netdev_linux_rxq_drain(struct netdev_rxq *rxq_)
>      }
>  }
>
> +static inline int
> +netdev_linux_sock_batch_send(int sock, struct msghdr *msg,
> +                             struct dp_packet_batch *batch)
> +{
> +    int error = 0;
> +    ssize_t retval;
> +    uint32_t resend_idx = 0;
> +    struct mmsghdr *mmsg;
> +    struct iovec *iov;
> +
> +    mmsg = xmalloc(sizeof(*mmsg) * batch->count);
> +    iov = xmalloc(sizeof(*iov) * batch->count);
> +
> +    for (int i = 0; i < batch->count; i++) {
> +        const void *data = dp_packet_data(batch->packets[i]);
> +        size_t size = dp_packet_size(batch->packets[i]);
> +
> +        /* Truncate the packet if it is configured. */
> +        size -= dp_packet_get_cutlen(batch->packets[i]);
> +
> +        iov[i].iov_base = CONST_CAST(void *, data);
> +        iov[i].iov_len = size;
> +        mmsg[i].msg_hdr = *msg;
> +        mmsg[i].msg_hdr.msg_iov = &iov[i];
> +    }
> +
> +resend_batch:
> +    retval = sendmmsg(sock, mmsg + resend_idx,
> +                      batch->count - resend_idx, 0);
> +    if (retval < 0) {
> +        if (errno == EINTR) {
> +            goto resend_batch;
> +        }
> +        /* The Linux AF_PACKET implementation never blocks waiting for
> +         * room for packets, instead returning ENOBUFS.  Translate this
> +         * into EAGAIN for the caller. */
> +        error = errno == ENOBUFS ? EAGAIN : errno;
> +    } else if (retval != batch->count - resend_idx) {
> +       /* Send remain packets again. */
> +        resend_idx += retval;
> +        goto resend_batch;
> +    }
> +
> +    free(mmsg);
> +    free(iov);
> +    return error;
> +}
> +
>  /* Sends 'buffer' on 'netdev'.  Returns 0 if successful, otherwise a
> positive
>   * errno value.  Returns EAGAIN without blocking if the packet cannot be
> queued
>   * immediately.  Returns EMSGSIZE if a partial packet was transmitted or
> if
> @@ -1207,6 +1255,9 @@ netdev_linux_send(struct netdev *netdev_, int qid
> OVS_UNUSED,
>      struct sockaddr_ll sll;
>      struct msghdr msg;
>      if (!is_tap_netdev(netdev_)) {
> +#ifdef HAVE_SENDMMSG
> +        static bool try_sendmmsg = true;
> +#endif
>          sock = af_packet_sock();
>          if (sock < 0) {
>              error = -sock;
> @@ -1231,6 +1282,21 @@ netdev_linux_send(struct netdev *netdev_, int qid
> OVS_UNUSED,
>          msg.msg_control = NULL;
>          msg.msg_controllen = 0;
>          msg.msg_flags = 0;
> +
> +#ifdef HAVE_SENDMMSG
> +        if (try_sendmmsg) {
> +            /* Try batch sending to socket */
> +            error = netdev_linux_sock_batch_send(sock, &msg, batch);
> +            if (error == ENOSYS) {
> +                /* Linux kernel does not implement this function */
> +                try_sendmmsg = false;
> +                VLOG_WARN("Linux kernel doesn't implement sendmmsg, "
> +                          "going to consume sendmsg");
> +            } else {
> +                goto check_error;
> +            }
> +        }
> +#endif
>      }
>
>      /* 'i' is incremented only if there's no error */
> @@ -1290,9 +1356,10 @@ netdev_linux_send(struct netdev *netdev_, int qid
> OVS_UNUSED,
>          i++;
>      }
>
> +check_error:
>      if (error && error != EAGAIN) {
> -            VLOG_WARN_RL(&rl, "error sending Ethernet packet on %s: %s",
> -                         netdev_get_name(netdev_), ovs_strerror(error));
> +        VLOG_WARN_RL(&rl, "error sending Ethernet packet on %s: %s",
> +                     netdev_get_name(netdev_), ovs_strerror(error));
>      }
>
>  free_batch:
> --
> 1.8.3.1
>
>
_______________________________________________
dev mailing list
[email protected]
https://mail.openvswitch.org/mailman/listinfo/ovs-dev

Reply via email to