Simple hack to use dma engine for tun RX.
Only one skb in flight at the moment.

Signed-off-by: Michael S. Tsirkin <m...@redhat.com>
---

I am still looking at handling multiple skbs, but
sending this out for early flames and improvement suggestions.

Loopback testing seems to show only minor performance gains:
this is not really surprising as data is hot in cache already.
Where I would expect this to help more is with incoming
traffic from an external NIC. This still needs to be tested.

 drivers/dma/Kconfig   |    2 +-
 drivers/dma/iovlock.c |    2 +-
 drivers/net/tun.c     |  389 ++++++++++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 390 insertions(+), 3 deletions(-)

diff --git a/drivers/dma/Kconfig b/drivers/dma/Kconfig
index 9520cf0..7e82c00 100644
--- a/drivers/dma/Kconfig
+++ b/drivers/dma/Kconfig
@@ -202,7 +202,7 @@ comment "DMA Clients"
        depends on DMA_ENGINE
 
 config NET_DMA
-       bool "Network: TCP receive copy offload"
+       bool "Network: TCP/TUN receive copy offload"
        depends on DMA_ENGINE && NET
        default (INTEL_IOATDMA || FSL_DMA)
        help
diff --git a/drivers/dma/iovlock.c b/drivers/dma/iovlock.c
index c6917e8..121d7fd 100644
--- a/drivers/dma/iovlock.c
+++ b/drivers/dma/iovlock.c
@@ -138,7 +138,7 @@ void dma_unpin_iovec_pages(struct dma_pinned_list 
*pinned_list)
 
        kfree(pinned_list);
 }
-
+EXPORT_SYMBOL_GPL(dma_unpin_iovec_pages);
 
 /*
  * We have already pinned down the pages we will be using in the iovecs.
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 55f3a3e..ddbfbc8 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -62,6 +62,8 @@
 #include <linux/nsproxy.h>
 #include <linux/virtio_net.h>
 #include <linux/rcupdate.h>
+#include <linux/dmaengine.h>
+#include <linux/pagemap.h>
 #include <net/net_namespace.h>
 #include <net/netns/generic.h>
 #include <net/rtnetlink.h>
@@ -70,6 +72,9 @@
 #include <asm/system.h>
 #include <asm/uaccess.h>
 
+/* Minimum packet length, in bytes, for which tun_put_user() tries the DMA engine. */
+int tun_dma_copybreak = 0x10000;
+module_param_named(dma_copybreak, tun_dma_copybreak, int, 0644);
+MODULE_PARM_DESC(dma_copybreak, "Use DMA engine for messages of this length and up");
 /* Uncomment to enable debugging */
 /* #define TUN_DEBUG 1 */
 
@@ -547,6 +552,364 @@ static inline struct sk_buff *tun_alloc_skb(struct 
tun_struct *tun,
        return skb;
 }
 
+#ifdef CONFIG_NET_DMA
+/* The below duplicates code from net/core and drivers/dma
+ * with the minor twist that these functions work on a const
+ * iovec with an offset. TODO: move it there? */
+/* Number of pages touched by the user range [iov_base, iov_base + iov_len). */
+static int num_pages_spanned(void __user * iov_base, size_t iov_len)
+{
+       unsigned long first_page = (unsigned long)iov_base & PAGE_MASK;
+       unsigned long past_end = PAGE_ALIGN((unsigned long)iov_base + iov_len);
+
+       return (past_end - first_page) >> PAGE_SHIFT;
+}
+
+/*
+ * Pin down the user pages backing len bytes of a const iovec, starting
+ * iov_offset bytes into it.
+ *
+ * iov:        iovec array describing the user buffer; it is not modified
+ * iov_offset: number of bytes to skip from the start of iov
+ * len:        number of bytes that are going to be copied
+ *
+ * Return a struct dma_pinned_list to keep track of pages pinned down, or
+ * NULL if the allocation or the pinning failed.  On failure every page
+ * pinned so far has already been released again.
+ *
+ * We are allocating a single chunk of memory, and then carving it up into
+ * 3 sections, the latter 2 whose size depends on the number of iovecs and the
+ * total number of pages, respectively.
+ *
+ * NOTE(review): page_list[0] describes the first iovec that is not entirely
+ * consumed by iov_offset, which is not necessarily iov[0].  The copy helpers
+ * in this file index page_list[] by position in the original iov array, so
+ * the two only line up when iov_offset falls inside iov[0] -- confirm
+ * against the callers.
+ * NOTE(review): the number of entries in iov is not passed in; this assumes
+ * the iovec covers at least iov_offset + len bytes -- confirm at call sites.
+ */
+static struct dma_pinned_list *dma_pin_const_iovec_pages(const struct iovec *iov,
+                                                      size_t iov_offset, size_t len)
+{
+       struct dma_pinned_list *local_list;
+       struct page **pages;
+       int i;
+       int ret;
+       int nr_iovecs = 0;
+       int iovec_len_used = 0;
+       int iovec_pages_used = 0;
+       void __user *iov_base;
+       size_t iov_len;
+
+       /* determine how many iovecs/pages there are, up front */
+       do {
+               iov_len = iov[nr_iovecs].iov_len;
+               /*
+                * Skip iovecs lying entirely before the offset.  Only do so
+                * until the first iovec has been accepted: from then on
+                * iov_offset is just the offset into that first iovec and
+                * must not cause a later, short iovec to be dropped.
+                */
+               if (!iovec_len_used && iov_offset >= iov_len) {
+                       iov_offset -= iov_len;
+                       ++iov;
+                       continue;
+               }
+               iov_base = iov[nr_iovecs].iov_base;
+               /* only the first accepted iovec is entered part-way */
+               if (!iovec_len_used) {
+                       iov_base += iov_offset;
+                       iov_len -= iov_offset;
+               }
+               iovec_len_used += iov_len;
+               iovec_pages_used += num_pages_spanned(iov_base, iov_len);
+               nr_iovecs++;
+       } while (iovec_len_used < len);
+
+       /* single kmalloc for pinned list, page_list[], and the page arrays */
+       local_list = kmalloc(sizeof(*local_list)
+               + (nr_iovecs * sizeof (struct dma_page_list))
+               + (iovec_pages_used * sizeof (struct page*)), GFP_KERNEL);
+       if (!local_list)
+               goto out;
+
+       /* list of pages starts right after the page list array */
+       pages = (struct page **) &local_list->page_list[nr_iovecs];
+
+       local_list->nr_iovecs = 0;
+
+       for (i = 0; i < nr_iovecs; i++) {
+               struct dma_page_list *page_list = &local_list->page_list[i];
+
+               /*
+                * Must describe exactly the range that was counted above:
+                * the first iovec starts iov_offset bytes in and is that much
+                * shorter; all the others are used from their start.
+                */
+               iov_len = iov[i].iov_len - iov_offset;
+               iov_base = iov[i].iov_base + iov_offset;
+               iov_offset = 0;
+
+               page_list->nr_pages = num_pages_spanned(iov_base, iov_len);
+               page_list->base_address = iov_base;
+
+               page_list->pages = pages;
+               pages += page_list->nr_pages;
+
+               /* pin pages down */
+               ret = get_user_pages_fast(
+                       (unsigned long)iov_base,
+                       page_list->nr_pages,
+                       1,      /* write */
+                       page_list->pages);
+
+               /* nothing pinned for this entry: unpin only the earlier ones */
+               if (unlikely(ret < 0))
+                       goto unpin;
+
+               local_list->nr_iovecs = i + 1;
+
+               /* partially pinned: record how many so they get released */
+               if (unlikely(ret != page_list->nr_pages)) {
+                       page_list->nr_pages = ret;
+                       goto unpin;
+               }
+
+       }
+
+       return local_list;
+
+unpin:
+       dma_unpin_iovec_pages(local_list);
+out:
+       return NULL;
+}
+
+/*
+ * Copy len bytes from the kernel buffer kdata into the user memory described
+ * by iov, starting iov_offset bytes into the iovec.
+ *
+ * We have already pinned down the pages we will be using in the iovecs.
+ * Each entry in iov array has corresponding entry in pinned_list->page_list.
+ * Using array indexing to keep iov[] and page_list[] in sync.
+ * The iov array is not modified: iovecs lying entirely before iov_offset are
+ *   skipped by length instead of having their iov_len zeroed.
+ * iov array length remaining guaranteed to be bigger than len.
+ *
+ * Without a channel or a pinned list this falls back to memcpy_toiovecend()
+ * and returns its result.  Otherwise it returns the cookie of the last
+ * operation queued on chan.
+ */
+dma_cookie_t dma_memcpy_to_iovecend(struct dma_chan *chan, const struct iovec *iov,
+       struct dma_pinned_list *pinned_list, unsigned char *kdata,
+       size_t iov_offset, size_t len)
+{
+       int iov_byte_offset;
+       int copy;
+       dma_cookie_t dma_cookie = 0;
+       int iovec_idx;
+       int page_idx;
+       size_t iov_len;
+       unsigned long iov_base;
+
+       /* same CPU-copy fallback as dma_memcpy_pg_to_const_iovec() */
+       if (!chan || !pinned_list)
+               return memcpy_toiovecend(iov, kdata, iov_offset, len);
+
+       for (iovec_idx = 0; iovec_idx < pinned_list->nr_iovecs; ++iovec_idx) {
+               struct dma_page_list *page_list;
+
+               iov_len = iov[iovec_idx].iov_len;
+               /* skip already used-up iovecs */
+               if (iov_len <= iov_offset) {
+                       iov_offset -= iov_len;
+                       continue;
+               }
+
+               page_list = &pinned_list->page_list[iovec_idx];
+
+               iov_base = (unsigned long)iov[iovec_idx].iov_base + iov_offset;
+               iov_len -= iov_offset;
+               iov_offset = 0;
+               iov_byte_offset = iov_base & ~PAGE_MASK;
+               /* index of the destination page relative to the first pinned one */
+               page_idx = ((iov_base & PAGE_MASK)
+                        - ((unsigned long)page_list->base_address & PAGE_MASK)) >> PAGE_SHIFT;
+
+               /* break up copies to not cross page boundary */
+               while (iov_len) {
+                       copy = min_t(int, PAGE_SIZE - iov_byte_offset, len);
+                       copy = min_t(int, copy, iov_len);
+
+                       dma_cookie = dma_async_memcpy_buf_to_pg(chan,
+                                       page_list->pages[page_idx],
+                                       iov_byte_offset,
+                                       kdata,
+                                       copy);
+                       /* poll for a descriptor slot: retry the same chunk */
+                       if (unlikely(dma_cookie < 0)) {
+                               dma_async_issue_pending(chan);
+                               continue;
+                       }
+
+                       len -= copy;
+                       iov_len -= copy;
+                       iov_base += copy;
+
+                       if (!len)
+                               return dma_cookie;
+
+                       kdata += copy;
+                       iov_byte_offset = 0;
+                       page_idx++;
+               }
+       }
+
+       /* really bad if we ever run out of iovecs */
+       BUG();
+       return -EFAULT;
+}
+
+/*
+ * Copy len bytes, starting at offset within page, into the user memory
+ * described by iov, starting iov_offset bytes into the iovec.
+ *
+ * The iov array is not modified; iovecs lying entirely before iov_offset are
+ * skipped.  pinned_list->page_list[] is indexed in step with iov[].
+ *
+ * Without a channel or a pinned list the page is mapped with kmap() and
+ * copied with memcpy_toiovecend(), whose result is returned.  Otherwise the
+ * cookie of the last operation queued on chan is returned.
+ */
+dma_cookie_t dma_memcpy_pg_to_const_iovec(struct dma_chan *chan, const struct iovec *iov,
+       struct dma_pinned_list *pinned_list, struct page *page,
+       unsigned int offset, size_t iov_offset, size_t len)
+{
+       int iov_byte_offset;
+       int copy;
+       dma_cookie_t dma_cookie = 0;
+       int iovec_idx;
+       int page_idx;
+       int err;
+       size_t iov_len;
+       unsigned long iov_base;
+
+       /* this needs as-yet-unimplemented buf-to-buff, so punt. */
+       /* TODO: use dma for this */
+       if (!chan || !pinned_list) {
+               u8 *vaddr = kmap(page);
+               err = memcpy_toiovecend(iov, vaddr + offset, iov_offset, len);
+               kunmap(page);
+               return err;
+       }
+
+       for (iovec_idx = 0; iovec_idx < pinned_list->nr_iovecs; ++iovec_idx) {
+               struct dma_page_list *page_list;
+
+               iov_len = iov[iovec_idx].iov_len;
+               /* skip already used-up iovecs */
+               if (iov_len <= iov_offset) {
+                       iov_offset -= iov_len;
+                       continue;
+               }
+
+               page_list = &pinned_list->page_list[iovec_idx];
+               iov_base = (unsigned long)iov[iovec_idx].iov_base + iov_offset;
+               iov_len -= iov_offset;
+               iov_offset = 0;
+
+               iov_byte_offset = iov_base & ~PAGE_MASK;
+               /* index of the destination page relative to the first pinned one */
+               page_idx = ((iov_base & PAGE_MASK)
+                        - ((unsigned long)page_list->base_address & PAGE_MASK)) >> PAGE_SHIFT;
+
+               /* break up copies to not cross page boundary */
+               while (iov_len) {
+                       copy = min_t(int, PAGE_SIZE - iov_byte_offset, len);
+                       copy = min_t(int, copy, iov_len);
+
+                       dma_cookie = dma_async_memcpy_pg_to_pg(chan,
+                                       page_list->pages[page_idx],
+                                       iov_byte_offset,
+                                       page,
+                                       offset,
+                                       copy);
+                       /* poll for a descriptor slot: retry the same chunk */
+                       if (unlikely(dma_cookie < 0)) {
+                               dma_async_issue_pending(chan);
+                               continue;
+                       }
+
+                       len -= copy;
+                       iov_len -= copy;
+                       iov_base += copy;
+
+                       if (!len)
+                               return dma_cookie;
+
+                       offset += copy;
+                       iov_byte_offset = 0;
+                       page_idx++;
+               }
+       }
+
+       /* really bad if we ever run out of iovecs */
+       BUG();
+       return -EFAULT;
+}
+
+/**
+ *     dma_skb_copy_datagram_const_iovec - Copy a datagram to an iovec.
+ *     @chan - DMA channel handed to the copy helpers
+ *     @skb - buffer to copy
+ *     @offset - offset in the buffer to start copying from
+ *     @to - io vector to copy to
+ *     @iov_offset - offset in the io vector to start copying to
+ *     @len - amount of data to copy from buffer to iovec
+ *     @pinned_list - locked iovec buffer data
+ *
+ *     Copies the linear header, then the page fragments, then recurses
+ *     into the fragment list.  On success the value returned by the last
+ *     copy helper is stored in skb->dma_cookie and returned.  Returns
+ *     -EFAULT if a helper returned a negative value or if @skb ran out
+ *     of data before @len bytes were copied.
+ *
+ *     Note: the iovec is not modified during the copy.
+ *     Note: pinned_list is assumed pinned with the same offset.
+ */
+dma_cookie_t dma_skb_copy_datagram_const_iovec(struct dma_chan *chan,
+                       struct sk_buff *skb, int offset, const struct iovec *to,
+                       size_t iov_offset,
+                       size_t len, struct dma_pinned_list *pinned_list)
+{
+       int start = skb_headlen(skb);
+       int i, copy = start - offset;
+       struct sk_buff *frag_iter;
+       dma_cookie_t cookie = 0;
+
+       /* Copy header. */
+       if (copy > 0) {
+               if (copy > len)
+                       copy = len;
+               cookie = dma_memcpy_to_iovecend(chan, to, pinned_list,
+                                               skb->data + offset, iov_offset,
+                                               copy);
+               if (cookie < 0)
+                       goto fault;
+               len -= copy;
+               if (len == 0)
+                       goto end;
+               offset += copy;
+               iov_offset += copy;
+       }
+
+       /* Copy paged appendix. Hmm... why does this look so complicated? */
+       for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
+               int end;
+
+               WARN_ON(start > offset + len);
+
+               /* [start, end) is the span of this fragment within the skb data */
+               end = start + skb_shinfo(skb)->frags[i].size;
+               copy = end - offset;
+               if (copy > 0) {
+                       skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
+                       struct page *page = frag->page;
+
+                       if (copy > len)
+                               copy = len;
+
+                       cookie = dma_memcpy_pg_to_const_iovec(chan, to, pinned_list, page,
+                                       frag->page_offset + offset - start, iov_offset, copy);
+                       if (cookie < 0)
+                               goto fault;
+                       len -= copy;
+                       if (len == 0)
+                               goto end;
+                       offset += copy;
+                       iov_offset += copy;
+               }
+               start = end;
+       }
+
+       skb_walk_frags(skb, frag_iter) {
+               int end;
+
+               WARN_ON(start > offset + len);
+
+               end = start + frag_iter->len;
+               copy = end - offset;
+               if (copy > 0) {
+                       if (copy > len)
+                               copy = len;
+                       /* offset is rebased so it is relative to the nested skb */
+                       cookie = dma_skb_copy_datagram_const_iovec(chan, frag_iter,
+                                                            offset - start,
+                                                            to, iov_offset, copy,
+                                                            pinned_list);
+                       if (cookie < 0)
+                               goto fault;
+                       len -= copy;
+                       if (len == 0)
+                               goto end;
+                       offset += copy;
+                       iov_offset += copy;
+               }
+               start = end;
+       }
+
+end:
+       if (!len) {
+               skb->dma_cookie = cookie;
+               return cookie;
+       }
+
+       /* data ran out before len bytes were copied: fall through to fault */
+fault:
+       return -EFAULT;
+}
+#endif
+
 /* Get packet from user space buffer */
 static __inline__ ssize_t tun_get_user(struct tun_struct *tun,
                                       const struct iovec *iv, size_t count,
@@ -706,6 +1069,9 @@ static __inline__ ssize_t tun_put_user(struct tun_struct 
*tun,
 {
        struct tun_pi pi = { 0, skb->protocol };
        ssize_t total = 0;
+       struct dma_chan *dma_chan;
+       struct dma_pinned_list *pinned_list;
+       int dma_cookie;
 
        if (!(tun->flags & TUN_NO_PI)) {
                if ((len -= sizeof(pi)) < 0)
@@ -768,8 +1134,29 @@ static __inline__ ssize_t tun_put_user(struct tun_struct 
*tun,
        }
 
        len = min_t(int, skb->len, len);
-
+#ifdef CONFIG_NET_DMA
+
+       if (len < tun_dma_copybreak)
+               goto copy;
+
+       dma_chan = dma_find_channel(DMA_MEMCPY);
+       if (!dma_chan)
+               goto copy;
+       pinned_list = dma_pin_const_iovec_pages(iv, total, len);
+       if (!pinned_list)
+               goto copy;
+       dma_cookie = dma_skb_copy_datagram_const_iovec(dma_chan, skb, 0, iv,
+                                                      total, len, pinned_list);
+       if (dma_cookie >= 0) {
+               dma_async_memcpy_issue_pending(dma_chan);
+               dma_sync_wait(dma_chan, dma_cookie);
+       }
+       dma_unpin_iovec_pages(pinned_list);
+       goto done;
+#endif
+copy:
        skb_copy_datagram_const_iovec(skb, 0, iv, total, len);
+done:
        total += skb->len;
 
        tun->dev->stats.tx_packets++;
-- 
1.7.3-rc1
--
To unsubscribe from this list: send the line "unsubscribe kvm" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to