From: Alexander Duyck <alexander.h.du...@intel.com>

Update the driver code so that we do bulk updates of the page reference
count instead of incrementing it one reference at a time.  The advantage
of doing this is that we cut down on atomic operations, which in turn
should give us a slight improvement in cycles per packet.  In addition,
if we eventually move this over to using build_skb the gains will be
more noticeable.
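
Roughly speaking, the idea is to take a large block of page references
up front and hand them out from a per-buffer pagecnt_bias counter, so
that the per-packet cost is a plain decrement rather than an atomic
get_page().  In sketch form (the actual hunks are below):

	/* per packet: hand out one of the references we already hold */
	unsigned int pagecnt_bias = rx_buffer->pagecnt_bias--;

	/* only when the local pool is drained do we touch the atomic
	 * page count, restocking USHRT_MAX references in one operation
	 */
	if (unlikely(pagecnt_bias == 1)) {
		page_ref_add(page, USHRT_MAX);
		rx_buffer->pagecnt_bias = USHRT_MAX;
	}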

I also found and fixed a store forwarding stall caused by the assignment
"*new_buff = *old_buff".  By breaking it up into individual field copies
we can avoid the stall, and as a result performance is slightly
improved.
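
The stall most likely comes from the whole-struct copy being read back
with loads wider than the stores that just wrote the individual fields,
which defeats store-to-load forwarding; copying each field separately
keeps the load sizes matched to the stores.  Schematically (this mirrors
the i40e_reuse_rx_page() hunks below):

	/* before: whole-struct copy, prone to a store forwarding stall */
	*new_buff = *old_buff;

	/* after: per-field copies sized to match the earlier stores */
	new_buff->dma           = old_buff->dma;
	new_buff->page          = old_buff->page;
	new_buff->page_offset   = old_buff->page_offset;
	new_buff->pagecnt_bias  = old_buff->pagecnt_bias;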

Change-ID: I1d3880dece4133eca3c32423b04a5467321ccc52
Signed-off-by: Alexander Duyck <alexander.h.du...@intel.com>
Tested-by: Andrew Bowers <andrewx.bow...@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirs...@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_txrx.c   | 25 ++++++++++++++++++-------
 drivers/net/ethernet/intel/i40e/i40e_txrx.h   |  7 ++++++-
 drivers/net/ethernet/intel/i40evf/i40e_txrx.c | 24 ++++++++++++++++++------
 drivers/net/ethernet/intel/i40evf/i40e_txrx.h |  7 ++++++-
 4 files changed, 48 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 0ca307a6c731..e5c89770cbc2 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1154,7 +1154,7 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
                                     PAGE_SIZE,
                                     DMA_FROM_DEVICE,
                                     I40E_RX_DMA_ATTR);
-               __free_pages(rx_bi->page, 0);
+               __page_frag_cache_drain(rx_bi->page, rx_bi->pagecnt_bias);
 
                rx_bi->page = NULL;
                rx_bi->page_offset = 0;
@@ -1299,6 +1299,7 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,
        bi->dma = dma;
        bi->page = page;
        bi->page_offset = 0;
+       bi->pagecnt_bias = 1;
 
        return true;
 }
@@ -1604,7 +1605,10 @@ static void i40e_reuse_rx_page(struct i40e_ring *rx_ring,
        rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
 
        /* transfer page from old buffer to new buffer */
-       *new_buff = *old_buff;
+       new_buff->dma           = old_buff->dma;
+       new_buff->page          = old_buff->page;
+       new_buff->page_offset   = old_buff->page_offset;
+       new_buff->pagecnt_bias  = old_buff->pagecnt_bias;
 }
 
 /**
@@ -1656,6 +1660,7 @@ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer,
 #if (PAGE_SIZE >= 8192)
        unsigned int last_offset = PAGE_SIZE - I40E_RXBUFFER_2048;
 #endif
+       unsigned int pagecnt_bias = rx_buffer->pagecnt_bias--;
 
        /* Is any reuse possible? */
        if (unlikely(!i40e_page_is_reusable(page)))
@@ -1663,7 +1668,7 @@ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer,
 
 #if (PAGE_SIZE < 8192)
        /* if we are only owner of page we can reuse it */
-       if (unlikely(page_count(page) != 1))
+       if (unlikely(page_count(page) != pagecnt_bias))
                return false;
 
        /* flip page offset to other buffer */
@@ -1676,9 +1681,14 @@ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer,
                return false;
 #endif
 
-       /* Inc ref count on page before passing it up to the stack */
-       get_page(page);
-
+       /* If we have drained the page fragment pool we need to update
+        * the pagecnt_bias and page count so that we fully restock the
+        * number of references the driver holds.
+        */
+       if (unlikely(pagecnt_bias == 1)) {
+               page_ref_add(page, USHRT_MAX);
+               rx_buffer->pagecnt_bias = USHRT_MAX;
+       }
        return true;
 }
 
@@ -1725,7 +1735,6 @@ static bool i40e_add_rx_frag(struct i40e_ring *rx_ring,
                        return true;
 
                /* this page cannot be reused so discard it */
-               __free_pages(page, 0);
                return false;
        }
 
@@ -1819,6 +1828,8 @@ struct sk_buff *i40e_fetch_rx_buffer(struct i40e_ring *rx_ring,
                /* we are not reusing the buffer so unmap it */
                dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, PAGE_SIZE,
                                     DMA_FROM_DEVICE, I40E_RX_DMA_ATTR);
+               __page_frag_cache_drain(rx_buffer->page,
+                                       rx_buffer->pagecnt_bias);
        }
 
        /* clear contents of buffer_info */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index 49c7b2089d8e..77c3e96f5172 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -258,7 +258,12 @@ struct i40e_tx_buffer {
 struct i40e_rx_buffer {
        dma_addr_t dma;
        struct page *page;
-       unsigned int page_offset;
+#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
+       __u32 page_offset;
+#else
+       __u16 page_offset;
+#endif
+       __u16 pagecnt_bias;
 };
 
 struct i40e_queue_stats {
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
index d7790c08e523..d892922a2ed9 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.c
@@ -526,7 +526,7 @@ void i40evf_clean_rx_ring(struct i40e_ring *rx_ring)
                                     PAGE_SIZE,
                                     DMA_FROM_DEVICE,
                                     I40E_RX_DMA_ATTR);
-               __free_pages(rx_bi->page, 0);
+               __page_frag_cache_drain(rx_bi->page, rx_bi->pagecnt_bias);
 
                rx_bi->page = NULL;
                rx_bi->page_offset = 0;
@@ -671,6 +671,7 @@ static bool i40e_alloc_mapped_page(struct i40e_ring *rx_ring,
        bi->dma = dma;
        bi->page = page;
        bi->page_offset = 0;
+       bi->pagecnt_bias = 1;
 
        return true;
 }
@@ -966,7 +967,10 @@ static void i40e_reuse_rx_page(struct i40e_ring *rx_ring,
        rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
 
        /* transfer page from old buffer to new buffer */
-       *new_buff = *old_buff;
+       new_buff->dma           = old_buff->dma;
+       new_buff->page          = old_buff->page;
+       new_buff->page_offset   = old_buff->page_offset;
+       new_buff->pagecnt_bias  = old_buff->pagecnt_bias;
 }
 
 /**
@@ -1018,6 +1022,7 @@ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer,
 #if (PAGE_SIZE >= 8192)
        unsigned int last_offset = PAGE_SIZE - I40E_RXBUFFER_2048;
 #endif
+       unsigned int pagecnt_bias = rx_buffer->pagecnt_bias--;
 
        /* Is any reuse possible? */
        if (unlikely(!i40e_page_is_reusable(page)))
@@ -1025,7 +1030,7 @@ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer,
 
 #if (PAGE_SIZE < 8192)
        /* if we are only owner of page we can reuse it */
-       if (unlikely(page_count(page) != 1))
+       if (unlikely(page_count(page) != pagecnt_bias))
                return false;
 
        /* flip page offset to other buffer */
@@ -1038,8 +1043,14 @@ static bool i40e_can_reuse_rx_page(struct i40e_rx_buffer *rx_buffer,
                return false;
 #endif
 
-       /* Inc ref count on page before passing it up to the stack */
-       get_page(page);
+       /* If we have drained the page fragment pool we need to update
+        * the pagecnt_bias and page count so that we fully restock the
+        * number of references the driver holds.
+        */
+       if (unlikely(pagecnt_bias == 1)) {
+               page_ref_add(page, USHRT_MAX);
+               rx_buffer->pagecnt_bias = USHRT_MAX;
+       }
 
        return true;
 }
@@ -1087,7 +1098,6 @@ static bool i40e_add_rx_frag(struct i40e_ring *rx_ring,
                        return true;
 
                /* this page cannot be reused so discard it */
-               __free_pages(page, 0);
                return false;
        }
 
@@ -1181,6 +1191,8 @@ struct sk_buff *i40evf_fetch_rx_buffer(struct i40e_ring *rx_ring,
                /* we are not reusing the buffer so unmap it */
                dma_unmap_page_attrs(rx_ring->dev, rx_buffer->dma, PAGE_SIZE,
                                     DMA_FROM_DEVICE, I40E_RX_DMA_ATTR);
+               __page_frag_cache_drain(rx_buffer->page,
+                                       rx_buffer->pagecnt_bias);
        }
 
        /* clear contents of buffer_info */
diff --git a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
index 013512124e6a..7b41df1909be 100644
--- a/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40evf/i40e_txrx.h
@@ -244,7 +244,12 @@ struct i40e_tx_buffer {
 struct i40e_rx_buffer {
        dma_addr_t dma;
        struct page *page;
-       unsigned int page_offset;
+#if (BITS_PER_LONG > 32) || (PAGE_SIZE >= 65536)
+       __u32 page_offset;
+#else
+       __u16 page_offset;
+#endif
+       __u16 pagecnt_bias;
 };
 
 struct i40e_queue_stats {
-- 
2.12.0
