[PATCH (net-next.git) 13/18] stmmac: perf, remove modulo in stmmac_rx()

2016-01-04 Thread Giuseppe Cavallaro
The indexes into the ring buffer are always incremented, and
each entry is accessed by taking the index modulo the ring size
to find the "real" index. Modulo is an expensive operation.

This patch replaces the modulo with a simple conditional wrap-around:
the index is reset to zero once it reaches the ring size.
This improves the stmmac RX path, since the computation is done inside the RX loop.

Signed-off-by: Fabrice Gasnier 
Signed-off-by: Giuseppe Cavallaro 
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |   28 ++---
 1 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 5bb2804..4a75f79 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -212,6 +212,18 @@ static inline u32 stmmac_tx_avail(struct stmmac_priv *priv)
return avail;
 }
 
+static inline u32 stmmac_rx_dirty(struct stmmac_priv *priv)
+{
+   unsigned dirty;
+
+   if (priv->dirty_rx <= priv->cur_rx)
+   dirty = priv->cur_rx - priv->dirty_rx;
+   else
+   dirty = priv->dma_rx_size - priv->dirty_rx + priv->cur_rx;
+
+   return dirty;
+}
+
 /**
  * stmmac_hw_fix_mac_speed - callback for speed selection
  * @priv: driver private structure
@@ -2162,9 +2174,10 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv)
 {
unsigned int rxsize = priv->dma_rx_size;
int bfsize = priv->dma_buf_sz;
+   unsigned int entry = priv->dirty_rx;
+   int dirty = stmmac_rx_dirty(priv);
 
-   for (; priv->cur_rx - priv->dirty_rx > 0; priv->dirty_rx++) {
-   unsigned int entry = priv->dirty_rx % rxsize;
+   while (dirty-- > 0) {
struct dma_desc *p;
 
if (priv->extend_desc)
@@ -2200,6 +2213,10 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv)
wmb();
priv->hw->desc->set_rx_owner(p);
wmb();
+
+   if (unlikely(++priv->dirty_rx >= rxsize))
+   priv->dirty_rx = 0;
+   entry = priv->dirty_rx;
}
 }
 
@@ -2213,7 +2230,7 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv)
 static int stmmac_rx(struct stmmac_priv *priv, int limit)
 {
unsigned int rxsize = priv->dma_rx_size;
-   unsigned int entry = priv->cur_rx % rxsize;
+   unsigned int entry = priv->cur_rx;
unsigned int next_entry;
unsigned int count = 0;
int coe = priv->hw->rx_csum;
@@ -2243,7 +2260,10 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit)
 
count++;
 
-   next_entry = (++priv->cur_rx) % rxsize;
+   if (unlikely(++priv->cur_rx >= rxsize))
+   priv->cur_rx = 0;
+   next_entry = priv->cur_rx;
+
if (priv->extend_desc)
prefetch(priv->dma_erx + next_entry);
else
-- 
1.7.4.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH (net-next.git) 13/18] stmmac: perf, remove modulo in stmmac_rx()

2015-12-09 Thread Giuseppe Cavallaro
The indexes into the ring buffer are always incremented, and
each entry is accessed by taking the index modulo the ring size
to find the "real" index. Modulo is an expensive operation.

This patch replaces the modulo with a simple conditional wrap-around:
the index is reset to zero once it reaches the ring size.
This improves the stmmac RX path, since the computation is done inside the RX loop.

Signed-off-by: Fabrice Gasnier 
Signed-off-by: Giuseppe Cavallaro 
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c |   28 ++---
 1 files changed, 24 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 0a39331..52ac648 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -211,6 +211,18 @@ static inline u32 stmmac_tx_avail(struct stmmac_priv *priv)
return avail;
 }
 
+static inline u32 stmmac_rx_dirty(struct stmmac_priv *priv)
+{
+   unsigned dirty;
+
+   if (priv->dirty_rx <= priv->cur_rx)
+   dirty = priv->cur_rx - priv->dirty_rx;
+   else
+   dirty = priv->dma_rx_size - priv->dirty_rx + priv->cur_rx;
+
+   return dirty;
+}
+
 /**
  * stmmac_hw_fix_mac_speed - callback for speed selection
  * @priv: driver private structure
@@ -2165,9 +2177,10 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv)
 {
unsigned int rxsize = priv->dma_rx_size;
int bfsize = priv->dma_buf_sz;
+   unsigned int entry = priv->dirty_rx;
+   int dirty = stmmac_rx_dirty(priv);
 
-   for (; priv->cur_rx - priv->dirty_rx > 0; priv->dirty_rx++) {
-   unsigned int entry = priv->dirty_rx % rxsize;
+   while (dirty-- > 0) {
struct dma_desc *p;
 
if (priv->extend_desc)
@@ -2203,6 +2216,10 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv)
wmb();
priv->hw->desc->set_rx_owner(p);
wmb();
+
+   if (unlikely(++priv->dirty_rx >= rxsize))
+   priv->dirty_rx = 0;
+   entry = priv->dirty_rx;
}
 }
 
@@ -2216,7 +2233,7 @@ static inline void stmmac_rx_refill(struct stmmac_priv *priv)
 static int stmmac_rx(struct stmmac_priv *priv, int limit)
 {
unsigned int rxsize = priv->dma_rx_size;
-   unsigned int entry = priv->cur_rx % rxsize;
+   unsigned int entry = priv->cur_rx;
unsigned int next_entry;
unsigned int count = 0;
int coe = priv->hw->rx_csum;
@@ -2246,7 +2263,10 @@ static int stmmac_rx(struct stmmac_priv *priv, int limit)
 
count++;
 
-   next_entry = (++priv->cur_rx) % rxsize;
+   if (unlikely(++priv->cur_rx >= rxsize))
+   priv->cur_rx = 0;
+   next_entry = priv->cur_rx;
+
if (priv->extend_desc)
prefetch(priv->dma_erx + next_entry);
else
-- 
1.7.4.4

--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: [PATCH (net-next.git) 13/18] stmmac: perf, remove modulo in stmmac_rx()

2015-12-09 Thread David Laight
From: Giuseppe Cavallaro
> Sent: 09 December 2015 08:38
> The indexes into the ring buffer are always incremented, and
> the entry is accessed via doing a modulo to find the "real" index.
> Modulo is an expensive operation.
> 
> This patch replaces the modulo with a simple if clamp.
> It helps improve stmmac RX path as it's being called inside RX loop.

Is dma_rx_size always a power of 2 ?
If so you can replace the % by & and remove the conditionals.

If you have changed the read and write values to indexes
then you need to be certain that the 'ring full' and 'ring empty'
cases are correctly distinguished.

David
--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH (net-next.git) 13/18] stmmac: perf, remove modulo in stmmac_rx()

2015-12-09 Thread Giuseppe CAVALLARO

On 12/10/2015 6:11 AM, Giuseppe CAVALLARO wrote:
> On 12/9/2015 6:21 PM, David Laight wrote:
>> From: Giuseppe Cavallaro
>>> Sent: 09 December 2015 08:38
>>> The indexes into the ring buffer are always incremented, and
>>> the entry is accessed via doing a modulo to find the "real" index.
>>> Modulo is an expensive operation.
>>>
>>> This patch replaces the modulo with a simple if clamp.
>>> It helps improve stmmac RX path as it's being called inside RX loop.
>>
>> Is dma_rx_size always a power of 2 ?
>
> no
>
>> If so you can replace the % by & and remove the conditionals.
>>
>> If you have changed the read and write values to indexes
>> then you need to be certain that the 'ring full' and 'ring empty'
>> cases are correctly distinguished.
>
> this is implicitly managed by dirty and curr index.

  ... also the dma will fail with unavailable buffer.

peppe

> thx
> peppe
>
>> David







--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH (net-next.git) 13/18] stmmac: perf, remove modulo in stmmac_rx()

2015-12-09 Thread Giuseppe CAVALLARO

On 12/9/2015 6:21 PM, David Laight wrote:
> From: Giuseppe Cavallaro
>> Sent: 09 December 2015 08:38
>> The indexes into the ring buffer are always incremented, and
>> the entry is accessed via doing a modulo to find the "real" index.
>> Modulo is an expensive operation.
>>
>> This patch replaces the modulo with a simple if clamp.
>> It helps improve stmmac RX path as it's being called inside RX loop.
>
> Is dma_rx_size always a power of 2 ?

no

> If so you can replace the % by & and remove the conditionals.
>
> If you have changed the read and write values to indexes
> then you need to be certain that the 'ring full' and 'ring empty'
> cases are correctly distinguished.

this is implicitly managed by dirty and curr index.

thx
peppe

> David



--
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html