Re: [PATCH 3/3] chelsio: more rx speedup

2007-01-18 Thread Jeff Garzik

Stephen Hemminger wrote:

Cleanup receive processing some more:
   * do the reserve padding of skb during setup
   * don't pass constants to get_packet
   * do smart prefetch of skb
   * make copybreak a module parameter

Signed-off-by: Stephen Hemminger [EMAIL PROTECTED]


applied


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/3] chelsio: more rx speedup

2007-01-10 Thread Ingo Oeser
Stephen Hemminger schrieb:
 On Tue, 9 Jan 2007 09:42:03 +0100
 Ingo Oeser [EMAIL PROTECTED] wrote:
  Stephen Hemminger schrieb:
   --- netdev-2.6.orig/drivers/net/chelsio/sge.c
   +++ netdev-2.6/drivers/net/chelsio/sge.c
  Please use NET_IP_ALIGN here:
 
 Wrong, NET_IP_ALIGN is intended to deal with platforms where alignment of DMA 
 is more
 important of alignment of structures.  Therefore if data is copied, it should
 always be 2.

Ah! Thanks for clearing this up. These magic numbers always make my head spin
so I tried to come up with sth. useful :-)

If this pattern is present in many drivers, we might have a define or helper 
for this.


Regards

Ingo Oeser
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/3] chelsio: more rx speedup

2007-01-10 Thread Ingo Oeser
Divy Le Ray schrieb:
 Stephen Hemminger wrote:
  On Tue, 9 Jan 2007 09:42:03 +0100
  Ingo Oeser [EMAIL PROTECTED] wrote:
  Stephen Hemminger schrieb:
  - if (fl-credits  drop_thres) {
  +use_orig_buf:
  + if (fl-credits  2) {

  Why 2? What does this magic number mean?
  No idea, it was there in the original. (as a parameter).
 The T2 HW behaves nicely when it is guaranteed to have 2 available 
 entries in the rx free list.

Can we have a #define for this?

That would help people understand this issue more.
And don't be shy about errata, all hardware and software out there has 
them like the humans that made them :-)

Regards

Ingo Oeser
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/3] chelsio: more rx speedup

2007-01-09 Thread Stephen Hemminger
On Tue, 9 Jan 2007 09:42:03 +0100
Ingo Oeser [EMAIL PROTECTED] wrote:

 Hi Stephen,
 
 Stephen Hemminger schrieb:
  --- netdev-2.6.orig/drivers/net/chelsio/sge.c
  +++ netdev-2.6/drivers/net/chelsio/sge.c
 [...]
  @@ -1043,45 +1046,42 @@ static void recycle_fl_buf(struct freelQ
* be copied but there is no memory for the copy.
*/
   static inline struct sk_buff *get_packet(struct pci_dev *pdev,
  -struct freelQ *fl, unsigned int len,
  -int dma_pad, int skb_pad,
  -unsigned int copy_thres,
  -unsigned int drop_thres)
  +struct freelQ *fl, unsigned int len)
   {
  struct sk_buff *skb;
  -   struct freelQ_ce *ce = fl-centries[fl-cidx];
  +   const struct freelQ_ce *ce = fl-centries[fl-cidx];
   
  -   if (len  copy_thres) {
  -   skb = alloc_skb(len + skb_pad, GFP_ATOMIC);
  -   if (likely(skb != NULL)) {
  -   skb_reserve(skb, skb_pad);
  -   skb_put(skb, len);
  -   pci_dma_sync_single_for_cpu(pdev,
  -   pci_unmap_addr(ce, dma_addr),
  -   pci_unmap_len(ce, dma_len),
  -   PCI_DMA_FROMDEVICE);
  -   memcpy(skb-data, ce-skb-data + dma_pad, len);
  -   pci_dma_sync_single_for_device(pdev,
  +   if (len  copybreak) {
  +   skb = alloc_skb(len + 2, GFP_ATOMIC);
  +   if (!skb)
  +   goto use_orig_buf;
  +
  +   skb_reserve(skb, 2);/* align IP header */
 
 Please use NET_IP_ALIGN here:

Wrong, NET_IP_ALIGN is intended to deal with platforms where alignment of DMA 
is more
important of alignment of structures.  Therefore if data is copied, it should
always be 2.

 
 + skb = alloc_skb(len + NET_IP_ALIGN, GFP_ATOMIC);
 + if (!skb)
 + goto use_orig_buf;
 +
 + skb_reserve(skb, NET_IP_ALIGN);
 
  +   skb_put(skb, len);
  +   pci_dma_sync_single_for_cpu(pdev,
  pci_unmap_addr(ce, dma_addr),
  pci_unmap_len(ce, dma_len),
  PCI_DMA_FROMDEVICE);
  -   } else if (!drop_thres)
  -   goto use_orig_buf;
  -
  +   memcpy(skb-data, ce-skb-data, len);
  +   pci_dma_sync_single_for_device(pdev,
  +  pci_unmap_addr(ce, dma_addr),
  +  pci_unmap_len(ce, dma_len),
  +  PCI_DMA_FROMDEVICE);
  recycle_fl_buf(fl, fl-cidx);
  return skb;
  }
   
  -   if (fl-credits  drop_thres) {
  +use_orig_buf:
  +   if (fl-credits  2) {
 
 Why 2? What does this magic number mean?

No idea, it was there in the original. (as a parameter).

-- 
Stephen Hemminger [EMAIL PROTECTED]
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/3] chelsio: more rx speedup

2007-01-09 Thread Divy Le Ray

Stephen Hemminger wrote:

On Tue, 9 Jan 2007 09:42:03 +0100
Ingo Oeser [EMAIL PROTECTED] wrote:

  

Hi Stephen,

Stephen Hemminger schrieb:


--- netdev-2.6.orig/drivers/net/chelsio/sge.c
+++ netdev-2.6/drivers/net/chelsio/sge.c
  

[...]


@@ -1043,45 +1046,42 @@ static void recycle_fl_buf(struct freelQ
  * be copied but there is no memory for the copy.
  */
 static inline struct sk_buff *get_packet(struct pci_dev *pdev,
-struct freelQ *fl, unsigned int len,
-int dma_pad, int skb_pad,
-unsigned int copy_thres,
-unsigned int drop_thres)
+struct freelQ *fl, unsigned int len)
 {
struct sk_buff *skb;
-   struct freelQ_ce *ce = fl-centries[fl-cidx];
+   const struct freelQ_ce *ce = fl-centries[fl-cidx];
 
-	if (len  copy_thres) {

-   skb = alloc_skb(len + skb_pad, GFP_ATOMIC);
-   if (likely(skb != NULL)) {
-   skb_reserve(skb, skb_pad);
-   skb_put(skb, len);
-   pci_dma_sync_single_for_cpu(pdev,
-   pci_unmap_addr(ce, dma_addr),
-   pci_unmap_len(ce, dma_len),
-   PCI_DMA_FROMDEVICE);
-   memcpy(skb-data, ce-skb-data + dma_pad, len);
-   pci_dma_sync_single_for_device(pdev,
+   if (len  copybreak) {
+   skb = alloc_skb(len + 2, GFP_ATOMIC);
+   if (!skb)
+   goto use_orig_buf;
+
+   skb_reserve(skb, 2);/* align IP header */
  

Please use NET_IP_ALIGN here:



Wrong, NET_IP_ALIGN is intended to deal with platforms where alignment of DMA 
is more
important of alignment of structures.  Therefore if data is copied, it should
always be 2.

  

+   skb = alloc_skb(len + NET_IP_ALIGN, GFP_ATOMIC);
+   if (!skb)
+   goto use_orig_buf;
+
+   skb_reserve(skb, NET_IP_ALIGN);



+   skb_put(skb, len);
+   pci_dma_sync_single_for_cpu(pdev,
pci_unmap_addr(ce, dma_addr),
pci_unmap_len(ce, dma_len),
PCI_DMA_FROMDEVICE);
-   } else if (!drop_thres)
-   goto use_orig_buf;
-
+   memcpy(skb-data, ce-skb-data, len);
+   pci_dma_sync_single_for_device(pdev,
+  pci_unmap_addr(ce, dma_addr),
+  pci_unmap_len(ce, dma_len),
+  PCI_DMA_FROMDEVICE);
recycle_fl_buf(fl, fl-cidx);
return skb;
}
 
-	if (fl-credits  drop_thres) {

+use_orig_buf:
+   if (fl-credits  2) {
  

Why 2? What does this magic number mean?



No idea, it was there in the original. (as a parameter).

  
The T2 HW behaves nicely when it is guaranteed to have 2 available 
entries in the rx free list.


Cheers,
Divy
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 3/3] chelsio: more rx speedup

2007-01-08 Thread Stephen Hemminger
Cleanup receive processing some more:
   * do the reserve padding of skb during setup
   * don't pass constants to get_packet
   * do smart prefetch of skb
   * make copybreak a module parameter

Signed-off-by: Stephen Hemminger [EMAIL PROTECTED]

---
 drivers/net/chelsio/sge.c |   87 +++---
 1 file changed, 45 insertions(+), 42 deletions(-)

--- netdev-2.6.orig/drivers/net/chelsio/sge.c
+++ netdev-2.6/drivers/net/chelsio/sge.c
@@ -71,12 +71,9 @@
 #define SGE_FREEL_REFILL_THRESH16
 #define SGE_RESPQ_E_N  1024
 #define SGE_INTRTIMER_NRES 1000
-#define SGE_RX_COPY_THRES  256
 #define SGE_RX_SM_BUF_SIZE 1536
 #define SGE_TX_DESC_MAX_PLEN   16384
 
-# define SGE_RX_DROP_THRES 2
-
 #define SGE_RESPQ_REPLENISH_THRES (SGE_RESPQ_E_N / 4)
 
 /*
@@ -846,6 +843,8 @@ static void refill_free_list(struct sge 
skb_reserve(skb, q-dma_offset);
mapping = pci_map_single(pdev, skb-data, dma_len,
 PCI_DMA_FROMDEVICE);
+   skb_reserve(skb, sge-rx_pkt_pad);
+
ce-skb = skb;
pci_unmap_addr_set(ce, dma_addr, mapping);
pci_unmap_len_set(ce, dma_len, dma_len);
@@ -1024,6 +1023,10 @@ static void recycle_fl_buf(struct freelQ
}
 }
 
+static int copybreak __read_mostly = 256;
+module_param(copybreak, int, 0);
+MODULE_PARM_DESC(copybreak, Receive copy threshold);
+
 /**
  * get_packet - return the next ingress packet buffer
  * @pdev: the PCI device that received the packet
@@ -1043,45 +1046,42 @@ static void recycle_fl_buf(struct freelQ
  * be copied but there is no memory for the copy.
  */
 static inline struct sk_buff *get_packet(struct pci_dev *pdev,
-struct freelQ *fl, unsigned int len,
-int dma_pad, int skb_pad,
-unsigned int copy_thres,
-unsigned int drop_thres)
+struct freelQ *fl, unsigned int len)
 {
struct sk_buff *skb;
-   struct freelQ_ce *ce = fl-centries[fl-cidx];
+   const struct freelQ_ce *ce = fl-centries[fl-cidx];
 
-   if (len  copy_thres) {
-   skb = alloc_skb(len + skb_pad, GFP_ATOMIC);
-   if (likely(skb != NULL)) {
-   skb_reserve(skb, skb_pad);
-   skb_put(skb, len);
-   pci_dma_sync_single_for_cpu(pdev,
-   pci_unmap_addr(ce, dma_addr),
-   pci_unmap_len(ce, dma_len),
-   PCI_DMA_FROMDEVICE);
-   memcpy(skb-data, ce-skb-data + dma_pad, len);
-   pci_dma_sync_single_for_device(pdev,
+   if (len  copybreak) {
+   skb = alloc_skb(len + 2, GFP_ATOMIC);
+   if (!skb)
+   goto use_orig_buf;
+
+   skb_reserve(skb, 2);/* align IP header */
+   skb_put(skb, len);
+   pci_dma_sync_single_for_cpu(pdev,
pci_unmap_addr(ce, dma_addr),
pci_unmap_len(ce, dma_len),
PCI_DMA_FROMDEVICE);
-   } else if (!drop_thres)
-   goto use_orig_buf;
-
+   memcpy(skb-data, ce-skb-data, len);
+   pci_dma_sync_single_for_device(pdev,
+  pci_unmap_addr(ce, dma_addr),
+  pci_unmap_len(ce, dma_len),
+  PCI_DMA_FROMDEVICE);
recycle_fl_buf(fl, fl-cidx);
return skb;
}
 
-   if (fl-credits  drop_thres) {
+use_orig_buf:
+   if (fl-credits  2) {
recycle_fl_buf(fl, fl-cidx);
return NULL;
}
 
-use_orig_buf:
pci_unmap_single(pdev, pci_unmap_addr(ce, dma_addr),
 pci_unmap_len(ce, dma_len), PCI_DMA_FROMDEVICE);
skb = ce-skb;
-   skb_reserve(skb, dma_pad);
+   prefetch(skb-data);
+
skb_put(skb, len);
return skb;
 }
@@ -1359,27 +1359,25 @@ static void restart_sched(unsigned long 
  *
  * Process an ingress ethernet pakcet and deliver it to the stack.
  */
-static int sge_rx(struct sge *sge, struct freelQ *fl, unsigned int len)
+static void sge_rx(struct sge *sge, struct freelQ *fl, unsigned int len)
 {
struct sk_buff *skb;
-   struct cpl_rx_pkt *p;
+   const struct cpl_rx_pkt *p;
struct adapter *adapter = sge-adapter;
struct sge_port_stats *st;
 
-   skb = get_packet(adapter-pdev, fl, len - sge-rx_pkt_pad,
-sge-rx_pkt_pad, 2, SGE_RX_COPY_THRES,
-