Re: [PATCH 3/3] chelsio: more rx speedup
Stephen Hemminger wrote: Cleanup receive processing some more: * do the reserve padding of skb during setup * don't pass constants to get_packet * do smart prefetch of skb * make copybreak a module parameter Signed-off-by: Stephen Hemminger [EMAIL PROTECTED] applied - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 3/3] chelsio: more rx speedup
Stephen Hemminger schrieb: On Tue, 9 Jan 2007 09:42:03 +0100 Ingo Oeser [EMAIL PROTECTED] wrote: Stephen Hemminger schrieb: --- netdev-2.6.orig/drivers/net/chelsio/sge.c +++ netdev-2.6/drivers/net/chelsio/sge.c Please use NET_IP_ALIGN here: Wrong, NET_IP_ALIGN is intended to deal with platforms where alignment of DMA is more important of alignment of structures. Therefore if data is copied, it should always be 2. Ah! Thanks for clearing this up. These magic numbers always make my head spin so I tried to come up with sth. useful :-) If this pattern is present in many drivers, we might have a define or helper for this. Regards Ingo Oeser - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 3/3] chelsio: more rx speedup
Divy Le Ray schrieb: Stephen Hemminger wrote: On Tue, 9 Jan 2007 09:42:03 +0100 Ingo Oeser [EMAIL PROTECTED] wrote: Stephen Hemminger schrieb: - if (fl-credits drop_thres) { +use_orig_buf: + if (fl-credits 2) { Why 2? What does this magic number mean? No idea, it was there in the original. (as a parameter). The T2 HW behaves nicely when it is guaranteed to have 2 available entries in the rx free list. Can we have a #define for this? That would help people understand this issue more. And don't be shy about errata, all hardware and software out there has them like the humans that made them :-) Regards Ingo Oeser - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 3/3] chelsio: more rx speedup
On Tue, 9 Jan 2007 09:42:03 +0100 Ingo Oeser [EMAIL PROTECTED] wrote: Hi Stephen, Stephen Hemminger schrieb: --- netdev-2.6.orig/drivers/net/chelsio/sge.c +++ netdev-2.6/drivers/net/chelsio/sge.c [...] @@ -1043,45 +1046,42 @@ static void recycle_fl_buf(struct freelQ * be copied but there is no memory for the copy. */ static inline struct sk_buff *get_packet(struct pci_dev *pdev, -struct freelQ *fl, unsigned int len, -int dma_pad, int skb_pad, -unsigned int copy_thres, -unsigned int drop_thres) +struct freelQ *fl, unsigned int len) { struct sk_buff *skb; - struct freelQ_ce *ce = fl-centries[fl-cidx]; + const struct freelQ_ce *ce = fl-centries[fl-cidx]; - if (len copy_thres) { - skb = alloc_skb(len + skb_pad, GFP_ATOMIC); - if (likely(skb != NULL)) { - skb_reserve(skb, skb_pad); - skb_put(skb, len); - pci_dma_sync_single_for_cpu(pdev, - pci_unmap_addr(ce, dma_addr), - pci_unmap_len(ce, dma_len), - PCI_DMA_FROMDEVICE); - memcpy(skb-data, ce-skb-data + dma_pad, len); - pci_dma_sync_single_for_device(pdev, + if (len copybreak) { + skb = alloc_skb(len + 2, GFP_ATOMIC); + if (!skb) + goto use_orig_buf; + + skb_reserve(skb, 2);/* align IP header */ Please use NET_IP_ALIGN here: Wrong, NET_IP_ALIGN is intended to deal with platforms where alignment of DMA is more important of alignment of structures. Therefore if data is copied, it should always be 2. + skb = alloc_skb(len + NET_IP_ALIGN, GFP_ATOMIC); + if (!skb) + goto use_orig_buf; + + skb_reserve(skb, NET_IP_ALIGN); + skb_put(skb, len); + pci_dma_sync_single_for_cpu(pdev, pci_unmap_addr(ce, dma_addr), pci_unmap_len(ce, dma_len), PCI_DMA_FROMDEVICE); - } else if (!drop_thres) - goto use_orig_buf; - + memcpy(skb-data, ce-skb-data, len); + pci_dma_sync_single_for_device(pdev, + pci_unmap_addr(ce, dma_addr), + pci_unmap_len(ce, dma_len), + PCI_DMA_FROMDEVICE); recycle_fl_buf(fl, fl-cidx); return skb; } - if (fl-credits drop_thres) { +use_orig_buf: + if (fl-credits 2) { Why 2? What does this magic number mean? No idea, it was there in the original. (as a parameter). -- Stephen Hemminger [EMAIL PROTECTED] - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 3/3] chelsio: more rx speedup
Stephen Hemminger wrote: On Tue, 9 Jan 2007 09:42:03 +0100 Ingo Oeser [EMAIL PROTECTED] wrote: Hi Stephen, Stephen Hemminger schrieb: --- netdev-2.6.orig/drivers/net/chelsio/sge.c +++ netdev-2.6/drivers/net/chelsio/sge.c [...] @@ -1043,45 +1046,42 @@ static void recycle_fl_buf(struct freelQ * be copied but there is no memory for the copy. */ static inline struct sk_buff *get_packet(struct pci_dev *pdev, -struct freelQ *fl, unsigned int len, -int dma_pad, int skb_pad, -unsigned int copy_thres, -unsigned int drop_thres) +struct freelQ *fl, unsigned int len) { struct sk_buff *skb; - struct freelQ_ce *ce = fl-centries[fl-cidx]; + const struct freelQ_ce *ce = fl-centries[fl-cidx]; - if (len copy_thres) { - skb = alloc_skb(len + skb_pad, GFP_ATOMIC); - if (likely(skb != NULL)) { - skb_reserve(skb, skb_pad); - skb_put(skb, len); - pci_dma_sync_single_for_cpu(pdev, - pci_unmap_addr(ce, dma_addr), - pci_unmap_len(ce, dma_len), - PCI_DMA_FROMDEVICE); - memcpy(skb-data, ce-skb-data + dma_pad, len); - pci_dma_sync_single_for_device(pdev, + if (len copybreak) { + skb = alloc_skb(len + 2, GFP_ATOMIC); + if (!skb) + goto use_orig_buf; + + skb_reserve(skb, 2);/* align IP header */ Please use NET_IP_ALIGN here: Wrong, NET_IP_ALIGN is intended to deal with platforms where alignment of DMA is more important of alignment of structures. Therefore if data is copied, it should always be 2. + skb = alloc_skb(len + NET_IP_ALIGN, GFP_ATOMIC); + if (!skb) + goto use_orig_buf; + + skb_reserve(skb, NET_IP_ALIGN); + skb_put(skb, len); + pci_dma_sync_single_for_cpu(pdev, pci_unmap_addr(ce, dma_addr), pci_unmap_len(ce, dma_len), PCI_DMA_FROMDEVICE); - } else if (!drop_thres) - goto use_orig_buf; - + memcpy(skb-data, ce-skb-data, len); + pci_dma_sync_single_for_device(pdev, + pci_unmap_addr(ce, dma_addr), + pci_unmap_len(ce, dma_len), + PCI_DMA_FROMDEVICE); recycle_fl_buf(fl, fl-cidx); return skb; } - if (fl-credits drop_thres) { +use_orig_buf: + if (fl-credits 2) { Why 2? What does this magic number mean? No idea, it was there in the original. (as a parameter). The T2 HW behaves nicely when it is guaranteed to have 2 available entries in the rx free list. Cheers, Divy - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/3] chelsio: more rx speedup
Cleanup receive processing some more: * do the reserve padding of skb during setup * don't pass constants to get_packet * do smart prefetch of skb * make copybreak a module parameter Signed-off-by: Stephen Hemminger [EMAIL PROTECTED] --- drivers/net/chelsio/sge.c | 87 +++--- 1 file changed, 45 insertions(+), 42 deletions(-) --- netdev-2.6.orig/drivers/net/chelsio/sge.c +++ netdev-2.6/drivers/net/chelsio/sge.c @@ -71,12 +71,9 @@ #define SGE_FREEL_REFILL_THRESH16 #define SGE_RESPQ_E_N 1024 #define SGE_INTRTIMER_NRES 1000 -#define SGE_RX_COPY_THRES 256 #define SGE_RX_SM_BUF_SIZE 1536 #define SGE_TX_DESC_MAX_PLEN 16384 -# define SGE_RX_DROP_THRES 2 - #define SGE_RESPQ_REPLENISH_THRES (SGE_RESPQ_E_N / 4) /* @@ -846,6 +843,8 @@ static void refill_free_list(struct sge skb_reserve(skb, q-dma_offset); mapping = pci_map_single(pdev, skb-data, dma_len, PCI_DMA_FROMDEVICE); + skb_reserve(skb, sge-rx_pkt_pad); + ce-skb = skb; pci_unmap_addr_set(ce, dma_addr, mapping); pci_unmap_len_set(ce, dma_len, dma_len); @@ -1024,6 +1023,10 @@ static void recycle_fl_buf(struct freelQ } } +static int copybreak __read_mostly = 256; +module_param(copybreak, int, 0); +MODULE_PARM_DESC(copybreak, Receive copy threshold); + /** * get_packet - return the next ingress packet buffer * @pdev: the PCI device that received the packet @@ -1043,45 +1046,42 @@ static void recycle_fl_buf(struct freelQ * be copied but there is no memory for the copy. */ static inline struct sk_buff *get_packet(struct pci_dev *pdev, -struct freelQ *fl, unsigned int len, -int dma_pad, int skb_pad, -unsigned int copy_thres, -unsigned int drop_thres) +struct freelQ *fl, unsigned int len) { struct sk_buff *skb; - struct freelQ_ce *ce = fl-centries[fl-cidx]; + const struct freelQ_ce *ce = fl-centries[fl-cidx]; - if (len copy_thres) { - skb = alloc_skb(len + skb_pad, GFP_ATOMIC); - if (likely(skb != NULL)) { - skb_reserve(skb, skb_pad); - skb_put(skb, len); - pci_dma_sync_single_for_cpu(pdev, - pci_unmap_addr(ce, dma_addr), - pci_unmap_len(ce, dma_len), - PCI_DMA_FROMDEVICE); - memcpy(skb-data, ce-skb-data + dma_pad, len); - pci_dma_sync_single_for_device(pdev, + if (len copybreak) { + skb = alloc_skb(len + 2, GFP_ATOMIC); + if (!skb) + goto use_orig_buf; + + skb_reserve(skb, 2);/* align IP header */ + skb_put(skb, len); + pci_dma_sync_single_for_cpu(pdev, pci_unmap_addr(ce, dma_addr), pci_unmap_len(ce, dma_len), PCI_DMA_FROMDEVICE); - } else if (!drop_thres) - goto use_orig_buf; - + memcpy(skb-data, ce-skb-data, len); + pci_dma_sync_single_for_device(pdev, + pci_unmap_addr(ce, dma_addr), + pci_unmap_len(ce, dma_len), + PCI_DMA_FROMDEVICE); recycle_fl_buf(fl, fl-cidx); return skb; } - if (fl-credits drop_thres) { +use_orig_buf: + if (fl-credits 2) { recycle_fl_buf(fl, fl-cidx); return NULL; } -use_orig_buf: pci_unmap_single(pdev, pci_unmap_addr(ce, dma_addr), pci_unmap_len(ce, dma_len), PCI_DMA_FROMDEVICE); skb = ce-skb; - skb_reserve(skb, dma_pad); + prefetch(skb-data); + skb_put(skb, len); return skb; } @@ -1359,27 +1359,25 @@ static void restart_sched(unsigned long * * Process an ingress ethernet pakcet and deliver it to the stack. */ -static int sge_rx(struct sge *sge, struct freelQ *fl, unsigned int len) +static void sge_rx(struct sge *sge, struct freelQ *fl, unsigned int len) { struct sk_buff *skb; - struct cpl_rx_pkt *p; + const struct cpl_rx_pkt *p; struct adapter *adapter = sge-adapter; struct sge_port_stats *st; - skb = get_packet(adapter-pdev, fl, len - sge-rx_pkt_pad, -sge-rx_pkt_pad, 2, SGE_RX_COPY_THRES, -