Hi

Here is a patch to drop the expensive memcpy of received Ethernet frames in
interrupt context. I have not done any benchmarking, but mounting an NFS
rootfs feels faster.
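
The idea on the receive side: instead of eth_copy_and_sum()ing every frame
out of a fixed DMA buffer, hand the filled skb straight up the stack and
rearm the buffer descriptor with a freshly allocated skb. In outline (same
names as in the patch below; error handling trimmed):

	skb_tmp = dev_alloc_skb(CPM_ENET_RX_FRSIZE);	/* replacement buffer */
	if (skb_tmp != NULL) {
		skb = cep->rx_vaddr[bdp - cep->rx_bd_base];	/* the filled skb */
		skb->dev = dev;
		skb_put(skb, pkt_len - 4);			/* strip the FCS */
		skb->protocol = eth_type_trans(skb, dev);
		netif_rx(skb);					/* no memcpy */

		invalidate_dcache_range((unsigned long) skb_tmp->data,
					(unsigned long) skb_tmp->data + CPM_ENET_RX_FRSIZE);
		bdp->cbd_bufaddr = __pa(skb_tmp->data);		/* rearm the BD */
		cep->rx_vaddr[bdp - cep->rx_bd_base] = skb_tmp;
	}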

I am using a heavily modified enet.c in my system, but I think I got the
patch right.

Also fixed a bug in set_multicast_list(): advance the dmi pointer when
walking the list (dmi = dmi->next;), as sketched below.
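
Roughly, the corrected walk (the CPM set-group-address programming in the
loop body is abbreviated; see the full hunk below):

	struct dev_mc_list *dmi = dev->mc_list;

	for (i = 0; i < dev->mc_count; i++) {
		/* ... hash dmi->dmi_addr and issue the CPM set group
		 * address command ...
		 */
		dmi = dmi->next;	/* this advance was missing */
	}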

Comments? Anyone care to do some benchmarking?

        Jocke

--- arch/ppc/8xx_io/enet.c.org Mon Oct 21 14:35:59 2002
+++ arch/ppc/8xx_io/enet.c      Mon Oct 21 15:06:04 2002
@@ -96,18 +96,17 @@
  * We don't need to allocate pages for the transmitter.  We just use
  * the skbuffer directly.
  */
+#define CPM_ENET_RX_FRSIZE     1600   /* MUST be a multiple of cache line! */
+#if CPM_ENET_RX_FRSIZE % L1_CACHE_LINE_SIZE != 0
+    #error CPM_ENET_RX_FRSIZE must be a multiple of the L1 cache line size
+#endif
+
 #ifdef CONFIG_ENET_BIG_BUFFERS
-#define CPM_ENET_RX_PAGES      32
-#define CPM_ENET_RX_FRSIZE     2048
-#define CPM_ENET_RX_FRPPG      (PAGE_SIZE / CPM_ENET_RX_FRSIZE)
-#define RX_RING_SIZE           (CPM_ENET_RX_FRPPG * CPM_ENET_RX_PAGES)
+#define RX_RING_SIZE           64
 #define TX_RING_SIZE           64      /* Must be power of two */
 #define TX_RING_MOD_MASK       63      /*   for this to work */
 #else
-#define CPM_ENET_RX_PAGES      4
-#define CPM_ENET_RX_FRSIZE     2048
-#define CPM_ENET_RX_FRPPG      (PAGE_SIZE / CPM_ENET_RX_FRSIZE)
-#define RX_RING_SIZE           (CPM_ENET_RX_FRPPG * CPM_ENET_RX_PAGES)
+#define RX_RING_SIZE           8
 #define TX_RING_SIZE           8       /* Must be power of two */
 #define TX_RING_MOD_MASK       7       /*   for this to work */
 #endif
@@ -143,7 +142,7 @@
        /* Virtual addresses for the receive buffers because we can't
         * do a __va() on them anymore.
         */
-       unsigned char *rx_vaddr[RX_RING_SIZE];
+       void    *rx_vaddr[RX_RING_SIZE];
        struct  net_device_stats stats;
        uint    tx_full;
        spinlock_t lock;
@@ -449,6 +448,7 @@
        struct  scc_enet_private *cep;
        volatile cbd_t  *bdp;
        struct  sk_buff *skb;
+       struct  sk_buff *skb_tmp;
        ushort  pkt_len;

        cep = (struct scc_enet_private *)dev->priv;
@@ -497,25 +497,26 @@
                pkt_len = bdp->cbd_datlen;
                cep->stats.rx_bytes += pkt_len;

-               /* This does 16 byte alignment, much more than we need.
+               /* This does 16 byte alignment, exactly what we need.
                 * The packet length includes FCS, but we don't want to
                 * include that when passing upstream as it messes up
                 * bridging applications.
                 */
-               skb = dev_alloc_skb(pkt_len-4);
-
-               if (skb == NULL) {
+               skb_tmp = dev_alloc_skb(CPM_ENET_RX_FRSIZE);
+               if (skb_tmp == NULL) {
                        printk("%s: Memory squeeze, dropping packet.\n", 
dev->name);
                        cep->stats.rx_dropped++;
-               }
-               else {
+               } else {
+                       skb = cep->rx_vaddr[bdp - cep->rx_bd_base];
                        skb->dev = dev;
                        skb_put(skb,pkt_len-4); /* Make room */
-                       eth_copy_and_sum(skb,
-                               cep->rx_vaddr[bdp - cep->rx_bd_base],
-                               pkt_len-4, 0);
                        skb->protocol=eth_type_trans(skb,dev);
                        netif_rx(skb);
+
+                       invalidate_dcache_range((unsigned long) skb_tmp->data,
+                                               (unsigned long) skb_tmp->data + CPM_ENET_RX_FRSIZE);
+                       bdp->cbd_bufaddr = __pa(skb_tmp->data);
+                       cep->rx_vaddr[bdp - cep->rx_bd_base] = skb_tmp;
                }
        }

@@ -631,6 +632,7 @@
                                /* this delay is necessary here -- Cort */
                                udelay(10);
                                while (cpmp->cp_cpcr & CPM_CR_FLG);
+                               dmi = dmi->next;
                        }
                }
        }
@@ -647,8 +649,7 @@
        struct net_device *dev;
        struct scc_enet_private *cep;
        int i, j, k;
-       unsigned char   *eap, *ba;
-       dma_addr_t      mem_addr;
+       unsigned char   *eap;
        bd_t            *bd;
        volatile        cbd_t           *bdp;
        volatile        cpm8xx_t        *cp;
@@ -839,22 +840,15 @@

        bdp = cep->rx_bd_base;
        k = 0;
-       for (i=0; i<CPM_ENET_RX_PAGES; i++) {
-
-               /* Allocate a page.
-               */
-               ba = (unsigned char *)consistent_alloc(GFP_KERNEL, PAGE_SIZE, &mem_addr);
-
-               /* Initialize the BD for every fragment in the page.
-               */
-               for (j=0; j<CPM_ENET_RX_FRPPG; j++) {
-                       bdp->cbd_sc = BD_ENET_RX_EMPTY | BD_ENET_RX_INTR;
-                       bdp->cbd_bufaddr = mem_addr;
-                       cep->rx_vaddr[k++] = ba;
-                       mem_addr += CPM_ENET_RX_FRSIZE;
-                       ba += CPM_ENET_RX_FRSIZE;
-                       bdp++;
-               }
+       /* Initialize the BDs. */
+       for (j=0; j < RX_RING_SIZE; j++) {
+               struct  sk_buff * skb = dev_alloc_skb(CPM_ENET_RX_FRSIZE);
+               bdp->cbd_sc = BD_ENET_RX_EMPTY | BD_ENET_RX_INTR;
+               bdp->cbd_bufaddr = __pa(skb->data);
+               invalidate_dcache_range((unsigned long) skb->data,
+                                       (unsigned long) skb->data + CPM_ENET_RX_FRSIZE);
+               cep->rx_vaddr[k++] = skb;
+               bdp++;
        }

        /* Set the last buffer to wrap.

** Sent via the linuxppc-embedded mail list. See http://lists.linuxppc.org/


