On 08-04-2012 22:54, Soren Kristensen wrote:
And it is a fact that the Linux VT6105M driver had bugs, reported to be
fixed in Linux 3.3. And that the atheros and/or wlan drivers had/have
bugs, don't know if fixed.

Upgrade to Linux >= 3.3 and we can continue talking.

I have observed the via-rhine bugs as well - not only on net5501, but also on alix boards.

While some of these bugs have been fixed in Linux 3.3, the most critical one (random lockup) is still there (and new bugs have sneaked in, such as random loss of link).

Two machines sending high rates of UDP packets to each other would make the box hang rather fast, often within a few seconds under certain loads. I made a few fixes to the 3.2.13 driver which seem to have solved the hangs as well as another major problem (the one addressed in 3.3), namely missed timer ticks due to too much work in the via-rhine interrupt handler.
It has now been running for a week under high load without hanging at all.

I've attached a patch, but I don't know if the list server will allow it.

An *important* thing to be aware of is that it doesn't work reliably unless MMIO mode (CONFIG_VIA_RHINE_MMIO) is disabled! I don't know if that is a driver issue or a hardware issue (could be the VT6105M chip itself or the PCI bridge inside the Geode causing trouble with posted writes or write combining or some such), but in any case I don't think anyone could blame Søren for this.

Svenning


--- via-rhine.c.orig    2012-03-23 21:54:45.000000000 +0100
+++ via-rhine.c 2012-04-10 12:22:15.000000000 +0200
@@ -76,7 +76,8 @@
    There are no ill effects from too-large receive rings. */
 #define TX_RING_SIZE   16
 #define TX_QUEUE_LEN   10      /* Limit ring entries actually used. */
-#define RX_RING_SIZE   64
+#define RX_RING_SIZE   16
+#define RX_NAPI_WEIGHT 8
 
 /* Operational parameters that usually are not changed. */
 
@@ -781,6 +782,7 @@
        pioaddr = pci_resource_start(pdev, 0);
        memaddr = pci_resource_start(pdev, 1);
 
+       pci_set_mwi(pdev);
        pci_set_master(pdev);
 
        dev = alloc_etherdev(sizeof(struct rhine_private));
@@ -868,7 +870,7 @@
        dev->ethtool_ops = &netdev_ethtool_ops,
        dev->watchdog_timeo = TX_TIMEOUT;
 
-       netif_napi_add(dev, &rp->napi, rhine_napipoll, 64);
+       netif_napi_add(dev, &rp->napi, rhine_napipoll, RX_NAPI_WEIGHT);
 
        if (rp->quirks & rqRhineI)
                dev->features |= NETIF_F_SG|NETIF_F_HW_CSUM;
@@ -1019,6 +1021,7 @@
                                       PCI_DMA_FROMDEVICE);
 
                rp->rx_ring[i].addr = cpu_to_le32(rp->rx_skbuff_dma[i]);
+               wmb();
                rp->rx_ring[i].rx_status = cpu_to_le32(DescOwn);
        }
        rp->dirty_rx = (unsigned int)(i - RX_RING_SIZE);
@@ -1475,6 +1478,7 @@
        void __iomem *ioaddr = rp->base;
        unsigned entry;
        unsigned long flags;
+       int txstatus;
 
        /* Caution: the write order is important here, set the field
           with the "ownership" bits last. */
@@ -1485,6 +1489,14 @@
        if (skb_padto(skb, ETH_ZLEN))
                return NETDEV_TX_OK;
 
+       IOSYNC;
+       txstatus = le32_to_cpu(rp->tx_ring[entry].tx_status);
+       rmb();
+       if (unlikely(txstatus & DescOwn)) {
+               netdev_warn(dev, "Tx descriptor busy\n");
+               return NETDEV_TX_BUSY;
+       }
+
        rp->tx_skbuff[entry] = skb;
 
        if ((rp->quirks & rqRhineI) &&
@@ -1518,17 +1530,17 @@
                cpu_to_le32(TXDESC | (skb->len >= ETH_ZLEN ? skb->len : 
ETH_ZLEN));
 
        if (unlikely(vlan_tx_tag_present(skb))) {
-               rp->tx_ring[entry].tx_status = 
cpu_to_le32((vlan_tx_tag_get(skb)) << 16);
+               txstatus = (vlan_tx_tag_get(skb) << 16) | DescOwn;
                /* request tagging */
                rp->tx_ring[entry].desc_length |= cpu_to_le32(0x020000);
        }
        else
-               rp->tx_ring[entry].tx_status = 0;
+               txstatus = DescOwn;
 
        /* lock eth irq */
        spin_lock_irqsave(&rp->lock, flags);
        wmb();
-       rp->tx_ring[entry].tx_status |= cpu_to_le32(DescOwn);
+       rp->tx_ring[entry].tx_status = cpu_to_le32(txstatus);
        wmb();
 
        rp->cur_tx++;
@@ -1634,6 +1646,8 @@
        /* find and cleanup dirty tx descriptors */
        while (rp->dirty_tx != rp->cur_tx) {
                txstatus = le32_to_cpu(rp->tx_ring[entry].tx_status);
+               rmb();
+
                if (debug > 6)
                        netdev_dbg(dev, "Tx scavenge %d status %08x\n",
                                   entry, txstatus);
@@ -1652,12 +1666,8 @@
                                dev->stats.tx_aborted_errors++;
                        if (txstatus & 0x0080)
                                dev->stats.tx_heartbeat_errors++;
-                       if (((rp->quirks & rqRhineI) && txstatus & 0x0002) ||
-                           (txstatus & 0x0800) || (txstatus & 0x1000)) {
+                       if (txstatus & ((rp->quirks & rqRhineI) ? 0x0002 : 
0x0800))
                                dev->stats.tx_fifo_errors++;
-                               rp->tx_ring[entry].tx_status = 
cpu_to_le32(DescOwn);
-                               break; /* Keep the skb - we try again */
-                       }
                        /* Transmitter restarted in 'abnormal' handler. */
                } else {
                        if (rp->quirks & rqRhineI)
@@ -1720,12 +1730,16 @@
        for (count = 0; count < limit; ++count) {
                struct rx_desc *desc = rp->rx_head_desc;
                u32 desc_status = le32_to_cpu(desc->rx_status);
-               u32 desc_length = le32_to_cpu(desc->desc_length);
-               int data_size = desc_status >> 16;
+               u32 desc_length;
+               int data_size;
 
                if (desc_status & DescOwn)
                        break;
 
+               rmb();
+               desc_length = le32_to_cpu(desc->desc_length);
+               data_size = desc_status >> 16;
+
                if (debug > 4)
                        netdev_dbg(dev, "%s() status is %08x\n",
                                   __func__, desc_status);
@@ -1756,9 +1770,10 @@
                                        dev->stats.rx_frame_errors++;
                                if (desc_status & 0x0002) {
                                        /* this can also be updated outside the 
interrupt handler */
-                                       spin_lock(&rp->lock);
+                                       unsigned long flags;
+                                       spin_lock_irqsave(&rp->lock, flags);
                                        dev->stats.rx_crc_errors++;
-                                       spin_unlock(&rp->lock);
+                                       spin_unlock_irqrestore(&rp->lock, 
flags);
                                }
                        }
                } else {
@@ -1829,6 +1844,7 @@
                                               rp->rx_buf_sz,
                                               PCI_DMA_FROMDEVICE);
                        rp->rx_ring[entry].addr = 
cpu_to_le32(rp->rx_skbuff_dma[entry]);
+                       wmb();
                }
                rp->rx_ring[entry].rx_status = cpu_to_le32(DescOwn);
        }
@@ -1908,7 +1924,7 @@
        }
        if (intr_status & IntrTxUnderrun) {
                if (rp->tx_thresh < 0xE0)
-                       BYTE_REG_BITS_SET((rp->tx_thresh += 0x20), 0x80, ioaddr 
+ TxConfig);
+                       BYTE_REG_BITS_SET((rp->tx_thresh += 0x20), 0xE0, ioaddr 
+ TxConfig);
                if (debug > 1)
                        netdev_info(dev, "Transmitter underrun, Tx threshold 
now %02x\n",
                                    rp->tx_thresh);
@@ -1921,7 +1937,7 @@
            (intr_status & (IntrTxAborted |
             IntrTxUnderrun | IntrTxDescRace)) == 0) {
                if (rp->tx_thresh < 0xE0) {
-                       BYTE_REG_BITS_SET((rp->tx_thresh += 0x20), 0x80, ioaddr 
+ TxConfig);
+                       BYTE_REG_BITS_SET((rp->tx_thresh += 0x20), 0xE0, ioaddr 
+ TxConfig);
                }
                if (debug > 1)
                        netdev_info(dev, "Unspecified error. Tx threshold now 
%02x\n",
@@ -2002,7 +2018,7 @@
                else
                        BYTE_REG_BITS_ON(BCR1_VIDFR, ioaddr + PCIBusConfig1);
        }
-       BYTE_REG_BITS_ON(rx_mode, ioaddr + RxConfig);
+       BYTE_REG_BITS_SET(rx_mode | rp->rx_thresh, 0xff , ioaddr + RxConfig);
 }
 
 static void netdev_get_drvinfo(struct net_device *dev, struct ethtool_drvinfo 
*info)
_______________________________________________
Soekris-tech mailing list
Soekris-tech@lists.soekris.com
http://lists.soekris.com/mailman/listinfo/soekris-tech

Reply via email to