[PATCH v4 4/9] pasemi_mac: Add SKB reuse / copy-break
Add a copy-break and recycle the SKB in the driver for small packets. Signed-off-by: Olof Johansson [EMAIL PROTECTED] Index: netdev-2.6/drivers/net/pasemi_mac.c === --- netdev-2.6.orig/drivers/net/pasemi_mac.c +++ netdev-2.6/drivers/net/pasemi_mac.c @@ -279,8 +279,8 @@ static void pasemi_mac_free_rx_resources for (i = 0; i RX_RING_SIZE; i++) { info = RX_DESC_INFO(mac, i); dp = RX_DESC(mac, i); - if (info-dma) { - if (info-skb) { + if (info-skb) { + if (info-dma) { pci_unmap_single(mac-dma_pdev, info-dma, info-skb-len, @@ -329,12 +329,14 @@ static void pasemi_mac_replenish_rx_ring struct sk_buff *skb; dma_addr_t dma; - skb = dev_alloc_skb(BUF_SIZE); + /* skb might still be in there for recycle on short receives */ + if (info-skb) + skb = info-skb; + else + skb = dev_alloc_skb(BUF_SIZE); - if (!skb) { - count = i - start; + if (unlikely(!skb)) break; - } dma = pci_map_single(mac-dma_pdev, skb-data, skb-len, PCI_DMA_FROMDEVICE); @@ -442,13 +444,28 @@ static int pasemi_mac_clean_rx(struct pa BUG_ON(!info); BUG_ON(info-dma != dma); + skb = info-skb; pci_unmap_single(mac-dma_pdev, info-dma, info-skb-len, PCI_DMA_FROMDEVICE); + info-dma = 0; - skb = info-skb; len = (dp-macrx XCT_MACRX_LLEN_M) XCT_MACRX_LLEN_S; + if (len 256) { + struct sk_buff *new_skb = + netdev_alloc_skb(mac-netdev, len + NET_IP_ALIGN); + if (new_skb) { + skb_reserve(new_skb, NET_IP_ALIGN); + memcpy(new_skb-data - NET_IP_ALIGN, + skb-data - NET_IP_ALIGN, + len + NET_IP_ALIGN); + /* save the skb in buffer_info as good */ + skb = new_skb; + } + /* else just continue with the old one */ + } else + info-skb = NULL; skb_put(skb, len); -- - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v4 6/9] pasemi_mac: Logic cleanup / rx performance improvements
Logic cleanup and some performance enhancements to the RX path. Signed-off-by: Olof Johansson [EMAIL PROTECTED] Index: netdev-2.6/drivers/net/pasemi_mac.c === --- netdev-2.6.orig/drivers/net/pasemi_mac.c +++ netdev-2.6/drivers/net/pasemi_mac.c @@ -305,19 +305,20 @@ static void pasemi_mac_replenish_rx_ring struct pasemi_mac *mac = netdev_priv(dev); unsigned int i; int start = mac-rx-next_to_fill; - unsigned int count; + unsigned int limit, count; - count = (mac-rx-next_to_clean + RX_RING_SIZE - + limit = (mac-rx-next_to_clean + RX_RING_SIZE - mac-rx-next_to_fill) (RX_RING_SIZE - 1); /* Check to see if we're doing first-time setup */ if (unlikely(mac-rx-next_to_clean == 0 mac-rx-next_to_fill == 0)) - count = RX_RING_SIZE; + limit = RX_RING_SIZE; - if (count = 0) + if (limit = 0) return; - for (i = start; i start + count; i++) { + i = start; + for (count = limit; count; count--) { struct pasemi_mac_buffer *info = RX_DESC_INFO(mac, i); u64 *buff = RX_BUFF(mac, i); struct sk_buff *skb; @@ -335,27 +336,27 @@ static void pasemi_mac_replenish_rx_ring dma = pci_map_single(mac-dma_pdev, skb-data, skb-len, PCI_DMA_FROMDEVICE); - if (dma_mapping_error(dma)) { + if (unlikely(dma_mapping_error(dma))) { dev_kfree_skb_irq(info-skb); - count = i - start; break; } info-skb = skb; info-dma = dma; *buff = XCT_RXB_LEN(BUF_SIZE) | XCT_RXB_ADDR(dma); + i++; } wmb(); pci_write_config_dword(mac-dma_pdev, PAS_DMA_RXCHAN_INCR(mac-dma_rxch), - count); + limit - count); pci_write_config_dword(mac-dma_pdev, PAS_DMA_RXINT_INCR(mac-dma_if), - count); + limit - count); - mac-rx-next_to_fill += count; + mac-rx-next_to_fill += limit - count; } static void pasemi_mac_restart_rx_intr(struct pasemi_mac *mac) @@ -393,32 +394,31 @@ static void pasemi_mac_restart_tx_intr(s } - static int pasemi_mac_clean_rx(struct pasemi_mac *mac, int limit) { - unsigned int i; - int start, count; + unsigned int n; + int count; + struct pas_dma_xct_descr *dp; + struct pasemi_mac_buffer *info; + struct sk_buff *skb; + 
unsigned int i, len; + u64 macrx; + dma_addr_t dma; spin_lock(mac-rx-lock); - start = mac-rx-next_to_clean; - count = 0; + n = mac-rx-next_to_clean; - for (i = start; i (start + RX_RING_SIZE) count limit; i++) { - struct pas_dma_xct_descr *dp; - struct pasemi_mac_buffer *info; - struct sk_buff *skb; - unsigned int j, len; - dma_addr_t dma; + for (count = limit; count; count--) { rmb(); - dp = RX_DESC(mac, i); + dp = RX_DESC(mac, n); + macrx = dp-macrx; - if (!(dp-macrx XCT_MACRX_O)) + if (!(macrx XCT_MACRX_O)) break; - count++; info = NULL; @@ -430,22 +430,20 @@ static int pasemi_mac_clean_rx(struct pa */ dma = (dp-ptr XCT_PTR_ADDR_M); - for (j = start; j (start + RX_RING_SIZE); j++) { - info = RX_DESC_INFO(mac, j); + for (i = n; i (n + RX_RING_SIZE); i++) { + info = RX_DESC_INFO(mac, i); if (info-dma == dma) break; } - BUG_ON(!info); - BUG_ON(info-dma != dma); skb = info-skb; + info-dma = 0; - pci_unmap_single(mac-dma_pdev, info-dma, info-skb-len, + pci_unmap_single(mac-dma_pdev, dma, skb-len, PCI_DMA_FROMDEVICE); - info-dma = 0; + len = (macrx XCT_MACRX_LLEN_M) XCT_MACRX_LLEN_S; - len = (dp-macrx XCT_MACRX_LLEN_M) XCT_MACRX_LLEN_S; if (len 256) { struct sk_buff *new_skb = netdev_alloc_skb(mac-netdev, len + NET_IP_ALIGN); @@ -465,9 +463,9 @@ static int pasemi_mac_clean_rx(struct pa skb-protocol = eth_type_trans(skb, mac-netdev); - if ((dp-macrx XCT_MACRX_HTY_M) == XCT_MACRX_HTY_IPV4_OK) { + if ((macrx XCT_MACRX_HTY_M) ==
[PATCH v4 5/9] pasemi_mac: Minor cleanup / define fixes
* Remove some unused defines * Fix a couple of wrong chip register defines, and add a few more fields that might be used in the near future. Signed-off-by: Olof Johansson [EMAIL PROTECTED] Index: netdev-2.6/drivers/net/pasemi_mac.c === --- netdev-2.6.orig/drivers/net/pasemi_mac.c +++ netdev-2.6/drivers/net/pasemi_mac.c @@ -61,12 +61,6 @@ #define BUF_SIZE 1646 /* 1500 MTU + ETH_HLEN + VLAN_HLEN + 2 64B cachelines */ -/* XXXOJN these should come out of the device tree some day */ -#define PAS_DMA_CAP_BASE 0xe00d0040 -#define PAS_DMA_CAP_SIZE 0x100 -#define PAS_DMA_COM_BASE 0xe00d0100 -#define PAS_DMA_COM_SIZE 0x100 - static struct pasdma_status *dma_status; static int pasemi_get_mac_addr(struct pasemi_mac *mac) Index: netdev-2.6/drivers/net/pasemi_mac.h === --- netdev-2.6.orig/drivers/net/pasemi_mac.h +++ netdev-2.6/drivers/net/pasemi_mac.h @@ -195,11 +195,15 @@ enum { #define PAS_DMA_RXINT_RCMDSTA(i) (0x200+(i)*_PAS_DMA_RXINT_STRIDE) #definePAS_DMA_RXINT_RCMDSTA_EN0x0001 #definePAS_DMA_RXINT_RCMDSTA_ST0x0002 -#definePAS_DMA_RXINT_RCMDSTA_OO0x0100 -#definePAS_DMA_RXINT_RCMDSTA_BP0x0200 -#definePAS_DMA_RXINT_RCMDSTA_DR0x0400 +#definePAS_DMA_RXINT_RCMDSTA_MBT 0x0008 +#definePAS_DMA_RXINT_RCMDSTA_MDR 0x0010 +#definePAS_DMA_RXINT_RCMDSTA_MOO 0x0020 +#definePAS_DMA_RXINT_RCMDSTA_MBP 0x0040 #definePAS_DMA_RXINT_RCMDSTA_BT0x0800 -#definePAS_DMA_RXINT_RCMDSTA_TB0x1000 +#definePAS_DMA_RXINT_RCMDSTA_DR0x1000 +#definePAS_DMA_RXINT_RCMDSTA_OO0x2000 +#definePAS_DMA_RXINT_RCMDSTA_BP0x4000 +#definePAS_DMA_RXINT_RCMDSTA_TB0x8000 #definePAS_DMA_RXINT_RCMDSTA_ACT 0x0001 #definePAS_DMA_RXINT_RCMDSTA_DROPS_M 0xfffe #definePAS_DMA_RXINT_RCMDSTA_DROPS_S 17 -- - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v4 1/9] pasemi_mac: Move the IRQ mapping from the PCI layer to the driver
Fixes for ethernet IRQ mapping, to be done in the driver instead of in the platform setup code. Signed-off-by: Olof Johansson [EMAIL PROTECTED] Index: netdev-2.6/arch/powerpc/platforms/pasemi/pci.c === --- netdev-2.6.orig/arch/powerpc/platforms/pasemi/pci.c +++ netdev-2.6/arch/powerpc/platforms/pasemi/pci.c @@ -173,19 +173,6 @@ static void __init pas_fixup_phb_resourc } -void __devinit pas_pci_irq_fixup(struct pci_dev *dev) -{ - /* DMA is special, 84 interrupts (128 - 211), all but 128 -* need to be mapped by hand here. -*/ - if (dev-vendor == 0x1959 dev-device == 0xa007) { - int i; - for (i = 129; i 212; i++) - irq_create_mapping(NULL, i); - } -} - - void __init pas_pci_init(void) { struct device_node *np, *root; Index: netdev-2.6/arch/powerpc/platforms/pasemi/setup.c === --- netdev-2.6.orig/arch/powerpc/platforms/pasemi/setup.c +++ netdev-2.6/arch/powerpc/platforms/pasemi/setup.c @@ -248,5 +248,4 @@ define_machine(pas) { .calibrate_decr = generic_calibrate_decr, .progress = pas_progress, .machine_check_exception = pas_machine_check_handler, - .pci_irq_fixup = pas_pci_irq_fixup, }; Index: netdev-2.6/drivers/net/pasemi_mac.c === --- netdev-2.6.orig/drivers/net/pasemi_mac.c +++ netdev-2.6/drivers/net/pasemi_mac.c @@ -33,6 +33,8 @@ #include linux/tcp.h #include net/checksum.h +#include asm/irq.h + #include pasemi_mac.h @@ -531,6 +533,7 @@ static irqreturn_t pasemi_mac_tx_intr(in static int pasemi_mac_open(struct net_device *dev) { struct pasemi_mac *mac = netdev_priv(dev); + int base_irq; unsigned int flags; int ret; @@ -594,28 +597,37 @@ static int pasemi_mac_open(struct net_de netif_start_queue(dev); netif_poll_enable(dev); - ret = request_irq(mac-dma_pdev-irq + mac-dma_txch, - pasemi_mac_tx_intr, IRQF_DISABLED, + /* Interrupts are a bit different for our DMA controller: While +* it's got one a regular PCI device header, the interrupt there +* is really the base of the range it's using. Each tx and rx +* channel has it's own interrupt source. 
+*/ + + base_irq = virq_to_hw(mac-dma_pdev-irq); + + mac-tx_irq = irq_create_mapping(NULL, base_irq + mac-dma_txch); + mac-rx_irq = irq_create_mapping(NULL, base_irq + 20 + mac-dma_txch); + + ret = request_irq(mac-tx_irq, pasemi_mac_tx_intr, IRQF_DISABLED, mac-tx-irq_name, dev); if (ret) { dev_err(mac-pdev-dev, request_irq of irq %d failed: %d\n, - mac-dma_pdev-irq + mac-dma_txch, ret); + base_irq + mac-dma_txch, ret); goto out_tx_int; } - ret = request_irq(mac-dma_pdev-irq + 20 + mac-dma_rxch, - pasemi_mac_rx_intr, IRQF_DISABLED, + ret = request_irq(mac-rx_irq, pasemi_mac_rx_intr, IRQF_DISABLED, mac-rx-irq_name, dev); if (ret) { dev_err(mac-pdev-dev, request_irq of irq %d failed: %d\n, - mac-dma_pdev-irq + 20 + mac-dma_rxch, ret); + base_irq + 20 + mac-dma_rxch, ret); goto out_rx_int; } return 0; out_rx_int: - free_irq(mac-dma_pdev-irq + mac-dma_txch, dev); + free_irq(mac-tx_irq, dev); out_tx_int: netif_poll_disable(dev); netif_stop_queue(dev); @@ -699,8 +711,8 @@ static int pasemi_mac_close(struct net_d pci_write_config_dword(mac-dma_pdev, PAS_DMA_RXINT_RCMDSTA(mac-dma_if), 0); - free_irq(mac-dma_pdev-irq + mac-dma_txch, dev); - free_irq(mac-dma_pdev-irq + 20 + mac-dma_rxch, dev); + free_irq(mac-tx_irq, dev); + free_irq(mac-rx_irq, dev); /* Free resources */ pasemi_mac_free_rx_resources(dev); Index: netdev-2.6/drivers/net/pasemi_mac.h === --- netdev-2.6.orig/drivers/net/pasemi_mac.h +++ netdev-2.6/drivers/net/pasemi_mac.h @@ -73,6 +73,8 @@ struct pasemi_mac { struct pasemi_mac_txring *tx; struct pasemi_mac_rxring *rx; + unsigned long tx_irq; + unsigned long rx_irq; }; /* Software status descriptor (desc_info) */ -- - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v4 8/9] pasemi_mac: PHY support
PHY support for pasemi_mac. Signed-off-by: Olof Johansson [EMAIL PROTECTED] Index: netdev-2.6/drivers/net/pasemi_mac.c === --- netdev-2.6.orig/drivers/net/pasemi_mac.c +++ netdev-2.6/drivers/net/pasemi_mac.c @@ -606,6 +606,114 @@ static irqreturn_t pasemi_mac_tx_intr(in return IRQ_HANDLED; } +static void pasemi_adjust_link(struct net_device *dev) +{ + struct pasemi_mac *mac = netdev_priv(dev); + int msg; + unsigned int flags; + unsigned int new_flags; + + if (!mac-phydev-link) { + /* If no link, MAC speed settings don't matter. Just report +* link down and return. +*/ + if (mac-link netif_msg_link(mac)) + printk(KERN_INFO %s: Link is down.\n, dev-name); + + netif_carrier_off(dev); + mac-link = 0; + + return; + } else + netif_carrier_on(dev); + + pci_read_config_dword(mac-pdev, PAS_MAC_CFG_PCFG, flags); + new_flags = flags ~(PAS_MAC_CFG_PCFG_HD | PAS_MAC_CFG_PCFG_SPD_M | + PAS_MAC_CFG_PCFG_TSR_M); + + if (!mac-phydev-duplex) + new_flags |= PAS_MAC_CFG_PCFG_HD; + + switch (mac-phydev-speed) { + case 1000: + new_flags |= PAS_MAC_CFG_PCFG_SPD_1G | +PAS_MAC_CFG_PCFG_TSR_1G; + break; + case 100: + new_flags |= PAS_MAC_CFG_PCFG_SPD_100M | +PAS_MAC_CFG_PCFG_TSR_100M; + break; + case 10: + new_flags |= PAS_MAC_CFG_PCFG_SPD_10M | +PAS_MAC_CFG_PCFG_TSR_10M; + break; + default: + printk(Unsupported speed %d\n, mac-phydev-speed); + } + + /* Print on link or speed/duplex change */ + msg = mac-link != mac-phydev-link || flags != new_flags; + + mac-duplex = mac-phydev-duplex; + mac-speed = mac-phydev-speed; + mac-link = mac-phydev-link; + + if (new_flags != flags) + pci_write_config_dword(mac-pdev, PAS_MAC_CFG_PCFG, new_flags); + + if (msg netif_msg_link(mac)) + printk(KERN_INFO %s: Link is up at %d Mbps, %s duplex.\n, + dev-name, mac-speed, mac-duplex ? 
full : half); +} + +static int pasemi_mac_phy_init(struct net_device *dev) +{ + struct pasemi_mac *mac = netdev_priv(dev); + struct device_node *dn, *phy_dn; + struct phy_device *phydev; + unsigned int phy_id; + const phandle *ph; + const unsigned int *prop; + struct resource r; + int ret; + + dn = pci_device_to_OF_node(mac-pdev); + ph = get_property(dn, phy-handle, NULL); + if (!ph) + return -ENODEV; + phy_dn = of_find_node_by_phandle(*ph); + + prop = get_property(phy_dn, reg, NULL); + ret = of_address_to_resource(phy_dn-parent, 0, r); + if (ret) + goto err; + + phy_id = *prop; + snprintf(mac-phy_id, BUS_ID_SIZE, PHY_ID_FMT, (int)r.start, phy_id); + + of_node_put(phy_dn); + + mac-link = 0; + mac-speed = 0; + mac-duplex = -1; + + phydev = phy_connect(dev, mac-phy_id, pasemi_adjust_link, 0, PHY_INTERFACE_MODE_SGMII); + + if (IS_ERR(phydev)) { + printk(KERN_ERR %s: Could not attach to phy\n, dev-name); + return PTR_ERR(phydev); + } + + mac-phydev = phydev; + + return 0; + +err: + of_node_put(phy_dn); + return -ENODEV; +} + + static int pasemi_mac_open(struct net_device *dev) { struct pasemi_mac *mac = netdev_priv(dev); @@ -678,6 +786,13 @@ static int pasemi_mac_open(struct net_de pasemi_mac_replenish_rx_ring(dev); + ret = pasemi_mac_phy_init(dev); + /* Some configs don't have PHYs (XAUI etc), so don't complain about +* failed init due to -ENODEV. 
+*/ + if (ret ret != -ENODEV) + dev_warn(mac-pdev-dev, phy init failed: %d\n, ret); + netif_start_queue(dev); netif_poll_enable(dev); @@ -708,6 +823,9 @@ static int pasemi_mac_open(struct net_de goto out_rx_int; } + if (mac-phydev) + phy_start(mac-phydev); + return 0; out_rx_int: @@ -731,6 +849,11 @@ static int pasemi_mac_close(struct net_d unsigned int stat; int retries; + if (mac-phydev) { + phy_stop(mac-phydev); + phy_disconnect(mac-phydev); + } + netif_stop_queue(dev); /* Clean out any pending buffers */ @@ -1032,6 +1155,9 @@ pasemi_mac_probe(struct pci_dev *pdev, c else mac-msg_enable = pasemi_mac_debug; + /* Enable most messages by default */ + mac-msg_enable = (NETIF_MSG_IFUP 1 ) - 1; + err = register_netdev(dev); if
[PATCH v4 7/9] pasemi_mac: Add msglevel support and debug module param
Add msglevel support for pasemi_mac. Move the MODULE_* defines to the top to go together with the variable (similar to tg3). Signed-off-by: Olof Johansson [EMAIL PROTECTED] Index: netdev-2.6/drivers/net/pasemi_mac.c === --- netdev-2.6.orig/drivers/net/pasemi_mac.c +++ netdev-2.6/drivers/net/pasemi_mac.c @@ -53,6 +53,16 @@ #define RX_RING_SIZE 512 #define TX_RING_SIZE 512 +#define DEFAULT_MSG_ENABLE \ + (NETIF_MSG_DRV | \ +NETIF_MSG_PROBE| \ +NETIF_MSG_LINK | \ +NETIF_MSG_TIMER| \ +NETIF_MSG_IFDOWN | \ +NETIF_MSG_IFUP | \ +NETIF_MSG_RX_ERR | \ +NETIF_MSG_TX_ERR) + #define TX_DESC(mac, num) ((mac)-tx-desc[(num) (TX_RING_SIZE-1)]) #define TX_DESC_INFO(mac, num) ((mac)-tx-desc_info[(num) (TX_RING_SIZE-1)]) #define RX_DESC(mac, num) ((mac)-rx-desc[(num) (RX_RING_SIZE-1)]) @@ -61,6 +71,14 @@ #define BUF_SIZE 1646 /* 1500 MTU + ETH_HLEN + VLAN_HLEN + 2 64B cachelines */ +MODULE_LICENSE(GPL); +MODULE_AUTHOR (Olof Johansson [EMAIL PROTECTED]); +MODULE_DESCRIPTION(PA Semi PWRficient Ethernet driver); + +static int debug = -1; /* -1 == use DEFAULT_MSG_ENABLE as value */ +module_param(debug, int, 0); +MODULE_PARM_DESC(debug, PA Semi MAC bitmapped debugging message enable value); + static struct pasdma_status *dma_status; static int pasemi_get_mac_addr(struct pasemi_mac *mac) @@ -873,6 +891,7 @@ static struct net_device_stats *pasemi_m return mac-stats; } + static void pasemi_mac_set_rx_mode(struct net_device *dev) { struct pasemi_mac *mac = netdev_priv(dev); @@ -1007,6 +1026,8 @@ pasemi_mac_probe(struct pci_dev *pdev, c mac-rx_status = dma_status-rx_sta[mac-dma_rxch]; mac-tx_status = dma_status-tx_sta[mac-dma_txch]; + mac-msg_enable = netif_msg_init(debug, DEFAULT_MSG_ENABLE); + err = register_netdev(dev); if (err) { @@ -1081,9 +1102,5 @@ int pasemi_mac_init_module(void) return pci_register_driver(pasemi_mac_driver); } -MODULE_LICENSE(GPL); -MODULE_AUTHOR (Olof Johansson [EMAIL PROTECTED]); -MODULE_DESCRIPTION(PA Semi PWRficient Ethernet driver); - 
module_init(pasemi_mac_init_module); module_exit(pasemi_mac_cleanup_module); Index: netdev-2.6/drivers/net/pasemi_mac.h === --- netdev-2.6.orig/drivers/net/pasemi_mac.h +++ netdev-2.6/drivers/net/pasemi_mac.h @@ -75,6 +75,8 @@ struct pasemi_mac { struct pasemi_mac_rxring *rx; unsigned long tx_irq; unsigned long rx_irq; + + unsigned intmsg_enable; }; /* Software status descriptor (desc_info) */ -- - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v3] [2/10] [REAL 2/10] pasemi_mac: Move the IRQ mapping from the PCI layer to the driver
On Tue, May 08, 2007 at 01:02:34AM -0400, Jeff Garzik wrote:
> Olof Johansson wrote:
> > Fixes for ethernet IRQ mapping, to be done in the driver instead of in the platform setup code.
> > Signed-off-by: Olof Johansson [EMAIL PROTECTED]
> patch failed to apply, so I stopped here
> ACK patches 2-7
Looks like it had some fuzz due to OF changes on powerpc; I've refreshed and will repost all but 1/10.
Thanks, -Olof
- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v4 0/9] pasemi_mac: fixes and enhancements
Hi,
The nine following patches contain a number of fixes and improvements to the pasemi_mac driver:
[PATCH v4 1/9] pasemi_mac: Move the IRQ mapping from the PCI layer to the driver
[PATCH v4 2/9] pasemi_mac: Abstract and fix up interrupt restart routines
[PATCH v4 3/9] pasemi_mac: Timer and interrupt fixes
[PATCH v4 4/9] pasemi_mac: Add SKB reuse / copy-break
[PATCH v4 5/9] pasemi_mac: Minor cleanup / define fixes
[PATCH v4 6/9] pasemi_mac: Logic cleanup / rx performance improvements
[PATCH v4 7/9] pasemi_mac: Add msglevel support and pasemi_mac_debug module param
[PATCH v4 8/9] pasemi_mac: PHY support
[PATCH v4 9/9] pasemi_mac: Use local-mac-address instead of mac-address if available
Changes since the last posting:
- Dropped 1/10 since it's already applied.
- 1/9: refreshed due to changes in the arch code.
- 7/9: use netif_msg_init() and rename the module parameter to "debug".
Thanks, -Olof
- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v3] [8/10] pasemi_mac: Add msglevel support and pasemi_mac_debug module param
On Tue, May 08, 2007 at 01:03:12AM -0400, Jeff Garzik wrote:
> Olof Johansson wrote:
> > +/* Enable most messages by default */
> > +if (pasemi_mac_debug < 0)
> > +mac->msg_enable = DEFAULT_MSG_ENABLE;
> > +else
> > +mac->msg_enable = pasemi_mac_debug;
> > +
> use netif_msg_init()
Thanks. It's also needlessly verbose to have the module parameter name that long. Other drivers just use "debug", so I switched to the same.
-Olof
- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v4 9/9] pasemi_mac: Use local-mac-address instead of mac-address if available
Use local-mac-address in the device tree instead. Fall back to mac-address for older firmware. Signed-off-by: Olof Johansson [EMAIL PROTECTED] Index: netdev-2.6/drivers/net/pasemi_mac.c === --- netdev-2.6.orig/drivers/net/pasemi_mac.c +++ netdev-2.6/drivers/net/pasemi_mac.c @@ -94,7 +94,12 @@ static int pasemi_get_mac_addr(struct pa return -ENOENT; } - maddr = get_property(dn, mac-address, NULL); + maddr = get_property(dn, local-mac-address, NULL); + + /* Fall back to mac-address for older firmware */ + if (maddr == NULL) + maddr = get_property(dn, mac-address, NULL); + if (maddr == NULL) { dev_warn(pdev-dev, no mac address in device tree, not configuring\n); -- - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v4 2/9] pasemi_mac: Abstract and fix up interrupt restart routines
Abstract out (and fix up) the interrupt restart routines, making sure we start out in a consistent state. Signed-off-by: Olof Johansson [EMAIL PROTECTED] Index: netdev-2.6/drivers/net/pasemi_mac.c === --- netdev-2.6.orig/drivers/net/pasemi_mac.c +++ netdev-2.6/drivers/net/pasemi_mac.c @@ -362,6 +362,42 @@ static void pasemi_mac_replenish_rx_ring mac-rx-next_to_fill += count; } +static void pasemi_mac_restart_rx_intr(struct pasemi_mac *mac) +{ + unsigned int reg, stat; + /* Re-enable packet count interrupts: finally +* ack the packet count interrupt we got in rx_intr. +*/ + + pci_read_config_dword(mac-iob_pdev, + PAS_IOB_DMA_RXCH_STAT(mac-dma_rxch), + stat); + + reg = PAS_IOB_DMA_RXCH_RESET_PCNT(stat PAS_IOB_DMA_RXCH_STAT_CNTDEL_M) + | PAS_IOB_DMA_RXCH_RESET_PINTC; + + pci_write_config_dword(mac-iob_pdev, + PAS_IOB_DMA_RXCH_RESET(mac-dma_rxch), + reg); +} + +static void pasemi_mac_restart_tx_intr(struct pasemi_mac *mac) +{ + unsigned int reg, stat; + + /* Re-enable packet count interrupts */ + pci_read_config_dword(mac-iob_pdev, + PAS_IOB_DMA_TXCH_STAT(mac-dma_txch), stat); + + reg = PAS_IOB_DMA_TXCH_RESET_PCNT(stat PAS_IOB_DMA_TXCH_STAT_CNTDEL_M) + | PAS_IOB_DMA_TXCH_RESET_PINTC; + + pci_write_config_dword(mac-iob_pdev, + PAS_IOB_DMA_TXCH_RESET(mac-dma_txch), reg); +} + + + static int pasemi_mac_clean_rx(struct pasemi_mac *mac, int limit) { unsigned int i; @@ -559,6 +595,10 @@ static int pasemi_mac_open(struct net_de pci_write_config_dword(mac-iob_pdev, PAS_IOB_DMA_RXCH_CFG(mac-dma_rxch), PAS_IOB_DMA_RXCH_CFG_CNTTH(30)); + /* Clear out any residual packet count state from firmware */ + pasemi_mac_restart_rx_intr(mac); + pasemi_mac_restart_tx_intr(mac); + pci_write_config_dword(mac-iob_pdev, PAS_IOB_DMA_COM_TIMEOUTCFG, PAS_IOB_DMA_COM_TIMEOUTCFG_TCNT(100)); @@ -835,9 +875,7 @@ static int pasemi_mac_poll(struct net_de /* all done, no more packets present */ netif_rx_complete(dev); - /* re-enable receive interrupts */ - pci_write_config_dword(mac-iob_pdev, 
PAS_IOB_DMA_COM_TIMEOUTCFG, - PAS_IOB_DMA_COM_TIMEOUTCFG_TCNT(100)); + pasemi_mac_restart_rx_intr(mac); return 0; } else { /* used up our quantum, so reschedule */ -- - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v4 3/9] pasemi_mac: Timer and interrupt fixes
Timer and interrupt fixes: * Be pickier about which kinds of interrupts are acked, to keep the device from getting out of sync with the driver state * Set RX count threshold to 1 (for NAPI interrupted mode), TX count threshold to 32. * Set timer thresholds to current max (~16ms). Signed-off-by: Olof Johansson [EMAIL PROTECTED] Index: netdev-2.6/drivers/net/pasemi_mac.c === --- netdev-2.6.orig/drivers/net/pasemi_mac.c +++ netdev-2.6/drivers/net/pasemi_mac.c @@ -526,18 +526,28 @@ static irqreturn_t pasemi_mac_rx_intr(in struct pasemi_mac *mac = netdev_priv(dev); unsigned int reg; - if (!(*mac-rx_status PAS_STATUS_INT)) + if (!(*mac-rx_status PAS_STATUS_CAUSE_M)) return IRQ_NONE; - netif_rx_schedule(dev); - pci_write_config_dword(mac-iob_pdev, PAS_IOB_DMA_COM_TIMEOUTCFG, - PAS_IOB_DMA_COM_TIMEOUTCFG_TCNT(0)); + if (*mac-rx_status PAS_STATUS_ERROR) + printk(rx_status reported error\n); + + /* Don't reset packet count so it won't fire again but clear +* all others. +*/ + + pci_read_config_dword(mac-dma_pdev, PAS_DMA_RXINT_RCMDSTA(mac-dma_if), reg); - reg = PAS_IOB_DMA_RXCH_RESET_PINTC | PAS_IOB_DMA_RXCH_RESET_SINTC | - PAS_IOB_DMA_RXCH_RESET_DINTC; + reg = 0; + if (*mac-rx_status PAS_STATUS_SOFT) + reg |= PAS_IOB_DMA_RXCH_RESET_SINTC; + if (*mac-rx_status PAS_STATUS_ERROR) + reg |= PAS_IOB_DMA_RXCH_RESET_DINTC; if (*mac-rx_status PAS_STATUS_TIMER) reg |= PAS_IOB_DMA_RXCH_RESET_TINTC; + netif_rx_schedule(dev); + pci_write_config_dword(mac-iob_pdev, PAS_IOB_DMA_RXCH_RESET(mac-dma_rxch), reg); @@ -551,14 +561,17 @@ static irqreturn_t pasemi_mac_tx_intr(in struct pasemi_mac *mac = netdev_priv(dev); unsigned int reg; - if (!(*mac-tx_status PAS_STATUS_INT)) + if (!(*mac-tx_status PAS_STATUS_CAUSE_M)) return IRQ_NONE; pasemi_mac_clean_tx(mac); - reg = PAS_IOB_DMA_TXCH_RESET_PINTC | PAS_IOB_DMA_TXCH_RESET_SINTC; - if (*mac-tx_status PAS_STATUS_TIMER) - reg |= PAS_IOB_DMA_TXCH_RESET_TINTC; + reg = PAS_IOB_DMA_TXCH_RESET_PINTC; + + if (*mac-tx_status PAS_STATUS_SOFT) + reg |= 
PAS_IOB_DMA_TXCH_RESET_SINTC; + if (*mac-tx_status PAS_STATUS_ERROR) + reg |= PAS_IOB_DMA_TXCH_RESET_DINTC; pci_write_config_dword(mac-iob_pdev, PAS_IOB_DMA_TXCH_RESET(mac-dma_txch), reg); @@ -593,14 +606,18 @@ static int pasemi_mac_open(struct net_de flags |= PAS_MAC_CFG_PCFG_TSR_1G | PAS_MAC_CFG_PCFG_SPD_1G; pci_write_config_dword(mac-iob_pdev, PAS_IOB_DMA_RXCH_CFG(mac-dma_rxch), - PAS_IOB_DMA_RXCH_CFG_CNTTH(30)); + PAS_IOB_DMA_RXCH_CFG_CNTTH(1)); + + pci_write_config_dword(mac-iob_pdev, PAS_IOB_DMA_TXCH_CFG(mac-dma_txch), + PAS_IOB_DMA_TXCH_CFG_CNTTH(32)); /* Clear out any residual packet count state from firmware */ pasemi_mac_restart_rx_intr(mac); pasemi_mac_restart_tx_intr(mac); + /* 0xff is max value, about 16ms */ pci_write_config_dword(mac-iob_pdev, PAS_IOB_DMA_COM_TIMEOUTCFG, - PAS_IOB_DMA_COM_TIMEOUTCFG_TCNT(100)); + PAS_IOB_DMA_COM_TIMEOUTCFG_TCNT(0xff)); pci_write_config_dword(mac-pdev, PAS_MAC_CFG_PCFG, flags); Index: netdev-2.6/drivers/net/pasemi_mac.h === --- netdev-2.6.orig/drivers/net/pasemi_mac.h +++ netdev-2.6/drivers/net/pasemi_mac.h @@ -299,6 +299,7 @@ enum { #definePAS_STATUS_DCNT_S 16 #definePAS_STATUS_BPCNT_M 0xull #definePAS_STATUS_BPCNT_S 32 +#definePAS_STATUS_CAUSE_M 0xf000ull #definePAS_STATUS_TIMER0x1000ull #definePAS_STATUS_ERROR0x2000ull #definePAS_STATUS_SOFT 0x4000ull -- - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [patch 25/29] xen: Add the Xen virtual network device driver.
On Mon, 2007-05-07 at 23:30 -0700, Jeremy Fitzhardinge wrote:
> Rusty Russell wrote:
> > Looks good, you can slightly improve it to be the model use of new module_param types by calling your functions param_set_rx_mode and param_get_rx_mode, then simply using module_param(rx_mode, rx_mode, 0400)
> Cute. I tried it out, but it doesn't yield an obvious improvement:
Of course not, but it's the Right Way!
Thanks, Rusty.
- To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Intel IXP4xx network drivers v.3 - QMGR
On 8 May 2007, at 01:46, Krzysztof Halasa wrote: Adds a driver for built-in IXP4xx hardware Queue Manager. Signed-off-by: Krzysztof Halasa [EMAIL PROTECTED] [snip] diff --git a/arch/arm/mach-ixp4xx/ixp4xx_qmgr.c b/arch/arm/mach- ixp4xx/ixp4xx_qmgr.c new file mode 100644 index 000..b9e9bd6 --- /dev/null +++ b/arch/arm/mach-ixp4xx/ixp4xx_qmgr.c Already in mach-ixp4xx, so can just be called qmgr.c @@ -0,0 +1,273 @@ +/* + * Intel IXP4xx Queue Manager driver for Linux + * + * Copyright (C) 2007 Krzysztof Halasa [EMAIL PROTECTED] + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + */ + +#include linux/interrupt.h +#include linux/kernel.h +#include asm/io.h +#include asm/arch/qmgr.h + +#define DEBUG 0 + +struct qmgr_regs __iomem *qmgr_regs; +static struct resource *mem_res; +static spinlock_t qmgr_lock; +static u32 used_sram_bitmap[4]; /* 128 16-dword pages */ +static void (*irq_handlers[HALF_QUEUES])(void *pdev); +static void *irq_pdevs[HALF_QUEUES]; + +void qmgr_set_irq(unsigned int queue, int src, + void (*handler)(void *pdev), void *pdev) +{ + u32 __iomem *reg = qmgr_regs-irqsrc[queue / 8]; /* 8 queues / u32 */ + int bit = (queue % 8) * 4; /* 3 bits + 1 reserved bit per queue */ + unsigned long flags; + + src = 7; + spin_lock_irqsave(qmgr_lock, flags); + __raw_writel((__raw_readl(reg) ~(7 bit)) | (src bit), reg); + irq_handlers[queue] = handler; + irq_pdevs[queue] = pdev; + spin_unlock_irqrestore(qmgr_lock, flags); +} + + +static irqreturn_t qmgr_irq1(int irq, void *pdev) +{ + int i; + u32 val = __raw_readl(qmgr_regs-irqstat[0]); + __raw_writel(val, qmgr_regs-irqstat[0]); /* ACK */ + + for (i = 0; i HALF_QUEUES; i++) + if (val (1 i)) + irq_handlers[i](irq_pdevs[i]); + + return val ? 
IRQ_HANDLED : 0; +} + + +void qmgr_enable_irq(unsigned int queue) +{ + unsigned long flags; + + spin_lock_irqsave(qmgr_lock, flags); + __raw_writel(__raw_readl(qmgr_regs-irqen[0]) | (1 queue), +qmgr_regs-irqen[0]); + spin_unlock_irqrestore(qmgr_lock, flags); +} + +void qmgr_disable_irq(unsigned int queue) +{ + unsigned long flags; + + spin_lock_irqsave(qmgr_lock, flags); + __raw_writel(__raw_readl(qmgr_regs-irqen[0]) ~(1 queue), +qmgr_regs-irqen[0]); + spin_unlock_irqrestore(qmgr_lock, flags); +} + +static inline void shift_mask(u32 *mask) +{ + mask[3] = mask[3] 1 | mask[2] 31; + mask[2] = mask[2] 1 | mask[1] 31; + mask[1] = mask[1] 1 | mask[0] 31; + mask[0] = 1; +} + +int qmgr_request_queue(unsigned int queue, unsigned int len /* dwords */, + unsigned int nearly_empty_watermark, + unsigned int nearly_full_watermark) +{ + u32 cfg, addr = 0, mask[4]; /* in 16-dwords */ + int err; + + if (queue = HALF_QUEUES) + return -ERANGE; + + if ((nearly_empty_watermark | nearly_full_watermark) ~7) + return -EINVAL; + + switch (len) { + case 16: + cfg = 0 24; + mask[0] = 0x1; + break; + case 32: + cfg = 1 24; + mask[0] = 0x3; + break; + case 64: + cfg = 2 24; + mask[0] = 0xF; + break; + case 128: + cfg = 3 24; + mask[0] = 0xFF; + break; + default: + return -EINVAL; + } + + cfg |= nearly_empty_watermark 26; + cfg |= nearly_full_watermark 29; + len /= 16; /* in 16-dwords: 1, 2, 4 or 8 */ + mask[1] = mask[2] = mask[3] = 0; + + if (!try_module_get(THIS_MODULE)) + return -ENODEV; + + spin_lock_irq(qmgr_lock); + if (__raw_readl(qmgr_regs-sram[queue])) { + err = -EBUSY; + goto err; + } + + while (1) { + if (!(used_sram_bitmap[0] mask[0]) + !(used_sram_bitmap[1] mask[1]) + !(used_sram_bitmap[2] mask[2]) + !(used_sram_bitmap[3] mask[3])) + break; /* found free space */ + + addr++; + shift_mask(mask); + if (addr + len ARRAY_SIZE(qmgr_regs-sram)) { + printk(KERN_ERR qmgr: no free SRAM space for + queue %i\n, queue); + err = -ENOMEM; + goto err; + } + } + + used_sram_bitmap[0] |= mask[0]; + 
used_sram_bitmap[1] |= mask[1]; + used_sram_bitmap[2] |= mask[2]; + used_sram_bitmap[3] |= mask[3]; + __raw_writel(cfg | (addr
Re: [PATCH] Intel IXP4xx network drivers v.2 - Ethernet and HSS
On 8 May 2007, at 02:19, Krzysztof Halasa wrote: Adds a driver for built-in IXP4xx Ethernet MAC and HSS ports Signed-off-by: Krzysztof Halasa [EMAIL PROTECTED] diff --git a/arch/arm/mach-ixp4xx/ixdp425-setup.c b/arch/arm/mach- ixp4xx/ixdp425-setup.c index ec4f079..f20d39d 100644 --- a/arch/arm/mach-ixp4xx/ixdp425-setup.c +++ b/arch/arm/mach-ixp4xx/ixdp425-setup.c @@ -101,10 +101,35 @@ static struct platform_device ixdp425_uart = { .resource = ixdp425_uart_resources }; +/* Built-in 10/100 Ethernet MAC interfaces */ +static struct mac_plat_info ixdp425_plat_mac[] = { + { + .phy= 0, + .rxq= 3, + }, { + .phy= 1, + .rxq= 4, + } +}; + +static struct platform_device ixdp425_mac[] = { + { + .name = ixp4xx_eth, + .id = IXP4XX_ETH_NPEB, + .dev.platform_data = ixdp425_plat_mac, + }, { + .name = ixp4xx_eth, + .id = IXP4XX_ETH_NPEC, + .dev.platform_data = ixdp425_plat_mac + 1, + } +}; + static struct platform_device *ixdp425_devices[] __initdata = { ixdp425_i2c_controller, ixdp425_flash, - ixdp425_uart + ixdp425_uart, + ixdp425_mac[0], + ixdp425_mac[1], }; static void __init ixdp425_init(void) A final submission should probably have this platform data separated from the net driver and sent upstream via Russell's patch tracking system rather than netdev. diff --git a/drivers/net/arm/Kconfig b/drivers/net/arm/Kconfig index 678e4f4..5e2acb6 100644 --- a/drivers/net/arm/Kconfig +++ b/drivers/net/arm/Kconfig @@ -46,3 +46,13 @@ config EP93XX_ETH help This is a driver for the ethernet hardware included in EP93xx CPUs. Say Y if you are building a kernel for EP93xx based devices. + +config IXP4XX_ETH + tristate IXP4xx Ethernet support + depends on NET_ETHERNET ARM ARCH_IXP4XX + select IXP4XX_NPE + select IXP4XX_QMGR + select MII + help + Say Y here if you want to use built-in Ethernet ports + on IXP4xx processor. 
diff --git a/drivers/net/arm/Makefile b/drivers/net/arm/Makefile index a4c8682..7c812ac 100644 --- a/drivers/net/arm/Makefile +++ b/drivers/net/arm/Makefile @@ -9,3 +9,4 @@ obj-$(CONFIG_ARM_ETHER3)+= ether3.o obj-$(CONFIG_ARM_ETHER1) += ether1.o obj-$(CONFIG_ARM_AT91_ETHER) += at91_ether.o obj-$(CONFIG_EP93XX_ETH) += ep93xx_eth.o +obj-$(CONFIG_IXP4XX_ETH) += ixp4xx_eth.o diff --git a/drivers/net/arm/ixp4xx_eth.c b/drivers/net/arm/ ixp4xx_eth.c new file mode 100644 index 000..dcea6e5 --- /dev/null +++ b/drivers/net/arm/ixp4xx_eth.c @@ -0,0 +1,1002 @@ +/* + * Intel IXP4xx Ethernet driver for Linux + * + * Copyright (C) 2007 Krzysztof Halasa [EMAIL PROTECTED] + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of version 2 of the GNU General Public License + * as published by the Free Software Foundation. + * + * Ethernet port config (0x00 is not present on IXP42X): + * + * logical port0x000x100x20 + * NPE 0 (NPE-A) 1 (NPE-B) 2 (NPE-C) + * physical PortId 2 0 1 + * TX queue23 24 25 + * RX-free queue 26 27 28 + * TX-done queue is always 31, RX queue is configurable + */ + +#include linux/delay.h +#include linux/dma-mapping.h +#include linux/dmapool.h +#include linux/kernel.h +#include linux/mii.h +#include linux/platform_device.h +#include asm/io.h +#include asm/arch/npe.h +#include asm/arch/qmgr.h + +#ifndef __ARMEB__ +#warning Little endian mode not supported +#endif This has gone from error to warning - fair play but if are planning to put this upstream this cycle (anything's possible :) ) you'll want to declare this driver broken on ARMEB in Kconfig please. Personally I'd like LE ethernet tested and working before we push. 
+ +#define DEBUG_QUEUES 0 +#define DEBUG_RX 0 +#define DEBUG_TX 0 +#define DEBUG_PKT_BYTES0 +#define DEBUG_MDIO 0 + +#define DRV_NAME ixp4xx_eth +#define DRV_VERSION0.04 + +#define TX_QUEUE_LEN 16 /* dwords */ +#define PKT_DESCS 64 /* also length of queues: TX-done, RX-ready, RX */ + +#define POOL_ALLOC_SIZE(sizeof(struct desc) * (PKT_DESCS)) +#define REGS_SIZE 0x1000 +#define MAX_MRU1536 + +#define MDIO_INTERVAL (3 * HZ) +#define MAX_MDIO_RETRIES 100 /* microseconds, typically 30 cycles */ + +#define NPE_ID(port) ((port)-id 4) +#define PHYSICAL_ID(port) ((NPE_ID(port) + 2) % 3) +#define TX_QUEUE(plat) (NPE_ID(port) + 23) +#define
[PATCH] sched: teql_enqueue can check limits before skb enqueue
Optimize teql_enqueue so that it first checks limits before enqueueing. Patch against net-2.6.22.git Signed-off-by: Krishna Kumar [EMAIL PROTECTED] --- diff -ruNp org/net/sched/sch_teql.c new/net/sched/sch_teql.c --- org/net/sched/sch_teql.c 2007-04-09 12:37:41.0 +0530 +++ new/net/sched/sch_teql.c 2007-04-09 12:39:15.0 +0530 @@ -94,14 +94,13 @@ teql_enqueue(struct sk_buff *skb, struct struct net_device *dev = sch->dev; struct teql_sched_data *q = qdisc_priv(sch); - __skb_queue_tail(&q->q, skb); - if (q->q.qlen <= dev->tx_queue_len) { + if (q->q.qlen < dev->tx_queue_len) { + __skb_queue_tail(&q->q, skb); sch->bstats.bytes += skb->len; sch->bstats.packets++; return 0; } - __skb_unlink(skb, &q->q); kfree_skb(skb); sch->qstats.drops++; return NET_XMIT_DROP; - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] e1000: Simple optimizations in e1000_xmit_frame
Some simple optimizations in e1000_xmit_frame. Patch against net-2.6.22.git Signed-off-by: Krishna Kumar [EMAIL PROTECTED] --- diff -ruNp org/drivers/net/e1000/e1000_main.c new/drivers/net/e1000/e1000_main.c --- org/drivers/net/e1000/e1000_main.c 2007-04-09 12:40:02.0 +0530 +++ new/drivers/net/e1000/e1000_main.c 2007-04-09 12:42:28.0 +0530 @@ -3264,14 +3264,13 @@ e1000_xmit_frame(struct sk_buff *skb, st unsigned int first, max_per_txd = E1000_MAX_DATA_PER_TXD; unsigned int max_txd_pwr = E1000_MAX_TXD_PWR; unsigned int tx_flags = 0; - unsigned int len = skb->len; + unsigned int len = skb->len - skb->data_len; unsigned long flags; - unsigned int nr_frags = 0; - unsigned int mss = 0; + unsigned int nr_frags; + unsigned int mss; int count = 0; int tso; unsigned int f; - len -= skb->data_len; /* This goes back to the question of how to logically map a tx queue * to a flow. Right now, performance is impacted slightly negatively @@ -3305,7 +3304,7 @@ e1000_xmit_frame(struct sk_buff *skb, st * points to just header, pull a few bytes of payload from * frags into skb->data */ hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); - if (skb->data_len && (hdr_len == (skb->len - skb->data_len))) { + if (skb->data_len && hdr_len == len) { switch (adapter->hw.mac_type) { unsigned int pull_size; case e1000_82544: - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] sched: Optimize return value of qdisc_restart
Optimize return value of qdisc_restart so that it is not called an extra time if there are no more packets on the queue to be sent out. It is also not required to check for gso_skb (though the lock is dropped) since another cpu which added this would have done a netif_schedule. Patch against net-2.6.22.git Signed-off-by: Krishna Kumar [EMAIL PROTECTED] --- diff -ruNp org/net/sched/sch_generic.c new/net/sched/sch_generic.c --- org/net/sched/sch_generic.c 2007-05-07 17:25:25.0 +0530 +++ new/net/sched/sch_generic.c 2007-05-07 17:39:04.0 +0530 @@ -115,7 +115,7 @@ static inline int qdisc_restart(struct n kfree_skb(skb); if (net_ratelimit()) printk(KERN_DEBUG "Dead loop on netdevice %s, fix it urgently!\n", dev->name); - return -1; + return q->q.qlen ? -1 : 0; } __get_cpu_var(netdev_rx_stat).cpu_collision++; goto requeue; @@ -135,7 +135,7 @@ static inline int qdisc_restart(struct n netif_tx_unlock(dev); } spin_lock(&dev->queue_lock); - return -1; + return q->q.qlen ? -1 : 0; } if (ret == NETDEV_TX_LOCKED && nolock) { spin_lock(&dev->queue_lock); - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Intel IXP4xx network drivers v.2 - Ethernet and HSS
On 8 May 2007, at 09:26, Mikael Pettersson wrote: On Tue, 8 May 2007 08:22:17 +0100, Michael-Luke Jones wrote: AFAIK, it's a HW limitation of the IXP4xx NPEs, or possibly Intel's microcode for them. I run my IXP42x boxes big-endian and don't mind doing so. /Mikael *cough* http://www.hohnstaedt.de/ixp_npe/0.2.0/0001-IXP4XX-Driver-for-NPE- QMGR-MAC-0.2.0.txt :p --- On 8 May 2007, at 09:29, Tomasz Chmielewski wrote: Christian Hohnstaedt's work did support LE though. Indeed. Krzysztof, why is LE not supported? Butting in here. It's not supported because LE mode has to work in a brain-damaged way. NPE DMAs the complete skb straight out of RAM. Unfortunately it expects the skb to already be written out in ram BE. Thus, in LE mode we have to byteswap the skb with CPU before the NPE can DMA it. This hasn't been implemented yet. Michael-Luke Jones - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Intel IXP4xx network drivers v.2 - Ethernet and HSS
On Tue, 8 May 2007 08:22:17 +0100, Michael-Luke Jones wrote: On 8 May 2007, at 02:19, Krzysztof Halasa wrote: Adds a driver for built-in IXP4xx Ethernet MAC and HSS ports ... +#ifndef __ARMEB__ +#warning Little endian mode not supported +#endif This has gone from error to warning - fair play but if you are planning to put this upstream this cycle (anything's possible :) ) you'll want to declare this driver broken on ARMEB in Kconfig please. Personally I'd like LE ethernet tested and working before we push. AFAIK, it's a HW limitation of the IXP4xx NPEs, or possibly Intel's microcode for them. I run my IXP42x boxes big-endian and don't mind doing so. /Mikael - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Intel IXP4xx network drivers v.2 - Ethernet and HSS
Michael Jones wrote: +#ifndef __ARMEB__ +#warning Little endian mode not supported +#endif Personally I'm less fussed about WAN / LE support. Anyone with any sense will run ixp4xx boards doing such a specialised network operation as BE. Also, NSLU2-Linux can't test this functionality with our LE setup as we don't have this hardware on-board. You may just want to declare a depends on ARMEB in Kconfig (with or without OR (ARM || BROKEN) ) and have done with it - it's up to you. Christian Hohnstaedt's work did support LE though. Not all ixp4xx boards are by definition doing such a specialised network operation. Krzysztof, why is LE not supported? Do you need access to ixp4xx that starts in LE mode? -- Tomasz Chmielewski http://wpkg.org - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Intel IXP4xx network drivers v.2 - Ethernet and HSS
On 8 May 2007, at 09:48, Alexey Zaytsev wrote: I was always curious, why do people want to run ixp4xx in LE mode? What are the benefits that outweigh the obvious performance degradation? Debian. http://www.debian.org/ports/arm/ Michael-Luke - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Intel IXP4xx network drivers v.2 - Ethernet and HSS
Alexey Zaytsev schrieb: On 5/8/07, Tomasz Chmielewski [EMAIL PROTECTED] wrote: Michael Jones wrote: +#ifndef __ARMEB__ +#warning Little endian mode not supported +#endif Personally I'm less fussed about WAN / LE support. Anyone with any sense will run ixp4xx boards doing such a specialised network operation as BE. Also, NSLU2-Linux can't test this functionality with our LE setup as we don't have this hardware on-board. You may just want to declare a depends on ARMEB in Kconfig (with or without OR (ARM || BROKEN) ) and have done with it - it's up to you. Christian Hohnstaedt's work did support LE though. Not all ixp4xx boards are by definition doing such a specialised network operation. I was always curious, why do people want to run ixp4xx in LE mode? What are the benefits that outweigh the obvious performance degradation? I guess the main reason, at least for me, is that there is only one distro that properly supports LE ARM: Debian. It greatly simplifies management/administration of a higher number of devices, given the fact that Debian also supports other architectures (not just x86/64, sometimes PPC, like most distros do). Network performance is not always the most important factor. -- Tomasz Chmielewski http://wpkg.org - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [PATCH] IPROUTE: Modify tc for new PRIO multiqueue behavior
On Fri, 2007-05-04 at 23:22 +0200, Johannes Berg wrote: On Fri, 2007-05-04 at 13:43 -0700, Waskiewicz Jr, Peter P wrote: If hardware exists that wants the granularity to start/stop queues independent of each other and continue to have traffic flow, I really think it should be able to do that. Not much of an if there, I'm pretty sure at least some wireless hardware can do that. We've been watching this multiqueue stuff for a while now with some interest but haven't hashed out yet how we could use it. Right. Jamal, as you said, the wireless subsystem uses an interim workaround (the extra netdev approach) to achieve hardware packets scheduling. But with Peter's patch, the wireless stack doesn't need the workaround anymore. This is the actual fix. On Wed, 02 May 2007 08:43:49 -0400, jamal wrote: You feel the need to keep all the rings busy even when one is shutdown; I claim by having a synced up qdisc of the same scheduler type you dont need to worry about that. Both approaches are correct; what iam proposing is many factors simpler. Let me explain why this is not true for wireless. The wireless priority happens in the MAC level. That is, packets priority not only compete each other in the host, they also compete in the network. For example, once the wireless medium becomes idle from busy, the higher priority packet seizes the channel after waiting for a shorter time period (which makes the channel unavailable again). Both the high and low priority packets have to be queued in the hardware queues before they are sent out so that the hardware knows how to kick off its timers when it detects the medium is idle. If the Qdisc stops feeding all packets just because the hardware low prio queue is full (as it cannot seize the channel in the network), it is unfair to the local high prio packets. The host is too nice(2) to NOT let local high prio packets complete with the ones in the other hosts. 
BTW, you cannot write a similar scheduler in the Qdisc since it requires hard real time at the microsecond level. On second thought, this is not wireless specific. It can be generalized as hardware level packet scheduling. I think the kernel needs this kind of support. And Peter's patch addresses this well. Thanks, -yi - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: 2.6.20.7 TCP cubic (and bic) initial slow start way too slow?
Hi Bill, At this time, BIC and CUBIC use a less aggressive slow start than other protocols. Because we observed slow start is somewhat aggressive and introduced a lot of packet losses. This may be changed to standard slow start in later version of BIC and CUBIC, but, at this time, we still using a modified slow start. So, as you observed, this modified slow start behavior may slow for 10G testing. You can alleviate this for your 10G testing by changing BIC and CUBIC to use a standard slow start by loading these modules with initial_ssthresh=0. Regards, Sangtae On 5/6/07, Bill Fink [EMAIL PROTECTED] wrote: The initial TCP slow start on 2.6.20.7 cubic (and to a lesser extent bic) seems to be way too slow. With an ~80 ms RTT, this is what cubic delivers (thirty second test with one second interval reporting and specifying a socket buffer size of 60 MB): [EMAIL PROTECTED] ~]# netstat -s | grep -i retrans 0 segments retransmited [EMAIL PROTECTED] ~]# cat /proc/sys/net/ipv4/tcp_congestion_control cubic [EMAIL PROTECTED] ~]# nuttcp -T30 -i1 -w60m 192.168.89.15 6.8188 MB / 1.00 sec = 57.0365 Mbps 16.2097 MB / 1.00 sec = 135.9824 Mbps 25.4553 MB / 1.00 sec = 213.5420 Mbps 35.5127 MB / 1.00 sec = 297.9119 Mbps 43.0066 MB / 1.00 sec = 360.7770 Mbps 50.3210 MB / 1.00 sec = 422.1370 Mbps 59.0796 MB / 1.00 sec = 495.6124 Mbps 69.1284 MB / 1.00 sec = 579.9098 Mbps 76.6479 MB / 1.00 sec = 642.9130 Mbps 90.6189 MB / 1.00 sec = 760.2835 Mbps 109.4348 MB / 1.00 sec = 918.0361 Mbps 128.3105 MB / 1.00 sec = 1076.3813 Mbps 150.4932 MB / 1.00 sec = 1262.4686 Mbps 175.9229 MB / 1.00 sec = 1475.7965 Mbps 205.9412 MB / 1.00 sec = 1727.6150 Mbps 240.8130 MB / 1.00 sec = 2020.1504 Mbps 282.1790 MB / 1.00 sec = 2367.1644 Mbps 318.1841 MB / 1.00 sec = 2669.1349 Mbps 372.6814 MB / 1.00 sec = 3126.1687 Mbps 440.8411 MB / 1.00 sec = 3698.5200 Mbps 524.8633 MB / 1.00 sec = 4403.0220 Mbps 614.3542 MB / 1.00 sec = 5153.7367 Mbps 718.9917 MB / 1.00 sec = 6031.5386 Mbps 829.0474 MB / 1.00 sec = 
6954.6438 Mbps 867.3289 MB / 1.00 sec = 7275.9510 Mbps 865.7759 MB / 1.00 sec = 7262.9813 Mbps 864.4795 MB / 1.00 sec = 7251.7071 Mbps 864.5425 MB / 1.00 sec = 7252.8519 Mbps 867.3372 MB / 1.00 sec = 7246.9232 Mbps 10773.6875 MB / 30.00 sec = 3012.3936 Mbps 38 %TX 25 %RX [EMAIL PROTECTED] ~]# netstat -s | grep -i retrans 0 segments retransmited It takes 25 seconds for cubic TCP to reach its maximal rate. Note that there were no TCP retransmissions (no congestion experienced). Now with bic (only 20 second test this time): [EMAIL PROTECTED] ~]# echo bic /proc/sys/net/ipv4/tcp_congestion_control [EMAIL PROTECTED] ~]# cat /proc/sys/net/ipv4/tcp_congestion_control bic [EMAIL PROTECTED] ~]# nuttcp -T20 -i1 -w60m 192.168.89.15 9.9548 MB / 1.00 sec = 83.1497 Mbps 47.2021 MB / 1.00 sec = 395.9762 Mbps 92.4304 MB / 1.00 sec = 775.3889 Mbps 134.3774 MB / 1.00 sec = 1127.2758 Mbps 194.3286 MB / 1.00 sec = 1630.1987 Mbps 280.0598 MB / 1.00 sec = 2349.3613 Mbps 404.3201 MB / 1.00 sec = 3391.8250 Mbps 559.1594 MB / 1.00 sec = 4690.6677 Mbps 792.7100 MB / 1.00 sec = 6650.0257 Mbps 857.2241 MB / 1.00 sec = 7190.6942 Mbps 852.6912 MB / 1.00 sec = 7153.3283 Mbps 852.6968 MB / 1.00 sec = 7153.2538 Mbps 851.3162 MB / 1.00 sec = 7141.7575 Mbps 851.4927 MB / 1.00 sec = 7143.0240 Mbps 850.8782 MB / 1.00 sec = 7137.8762 Mbps 852.7119 MB / 1.00 sec = 7153.2949 Mbps 852.3879 MB / 1.00 sec = 7150.2982 Mbps 850.2163 MB / 1.00 sec = 7132.5165 Mbps 849.8340 MB / 1.00 sec = 7129.0026 Mbps 11882.7500 MB / 20.00 sec = 4984.0068 Mbps 67 %TX 41 %RX [EMAIL PROTECTED] ~]# netstat -s | grep -i retrans 0 segments retransmited bic does better but still takes 10 seconds to achieve its maximal rate. 
Surprisingly venerable reno does the best (only a 10 second test): [EMAIL PROTECTED] ~]# echo reno /proc/sys/net/ipv4/tcp_congestion_control [EMAIL PROTECTED] ~]# cat /proc/sys/net/ipv4/tcp_congestion_control reno [EMAIL PROTECTED] ~]# nuttcp -T10 -i1 -w60m 192.168.89.15 69.9829 MB / 1.01 sec = 583.5822 Mbps 844.3870 MB / 1.00 sec = 7083.2808 Mbps 862.7568 MB / 1.00 sec = 7237.7342 Mbps 859.5725 MB / 1.00 sec = 7210.8981 Mbps 860.1365 MB / 1.00 sec = 7215.4487 Mbps 865.3940 MB / 1.00 sec = 7259.8434 Mbps 863.9678 MB / 1.00 sec = 7247.4942 Mbps 864.7493 MB / 1.00 sec = 7254.4634 Mbps 864.6660 MB / 1.00 sec = 7253.5183 Mbps 7816.9375 MB / 10.00 sec = 6554.4883 Mbps 90 %TX 53 %RX [EMAIL PROTECTED] ~]# netstat -s | grep -i retrans 0 segments retransmited reno achieves its maximal rate in about 2 seconds. This is what I would expect from the exponential increase during TCP's initial slow start. To achieve 10 Gbps on an 80 ms RTT with 9000 byte jumbo frame packets would require: [EMAIL
RE: [PATCH] IPROUTE: Modify tc for new PRIO multiqueue behavior
Somehow I didn't see the mails in between. Let me think. On Tue, 2007-05-08 at 17:33 +0800, Zhu Yi wrote: Jamal, as you said, the wireless subsystem uses an interim workaround (the extra netdev approach) to achieve hardware packets scheduling. But with Peter's patch, the wireless stack doesn't need the workaround anymore. This is the actual fix. Actually, we still need multiple devices for virtual devices? Or which multiple devices are you talking about here? johannes signature.asc Description: This is a digitally signed message part
strange TCP behaviour
Dear All, I have an application that accepts redirected requests from squid (over iptables ... -j REDIRECT), gets the original IP via getsockopt/SO_ORIGINAL_DST, and forwards the data into the tunnel. I see strange behaviour: when I do sysctl net.ipv4.tcp_frto=0 and try any website with attachments (meaning a lot of data is going to be sent to the redirect), after 3-10 seconds I get the message "Read error: Connection timed out". That's a bit strange. Possibly it is related to the application, but I guess the kernel must not give an error to recv/read (I don't know what squid is using). It is a bit difficult to reproduce the problem, but I can give remote access to the PC where everything is installed. But I have no idea which of the TCP(?) maintainers can help me with this. I think there is no app problem: tcpdump looks normal and there is no reset (and the message would be different in case of a reset). -- Denys Fedoryshchenko Technical Manager Virtual ISP S.A.L. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Bugme-new] [Bug 8450] New: ip6sic causes bug during interrupt handling
On Tue, 8 May 2007 02:47:46 -0700 [EMAIL PROTECTED] wrote: http://bugzilla.kernel.org/show_bug.cgi?id=8450 Summary: ip6sic causes bug during interrupt handling Kernel Version: 2.6.21-ga989705c and earlier Status: NEW Severity: normal Owner: [EMAIL PROTECTED] Submitter: [EMAIL PROTECTED] Most recent kernel where this bug did *NOT* occur: had this since 2.6.20, didnt test earlier versions Distribution: gentoo Hardware Environment: p3 Software Environment: gcc 4.1.2 Problem Description: running ip6sic with the following seed ip6sic -i lo -d ::1 -p 2000 -r 32321 causes this oops [ 69.293000] Oops: [#1] [ 69.293000] PREEMPT [ 69.293000] Modules linked in: [ 69.293000] CPU:0 [ 69.293000] EIP:0060:[c0548b76]Not tainted VLI [ 69.293000] EFLAGS: 00010282 (2.6.21-ga989705c #7) [ 69.293000] EIP is at ipv6_hop_jumbo+0x26/0x180 [ 69.293000] eax: ebx: ce61bc08 ecx: 0001 edx: 0103 [ 69.293000] esi: ce750166 edi: 00fd ebp: c0773ed8 esp: c0773ec0 [ 69.293000] ds: 007b es: 007b fs: gs: 0033 ss: 0068 [ 69.293000] Process ip6sic (pid: 4607, ti=c0773000 task=cf332070 task.ti=cf002000) [ 69.293000] Stack: c06c4ccc 00fb c0773ef8 0246 c071ae9c 002a c0773f08 c054837f [ 69.293000]ce61bc08 c055a39d c0773f38 c071ae94 ce61bc08 ce75013c 0306 ce61bc08 [ 69.293000]c0773f38 ce61bc44 c0773f18 c0548ed1 cf8bdd84 c0773f48 c052807e [ 69.293000] Call Trace: [ 69.293000] [c010485a] show_trace_log_lvl+0x1a/0x30 [ 69.293000] [c0104919] show_stack_log_lvl+0xa9/0xd0 [ 69.293000] [c0104b5b] show_registers+0x21b/0x3a0 [ 69.293000] [c0104de3] die+0x103/0x260 [ 69.293000] [c01162c2] do_page_fault+0x2d2/0x610 [ 69.293000] [c05a5732] error_code+0x6a/0x70 [ 69.293000] [c054837f] ip6_parse_tlv+0xef/0x130 [ 69.293000] [c0548ed1] ipv6_parse_hopopts+0x41/0xb0 [ 69.293000] [c052807e] ipv6_rcv+0x1be/0x370 [ 69.293000] [c04b32fb] netif_receive_skb+0x21b/0x2b0 [ 69.293000] [c04b52d2] process_backlog+0x82/0xf0 [ 69.293000] [c04b558b] net_rx_action+0xab/0x1c0 [ 69.293000] [c0120cd2] __do_softirq+0x72/0xe0 [ 69.293000] 
[c010627a] do_softirq+0x8a/0xf0 [ 69.293000] [c0120fd5] local_bh_enable+0xa5/0x160 [ 69.293000] [c04b5738] dev_queue_xmit+0x98/0x330 [ 69.293000] [c055a678] packet_sendmsg+0x208/0x260 [ 69.293000] [c04a8594] sock_sendmsg+0xc4/0xf0 [ 69.293000] [c04a889f] sys_sendto+0xbf/0xe0 [ 69.293000] [c04a97c7] sys_socketcall+0x187/0x260 [ 69.293000] [c0104132] sysenter_past_esp+0x5f/0x99 [ 69.293000] === [ 69.293000] Code: 90 8d 74 26 00 55 89 e5 56 53 83 ec 10 8b 18 8b 4b 78 8d 34 11 80 7e 01 04 74 3b a1 10 3c 72 c0 85 c0 0f 85 7d 00 00 00 8b 43 1c 8b 80 8c 00 00 00 85 c0 74 09 8b 80 38 01 00 00 ff 40 08 a1 e4 [ 69.293000] EIP: [c0548b76] ipv6_hop_jumbo+0x26/0x180 SS:ESP 0068:c0773ec0 [ 69.305000] Kernel panic - not syncing: Fatal exception in interrupt Steps to reproduce: - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: 2.6.21-git8+ BUG: NMI Watchdog detected LOCKUP on CPU1
On Tue, 08 May 2007 10:35:14 +0200 Michal Piotrowski [EMAIL PROTECTED] wrote: Hi, / filesystem was full [39525.46] BUG: NMI Watchdog detected LOCKUP on CPU1, eip 08056990, registers: [39525.468000] Modules linked in: loop ipt_MASQUERADE iptable_nat nf_nat autofs4 af_packet nf_conntrack_netbios_ns ipt_REJECT nf_conntrack_ipv4 xt_state nf_conntrack nfnetlink iptable_filter ip_tables ip6t_REJECT xt_tcpudp ip6table_filter ip6_tables x_tables ipv6 binfmt_misc thermal processor fan container nvram snd_intel8x0 snd_ac97_codec ac97_bus snd_seq_dummy snd_seq_oss snd_seq_midi_event snd_seq snd_seq_device snd_pcm_oss snd_mixer_oss evdev snd_pcm intel_agp snd_timer snd agpgart soundcore i2c_i801 snd_page_alloc ide_cd cdrom rtc unix [39525.518000] CPU:1 [39525.518000] EIP:0073:[08056990]Not tainted VLI [39525.518000] EFLAGS: 0202 (2.6.21-ga989705c #187) [39525.529000] EIP is at 0x8056990 [39525.529000] eax: 6e560d60 ebx: 000b ecx: edx: 000dd15e [39525.541000] esi: edi: 6e560220 ebp: bfeb0a58 esp: bfeb0990 [39525.547000] ds: 007b es: 007b fs: gs: 0033 ss: 007b [39525.553000] Process line (pid: 4277, ti=cf20 task=f6f560b0 task.ti=cf20) [39525.56] Kernel panic - not syncing: Aiee, killing interrupt handler! http://www.stardust.webpages.pl/files/tbf/bitis-gabonica/2.6.21-git8/git-console.log http://www.stardust.webpages.pl/files/tbf/bitis-gabonica/2.6.21-git8/git-config I don't know what caused the CPU to jump into hyperspace like that, but Patrick tells me that this: [38773.921000] printk: 15909 messages suppressed. [38773.926000] ipt_hook: happy cracking. [38778.921000] printk: 16332 messages suppressed. [38778.925000] ipt_hook: happy cracking. [38783.921000] printk: 16175 messages suppressed. [38783.926000] ipt_hook: happy cracking. [38788.921000] printk: 16390 messages suppressed. [38788.925000] ipt_hook: happy cracking. [38793.921000] printk: 16289 messages suppressed. [38793.925000] ipt_hook: happy cracking. [38798.921000] printk: 16172 messages suppressed. 
[38798.926000] ipt_hook: happy cracking. [38803.921000] printk: 15738 messages suppressed. [38803.925000] ipt_hook: happy cracking. [38808.921000] printk: 14731 messages suppressed. happens when a local process sends packets with invalid IP headers through raw sockets. [ 5225.195000] UDP: short packet: From 37.126.206.54:46544 39671/1182 to 127.0.0.1:40761 This seems to indicate something on the local machine (packets are not routed to 127.0.0.1) is sending invalid packets, probably with incorrectly set up skb pointers. I'd suggest to add a WARN_ON(1) in ipt_local_hook(). So can you please add the appropriate WARN_ON? Whatever happens, that printk should be toned down, shouldn't it? We prefer to not let unprivileged apps spam the logs. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: 2.6.21-git8+ BUG: NMI Watchdog detected LOCKUP on CPU1
Andrew Morton wrote: Whatever happens, that printk should be toned down, shouldn't it? We prefer to not let unprivileged apps spam the logs. Only privileged apps can send these packets. I've never seen it in practice except for one case that was a bug in the network stack, so I'd prefer to keep it. The original intention was to give the user a hint why these packets are not affected by iptables rules, so a more descriptive message would make sense. I'll queue a patch for this. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: strange TCP behaviour
I forgot to mention, latest tested kernel (for now) 2.6.21-rc5 It's just not easy to upgrade, is there any changes after rc5 related to that? On Tue, 8 May 2007 12:49:36 +0300, Denys wrote Dear All I have application, thats accept redirected requests from squid (over iptables -j REDIRECT), getting ip over getsockopt/SO_ORIGINAL_DST and throwing data in to the tunnel. And i have strange behaviour, when i do sysctl net.ipv4.tcp_frto=0 And i try any website with attachments (means a lot of data going to be sent to redirect), i am getting after 3-10 seconds message Read error: Connection timed out. Thats a bit strange, possible it is related to application, but i guess kernel must not give error to recv/read (i dont know what squid is using). It is a bit difficult to reproduce the problem, but i can give a remote access to PC where all things installed. But i have no idea, who of TCP(?) maintainers can help me with this. I think there is no app problem, tcpdump looks normal, there is no reset(and message will be different in case of reset). -- Denys Fedoryshchenko Technical Manager Virtual ISP S.A.L. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html -- Denys Fedoryshchenko Technical Manager Virtual ISP S.A.L. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: 2.6.21-git8+ BUG: NMI Watchdog detected LOCKUP on CPU1
On 08/05/07, Andrew Morton [EMAIL PROTECTED] wrote: On Tue, 08 May 2007 10:35:14 +0200 Michal Piotrowski [EMAIL PROTECTED] wrote: Hi, / filesystem was full [39525.46] BUG: NMI Watchdog detected LOCKUP on CPU1, eip 08056990, registers: [39525.468000] Modules linked in: loop ipt_MASQUERADE iptable_nat nf_nat autofs4 af_packet nf_conntrack_netbios_ns ipt_REJECT nf_conntrack_ipv4 xt_state nf_conntrack nfnetlink iptable_filter ip_tables ip6t_REJECT xt_tcpudp ip6table_filter ip6_tables x_tables ipv6 binfmt_misc thermal processor fan container nvram snd_intel8x0 snd_ac97_codec ac97_bus snd_seq_dummy snd_seq_oss snd_seq_midi_event snd_seq snd_seq_device snd_pcm_oss snd_mixer_oss evdev snd_pcm intel_agp snd_timer snd agpgart soundcore i2c_i801 snd_page_alloc ide_cd cdrom rtc unix [39525.518000] CPU:1 [39525.518000] EIP:0073:[08056990]Not tainted VLI [39525.518000] EFLAGS: 0202 (2.6.21-ga989705c #187) [39525.529000] EIP is at 0x8056990 [39525.529000] eax: 6e560d60 ebx: 000b ecx: edx: 000dd15e [39525.541000] esi: edi: 6e560220 ebp: bfeb0a58 esp: bfeb0990 [39525.547000] ds: 007b es: 007b fs: gs: 0033 ss: 007b [39525.553000] Process line (pid: 4277, ti=cf20 task=f6f560b0 task.ti=cf20) [39525.56] Kernel panic - not syncing: Aiee, killing interrupt handler! http://www.stardust.webpages.pl/files/tbf/bitis-gabonica/2.6.21-git8/git-console.log http://www.stardust.webpages.pl/files/tbf/bitis-gabonica/2.6.21-git8/git-config I don't know what caused the CPU to jump into hyperspace like that, but Patrick tells me that this: [38773.921000] printk: 15909 messages suppressed. [38773.926000] ipt_hook: happy cracking. [38778.921000] printk: 16332 messages suppressed. [38778.925000] ipt_hook: happy cracking. [38783.921000] printk: 16175 messages suppressed. [38783.926000] ipt_hook: happy cracking. [38788.921000] printk: 16390 messages suppressed. [38788.925000] ipt_hook: happy cracking. [38793.921000] printk: 16289 messages suppressed. [38793.925000] ipt_hook: happy cracking. 
[38798.921000] printk: 16172 messages suppressed. [38798.926000] ipt_hook: happy cracking. [38803.921000] printk: 15738 messages suppressed. [38803.925000] ipt_hook: happy cracking. [38808.921000] printk: 14731 messages suppressed. happens when a local process sends packets with invalid IP headers through raw sockets. Yes, it was an isic session. [ 5225.195000] UDP: short packet: From 37.126.206.54:46544 39671/1182 to 127.0.0.1:40761 This seems to indicate something on the local machine (packets are not routed to 127.0.0.1) is sending invalid packets, probably with incorrectly set up skb pointers. I'd suggest to add a WARN_ON(1) in ipt_local_hook(). So can you please add the appropriate WARN_ON? Whatever happens, that printk should be toned down, shouldn't it? We prefer to not let unprivileged apps spam the logs. [39293.925000] ipt_hook: happy cracking. [39429.024000] printk: 15828 messages suppressed. [39429.028000] nf_conntrack: table full, dropping packet. [39430.034000] nf_conntrack: table full, dropping packet. [39431.039000] nf_conntrack: table full, dropping packet. [39432.044000] nf_conntrack: table full, dropping packet. [39444.056000] nf_conntrack: table full, dropping packet. [39445.061000] nf_conntrack: table full, dropping packet. [39525.46] BUG: NMI Watchdog detected LOCKUP on CPU1, eip 08056990, registers: This lockup occurred after an isic test. Hmmm... linus_stress? 
FAIL aio_dio_bugs Command LD_LIBRARY_PATH=/usr/local/autotest/client/deps/libaio/lib/ /usr/local/autotest/client/tests/aio_dio_bugs/src/aio-dio-extend-stat file failed, rc=32512 GOOD aiostress completed successfully GOOD bonnie completed successfully GOOD cpu_hotplug completed successfully GOOD cyclictest completed successfully GOOD dbench completed successfully FAIL disktest running test disktest --[random error]-- FAIL fs_mark Command ./fs_mark -d /mnt -s 10240 -n 1000 failed, rc=256 GOOD fsfuzzer completed successfully GOOD fsx completed successfully FAIL interbench Command /usr/local/autotest/client/tests/interbench/src/interbench -m 'run #0' -c failed, rc=256 GOOD iozone completed successfully FAIL isic running test job Traceback (most recent call last): File /usr/local/autotest/client/bin/job.py, line 179, in __runtest test.runtest(self, url, tag, args, dargs) File /usr/local/autotest/client/bin/test.py, line 195, in runtest fork_waitfor(job.resultdir, pid) File /usr/local/autotest/client/bin/parallel.py, line 40, in fork_waitfor (pid, status) = os.waitpid(pid, 0) KeyboardInterrupt GOOD linus_stress completed successfully I don't remember what was the next test. I'll try to find out how to reproduce this lockup. Anyway, IMO it's not a network related problem. Regards, Michal -- Michal K. K. Piotrowski Kernel Monkeys (http://kernel.wikidot.com/start) - To unsubscribe from this list:
Pull request for 'r8169-for-jeff' branch
Please pull from branch 'r8169-for-jeff' in repository git://electric-eye.fr.zoreil.com/home/romieu/linux/linux-2.6-out r8169-for-jeff to get the changes below. Distance from 'netdev-2.6-upstream' (a0b8ed4f168a604001ee492468c79c20edfac2f4) -- 02839990a776e2665a65e9e5779374b6eaf9c50d 8a116e5bfd93d94271cea02d7053dd65d06aeaba 803de0aadf5e47776470181b732b4d926e8333dc ee49838edddc90d4d2dc16ac980fbc18649b1e95 37a71099a5626d20dfa618b880ace88bd0202f1b 22df31d6ae900bfbca2a55f3fe06b09b68d49cb8 a6e25c31cea49404b466f849a019b337a9b809b1 8c6fa83054d569d4817c051d300114ebc959fdcb f25c82f04a6223579d383726cd487819587d6517 89af92046107d6ba09efd3fc364798f9c5ec3abd Diffstat drivers/net/r8169.c | 792 --- 1 files changed, 438 insertions(+), 354 deletions(-) Shortlog Francois Romieu (10): r8169: add per-device hw_start handler (1/2) r8169: add per-device hw_start handler (2/2) r8169: merge with version 6.001.00 of Realtek's r8169 driver r8169: merge with version 8.001.00 of Realtek's r8168 driver r8169: confusion between hardware and IP header alignment r8169: small 8101 comment r8169: remove the media option r8169: cleanup r8169: add bit description for the TxPoll register r8169: align the IP header when there is no DMA constraint I removed the MSI stuff and the MAC address change support from the previous serie. They will be submitted again after updating and testing. Patch - diff --git a/drivers/net/r8169.c b/drivers/net/r8169.c index 45876a8..045f1a1 100644 --- a/drivers/net/r8169.c +++ b/drivers/net/r8169.c @@ -1,53 +1,11 @@ /* -= - r8169.c: A RealTek RTL-8169 Gigabit Ethernet driver for Linux kernel 2.4.x. - - - History: - Feb 4 2002 - created initially by ShuChen [EMAIL PROTECTED]. - May 20 2002 - Add link status force-mode and TBI mode support. - 2004- Massive updates. See kernel SCM system for details. -= - 1. [DEPRECATED: use ethtool instead] The media can be forced in 5 modes. 
-Command: 'insmod r8169 media = SET_MEDIA' -Ex: 'insmod r8169 media = 0x04' will force PHY to operate in 100Mpbs Half-duplex. - -SET_MEDIA can be: - _10_Half= 0x01 - _10_Full= 0x02 - _100_Half = 0x04 - _100_Full = 0x08 - _1000_Full = 0x10 - - 2. Support TBI mode. -= -VERSION 1.12002/10/4 - - The bit4:0 of MII register 4 is called selector field, and have to be - 1b to indicate support of IEEE std 802.3 during NWay process of - exchanging Link Code Word (FLP). - -VERSION 1.22002/11/30 - - - Large style cleanup - - Use ether_crc in stock kernel (linux/crc32.h) - - Copy mc_filter setup code from 8139cp - (includes an optimization, and avoids set_bit use) - -VERSION 1.6LK 2004/04/14 - - - Merge of Realtek's version 1.6 - - Conversion to DMA API - - Suspend/resume - - Endianness - - Misc Rx/Tx bugs - -VERSION 2.2LK 2005/01/25 - - - RX csum, TX csum/SG, TSO - - VLAN - - baby ( 7200) Jumbo frames support - - Merge of Realtek's version 2.2 (new phy) + * r8169.c: RealTek 8169/8168/8101 ethernet driver. + * + * Copyright (c) 2002 ShuChen [EMAIL PROTECTED] + * Copyright (c) 2003 - 2007 Francois Romieu [EMAIL PROTECTED] + * Copyright (c) a lot of people too. Please respect their work. + * + * See MAINTAINERS file fro support contact information. */ #include linux/module.h @@ -108,11 +66,6 @@ VERSION 2.2LK 2005/01/25 #define rtl8169_rx_quota(count, quota) count #endif -/* media options */ -#define MAX_UNITS 8 -static int media[MAX_UNITS] = { -1, -1, -1, -1, -1, -1, -1, -1 }; -static int num_media = 0; - /* Maximum events (Rx packets, etc.) to handle at each interrupt. */ static const int max_interrupt_work = 20; @@ -126,7 +79,7 @@ static const int multicast_filter_limit = 32; #define RX_FIFO_THRESH 7 /* 7 means NO threshold, Rx buffer level before first PCI xfer. 
*/ #define RX_DMA_BURST 6 /* Maximum PCI burst, '6' is 1024 */ #define TX_DMA_BURST 6 /* Maximum PCI burst, '6' is 1024 */ -#define EarlyTxThld0x3F/* 0x3F means NO early transmit */ +#define EarlyTxThld0x3F/* 0x3F means NO early transmit */ #define RxPacketMaxSize0x3FE8 /* 16K - 1 - ETH_HLEN - VLAN - CRC... */ #define SafeMtu0x1c20 /* ... actually life sucks beyond ~7k */ #define InterFrameGap 0x03/* 3 means InterFrameGap = the shortest one */ @@
Re: [PATCH] Intel IXP4xx network drivers v.3 - QMGR
I'm not sure what the latest versions are, so I'm not sure which patches to review and which patches are obsolete. On Tue, May 08, 2007 at 02:46:28AM +0200, Krzysztof Halasa wrote: +struct qmgr_regs __iomem *qmgr_regs; +static struct resource *mem_res; +static spinlock_t qmgr_lock; +static u32 used_sram_bitmap[4]; /* 128 16-dword pages */ +static void (*irq_handlers[HALF_QUEUES])(void *pdev); +static void *irq_pdevs[HALF_QUEUES]; + +void qmgr_set_irq(unsigned int queue, int src, + void (*handler)(void *pdev), void *pdev) +{ + u32 __iomem *reg = qmgr_regs-irqsrc[queue / 8]; /* 8 queues / u32 */ + int bit = (queue % 8) * 4; /* 3 bits + 1 reserved bit per queue */ + unsigned long flags; + + src = 7; + spin_lock_irqsave(qmgr_lock, flags); + __raw_writel((__raw_readl(reg) ~(7 bit)) | (src bit), reg); + irq_handlers[queue] = handler; + irq_pdevs[queue] = pdev; + spin_unlock_irqrestore(qmgr_lock, flags); +} The queue manager interrupts should probably be implemented as an irqchip, in the same way that GPIO interrupts are implemented. (I.e. allocate 'real' interrupt numbers for them, and use the interrupt cascade mechanism.) You probably want to have separate irqchips for the upper and lower halves, too. This way, drivers can just use request_irq() instead of having to bother with platform-specific qmgr_set_irq() methods. I think I also made this review comment with Christian's driver. 
+int qmgr_request_queue(unsigned int queue, unsigned int len /* dwords */, +unsigned int nearly_empty_watermark, +unsigned int nearly_full_watermark) +{ + u32 cfg, addr = 0, mask[4]; /* in 16-dwords */ + int err; + + if (queue = HALF_QUEUES) + return -ERANGE; + + if ((nearly_empty_watermark | nearly_full_watermark) ~7) + return -EINVAL; + + switch (len) { + case 16: + cfg = 0 24; + mask[0] = 0x1; + break; + case 32: + cfg = 1 24; + mask[0] = 0x3; + break; + case 64: + cfg = 2 24; + mask[0] = 0xF; + break; + case 128: + cfg = 3 24; + mask[0] = 0xFF; + break; + default: + return -EINVAL; + } + + cfg |= nearly_empty_watermark 26; + cfg |= nearly_full_watermark 29; + len /= 16; /* in 16-dwords: 1, 2, 4 or 8 */ + mask[1] = mask[2] = mask[3] = 0; + + if (!try_module_get(THIS_MODULE)) + return -ENODEV; + + spin_lock_irq(qmgr_lock); + if (__raw_readl(qmgr_regs-sram[queue])) { + err = -EBUSY; + goto err; + } + + while (1) { + if (!(used_sram_bitmap[0] mask[0]) + !(used_sram_bitmap[1] mask[1]) + !(used_sram_bitmap[2] mask[2]) + !(used_sram_bitmap[3] mask[3])) + break; /* found free space */ + + addr++; + shift_mask(mask); + if (addr + len ARRAY_SIZE(qmgr_regs-sram)) { + printk(KERN_ERR qmgr: no free SRAM space for + queue %i\n, queue); + err = -ENOMEM; + goto err; + } + } + + used_sram_bitmap[0] |= mask[0]; + used_sram_bitmap[1] |= mask[1]; + used_sram_bitmap[2] |= mask[2]; + used_sram_bitmap[3] |= mask[3]; + __raw_writel(cfg | (addr 14), qmgr_regs-sram[queue]); + spin_unlock_irq(qmgr_lock); + +#if DEBUG + printk(KERN_DEBUG qmgr: requested queue %i, addr = 0x%02X\n, +queue, addr); +#endif + return 0; + +err: + spin_unlock_irq(qmgr_lock); + module_put(THIS_MODULE); + return err; +} As with Christian's driver, I don't know whether an SRAM allocator makes much sense. We can just set up a static allocation map for the in-tree drivers and leave out the allocator altogether. I.e. 
I don't think it's worth the complexity (and just because the butt-ugly Intel code has an allocator isn't a very good reason. :-) I.e. an API a la: ixp4xx_qmgr_config_queue(int queue_nr, int sram_base_address, int queue_size, ...); might simply suffice. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 3/3] Intel IXP4xx network drivers
On Mon, May 07, 2007 at 02:07:16AM +0200, Krzysztof Halasa wrote: + * Ethernet port config (0x00 is not present on IXP42X): + * + * logical port 0x00 0x10 0x20 + * NPE 0 (NPE-A) 1 (NPE-B) 2 (NPE-C) + * physical PortId 2 0 1 + * TX queue 23 24 25 + * RX-free queue 26 27 28 + * TX-done queue is always 31, RX queue is configurable (Note that this assignment depends on the firmware, and different firmware versions use different queues -- you might want to add a note about which firmware version this holds for.) - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 3/3] Intel IXP4xx network drivers
On Mon, May 07, 2007 at 09:18:00PM +0100, Michael-Luke Jones wrote: Well, I'm told that (compatible) NPEs are present on other IXP CPUs. Not sure about details. If, by a combined effort, we ever manage to create a generic NPE driver for the NPEs found in IXP42x/43x/46x/2000/23xx then the driver should go in arch/arm/npe.c (Note that the ixp2000 doesn't have NPEs.) (Both the 2000 and the 23xx have microengines, which are both supported by arch/arm/common/uengine.c.) It's possible, but hard due to the differences in hardware design The ixp23xx NPEs seem pretty much identical to me to the ixp4xx NPEs. There are some minor differences between the ixp2000 and ixp23xx uengines, but those are easy enough to deal with. and the fact that boards based on anything other than 42x are few and far between. The vast majority of 'independent' users following mainline are likely running on 42x boards. Sure, ixp23xx hardware is harder to get. I'm not sure what you mean by 'independent' users, though. Are people with non-42x hardware 'dependent' users, and why? Thus, for now, I would drop the NPE / QMGR code in arch/arm/mach-ixp4xx/ and concentrate on making it 42x/43x/46x agnostic. One step at a time :) I'd say that it's up to those who are interested in ixp23xx support (probably only myself at this point) to add ixp23xx support. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 3/3] Intel IXP4xx network drivers
On Mon, May 07, 2007 at 10:00:20PM +0200, Krzysztof Halasa wrote: - the NPE can also be used as DMA engine and for crypto operations. Both are not network related. Additionally, the NPE is not only ixp4xx related, but is also used in IXP23xx CPUs, so it could be placed in arch/arm/common or arch/arm/xscale ? - The MAC is used on IXP23xx, too. So the drivers for both CPU families only differ in the way they exchange network packets between the NPE and the kernel. Hmm... perhaps someone has a spare device with such IXP23xx and wants to make it a donation for science? :-) I have a couple of ixp23xx boards at home, but I'm not sure whether I can give them away. I can give you remote access to them, though. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
skge vs sk98lin
2.6.21 seems to have fixed the stability issues I was seeing when using the skge driver with the older sk98xx dual port fiber cards. There is still one more lingering oddness: if I have *two* dual port cards in a system, say eth2-5, I see traffic on eth2, eth3, and eth5, but nothing on eth4. This seems to be consistent across a couple of systems I've tested; only the first card's second interface sees packets (e.g., with tcpdump). If I reboot with the sk98lin driver on the same kernel I see all traffic, as expected. Mike Stone - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] ehea: Receive SKB Aggregation
This patch enables the receive side processing to aggregate TCP packets within the HEA device driver. It analyses the packets already received after a interrupt arrived and forwards these as a chains of SKBs for the same TCP connection with modified header field. We have seen a lower CPU load and improved throughput for small numbers of parallel TCP connections. We added a disabled module parameter to prevent disruption of normal driver operation. We currently consider this as experimental until further review and tests have been passed. Are there any concerns about including this in the mainline driver? Signed-off-by: Thomas Klein [EMAIL PROTECTED] --- diff -Nurp -X dontdiff linux-2.6.22pre/drivers/net/ehea/ehea.h patched_kernel/drivers/net/ehea/ehea.h --- linux-2.6.22pre/drivers/net/ehea/ehea.h 2007-05-07 15:55:43.0 +0200 +++ patched_kernel/drivers/net/ehea/ehea.h 2007-05-07 16:00:34.0 +0200 @@ -39,7 +39,7 @@ #include asm/io.h #define DRV_NAME ehea -#define DRV_VERSIONEHEA_0058 +#define DRV_VERSIONEHEA_0060 #define EHEA_MSG_DEFAULT (NETIF_MSG_LINK | NETIF_MSG_TIMER \ | NETIF_MSG_RX_ERR | NETIF_MSG_TX_ERR) @@ -49,6 +49,7 @@ #define EHEA_MAX_ENTRIES_RQ3 16383 #define EHEA_MAX_ENTRIES_SQ 32767 #define EHEA_MIN_ENTRIES_QP 127 +#define EHEA_LRO_MAX_PKTS 60 #define EHEA_SMALL_QUEUES #define EHEA_NUM_TX_QP 1 @@ -78,6 +79,9 @@ #define EHEA_RQ2_PKT_SIZE 1522 #define EHEA_L_PKT_SIZE 256/* low latency */ +#define MAX_LRO_DESCRIPTORS 8 +#define LRO_DESC_MASK 0x + /* Send completion signaling */ /* Protection Domain Identifier */ @@ -334,6 +338,29 @@ struct ehea_q_skb_arr { }; /* + * Large Receive Offload (LRO) descriptor for a tcp seesion + */ +struct ehea_lro { + struct sk_buff *parent; + struct sk_buff *last_skb; +struct iphdr *iph; +struct tcphdr *tcph; + + u32 tcp_rcv_tsecr; + u32 tcp_rcv_tsval; + u32 tcp_ack; + u32 tcp_next_seq; + u32 skb_tot_frags_len; + u16 ip_tot_len; + u16 tcp_saw_tstamp; /* timestamps enabled */ + u16 tcp_window; + u16 vlan_tag; + int skb_sg_cnt; 
/* counts aggregated skbs */ + int vlan_packet; + int active; +}; + +/* * Port resources */ struct ehea_port_res { @@ -362,6 +389,9 @@ struct ehea_port_res { u64 tx_packets; u64 rx_packets; u32 poll_counter; + struct ehea_lro lro[MAX_LRO_DESCRIPTORS]; + u64 lro_desc; + struct port_stats p_state; }; @@ -411,6 +441,7 @@ struct ehea_port { u32 msg_enable; u32 sig_comp_iv; u32 state; + u32 lro_max_aggr; u8 full_duplex; u8 autoneg; u8 num_def_qps; diff -Nurp -X dontdiff linux-2.6.22pre/drivers/net/ehea/ehea_main.c patched_kernel/drivers/net/ehea/ehea_main.c --- linux-2.6.22pre/drivers/net/ehea/ehea_main.c2007-05-07 15:59:16.0 +0200 +++ patched_kernel/drivers/net/ehea/ehea_main.c 2007-05-07 16:00:34.0 +0200 @@ -34,6 +34,7 @@ #include linux/list.h #include linux/if_ether.h #include net/ip.h +#include net/tcp.h #include ehea.h #include ehea_qmr.h @@ -52,6 +53,8 @@ static int rq2_entries = EHEA_DEF_ENTRIE static int rq3_entries = EHEA_DEF_ENTRIES_RQ3; static int sq_entries = EHEA_DEF_ENTRIES_SQ; static int use_mcs = 0; +static int use_lro = 0; +static int lro_max_pkts = EHEA_LRO_MAX_PKTS; static int num_tx_qps = EHEA_NUM_TX_QP; module_param(msg_level, int, 0); @@ -60,6 +63,8 @@ module_param(rq2_entries, int, 0); module_param(rq3_entries, int, 0); module_param(sq_entries, int, 0); module_param(use_mcs, int, 0); +module_param(use_lro, int, 0); +module_param(lro_max_pkts, int, 0); module_param(num_tx_qps, int, 0); MODULE_PARM_DESC(num_tx_qps, Number of TX-QPS); @@ -77,6 +82,9 @@ MODULE_PARM_DESC(sq_entries, Number of [2^x - 1], x = [6..14]. Default = __MODULE_STRING(EHEA_DEF_ENTRIES_SQ) )); MODULE_PARM_DESC(use_mcs, 0:NAPI, 1:Multiple receive queues, Default = 1 ); +MODULE_PARM_DESC(lro_max_pkts, LRO: Max packets to be aggregated. 
Default = +__MODULE_STRING(EHEA_LRO_MAX_PKTS)); +MODULE_PARM_DESC(use_lro, 1: enable, 0: disable Large Reveive Offload ); static int port_name_cnt = 0; @@ -380,6 +388,282 @@ static int ehea_treat_poll_error(struct return 0; } +static int try_get_ip_tcp_hdr(struct ehea_cqe *cqe, struct sk_buff *skb, + struct iphdr **iph, struct tcphdr **tcph) +{ + int ip_len; + +/* non tcp/udp packets */ + if (!cqe-header_length) + return -1; + +/* non tcp packet */ +*iph = (struct iphdr *)(skb-data); + if ((*iph)-protocol != IPPROTO_TCP) + return -1; + +ip_len = (u8)((*iph)-ihl); +ip_len = 2; +*tcph = (struct tcphdr *)(((u64)*iph) + ip_len); + + return 0; +} +
[1/2] [NET] link_watch: Move link watch list into net_device
On Mon, May 07, 2007 at 02:10:27PM -0700, Jeremy Fitzhardinge wrote: We should just change this to use netif_device_attach and netif_device_detach. Like this? Sorry, I had forgotten that I've already concluded previously that this doesn't work because we don't want to prevent the interface from being brought up (and other reasons). My memory is failing me :) So I think the best option now is to get rid of the delay on carrier on events for everyone. Here is the first of 2 patches. [NET] link_watch: Move link watch list into net_device These days the link watch mechanism is an integral part of the network subsystem as it manages the carrier status. So it now makes sense to allocate some memory for it in net_device rather than allocating it on demand. In fact, this is necessary because we can't tolerate a memory allocation failure since that means we'd have to potentially throw a link up event away. It also simplifies the code greatly. In doing so I discovered a subtle race condition in the use of singleevent. This race condition still exists (and is somewhat magnified) without singleevent but it's now plugged thanks to an smp_mb__before_clear_bit. 
Signed-off-by: Herbert Xu [EMAIL PROTECTED] Cheers, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt -- a6c194d06da9aed2a8f5a4ea07e3cbf9266db4ef diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 3044622..f671cd2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -467,6 +467,8 @@ struct net_device /* device index hash chain */ struct hlist_node index_hlist; + struct net_device *link_watch_next; + /* register/unregister state machine */ enum { NETREG_UNINITIALIZED=0, NETREG_REGISTERED, /* completed register_netdevice */ diff --git a/net/core/link_watch.c b/net/core/link_watch.c index e3c26a9..71a35da 100644 --- a/net/core/link_watch.c +++ b/net/core/link_watch.c @@ -19,7 +19,6 @@ #include linux/rtnetlink.h #include linux/jiffies.h #include linux/spinlock.h -#include linux/list.h #include linux/slab.h #include linux/workqueue.h #include linux/bitops.h @@ -28,7 +27,6 @@ enum lw_bits { LW_RUNNING = 0, - LW_SE_USED }; static unsigned long linkwatch_flags; @@ -37,17 +35,9 @@ static unsigned long linkwatch_nextevent; static void linkwatch_event(struct work_struct *dummy); static DECLARE_DELAYED_WORK(linkwatch_work, linkwatch_event); -static LIST_HEAD(lweventlist); +static struct net_device *lweventlist; static DEFINE_SPINLOCK(lweventlist_lock); -struct lw_event { - struct list_head list; - struct net_device *dev; -}; - -/* Avoid kmalloc() for most systems */ -static struct lw_event singleevent; - static unsigned char default_operstate(const struct net_device *dev) { if (!netif_carrier_ok(dev)) @@ -90,21 +80,23 @@ static void rfc2863_policy(struct net_device *dev) /* Must be called with the rtnl semaphore held */ void linkwatch_run_queue(void) { - struct list_head head, *n, *next; + struct net_device *next; spin_lock_irq(lweventlist_lock); - list_replace_init(lweventlist, head); + 
next = lweventlist; + lweventlist = NULL; spin_unlock_irq(lweventlist_lock); - list_for_each_safe(n, next, head) { - struct lw_event *event = list_entry(n, struct lw_event, list); - struct net_device *dev = event-dev; + while (next) { + struct net_device *dev = next; - if (event == singleevent) { - clear_bit(LW_SE_USED, linkwatch_flags); - } else { - kfree(event); - } + next = dev-link_watch_next; + + /* +* Make sure the above read is complete since it can be +* rewritten as soon as we clear the bit below. +*/ + smp_mb__before_clear_bit(); /* We are about to handle this device, * so new events can be accepted @@ -147,24 +139,12 @@ void linkwatch_fire_event(struct net_device *dev) { if (!test_and_set_bit(__LINK_STATE_LINKWATCH_PENDING, dev-state)) { unsigned long flags; - struct lw_event *event; - - if (test_and_set_bit(LW_SE_USED, linkwatch_flags)) { - event = kmalloc(sizeof(struct lw_event), GFP_ATOMIC); - - if (unlikely(event == NULL)) { - clear_bit(__LINK_STATE_LINKWATCH_PENDING, dev-state); - return; - } - } else { - event = singleevent; - } dev_hold(dev); - event-dev = dev;
Re: [PATCH] ehea: Receive SKB Aggregation
On Tue, May 08, 2007 at 02:05:33PM +0200, Thomas Klein wrote: This patch enables the receive side processing to aggregate TCP packets within the HEA device driver. It analyses the packets already received after a interrupt arrived and forwards these as a chains of SKBs for the same TCP connection with modified header field. We have seen a lower CPU load and improved throughput for small numbers of parallel TCP connections. We added a disabled module parameter to prevent disruption of normal driver operation. We currently consider this as experimental until further review and tests have been passed. Are there any concerns about including this in the mainline driver? So we grow yet another LRO copy in drivers? I think someone needs to bite the bullet and make it generic. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] ehea: Receive SKB Aggregation
On Tue, 8 May 2007 14:05:33 +0200 Thomas Klein [EMAIL PROTECTED] wrote: +struct ehea_lro { + struct sk_buff *parent; + struct sk_buff *last_skb; +struct iphdr *iph; +struct tcphdr *tcph; Spaces instead of tabs. Also in at least one other place in this patch. -- Cheers, Stephen Rothwell[EMAIL PROTECTED] http://www.canb.auug.org.au/~sfr/ pgpccSVHP2g0w.pgp Description: PGP signature
Re: [PATCH] Intel IXP4xx network drivers v.3 - QMGR
On 5/8/07, Lennert Buytenhek [EMAIL PROTECTED] wrote: ... As with Christian's driver, I don't know whether an SRAM allocator makes much sense. We can just set up a static allocation map for the in-tree drivers and leave out the allocator altogether. I.e. I don't think it's worth the complexity (and just because the butt-ugly Intel code has an allocator isn't a very good reason. :-) Is the qmgr used when the NPEs are utilized as DMA engines? And is the allocator needed in this case? If yes, I beg you not to drop it, because we use one NPE for this purpose, and if we are going to adopt this driver instead of the intel's one, you will receive a patch adding DMA functionality very soon. ;) - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [PATCH] IPROUTE: Modify tc for new PRIO multiqueue behavior
On Tue, 2007-08-05 at 11:45 +0200, Johannes Berg wrote: .. Sorry, I missed a lot of the discussions; I am busy and will try to catch up later tonight. I have quickly scanned the emails and I will respond backwards (typically the most effective way to catch up with a thread). As a summary, I am not against the concept of addressing per-ring flow control. Having said that, I fully understand where DaveM and Stephen are coming from. Making such huge changes to a critical region to support uncommon hardware doesn't abide by the "optimize for the common case" paradigm. That has also been the basis of my argument all along. I also agree it is quite a fscked approach to have the virtual flow control. I think it is driven by some marketing people and I don't really think there is a science behind it. Switched (External) PCI-E which is supposed to be really cheap and hit the market RSN has per-virtual queue flow control, so that may be where that came from. In any case, that is a digression. Peter, can we meet the goals you strive for and stick to optimizing for the common case? How willing are you to change directions to achieve those goals? On Tue, 2007-05-08 at 17:33 +0800, Zhu Yi wrote: Jamal, as you said, the wireless subsystem uses an interim workaround (the extra netdev approach) to achieve hardware packets scheduling. But with Peter's patch, the wireless stack doesn't need the workaround anymore. This is the actual fix. I don't believe wireless needs anything other than the simple approach I described. The fact that an occasional low prio packet may end up going out before a high prio one due to contention does not affect the overall results. Actually, we still need multiple devices for virtual devices? Or which multiple devices are you talking about here? Those virtual devices you have right now. They are a hack that needs to go at some point. 
cheers, jamal - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Intel IXP4xx network drivers v.2 - Ethernet and HSS
On 5/8/07, Alexey Zaytsev [EMAIL PROTECTED] wrote: I was always curious, why do people want to run ixp4xx in LE mode? What are the benefits that outweigh the obvious performance degradation? Debian on the NSLU2 runs in LE, and it is pretty popular. http://www.linuxdevices.com/news/NS3535328630.html Gordon -- Gordon Farquharson - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 3/3] Intel IXP4xx network drivers
Lennert Buytenhek [EMAIL PROTECTED] writes: I have a couple of ixp23xx boards at home, but I'm not sure whether I can give them away. I can give you remote access to them, though. Hmm, may be interesting some day. -- Krzysztof Halasa - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Intel IXP4xx network drivers v.2 - NPE
Michael-Luke Jones [EMAIL PROTECTED] writes: Already in mach-ixp4xx, so can just be called npe.c I want ixp4xx_ prefix in module name, otherwise I'd call it npe.c, sure. Debugging code? Can this go? Why? Especially with code having to work with third party binary-only firmware? Suicide. They are eliminated at build time = performance hit (OTOH this file isn't on any fast path). It may be a matter of taste, but could some of the many definitions at the top of ixp4xx_npe.c go in the header file here? It's actually not only a matter of taste, they are private to the .c file and I don't want to make them available to the public (but sure, I don't like them in .c either, I think nobody likes such definitions anywhere but they have to exist somewhere). -- Krzysztof Halasa - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Intel IXP4xx network drivers v.3 - QMGR
Michael-Luke Jones [EMAIL PROTECTED] writes: Already in mach-ixp4xx, so can just be called qmgr.c Same here. +#define QUEUE_IRQ_SRC_NEARLY_FULL 2 +#define QUEUE_IRQ_SRC_FULL 3 +#define QUEUE_IRQ_SRC_NOT_EMPTY 4 +#define QUEUE_IRQ_SRC_NOT_NEARLY_EMPTY 5 +#define QUEUE_IRQ_SRC_NOT_NEARLY_FULL 6 +#define QUEUE_IRQ_SRC_NOT_FULL 7 Here, unlike ixp4xx_npe.c defines are in qmgr.h - that seems a bit more natural. Because they are public interface :-) -- Krzysztof Halasa - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Intel IXP4xx network drivers v.3 - QMGR
Lennert Buytenhek [EMAIL PROTECTED] writes: The queue manager interrupts should probably be implemented as an irqchip, in the same way that GPIO interrupts are implemented. (I.e. allocate 'real' interrupt numbers for them, and use the interrupt cascade mechanism.) You probably want to have separate irqchips for the upper and lower halves, too. This way, drivers can just use request_irq() instead of having to bother with platform-specific qmgr_set_irq() methods. Is there a sample somewhere? As with Christian's driver, I don't know whether an SRAM allocator makes much sense. We can just set up a static allocation map for the in-tree drivers and leave out the allocator altogether. I.e. I don't think it's worth the complexity (and just because the butt-ugly Intel code has an allocator isn't a very good reason. :-) It's a very simple allocator. I don't think we have enough SRAM without it. For now it would work but it's probably too small for all potential users at a time. There may be up to 6 Ethernet ports (not sure about hardware status, not yet supported even by Intel) - 7 queues * 128 entries each = ~ 3.5 KB. Add 2 long queues (RX) for HSS and something for TX, and then crypto, and maybe other things. The current allocator has its potential problems, but they can be solved internally (fragmentation, but we tend to use only 128-entry queues (RX and TX-ready Ethernet pool) and short, 16-entry ones (TX) - easy to deal with). -- Krzysztof Halasa - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Intel IXP4xx network drivers v.2 - Ethernet and HSS
Lennert Buytenhek [EMAIL PROTECTED] writes: +/* Built-in 10/100 Ethernet MAC interfaces */ +static struct mac_plat_info ixdp425_plat_mac[] = { +{ +.phy= 0, +.rxq= 3, +}, { +.phy= 1, +.rxq= 4, +} +}; As with Christian's driver (I'm feeling like a bit of a broken record here :-), putting knowledge of which queue to use (which is firmware-specific) in the _board_ support file is almost certainly wrong. I would just put the port number in there, and let the ethernet driver map the port number to the hardware queue number. After all, the ethernet driver knows which queues the firmware uses, while the board support code doesn't. No, quite the opposite. The board code knows its set of hardware interfaces etc. and can let the Ethernet driver use, say, HSS queues. The driver can't know that. It would make sense if we had many queues, but it doesn't seem to be the case (perhaps the upper queues could be used for some purposes, but Intel's code doesn't use them either and they probably know better). +static inline void set_regbits(u32 bits, u32 __iomem *reg) +{ +__raw_writel(__raw_readl(reg) | bits, reg); +} +static inline void clr_regbits(u32 bits, u32 __iomem *reg) +{ +__raw_writel(__raw_readl(reg) & ~bits, reg); +} I generally discourage the use of such wrappers, as it often makes people forget that the set and clear operations are not atomic, and it ignores the fact that some of the other bits in the register you are modifying might have side-effects. Without them the code in question is hardly readable, I pick the need to remember about non-atomicity and possible side effects instead :-) I've outlined the current versions in a separate mail, generally 2 patches are marked v.2 and one v.3. -- Krzysztof Halasa - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Intel IXP4xx network drivers v.3 - QMGR
On Tue, May 08, 2007 at 04:12:17PM +0200, Krzysztof Halasa wrote: The queue manager interrupts should probably be implemented as an irqchip, in the same way that GPIO interrupts are implemented. (I.e. allocate 'real' interrupt numbers for them, and use the interrupt cascade mechanism.) You probably want to have separate irqchips for the upper and lower halves, too. This way, drivers can just use request_irq() instead of having to bother with platform-specific qmgr_set_irq() methods. Is there a sample somewhere? See for example arch/arm/mach-ep93xx/core.c, handling of the A/B/F port GPIO interrupts. In a nutshell, it goes like this. 1) Allocate a set of IRQ numbers. E.g. in include/asm-arm/arch-ixp4xx/irqs.h: #define IRQ_IXP4XX_QUEUE_0 64 #define IRQ_IXP4XX_QUEUE_1 65 [...] Adjust NR_IRQS, too. 2) Implement interrupt chip functions: static void ixp4xx_queue_low_irq_mask_ack(unsigned int irq) { [...] } static void ixp4xx_queue_low_irq_mask(unsigned int irq) { [...] } static void ixp4xx_queue_low_irq_unmask(unsigned int irq) { [...] } static void ixp4xx_queue_low_irq_set_type(unsigned int irq) { [...] 
} static struct irq_chip ixp4xx_queue_low_irq_chip = { .name = QMGR low, .ack= ixp4xx_queue_low_irq_mask_ack, .mask = ixp4xx_queue_low_irq_mask, .unmask = ixp4xx_queue_low_irq_unmask, .set_type = ixp4xx_queue_low_irq_set_type, }; 3) Hook up the queue interrupts: for (i = IRQ_IXP4XX_QUEUE_0; i = IRQ_IXP4XX_QUEUE_31; i++) { set_irq_chip(i, ixp4xx_queue_low_irq_chip); set_irq_handler(i, handle_level_irq); set_irq_flags(i, IRQF_VALID); } 4) Implement an interrupt handler for the parent interrupt: static void ixp4xx_qmgr_low_irq_handler(unsigned int irq, struct irq_des c *desc) { u32 status; int i; status = __raw_readl(IXP4XX_WHATEVER_QMGR_LOW_STATUS_REGISTER); for (i = 0; i 32; i++) { if (status (1 i)) { desc = irq_desc + IRQ_IXP4XX_QUEUE_0 + i; desc_handle_irq(IRQ_IXP4XX_QUEUE_0 + i, desc); } } } 5) Hook up the parent interrupt: set_irq_chained_handler(IRQ_IXP4XX_QM1, ixp4xx_qmgr_low_irq_handler); Or something like that. As with Christian's driver, I don't know whether an SRAM allocator makes much sense. We can just set up a static allocation map for the in-tree drivers and leave out the allocator altogether. I.e. I don't think it's worth the complexity (and just because the butt-ugly Intel code has an allocator isn't a very good reason. :-) It's a very simple allocator. I don't whink we have enough SRAM without it. For now it would work but it's probably too small for all potential users at a time. There may be up to 6 Ethernet ports (not sure about hardware status, not yet supported even by Intel) - 7 queues * 128 entries each = ~ 3.5 KB. Add 2 long queues (RX) for HSS and something for TX, and then crypto, and maybe other things. You're unlikely to be using all of those at the same time, though. And what do you do if the user does compile all of these features into his kernel and then tries to use them all at the same time? Return -ENOMEM? Shouldn't we make sure that at least the features that are compiled in can be used at the same time? 
If you want that guarantee, then you might as well determine the SRAM map at compile time. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Intel IXP4xx network drivers v.2 - Ethernet and HSS
On Tue, May 08, 2007 at 04:31:12PM +0200, Krzysztof Halasa wrote: +/* Built-in 10/100 Ethernet MAC interfaces */ +static struct mac_plat_info ixdp425_plat_mac[] = { + { + .phy= 0, + .rxq= 3, + }, { + .phy= 1, + .rxq= 4, + } +}; As with Christian's driver (I'm feeling like a bit of a broken record here :-), putting knowledge of which queue to use (which is firmware- specific) in the _board_ support file is almost certainly wrong. I would just put the port number in there, and let the ethernet driver map the port number to the hardware queue number. After all, the ethernet driver knows which queues the firmware uses, while the board support code doesn't. No, quite the opposite. The board code knows its set of hardware interfaces etc. and can let Ethernet driver use, say, HSS queues. The driver can't know that. You are attacking a point that I did not make. The board support code knows such things as that the front ethernet port on the board is connected to the CPU's MII port number #2, but the board support code does _not_ know that MII port number #2 corresponds to ixp4xx hardware queue #5. If Intel puts out a firmware update next month, and your ethernet driver is modified to take advantage of the new features in that firmware and starts depending on the newer version of that firmware, we will have to modify every ixp4xx board support file in case the firmware update modifies the ixp4xx queue numbers in use. The mapping from hardware ports (MII port #0, MII port #6, HSS port #42, whatever) to ixp4xx hardware queue numbers (0-63) should _not_ be put in every single ixp4xx board support file. Even if you only change the (in board support file) .rxq= 4, line to something like this instead: (in some ixp4xx-specific or driver-specific header file) #define IXP4XX_MII_PORT_1_RX_QUEUE 4 (in board support file) .rxq= IXP4XX_MII_PORT_1_RX_QUEUE, then you have remved this dependency, and then you only have to update one place if you move to a newer firmware version. 
I generally discourage the use of such wrappers, as it often makes people forget that the set and clear operations are not atomic, and it ignores the fact that some of the other bits in the register you are modifying might have side-effects. Without them the code in question is hardly readable, You can read Polish, how can you complain about code readability. :-)) *runs* I pick the need to remember about non-atomicity and possible side effects instead :-) Sure, point taken, it's just that the person after you might not remember.. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Intel IXP4xx network drivers v.2 - Ethernet and HSS
Alexey Zaytsev [EMAIL PROTECTED] writes: I was always curious, why do people want to run ixp4xx in LE mode? What are the benefits that outweigh the obvious performance degradation? Debian is indeed a valid reason. I wonder if it would be much work to create BE Debian as well. Simple automated process it seems, for the most part at least. -- Krzysztof Halasa - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] e1000: Simple optimizations in e1000_xmit_frame
Krishna Kumar wrote: Some simple optimizations in e1000_xmit_frame. Patch against net-2.6.22.git Signed-off-by: Krishna Kumar [EMAIL PROTECTED] looks good. I'll queue this and push it upstream. Thanks, Auke --- diff -ruNp org/drivers/net/e1000/e1000_main.c new/drivers/net/e1000/e1000_main.c --- org/drivers/net/e1000/e1000_main.c 2007-04-09 12:40:02.0 +0530 +++ new/drivers/net/e1000/e1000_main.c 2007-04-09 12:42:28.0 +0530 @@ -3264,14 +3264,13 @@ e1000_xmit_frame(struct sk_buff *skb, st unsigned int first, max_per_txd = E1000_MAX_DATA_PER_TXD; unsigned int max_txd_pwr = E1000_MAX_TXD_PWR; unsigned int tx_flags = 0; - unsigned int len = skb->len; + unsigned int len = skb->len - skb->data_len; unsigned long flags; - unsigned int nr_frags = 0; - unsigned int mss = 0; + unsigned int nr_frags; + unsigned int mss; int count = 0; int tso; unsigned int f; - len -= skb->data_len; /* This goes back to the question of how to logically map a tx queue * to a flow. Right now, performance is impacted slightly negatively @@ -3305,7 +3304,7 @@ e1000_xmit_frame(struct sk_buff *skb, st * points to just header, pull a few bytes of payload from * frags into skb->data */ hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb); - if (skb->data_len && (hdr_len == (skb->len - skb->data_len))) { + if (skb->data_len && hdr_len == len) { switch (adapter->hw.mac_type) { unsigned int pull_size; case e1000_82544: - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Intel IXP4xx network drivers v.2 - Ethernet and HSS
On Tue, May 08, 2007 at 05:28:21PM +0200, Krzysztof Halasa wrote: I was always curious, why do people want to run ixp4xx in LE mode? What are the benefits that outweigh the obvious performance degradation? Debian is indeed a valid reason. I wonder if it would be much work to create BE Debian as well. There _is_ an ARM BE version of Debian. It's not an official port, but it's not maintained any worse than the 'official' LE ARM Debian port is. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Getting make net/built-in.o Error with 2.6.21.1 Build
Hi Jiri, On 5/8/07, Jiri Kosina [EMAIL PROTECTED] wrote: On Tue, 8 May 2007, Satyam Sharma wrote: CC init/version.o LD init/built-in.o LD .tmp_vmlinux1 net/built-in.o: In function `hidp_add_connection': (.text+0x8bb08): undefined reference to `hid_ff_init' make: *** [.tmp_vmlinux1] Error 1 You've got CONFIG_BT_HIDP=y there but CONFIG_USB_HID=m. The trivial patch below solves it. To test, you can just make oldconfig on the same .config that broke above and then proceed to make bzImage. I'd say we need a different fix here. First, the question is whether there are any force-feedback devices, supported by USBHID force-feedback layer, which have a bluetooth version? If there are none of them, we could just drop the FF initialization completely for now. On the other hand if there are such devices, just calling hid_ff_init() is presently not enough anyway, as the force feedback drivers for HID devices are currently USB-transport specific. Sure, my aim here was to only solve the _build breakage_ by fixing the Kconfig for this module (that used code from another kernel module without listing it in its dependencies). If, as you say, the real solution is that we should actually be taking out the offending call to the other module itself, then please go ahead -- I don't know much about the Bluetooth / HIDP subsystem anyway. Thanks, Satyam - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Getting make net/built-in.o Error with 2.6.21.1 Build
On Tue, 8 May 2007, Satyam Sharma wrote: Sure, my aim here was to only solve the _build breakage_ by fixing the Kconfig for this module (that used code from another kernel module without listing it in its dependencies). If, as you say, the real solution is that we should actually be taking out the offending call to the other module itself, then please go ahead -- I don't know much about the Bluetooth / HIDP subsytem anyway. Converting the hid-ff drivers to be also transport-independent is on my TODO list, but it didn't happen yet. Marcel - are you aware of any devices currently supported by USB HID force-feedback code, which have a bluetooth version, please? I'd propose the patch below, until I make the usbhid force-feedback code transport independent. Thanks. From: Jiri Kosina [EMAIL PROTECTED] [Bluetooth] HIDP - don't initialize force feedback The current implementation of force feedback for HID devices is USB-transport only and therefore calling hid_ff_init() from hidp code is not going to work (plus it creates unwanted dependency of hidp on usbhid). Remove the hid_ff_init() until either the hid-ff is made transport-independent, or at least support for bluetooth transport is added. 
Signed-off-by: Jiri Kosina [EMAIL PROTECTED] diff --git a/net/bluetooth/hidp/core.c b/net/bluetooth/hidp/core.c index d342e89..3e77e81 100644 --- a/net/bluetooth/hidp/core.c +++ b/net/bluetooth/hidp/core.c @@ -737,10 +737,8 @@ static inline void hidp_setup_hid(struct list_for_each_entry(report, hid-report_enum[HID_FEATURE_REPORT].report_list, list) hidp_send_report(session, report); - if (hidinput_connect(hid) == 0) { + if (hidinput_connect(hid) == 0) hid-claimed |= HID_CLAIMED_INPUT; - hid_ff_init(hid); - } } int hidp_add_connection(struct hidp_connadd_req *req, struct socket *ctrl_sock, struct socket *intr_sock) - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Getting make net/built-in.o Error with 2.6.21.1 Build
Hi Jiri, Sure, my aim here was to only solve the _build breakage_ by fixing the Kconfig for this module (that used code from another kernel module without listing it in its dependencies). If, as you say, the real solution is that we should actually be taking out the offending call to the other module itself, then please go ahead -- I don't know much about the Bluetooth / HIDP subsytem anyway. Converting the hid-ff drivers to be also transport-independent is on my TODO list, but it didn't happen yet. Marcel - are you aware of any devices currently supported by USB HID force-feedback code, which have a bluetooth version, please? I haven't looked at all details for the PS3 controller, but that might be the first one. In theory they can and at some point they will enter the market. I'd propose the patch below, until I make the usbhid force-feedback code transport independent. Thanks. From: Jiri Kosina [EMAIL PROTECTED] [Bluetooth] HIDP - don't initialize force feedback The current implementation of force feedback for HID devices is USB-transport only and therefore calling hid_ff_init() from hidp code is not going to work (plus it creates unwanted dependency of hidp on usbhid). Remove the hid_ff_init() until either the hid-ff is made transport-independent, or at least support for bluetooth transport is added. Signed-off-by: Jiri Kosina [EMAIL PROTECTED] Signed-off-by: Marcel Holtmann [EMAIL PROTECTED] Under the condition that you remember to put it back once a generic FF exists. Regards Marcel - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Intel IXP4xx network drivers v.3 - QMGR
Lennert Buytenhek [EMAIL PROTECTED] writes: See for example arch/arm/mach-ep93xx/core.c, handling of the A/B/F port GPIO interrupts. In a nutshell, it goes like this. Thanks, I will investigate. There may be up to 6 Ethernet ports (not sure about hardware status, not yet supported even by Intel) - 7 queues * 128 entries each = ~ 3.5 KB. Add 2 long queues (RX) for HSS and something for TX, and then crypto, and maybe other things. You're unlikely to be using all of those at the same time, though. That's the point. And what do you do if the user does compile all of these features into his kernel and then tries to use them all at the same time? Return -ENOMEM? If he is able to do so, yes - there is nothing we can do. But I suspect a single machine would not have all possible hardware. The problem is, we don't know what it would have, so it must be dynamic. Shouldn't we make sure that at least the features that are compiled in can be used at the same time? We can't - hardware capabilities limit that. A general purpose distribution would probably want to compile in everything (perhaps as modules). If you want that guarantee, then you might as well determine the SRAM map at compile time. That would be most limiting with IMHO no visible advantage. -- Krzysztof Halasa - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: skge vs sk98lin
On Tue, 08 May 2007 08:00:36 -0400 Michael Stone [EMAIL PROTECTED] wrote: 2.6.21 seems to have fixed the stability issues I was seeing when using the skge driver with the older sk98xx dual port fiber cards. There is still one more lingering oddness: if I have *two* dual port cards in a system, say eth2-5, I see traffic on eth2, eth3, and eth5, but nothing on eth4. This seems to be consistent across a couple of systems I've tested; only the first card's second interface sees packets (e.g., with tcpdump). If I reboot with the sk98lin driver on the same kernel I see all traffic, as expected. Mike Stone Are the statistics changing? i.e. ethtool -S eth4 and ifconfig eth4 -- Stephen Hemminger [EMAIL PROTECTED] - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Intel IXP4xx network drivers v.2 - Ethernet and HSS
Lennert Buytenhek [EMAIL PROTECTED] writes: The board support code knows such things as that the front ethernet port on the board is connected to the CPU's MII port number #2, but the board support code does _not_ know that MII port number #2 corresponds to ixp4xx hardware queue #5. Sure. And I don't want it to know. It has to pick up any available queue for RX, that is. If the board code knows it uses ETH connected to NPE-B and NPE-C, and HSS-0 connected (obviously) to NPE-A, and it wants some crypto functions etc., it can pick a queue which normally belongs to HSS-1. If the code knows the board has both HSS and only NPE-B Ethernet, it can use one of NPE-C Ethernet's queues. It's that simple. The Ethernet (and HSS etc.) driver knows it has to use queue 24 for NPE-B Ethernet's TX and 27 for TX and so on, this is fixed in the firmware so I don't let the board code mess with that. The Ethernet RX queue is different, we can just make something up and tell NPE about that. That's BTW the same thing you would want to do with SRAM - except that the SRAM allocator is technically possible, while making queue assignments needs knowledge about the hardware. If Intel puts out a firmware update next month, and your ethernet driver is modified to take advantage of the new features in that firmware and starts depending on the newer version of that firmware, we will have to modify every ixp4xx board support file in case the firmware update modifies the ixp4xx queue numbers in use. Nope, we just modify Ethernet driver: drivers/net/arm/ixp4xx_eth.c: #define TX_QUEUE(plat) (NPE_ID(port) + 23) #define RXFREE_QUEUE(plat) (NPE_ID(port) + 26) #define TXDONE_QUEUE31 The mapping from hardware ports (MII port #0, MII port #6, HSS port #42, whatever) to ixp4xx hardware queue numbers (0-63) should _not_ be put in every single ixp4xx board support file. 
I've never considered doing that :-) drivers/net/wan/ixp4xx_hss.c: /* Queue IDs */ #define HSS0_CHL_RXTRIG_QUEUE 12 /* orig size = 32 dwords */ #define HSS0_PKT_RX_QUEUE 13 /* orig size = 32 dwords */ #define HSS0_PKT_TX0_QUEUE 14 /* orig size = 16 dwords */ #define HSS0_PKT_TX1_QUEUE 15 #define HSS0_PKT_TX2_QUEUE 16 #define HSS0_PKT_TX3_QUEUE 17 #define HSS0_PKT_RXFREE0_QUEUE 18 /* orig size = 16 dwords */ #define HSS0_PKT_RXFREE1_QUEUE 19 #define HSS0_PKT_RXFREE2_QUEUE 20 #define HSS0_PKT_RXFREE3_QUEUE 21 #define HSS0_PKT_TXDONE_QUEUE 22 /* orig size = 64 dwords */ #define HSS1_CHL_RXTRIG_QUEUE 10 #define HSS1_PKT_RX_QUEUE 0 #define HSS1_PKT_TX0_QUEUE 5 #define HSS1_PKT_TX1_QUEUE 6 #define HSS1_PKT_TX2_QUEUE 7 #define HSS1_PKT_TX3_QUEUE 8 #define HSS1_PKT_RXFREE0_QUEUE 1 #define HSS1_PKT_RXFREE1_QUEUE 2 #define HSS1_PKT_RXFREE2_QUEUE 3 #define HSS1_PKT_RXFREE3_QUEUE 4 #define HSS1_PKT_TXDONE_QUEUE 9 Without them the code in question is hardly readable, You can read Polish, how can you complain about code readability. :-)) Well, you may have the point, but I also care about others :-) -- Krzysztof Halasa - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Intel IXP4xx network drivers v.2 - Ethernet and HSS
Lennert Buytenhek [EMAIL PROTECTED] writes: There _is_ an ARM BE version of Debian. It's not an official port, but it's not maintained any worse than the 'official' LE ARM Debian port is. Hmm... That changes a bit. Perhaps we should forget about that LE thing then, and (at best) put that trivial workaround? -- Krzysztof Halasa - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Intel IXP4xx network drivers v.2 - Ethernet and HSS
Krzysztof Halasa schrieb: Lennert Buytenhek [EMAIL PROTECTED] writes: There _is_ an ARM BE version of Debian. It's not an official port, but it's not maintained any worse than the 'official' LE ARM Debian port is. Hmm... That changes a bit. Perhaps we should forget about that LE thing then, and (at best) put that trivial workaround? Does using ixp4xx on LE have any other drawbacks than inferior network performance? And talking about network performance, what numbers are we talking about (LE vs BE; 30% performance hit on LE, more, or less)? -- Tomasz Chmielewski http://wpkg.org - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Getting make net/built-in.o Error with 2.6.21.1 Build
On Tue, 8 May 2007, Marcel Holtmann wrote: Marcel - are you aware of any devices currently supported by USB HID force-feedback code, which have a bluetooth version, please? I haven't looked at all details for the PS3 controller, but that might be the first one. In theory they can and at some point they will enter the market. You are right, PS3 controller is going to be shipped in both variants. On the other hand it is perfectly possible that we will need special force-feedback driver for it anyway. BTW when talking about this - we already have PS3 quirk present in usb hid (extra control URB is required to make it operational), probably something similar will be needed for BT version too. From: Jiri Kosina [EMAIL PROTECTED] [Bluetooth] HIDP - don't initialize force feedback The current implementation of force feedback for HID devices is USB-transport only and therefore calling hid_ff_init() from hidp code is not going to work (plus it creates unwanted dependency of hidp on usbhid). Remove the hid_ff_init() until either the hid-ff is made transport-independent, or at least support for bluetooth transport is added. Signed-off-by: Jiri Kosina [EMAIL PROTECTED] Signed-off-by: Marcel Holtmann [EMAIL PROTECTED] Under the condition that you remember to put it back once a generic FF exists. Sure. I will take this through my tree then, thanks. -- Jiri Kosina - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Getting make net/built-in.o Error with 2.6.21.1 Build
Hi Jiri, Marcel - are you aware of any devices currently supported by USB HID force-feedback code, which have a bluetooth version, please? I haven't looked at all details for the PS3 controller, but that might be the first one. In theory they can and at some point they will enter the market. You are right, PS3 controller is going to be shipped in both variants. On the other hand it is perfectly possible that we will need special force-feedback driver for it anyway. BTW when talking about this - we already have PS3 quirk present in usb hid (extra control URB is required to make it operational), probably something similar will be needed for BT version too. I know, but we do that in user space before we hand over the control to the kernel. It is simpler, because it is on the control channel and that part is not nicely integrated into the HID subsystem to be used by the transport layers right now. At least as far as I can tell. Regards Marcel - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Intel IXP4xx network drivers v.2 - Ethernet and HSS
Tomasz Chmielewski [EMAIL PROTECTED] writes: Does using ixp4xx on LE have any other drawbacks than inferior network performance? More memory is needed, something like max 600 KB for 2 Ethernet ports. And talking about network performance, what numbers are we talking about (LE vs BE; 30% performance hit on LE, more, or less)? Haven't checked yet but I'd expect something like that. -- Krzysztof Halasa - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Please pull 'upstream' branch of wireless-2.6
This request is withdrawn. New request to follow. On Mon, May 07, 2007 at 01:51:21PM -0400, John W. Linville wrote: The following changes since commit 15700770ef7c5d12e2f1659d2ddbeb3f658d9f37: Linus Torvalds (1): Merge git://git.kernel.org/.../sam/kbuild are found in the git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-2.6.git upstream Daniel Drake (1): zd1211rw: Add ID for ZyXEL AG-225H v2 Geert Uytterhoeven (1): mac80211: include linux/delay.h instead of asm/delay.h Ivo van Doorn (1): Add 93cx6 eeprom library John W. Linville (1): libertas: fix for wireless Kconfig changes Larry Finger (3): ieee80211: add ieee80211_channel_to_freq ieee80211: include frequency in scan results bcm43xx: Remove dead configuration variable CONFIG_947XX Matthew Davidson (1): zd1211rw: Add ID for Sitecom WL-117 Michael Wu (1): Add rtl8187 wireless driver Ulrich Kunitz (1): zd1211rw: Added new USB id for Planex GW-US54ZGL -- John W. Linville [EMAIL PROTECTED] - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Please pull 'upstream' branch of wireless-2.6
The following changes since commit 5b94f675f57e4ff16c8fda09088d7480a84dcd91: Linus Torvalds (1): Merge master.kernel.org:/.../davem/sparc-2.6 are found in the git repository at: git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-2.6.git upstream Daniel Drake (1): zd1211rw: Add ID for ZyXEL AG-225H v2 Larry Finger (3): ieee80211: add ieee80211_channel_to_freq ieee80211: include frequency in scan results bcm43xx: Remove dead configuration variable CONFIG_947XX Matthew Davidson (1): zd1211rw: Add ID for Sitecom WL-117 Ulrich Kunitz (1): zd1211rw: Added new USB id for Planex GW-US54ZGL drivers/net/wireless/bcm43xx/bcm43xx.h | 18 +- drivers/net/wireless/bcm43xx/bcm43xx_dma.c |4 - drivers/net/wireless/bcm43xx/bcm43xx_main.c | 81 --- drivers/net/wireless/bcm43xx/bcm43xx_main.h | 19 -- drivers/net/wireless/zd1211rw/zd_usb.c |4 + include/net/ieee80211.h |2 + net/ieee80211/ieee80211_geo.c | 16 + net/ieee80211/ieee80211_wx.c|8 ++- 8 files changed, 31 insertions(+), 121 deletions(-) diff --git a/drivers/net/wireless/bcm43xx/bcm43xx.h b/drivers/net/wireless/bcm43xx/bcm43xx.h index f8483c1..10e07e8 100644 --- a/drivers/net/wireless/bcm43xx/bcm43xx.h +++ b/drivers/net/wireless/bcm43xx/bcm43xx.h @@ -658,12 +658,6 @@ struct bcm43xx_pio { #define BCM43xx_MAX_80211_CORES2 -#ifdef CONFIG_BCM947XX -#define core_offset(bcm) (bcm)-current_core_offset -#else -#define core_offset(bcm) 0 -#endif - /* Generic information about a core. */ struct bcm43xx_coreinfo { u8 available:1, @@ -789,10 +783,6 @@ struct bcm43xx_private { /* The currently active core. */ struct bcm43xx_coreinfo *current_core; -#ifdef CONFIG_BCM947XX - /** current core memory offset */ - u32 current_core_offset; -#endif struct bcm43xx_coreinfo *active_80211_core; /* coreinfo structs for all possible cores follow. * Note that a core might not exist. 
@@ -943,25 +933,25 @@ struct bcm43xx_lopair * bcm43xx_get_lopair(struct bcm43xx_phyinfo *phy, static inline u16 bcm43xx_read16(struct bcm43xx_private *bcm, u16 offset) { - return ioread16(bcm-mmio_addr + core_offset(bcm) + offset); + return ioread16(bcm-mmio_addr + offset); } static inline void bcm43xx_write16(struct bcm43xx_private *bcm, u16 offset, u16 value) { - iowrite16(value, bcm-mmio_addr + core_offset(bcm) + offset); + iowrite16(value, bcm-mmio_addr + offset); } static inline u32 bcm43xx_read32(struct bcm43xx_private *bcm, u16 offset) { - return ioread32(bcm-mmio_addr + core_offset(bcm) + offset); + return ioread32(bcm-mmio_addr + offset); } static inline void bcm43xx_write32(struct bcm43xx_private *bcm, u16 offset, u32 value) { - iowrite32(value, bcm-mmio_addr + core_offset(bcm) + offset); + iowrite32(value, bcm-mmio_addr + offset); } static inline diff --git a/drivers/net/wireless/bcm43xx/bcm43xx_dma.c b/drivers/net/wireless/bcm43xx/bcm43xx_dma.c index e3d2e61..1f7731f 100644 --- a/drivers/net/wireless/bcm43xx/bcm43xx_dma.c +++ b/drivers/net/wireless/bcm43xx/bcm43xx_dma.c @@ -660,10 +660,6 @@ struct bcm43xx_dmaring * bcm43xx_setup_dmaring(struct bcm43xx_private *bcm, ring-routing = BCM43xx_DMA32_CLIENTTRANS; if (dma64) ring-routing = BCM43xx_DMA64_CLIENTTRANS; -#ifdef CONFIG_BCM947XX - if (bcm-pci_dev-bus-number == 0) - ring-routing = dma64 ? 
BCM43xx_DMA64_NOTRANS : BCM43xx_DMA32_NOTRANS; -#endif ring-bcm = bcm; ring-nr_slots = nr_slots; diff --git a/drivers/net/wireless/bcm43xx/bcm43xx_main.c b/drivers/net/wireless/bcm43xx/bcm43xx_main.c index 5e96bca..ef6b253 100644 --- a/drivers/net/wireless/bcm43xx/bcm43xx_main.c +++ b/drivers/net/wireless/bcm43xx/bcm43xx_main.c @@ -61,10 +61,6 @@ MODULE_AUTHOR(Stefano Brivio); MODULE_AUTHOR(Michael Buesch); MODULE_LICENSE(GPL); -#ifdef CONFIG_BCM947XX -extern char *nvram_get(char *name); -#endif - #if defined(CONFIG_BCM43XX_DMA) defined(CONFIG_BCM43XX_PIO) static int modparam_pio; module_param_named(pio, modparam_pio, int, 0444); @@ -142,10 +138,6 @@ MODULE_PARM_DESC(fwpostfix, Postfix for .fw files. Useful for using multiple fi { PCI_VENDOR_ID_BROADCOM, 0x4324, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, /* Broadcom 43XG 802.11b/g */ { PCI_VENDOR_ID_BROADCOM, 0x4325, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, -#ifdef CONFIG_BCM947XX - /* SB bus on BCM947xx */ - { PCI_VENDOR_ID_BROADCOM, 0x0800, PCI_ANY_ID, PCI_ANY_ID, 0, 0, 0 }, -#endif { 0 }, }; MODULE_DEVICE_TABLE(pci, bcm43xx_pci_tbl); @@ -786,9 +778,6 @@ static int bcm43xx_sprom_extract(struct bcm43xx_private *bcm) { u16 value; u16 *sprom; -#ifdef CONFIG_BCM947XX - char *c; -#endif sprom = kzalloc(BCM43xx_SPROM_SIZE *
SG_IO with 4k buffer size to iscsi sg device causes Bad page panic
Hi All, This panic is related to the interactions between scsi/sg.c, iscsi initiator and tcp on the RHEL 2.6.9-42 kernel. But we may also have the similar problem with open-iscsi initiator. I will explain why we see the Bad page panic first. I did a patch to the sg driver to workaround the problem and seek for ideas where we should fix the problem. When sg driver accepts a sg_io request from user space, it invokes kernel API __get_free_pages() to allocate multiple pages for holding user space data IO request. The allocated pages will consist of one base page and a number of sub pages (total 8 pages for a big request). The pages have the following attributes after they are allocated by the sg driver. 0 page:01007fb89ac0 flags:0x0100 mapping: mapcount:0 count:1 1 page:01007fb89af8 flags:0x0104 mapping: mapcount:0 count:0 2 page:01007fb89b30 flags:0x0104 mapping: mapcount:0 count:0 Please note that only the base page has count=1 and all subpages have count=0. After the request reaches iscsi-sfnet initiator driver, the iscsi-sfnet driver will send a buffer with multiple pages one by one through network interface API. rc = sock-ops-sendpage(sock, pg, pg_offset, len, flags); At the network layer (linux/net/ipv4/tcp.c), the sendpage() operation will perform get_page() first and then put_page() later. The get_page() will increase the page's count by 1. The put_page() will perform the following (linux/mm/swap.c) void put_page(struct page *page) { if (unlikely(PageCompound(page))) { page = (struct page *)page-private; if (put_page_testzero(page)) { void (*dtor)(struct page *page); dtor = (void (*)(struct page *))page[1].mapping; (*dtor)(page); } return; } if (!PageReserved(page) put_page_testzero(page)) __page_cache_release(page); } Please note that if the count is 0, the page will be released and recycled to the free-page pool. At the time when sg driver is ready to free its allocated pages by invoking free_pages(), the sub-pages is already re-used by someone else. 
We will get Bad page kernel exception such as the following Bad page state at __free_pages_ok (in process 'java', page 01007fb89b30) flags:0x0100103c mapping:010075a4eaf0 mapcount:0 count:2 Backtrace: Call Trace:8015d37f{bad_page+112} 8015d713{__free_pages_ok+154} a01d9fa5{:sg:sg_remove_scat+276} a01da13e {:sg:sg_finish_rem_req+238} a01da56a{:sg:sg_new_read+1050} a01dcb48{:sg:sg_ioctl+929} 8030a0f5{thread_return+0} 801d42e6{selinux_file_ioctl+711} 8030ab88{schedule_timeout+224} 8016bfb6{find_extend_vma+22} 8014c6b0{unqueue_me+138} 8014c8ce{do_futex+441} 80135752{autoremove_wake_function+0} 80135752{autoremove_wake_function+0} 8018ae05{sys_ioctl+853} 8012a122{sg_ioctl_trans+832} 8019e8ac{compat_sys_ioctl+235} 80125bbb{sysenter_do_call+27} In the above oops, the page with page address 01007fb89b30 has been reused with active count 2 and memory mapped. Because the sg driver tries to free a page that is mapped and active, we got the above bad page panic. I did the following patch to the sg.c. The sg driver will set PG_reserved for all sub-pages at sg_page_malloc() time and clear the bit/count at sg_page_free() time. I tested it and it worked great. Do you see any side impacts with this patch? Is this a right place to fix the panic? We may have similar problem for st driver. 
--- linux-2.6.9/drivers/scsi/sg.c 2007-05-07 22:14:33.0 -0500 +++ /home/yqi/working_sg_iscsi_sfnet/sg.c 2007-05-07 22:45:26.0 -0500 @@ -2551,8 +2551,9 @@ sg_page_malloc(int rqSz, int lowDma, int { char *resp = NULL; int page_mask; - int order, a_size; + int order, a_size, m; int resSz = rqSz; + struct page *tmppage; if (rqSz = 0) return resp; @@ -2571,6 +2572,13 @@ sg_page_malloc(int rqSz, int lowDma, int resp = (char *) __get_free_pages(page_mask, order); /* try half */ resSz = a_size; } + tmppage = virt_to_page(resp); + for( m = PAGE_SIZE; m resSz; m += PAGE_SIZE ) + { + tmppage++; + SetPageReserved(tmppage); + } + if (resp) { if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO)) memset(resp, 0, resSz); @@ -2583,12 +2591,20 @@ sg_page_malloc(int rqSz, int lowDma, int static void sg_page_free(char *buff, int size) { - int order, a_size; + int order, a_size, m; + struct page * tmppage; + tmppage = virt_to_page(buff); if
Re: SG_IO with 4k buffer size to iscsi sg device causes Bad page panic
Qi, Yanling wrote: Hi All, This panic is related to the interactions between scsi/sg.c, iscsi initiator and tcp on the RHEL 2.6.9-42 kernel. But we may also have the similar problem with open-iscsi initiator. I will explain why we see the Yeah, this problem should occur in the upstream open-iscsi iscsi code. open-iscsi works very similar to linux-scsi where it just sends pages around with sock-ops-sendpage, and it looks like sg uses __get_free_pages in RHEL's kernel and upstream it uses alloc_pages so unless there was a change in those functions or the network layer then we should have a similar problem. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Kconfig warnings on latest GIT
On Tue, 8 May 2007 00:04:14 +0300 Ismail Dönmez [EMAIL PROTECTED] wrote: drivers/net/Kconfig:2279:warning: 'select' used by config symbol 'UCC_GETH' refers to undefined symbol 'UCC_FAST' looks like this introduces the error: commit 7d776cb596994219584257eb5956b87628e5deaf Author: Timur Tabi [EMAIL PROTECTED] Date: Mon Mar 12 15:40:27 2007 -0500 [POWERPC] QE: automatically select QE options Change the Kconfig files so that the Freescale QE options are automatically selected if a QE device is selected. Previously, you'd need to manually select UCC_FAST if you want any fast UCC devices, such as Gigabit Ethernet Now, the QE Gigabit Ethernet option is always available if the device has a QE, and UCC_FAST is automatically enabled. A side-effect is that the QE Options menu no longer exists. Signed-off-by: Timur Tabi [EMAIL PROTECTED] Signed-off-by: Kumar Gala [EMAIL PROTECTED] cc:ing authors, fti. Kim - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/3] AFS: Export a couple of core functions for AFS write support
Export a couple of core functions for AFS write support to use: find_get_pages_contig() find_get_pages_tag() Signed-off-by: David Howells [EMAIL PROTECTED] --- mm/filemap.c |2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/mm/filemap.c b/mm/filemap.c index 5631d6b..b276508 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -750,6 +750,7 @@ unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t index, read_unlock_irq(mapping-tree_lock); return i; } +EXPORT_SYMBOL(find_get_pages_contig); /** * find_get_pages_tag - find and return pages that match @tag @@ -778,6 +779,7 @@ unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index, read_unlock_irq(mapping-tree_lock); return ret; } +EXPORT_SYMBOL(find_get_pages_tag); /** * grab_cache_page_nowait - returns locked page at given index in given cache - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v4] powerpc: change rheap functions to use ulongs instead of pointers
The rheap allocation functions return a pointer, but the actual value is based on how the heap was initialized, and so it can be anything, e.g. an offset into a buffer. A ulong is a better representation of the value returned by the allocation functions. This patch changes all of the relevant rheap functions to use a unsigned long integers instead of a pointer. In case of an error, the value returned is a negative error code that has been cast to an unsigned long. The caller can use the IS_ERR_VALUE() macro to check for this. All code which calls the rheap functions is updated accordingly. Macros IS_MURAM_ERR() and IS_DPERR(), have been deleted in favor of IS_ERR_VALUE(). Also added error checking to rh_attach_region(). Signed-off-by: Timur Tabi [EMAIL PROTECTED] --- Updated for Paul's latest tree. Even though this patch changes powerpc, netdev, and serial drivers, it cannot be split up. The powerpc side deletes a function and changes prototypes of other functions, so if the netdev and serial drivers are not updated at the same time, they won't compile. 
arch/powerpc/lib/rheap.c| 117 ++- arch/powerpc/sysdev/commproc.c | 20 +++--- arch/powerpc/sysdev/cpm2_common.c | 21 +++--- arch/powerpc/sysdev/qe_lib/qe.c | 29 arch/powerpc/sysdev/qe_lib/ucc_fast.c |5 +- arch/powerpc/sysdev/qe_lib/ucc_slow.c |7 +- arch/ppc/8xx_io/commproc.c | 22 +++--- arch/ppc/lib/rheap.c| 95 + arch/ppc/syslib/cpm2_common.c | 23 +++--- drivers/net/fs_enet/mac-scc.c |2 +- drivers/net/ucc_geth.c | 30 drivers/serial/cpm_uart/cpm_uart_cpm1.c |4 +- drivers/serial/cpm_uart/cpm_uart_cpm2.c |4 +- include/asm-powerpc/qe.h| 13 +--- include/asm-ppc/commproc.h | 13 +--- include/asm-ppc/cpm2.h | 13 +--- include/asm-ppc/rheap.h | 20 +++--- 17 files changed, 221 insertions(+), 217 deletions(-) diff --git a/arch/powerpc/lib/rheap.c b/arch/powerpc/lib/rheap.c index 6c5c5dd..b2f6dcc 100644 --- a/arch/powerpc/lib/rheap.c +++ b/arch/powerpc/lib/rheap.c @@ -133,7 +133,7 @@ static rh_block_t *get_slot(rh_info_t * info) info-empty_slots--; /* Initialize */ - blk-start = NULL; + blk-start = 0; blk-size = 0; blk-owner = NULL; @@ -158,7 +158,7 @@ static void attach_free_block(rh_info_t * info, rh_block_t * blkn) /* We assume that they are aligned properly */ size = blkn-size; - s = (unsigned long)blkn-start; + s = blkn-start; e = s + size; /* Find the blocks immediately before and after the given one @@ -170,7 +170,7 @@ static void attach_free_block(rh_info_t * info, rh_block_t * blkn) list_for_each(l, info-free_list) { blk = list_entry(l, rh_block_t, list); - bs = (unsigned long)blk-start; + bs = blk-start; be = bs + blk-size; if (next == NULL s = bs) @@ -188,10 +188,10 @@ static void attach_free_block(rh_info_t * info, rh_block_t * blkn) } /* Now check if they are really adjacent */ - if (before != NULL s != (unsigned long)before-start + before-size) + if (before s != (before-start + before-size)) before = NULL; - if (after != NULL e != (unsigned long)after-start) + if (after e != after-start) after = NULL; /* No coalescing; list insert and return */ @@ -216,7 +216,7 @@ 
static void attach_free_block(rh_info_t * info, rh_block_t * blkn) /* Grow the after block backwards */ if (before == NULL after != NULL) { - after-start = (int8_t *)after-start - size; + after-start -= size; after-size += size; return; } @@ -321,14 +321,14 @@ void rh_init(rh_info_t * info, unsigned int alignment, int max_blocks, } /* Attach a free memory region, coalesces regions if adjuscent */ -int rh_attach_region(rh_info_t * info, void *start, int size) +int rh_attach_region(rh_info_t * info, unsigned long start, int size) { rh_block_t *blk; unsigned long s, e, m; int r; /* The region must be aligned */ - s = (unsigned long)start; + s = start; e = s + size; m = info-alignment - 1; @@ -338,9 +338,12 @@ int rh_attach_region(rh_info_t * info, void *start, int size) /* Round end down */ e = e ~m; + if (IS_ERR_VALUE(e) || (e s)) + return -ERANGE; + /* Take final values */ - start = (void *)s; - size = (int)(e - s); + start = s; + size = e - s; /* Grow the blocks, if needed */ r = assure_empty(info, 1); @@ -358,7 +361,7 @@ int rh_attach_region(rh_info_t * info, void *start, int
Re: Kconfig warnings on latest GIT
Kim Phillips wrote: On Tue, 8 May 2007 00:04:14 +0300 Ismail Dönmez [EMAIL PROTECTED] wrote: drivers/net/Kconfig:2279:warning: 'select' used by config symbol 'UCC_GETH' refers to undefined symbol 'UCC_FAST' looks like this introduces the error: commit 7d776cb596994219584257eb5956b87628e5deaf Author: Timur Tabi [EMAIL PROTECTED] Date: Mon Mar 12 15:40:27 2007 -0500 [POWERPC] QE: automatically select QE options I have a dilemma, so I need help fixing this bug. This particular patch is necessary because without it, selecting support for the QE is too complicated. Background: The QUICC Engine (QE) is a microcontroller on some Freescale CPUs that can mimic a wide variety of devices. It has multiple controllers (called UCCs), and each one can be an ethernet device, or a UART, or an HDLC thingy, etc. There's a QE library and a bunch of other support code in the arch/powerpc directory, so the QE is a powerpc-specific device. However, all of the drivers that use it are located in drivers/xxx. This isn't a new problem. The common solution is to define some intermediate Kconfig option, like UCC_FAST_TEMP in the driver's Kconfig. Selecting UCC_FAST will then also set UCC_FAST_TEMP. The device driver then depends on UCC_FAST_TEMP. There's also a UCC_SLOW option with the same problem. The dilemma is that there is no single device driver class that depends on UCC_FAST. Currently, there's only one that uses UCC_FAST: the ucc_geth driver. But I'm also working on a UART driver. So my question is: in which Kconfig do I define UCC_FAST_TEMP and UCC_SLOW_TEMP? At first I thought, just put it in drivers/Kconfig, but that Kconfig does nothing but including other Kconfigs. I believe that if I submit a patch that adds UCC_FAST_TEMP and UCC_SLOW_TEMP to drivers/Kconfig, it will be rejected. Either that, or I'll spend six weeks trying to persuade everyone that it's a good idea. Does anyone have any suggestions on how I can fix this? 
-- Timur Tabi Linux Kernel Developer @ Freescale - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [1/2] [NET] link_watch: Move link watch list into net_device
Herbert Xu wrote: Sorry, I had forgotten that I've already concluded previously that this doesn't work because we don't want to prevent the interface from being brought up (and other reasons). My memory is failing me :) So I think the best option now is to get rid of the delay on carrier on events for everyone. Sounds good to me. So I'll use this change instead. Subject: xen: go back to using normal network stack carriers This effectively reverts xen-unstable change 14280:42b29f084c31. Herbert has changed the behaviour of the core networking to not delay an initial down-up transition, and so the timing concern has been solved. Signed-off-by: Jeremy Fitzhardinge [EMAIL PROTECTED] Cc: Herbert Xu [EMAIL PROTECTED] Cc: Keir Fraser [EMAIL PROTECTED] diff -r 282bef511e66 drivers/net/xen-netfront.c --- a/drivers/net/xen-netfront.cTue May 08 13:11:18 2007 -0700 +++ b/drivers/net/xen-netfront.cTue May 08 13:14:47 2007 -0700 @@ -95,7 +95,6 @@ struct netfront_info { unsigned int evtchn; unsigned int copying_receiver; - unsigned int carrier; /* Receive-ring batched refills. */ #define RX_MIN_TARGET 8 @@ -142,15 +141,6 @@ struct netfront_rx_info { }; /* - * Implement our own carrier flag: the network stack's version causes delays - * when the carrier is re-enabled (in particular, dev_activate() may not - * immediately be called, which can cause packet loss). - */ -#define netfront_carrier_on(netif) ((netif)-carrier = 1) -#define netfront_carrier_off(netif)((netif)-carrier = 0) -#define netfront_carrier_ok(netif) ((netif)-carrier) - -/* * Access macros for acquiring freeing slots in tx_skbs[]. 
*/ @@ -241,7 +231,7 @@ static void xennet_alloc_rx_buffers(stru int nr_flips; struct xen_netif_rx_request *req; - if (unlikely(!netfront_carrier_ok(np))) + if (unlikely(!netif_carrier_ok(dev))) return; /* @@ -380,7 +370,7 @@ static int xennet_open(struct net_device memset(np-stats, 0, sizeof(np-stats)); spin_lock_bh(np-rx_lock); - if (netfront_carrier_ok(np)) { + if (netif_carrier_ok(dev)) { xennet_alloc_rx_buffers(dev); np-rx.sring-rsp_event = np-rx.rsp_cons + 1; if (RING_HAS_UNCONSUMED_RESPONSES(np-rx)) @@ -400,7 +390,7 @@ static void xennet_tx_buf_gc(struct net_ struct netfront_info *np = netdev_priv(dev); struct sk_buff *skb; - BUG_ON(!netfront_carrier_ok(np)); + BUG_ON(!netif_carrier_ok(dev)); do { prod = np-tx.sring-rsp_prod; @@ -540,7 +530,7 @@ static int xennet_start_xmit(struct sk_b spin_lock_irq(np-tx_lock); - if (unlikely(!netfront_carrier_ok(np) || + if (unlikely(!netif_carrier_ok(dev) || (frags 1 !xennet_can_sg(dev)) || netif_needs_gso(dev, skb))) { spin_unlock_irq(np-tx_lock); @@ -973,7 +963,7 @@ static int xennet_poll(struct net_device spin_lock(np-rx_lock); - if (unlikely(!netfront_carrier_ok(np))) { + if (unlikely(!netif_carrier_ok(dev))) { spin_unlock(np-rx_lock); return 0; } @@ -1308,7 +1298,7 @@ static struct net_device * __devinit xen np-netdev = netdev; - netfront_carrier_off(np); + netif_carrier_off(netdev); return netdev; @@ -1376,7 +1366,7 @@ static void xennet_disconnect_backend(st /* Stop old i/f to prevent errors whilst we rebuild the state. */ spin_lock_bh(info-rx_lock); spin_lock_irq(info-tx_lock); - netfront_carrier_off(info); + netif_carrier_off(info-netdev); spin_unlock_irq(info-tx_lock); spin_unlock_bh(info-rx_lock); @@ -1440,7 +1430,7 @@ static irqreturn_t xennet_interrupt(int spin_lock_irqsave(np-tx_lock, flags); - if (likely(netfront_carrier_ok(np))) { + if (likely(netif_carrier_ok(dev))) { xennet_tx_buf_gc(dev); /* Under tx_lock: protects access to rx shared-ring indexes. 
*/ if (RING_HAS_UNCONSUMED_RESPONSES(np-rx)) @@ -1728,7 +1718,7 @@ static int xennet_connect(struct net_dev * domain a kick because we've probably just requeued some * packets. */ - netfront_carrier_on(np); + netif_carrier_on(np-netdev); notify_remote_via_irq(np-netdev-irq); xennet_tx_buf_gc(dev); xennet_alloc_rx_buffers(dev); - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/2] skge: default WOL should be magic only (rev2)
By default, the skge driver now enables wake on magic and wake on PHY. This is a bad default (bug), wake on PHY means machine will never shutdown if connected to a switch. Signed-off-by: Stephen Hemminger [EMAIL PROTECTED] --- drivers/net/skge.c |4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) --- sky2-2.6.21.orig/drivers/net/skge.c 2007-05-08 10:06:39.0 -0700 +++ sky2-2.6.21/drivers/net/skge.c 2007-05-08 10:21:51.0 -0700 @@ -3594,7 +3594,9 @@ static struct net_device *skge_devinit(s skge-duplex = -1; skge-speed = -1; skge-advertising = skge_supported_modes(hw); - skge-wol = pci_wake_enabled(hw-pdev) ? wol_supported(hw) : 0; + + if (pci_wake_enabled(hw-pdev)) + skge-wol = wol_supported(hw) WAKE_MAGIC; hw-dev[port] = dev; - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 4/6] myri10ge: limit the number of recoveries
Jeff Garzik wrote: Brice Goglin wrote: Limit the number of recoveries from a NIC hw watchdog reset to 1 by default. This is tweakable via the myri10ge_reset_recover tunable. NAK. Tunables like this are generally (a) never touched by the vast majority of users, and (b) have useful values and purposes known only to Myri employees :) Well, actually, it's kind of the opposite. Myri employees won't need to tune this value since they will be able to replace the NIC with another one immediately. The whole point of this tunable is to help end-users: * The default value (set to 1) enables detection of defective NICs immediately. These memory parity errors are expected to happen very rarely (less than once per century per NIC). However, a defective NIC (very rare, fortunately) can see such an error quite often, ie. every few minutes under high load. * An increased limit value will still allow people with mission critical installations to crank up the tunable and recover an INTMAX number of times while waiting for a downtime window to replace the NIC. The performance won't be optimal, but at least, it will still work. Should I resend the patch? Thanks, Brice - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/4] v9fs: rename non-vfs related structs and functions to be moved to net/9p
On Tue, 8 May 2007 14:51:02 -0600 Latchesar Ionkov [EMAIL PROTECTED] wrote: This patchset moves non-filesystem interfaces of v9fs from fs/9p to net/9p. It moves the transport, packet marshalling and connection layers to net/9p leaving only the VFS related files in fs/9p. (Please cc netdev@vger.kernel.org on net-related work) These changes would be best handled via Eric's git tree, with appropriate acks from the net maintainers. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: skge- soft lockup on CPU#0 with mtu=9000 (2.6.20.1 + web100 patch)
On Mon, 12 Mar 2007, Chris Stromsoe wrote: On Thu, 8 Mar 2007, Chris Stromsoe wrote: On Thu, 8 Mar 2007, Jay Vosburgh wrote: Chris Stromsoe [EMAIL PROTECTED] wrote: 1) ip link set mtu 9000 eth2 -- eth2 is no longer responsive ip link set mtu 1500 eth2 -- eth2 remains unresponsive 2) ifup eth2 ifdown eth2 perl -pi -e 's/eth2/eth3/' /etc/network/interfaces ifup eth3 -- locks up here This would seem to suggest a problem with skge itself, although there might be some other interaction with bonding that causes the problems for that case. In both of the above mentioned cases, I was not using bonding. That was with the skge driver only. The above tests both work fine with the 2.6.20.1 sk98lin driver loaded as modprobe sk98lin RlmtMode=DualNet. I can change the MTU, add and remove eth2/eth3 from the bond, and up and down the interface. It also works fine with different hardware (e100, e1000, tg3, bnx2). Running both interfaces alone without the bonding driver also works (I can up and down the interfaces with no side effects). Just an update - it looks like 2.6.20.1 fixed the MTU problem (1 above), but not the other problem (where the machine locks up if the second port on the dual-port card is downed). To recap: I am using SysKonnect SK-9843 cards. The sk98lin driver works fine; the skge driver does not. The following sequence of commands locks up the server. The lock is a hard lock; console is not responsive to keyboard input or to sysrq. Nothing is printed on the serial console. ip li set eth2 up ip li set eth2 down ip li set eth3 up There are no addresses assigned to either interface. This was done after a fresh boot. It is repeatable. If I do not down eth2, I can up eth3, assign addresses, and use both interfaces. The kernel is fresh from kernel.org and does not have any third party patches. 
lspci -vv output: :01:0a.0 Ethernet controller: Syskonnect (Schneider Koch) SK-98xx Gigabit Ethernet Server Adapter (rev 12) Subsystem: Syskonnect (Schneider Koch) SK-9844 Gigabit Ethernet Server Adapter (SK-NET GE-SX dual link) Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV+ VGASnoop- ParErr- Stepping- SERR+ FastB2B- Status: Cap+ 66MHz+ UDF- FastB2B+ ParErr- DEVSEL=medium TAbort- TAbort- MAbort- SERR- PERR- Latency: 64 (5750ns min, 7750ns max), Cache Line Size: 0x08 (32 bytes) Interrupt: pin A routed to IRQ 10 Region 0: Memory at ff8fc000 (32-bit, non-prefetchable) [size=16K] Region 1: I/O ports at d800 [size=256] Expansion ROM at ff40 [disabled] [size=128K] Capabilities: available only to root -Chris - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/6] RESEND of missed ucc_geth phylib and SGMII patches
This series is a re-send of the 6 patches that were missed: submitted 10 apr 2007: [PATCH] Add support for the Davicom DM9161A PHY - http://marc.info/?l=linux-netdevm=117624199831581w=2 submitted 13 apr 2007: [PATCH v2] phylib: add the ICPlus IP175C PHY driver - http://marc.info/?l=linux-netdevm=117648832616294w=2 [PATCH v2] phylib: enable RGMII-ID on the Marvell m88e PHY - http://marc.info/?l=linux-netdevm=117648831716153w=2 submitted 02 may 2007: [PATCH] ucc_geth: eliminate max-speed, change interface - http://marc.info/?l=linux-netdevm=117813384403311w=2 submitted 03 may 2007: [PATCH] gianfar: add support for SGMII - http://marc.info/?l=linux-netdevm=117822321305231w=2 [PATCH] phylib: m88e: enable SGMII mode - http://marc.info/?l=linux-netdevm=117822321202167w=2 Jeff, please apply; boards with ucc_geth are currently broken. Thanks, Kim --- Kapil Juneja (2): gianfar: add support for SGMII phylib: m88e: enable SGMII mode Kim Phillips (4): Add support for the Davicom DM9161A PHY phylib: add the ICPlus IP175C PHY driver phylib: enable RGMII-ID on the Marvell m88e PHY ucc_geth: eliminate max-speed, change interface-type to phy-connection-type drivers/net/gianfar.c | 27 + drivers/net/gianfar.h |6 ++ drivers/net/gianfar_mii.c | 55 +++ drivers/net/phy/Kconfig|6 ++ drivers/net/phy/Makefile |1 + drivers/net/phy/davicom.c | 34 +--- drivers/net/phy/icplus.c | 128 drivers/net/phy/marvell.c | 78 --- drivers/net/ucc_geth.c | 40 ++ drivers/net/ucc_geth_mii.c |9 ++-- drivers/net/ucc_geth_mii.h | 10 ++-- 11 files changed, 334 insertions(+), 60 deletions(-) create mode 100644 drivers/net/phy/icplus.c - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/6] phylib: enable RGMII-ID on the Marvell m88e1111 PHY
Support for configuring RGMII-ID (RGMII with internal delay) mode on the 88e and 88e1145. Also renamed 88es - 88e (no references to an 88es part were found), and fixed some whitespace. Signed-off-by: Kim Phillips [EMAIL PROTECTED] --- drivers/net/phy/marvell.c | 62 +++-- 1 files changed, 54 insertions(+), 8 deletions(-) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index 22aec5c..b87f8d2 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -54,6 +54,12 @@ #define MII_M_PHY_LED_CONTROL 0x18 #define MII_M_PHY_LED_DIRECT 0x4100 #define MII_M_PHY_LED_COMBINE 0x411c +#define MII_M_PHY_EXT_CR 0x14 +#define MII_M_RX_DELAY 0x80 +#define MII_M_TX_DELAY 0x2 +#define MII_M_PHY_EXT_SR 0x1b +#define MII_M_HWCFG_MODE_MASK 0xf +#define MII_M_HWCFG_MODE_RGMII 0xb MODULE_DESCRIPTION(Marvell PHY driver); MODULE_AUTHOR(Andy Fleming); @@ -131,6 +137,45 @@ static int marvell_config_aneg(struct phy_device *phydev) return err; } +static int m88e_config_init(struct phy_device *phydev) +{ + int err; + + if ((phydev-interface == PHY_INTERFACE_MODE_RGMII) || + (phydev-interface == PHY_INTERFACE_MODE_RGMII_ID)) { + int temp; + + if (phydev-interface == PHY_INTERFACE_MODE_RGMII_ID) { + temp = phy_read(phydev, MII_M_PHY_EXT_CR); + if (temp 0) + return temp; + + temp |= (MII_M_RX_DELAY | MII_M_TX_DELAY); + + err = phy_write(phydev, MII_M_PHY_EXT_CR, temp); + if (err 0) + return err; + } + + temp = phy_read(phydev, MII_M_PHY_EXT_SR); + if (temp 0) + return temp; + + temp = ~(MII_M_HWCFG_MODE_MASK); + temp |= MII_M_HWCFG_MODE_RGMII; + + err = phy_write(phydev, MII_M_PHY_EXT_SR, temp); + if (err 0) + return err; + } + + err = phy_write(phydev, MII_BMCR, BMCR_RESET); + if (err 0) + return err; + + return 0; +} + static int m88e1145_config_init(struct phy_device *phydev) { int err; @@ -152,7 +197,7 @@ static int m88e1145_config_init(struct phy_device *phydev) if (err 0) return err; - if (phydev-interface == PHY_INTERFACE_MODE_RGMII) { + if (phydev-interface 
== PHY_INTERFACE_MODE_RGMII_ID) { int temp = phy_read(phydev, MII_M1145_PHY_EXT_CR); if (temp 0) return temp; @@ -206,7 +251,7 @@ static struct phy_driver m88e1101_driver = { .driver = {.owner = THIS_MODULE,}, }; -static struct phy_driver m88es_driver = { +static struct phy_driver m88e_driver = { .phy_id = 0x01410cc0, .phy_id_mask = 0xfff0, .name = Marvell 88E, @@ -216,6 +261,7 @@ static struct phy_driver m88es_driver = { .read_status = genphy_read_status, .ack_interrupt = marvell_ack_interrupt, .config_intr = marvell_config_intr, + .config_init = m88e_config_init, .driver = {.owner = THIS_MODULE,}, }; @@ -241,9 +287,9 @@ static int __init marvell_init(void) if (ret) return ret; - ret = phy_driver_register(m88es_driver); + ret = phy_driver_register(m88e_driver); if (ret) - goto errs; + goto err; ret = phy_driver_register(m88e1145_driver); if (ret) @@ -251,9 +297,9 @@ static int __init marvell_init(void) return 0; - err1145: - phy_driver_unregister(m88es_driver); - errs: +err1145: + phy_driver_unregister(m88e_driver); +err: phy_driver_unregister(m88e1101_driver); return ret; } @@ -261,7 +307,7 @@ static int __init marvell_init(void) static void __exit marvell_exit(void) { phy_driver_unregister(m88e1101_driver); - phy_driver_unregister(m88es_driver); + phy_driver_unregister(m88e_driver); phy_driver_unregister(m88e1145_driver); } -- 1.5.0.3 - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 6/6] phylib: m88e1111: enable SGMII mode
From: Kapil Juneja [EMAIL PROTECTED] If connected via SGMII, initialize with SGMII mode configured. Signed-off-by: Kapil Juneja [EMAIL PROTECTED] Signed-off-by: Andy Fleming [EMAIL PROTECTED] Signed-off-by: Kim Phillips [EMAIL PROTECTED] --- drivers/net/phy/marvell.c | 16 1 files changed, 16 insertions(+), 0 deletions(-) diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c index b87f8d2..d4cc952 100644 --- a/drivers/net/phy/marvell.c +++ b/drivers/net/phy/marvell.c @@ -60,6 +60,7 @@ #define MII_M_PHY_EXT_SR 0x1b #define MII_M_HWCFG_MODE_MASK 0xf #define MII_M_HWCFG_MODE_RGMII 0xb +#define MII_M_HWCFG_MODE_SGMII_NO_CLK 0x4 MODULE_DESCRIPTION(Marvell PHY driver); MODULE_AUTHOR(Andy Fleming); @@ -169,6 +170,21 @@ static int m88e_config_init(struct phy_device *phydev) return err; } + if (phydev-interface == PHY_INTERFACE_MODE_SGMII) + int temp; + + temp = phy_read(phydev, MII_M_PHY_EXT_SR); + if (temp 0) + return temp; + + temp = ~(MII_M_HWCFG_MODE_MASK); + temp |= MII_M_HWCFG_MODE_SGMII_NO_CLK; + + err = phy_write(phydev, MII_M_PHY_EXT_SR, temp); + if (err 0) + return err; + } + err = phy_write(phydev, MII_BMCR, BMCR_RESET); if (err 0) return err; -- 1.5.0.3 - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [PATCH] IPROUTE: Modify tc for new PRIO multiqueue behavior
On Tue, 2007-08-05 at 08:35 -0700, Waskiewicz Jr, Peter P wrote: But the point is that although the DCE spec inspired the development of these patches, that is *not* the goal of these patches. As Yi stated in a previous reply to this thread, the ability for any hardware to control its queues at the stack level in the kernel is something that is missing in the kernel. If the hardware doesn't want to support it, then the patches as-is will not require anything to change in drivers to continue working as they do today. Wireless offers a strict priority scheduler with statistical transmit (as opposed to deterministic offered by the linux strict prio qdisc); so wireless is not in the same boat as DCE. Bottom line: these patches are not for a specific technology. I presented that spec to show a possible use case for these patches. Yi presented a use case he can use in the wireless world. I will be posting another use case shortly using ATA over Ethernet. Once you run the ATA over ethernet with your approach, please repeat the test with a single ring in hardware and an equivalent qdisc in linux. I dont believe you will see any difference - Linux is that good. This is not to say i am against your patches, I am just for optimizing for the common. I dont believe wireless needs anything other than the simple approach i described. The fact that an occasional low prio packet may end up going out first before a high prio due to the contention is non-affecting to the overall results. I don't see how we can agree that having any type of head-of-line-blocking of a flow is or is not a problem. But where is this head-of-line blocking coming from? Please correct me if I am wrong: If i had 4 hardware rings/queues in a wireless NIC with 4 different WMM priorities all filled up (I would say impossible to achieve but for the sake of discussion assume possible), then there is still a probability that a low prio packet will be sent first before a high prio one. 
It all depends on the probabilistic nature of the channel availability as well as the tx opportunity and backoff timings. You believe it isn't an issue, but this is a gap that I see existing in the stack today. As networking is used for more advanced features (such as ndb or VoIP), having the ability to separate flows from each other all the way to the wire I see is a huge advantage to ensure true QoS. You dont believe Linux has actually been doing QoS all these years before DCE? It has. And we have been separating flows all those years too. Wireless with CSMA/CA is a slightly different beast due to the shared channels; its worse but not very different in nature than the case where you have a shared ethernet hub (CSMA/CD) and you keep adding hosts to it - we dont ask the qdiscs to backoff because we have a collision. Where i find wireless intriguing is in the case where its available bandwidth adjusts given the signal strength - but you are talking about HOLs not that specific phenomena. cheers, jamal - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 3/3] AFS: Implement basic file write support
On Tue, 08 May 2007 20:44:11 +0100 David Howells [EMAIL PROTECTED] wrote: Implement support for writing to regular AFS files, including: (1) write (2) truncate (3) fsync, fdatasync (4) chmod, chown, chgrp, utime. AFS writeback attempts to batch writes into as chunks as large as it can manage up to the point that it writes back 65535 pages in one chunk or it meets a locked page. Furthermore, if a page has been written to using a particular key, then should another write to that page use some other key, the first write will be flushed before the second is allowed to take place. If the first write fails due to a security error, then the page will be scrapped and reread before the second write takes place. If a page is dirty and the callback on it is broken by the server, then the dirty data is not discarded (same behaviour as NFS). Shared-writable mappings are not supported by this patch. The below isn't a review - it's some random cherrypickling. ... +int afs_fs_store_data(struct afs_server *server, struct afs_writeback *wb, + pgoff_t first, pgoff_t last, + unsigned offset, unsigned to, + const struct afs_wait_mode *wait_mode) +{ + struct afs_vnode *vnode = wb-vnode; + struct afs_call *call; + loff_t size, pos, i_size; + __be32 *bp; + + _enter(,%x,{%x:%u},,, +key_serial(wb-key), vnode-fid.vid, vnode-fid.vnode); + + size = to - offset; + if (first != last) + size += (loff_t)(last - first) PAGE_SHIFT; + pos = (loff_t)first PAGE_SHIFT; + pos += offset; + + i_size = i_size_read(vnode-vfs_inode); + if (pos + size i_size) + i_size = size + pos; + + _debug(size %llx, at %llx, i_size %llx, +(unsigned long long) size, (unsigned long long) pos, +(unsigned long long) i_size); + + BUG_ON(i_size 0x); // TODO: use 64-bit store You're sure this isn't user-triggerable? 
+static int afs_prepare_page(struct afs_vnode *vnode, struct page *page, + struct key *key, unsigned offset, unsigned to) +{ + unsigned eof, tail, start, stop, len; + loff_t i_size, pos; + void *p; + int ret; + + _enter(); + + if (offset == 0 to == PAGE_SIZE) + return 0; + + p = kmap(page); + + i_size = i_size_read(vnode-vfs_inode); + pos = (loff_t) page-index PAGE_SHIFT; + if (pos = i_size) { + /* partial write, page beyond EOF */ + _debug(beyond); + if (offset 0) + memset(p, 0, offset); + if (to PAGE_SIZE) + memset(p + to, 0, PAGE_SIZE - to); + kunmap(page); + return 0; + } + + if (i_size - pos = PAGE_SIZE) { + /* partial write, page entirely before EOF */ + _debug(before); + tail = eof = PAGE_SIZE; + } else { + /* partial write, page overlaps EOF */ + eof = i_size - pos; + _debug(overlap %u, eof); + tail = max(eof, to); + if (tail PAGE_SIZE) + memset(p + tail, 0, PAGE_SIZE - tail); + if (offset eof) + memset(p + eof, 0, PAGE_SIZE - eof); + } + + kunmap(p); kmap_atomic() could be used here and is better. We have this zero_user_page() thing heading in which could perhaps be used here also. + ret = 0; + if (offset 0 || eof to) { + /* need to fill one or two bits that aren't going to be written + * (cover both fillers in one read if there are two) */ + start = (offset 0) ? 0 : to; + stop = (eof to) ? eof : offset; + len = stop - start; + _debug(wr=%u-%u av=0-%u [EMAIL PROTECTED], +offset, to, eof, start, len); + ret = afs_fill_page(vnode, key, start, len, page); + } + + _leave( = %d, ret); + return ret; +} + ... + ASSERTRANGE(wb-first, =, index, =, wb-last); wow. 
+} + +/* + * finalise part of a write to a page + */ +int afs_commit_write(struct file *file, struct page *page, + unsigned offset, unsigned to) +{ + struct afs_vnode *vnode = AFS_FS_I(file-f_dentry-d_inode); + loff_t i_size, maybe_i_size; + + _enter({%x:%u},{%lx},%u,%u, +vnode-fid.vid, vnode-fid.vnode, page-index, offset, to); + + maybe_i_size = (loff_t) page-index PAGE_SHIFT; + maybe_i_size += to; + + i_size = i_size_read(vnode-vfs_inode); + if (maybe_i_size i_size) { + spin_lock(vnode-writeback_lock); + i_size = i_size_read(vnode-vfs_inode); + if (maybe_i_size i_size) + i_size_write(vnode-vfs_inode, maybe_i_size); +
Re: skge- soft lockup on CPU#0 with mtu=9000 (2.6.20.1 + web100 patch)
On Tue, 8 May 2007, Chris Stromsoe wrote: On Mon, 12 Mar 2007, Chris Stromsoe wrote: On Thu, 8 Mar 2007, Chris Stromsoe wrote: On Thu, 8 Mar 2007, Jay Vosburgh wrote: Chris Stromsoe [EMAIL PROTECTED] wrote: 1) ip link set mtu 9000 eth2 -- eth2 is no longer responsive ip link set mtu 1500 eth2 -- eth2 remains unresponsive 2) ifup eth2 ifdown eth2 perl -pi -e 's/eth2/eth3/' /etc/network/interfaces ifup eth3 -- locks up here This would seem to suggest a problem with skge itself, although there might be some other interaction with bonding that causes the problems for that case. In both of the above mentioned cases, I was not using bonding. That was with the skge driver only. The above tests both work fine with the 2.6.20.1 sk98lin driver loaded as modprobe sk98lin RlmtMode=DualNet. I can change the MTU, add and remove eth2/eth3 from the bond, and up and down the interface. It also works fine with different hardware (e100, e1000, tg3, bnx2). Running both interfaces alone without the bonding driver also works (I can up and down the interfaces with no side effects). Just an update - it looks like 2.6.20.1 fixed the MTU problem (1 above), but not the other problem (where the machine locks up if the second port on the dual-port card is downed). To recap: I am using SysKonnect SK-9843 cards. The sk98lin driver works fine; the skge I should proof-read first. The cards are SK-9844s, not SK-9843s. The rest of the prior message is still correct. -Chris driver does not. The following sequence of commands locks up the server. The lock is a hard lock; console is not responsive to keyboard input or to sysrq. Nothing is printed on the serial console. ip li set eth2 up ip li set eth2 down ip li set eth3 up There are no addresses assigned to either interface. This was done after a fresh boot. It is repeatable. If I do not down eth2, I can up eth3, assign addresses, and use both interfaces. The kernel is fresh from kernel.org and does not have any third party patches. 
lspci -vv output: :01:0a.0 Ethernet controller: Syskonnect (Schneider Koch) SK-98xx Gigabit Ethernet Server Adapter (rev 12) Subsystem: Syskonnect (Schneider Koch) SK-9844 Gigabit Ethernet Server Adapter (SK-NET GE-SX dual link) Control: I/O+ Mem+ BusMaster+ SpecCycle- MemWINV+ VGASnoop- ParErr- Stepping- SERR+ FastB2B- Status: Cap+ 66MHz+ UDF- FastB2B+ ParErr- DEVSEL=medium TAbort- TAbort- MAbort- SERR- PERR- Latency: 64 (5750ns min, 7750ns max), Cache Line Size: 0x08 (32 bytes) Interrupt: pin A routed to IRQ 10 Region 0: Memory at ff8fc000 (32-bit, non-prefetchable) [size=16K] Region 1: I/O ports at d800 [size=256] Expansion ROM at ff40 [disabled] [size=128K] Capabilities: available only to root -Chris - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [1/2] [NET] link_watch: Move link watch list into net_device
From: Herbert Xu [EMAIL PROTECTED] Date: Tue, 8 May 2007 22:13:22 +1000 [NET] link_watch: Move link watch list into net_device These days the link watch mechanism is an integral part of the network subsystem as it manages the carrier status. So it now makes sense to allocate some memory for it in net_device rather than allocating it on demand. In fact, this is necessary because we can't tolerate a memory allocation failure since that means we'd have to potentially throw a link up event away. It also simplifies the code greatly. In doing so I discovered a subtle race condition in the use of singleevent. This race condition still exists (and is somewhat magnified) without singleevent but it's now plugged thanks to an smp_mb__before_clear_bit. Signed-off-by: Herbert Xu [EMAIL PROTECTED] Applied, thanks Herbert. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [2/2] [NET] link_watch: Remove delay for up even when we're down
From: Herbert Xu [EMAIL PROTECTED] Date: Tue, 8 May 2007 22:16:09 +1000 [NET]: Remove link_watch delay for up even when we're down Currently all link carrier events are delayed by up to a second before they're processed to prevent link storms. This causes unnecessary packet loss during that interval. In fact, we can achieve the same effect in preventing storms by only delaying down events and unnecessary up events. The latter is defined as up events when we're already up. Signed-off-by: Herbert Xu [EMAIL PROTECTED] Also applied, thanks Herbert. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] mac80211: include linux/delay.h instead of asm/delay.h
From: John W. Linville [EMAIL PROTECTED] Date: Tue, 8 May 2007 11:50:11 -0400 From: Geert Uytterhoeven [EMAIL PROTECTED] | CC net/mac80211/ieee80211_sta.o | In file included from linux/net/mac80211/ieee80211_sta.c:31: | include2/asm/delay.h: In function '__const_udelay': | include2/asm/delay.h:33: error: 'loops_per_jiffy' undeclared (first use in this function) | include2/asm/delay.h:33: error: (Each undeclared identifier is reported only once | include2/asm/delay.h:33: error: for each function it appears in.) mac80211: include linux/delay.h instead of asm/delay.h Signed-off-by: Geert Uytterhoeven [EMAIL PROTECTED] Signed-off-by: John W. Linville [EMAIL PROTECTED] Applied, thanks everyone. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [1/2] [NET] link_watch: Move link watch list into net_device
On Tue, May 08, 2007 at 01:19:33PM -0700, Jeremy Fitzhardinge wrote: Subject: xen: go back to using normal network stack carriers This effectively reverts xen-unstable change 14280:42b29f084c31. Herbert has changed the behaviour of the core networking to not delay an initial down-up transition, and so the timing concern has been solved. Signed-off-by: Jeremy Fitzhardinge [EMAIL PROTECTED] Cc: Herbert Xu [EMAIL PROTECTED] Cc: Keir Fraser [EMAIL PROTECTED] Looks good to me. Thanks, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] sched: teql_enqueue can check limits before skb enqueue
From: Krishna Kumar [EMAIL PROTECTED] Date: Tue, 08 May 2007 13:01:24 +0530 Optimize teql_enqueue so that it first checks limits before enqueuing. Patch against net-2.6.22.git Signed-off-by: Krishna Kumar [EMAIL PROTECTED] Applied, thanks. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] core: Call net_tx_action only if work pending
From: Krishna Kumar [EMAIL PROTECTED] Date: Tue, 08 May 2007 13:01:30 +0530 Optimize call to net_tx_action only if work is pending. Patch against net-2.6.22.git Signed-off-by: Krishna Kumar [EMAIL PROTECTED] I don't think downing a cpu is done so often as to justify this microscopic optimization, do you? - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] sched: Optimize return value of qdisc_restart
From: Krishna Kumar [EMAIL PROTECTED] Date: Tue, 08 May 2007 13:01:32 +0530 Optimize return value of qdisc_restart so that it is not called an extra time if there are no more packets on the queue to be sent out. It is also not required to check for gso_skb (though the lock is dropped) since another cpu which added this would have done a netif_schedule. Patch against net-2.6.22.git Signed-off-by: Krishna Kumar [EMAIL PROTECTED] 0 return value here means that the queue is not empty, and the device is throttled. If you want to do this, just branch down to the end of the function which asserts that q-q.qlen is not negative, and returns it. That will achieve the right effect. But I'm not so sure about this idea, I have this strange feeling that we do things this way for a reason... Hmmm... - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 01/17] sky2: fix oops on shutdown
If the device fails during module startup (e.g. an unsupported chip version), the driver would crash dereferencing a null pointer on shutdown. Signed-off-by: Stephen Hemminger [EMAIL PROTECTED] --- drivers/net/sky2.c |4 1 file changed, 4 insertions(+) --- sky2-2.6.21.orig/drivers/net/sky2.c 2007-05-08 15:14:45.0 -0700 +++ sky2-2.6.21/drivers/net/sky2.c 2007-05-08 15:16:12.0 -0700 @@ -3732,6 +3732,7 @@ err_out_free_regions: err_out_disable: pci_disable_device(pdev); err_out: + pci_set_drvdata(pdev, NULL); return err; } @@ -3861,6 +3862,9 @@ static void sky2_shutdown(struct pci_dev struct sky2_hw *hw = pci_get_drvdata(pdev); int i, wol = 0; + if (!hw) + return; + del_timer_sync(hw-idle_timer); netif_poll_disable(hw-dev[0]); -- - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 04/17] sky2: remove dual port workaround
This workaround was added to deal with NAPI core and how it affected dual port shared polling. It turned out not to be necessary, the core code in dev_stop() waits for NAPI but doesn't kill it. Signed-off-by: Stephen Hemminger [EMAIL PROTECTED] --- drivers/net/sky2.c |7 --- 1 file changed, 7 deletions(-) --- sky2-2.6.21.orig/drivers/net/sky2.c 2007-05-08 10:28:54.0 -0700 +++ sky2-2.6.21/drivers/net/sky2.c 2007-05-08 10:28:58.0 -0700 @@ -1570,13 +1570,6 @@ static int sky2_down(struct net_device * imask = ~portirq_msk[port]; sky2_write32(hw, B0_IMSK, imask); - /* -* Both ports share the NAPI poll on port 0, so if necessary undo the -* the disable that is done in dev_close. -*/ - if (sky2-port == 0 hw-ports 1) - netif_poll_enable(dev); - sky2_gmac_reset(hw, port); /* Stop transmitter */ -- - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 02/17] sky2: dont restrict config space access
Take out the code that protects driver from accessing the PCI config space. We are old enough to run with scissors now. Signed-off-by: Stephen Hemminger [EMAIL PROTECTED] --- drivers/net/sky2.c | 11 --- 1 file changed, 11 deletions(-) --- sky2-2.6.21.orig/drivers/net/sky2.c 2007-05-08 10:06:39.0 -0700 +++ sky2-2.6.21/drivers/net/sky2.c 2007-05-08 10:28:50.0 -0700 @@ -556,7 +556,6 @@ static void sky2_phy_power(struct sky2_h if (hw-chip_id == CHIP_ID_YUKON_XL hw-chip_rev 1) onoff = !onoff; - sky2_write8(hw, B2_TST_CTRL1, TST_CFG_WRITE_ON); reg1 = sky2_pci_read32(hw, PCI_DEV_REG1); if (onoff) /* Turn off phy power saving */ @@ -566,7 +565,6 @@ static void sky2_phy_power(struct sky2_h sky2_pci_write32(hw, PCI_DEV_REG1, reg1); sky2_pci_read32(hw, PCI_DEV_REG1); - sky2_write8(hw, B2_TST_CTRL1, TST_CFG_WRITE_OFF); udelay(100); } @@ -634,11 +632,9 @@ static void sky2_wol_init(struct sky2_po sky2_write16(hw, WOL_REGS(port, WOL_CTRL_STAT), ctrl); /* Turn on legacy PCI-Express PME mode */ - sky2_write8(hw, B2_TST_CTRL1, TST_CFG_WRITE_ON); reg1 = sky2_pci_read32(hw, PCI_DEV_REG1); reg1 |= PCI_Y2_PME_LEGACY; sky2_pci_write32(hw, PCI_DEV_REG1, reg1); - sky2_write8(hw, B2_TST_CTRL1, TST_CFG_WRITE_OFF); /* block receiver */ sky2_write8(hw, SK_REG(port, RX_GMF_CTRL_T), GMF_RST_SET); @@ -2292,10 +2288,8 @@ static void sky2_hw_intr(struct sky2_hw dev_err(hw-pdev-dev, PCI hardware error (0x%x)\n, pci_err); - sky2_write8(hw, B2_TST_CTRL1, TST_CFG_WRITE_ON); sky2_pci_write16(hw, PCI_STATUS, pci_err | PCI_STATUS_ERROR_BITS); - sky2_write8(hw, B2_TST_CTRL1, TST_CFG_WRITE_OFF); } if (status Y2_IS_PCI_EXP) { @@ -2309,10 +2303,8 @@ static void sky2_hw_intr(struct sky2_hw pex_err); /* clear the interrupt */ - sky2_write32(hw, B2_TST_CTRL1, TST_CFG_WRITE_ON); sky2_pci_write32(hw, PEX_UNC_ERR_STAT, 0xUL); - sky2_write32(hw, B2_TST_CTRL1, TST_CFG_WRITE_OFF); if (pex_err PEX_FATAL_ERRORS) { u32 hwmsk = sky2_read32(hw, B0_HWE_IMSK); @@ -2564,7 +2556,6 @@ static void sky2_reset(struct sky2_hw *h /* 
clear PCI errors, if any */ status = sky2_pci_read16(hw, PCI_STATUS); - sky2_write8(hw, B2_TST_CTRL1, TST_CFG_WRITE_ON); sky2_pci_write16(hw, PCI_STATUS, status | PCI_STATUS_ERROR_BITS); @@ -2582,8 +2573,6 @@ static void sky2_reset(struct sky2_hw *h sky2_write8(hw, SK_REG(i, GMAC_LINK_CTRL), GMLC_RST_CLR); } - sky2_write8(hw, B2_TST_CTRL1, TST_CFG_WRITE_OFF); - /* Clear I2C IRQ noise */ sky2_write32(hw, B2_I2C_IRQ, 1); -- - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 08/17] sky2: MIB counter overflow handling
Make sure that if we ever get a MIB counter overflow interrupt (normally masked off), that it is cleared. Signed-off-by: Stephen Hemminger [EMAIL PROTECTED] --- drivers/net/sky2.c |6 ++ 1 file changed, 6 insertions(+) --- sky2-2.6.21.orig/drivers/net/sky2.c 2007-05-08 10:29:05.0 -0700 +++ sky2-2.6.21/drivers/net/sky2.c 2007-05-08 10:29:09.0 -0700 @@ -2332,6 +2332,12 @@ static void sky2_mac_intr(struct sky2_hw printk(KERN_INFO PFX %s: mac interrupt status 0x%x\n, dev-name, status); + if (status GM_IS_RX_CO_OV) + gma_read16(hw, port, GM_RX_IRQ_SRC); + + if (status GM_IS_TX_CO_OV) + gma_read16(hw, port, GM_TX_IRQ_SRC); + if (status GM_IS_RX_FF_OR) { ++sky2-net_stats.rx_fifo_errors; sky2_write8(hw, SK_REG(port, RX_GMF_CTRL_T), GMF_CLI_RX_FO); -- - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 03/17] sky2: keep track of receive alloc failures
When driver can't allocate receive buffer it drops incoming packet, so update counter. Signed-off-by: Stephen Hemminger [EMAIL PROTECTED] --- drivers/net/sky2.c |4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) --- sky2-2.6.21.orig/drivers/net/sky2.c 2007-05-08 10:28:50.0 -0700 +++ sky2-2.6.21/drivers/net/sky2.c 2007-05-08 10:28:54.0 -0700 @@ -2132,8 +2132,10 @@ static int sky2_status_intr(struct sky2_ switch (le-opcode ~HW_OWNER) { case OP_RXSTAT: skb = sky2_receive(dev, length, status); - if (!skb) + if (unlikely(!skb)) { + sky2-net_stats.rx_dropped++; goto force_update; + } skb-protocol = eth_type_trans(skb, dev); sky2-net_stats.rx_packets++; -- - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 07/17] sky2: use pci_config access functions
Use the PCI layer config access functions. The driver was using the memory mapped window in device, to workaround issues accessing the advanced error reporting registers. Signed-off-by: Stephen Hemminger [EMAIL PROTECTED] --- drivers/net/sky2.c | 44 ++-- drivers/net/sky2.h | 21 - 2 files changed, 22 insertions(+), 43 deletions(-) --- sky2-2.6.21.orig/drivers/net/sky2.c 2007-05-08 15:29:07.0 -0700 +++ sky2-2.6.21/drivers/net/sky2.c 2007-05-08 15:29:09.0 -0700 @@ -220,11 +220,11 @@ static void sky2_power_on(struct sky2_hw if (hw-chip_id == CHIP_ID_YUKON_EC_U || hw-chip_id == CHIP_ID_YUKON_EX) { u32 reg1; - sky2_pci_write32(hw, PCI_DEV_REG3, 0); - reg1 = sky2_pci_read32(hw, PCI_DEV_REG4); + pci_write_config_dword(hw-pdev, PCI_DEV_REG3, 0); + pci_read_config_dword(hw-pdev, PCI_DEV_REG4, reg1); reg1 = P_ASPM_CONTROL_MSK; - sky2_pci_write32(hw, PCI_DEV_REG4, reg1); - sky2_pci_write32(hw, PCI_DEV_REG5, 0); + pci_write_config_dword(hw-pdev, PCI_DEV_REG4, reg1); + pci_write_config_dword(hw-pdev, PCI_DEV_REG5, 0); } } @@ -556,16 +556,16 @@ static void sky2_phy_power(struct sky2_h /* looks like this XL is back asswards .. 
*/ if (hw-chip_id == CHIP_ID_YUKON_XL hw-chip_rev 1) onoff = !onoff; - - reg1 = sky2_pci_read32(hw, PCI_DEV_REG1); + pci_read_config_dword(hw-pdev, PCI_DEV_REG1, reg1); if (onoff) /* Turn off phy power saving */ reg1 = ~phy_power[port]; else reg1 |= phy_power[port]; - sky2_pci_write32(hw, PCI_DEV_REG1, reg1); - sky2_pci_read32(hw, PCI_DEV_REG1); + pci_write_config_dword(hw-pdev, PCI_DEV_REG1, reg1); + pci_read_config_dword(hw-pdev, PCI_DEV_REG1, reg1); + udelay(100); } @@ -633,9 +633,9 @@ static void sky2_wol_init(struct sky2_po sky2_write16(hw, WOL_REGS(port, WOL_CTRL_STAT), ctrl); /* Turn on legacy PCI-Express PME mode */ - reg1 = sky2_pci_read32(hw, PCI_DEV_REG1); + pci_read_config_dword(hw-pdev, PCI_DEV_REG1, reg1); reg1 |= PCI_Y2_PME_LEGACY; - sky2_pci_write32(hw, PCI_DEV_REG1, reg1); + pci_write_config_dword(hw-pdev, PCI_DEV_REG1, reg1); /* block receiver */ sky2_write8(hw, SK_REG(port, RX_GMF_CTRL_T), GMF_RST_SET); @@ -1216,9 +1216,9 @@ static int sky2_up(struct net_device *de struct sky2_port *osky2 = netdev_priv(otherdev); u16 cmd; - cmd = sky2_pci_read16(hw, cap + PCI_X_CMD); + pci_read_config_word(hw-pdev, cap + PCI_X_CMD, cmd); cmd = ~PCI_X_CMD_MAX_SPLIT; - sky2_pci_write16(hw, cap + PCI_X_CMD, cmd); + pci_write_config_word(hw-pdev, cap + PCI_X_CMD, cmd); sky2-rx_csum = 0; osky2-rx_csum = 0; @@ -2283,13 +2283,13 @@ static void sky2_hw_intr(struct sky2_hw if (status (Y2_IS_MST_ERR | Y2_IS_IRQ_STAT)) { u16 pci_err; - pci_err = sky2_pci_read16(hw, PCI_STATUS); + pci_read_config_word(pdev, PCI_STATUS, pci_err); if (net_ratelimit()) dev_err(pdev-dev, PCI hardware error (0x%x)\n, pci_err); - sky2_pci_write16(hw, PCI_STATUS, -pci_err | PCI_STATUS_ERROR_BITS); + pci_write_config_word(pdev, PCI_STATUS, + pci_err | PCI_STATUS_ERROR_BITS); } /* PCI-Express error occurred */ @@ -2513,7 +2513,7 @@ static int __devinit sky2_init(struct sk /* Make sure and enable all clocks */ if (hw-chip_id == CHIP_ID_YUKON_EX || hw-chip_id == CHIP_ID_YUKON_EC_U) - 
sky2_pci_write32(hw, PCI_DEV_REG3, 0); + pci_write_config_dword(hw-pdev, PCI_DEV_REG3, 0); hw-chip_rev = (sky2_read8(hw, B2_MAC_CFG) CFG_CHIP_R_MSK) 4; @@ -2557,9 +2557,9 @@ static void sky2_reset(struct sky2_hw *h sky2_write8(hw, B0_CTST, CS_RST_CLR); /* clear PCI errors, if any */ - status = sky2_pci_read16(hw, PCI_STATUS); - sky2_pci_write16(hw, PCI_STATUS, status | PCI_STATUS_ERROR_BITS); - + pci_read_config_word(hw-pdev, PCI_STATUS, status); + pci_write_config_word(hw-pdev, PCI_STATUS, + status | PCI_STATUS_ERROR_BITS); sky2_write8(hw, B0_CTST, CS_MRST_CLR); @@ -3631,9 +3631,9 @@ static int __devinit sky2_probe(struct p */ { u32 reg; - reg = sky2_pci_read32(hw, PCI_DEV_REG2); + pci_read_config_dword(pdev,PCI_DEV_REG2, reg); reg = ~PCI_REV_DESC; - sky2_pci_write32(hw, PCI_DEV_REG2,
[PATCH 09/17] sky2: memory barriers change
Do some memory barrier changes for safety/performance: Don't need read after update to index, mmiowb() followed by read at end of irq is sufficient. Signed-off-by: Stephen Hemminger [EMAIL PROTECTED] --- drivers/net/sky2.c | 15 +++ 1 file changed, 11 insertions(+), 4 deletions(-) --- sky2-2.6.21.orig/drivers/net/sky2.c 2007-05-08 10:29:09.0 -0700 +++ sky2-2.6.21/drivers/net/sky2.c 2007-05-08 10:29:14.0 -0700 @@ -836,10 +836,12 @@ static inline struct tx_ring_info *tx_le /* Update chip's next pointer */ static inline void sky2_put_idx(struct sky2_hw *hw, unsigned q, u16 idx) { - q = Y2_QADDR(q, PREF_UNIT_PUT_IDX); + /* Make sure write' to descriptors are complete before we tell hardware */ wmb(); - sky2_write16(hw, q, idx); - sky2_read16(hw, q); + sky2_write16(hw, Y2_QADDR(q, PREF_UNIT_PUT_IDX), idx); + + /* Synchronize I/O on since next processor may write to tail */ + mmiowb(); } @@ -971,6 +973,7 @@ stopped: /* reset the Rx prefetch unit */ sky2_write32(hw, Y2_QADDR(rxq, PREF_UNIT_CTRL), PREF_UNIT_RST_SET); + mmiowb(); } /* Clean out receive buffer area, assumes receiver hardware stopped */ @@ -1190,7 +1193,7 @@ static int sky2_rx_start(struct sky2_por } /* Tell chip about available buffers */ - sky2_write16(hw, Y2_QADDR(rxq, PREF_UNIT_PUT_IDX), sky2-rx_put); + sky2_put_idx(hw, rxq, sky2-rx_put); return 0; nomem: sky2_rx_clean(sky2); @@ -1532,6 +1535,8 @@ static void sky2_tx_complete(struct sky2 } sky2-tx_cons = idx; + smp_mb(); + if (tx_avail(sky2) MAX_SKB_TX_LE + 4) netif_wake_queue(dev); } @@ -2210,6 +2215,7 @@ force_update: /* Fully processed status ring so clear irq */ sky2_write32(hw, STAT_CTRL, SC_STAT_CLR_IRQ); + mmiowb(); exit_loop: if (buf_write[0]) { @@ -2436,6 +2442,7 @@ static int sky2_poll(struct net_device * if (work_done work_limit) { netif_rx_complete(dev0); + /* end of interrupt, re-enables also acts as I/O synchronization */ sky2_read32(hw, B0_Y2_SP_LISR); return 0; } else { -- - To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 10/17] sky2: add prefetch for next skb on receive
Prefetch the next skb information in receive. Signed-off-by: Stephen Hemminger [EMAIL PROTECTED] --- drivers/net/sky2.c |1 + 1 file changed, 1 insertion(+) --- sky2-2.6.21.orig/drivers/net/sky2.c 2007-05-08 10:29:14.0 -0700 +++ sky2-2.6.21/drivers/net/sky2.c 2007-05-08 10:29:17.0 -0700 @@ -2160,6 +2160,7 @@ force_update: /* Stop after net poll weight */ if (++work_done = to_do) goto exit_loop; + prefetch(sky2-rx_ring[sky2-rx_next].skb); break; #ifdef SKY2_VLAN_TAG_USED -- - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html