[git patches] net driver fixes
Please pull from 'upstream-fixes' branch of master.kernel.org:/pub/scm/linux/kernel/git/jgarzik/netdev-2.6.git to receive the following updates: drivers/net/8139too.c| 38 +-- drivers/net/Kconfig |7 - drivers/net/bonding/bond_main.c | 15 ++- drivers/net/bonding/bond_sysfs.c |6 - drivers/net/e100.c |2 drivers/net/gianfar.c| 24 ++--- drivers/net/gianfar.h|8 - drivers/net/gianfar_ethtool.c|8 - drivers/net/gianfar_mii.c| 17 +-- drivers/net/r8169.c | 13 +- drivers/net/sis900.h |1 drivers/net/sky2.c | 186 +-- drivers/net/sky2.h |9 - drivers/net/tulip/uli526x.c |2 drivers/net/wan/dscc4.c |2 15 files changed, 222 insertions(+), 116 deletions(-) Alexey Dobriyan: dscc4: fix dscc4_init_dummy_skb check dscc4: fix dscc4_init_dummy_skb check [same change, two different paths. -ed] Andrew Morton: uli526x warning fix Andy Gospodarek: r8169: fix forced-mode link settings Francois Romieu: r8169: prevent excessive busy-waiting 8139too: fix a TX timeout watchdog thread against NAPI softirq race Jay Vosburgh: bonding: allow bond to use TSO if slaves support it Jesse Brandeburg: e100: remove init_hw call to fix panic Kumar Gala: gianfar: Fix sparse warnings Lennert Buytenhek: sis900: remove cfgpmcsr I/O space register define Luiz Fernando Capitulino: bonding: Sparse warnings fix Paolo 'Blaisorblade' Giarrusso: Kbuild menu - hide empty NETDEVICES menu when NET is disabled Stephen Hemminger: sky2: power management fix sky2: pci config space checking sky2: ethtool rx_coalesce settings fix sky2: set mac address fix sky2: clear irq race sky2: add irq to entropy pool sky2: support msi interrupt (revised) sky2: version 0.15 update diff --git a/drivers/net/8139too.c b/drivers/net/8139too.c index adfba44..2beac55 100644 --- a/drivers/net/8139too.c +++ b/drivers/net/8139too.c @@ -586,6 +586,7 @@ struct rtl8139_private { dma_addr_t tx_bufs_dma; signed char phys[4];/* MII device addresses. */ char twistie, twist_row, twist_col; /* Twister tune state. 
*/ + unsigned int watchdog_fired : 1; unsigned int default_port : 4; /* Last dev-if_port value. */ unsigned int have_thread : 1; spinlock_t lock; @@ -638,6 +639,7 @@ static void rtl8139_set_rx_mode (struct static void __set_rx_mode (struct net_device *dev); static void rtl8139_hw_start (struct net_device *dev); static void rtl8139_thread (void *_data); +static void rtl8139_tx_timeout_task(void *_data); static struct ethtool_ops rtl8139_ethtool_ops; /* write MMIO register, with flush */ @@ -1598,13 +1600,14 @@ static void rtl8139_thread (void *_data) { struct net_device *dev = _data; struct rtl8139_private *tp = netdev_priv(dev); - unsigned long thr_delay; + unsigned long thr_delay = next_tick; - if (rtnl_shlock_nowait() == 0) { + if (tp-watchdog_fired) { + tp-watchdog_fired = 0; + rtl8139_tx_timeout_task(_data); + } else if (rtnl_shlock_nowait() == 0) { rtl8139_thread_iter (dev, tp, tp-mmio_addr); rtnl_unlock (); - - thr_delay = next_tick; } else { /* unlikely race. mitigate with fast poll. 
*/ thr_delay = HZ / 2; @@ -1631,7 +1634,8 @@ static void rtl8139_stop_thread(struct r if (tp-have_thread) { cancel_rearming_delayed_work(tp-thread); tp-have_thread = 0; - } + } else + flush_scheduled_work(); } static inline void rtl8139_tx_clear (struct rtl8139_private *tp) @@ -1642,14 +1646,13 @@ static inline void rtl8139_tx_clear (str /* XXX account for unsent Tx packets in tp-stats.tx_dropped */ } - -static void rtl8139_tx_timeout (struct net_device *dev) +static void rtl8139_tx_timeout_task (void *_data) { + struct net_device *dev = _data; struct rtl8139_private *tp = netdev_priv(dev); void __iomem *ioaddr = tp-mmio_addr; int i; u8 tmp8; - unsigned long flags; printk (KERN_DEBUG %s: Transmit timeout, status %2.2x %4.4x %4.4x media %2.2x.\n, dev-name, RTL_R8 (ChipCmd), @@ -1670,23 +1673,34 @@ static void rtl8139_tx_timeout (struct n if (tmp8 CmdTxEnb) RTL_W8 (ChipCmd, CmdRxEnb); - spin_lock(tp-rx_lock); + spin_lock_bh(tp-rx_lock); /* Disable interrupts by clearing the interrupt mask. */ RTL_W16 (IntrMask, 0x); /* Stop a shared interrupt from scavenging while we are. */ - spin_lock_irqsave (tp-lock, flags); + spin_lock_irq(tp-lock);
Re: [PATCH] check connect(2) status for IPv6 UDP socket
Hi all, along the same lines as this patch, why is the dst_entry stored for RAW sockets? In case of specific IPSec rules for ICMPv6, the xfrm state can be different for the same destination. Attached is a proposed patch. Regards, Nicolas [IPV6] Don't store dst_entry for RAW socket Signed-off-by: Nicolas DICHTEL [EMAIL PROTECTED] Mitsuru KANDA a écrit : Hello, I recreated a patch for unconnected udpv6 socket checking. (Fixed the missing dst refcnt decrement from the previous patch.) Regards, -mk At Thu, 22 Sep 2005 23:44:05 -0700 (PDT), David S. Miller [EMAIL PROTECTED] wrote: ... I think your patch adds a route leak. If we elide the ip6_dst_store() where does the 'dst' reference go? In fact, I think the UDPv6 change might have done the same thing. Mitsuru-san? ipv4 code like this does explicit dst_clone() when storing the 'dst' to the socket, but I can't see where ipv6 is doing this. Perhaps ipv6 dst's work a little differently. :-) --- linux-2.6.15.2/net/ipv6/raw.c 2006-01-31 07:25:07.0 +0100 +++ linux-2.6.15.2-new/net/ipv6/raw.c 2006-02-06 11:46:13.0 +0100 @@ -814,10 +814,7 @@ err = rawv6_push_pending_frames(sk, &fl, rp); } done: - ip6_dst_store(sk, dst, - ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ? - &np->daddr : NULL); - + dst_release(dst); release_sock(sk); out: fl6_sock_release(flowlabel);
Re: [2.6 patch] net/tipc/: possible cleanups
On Sat, 4 Feb 2006, Adrian Bunk wrote: This patch contains the following possible cleanups: - make needlessly global code static Good catch. - #if 0 the following unused global functions: - name_table.c: tipc_nametbl_print() - name_table.c: tipc_nametbl_dump() - net.c: tipc_net_next_node() Thanks! I'll apply this to my tree. /Per - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
e1000 not working on AMD64
I've bought four new e1000 cards (PCI id 8086:107c, chip label 82541PI), three of them are working without problems (i386, kernel 2.4.x). One of them is installed in an AMD64 SMP system (Athlon dual core 4GB). It gets detected, link is reported to be up, but no data goes through (in fact _sometimes_ it succeeds to get an IP adress via DHCP after 10-20 retries). If I set an IP manually, I am not able to ping any other host (ping sizes 30bytes ... 1000bytes). Tested on kernel 2.6.15, 2.6.16-rc2 and 2.6.16-rc2-git2. (Un-)setting NAPI does not change anything. Initially I assumed an IRQ-related issue, but the 3ware RAID controller works without any problems. Hajo +++ :00:00.0 RAM memory: nVidia Corporation: Unknown device 02f1 (rev a2) Subsystem: Asustek Computer, Inc.: Unknown device 81bf Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B- Status: Cap+ 66MHz+ UDF- FastB2B+ ParErr- DEVSEL=fast TAbort- TAbort- MAbort- SERR- PERR- Latency: 0 Capabilities: [44] #08 [01e0] Capabilities: [e0] #08 [a800] :00:00.1 RAM memory: nVidia Corporation: Unknown device 02fa (rev a2) Subsystem: Asustek Computer, Inc.: Unknown device 81bf Control: I/O- Mem- BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B- Status: Cap- 66MHz+ UDF- FastB2B- ParErr- DEVSEL=fast TAbort- TAbort- MAbort- SERR- PERR- :00:00.2 RAM memory: nVidia Corporation: Unknown device 02fe (rev a2) Subsystem: Asustek Computer, Inc.: Unknown device 81bf Control: I/O- Mem- BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- Status: Cap- 66MHz+ UDF- FastB2B- ParErr- DEVSEL=fast TAbort- TAbort- MAbort- SERR- PERR- :00:00.3 RAM memory: nVidia Corporation: Unknown device 02f8 (rev a2) Subsystem: Asustek Computer, Inc.: Unknown device 81bf Control: I/O- Mem- BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B- Status: Cap- 66MHz+ UDF- FastB2B+ ParErr- DEVSEL=fast TAbort- TAbort- MAbort- SERR- PERR- :00:00.4 RAM memory: 
nVidia Corporation: Unknown device 02f9 (rev a2) Subsystem: Asustek Computer, Inc.: Unknown device 81bf Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B- Status: Cap- 66MHz+ UDF- FastB2B+ ParErr- DEVSEL=fast TAbort- TAbort- MAbort- SERR- PERR- Latency: 0 :00:00.5 RAM memory: nVidia Corporation: Unknown device 02ff (rev a2) Subsystem: Asustek Computer, Inc.: Unknown device 81bf Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B- Status: Cap+ 66MHz+ UDF- FastB2B+ ParErr- DEVSEL=fast TAbort- TAbort- MAbort- SERR- PERR- Latency: 0 Capabilities: [44] #00 [00fe] Capabilities: [fc] #00 [] :00:00.6 RAM memory: nVidia Corporation: Unknown device 027f (rev a2) Subsystem: Asustek Computer, Inc.: Unknown device 81bf Control: I/O- Mem+ BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B- Status: Cap- 66MHz+ UDF- FastB2B- ParErr- DEVSEL=fast TAbort- TAbort- MAbort- SERR- PERR- :00:00.7 RAM memory: nVidia Corporation: Unknown device 027e (rev a2) Subsystem: Asustek Computer, Inc.: Unknown device 81bf Control: I/O- Mem- BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- Status: Cap- 66MHz+ UDF- FastB2B- ParErr- DEVSEL=fast TAbort- TAbort- MAbort- SERR- PERR- :00:02.0 PCI bridge: nVidia Corporation: Unknown device 02fc (rev a1) (prog-if 00 [Normal decode]) Control: I/O- Mem- BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast TAbort- TAbort- MAbort- SERR- PERR- Latency: 0, Cache Line Size: 0x10 (64 bytes) Bus: primary=00, secondary=01, subordinate=01, sec-latency=0 I/O behind bridge: f000-0fff Memory behind bridge: fff0-000f Prefetchable memory behind bridge: fff0- BridgeCtl: Parity- SERR+ NoISA- VGA- MAbort- Reset- FastB2B- Capabilities: [40] #0d [] Capabilities: [48] Power Management version 2 Flags: PMEClk- DSI- D1- D2- AuxCurrent=0mA PME(D0+,D1+,D2+,D3hot+,D3cold+) Status: D0 
PME-Enable- DSel=0 DScale=0 PME- Capabilities: [50] Message Signalled Interrupts: 64bit+ Queue=0/1 Enable- Address: Data: Capabilities: [60] #08 [a800] Capabilities: [80] #10 [0141] :00:03.0 PCI bridge: nVidia Corporation: Unknown device 02fd (rev a1) (prog-if 00 [Normal decode]) Control: I/O- Mem- BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast TAbort- TAbort- MAbort-
Re: [PATCH] snap: needs hardware checksum fix
On Fri, Feb 03, 2006 at 10:01:17AM -0800, Stephen Hemminger wrote: static unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len) { if (unlikely(len > skb->len)) return NULL; if (skb->ip_summed == CHECKSUM_HW) skb->csum = csum_sub(skb->csum, csum_partial(skb->data, len, 0)); return __skb_pull(skb, len); } Thanks Stephen. I've changed most of the places that call skb_postpull_rcsum over to use this instead. The only places left are IPv6 where it makes sense to separate the checksum update since it wants to pull in bits and pieces and update at the very end. The other place is GRE which is in fact buggy with respect to the pulling (the bug was introduced with the WCCP patch). I'll send a separate patch for that. [NET]: Replace skb_pull/skb_postpull_rcsum with skb_pull_rcsum We're now starting to have quite a number of places that do skb_pull followed immediately by an skb_postpull_rcsum. We can merge these two operations into one function with skb_pull_rcsum. This makes sense since most pull operations on receive skb's need to update the checksum. I've decided to make this out-of-line since it is fairly big and the fast path where hardware checksums are enabled needs to call csum_partial anyway. Since this is a brand new function we get to add an extra check on the len argument. As it is, most callers of skb_pull ignore its return value, which essentially means that there is no check on the len argument. 
Signed-off-by: Herbert Xu [EMAIL PROTECTED] Cheers -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c --- a/drivers/net/ppp_generic.c +++ b/drivers/net/ppp_generic.c @@ -1691,8 +1691,8 @@ ppp_receive_nonmp_frame(struct ppp *ppp, || ppp-npmode[npi] != NPMODE_PASS) { kfree_skb(skb); } else { - skb_pull(skb, 2); /* chop off protocol */ - skb_postpull_rcsum(skb, skb-data - 2, 2); + /* chop off protocol */ + skb_pull_rcsum(skb, 2); skb-dev = ppp-dev; skb-protocol = htons(npindex_to_ethertype[npi]); skb-mac.raw = skb-data; diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c --- a/drivers/net/pppoe.c +++ b/drivers/net/pppoe.c @@ -337,8 +337,7 @@ static int pppoe_rcv_core(struct sock *s if (sk-sk_state PPPOX_BOUND) { struct pppoe_hdr *ph = (struct pppoe_hdr *) skb-nh.raw; int len = ntohs(ph-length); - skb_pull(skb, sizeof(struct pppoe_hdr)); - skb_postpull_rcsum(skb, ph, sizeof(*ph)); + skb_pull_rcsum(skb, sizeof(struct pppoe_hdr)); if (pskb_trim_rcsum(skb, len)) goto abort_kfree; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1174,12 +1174,14 @@ static inline int skb_linearize(struct s */ static inline void skb_postpull_rcsum(struct sk_buff *skb, -const void *start, int len) + const void *start, unsigned int len) { if (skb-ip_summed == CHECKSUM_HW) skb-csum = csum_sub(skb-csum, csum_partial(start, len, 0)); } +unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len); + /** * pskb_trim_rcsum - trim received skb and update checksum * @skb: buffer to trim diff --git a/net/802/psnap.c b/net/802/psnap.c --- a/net/802/psnap.c +++ b/net/802/psnap.c @@ -61,8 +61,7 @@ static int snap_rcv(struct sk_buff *skb, /* Pass the frame on. 
*/ u8 *hdr = skb-data; skb-h.raw += 5; - skb_pull(skb, 5); - skb_postpull_rcsum(skb, hdr, 5); + skb_pull_rcsum(skb, 5); rc = proto-rcvfunc(skb, dev, snap_packet_type, orig_dev); } else { skb-sk = NULL; diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c --- a/net/8021q/vlan_dev.c +++ b/net/8021q/vlan_dev.c @@ -163,10 +163,8 @@ int vlan_skb_recv(struct sk_buff *skb, s stats-rx_packets++; stats-rx_bytes += skb-len; - skb_pull(skb, VLAN_HLEN); /* take off the VLAN header (4 bytes currently) */ - - /* Need to correct hardware checksum */ - skb_postpull_rcsum(skb, vhdr, VLAN_HLEN); + /* Take off the VLAN header (4 bytes currently) */ + skb_pull_rcsum(skb, VLAN_HLEN); /* Ok, lets check to make sure the device (dev) we * came in on is what this VLAN is attached to. diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c --- a/net/bridge/br_netfilter.c +++
[PATCH] af_unix: use shift instead of integer division
The patch below replaces a divide by 2 with a shift -- sk_sndbuf is an integer, so gcc emits an idiv, which takes 10x longer than a shift by 1. This improves af_unix bandwidth by ~6-10K/s. Also, tidy up the comment to fit in 80 columns while we're at it. -ben Signed-off-by: Benjamin LaHaise [EMAIL PROTECTED] diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 1b5989b..b57d4d9 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -1427,15 +1427,15 @@ static int unix_stream_sendmsg(struct ki while(sent < len) { /* -* Optimisation for the fact that under 0.01% of X messages typically -* need breaking up. +* Optimisation for the fact that under 0.01% of X +* messages typically need breaking up. */ - size=len-sent; + size = len-sent; /* Keep two messages in the pipe so it schedules better */ - if (size > sk->sk_sndbuf / 2 - 64) - size = sk->sk_sndbuf / 2 - 64; + if (size > ((sk->sk_sndbuf >> 1) - 64)) + size = (sk->sk_sndbuf >> 1) - 64; if (size > SKB_MAX_ALLOC) size = SKB_MAX_ALLOC; - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] af_unix: scm: better initialization
Instead of doing a memset followed by initialization of the fields of the scm structure, just initialize all the members explicitly. Prevent reloading of current on x86 and x86-64 by storing the value in a local variable for subsequent dereferences. This is worth a ~7KB/s increase in af_unix bandwidth. Note that we avoid the issues surrounding potentially uninitialized members of the ucred structure by constructing a struct ucred instead of assigning the members individually, which forces the compiler to zero any padding. Signed-off-by: Benjamin LaHaise [EMAIL PROTECTED] diff --git a/include/net/scm.h b/include/net/scm.h index c3fa3d5..0d90fa2 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -37,10 +37,14 @@ static __inline__ void scm_destroy(struc static __inline__ int scm_send(struct socket *sock, struct msghdr *msg, struct scm_cookie *scm) { - memset(scm, 0, sizeof(*scm)); - scm->creds.uid = current->uid; - scm->creds.gid = current->gid; - scm->creds.pid = current->tgid; + struct task_struct *p = current; + scm->creds = (struct ucred) { + .uid = p->uid, + .gid = p->gid, + .pid = p->tgid + }; + scm->fp = NULL; + scm->seq = 0; if (msg->msg_controllen <= 0) return 0; return __scm_send(sock, msg, scm); - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] af_unix: use shift instead of integer division
On Tue, Feb 07, 2006 at 04:15:31PM +0100, Andi Kleen wrote: On Tuesday 07 February 2006 15:54, Benjamin LaHaise wrote: + if (size > ((sk->sk_sndbuf >> 1) - 64)) + size = (sk->sk_sndbuf >> 1) - 64; This is really surprising. Are you double plus sure gcc doesn't do this automatically? As I said, sk_sndbuf is a signed integer, so gcc can't simply use an arithmetic shift (which would round toward negative infinity if the value is negative, whereas division rounds toward zero -- gcc has no way of knowing that sk_sndbuf will be positive). The alternative would be to convert sk_sndbuf to unsigned, but that would mean rechecking all the users for side effects. -ben - To unsubscribe from this list: send the line "unsubscribe netdev" in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Strange IPsec freeze/partial fix
Hi, there's a problem with IPsec that has been bugging some of our users for the last couple of kernel revs. Every now and then, IPsec will freeze the machine completely. This is with openswan user land, and with kernels up to and including 2.6.16-rc2. I managed to debug this a little, and what happens is that we end up looping in xfrm_lookup, and never get out. With a few debug printks added, I can see this happening: ip_route_output_flow calls xfrm_lookup. xfrm_find_bundle returns NULL (apparently we're in the middle of negotiating a new SA or something). We therefore call xfrm_tmpl_resolve. This returns EAGAIN. We go to sleep, waiting for a policy update. Then we loop back to the top. Apparently, the dst_orig that was passed into xfrm_lookup has been dropped from the routing table (obsolete=2). This leads to the endless loop, because we now create a new bundle, check the new bundle and find it's stale (stale_bundle -> xfrm_bundle_ok -> dst_check() returns 0). People have been testing with the patch below, which seems to fix the problem partially. They still see connection hangs however (things only clear up when they start a new ping or new ssh). So the patch is obviously not sufficient, and something else seems to go wrong. I'm grateful for any hints you may have... Olaf -- Subject: [XFRM] Fix infinite loop in xfrm_lookup It seems that the route xfrm_lookup is given on input can go away when we sleep. 
Signed-off-by: Olaf Kirch [EMAIL PROTECTED] net/ipv4/route.c | 25 - net/xfrm/xfrm_policy.c | 16 2 files changed, 32 insertions(+), 9 deletions(-) diff -r df2df438c970 net/ipv4/route.c --- a/net/ipv4/route.c Mon Feb 6 14:08:26 2006 -0500 +++ b/net/ipv4/route.c Mon Feb 6 15:52:09 2006 -0500 @@ -2609,18 +2609,25 @@ int ip_route_output_flow(struct rtable * { int err; - if ((err = __ip_route_output_key(rp, flp)) != 0) - return err; - - if (flp-proto) { - if (!flp-fl4_src) - flp-fl4_src = (*rp)-rt_src; - if (!flp-fl4_dst) - flp-fl4_dst = (*rp)-rt_dst; - return xfrm_lookup((struct dst_entry **)rp, flp, sk, flags); - } - - return 0; + if (flp-proto == 0) { + err = __ip_route_output_key(rp, flp); + } else { + u32 fl_src = flp-fl4_src, fl_dst = flp-fl4_dst; + int repeat = 1; + + do { + if ((err = __ip_route_output_key(rp, flp)) != 0) + break; + + if (!fl_src) + flp-fl4_src = (*rp)-rt_src; + if (!fl_dst) + flp-fl4_dst = (*rp)-rt_dst; + err = xfrm_lookup((struct dst_entry **)rp, flp, sk, flags); + } while (err == -EAGAIN repeat--); + } + + return err; } EXPORT_SYMBOL_GPL(ip_route_output_flow); diff -r df2df438c970 net/xfrm/xfrm_policy.c --- a/net/xfrm/xfrm_policy.cMon Feb 6 14:08:26 2006 -0500 +++ b/net/xfrm/xfrm_policy.cMon Feb 6 15:52:09 2006 -0500 @@ -786,7 +786,22 @@ int xfrm_lookup(struct dst_entry **dst_p u16 family = dst_orig-ops-family; u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); u32 sk_sid = security_sk_sid(sk, fl, dir); + int loops = 0; + restart: + if (loops dst_orig dst_orig-obsolete 0) { + printk(KERN_NOTICE xfrm_lookup: route is stale (obsolete=%d, loops=%d)\n, + dst_orig-obsolete, loops); + err = -EAGAIN; + goto error_nopol; + } + if (unlikely(++loops 10)) { + printk(KERN_NOTICE xfrm_lookup bailing out after %d loops\n, loops); + dump_stack(); + err = -EHOSTUNREACH; + goto error_nopol; + } + genid = atomic_read(flow_cache_genid); policy = NULL; if (sk sk-sk_policy[1]) @@ -854,6 +869,7 @@ restart: } if (nx == -EAGAIN || genid != atomic_read(flow_cache_genid)) { 
+ printk(KERN_NOTICE xfrm_tmpl_resolve says EAGAIN, try again\n); xfrm_pol_put(policy); goto restart; } @@ -903,8 +919,9 @@ restart: return 0; error: + xfrm_pol_put(policy); +error_nopol: dst_release(dst_orig); - xfrm_pol_put(policy); *dst_p = NULL; return err; } -- Olaf Kirch | --- o --- Nous sommes du soleil we love when we play [EMAIL PROTECTED] |/ | \ sol.dhoop.naytheet.ah kin.ir.samse.qurax - To unsubscribe
[patch 1/2] s390: lcs performance enhancements
[patch 1/2] s390: lcs performance enhancements From: Klaus Wacker [EMAIL PROTECTED] - When flood pinging (with large packet size) an LCS device, about 90 % of all packets are dropped by driver. - increased number of lcs IO buffers to 32. - use netif_stop_queue/netif_wake_queue in lcs_start_xmit routine - don't lock the whole xmit routine but just the piece of code where tx_buffer is touched. Signed-off-by: Frank Pavlic [EMAIL PROTECTED] diffstat: lcs.c | 31 +-- lcs.h |2 +- 2 files changed, 18 insertions(+), 15 deletions(-) diff -Naupr git-orig/drivers/s390/net/lcs.c git-patched/drivers/s390/net/lcs.c --- git-orig/drivers/s390/net/lcs.c 2006-02-07 10:55:28.0 +0100 +++ git-patched/drivers/s390/net/lcs.c 2006-02-07 11:06:37.0 +0100 @@ -98,9 +98,9 @@ lcs_register_debug_facility(void) return -ENOMEM; } debug_register_view(lcs_dbf_setup, debug_hex_ascii_view); - debug_set_level(lcs_dbf_setup, 4); + debug_set_level(lcs_dbf_setup, 2); debug_register_view(lcs_dbf_trace, debug_hex_ascii_view); - debug_set_level(lcs_dbf_trace, 4); + debug_set_level(lcs_dbf_trace, 2); return 0; } @@ -1292,9 +1292,8 @@ lcs_set_multicast_list(struct net_device LCS_DBF_TEXT(4, trace, setmulti); card = (struct lcs_card *) dev-priv; -if (!lcs_set_thread_start_bit(card, LCS_SET_MC_THREAD)) { +if (!lcs_set_thread_start_bit(card, LCS_SET_MC_THREAD)) schedule_work(card-kernel_thread_starter); - } } #endif /* CONFIG_IP_MULTICAST */ @@ -1459,6 +1458,8 @@ lcs_txbuffer_cb(struct lcs_channel *chan lcs_release_buffer(channel, buffer); card = (struct lcs_card *) ((char *) channel - offsetof(struct lcs_card, write)); + if (netif_queue_stopped(card-dev)) + netif_wake_queue(card-dev); spin_lock(card-lock); card-tx_emitted--; if (card-tx_emitted = 0 card-tx_buffer != NULL) @@ -1478,6 +1479,7 @@ __lcs_start_xmit(struct lcs_card *card, struct net_device *dev) { struct lcs_header *header; + int rc = 0; LCS_DBF_TEXT(5, trace, hardxmit); if (skb == NULL) { @@ -1492,10 +1494,8 @@ __lcs_start_xmit(struct lcs_card *card, 
card-stats.tx_carrier_errors++; return 0; } - if (netif_queue_stopped(dev) ) { - card-stats.tx_dropped++; - return -EBUSY; - } + netif_stop_queue(card-dev); + spin_lock(card-lock); if (card-tx_buffer != NULL card-tx_buffer-count + sizeof(struct lcs_header) + skb-len + sizeof(u16) LCS_IOBUFFERSIZE) @@ -1506,7 +1506,8 @@ __lcs_start_xmit(struct lcs_card *card, card-tx_buffer = lcs_get_buffer(card-write); if (card-tx_buffer == NULL) { card-stats.tx_dropped++; - return -EBUSY; + rc = -EBUSY; + goto out; } card-tx_buffer-callback = lcs_txbuffer_cb; card-tx_buffer-count = 0; @@ -1518,13 +1519,18 @@ __lcs_start_xmit(struct lcs_card *card, header-type = card-lan_type; header-slot = card-portno; memcpy(header + 1, skb-data, skb-len); + spin_unlock(card-lock); card-stats.tx_bytes += skb-len; card-stats.tx_packets++; dev_kfree_skb(skb); - if (card-tx_emitted = 0) + netif_wake_queue(card-dev); + spin_lock(card-lock); + if (card-tx_emitted = 0 card-tx_buffer != NULL) /* If this is the first tx buffer emit it immediately. */ __lcs_emit_txbuffer(card); - return 0; +out: + spin_unlock(card-lock); + return rc; } static int @@ -1535,9 +1541,7 @@ lcs_start_xmit(struct sk_buff *skb, stru LCS_DBF_TEXT(5, trace, pktxmit); card = (struct lcs_card *) dev-priv; - spin_lock(card-lock); rc = __lcs_start_xmit(card, skb, dev); - spin_unlock(card-lock); return rc; } @@ -2319,7 +2323,6 @@ __init lcs_init_module(void) PRINT_ERR(Initialization failed\n); return rc; } - return 0; } diff -Naupr git-orig/drivers/s390/net/lcs.h git-patched/drivers/s390/net/lcs.h --- git-orig/drivers/s390/net/lcs.h 2006-02-07 10:55:28.0 +0100 +++ git-patched/drivers/s390/net/lcs.h 2006-02-07 11:00:08.0 +0100 @@ -95,7 +95,7 @@ do { */ #define LCS_ILLEGAL_OFFSET 0x #define LCS_IOBUFFERSIZE 0x5000 -#define LCS_NUM_BUFFS 8 /* needs to be power of 2 */ +#define LCS_NUM_BUFFS 32 /* needs to be power of 2 */ #define LCS_MAC_LENGTH 6 #define
[patch 2/2] s390: some qeth driver fixes
[patch 2/2] s390: some qeth driver fixes From: Frank Pavlic [EMAIL PROTECTED] - fixed kernel panic when using EDDP support in Layer 2 mode - NULL pointer exception in qeth_set_offline fixed. - setting EDDP in Layer 2 mode did not set NETIF_F_(SG/TSO) flags when device became online. - use sscanf for parsing and converting IPv4 addresses from string to __u8 values. - qeth_string_to_ipaddr6 fixed. in case of double colon the converted IPv6 address out from the string was not correct in previous implementation. Signed-off-by: Frank Pavlic [EMAIL PROTECTED] diffstat: qeth.h | 112 +--- qeth_eddp.c | 11 - qeth_main.c | 17 +++-- 3 files changed, 63 insertions(+), 77 deletions(-) diff -Naupr git-orig/drivers/s390/net/qeth_eddp.c git-patched/drivers/s390/net/qeth_eddp.c --- git-orig/drivers/s390/net/qeth_eddp.c 2006-02-07 10:55:28.0 +0100 +++ git-patched/drivers/s390/net/qeth_eddp.c2006-02-07 11:17:11.0 +0100 @@ -59,8 +59,7 @@ qeth_eddp_free_context(struct qeth_eddp_ for (i = 0; i ctx-num_pages; ++i) free_page((unsigned long)ctx-pages[i]); kfree(ctx-pages); - if (ctx-elements != NULL) - kfree(ctx-elements); + kfree(ctx-elements); kfree(ctx); } @@ -413,6 +412,13 @@ __qeth_eddp_fill_context_tcp(struct qeth QETH_DBF_TEXT(trace, 5, eddpftcp); eddp-skb_offset = sizeof(struct qeth_hdr) + eddp-nhl + eddp-thl; + if (eddp-qh.hdr.l2.id == QETH_HEADER_TYPE_LAYER2) { + eddp-skb_offset += sizeof(struct ethhdr); +#ifdef CONFIG_QETH_VLAN + if (eddp-mac.h_proto == __constant_htons(ETH_P_8021Q)) + eddp-skb_offset += VLAN_HLEN; +#endif /* CONFIG_QETH_VLAN */ + } tcph = eddp-skb-h.th; while (eddp-skb_offset eddp-skb-len) { data_len = min((int)skb_shinfo(eddp-skb)-tso_size, @@ -483,6 +489,7 @@ qeth_eddp_fill_context_tcp(struct qeth_e return -ENOMEM; } if (qhdr-hdr.l2.id == QETH_HEADER_TYPE_LAYER2) { + skb-mac.raw = (skb-data) + sizeof(struct qeth_hdr); memcpy(eddp-mac, eth_hdr(skb), ETH_HLEN); #ifdef CONFIG_QETH_VLAN if (eddp-mac.h_proto == __constant_htons(ETH_P_8021Q)) { diff -Naupr 
git-orig/drivers/s390/net/qeth.h git-patched/drivers/s390/net/qeth.h --- git-orig/drivers/s390/net/qeth.h2006-02-07 10:55:28.0 +0100 +++ git-patched/drivers/s390/net/qeth.h 2006-02-07 11:17:11.0 +0100 @@ -1076,16 +1076,6 @@ qeth_get_qdio_q_format(struct qeth_card } static inline int -qeth_isdigit(char * buf) -{ - while (*buf) { - if (!isdigit(*buf++)) - return 0; - } - return 1; -} - -static inline int qeth_isxdigit(char * buf) { while (*buf) { @@ -1104,33 +1094,17 @@ qeth_ipaddr4_to_string(const __u8 *addr, static inline int qeth_string_to_ipaddr4(const char *buf, __u8 *addr) { - const char *start, *end; - char abuf[4]; - char *tmp; - int len; - int i; - - start = buf; - for (i = 0; i 4; i++) { - if (i == 3) { - end = strchr(start,0xa); - if (end) - len = end - start; - else - len = strlen(start); - } - else { - end = strchr(start, '.'); - len = end - start; - } - if ((len = 0) || (len 3)) - return -EINVAL; - memset(abuf, 0, 4); - strncpy(abuf, start, len); - if (!qeth_isdigit(abuf)) + int count = 0, rc = 0; + int in[4]; + + rc = sscanf(buf, %d.%d.%d.%d%n, + in[0], in[1], in[2], in[3], count); + if (rc != 4 || count) + return -EINVAL; + for (count = 0; count 4; count++) { + if (in[count] 255) return -EINVAL; - addr[i] = simple_strtoul(abuf, tmp, 10); - start = end + 1; + addr[count] = in[count]; } return 0; } @@ -1149,36 +1123,44 @@ qeth_ipaddr6_to_string(const __u8 *addr, static inline int qeth_string_to_ipaddr6(const char *buf, __u8 *addr) { - const char *start, *end; - u16 *tmp_addr; - char abuf[5]; - char *tmp; - int len; - int i; - - tmp_addr = (u16 *)addr; - start = buf; - for (i = 0; i 8; i++) { - if (i == 7) { - end = strchr(start,0xa); - if (end) - len = end - start; - else - len =
Re: Kernel BUG at drivers/net/tg3.c:2914 on SMP amd64
On Thu, 2006-02-02 at 13:37 +, Mike Crowe wrote: I'm running the Debian 2.6.15 kernel from backports.org on a machine with two Opteron 275s. I am getting a BUG in tg3.c quite reliably if I ping flood the machine from a few others and cause a bit of other network activity. Sometimes it takes a few minutes, sometimes half an hour. The BUG also fires in more realistic situations - it just takes longer to reproduce. On Thu, Feb 02, 2006 at 08:01:57AM -0800, Michael Chan wrote: Most likely due to MMIO being re-ordered. We've seen this on a number of AMD machines. Please try this test patch below. If the problem goes away, send me the output of lspci -vvvxxx on your machine and I'll create a patch to fix this automatically on your machine. Thanks. It looks like the machine had issues. After a bit of fiddling around I've persuaded the problem to go away both with and without the patch. It doesn't look like it was caused by the MMIO being reordered unless this is a BIOS option that I've now turned off. Thanks for your help anyway. -- Mike Crowe - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: e1000 not working on AMD64
Does /proc/interrupts show the interrupts incrementing for the interface? -Jeb -Original Message- From: [EMAIL PROTECTED] [mailto:[EMAIL PROTECTED] On Behalf Of Hajo Noerenberg Sent: Tuesday, February 07, 2006 3:22 AM To: netdev@vger.kernel.org Subject: e1000 not working on AMD64 I've bought four new e1000 cards (PCI id 8086:107c, chip label 82541PI), three of them are working without problems (i386, kernel 2.4.x). One of them is installed in an AMD64 SMP system (Athlon dual core 4GB). It gets detected, link is reported to be up, but no data goes through (in fact _sometimes_ it succeeds to get an IP adress via DHCP after 10-20 retries). If I set an IP manually, I am not able to ping any other host (ping sizes 30bytes ... 1000bytes). Tested on kernel 2.6.15, 2.6.16-rc2 and 2.6.16-rc2-git2. (Un-)setting NAPI does not change anything. Initially I assumed an IRQ-related issue, but the 3ware RAID controller works without any problems. Hajo +++ :00:00.0 RAM memory: nVidia Corporation: Unknown device 02f1 (rev a2) Subsystem: Asustek Computer, Inc.: Unknown device 81bf Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B- Status: Cap+ 66MHz+ UDF- FastB2B+ ParErr- DEVSEL=fast TAbort- TAbort- MAbort- SERR- PERR- Latency: 0 Capabilities: [44] #08 [01e0] Capabilities: [e0] #08 [a800] :00:00.1 RAM memory: nVidia Corporation: Unknown device 02fa (rev a2) Subsystem: Asustek Computer, Inc.: Unknown device 81bf Control: I/O- Mem- BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B- Status: Cap- 66MHz+ UDF- FastB2B- ParErr- DEVSEL=fast TAbort- TAbort- MAbort- SERR- PERR- :00:00.2 RAM memory: nVidia Corporation: Unknown device 02fe (rev a2) Subsystem: Asustek Computer, Inc.: Unknown device 81bf Control: I/O- Mem- BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- Status: Cap- 66MHz+ UDF- FastB2B- ParErr- DEVSEL=fast TAbort- TAbort- MAbort- SERR- PERR- :00:00.3 RAM memory: nVidia Corporation: Unknown 
device 02f8 (rev a2) Subsystem: Asustek Computer, Inc.: Unknown device 81bf Control: I/O- Mem- BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B- Status: Cap- 66MHz+ UDF- FastB2B+ ParErr- DEVSEL=fast TAbort- TAbort- MAbort- SERR- PERR- :00:00.4 RAM memory: nVidia Corporation: Unknown device 02f9 (rev a2) Subsystem: Asustek Computer, Inc.: Unknown device 81bf Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B- Status: Cap- 66MHz+ UDF- FastB2B+ ParErr- DEVSEL=fast TAbort- TAbort- MAbort- SERR- PERR- Latency: 0 :00:00.5 RAM memory: nVidia Corporation: Unknown device 02ff (rev a2) Subsystem: Asustek Computer, Inc.: Unknown device 81bf Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B- Status: Cap+ 66MHz+ UDF- FastB2B+ ParErr- DEVSEL=fast TAbort- TAbort- MAbort- SERR- PERR- Latency: 0 Capabilities: [44] #00 [00fe] Capabilities: [fc] #00 [] :00:00.6 RAM memory: nVidia Corporation: Unknown device 027f (rev a2) Subsystem: Asustek Computer, Inc.: Unknown device 81bf Control: I/O- Mem+ BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR+ FastB2B- Status: Cap- 66MHz+ UDF- FastB2B- ParErr- DEVSEL=fast TAbort- TAbort- MAbort- SERR- PERR- :00:00.7 RAM memory: nVidia Corporation: Unknown device 027e (rev a2) Subsystem: Asustek Computer, Inc.: Unknown device 81bf Control: I/O- Mem- BusMaster- SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- Status: Cap- 66MHz+ UDF- FastB2B- ParErr- DEVSEL=fast TAbort- TAbort- MAbort- SERR- PERR- :00:02.0 PCI bridge: nVidia Corporation: Unknown device 02fc (rev a1) (prog-if 00 [Normal decode]) Control: I/O- Mem- BusMaster+ SpecCycle- MemWINV- VGASnoop- ParErr- Stepping- SERR- FastB2B- Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast TAbort- TAbort- MAbort- SERR- PERR- Latency: 0, Cache Line Size: 0x10 (64 bytes) Bus: primary=00, secondary=01, subordinate=01, sec-latency=0 I/O behind bridge: f000-0fff Memory behind 
bridge: fff0-000f Prefetchable memory behind bridge: fff0- BridgeCtl: Parity- SERR+ NoISA- VGA- MAbort- Reset- FastB2B- Capabilities: [40] #0d [] Capabilities: [48] Power Management version 2 Flags: PMEClk- DSI- D1- D2- AuxCurrent=0mA PME(D0+,D1+,D2+,D3hot+,D3cold+) Status: D0 PME-Enable- DSel=0 DScale=0 PME- Capabilities: [50] Message Signalled Interrupts: 64bit+ Queue=0/1 Enable-
Re: e1000 not working on AMD64
Hajo Noerenberg wrote: I've bought four new e1000 cards (PCI id 8086:107c, chip label 82541PI), three of them are working without problems (i386, kernel 2.4.x). One of them is installed in an AMD64 SMP system (Athlon dual core 4GB). It gets detected, link is reported to be up, but no data goes through (in fact _sometimes_ it succeeds in getting an IP address via DHCP after 10-20 retries). If I set an IP manually, I am not able to ping any other host (ping sizes 30bytes ... 1000bytes). Tested on kernel 2.6.15, 2.6.16-rc2 and 2.6.16-rc2-git2. (Un-)setting NAPI does not change anything. Initially I assumed an IRQ-related issue, but the 3ware RAID controller works without any problems. Do any of the three known-good cards work in your AMD64 SMP system? Does the card that does not work in your AMD64 system work in any of the other systems? rick jones - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [test] airo : first WPA-PSK support
Dan Williams wrote: AFAIK anything less than 5.40.x doesn't work anyway. The latest stuff (5.60.x) has worked fine. I previously had 5.30.17, which tended to hang the card after a while. Anyway, perhaps we require people to update their firmware. Not sure. What's the minimum firmware version for WPA support? according to http://www.cisco.com/en/US/products/hw/wireless/ps4555/prod_release_notes_list.html 5.30.17 But we need to support older cards 340, ... Matthieu - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] acxsm: Add _{get,set}_encodeext and improve logging in _encode
Add _{get,set}_encodeext and improve logging in _encode The code in _{get,set}_encode has been reordered a bit so we have better logging (function entry and exit) and _{get,set}_encodeext have been implemented as a wrapper for the ieee80211 stack functions. diff --git a/ioctl.c b/ioctl.c index 041f165..1f7 100644 --- a/ioctl.c +++ b/ioctl.c @@ -1007,7 +1007,9 @@ acx_ioctl_set_encode( union iwreq_data *wrqu, char *extra) { - int result = ieee80211_wx_set_encode(netdev_priv(ndev), info, wrqu, extra); + int result; + FN_ENTER; + result = ieee80211_wx_set_encode(netdev_priv(ndev), info, wrqu, extra); FN_EXIT1(result); return result; } @@ -1023,12 +1025,45 @@ acx_ioctl_get_encode( union iwreq_data *wrqu, char *extra) { - int result = ieee80211_wx_get_encode(netdev_priv(ndev), info, wrqu, extra); + int result; + FN_ENTER; + result = ieee80211_wx_get_encode(netdev_priv(ndev), info, wrqu, extra); FN_EXIT1(result); return result; } - +/*** +** acx_ioctl_set_encodeext +*/ +static int +acx_ioctl_set_encodeext( + struct net_device *ndev, + struct iw_request_info *info, + union iwreq_data *wrqu, + char *extra) +{ + int result; + FN_ENTER; + result = ieee80211_wx_set_encodeext(netdev_priv(ndev), info, wrqu, extra); + FN_EXIT1(result); + return result; +} +/*** +** acx_ioctl_get_encodeext +*/ +static int +acx_ioctl_get_encodeext( + struct net_device *ndev, + struct iw_request_info *info, + union iwreq_data *wrqu, + char *extra) +{ + int result; + FN_ENTER; + result = ieee80211_wx_get_encodeext(netdev_priv(ndev), info, wrqu, extra); + FN_EXIT1(result); + return result; +} /*** */ static int @@ -2502,6 +2537,8 @@ static const iw_handler acx_ioctl_handle /* Encoding */ WX(SIOCSIWENCODE) = acx_ioctl_set_encode, WX(SIOCGIWENCODE) = acx_ioctl_get_encode, + WX(SIOCSIWENCODEEXT)= acx_ioctl_set_encodeext, + WX(SIOCGIWENCODEEXT)= acx_ioctl_get_encodeext, /* Power saving */ WX(SIOCSIWPOWER)= acx_ioctl_set_power, WX(SIOCGIWPOWER)= acx_ioctl_get_power, -- Carlos Martín Nieto| 
http://www.cmartin.tk Erdbeben? Sicherlich etwas, das mit Erdberen zu tun hat. -- me, paraphrased - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Strange IPsec freeze/partial fix
Olaf Kirch [EMAIL PROTECTED] wrote: People have been testing with the patch below, which seems to fix the problem partially. They still see connection hangs however (things only clear up when they start a new ping or new ssh). So the patch is obviously not sufficient, and something else seems to go wrong. I suggest that we simply bail out always. If the dst decides to die on us later on, the packet will be dropped anyway. So there is no great urgency to retry here. Once we have the proper resolution queueing, we can then do the retry again. Signed-off-by: Herbert Xu [EMAIL PROTECTED] Cheers, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt -- diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -890,7 +890,9 @@ restart: xfrm_pol_put(policy); if (dst) dst_free(dst); - goto restart; + + err = -EHOSTUNREACH; + goto error; } dst-next = policy-bundles; policy-bundles = dst; - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] Fix softmac scan
John, Softmac scanning fails because the stop flag is not cleared before scanning is started. The attached one-line patch fixes this. Signed-Off-By: Larry Finger [EMAIL PROTECTED] diff --git a/net/ieee80211/softmac/ieee80211softmac_scan.c b/net/ieee80211/softmac/ieee80211softmac_scan.c index d90d31f..1cdd9f1 100644 --- a/net/ieee80211/softmac/ieee80211softmac_scan.c +++ b/net/ieee80211/softmac/ieee80211softmac_scan.c @@ -178,6 +178,7 @@ int ieee80211softmac_start_scan_implemen dprintk(PFX Scanning %d channels\n, sm-scaninfo-number_channels); sm-scaninfo-current_channel_idx = 0; sm-scaninfo-started = 1; + sm-scaninfo-stop = 0; INIT_COMPLETION(sm-scaninfo-finished); schedule_work(sm-scaninfo-softmac_scan); spin_unlock_irqrestore(sm-lock, flags);
Re: [Patch] 2.4.32 - Neighbour Cache (ARP) State machine bug Fixed
Hi, On Tue, Feb 07, 2006 at 12:57:43AM -0700, Pradeep Vincent wrote: In 2.4.21, arp code uses gc_timer to check for stale arp cache entries. In 2.6, each entry has its own timer to check for stale arp cache. 2.4.29 to 2.4.32 kernels (at least) use neither of these timers. This causes problems in environments where IPs or MACs are reassigned - saw this problem on load balancing router based networks that use VMACs. Tested this code on load balancing router based networks as well as peer-linux systems. Thanks, Signed off by: Pradeep Vincent [EMAIL PROTECTED] diff -Naur old/net/core/neighbour.c new/net/core/neighbour.c --- old/net/core/neighbour.cWed Nov 23 17:15:30 2005 +++ new/net/core/neighbour.cWed Nov 23 17:26:01 2005 @@ -14,6 +14,7 @@ * Vitaly E. Lavrovreleasing NULL neighbor in neigh_add. * Harald WelteAdd neighbour cache statistics like rtstat * Harald Welteport neighbour cache rework from 2.6.9-rcX + * Pradeep Vincent Move neighbour cache entry to stale state */ As you can see above, your mailer is still broken. Leading spaces get removed and it seems like tabs are replaced with spaces. This makes it really annoying to fix by hand because we all have to do your work again. You should try to fix your mailer options, possibly by sending a few mails to yourself or someone else (if you send *a few* mails to me, I can confirm which one looks OK). If your mailer is definitely broken, then you may send it as plain text first (for review), with a text attachment for people to apply it without trouble. Thanks, Willy - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
IBM_EMAC_PHY_RX_CLK_FIX depends on non-existing option 440GR
Jean-Luc Leger [EMAIL PROTECTED] reported the following: from drivers/net/Kconfig: config IBM_EMAC_PHY_RX_CLK_FIX bool PHY Rx clock workaround depends on IBM_EMAC (405EP || 440GX || 440EP || 440GR) - maybe this is 440GP ? The non-existing CONFIG_440GR is also present in the driver itself. Is this a typo or a not yet merged platform? cu Adrian -- Is there not promise of rain? Ling Tan asked suddenly out of the darkness. There had been need of rain for many days. Only a promise, Lao Er said. Pearl S. Buck - Dragon Seed - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: IBM_EMAC_PHY_RX_CLK_FIX depends on non-existing option 440GR
On Tue, Feb 07, 2006 at 11:14:49PM +0100, Adrian Bunk wrote: Jean-Luc Leger [EMAIL PROTECTED] reported the following: from drivers/net/Kconfig: config IBM_EMAC_PHY_RX_CLK_FIX bool PHY Rx clock workaround depends on IBM_EMAC (405EP || 440GX || 440EP || 440GR) - maybe this is 440GP ? The non-existing CONFIG_440GR is also present in the driver itself. Is this a typo or a not yet merged platform? Not yet merged platform. -- Eugene - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [BUG] recent commit breaks multi-descriptor receives with ip fragments
From: Jesse Brandeburg [EMAIL PROTECTED] Date: Tue, 7 Feb 2006 14:11:46 -0800 (Pacific Standard Time) A recent commit in 2.6.14 broke this, see this git commit: http://git.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=bc8dfcb93970ad7139c976356bfc99d7e251deaf Or for a shorter version http://tinyurl.com/drpu8 I think we should revert that thing, it's caused more grief than anything else. I thought it was a complete waste of time from the get-go even assuming that fraglists within fraglists never occur... - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] NET : SMP optimization of netdevice refcount
Struct net_device's atomic refcnt are probably one of the hotest memory spots in a SMP/NUMA network router or network server. This counter is constantly incremented/decremented each time a network packet is handled, or a IP route is added/deleted in route cache. This is *not* SMP nor NUMA friendly (because of the locked op that are expensive and memory ping pongs between cpus) But as a matter of fact, the counter is *never* read: It's only read when the device must be unregistered. Some devices are *never* unregistered : loopback, or statically linked drivers, thus we are refcounting them for nothing. This patch try to avoid atomic ops on SMP for the cases were the device wont be unregistered. A 'int static_dev' integer is added next to 'atomic_t refcnt', and may be set to one by drivers that are statically linked. I changed SET_MODULE_OWNER(dev) macro to avoid changing all network drivers, but I'm open to other suggestions. All drivers that are currently using this macro automatically benefit from this SMP optimization : It's better to perform a test/conditional branch (even if badly predicted) than an atomic_{inc|dec}() Signed-off-by: Eric Dumazet [EMAIL PROTECTED] --- a/include/linux/netdevice.h 2006-02-07 11:55:42.0 +0100 +++ b/include/linux/netdevice.h 2006-02-07 13:06:14.0 +0100 @@ -417,10 +417,14 @@ struct timer_list watchdog_timer; /* - * refcnt is a very hot point, so align it on SMP + * {static_dev,refcnt} is a very hot point, so align it on SMP */ /* Number of references to this device */ - atomic_trefcnt cacheline_aligned_in_smp; +#ifdef CONFIG_SMP + /* SMP optimization : if dev is static, no need to modify refcnt */ + int static_dev cacheline_aligned_in_smp; +#endif + atomic_trefcnt; /* delayed register/unregister */ struct list_headtodo_list; @@ -514,7 +518,29 @@ ~NETDEV_ALIGN_CONST); } -#define SET_MODULE_OWNER(dev) do { } while (0) +static inline int netif_static(const struct net_device *dev) +{ +#if defined(CONFIG_SMP) + return dev-static_dev; 
+#else + return 0; +#endif +} + +static inline void netif_setstatic(struct net_device *dev, int v) +{ +#if defined(CONFIG_SMP) +#if defined(MODULE) + v = 0; +#endif + dev-static_dev = v; +#endif +} +/* + * If a driver is a not a module, dev can be marked as static + */ +#define SET_MODULE_OWNER(dev) do { netif_setstatic(dev, 1); } while (0) + /* Set the sysfs physical device reference for the network logical device * if set prior to registration will cause a symlink during initialization. */ @@ -705,11 +731,12 @@ static inline void dev_put(struct net_device *dev) { - atomic_dec(dev-refcnt); + if (!netif_static(dev)) + atomic_dec(dev-refcnt); } -#define __dev_put(dev) atomic_dec((dev)-refcnt) -#define dev_hold(dev) atomic_inc((dev)-refcnt) +#define __dev_put(dev) if (!netif_static(dev)) atomic_dec((dev)-refcnt) +#define dev_hold(dev) if (!netif_static(dev)) atomic_inc((dev)-refcnt) /* Carrier loss detection, dial on demand. The functions netif_carrier_on * and _off may be called from IRQ context, but it is caller --- a/net/core/dev.c2006-02-07 11:59:53.0 +0100 +++ b/net/core/dev.c2006-02-07 12:52:27.0 +0100 @@ -2990,12 +2990,11 @@ alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST) ~NETDEV_ALIGN_CONST; alloc_size += sizeof_priv + NETDEV_ALIGN_CONST; - p = kmalloc(alloc_size, GFP_KERNEL); + p = kzalloc(alloc_size, GFP_KERNEL); if (!p) { printk(KERN_ERR alloc_dev: Unable to allocate device.\n); return NULL; } - memset(p, 0, alloc_size); dev = (struct net_device *) (((long)p + NETDEV_ALIGN_CONST) ~NETDEV_ALIGN_CONST); @@ -3248,6 +3247,7 @@ queue-backlog_dev.weight = weight_p; queue-backlog_dev.poll = process_backlog; atomic_set(queue-backlog_dev.refcnt, 1); + netif_setstatic(queue-backlog_dev, 1); } dev_boot_phase = 0; --- a/drivers/net/loopback.c2006-02-07 12:10:55.0 +0100 +++ b/drivers/net/loopback.c2006-02-07 12:37:49.0 +0100 @@ -224,16 +224,18 @@ int __init loopback_init(void) { struct net_device_stats *stats; + int res; /* Can survive without statistics */ - stats = 
kmalloc(sizeof(struct net_device_stats), GFP_KERNEL); + stats = kzalloc(sizeof(struct net_device_stats), GFP_KERNEL); if (stats) { - memset(stats, 0, sizeof(struct net_device_stats)); loopback_dev.priv = stats; loopback_dev.get_stats = get_stats; } - return register_netdev(loopback_dev); + res = register_netdev(loopback_dev); +
Re: [PATCH] NET : SMP optimization of netdevice refcount
From: Eric Dumazet [EMAIL PROTECTED] Date: Wed, 08 Feb 2006 00:23:45 +0100 Some devices are *never* unregistered : loopback, or statically linked drivers, thus we are refcounting them for nothing. Statically linked drivers can have netdev's that get unregistered and free'd up. For example we have a few cases where configuration calls allocate/register and deallocate/unregister net devices. I understand what you're trying to do, but I don't think this is the way to do it. I once thought we could play some games because of the invariant that if we have a route attached to the SKB, that holds an implicit reference for the netdevice too. But I know there are cases where the route attached to the SKB is to a different device than the one that skb-dev is and should be set to. There is also a temptation to deal with this using per-cpu (or per-node) counters, and that's a too bloated solution. We should be making datastructures smaller not larger. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] NET : SMP optimization of netdevice refcount
David S. Miller a écrit : From: Eric Dumazet [EMAIL PROTECTED] Date: Wed, 08 Feb 2006 00:23:45 +0100 Some devices are *never* unregistered : loopback, or statically linked drivers, thus we are refcounting them for nothing. Statically linked drivers can have netdev's that get unregistered and free'd up. For example we have a few cases where configuration calls allocate/register and deallocate/unregister net devices. Yes, e1000 currently does this, and I actually am using this patch on a machine with a e1000 card : I didn't change e1000 source. I understand what you're trying to do, but I don't think this is the way to do it. I once thought we could play some games because of the invariant that if we have a route attached to the SKB, that holds an implicit reference for the netdevice too. But I know there are cases where the route attached to the SKB is to a different device than the one that skb-dev is and should be set to. There is also a temptation to deal with this using per-cpu (or per-node) counters, and that's a too bloated solution. We should be making datastructures smaller not larger. Yes, I played with a structure I called a llref (Long Lived Reference count), that is suitable for netdevice refcount and struct vfsmount 'refcnt' It uses a per cpu local_t, but current implementation of alloc_percpu(local_t) uses at least 32 bytes per cpu... (sock_mnt, mqueue_mnt, shm_mnt, pipe_mnt, inotify_mnt, bd_mnt, devpts_mnt) are other examples of objects having unnecessary refcounters. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] NET : SMP optimization of netdevice refcount
Eric Dumazet wrote: David S. Miller a écrit : From: Eric Dumazet [EMAIL PROTECTED] Date: Wed, 08 Feb 2006 00:23:45 +0100 Some devices are *never* unregistered : loopback, or statically linked drivers, thus we are refcounting them for nothing. Statically linked drivers can have netdev's that get unregistered and free'd up. For example we have a few cases where configuration calls allocate/register and deallocate/unregister net devices. Yes, e1000 currently does this, and I actually am using this patch on a machine with a e1000 card : I didnt change e1000 source. I understand what you're trying to do, but I don't think this is the way to do it. What do you think about having no ref counting, and upon removal of a network device, we notify each logic unit that deals with skbs or other things that link to the netdev and ask it to clean all references to the NIC in question? I'm not sure how much fun this would be to code..but it would at least force us to understand exactly what code holds netdev references, and get rid of an atomic op or two in the hot path... Ben -- Ben Greear [EMAIL PROTECTED] Candela Technologies Inc http://www.candelatech.com - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] NET : SMP optimization of netdevice refcount
From: Ben Greear [EMAIL PROTECTED] Date: Tue, 07 Feb 2006 15:54:06 -0800 What do you think about having no ref counting, and upon removal of a network device, we notify each logic unit that deals with skbs or other things that link to the netdev and ask it to clean all references to the NIC in question? That's a lot of notifiers. Routes and neighbour cache entries are relatively easy, and we do that today, but sockets and netfilter can get really messy. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] NET : SMP optimization of netdevice refcount
On Tue, 07 Feb 2006 16:11:51 -0800 (PST) David S. Miller [EMAIL PROTECTED] wrote: From: Ben Greear [EMAIL PROTECTED] Date: Tue, 07 Feb 2006 15:54:06 -0800 What do you think about having no ref counting, and upon removal of a network device, we notify each logic unit that deals with skbs or other things that link to the netdev and ask it to clean all references to the NIC in question? That's a lot of notifiers. Routes and neighbour cache entries are relatively easy, and we do that today, but sockets and netfilter can get really messy. How bad would per-cpu'ish counting be? There are always users with 1000s of vlans etc; so it would have to scale. Even a counter that is hashed on cpu # would be better (assuming they weren't all in the same cache line). Also, isn't a lot of the problem reduced if network devices are affinitied? -- Stephen Hemminger [EMAIL PROTECTED] OSDL http://developer.osdl.org/~shemminger - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] NET : SMP optimization of netdevice refcount
From: Stephen Hemminger [EMAIL PROTECTED] Date: Tue, 7 Feb 2006 16:19:42 -0800 Also, isn't a lot of the problem reduced if network devices are affinitied? Not for routing/firewalling, we touch the destination device's counters during input softirq processing of the source device. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] NET : SMP optimization of netdevice refcount
From: Rick Jones [EMAIL PROTECTED] Date: Tue, 07 Feb 2006 16:29:34 -0800 In the realm of straw ideas, how often are netdevs added and removed, and would leaving a tombstone behind consume too much memory? That could work. Another idea is to revisit the scheme of storing just the ifindex in the SKB instead of the device pointer. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] NET : SMP optimization of netdevice refcount
David S. Miller wrote: From: Rick Jones [EMAIL PROTECTED] Date: Tue, 07 Feb 2006 16:29:34 -0800 In the realm of straw ideas, how often are netdevs added and removed, and would leaving a tombstone behind consume too much memory? That could work. Another idea is to revisit the scheme of storing just the ifindex in the SKB instead of the device pointer. That means extra lookups of interface name to device pointer right? Would that even be sufficient to keep a reference-count free netdev from being yanked out from under someone? The only thing that worries me about tombstones is someone with a QA test that adds and removes a device over and over again, and fills the cemetery as it were. It is after all something of a deliberate memory leak. Is there some way for a daemon or somesuch to try to garbage collect the tombstones? I guess that gets back to knowing where things that might have a reference to the netdev happen to be, which would be just about the same as the suggestion to call into them somehow so it becomes known that none of them have a reference... Unless there is some sort of event where it is known that when it happens all netdev references have been refreshed - something short of reboot of course. What sort of pain and suffering would happen if an old tombstone were brought back to life for a new device? Would that cause code referencing it to get all bent out of shape? Is there anything that would naturally inform the entity dereferencing to the newly undead that they were resurrected? just thinking while typing rick jones - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] NET : SMP optimization of netdevice refcount
David S. Miller wrote: From: Ben Greear [EMAIL PROTECTED] Date: Tue, 07 Feb 2006 15:54:06 -0800 What do you think about having no ref counting, and upon removal of a network device, we notify each logic unit that deals with skbs or other things that link to the netdev and ask it to clean all references to the NIC in question? That's a lot of notifiers. Routes and neighbour cache entries are relatively easy, and we do that today, but sockets and netfilter can get really messy. I ended up touching every part of the tree that grabbed or released a netdevice when chasing down that arp-cache bug some months ago. It is a bit of work, but it's not insurmountable. Allowing a notifier to clean the references would probably be a similar amount of work, and as you say, many modules already listen for the notifiers and clean up their references accordingly. At least some sockets (packet-sockets, for example) don't keep actual netdev references, but just use the ifindex. I'd personally prefer that they actually hold a reference and listen to notifiers appropriately, but in this case, it could make moving to a notifier scheme quicker. I'm less sure about netfilter Thanks, Ben -- Ben Greear [EMAIL PROTECTED] Candela Technologies Inc http://www.candelatech.com - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] NET : SMP optimization of netdevice refcount
Ben Greear wrote: Rick Jones wrote: In the realm of straw ideas, how often are netdevs added and removed, and would leaving a tombstone behind consume too much memory? In certain cases...say, with vlans, you could very often create and destroy net devices. I think that giving up and leaking the memory is not a good idea. What makes vlans more likely to be more dynamic in that way? rick - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] NET : SMP optimization of netdevice refcount
David S. Miller wrote: From: Ben Greear [EMAIL PROTECTED] Date: Tue, 07 Feb 2006 16:39:52 -0800 Rick Jones wrote: In the realm of straw ideas, how often are netdevs added and removed, and would leaving a tombstone behind consume too much memory? In certain cases...say, with vlans, you could very often create and destroy net devices. I think that giving up and leaking the memory is not a good idea. I think he's suggesting another thing. Reattach the skb-dev to some dummy device that always persists, when a device goes down. Actually, I think that Ben had me pegged right the first time - I was not going to chase down all the skb's (assuming there isn't already a list of all skb's) I was just going to leave the minimum of a devices structures out there so it would still be safe to follow the pointer from the skb's etc. The only problem is finding all SKB's that reference the dev going down. Indeed. rick jones - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] NET : SMP optimization of netdevice refcount
Rick Jones wrote: Ben Greear wrote: Rick Jones wrote: In the realm of straw ideas, how often are netdevs added and removed, and would leaving a tombstone behind consume too much memory? In certain cases...say, with vlans, you could very often create and destroy net devices. I think that giving up and leaking the memory is not a good idea. What makes vlans more likely to be more dynamic in that way? They are easy to create and destroy..and in mass. You can be sure that someone has realized this and has made a cool application on top of this feature. If we suddenly start leaking memory to gain a small bit of performance in the 1+Gbps speeds, then people will be upset. At the least, I have applications that do this..and whether they are cool or not is open to contention, but I will certainly be upset anyway :) Ben -- Ben Greear [EMAIL PROTECTED] Candela Technologies Inc http://www.candelatech.com - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] [IPVS] Shrink ip_vs_*.c includes
Horms [EMAIL PROTECTED] wrote: Dave, please apply. Looks bogus to me. Why are we removing linux/module.h from ip_vs_app.c when it uses things like EXPORT_SYMBOL? -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [BUG] recent commit breaks multi-descriptor receives with ip fragments
On Tue, 7 Feb 2006, Herbert Xu wrote: David S. Miller [EMAIL PROTECTED] wrote: I think we should revert that thing, it's caused more grief than anything else. I thought it was a complete waste of time from the get-go even assuming that fraglists within fraglists never occur... I share your feelings towards this patch. However, what e1000 is doing is broken. It should be filling in the frags array, not frag_list. so we generally call dev_alloc_skb to get the receive buffers to give to our hardware. When we use multiple receive buffers what is the right way to allocate memory to give buffers to the hardware and then later, to chain the descriptors together to make the packet? Using skb's is the common way as far as I've understood it. Your input on the correct way to do these things is greatly appreciated. Jesse - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [BUG] recent commit breaks multi-descriptor receives with ip fragments
From: Jesse Brandeburg [EMAIL PROTECTED] Date: Tue, 7 Feb 2006 17:41:28 -0800 (Pacific Standard Time) so we generally call dev_alloc_skb to get the receive buffers to give to our hardware. When we use multiple receive buffers what is the right way to allocate memory to give buffers to the hardware and then later, to chain the descriptors together to make the packet? Using skb's is the common way as far as I've understood it. Your input on the correct way to do these things is greatly appreciated. Allocate a single SKB and fill in the skb_shared_info() page/offset/len pairs, making sure to take proper references to the pages you add. Coalesce when possible. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [Patch] 2.4.32 - Neighbour Cache (ARP) State machine bug Fixed
One more attempt. Attaching the diff file as well. Signed off by: Pradeep Vincent [EMAIL PROTECTED] --- old/net/core/neighbour.cWed Nov 9 16:48:10 2005 +++ new/net/core/neighbour.cTue Feb 7 17:38:26 2006 @@ -14,6 +14,7 @@ * Vitaly E. Lavrovreleasing NULL neighbor in neigh_add. * Harald WelteAdd neighbour cache statistics like rtstat * Harald Welteport neighbour cache rework from 2.6.9-rcX + * Pradeep Vincent fix neighbour cache state machine */ #include linux/config.h @@ -705,6 +706,13 @@ neigh_release(n); continue; } + /* Move to NUD_STALE state */ + if (n-nud_stateNUD_REACHABLE + now - n-confirmed n-parms-reachable_time) { + n-nud_state = NUD_STALE; + neigh_suspect(n); + } + write_unlock(n-lock); next_elt: Thanks, Pradeep On 2/7/06, Willy Tarreau [EMAIL PROTECTED] wrote: Hi, On Tue, Feb 07, 2006 at 12:57:43AM -0700, Pradeep Vincent wrote: In 2.4.21, arp code uses gc_timer to check for stale arp cache entries. In 2.6, each entry has its own timer to check for stale arp cache. 2.4.29 to 2.4.32 kernels (atleast) use neither of these timers. This causes problems in environments where IPs or MACs are reassigned - saw this problem on load balancing router based networks that use VMACs. Tested this code on load balancing router based networks as well as peer-linux systems. Thanks, Signed off by: Pradeep Vincent [EMAIL PROTECTED] diff -Naur old/net/core/neighbour.c new/net/core/neighbour.c --- old/net/core/neighbour.cWed Nov 23 17:15:30 2005 +++ new/net/core/neighbour.cWed Nov 23 17:26:01 2005 @@ -14,6 +14,7 @@ * Vitaly E. Lavrovreleasing NULL neighbor in neigh_add. * Harald WelteAdd neighbour cache statistics like rtstat * Harald Welteport neighbour cache rework from 2.6.9-rcX + * Pradeep Vincent Move neighbour cache entry to stale state */ As you can see above, your mailer is still broken. Leading spaces get removed and it seems like tabs are replaced with spaces. This makes it really annoying to fix by hand because we all have to do your work again. 
You should try to fix your mailer options, possibly by sending a few mails to yourself or someone else (if you send *a few* mails to me, I can confirm which one looks OK). If your mailer is definitely broken, then you may send it as plain text first (for review), with a text attachment for people to apply it without trouble. Thanks, Willy linux-2.4.29-arp-fix.patch Description: Binary data
Re: [PATCH] acxsm: merge from acx 0.3.32
On Tue, Feb 07, 2006 at 05:41:45PM +0200, Denis Vlasenko wrote: On Friday 03 February 2006 14:14, Denis Vlasenko wrote: Standalone acx driver had several fixes since acxsm fork, this patch merges them: - initial support for new TNETW1450 USB chip - support for firmware 2.3.1.31 Also we had one report that acxsm is actually working. That's quite unexpected. Signed-off-by: Denis Vlasenko [EMAIL PROTECTED] What is the status of this patch? Accepted? Rejected? Other (please specify): I intend to merge it. I had a busy week last week, with some personal obligations. I apologize for my slow speed. I'll try to do better! :-) John -- John W. Linville [EMAIL PROTECTED] - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] [IPVS] Shrink ip_vs_*.c includes
On Wed, Feb 08, 2006 at 12:19:32PM +1100, Herbert Xu wrote: Horms [EMAIL PROTECTED] wrote: Dave, please apply. Looks bogus to me. Why are we removing linux/modules.h from ip_vs_app.c when it uses things like EXPORT_SYMBOL? Given that the code still compiles, I guess linux/modules.h is included in some other header that is included. I'm happy to put linux/modules.h back in. Do you have any more suggestions? -- Horms - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] [IPVS] Shrink ip_vs_*.c includes
Horms [EMAIL PROTECTED] wrote: Looks bogus to me. Why are we removing linux/modules.h from ip_vs_app.c when it uses things like EXPORT_SYMBOL? Given that the code still compiles, I guess linux/modules.h is included in some other header that is included. I'm happy to put linux/modules.h back in. Do you have any more suggestions? This is the wrong way to go about it. You should never rely on indirect inclusions because they might be removed one day and your file will not compile anymore. The correct way to go about this is to go through each included header file and check if any of its symbols are used in the source file. Or if this is too tedious just leave it alone. Cheers, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] [IPVS] Shrink ip_vs_*.c includes
On Wed, Feb 08, 2006 at 01:36:11PM +1100, Herbert Xu wrote: Horms [EMAIL PROTECTED] wrote: Looks bogus to me. Why are we removing linux/modules.h from ip_vs_app.c when it uses things like EXPORT_SYMBOL? Given that the code still compiles, I guess linux/modules.h is included in some other header that is included. I'm happy to put linux/modules.h back in. Do you have any more suggestions? This is the wrong way to go about it. You should never rely on indirect inclusions because they might be removed one day and your file will not compile anymore. The correct way to go about this is to go through each included header file and check if any of its symbols are used in the source file. Or if this is too tedious just leave it alone. Hi Herbert, thanks for your feedback. Dave, please discard this patch for now. Ratz, Unfortunately this seems like it is going to be more tedious than we first thought. I would guess writing some sort of tool to analyse symbols and headers is the way to go. Else it seems more or less impossible to clean up headers, even on a small scale. -- Horms - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] [IPVS] Shrink ip_vs_*.c includes
Unfortunately this seems like it is going to be more tedious than we first thought. I would guess writing some sort of tool to analyse symbols and headers is the way to go. Else it seems more or less impossible to clean up headers, even on a small scale. Search the netdev archives or look at Arnaldo's kernel.org space as he has done some scripts to do this once. -- Ian McDonald http://wand.net.nz/~iam4 WAND Network Research Group University of Waikato New Zealand - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] [IPVS] Shrink ip_vs_*.c includes
From: Horms [EMAIL PROTECTED] Date: Wed, 8 Feb 2006 12:09:29 +0900 Unfortunately this seems like it is going to be more tedious than we first thought. I would guess writing some sort of tool to analyse symbols and headers is the way to go. Else it seems more or less impossible to clean up headers, even on a small scale. It's doable on a small scale, you just have to approach the problem from the other direction. Ie. pick a header file and audit the use of that specific header file across the tree. Folks have done this with headers like linux/sched.h and friends in the past. And it's worthwhile because anything that minimises kernel rebuild when touching a header file helps streamline development. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH linux-2.6.16-rc2] bonding: fix a locking bug in bond_release
bond_release returns EINVAL without releasing the bond lock if the slave device is not being bonded by the bond. The following patch ensures that the lock is released in this case. Signed-off-by: Stephen J. Bevan [EMAIL PROTECTED] Acked-by: Jay Vosburgh [EMAIL PROTECTED] --- --- linux-2.6.16-rc2/drivers/net/bonding/bond_main.c.orig 2006-02-07 20:26:08.0 -0800 +++ linux-2.6.16-rc2/drivers/net/bonding/bond_main.c2006-02-07 20:33:20.447899952 -0800 @@ -1570,6 +1570,7 @@ int bond_release(struct net_device *bond printk(KERN_INFO DRV_NAME : %s: %s not enslaved\n, bond_dev-name, slave_dev-name); + write_unlock_bh(bond-lock); return -EINVAL; } - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] Packet socket: fix for 64-bit kernel and 32-bit userland
tpacket_hdr structure includes 'unsigned long' though kernel and userland shares it in the mmapped ring buffer. Seems it would be better to fix all data structures in the header file than fixing only tpacket_hdr structure. Signed-off-by: FUJITA Tomonori [EMAIL PROTECTED] Signed-off-by: Mike Christie [EMAIL PROTECTED] --- include/linux/if_packet.h | 54 +++-- 1 files changed, 27 insertions(+), 27 deletions(-) b8afaafd2a40e36daa030a852a5c92f0b6cd7531 diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h index b925585..bdd1e1e 100644 --- a/include/linux/if_packet.h +++ b/include/linux/if_packet.h @@ -3,20 +3,20 @@ struct sockaddr_pkt { - unsigned short spkt_family; - unsigned char spkt_device[14]; - unsigned short spkt_protocol; + __u16 spkt_family; + __u8spkt_device[14]; + __u16 spkt_protocol; }; struct sockaddr_ll { - unsigned short sll_family; - unsigned short sll_protocol; - int sll_ifindex; - unsigned short sll_hatype; - unsigned char sll_pkttype; - unsigned char sll_halen; - unsigned char sll_addr[8]; + __u16 sll_family; + __u16 sll_protocol; + __s32 sll_ifindex; + __u16 sll_hatype; + __u8sll_pkttype; + __u8sll_halen; + __u8sll_addr[8]; }; /* Packet types */ @@ -42,24 +42,24 @@ struct sockaddr_ll struct tpacket_stats { - unsigned inttp_packets; - unsigned inttp_drops; + __u32 tp_packets; + __u32 tp_drops; }; struct tpacket_hdr { - unsigned long tp_status; + __u32 tp_status; #define TP_STATUS_KERNEL 0 #define TP_STATUS_USER 1 #define TP_STATUS_COPY 2 #define TP_STATUS_LOSING 4 #define TP_STATUS_CSUMNOTREADY 8 - unsigned inttp_len; - unsigned inttp_snaplen; - unsigned short tp_mac; - unsigned short tp_net; - unsigned inttp_sec; - unsigned inttp_usec; + __u32 tp_len; + __u32 tp_snaplen; + __u16 tp_mac; + __u16 tp_net; + __u32 tp_sec; + __u32 tp_usec; }; #define TPACKET_ALIGNMENT 16 @@ -81,18 +81,18 @@ struct tpacket_hdr struct tpacket_req { - unsigned inttp_block_size; /* Minimal size of contiguous block */ - unsigned inttp_block_nr;/* Number of blocks 
*/ - unsigned inttp_frame_size; /* Size of frame */ - unsigned inttp_frame_nr;/* Total number of frames */ + __u32 tp_block_size; /* Minimal size of contiguous block */ + __u32 tp_block_nr;/* Number of blocks */ + __u32 tp_frame_size; /* Size of frame */ + __u32 tp_frame_nr;/* Total number of frames */ }; struct packet_mreq { - int mr_ifindex; - unsigned short mr_type; - unsigned short mr_alen; - unsigned char mr_address[8]; + __s32 mr_ifindex; + __u16 mr_type; + __u16 mr_alen; + __u8mr_address[8]; }; #define PACKET_MR_MULTICAST0 -- 1.1.3 - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] Packet socket: directly access the mmapped ring buffer
Mike Christie and I have developed the SCSI Userspace target framework. Target LLDs (for Fibre channel, iSCSI HBAs, etc) pass SCSI commands to the user-space daemon. The daemon executes the commands and sends the results back to the LLDs. Please refer to scsi-ml for further details. http://thread.gmane.org/gmane.linux.scsi/22409 We need an efficient kernel and user-space communication interface and used netlink. Jeff Garzik suggested the packet socket mmap'd ring buffer. The mmap'd ring buffer is really nice, but we want to access directly the ring buffer without going through the networking stack to avoid memory allocation and overhead. Signed-off-by: FUJITA Tomonori [EMAIL PROTECTED] Signed-off-by: Mike Christie [EMAIL PROTECTED] --- include/net/af_packet.h |6 ++ net/packet/af_packet.c | 17 + 2 files changed, 23 insertions(+), 0 deletions(-) create mode 100644 include/net/af_packet.h c627f3a1da6e5e7e9e46d58401adcf168ea45787 diff --git a/include/net/af_packet.h b/include/net/af_packet.h new file mode 100644 index 000..5a75e07 --- /dev/null +++ b/include/net/af_packet.h @@ -0,0 +1,6 @@ +#ifndef __LINUX_NET_AFPACKET_H +#define __LINUX_NET_AFPACKET_H + +extern struct tpacket_hdr *packet_socket_frame(struct sock *sk); + +#endif diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 9db7dbd..b5fbd74 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -562,6 +562,23 @@ drop: } #ifdef CONFIG_PACKET_MMAP +struct tpacket_hdr *packet_socket_frame(struct sock *sk) +{ + struct packet_sock *po; + struct tpacket_hdr *h; + + po = pkt_sk(sk); + spin_lock(sk-sk_receive_queue.lock); + h = (struct tpacket_hdr *) packet_lookup_frame(po, po-head); + if (h-tp_status) + h = ERR_PTR(-ENOBUFS); + else + po-head = po-head != po-frame_max ? 
po-head+1 : 0; + spin_unlock(sk-sk_receive_queue.lock); + return h; +} +EXPORT_SYMBOL_GPL(packet_socket_frame); + static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev) { struct sock *sk; -- 1.1.3 - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Packet socket: fix for 64-bit kernel and 32-bit userland
From: FUJITA Tomonori [EMAIL PROTECTED] Date: Wed, 08 Feb 2006 14:24:49 +0900 tpacket_hdr structure includes 'unsigned long' though kernel and userland shares it in the mmapped ring buffer. Seems it would be better to fix all data structures in the header file than fixing only tpacket_hdr structure. Signed-off-by: FUJITA Tomonori [EMAIL PROTECTED] Signed-off-by: Mike Christie [EMAIL PROTECTED] You broke 64-bit userland by changing that unsigned long to a __u32. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Packet socket: fix for 64-bit kernel and 32-bit userland
From: David S. Miller [EMAIL PROTECTED] Subject: Re: [PATCH] Packet socket: fix for 64-bit kernel and 32-bit userland Date: Tue, 07 Feb 2006 21:36:06 -0800 (PST) tpacket_hdr structure includes 'unsigned long' though kernel and userland shares it in the mmapped ring buffer. Seems it would be better to fix all data structures in the header file than fixing only tpacket_hdr structure. Signed-off-by: FUJITA Tomonori [EMAIL PROTECTED] Signed-off-by: Mike Christie [EMAIL PROTECTED] You broke 64-bit userland by changing that unsigned long to a __u32. You mean that currently it's broken in 64-bit kernel and 32-bit userland anyway so use __u64 instead of __u32? - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Packet socket: fix for 64-bit kernel and 32-bit userland
From: David S. Miller [EMAIL PROTECTED] Subject: Re: [PATCH] Packet socket: fix for 64-bit kernel and 32-bit userland Date: Tue, 07 Feb 2006 21:48:02 -0800 (PST) From: FUJITA Tomonori [EMAIL PROTECTED] Date: Wed, 08 Feb 2006 14:41:41 +0900 From: David S. Miller [EMAIL PROTECTED] Subject: Re: [PATCH] Packet socket: fix for 64-bit kernel and 32-bit userland Date: Tue, 07 Feb 2006 21:36:06 -0800 (PST) tpacket_hdr structure includes 'unsigned long' though kernel and userland shares it in the mmapped ring buffer. Seems it would be better to fix all data structures in the header file than fixing only tpacket_hdr structure. Signed-off-by: FUJITA Tomonori [EMAIL PROTECTED] Signed-off-by: Mike Christie [EMAIL PROTECTED] You broke 64-bit userland by changing that unsigned long to a __u32. You mean that currently it's broken in 64-bit kernel and 32-bit userland anyway so use __u64 instead of __u32? I mean that unsigned long is 64-bit in a 64-bit kernel, and thus your changes break packet mmap() ring buffers for native 64-bit binaries. I see. The following patch is OK? 
diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h index b925585..0fc6998 100644 --- a/include/linux/if_packet.h +++ b/include/linux/if_packet.h @@ -3,20 +3,20 @@ struct sockaddr_pkt { - unsigned short spkt_family; - unsigned char spkt_device[14]; - unsigned short spkt_protocol; + __u16 spkt_family; + __u8spkt_device[14]; + __u16 spkt_protocol; }; struct sockaddr_ll { - unsigned short sll_family; - unsigned short sll_protocol; - int sll_ifindex; - unsigned short sll_hatype; - unsigned char sll_pkttype; - unsigned char sll_halen; - unsigned char sll_addr[8]; + __u16 sll_family; + __u16 sll_protocol; + __s32 sll_ifindex; + __u16 sll_hatype; + __u8sll_pkttype; + __u8sll_halen; + __u8sll_addr[8]; }; /* Packet types */ @@ -42,24 +42,24 @@ struct sockaddr_ll struct tpacket_stats { - unsigned inttp_packets; - unsigned inttp_drops; + __u32 tp_packets; + __u32 tp_drops; }; struct tpacket_hdr { - unsigned long tp_status; + __u64 tp_status; #define TP_STATUS_KERNEL 0 #define TP_STATUS_USER 1 #define TP_STATUS_COPY 2 #define TP_STATUS_LOSING 4 #define TP_STATUS_CSUMNOTREADY 8 - unsigned inttp_len; - unsigned inttp_snaplen; - unsigned short tp_mac; - unsigned short tp_net; - unsigned inttp_sec; - unsigned inttp_usec; + __u32 tp_len; + __u32 tp_snaplen; + __u16 tp_mac; + __u16 tp_net; + __u32 tp_sec; + __u32 tp_usec; }; #define TPACKET_ALIGNMENT 16 @@ -81,18 +81,18 @@ struct tpacket_hdr struct tpacket_req { - unsigned inttp_block_size; /* Minimal size of contiguous block */ - unsigned inttp_block_nr;/* Number of blocks */ - unsigned inttp_frame_size; /* Size of frame */ - unsigned inttp_frame_nr;/* Total number of frames */ + __u32 tp_block_size; /* Minimal size of contiguous block */ + __u32 tp_block_nr;/* Number of blocks */ + __u32 tp_frame_size; /* Size of frame */ + __u32 tp_frame_nr;/* Total number of frames */ }; struct packet_mreq { - int mr_ifindex; - unsigned short mr_type; - unsigned short mr_alen; - unsigned char mr_address[8]; + __s32 mr_ifindex; + 
__u16 mr_type; + __u16 mr_alen; + __u8mr_address[8]; }; #define PACKET_MR_MULTICAST0 - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Packet socket: fix for 64-bit kernel and 32-bit userland
From: FUJITA Tomonori [EMAIL PROTECTED] Subject: Re: [PATCH] Packet socket: fix for 64-bit kernel and 32-bit userland Date: Wed, 08 Feb 2006 14:59:06 +0900 From: David S. Miller [EMAIL PROTECTED] Subject: Re: [PATCH] Packet socket: fix for 64-bit kernel and 32-bit userland Date: Tue, 07 Feb 2006 21:48:02 -0800 (PST) From: FUJITA Tomonori [EMAIL PROTECTED] Date: Wed, 08 Feb 2006 14:41:41 +0900 From: David S. Miller [EMAIL PROTECTED] Subject: Re: [PATCH] Packet socket: fix for 64-bit kernel and 32-bit userland Date: Tue, 07 Feb 2006 21:36:06 -0800 (PST) tpacket_hdr structure includes 'unsigned long' though kernel and userland shares it in the mmapped ring buffer. Seems it would be better to fix all data structures in the header file than fixing only tpacket_hdr structure. Signed-off-by: FUJITA Tomonori [EMAIL PROTECTED] Signed-off-by: Mike Christie [EMAIL PROTECTED] You broke 64-bit userland by changing that unsigned long to a __u32. You mean that currently it's broken in 64-bit kernel and 32-bit userland anyway so use __u64 instead of __u32? I mean that unsigned long is 64-bit in a 64-bit kernel, and thus your changes break packet mmap() ring buffers for native 64-bit binaries. I see. The following patch is OK? Sorry. seems I don't see that at all. Is there any good way to solve this problem? - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Packet socket: fix for 64-bit kernel and 32-bit userland
From: FUJITA Tomonori [EMAIL PROTECTED] Date: Wed, 08 Feb 2006 14:59:06 +0900 I see. The following patch is OK? This breaks existing 32-bit programs which really want a 32-bit value there. Please sit and think about this problem for some time before proposing more patches. We have a whole compatibility layer designed to handle the differing size of data types when running a 32-bit program on a 64-bit kernel. It is not allowed to change data structures which exist already (and are thus compiled into existing binaries) in order to fix this problem. Instead we must give the application what it expects. If we are a 64-bit kernel running a 32-bit binary, this means giving 32-bit compatible data structures. If we are a 64-bit kernel running a 64-bit binary, this means giving native 64-bit data structures. Thank you. - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 19/23] [PATCH] bridge: netfilter races on device removal
-stable review patch. If anyone has any objections, please let us know. -- Fix bridge netfilter to handle case where interface is deleted from bridge while packet is being processed (on other CPU). Fixes: http://bugzilla.kernel.org/show_bug.cgi?id=5803 Signed-off-by: Stephen Hemminger [EMAIL PROTECTED] Signed-off-by: Chris Wright [EMAIL PROTECTED] --- net/bridge/br_netfilter.c | 55 +++--- 1 files changed, 38 insertions(+), 17 deletions(-) Index: linux-2.6.15.3/net/bridge/br_netfilter.c === --- linux-2.6.15.3.orig/net/bridge/br_netfilter.c +++ linux-2.6.15.3/net/bridge/br_netfilter.c @@ -47,9 +47,6 @@ #define store_orig_dstaddr(skb) (skb_origaddr(skb) = (skb)-nh.iph-daddr) #define dnat_took_place(skb)(skb_origaddr(skb) != (skb)-nh.iph-daddr) -#define has_bridge_parent(device) ((device)-br_port != NULL) -#define bridge_parent(device) ((device)-br_port-br-dev) - #ifdef CONFIG_SYSCTL static struct ctl_table_header *brnf_sysctl_header; static int brnf_call_iptables = 1; @@ -94,6 +91,12 @@ static struct rtable __fake_rtable = { .rt_flags = 0, }; +static inline struct net_device *bridge_parent(const struct net_device *dev) +{ + struct net_bridge_port *port = rcu_dereference(dev-br_port); + + return port ? 
port-br-dev : NULL; +} /* PF_BRIDGE/PRE_ROUTING */ /* Undo the changes made for ip6tables PREROUTING and continue the @@ -185,11 +188,15 @@ static int br_nf_pre_routing_finish_brid skb-nf_bridge-mask ^= BRNF_NF_BRIDGE_PREROUTING; skb-dev = bridge_parent(skb-dev); - if (skb-protocol == __constant_htons(ETH_P_8021Q)) { - skb_pull(skb, VLAN_HLEN); - skb-nh.raw += VLAN_HLEN; + if (!skb-dev) + kfree_skb(skb); + else { + if (skb-protocol == __constant_htons(ETH_P_8021Q)) { + skb_pull(skb, VLAN_HLEN); + skb-nh.raw += VLAN_HLEN; + } + skb-dst-output(skb); } - skb-dst-output(skb); return 0; } @@ -266,7 +273,7 @@ bridged_dnat: } /* Some common code for IPv4/IPv6 */ -static void setup_pre_routing(struct sk_buff *skb) +static struct net_device *setup_pre_routing(struct sk_buff *skb) { struct nf_bridge_info *nf_bridge = skb-nf_bridge; @@ -278,6 +285,8 @@ static void setup_pre_routing(struct sk_ nf_bridge-mask |= BRNF_NF_BRIDGE_PREROUTING; nf_bridge-physindev = skb-dev; skb-dev = bridge_parent(skb-dev); + + return skb-dev; } /* We only check the length. 
A bridge shouldn't do any hop-by-hop stuff anyway */ @@ -372,7 +381,8 @@ static unsigned int br_nf_pre_routing_ip nf_bridge_put(skb-nf_bridge); if ((nf_bridge = nf_bridge_alloc(skb)) == NULL) return NF_DROP; - setup_pre_routing(skb); + if (!setup_pre_routing(skb)) + return NF_DROP; NF_HOOK(PF_INET6, NF_IP6_PRE_ROUTING, skb, skb-dev, NULL, br_nf_pre_routing_finish_ipv6); @@ -409,7 +419,6 @@ static unsigned int br_nf_pre_routing(un if (skb-protocol == __constant_htons(ETH_P_8021Q)) { skb_pull(skb, VLAN_HLEN); - (skb)-nh.raw += VLAN_HLEN; } return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn); } @@ -426,7 +435,6 @@ static unsigned int br_nf_pre_routing(un if (skb-protocol == __constant_htons(ETH_P_8021Q)) { skb_pull(skb, VLAN_HLEN); - (skb)-nh.raw += VLAN_HLEN; } if (!pskb_may_pull(skb, sizeof(struct iphdr))) @@ -456,7 +464,8 @@ static unsigned int br_nf_pre_routing(un nf_bridge_put(skb-nf_bridge); if ((nf_bridge = nf_bridge_alloc(skb)) == NULL) return NF_DROP; - setup_pre_routing(skb); + if (!setup_pre_routing(skb)) + return NF_DROP; store_orig_dstaddr(skb); NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb-dev, NULL, @@ -530,11 +539,16 @@ static unsigned int br_nf_forward_ip(uns struct sk_buff *skb = *pskb; struct nf_bridge_info *nf_bridge; struct vlan_ethhdr *hdr = vlan_eth_hdr(skb); + struct net_device *parent; int pf; if (!skb-nf_bridge) return NF_ACCEPT; + parent = bridge_parent(out); + if (!parent) + return NF_DROP; + if (skb-protocol == __constant_htons(ETH_P_IP) || IS_VLAN_IP) pf = PF_INET; else @@ -555,8 +569,8 @@ static unsigned int br_nf_forward_ip(uns nf_bridge-mask |= BRNF_BRIDGED; nf_bridge-physoutdev = skb-dev; - NF_HOOK(pf, NF_IP_FORWARD, skb, bridge_parent(in), - bridge_parent(out), br_nf_forward_finish); +