[git patches] net driver fixes

2006-02-07 Thread Jeff Garzik

Please pull from 'upstream-fixes' branch of
master.kernel.org:/pub/scm/linux/kernel/git/jgarzik/netdev-2.6.git

to receive the following updates:

 drivers/net/8139too.c|   38 +--
 drivers/net/Kconfig  |7 -
 drivers/net/bonding/bond_main.c  |   15 ++-
 drivers/net/bonding/bond_sysfs.c |6 -
 drivers/net/e100.c   |2 
 drivers/net/gianfar.c|   24 ++---
 drivers/net/gianfar.h|8 -
 drivers/net/gianfar_ethtool.c|8 -
 drivers/net/gianfar_mii.c|   17 +--
 drivers/net/r8169.c  |   13 +-
 drivers/net/sis900.h |1 
 drivers/net/sky2.c   |  186 +--
 drivers/net/sky2.h   |9 -
 drivers/net/tulip/uli526x.c  |2 
 drivers/net/wan/dscc4.c  |2 
 15 files changed, 222 insertions(+), 116 deletions(-)

Alexey Dobriyan:
  dscc4: fix dscc4_init_dummy_skb check
  dscc4: fix dscc4_init_dummy_skb check
  [same change, two different paths. -ed]

Andrew Morton:
  uli526x warning fix

Andy Gospodarek:
  r8169: fix forced-mode link settings

Francois Romieu:
  r8169: prevent excessive busy-waiting
  8139too: fix a TX timeout watchdog thread against NAPI softirq race

Jay Vosburgh:
  bonding: allow bond to use TSO if slaves support it

Jesse Brandeburg:
  e100: remove init_hw call to fix panic

Kumar Gala:
  gianfar: Fix sparse warnings

Lennert Buytenhek:
  sis900: remove cfgpmcsr I/O space register define

Luiz Fernando Capitulino:
  bonding: Sparse warnings fix

Paolo 'Blaisorblade' Giarrusso:
  Kbuild menu - hide empty NETDEVICES menu when NET is disabled

Stephen Hemminger:
  sky2: power management fix
  sky2: pci config space checking
  sky2: ethtool rx_coalesce settings fix
  sky2: set mac address fix
  sky2: clear irq race
  sky2: add irq to entropy pool
  sky2: support msi interrupt (revised)
  sky2: version 0.15 update

diff --git a/drivers/net/8139too.c b/drivers/net/8139too.c
index adfba44..2beac55 100644
--- a/drivers/net/8139too.c
+++ b/drivers/net/8139too.c
@@ -586,6 +586,7 @@ struct rtl8139_private {
dma_addr_t tx_bufs_dma;
signed char phys[4];/* MII device addresses. */
char twistie, twist_row, twist_col; /* Twister tune state. */
+   unsigned int watchdog_fired : 1;
unsigned int default_port : 4;  /* Last dev-if_port value. */
unsigned int have_thread : 1;
spinlock_t lock;
@@ -638,6 +639,7 @@ static void rtl8139_set_rx_mode (struct 
 static void __set_rx_mode (struct net_device *dev);
 static void rtl8139_hw_start (struct net_device *dev);
 static void rtl8139_thread (void *_data);
+static void rtl8139_tx_timeout_task(void *_data);
 static struct ethtool_ops rtl8139_ethtool_ops;
 
 /* write MMIO register, with flush */
@@ -1598,13 +1600,14 @@ static void rtl8139_thread (void *_data)
 {
struct net_device *dev = _data;
struct rtl8139_private *tp = netdev_priv(dev);
-   unsigned long thr_delay;
+   unsigned long thr_delay = next_tick;
 
-   if (rtnl_shlock_nowait() == 0) {
+   if (tp-watchdog_fired) {
+   tp-watchdog_fired = 0;
+   rtl8139_tx_timeout_task(_data);
+   } else if (rtnl_shlock_nowait() == 0) {
rtl8139_thread_iter (dev, tp, tp-mmio_addr);
rtnl_unlock ();
-
-   thr_delay = next_tick;
} else {
/* unlikely race.  mitigate with fast poll. */
thr_delay = HZ / 2;
@@ -1631,7 +1634,8 @@ static void rtl8139_stop_thread(struct r
if (tp-have_thread) {
cancel_rearming_delayed_work(tp-thread);
tp-have_thread = 0;
-   }
+   } else
+   flush_scheduled_work();
 }
 
 static inline void rtl8139_tx_clear (struct rtl8139_private *tp)
@@ -1642,14 +1646,13 @@ static inline void rtl8139_tx_clear (str
/* XXX account for unsent Tx packets in tp-stats.tx_dropped */
 }
 
-
-static void rtl8139_tx_timeout (struct net_device *dev)
+static void rtl8139_tx_timeout_task (void *_data)
 {
+   struct net_device *dev = _data;
struct rtl8139_private *tp = netdev_priv(dev);
void __iomem *ioaddr = tp-mmio_addr;
int i;
u8 tmp8;
-   unsigned long flags;
 
printk (KERN_DEBUG %s: Transmit timeout, status %2.2x %4.4x %4.4x 
media %2.2x.\n, dev-name, RTL_R8 (ChipCmd),
@@ -1670,23 +1673,34 @@ static void rtl8139_tx_timeout (struct n
if (tmp8  CmdTxEnb)
RTL_W8 (ChipCmd, CmdRxEnb);
 
-   spin_lock(tp-rx_lock);
+   spin_lock_bh(tp-rx_lock);
/* Disable interrupts by clearing the interrupt mask. */
RTL_W16 (IntrMask, 0x);
 
/* Stop a shared interrupt from scavenging while we are. */
-   spin_lock_irqsave (tp-lock, flags);
+   spin_lock_irq(tp-lock);

Re: [PATCH] check connect(2) status for IPv6 UDP socket

2006-02-07 Thread Nicolas DICHTEL

Hi all,

In the same vein as this patch, why are dst_entry references stored for
RAW sockets? With specific IPsec rules for ICMPv6, the
xfrm state can be different for the same destination.
Attached, a proposed patch.

Regards,
Nicolas

[IPV6] Don't store dst_entry for RAW socket

Signed-off-by: Nicolas DICHTEL [EMAIL PROTECTED]

Mitsuru KANDA a écrit :


Hello,

I recreated a patch for unconnected udpv6 socket checking.
(Fixed forgetting dst refcnt decrement from the previous patch.)

Regards,
-mk

At Thu, 22 Sep 2005 23:44:05 -0700 (PDT),
David S. Miller [EMAIL PROTECTED] wrote:
...


I think your patch adds a route leak.  If we elide the
ip6_dst_store() where does the 'dst' reference go?

In fact, I think the UDPv6 change might have done the same thing.
Mitsuru-san?

ipv4 code like this does explicit dst_clone() when storing
the 'dst' to the socket, but I can't see where ipv6 is doing
this.  Perhaps ipv6 dst's work a little differently. :-)

--- linux-2.6.15.2/net/ipv6/raw.c   2006-01-31 07:25:07.0 +0100
+++ linux-2.6.15.2-new/net/ipv6/raw.c   2006-02-06 11:46:13.0 +0100
@@ -814,10 +814,7 @@
err = rawv6_push_pending_frames(sk, fl, rp);
}
 done:
-   ip6_dst_store(sk, dst,
- ipv6_addr_equal(fl.fl6_dst, np-daddr) ?
- np-daddr : NULL);
-
+   dst_release(dst);
release_sock(sk);
 out:   
fl6_sock_release(flowlabel);


Re: [2.6 patch] net/tipc/: possible cleanups

2006-02-07 Thread Per Liden
On Sat, 4 Feb 2006, Adrian Bunk wrote:

 This patch contains the following possible cleanups:
 - make needlessly global code static

Good catch.

 - #if 0 the following unused global functions:
   - name_table.c: tipc_nametbl_print()
   - name_table.c: tipc_nametbl_dump()
   - net.c: tipc_net_next_node()

Thanks! I'll apply this to my tree.

/Per
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


e1000 not working on AMD64

2006-02-07 Thread Hajo Noerenberg

I've bought four new e1000 cards (PCI id 8086:107c, chip label 82541PI),
three of them are working without problems (i386, kernel 2.4.x).

One of them is installed in an AMD64 SMP system (Athlon dual core 4GB).
It gets detected, link is reported to be up, but no data goes through
(in fact _sometimes_ it succeeds in getting an IP address via DHCP after
10-20 retries). If I set an IP manually, I am not able to ping any other
host (ping sizes 30bytes ... 1000bytes).

Tested on kernel 2.6.15, 2.6.16-rc2 and 2.6.16-rc2-git2.

(Un-)setting NAPI does not change anything.

Initially I assumed an IRQ-related issue, but the 3ware RAID controller
works without any problems.

Hajo


+++

:00:00.0 RAM memory: nVidia Corporation: Unknown device 02f1 (rev a2)
Subsystem: Asustek Computer, Inc.: Unknown device 81bf
Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop-
ParErr- Stepping- SERR+ FastB2B-
Status: Cap+ 66MHz+ UDF- FastB2B+ ParErr- DEVSEL=fast TAbort-
TAbort- MAbort- SERR- PERR-
Latency: 0
Capabilities: [44] #08 [01e0]
Capabilities: [e0] #08 [a800]

:00:00.1 RAM memory: nVidia Corporation: Unknown device 02fa (rev a2)
Subsystem: Asustek Computer, Inc.: Unknown device 81bf
Control: I/O- Mem- BusMaster- SpecCycle- MemWINV- VGASnoop-
ParErr- Stepping- SERR+ FastB2B-
Status: Cap- 66MHz+ UDF- FastB2B- ParErr- DEVSEL=fast TAbort-
TAbort- MAbort- SERR- PERR-

:00:00.2 RAM memory: nVidia Corporation: Unknown device 02fe (rev a2)
Subsystem: Asustek Computer, Inc.: Unknown device 81bf
Control: I/O- Mem- BusMaster- SpecCycle- MemWINV- VGASnoop-
ParErr- Stepping- SERR- FastB2B-
Status: Cap- 66MHz+ UDF- FastB2B- ParErr- DEVSEL=fast TAbort-
TAbort- MAbort- SERR- PERR-

:00:00.3 RAM memory: nVidia Corporation: Unknown device 02f8 (rev a2)
Subsystem: Asustek Computer, Inc.: Unknown device 81bf
Control: I/O- Mem- BusMaster- SpecCycle- MemWINV- VGASnoop-
ParErr- Stepping- SERR+ FastB2B-
Status: Cap- 66MHz+ UDF- FastB2B+ ParErr- DEVSEL=fast TAbort-
TAbort- MAbort- SERR- PERR-

:00:00.4 RAM memory: nVidia Corporation: Unknown device 02f9 (rev a2)
Subsystem: Asustek Computer, Inc.: Unknown device 81bf
Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop-
ParErr- Stepping- SERR+ FastB2B-
Status: Cap- 66MHz+ UDF- FastB2B+ ParErr- DEVSEL=fast TAbort-
TAbort- MAbort- SERR- PERR-
Latency: 0

:00:00.5 RAM memory: nVidia Corporation: Unknown device 02ff (rev a2)
Subsystem: Asustek Computer, Inc.: Unknown device 81bf
Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop-
ParErr- Stepping- SERR+ FastB2B-
Status: Cap+ 66MHz+ UDF- FastB2B+ ParErr- DEVSEL=fast TAbort-
TAbort- MAbort- SERR- PERR-
Latency: 0
Capabilities: [44] #00 [00fe]
Capabilities: [fc] #00 []

:00:00.6 RAM memory: nVidia Corporation: Unknown device 027f (rev a2)
Subsystem: Asustek Computer, Inc.: Unknown device 81bf
Control: I/O- Mem+ BusMaster- SpecCycle- MemWINV- VGASnoop-
ParErr- Stepping- SERR+ FastB2B-
Status: Cap- 66MHz+ UDF- FastB2B- ParErr- DEVSEL=fast TAbort-
TAbort- MAbort- SERR- PERR-

:00:00.7 RAM memory: nVidia Corporation: Unknown device 027e (rev a2)
Subsystem: Asustek Computer, Inc.: Unknown device 81bf
Control: I/O- Mem- BusMaster- SpecCycle- MemWINV- VGASnoop-
ParErr- Stepping- SERR- FastB2B-
Status: Cap- 66MHz+ UDF- FastB2B- ParErr- DEVSEL=fast TAbort-
TAbort- MAbort- SERR- PERR-

:00:02.0 PCI bridge: nVidia Corporation: Unknown device 02fc (rev
a1) (prog-if 00 [Normal decode])
Control: I/O- Mem- BusMaster+ SpecCycle- MemWINV- VGASnoop-
ParErr- Stepping- SERR- FastB2B-
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast TAbort-
TAbort- MAbort- SERR- PERR-
Latency: 0, Cache Line Size: 0x10 (64 bytes)
Bus: primary=00, secondary=01, subordinate=01, sec-latency=0
I/O behind bridge: f000-0fff
Memory behind bridge: fff0-000f
Prefetchable memory behind bridge: fff0-
BridgeCtl: Parity- SERR+ NoISA- VGA- MAbort- Reset- FastB2B-
Capabilities: [40] #0d []
Capabilities: [48] Power Management version 2
Flags: PMEClk- DSI- D1- D2- AuxCurrent=0mA
PME(D0+,D1+,D2+,D3hot+,D3cold+)
Status: D0 PME-Enable- DSel=0 DScale=0 PME-
Capabilities: [50] Message Signalled Interrupts: 64bit+
Queue=0/1 Enable-
Address:   Data: 
Capabilities: [60] #08 [a800]
Capabilities: [80] #10 [0141]

:00:03.0 PCI bridge: nVidia Corporation: Unknown device 02fd (rev
a1) (prog-if 00 [Normal decode])
Control: I/O- Mem- BusMaster+ SpecCycle- MemWINV- VGASnoop-
ParErr- Stepping- SERR- FastB2B-
Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast TAbort-
TAbort- MAbort- 

Re: [PATCH] snap: needs hardware checksum fix

2006-02-07 Thread Herbert Xu
On Fri, Feb 03, 2006 at 10:01:17AM -0800, Stephen Hemminger wrote:
 
 static unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len)
 {
   if (unlikely(len > skb->len))
   return NULL;
   if (skb->ip_summed == CHECKSUM_HW)
   skb->csum = csum_sub(skb->csum, csum_partial(skb->data, len, 
 0));
   return __skb_pull(skb, len);
 }

Thanks Stephen.  I've changed most of the places that call
skb_postpull_rcsum over to use this instead.  The only places
left are IPv6 where it makes sense to separate the checksum
update since it wants to pull in bits and pieces and update
at the very end.  The other place is GRE which is in fact
buggy with respect to the pulling (the bug was introduced
with the WCCP patch).  I'll send a separate patch for that.

[NET]: Replace skb_pull/skb_postpull_rcsum with skb_pull_rcsum

We're now starting to have quite a number of places that do skb_pull
followed immediately by an skb_postpull_rcsum.  We can merge these
two operations into one function with skb_pull_rcsum.  This makes
sense since most pull operations on receive skb's need to update
the checksum.

I've decided to make this out-of-line since it is fairly big and the
fast path where hardware checksums are enabled needs to call csum_partial
anyway.

Since this is a brand new function we get to add an extra check on the
len argument.  As it is most callers of skb_pull ignore its return value
which essentially means that there is no check on the len argument.

Signed-off-by: Herbert Xu [EMAIL PROTECTED]

Cheers
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED]
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
diff --git a/drivers/net/ppp_generic.c b/drivers/net/ppp_generic.c
--- a/drivers/net/ppp_generic.c
+++ b/drivers/net/ppp_generic.c
@@ -1691,8 +1691,8 @@ ppp_receive_nonmp_frame(struct ppp *ppp,
|| ppp-npmode[npi] != NPMODE_PASS) {
kfree_skb(skb);
} else {
-   skb_pull(skb, 2);   /* chop off protocol */
-   skb_postpull_rcsum(skb, skb-data - 2, 2);
+   /* chop off protocol */
+   skb_pull_rcsum(skb, 2);
skb-dev = ppp-dev;
skb-protocol = htons(npindex_to_ethertype[npi]);
skb-mac.raw = skb-data;
diff --git a/drivers/net/pppoe.c b/drivers/net/pppoe.c
--- a/drivers/net/pppoe.c
+++ b/drivers/net/pppoe.c
@@ -337,8 +337,7 @@ static int pppoe_rcv_core(struct sock *s
if (sk-sk_state  PPPOX_BOUND) {
struct pppoe_hdr *ph = (struct pppoe_hdr *) skb-nh.raw;
int len = ntohs(ph-length);
-   skb_pull(skb, sizeof(struct pppoe_hdr));
-   skb_postpull_rcsum(skb, ph, sizeof(*ph));
+   skb_pull_rcsum(skb, sizeof(struct pppoe_hdr));
if (pskb_trim_rcsum(skb, len))
goto abort_kfree;
 
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1174,12 +1174,14 @@ static inline int skb_linearize(struct s
  */
 
 static inline void skb_postpull_rcsum(struct sk_buff *skb,
-const void *start, int len)
+ const void *start, unsigned int len)
 {
if (skb-ip_summed == CHECKSUM_HW)
skb-csum = csum_sub(skb-csum, csum_partial(start, len, 0));
 }
 
+unsigned char *skb_pull_rcsum(struct sk_buff *skb, unsigned int len);
+
 /**
  * pskb_trim_rcsum - trim received skb and update checksum
  * @skb: buffer to trim
diff --git a/net/802/psnap.c b/net/802/psnap.c
--- a/net/802/psnap.c
+++ b/net/802/psnap.c
@@ -61,8 +61,7 @@ static int snap_rcv(struct sk_buff *skb,
/* Pass the frame on. */
u8 *hdr = skb-data;
skb-h.raw  += 5;
-   skb_pull(skb, 5);
-   skb_postpull_rcsum(skb, hdr, 5);
+   skb_pull_rcsum(skb, 5);
rc = proto-rcvfunc(skb, dev, snap_packet_type, orig_dev);
} else {
skb-sk = NULL;
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -163,10 +163,8 @@ int vlan_skb_recv(struct sk_buff *skb, s
stats-rx_packets++;
stats-rx_bytes += skb-len;
 
-   skb_pull(skb, VLAN_HLEN); /* take off the VLAN header (4 bytes 
currently) */
-
-   /* Need to correct hardware checksum */
-   skb_postpull_rcsum(skb, vhdr, VLAN_HLEN);
+   /* Take off the VLAN header (4 bytes currently) */
+   skb_pull_rcsum(skb, VLAN_HLEN);
 
/* Ok, lets check to make sure the device (dev) we
 * came in on is what this VLAN is attached to.
diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c
--- a/net/bridge/br_netfilter.c
+++ 

[PATCH] af_unix: use shift instead of integer division

2006-02-07 Thread Benjamin LaHaise
The patch below replaces a divide by 2 with a shift -- sk_sndbuf is an 
integer, so gcc emits an idiv, which takes 10x longer than a shift by 1.  
This improves af_unix bandwidth by ~6-10K/s.  Also, tidy up the comment 
to fit in 80 columns while we're at it.

-ben

Signed-off-by: Benjamin LaHaise [EMAIL PROTECTED]
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index 1b5989b..b57d4d9 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1427,15 +1427,15 @@ static int unix_stream_sendmsg(struct ki
while(sent  len)
{
/*
-*  Optimisation for the fact that under 0.01% of X 
messages typically
-*  need breaking up.
+*  Optimisation for the fact that under 0.01% of X
+*  messages typically need breaking up.
 */
 
-   size=len-sent;
+   size = len-sent;
 
/* Keep two messages in the pipe so it schedules better */
-   if (size  sk-sk_sndbuf / 2 - 64)
-   size = sk-sk_sndbuf / 2 - 64;
+   if (size  ((sk-sk_sndbuf  1) - 64))
+   size = (sk-sk_sndbuf  1) - 64;
 
if (size  SKB_MAX_ALLOC)
size = SKB_MAX_ALLOC;
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] af_unix: scm: better initialization

2006-02-07 Thread Benjamin LaHaise
Instead of doing a memset then initialization of the fields of the scm 
structure, just initialize all the members explicitly.  Prevent reloading 
of current on x86 and x86-64 by storing the value in a local variable for 
subsequent dereferences.  This is worth a ~7KB/s increase in af_unix 
bandwidth.  Note that we avoid the issues surrounding potentially 
uninitialized members of the ucred structure by constructing a struct 
ucred instead of assigning the members individually, which forces the 
compiler to zero any padding.

Signed-off-by: Benjamin LaHaise [EMAIL PROTECTED]
diff --git a/include/net/scm.h b/include/net/scm.h
index c3fa3d5..0d90fa2 100644
--- a/include/net/scm.h
+++ b/include/net/scm.h
@@ -37,10 +37,14 @@ static __inline__ void scm_destroy(struc
 static __inline__ int scm_send(struct socket *sock, struct msghdr *msg,
   struct scm_cookie *scm)
 {
-   memset(scm, 0, sizeof(*scm));
-   scm-creds.uid = current-uid;
-   scm-creds.gid = current-gid;
-   scm-creds.pid = current-tgid;
+   struct task_struct *p = current;
+   scm-creds = (struct ucred) {
+   .uid = p-uid,
+   .gid = p-gid,
+   .pid = p-tgid
+   };
+   scm-fp = NULL;
+   scm-seq = 0;
if (msg-msg_controllen = 0)
return 0;
return __scm_send(sock, msg, scm);
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] af_unix: use shift instead of integer division

2006-02-07 Thread Benjamin LaHaise
On Tue, Feb 07, 2006 at 04:15:31PM +0100, Andi Kleen wrote:
 On Tuesday 07 February 2006 15:54, Benjamin LaHaise wrote:
 
  +   if (size > ((sk->sk_sndbuf >> 1) - 64))
  +   size = (sk->sk_sndbuf >> 1) - 64;
 
 This is really surprising. Are you double plus sure gcc doesn't 
 do this automatically?

As I said, sk_sndbuf is a signed integer, so gcc can't use an arithmetic 
shift (which would round toward negative infinity if the result is negative -- gcc has 
no way of knowing that sk_sndbuf will be positive).  The alternative would 
be to convert sk_sndbuf to unsigned, but that would mean rechecking all the 
users for side effects.

-ben
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Strange IPsec freeze/partial fix

2006-02-07 Thread Olaf Kirch
Hi,

there's a problem with IPsec that has been bugging some of our users
for the last couple of kernel revs. Every now and then, IPsec will
freeze the machine completely. This is with openswan user land,
and with kernels up to and including 2.6.16-rc2.

I managed to debug this a little, and what happens is that we end
up looping in xfrm_lookup, and never get out. With a bit of debug
printks added, I can see this happening:

ip_route_output_flow calls xfrm_lookup

xfrm_find_bundle returns NULL (apparently we're in the
middle of negotiating a new SA or something)

We therefore call xfrm_tmpl_resolve. This returns EAGAIN
We go to sleep, waiting for a policy update.
Then we loop back to the top

Apparently, the dst_orig that was passed into xfrm_lookup
has been dropped from the routing table (obsolete=2)
This leads to the endless loop, because we now create
a new bundle, check the new bundle and find it's stale
(stale_bundle - xfrm_bundle_ok - dst_check() return 0)

People have been testing with the patch below, which seems to fix the
problem partially. They still see connection hangs however (things
only clear up when they start a new ping or new ssh). So the patch
is obviously not sufficient, and something else seems to go wrong.

I'm grateful for any hints you may have...

Olaf
-- 
Subject: [XFRM] Fix infinite loop in xfrm_lookup

It seems that the route xfrm_lookup is given on input can go
away when we sleep.

Signed-off-by: Olaf Kirch [EMAIL PROTECTED]

 net/ipv4/route.c   |   25 -
 net/xfrm/xfrm_policy.c |   16 
 2 files changed, 32 insertions(+), 9 deletions(-)

diff -r df2df438c970 net/ipv4/route.c
--- a/net/ipv4/route.c  Mon Feb  6 14:08:26 2006 -0500
+++ b/net/ipv4/route.c  Mon Feb  6 15:52:09 2006 -0500
@@ -2609,18 +2609,25 @@ int ip_route_output_flow(struct rtable *
 {
int err;
 
-   if ((err = __ip_route_output_key(rp, flp)) != 0)
-   return err;
-
-   if (flp-proto) {
-   if (!flp-fl4_src)
-   flp-fl4_src = (*rp)-rt_src;
-   if (!flp-fl4_dst)
-   flp-fl4_dst = (*rp)-rt_dst;
-   return xfrm_lookup((struct dst_entry **)rp, flp, sk, flags);
-   }
-
-   return 0;
+   if (flp-proto == 0) {
+   err = __ip_route_output_key(rp, flp);
+   } else {
+   u32 fl_src = flp-fl4_src, fl_dst = flp-fl4_dst;
+   int repeat = 1;
+
+   do {
+   if ((err = __ip_route_output_key(rp, flp)) != 0)
+   break;
+
+   if (!fl_src)
+   flp-fl4_src = (*rp)-rt_src;
+   if (!fl_dst)
+   flp-fl4_dst = (*rp)-rt_dst;
+   err = xfrm_lookup((struct dst_entry **)rp, flp, sk, 
flags);
+   } while (err == -EAGAIN  repeat--);
+   }
+
+   return err;
 }
 
 EXPORT_SYMBOL_GPL(ip_route_output_flow);
diff -r df2df438c970 net/xfrm/xfrm_policy.c
--- a/net/xfrm/xfrm_policy.cMon Feb  6 14:08:26 2006 -0500
+++ b/net/xfrm/xfrm_policy.cMon Feb  6 15:52:09 2006 -0500
@@ -786,7 +786,22 @@ int xfrm_lookup(struct dst_entry **dst_p
u16 family = dst_orig-ops-family;
u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT);
u32 sk_sid = security_sk_sid(sk, fl, dir);
+   int loops = 0;
+
 restart:
+   if (loops  dst_orig  dst_orig-obsolete  0) {
+   printk(KERN_NOTICE xfrm_lookup: route is stale (obsolete=%d, 
loops=%d)\n,
+   dst_orig-obsolete, loops);
+   err = -EAGAIN;
+   goto error_nopol;
+   }
+   if (unlikely(++loops  10)) {
+   printk(KERN_NOTICE xfrm_lookup bailing out after %d loops\n, 
loops);
+   dump_stack();
+   err = -EHOSTUNREACH;
+   goto error_nopol;
+   }
+
genid = atomic_read(flow_cache_genid);
policy = NULL;
if (sk  sk-sk_policy[1])
@@ -854,6 +869,7 @@ restart:
}
if (nx == -EAGAIN ||
genid != atomic_read(flow_cache_genid)) {
+   printk(KERN_NOTICE xfrm_tmpl_resolve 
says EAGAIN, try again\n);
xfrm_pol_put(policy);
goto restart;
}
@@ -903,8 +919,9 @@ restart:
return 0;
 
 error:
+   xfrm_pol_put(policy);
+error_nopol:
dst_release(dst_orig);
-   xfrm_pol_put(policy);
*dst_p = NULL;
return err;
 }
-- 
Olaf Kirch   |  --- o --- Nous sommes du soleil we love when we play
[EMAIL PROTECTED] |/ | \   sol.dhoop.naytheet.ah kin.ir.samse.qurax
-
To unsubscribe 

[patch 1/2] s390: lcs performance enhancements

2006-02-07 Thread Frank Pavlic
[patch 1/2] s390: lcs performance enhancements 

From: Klaus Wacker [EMAIL PROTECTED]
- When flood pinging (with large packet size) an LCS device,
  about 90 % of all packets are dropped by driver.
- increased number of lcs IO buffers to 32. 
- use netif_stop_queue/netif_wake_queue in lcs_start_xmit routine
- don't lock the whole xmit routine but just the piece of code where
  tx_buffer is touched. 

Signed-off-by: Frank Pavlic [EMAIL PROTECTED]

diffstat:
 lcs.c |   31 +--
 lcs.h |2 +-
 2 files changed, 18 insertions(+), 15 deletions(-)

diff -Naupr git-orig/drivers/s390/net/lcs.c git-patched/drivers/s390/net/lcs.c
--- git-orig/drivers/s390/net/lcs.c 2006-02-07 10:55:28.0 +0100
+++ git-patched/drivers/s390/net/lcs.c  2006-02-07 11:06:37.0 +0100
@@ -98,9 +98,9 @@ lcs_register_debug_facility(void)
return -ENOMEM;
}
debug_register_view(lcs_dbf_setup, debug_hex_ascii_view);
-   debug_set_level(lcs_dbf_setup, 4);
+   debug_set_level(lcs_dbf_setup, 2);
debug_register_view(lcs_dbf_trace, debug_hex_ascii_view);
-   debug_set_level(lcs_dbf_trace, 4);
+   debug_set_level(lcs_dbf_trace, 2);
return 0;
 }
 
@@ -1292,9 +1292,8 @@ lcs_set_multicast_list(struct net_device
 LCS_DBF_TEXT(4, trace, setmulti);
 card = (struct lcs_card *) dev-priv;
 
-if (!lcs_set_thread_start_bit(card, LCS_SET_MC_THREAD)) {
+if (!lcs_set_thread_start_bit(card, LCS_SET_MC_THREAD)) 
schedule_work(card-kernel_thread_starter);
-   }
 }
 
 #endif /* CONFIG_IP_MULTICAST */
@@ -1459,6 +1458,8 @@ lcs_txbuffer_cb(struct lcs_channel *chan
lcs_release_buffer(channel, buffer);
card = (struct lcs_card *)
((char *) channel - offsetof(struct lcs_card, write));
+   if (netif_queue_stopped(card-dev))
+   netif_wake_queue(card-dev);
spin_lock(card-lock);
card-tx_emitted--;
if (card-tx_emitted = 0  card-tx_buffer != NULL)
@@ -1478,6 +1479,7 @@ __lcs_start_xmit(struct lcs_card *card, 
 struct net_device *dev)
 {
struct lcs_header *header;
+   int rc = 0;
 
LCS_DBF_TEXT(5, trace, hardxmit);
if (skb == NULL) {
@@ -1492,10 +1494,8 @@ __lcs_start_xmit(struct lcs_card *card, 
card-stats.tx_carrier_errors++;
return 0;
}
-   if (netif_queue_stopped(dev) ) {
-   card-stats.tx_dropped++;
-   return -EBUSY;
-   }
+   netif_stop_queue(card-dev);
+   spin_lock(card-lock);
if (card-tx_buffer != NULL 
card-tx_buffer-count + sizeof(struct lcs_header) +
skb-len + sizeof(u16)  LCS_IOBUFFERSIZE)
@@ -1506,7 +1506,8 @@ __lcs_start_xmit(struct lcs_card *card, 
card-tx_buffer = lcs_get_buffer(card-write);
if (card-tx_buffer == NULL) {
card-stats.tx_dropped++;
-   return -EBUSY;
+   rc = -EBUSY;
+   goto out;
}
card-tx_buffer-callback = lcs_txbuffer_cb;
card-tx_buffer-count = 0;
@@ -1518,13 +1519,18 @@ __lcs_start_xmit(struct lcs_card *card, 
header-type = card-lan_type;
header-slot = card-portno;
memcpy(header + 1, skb-data, skb-len);
+   spin_unlock(card-lock);
card-stats.tx_bytes += skb-len;
card-stats.tx_packets++;
dev_kfree_skb(skb);
-   if (card-tx_emitted = 0)
+   netif_wake_queue(card-dev);
+   spin_lock(card-lock);
+   if (card-tx_emitted = 0  card-tx_buffer != NULL)
/* If this is the first tx buffer emit it immediately. */
__lcs_emit_txbuffer(card);
-   return 0;
+out:
+   spin_unlock(card-lock);
+   return rc;
 }
 
 static int
@@ -1535,9 +1541,7 @@ lcs_start_xmit(struct sk_buff *skb, stru
 
LCS_DBF_TEXT(5, trace, pktxmit);
card = (struct lcs_card *) dev-priv;
-   spin_lock(card-lock);
rc = __lcs_start_xmit(card, skb, dev);
-   spin_unlock(card-lock);
return rc;
 }
 
@@ -2319,7 +2323,6 @@ __init lcs_init_module(void)
PRINT_ERR(Initialization failed\n);
return rc;
}
-
return 0;
 }
 
diff -Naupr git-orig/drivers/s390/net/lcs.h git-patched/drivers/s390/net/lcs.h
--- git-orig/drivers/s390/net/lcs.h 2006-02-07 10:55:28.0 +0100
+++ git-patched/drivers/s390/net/lcs.h  2006-02-07 11:00:08.0 +0100
@@ -95,7 +95,7 @@ do {
  */
 #define LCS_ILLEGAL_OFFSET 0x
 #define LCS_IOBUFFERSIZE   0x5000
-#define LCS_NUM_BUFFS  8   /* needs to be power of 2 */
+#define LCS_NUM_BUFFS  32  /* needs to be power of 2 */
 #define LCS_MAC_LENGTH 6
 #define 

[patch 2/2] s390: some qeth driver fixes

2006-02-07 Thread Frank Pavlic
[patch 2/2] s390: some qeth driver fixes 

From: Frank Pavlic [EMAIL PROTECTED]
- fixed kernel panic when using EDDP support in Layer 2 mode
- NULL pointer exception in qeth_set_offline fixed.
- setting EDDP in Layer 2 mode did not set NETIF_F_(SG/TSO)
  flags when device became online.
- use sscanf for parsing and converting IPv4 addresses
  from string to __u8 values.
- qeth_string_to_ipaddr6 fixed. in case of double colon
  the converted IPv6 address out from the string was not correct
  in previous implementation.

Signed-off-by: Frank Pavlic [EMAIL PROTECTED]

diffstat:
 qeth.h  |  112 +---
 qeth_eddp.c |   11 -
 qeth_main.c |   17 +++--
 3 files changed, 63 insertions(+), 77 deletions(-)

diff -Naupr git-orig/drivers/s390/net/qeth_eddp.c 
git-patched/drivers/s390/net/qeth_eddp.c
--- git-orig/drivers/s390/net/qeth_eddp.c   2006-02-07 10:55:28.0 
+0100
+++ git-patched/drivers/s390/net/qeth_eddp.c2006-02-07 11:17:11.0 
+0100
@@ -59,8 +59,7 @@ qeth_eddp_free_context(struct qeth_eddp_
for (i = 0; i  ctx-num_pages; ++i)
free_page((unsigned long)ctx-pages[i]);
kfree(ctx-pages);
-   if (ctx-elements != NULL)
-   kfree(ctx-elements);
+   kfree(ctx-elements);
kfree(ctx);
 }
 
@@ -413,6 +412,13 @@ __qeth_eddp_fill_context_tcp(struct qeth

QETH_DBF_TEXT(trace, 5, eddpftcp);
eddp-skb_offset = sizeof(struct qeth_hdr) + eddp-nhl + eddp-thl;
+   if (eddp-qh.hdr.l2.id == QETH_HEADER_TYPE_LAYER2) {
+   eddp-skb_offset += sizeof(struct ethhdr);
+#ifdef CONFIG_QETH_VLAN
+   if (eddp-mac.h_proto == __constant_htons(ETH_P_8021Q))
+   eddp-skb_offset += VLAN_HLEN;
+#endif /* CONFIG_QETH_VLAN */
+   }
tcph = eddp-skb-h.th;
while (eddp-skb_offset  eddp-skb-len) {
data_len = min((int)skb_shinfo(eddp-skb)-tso_size,
@@ -483,6 +489,7 @@ qeth_eddp_fill_context_tcp(struct qeth_e
return -ENOMEM;
}
if (qhdr-hdr.l2.id == QETH_HEADER_TYPE_LAYER2) {
+   skb-mac.raw = (skb-data) + sizeof(struct qeth_hdr);
memcpy(eddp-mac, eth_hdr(skb), ETH_HLEN);
 #ifdef CONFIG_QETH_VLAN
if (eddp-mac.h_proto == __constant_htons(ETH_P_8021Q)) {
diff -Naupr git-orig/drivers/s390/net/qeth.h git-patched/drivers/s390/net/qeth.h
--- git-orig/drivers/s390/net/qeth.h2006-02-07 10:55:28.0 +0100
+++ git-patched/drivers/s390/net/qeth.h 2006-02-07 11:17:11.0 +0100
@@ -1076,16 +1076,6 @@ qeth_get_qdio_q_format(struct qeth_card 
 }
 
 static inline int
-qeth_isdigit(char * buf)
-{
-   while (*buf) {
-   if (!isdigit(*buf++))
-   return 0;
-   }
-   return 1;
-}
-
-static inline int
 qeth_isxdigit(char * buf)
 {
while (*buf) {
@@ -1104,33 +1094,17 @@ qeth_ipaddr4_to_string(const __u8 *addr,
 static inline int
 qeth_string_to_ipaddr4(const char *buf, __u8 *addr)
 {
-   const char *start, *end;
-   char abuf[4];
-   char *tmp;
-   int len;
-   int i;
-
-   start = buf;
-   for (i = 0; i  4; i++) {
-   if (i == 3) {
-   end = strchr(start,0xa);
-   if (end)
-   len = end - start;
-   else
-   len = strlen(start);
-   }
-   else {
-   end = strchr(start, '.');
-   len = end - start;
-   }
-   if ((len = 0) || (len  3))
-   return -EINVAL;
-   memset(abuf, 0, 4);
-   strncpy(abuf, start, len);
-   if (!qeth_isdigit(abuf))
+   int count = 0, rc = 0;
+   int in[4];
+
+   rc = sscanf(buf, %d.%d.%d.%d%n, 
+   in[0], in[1], in[2], in[3], count);
+   if (rc != 4  || count) 
+   return -EINVAL;
+   for (count = 0; count  4; count++) {
+   if (in[count]  255)
return -EINVAL;
-   addr[i] = simple_strtoul(abuf, tmp, 10);
-   start = end + 1;
+   addr[count] = in[count];
}
return 0;
 }
@@ -1149,36 +1123,44 @@ qeth_ipaddr6_to_string(const __u8 *addr,
 static inline int
 qeth_string_to_ipaddr6(const char *buf, __u8 *addr)
 {
-   const char *start, *end;
-   u16 *tmp_addr;
-   char abuf[5];
-   char *tmp;
-   int len;
-   int i;
-
-   tmp_addr = (u16 *)addr;
-   start = buf;
-   for (i = 0; i  8; i++) {
-   if (i == 7) {
-   end = strchr(start,0xa);
-   if (end)
-   len = end - start;
-   else
-   len = 

Re: Kernel BUG at drivers/net/tg3.c:2914 on SMP amd64

2006-02-07 Thread Mike Crowe
On Thu, 2006-02-02 at 13:37 +, Mike Crowe wrote:
 I'm running the Debian 2.6.15 kernel from backports.org on a
 machine with two Opteron 275s. I am getting a BUG in tg3.c quite
 reliably if I ping flood the machine from a few others and cause a
 bit of other network activity. Sometimes it takes a few minutes,
 sometimes half an hour. The BUG also fires in more realistic
 situations - it just takes longer to reproduce.
 
On Thu, Feb 02, 2006 at 08:01:57AM -0800, Michael Chan wrote:
 Most likely due to MMIO being re-ordered. We've seen this on a number of
 AMD machines.
 
 Please try this test patch below. If the problem goes away, send me the
 output of lspci -vvvxxx on your machine and I'll create a patch to fix
 this automatically on your machine. Thanks.

It looks like the machine had issues. After a bit of fiddling around
I've persuaded the problem to go away both with and without the
patch. It doesn't look like it was caused by the MMIO being reordered
unless this is a BIOS option that I've now turned off.

Thanks for your help anyway.

-- 
Mike Crowe
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


RE: e1000 not working on AMD64

2006-02-07 Thread cramerj
Does /proc/interrupts show the interrupts incrementing for the
interface?

-Jeb

 -Original Message-
 From: [EMAIL PROTECTED]
[mailto:[EMAIL PROTECTED]
 On Behalf Of Hajo Noerenberg
 Sent: Tuesday, February 07, 2006 3:22 AM
 To: netdev@vger.kernel.org
 Subject: e1000 not working on AMD64
 
 
 I've bought four new e1000 cards (PCI id 8086:107c, chip label
82541PI),
 three of them are working without problems (i386, kernel 2.4.x).
 
 One of them is installed in an AMD64 SMP system (Athlon dual core
4GB).
 It gets detected, link is reported to be up, but no data goes through
 (in fact _sometimes_ it succeeds to get an IP address via DHCP after
 10-20 retries). If I set an IP manually, I am not able to ping any
other
 host (ping sizes 30bytes ... 1000bytes).
 
 Tested on kernel 2.6.15, 2.6.16-rc2 and 2.6.16-rc2-git2.
 
 (Un-)setting NAPI does not change anything.
 
 Initially I assumed an IRQ-related issue, but the 3ware RAID
controller
 works without any problems.
 
 Hajo
 
 
 +++
 
 :00:00.0 RAM memory: nVidia Corporation: Unknown device 02f1 (rev
a2)
 Subsystem: Asustek Computer, Inc.: Unknown device 81bf
 Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop-
 ParErr- Stepping- SERR+ FastB2B-
 Status: Cap+ 66MHz+ UDF- FastB2B+ ParErr- DEVSEL=fast TAbort-
 TAbort- MAbort- SERR- PERR-
 Latency: 0
 Capabilities: [44] #08 [01e0]
 Capabilities: [e0] #08 [a800]
 
 :00:00.1 RAM memory: nVidia Corporation: Unknown device 02fa (rev
a2)
 Subsystem: Asustek Computer, Inc.: Unknown device 81bf
 Control: I/O- Mem- BusMaster- SpecCycle- MemWINV- VGASnoop-
 ParErr- Stepping- SERR+ FastB2B-
 Status: Cap- 66MHz+ UDF- FastB2B- ParErr- DEVSEL=fast TAbort-
 TAbort- MAbort- SERR- PERR-
 
 :00:00.2 RAM memory: nVidia Corporation: Unknown device 02fe (rev
a2)
 Subsystem: Asustek Computer, Inc.: Unknown device 81bf
 Control: I/O- Mem- BusMaster- SpecCycle- MemWINV- VGASnoop-
 ParErr- Stepping- SERR- FastB2B-
 Status: Cap- 66MHz+ UDF- FastB2B- ParErr- DEVSEL=fast TAbort-
 TAbort- MAbort- SERR- PERR-
 
 :00:00.3 RAM memory: nVidia Corporation: Unknown device 02f8 (rev
a2)
 Subsystem: Asustek Computer, Inc.: Unknown device 81bf
 Control: I/O- Mem- BusMaster- SpecCycle- MemWINV- VGASnoop-
 ParErr- Stepping- SERR+ FastB2B-
 Status: Cap- 66MHz+ UDF- FastB2B+ ParErr- DEVSEL=fast TAbort-
 TAbort- MAbort- SERR- PERR-
 
 :00:00.4 RAM memory: nVidia Corporation: Unknown device 02f9 (rev
a2)
 Subsystem: Asustek Computer, Inc.: Unknown device 81bf
 Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop-
 ParErr- Stepping- SERR+ FastB2B-
 Status: Cap- 66MHz+ UDF- FastB2B+ ParErr- DEVSEL=fast TAbort-
 TAbort- MAbort- SERR- PERR-
 Latency: 0
 
 :00:00.5 RAM memory: nVidia Corporation: Unknown device 02ff (rev
a2)
 Subsystem: Asustek Computer, Inc.: Unknown device 81bf
 Control: I/O- Mem+ BusMaster+ SpecCycle- MemWINV- VGASnoop-
 ParErr- Stepping- SERR+ FastB2B-
 Status: Cap+ 66MHz+ UDF- FastB2B+ ParErr- DEVSEL=fast TAbort-
 TAbort- MAbort- SERR- PERR-
 Latency: 0
 Capabilities: [44] #00 [00fe]
 Capabilities: [fc] #00 []
 
 :00:00.6 RAM memory: nVidia Corporation: Unknown device 027f (rev
a2)
 Subsystem: Asustek Computer, Inc.: Unknown device 81bf
 Control: I/O- Mem+ BusMaster- SpecCycle- MemWINV- VGASnoop-
 ParErr- Stepping- SERR+ FastB2B-
 Status: Cap- 66MHz+ UDF- FastB2B- ParErr- DEVSEL=fast TAbort-
 TAbort- MAbort- SERR- PERR-
 
 :00:00.7 RAM memory: nVidia Corporation: Unknown device 027e (rev
a2)
 Subsystem: Asustek Computer, Inc.: Unknown device 81bf
 Control: I/O- Mem- BusMaster- SpecCycle- MemWINV- VGASnoop-
 ParErr- Stepping- SERR- FastB2B-
 Status: Cap- 66MHz+ UDF- FastB2B- ParErr- DEVSEL=fast TAbort-
 TAbort- MAbort- SERR- PERR-
 
 :00:02.0 PCI bridge: nVidia Corporation: Unknown device 02fc (rev
 a1) (prog-if 00 [Normal decode])
 Control: I/O- Mem- BusMaster+ SpecCycle- MemWINV- VGASnoop-
 ParErr- Stepping- SERR- FastB2B-
 Status: Cap+ 66MHz- UDF- FastB2B- ParErr- DEVSEL=fast TAbort-
 TAbort- MAbort- SERR- PERR-
 Latency: 0, Cache Line Size: 0x10 (64 bytes)
 Bus: primary=00, secondary=01, subordinate=01, sec-latency=0
 I/O behind bridge: f000-0fff
 Memory behind bridge: fff0-000f
 Prefetchable memory behind bridge: fff0-
 
 BridgeCtl: Parity- SERR+ NoISA- VGA- MAbort- Reset- FastB2B-
 Capabilities: [40] #0d []
 Capabilities: [48] Power Management version 2
 Flags: PMEClk- DSI- D1- D2- AuxCurrent=0mA
 PME(D0+,D1+,D2+,D3hot+,D3cold+)
 Status: D0 PME-Enable- DSel=0 DScale=0 PME-
 Capabilities: [50] Message Signalled Interrupts: 64bit+
 Queue=0/1 Enable-
 

Re: e1000 not working on AMD64

2006-02-07 Thread Rick Jones

Hajo Noerenberg wrote:

I've bought four new e1000 cards (PCI id 8086:107c, chip label 82541PI),
three of them are working without problems (i386, kernel 2.4.x).

One of them is installed in an AMD64 SMP system (Athlon dual core 4GB).
It gets detected, link is reported to be up, but no data goes through
(in fact _sometimes_ it succeeds to get an IP address via DHCP after
10-20 retries). If I set an IP manually, I am not able to ping any other
host (ping sizes 30bytes ... 1000bytes).

Tested on kernel 2.6.15, 2.6.16-rc2 and 2.6.16-rc2-git2.

(Un-)setting NAPI does not change anything.

Initially I assumed an IRQ-related issue, but the 3ware RAID controller
works without any problems.


Do any of the three known-good cards work in your AMD64 SMP system?

Does the card that does not work in your AMD64 system work in any of the other 
systems?


rick jones
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [test] airo : first WPA-PSK support

2006-02-07 Thread matthieu castet

Dan Williams wrote:


AFAIK anything less than 5.40.x doesn't work anyway.  The latest stuff
(5.60.x) has worked fine.  I previously had 5.30.17, which tended to
hang the card after a while.  Anyway, perhaps we require people to
update their firmware.  Not sure.

What's the minimum firmware version for WPA support?
according to 
http://www.cisco.com/en/US/products/hw/wireless/ps4555/prod_release_notes_list.html


5.30.17

But we need to support older cards 340, ...

Matthieu
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] acxsm: Add _{get,set}_encodeext and improve logging in _encode

2006-02-07 Thread Carlos Martín
Add _{get,set}_encodeext and improve logging in _encode

The code in _{get,set}_encode has been reordered a bit so we have
better logging (function entry and exit) and _{get,set}_encodeext have
been implemented as a wrapper for the ieee80211 stack functions.

diff --git a/ioctl.c b/ioctl.c
index 041f165..1f7 100644
--- a/ioctl.c
+++ b/ioctl.c
@@ -1007,7 +1007,9 @@ acx_ioctl_set_encode(
union iwreq_data *wrqu,
char *extra)
 {
-   int result = ieee80211_wx_set_encode(netdev_priv(ndev), info, wrqu, 
extra);
+   int result;
+   FN_ENTER;
+   result = ieee80211_wx_set_encode(netdev_priv(ndev), info, wrqu, extra);
FN_EXIT1(result);
return result;
 }
@@ -1023,12 +1025,45 @@ acx_ioctl_get_encode(
union iwreq_data *wrqu,
char *extra)
 {
-   int result = ieee80211_wx_get_encode(netdev_priv(ndev), info, wrqu, 
extra);
+   int result;
+   FN_ENTER;
+   result = ieee80211_wx_get_encode(netdev_priv(ndev), info, wrqu, extra);
FN_EXIT1(result);
return result;
 }
 
-
+/***
+** acx_ioctl_set_encodeext
+*/
+static int
+acx_ioctl_set_encodeext(
+   struct net_device *ndev,
+   struct iw_request_info *info,
+   union iwreq_data *wrqu,
+   char *extra)
+{
+   int result;
+   FN_ENTER;
+   result = ieee80211_wx_set_encodeext(netdev_priv(ndev), info, wrqu, 
extra);
+   FN_EXIT1(result);
+   return result;
+}
+/***
+** acx_ioctl_get_encodeext
+*/
+static int
+acx_ioctl_get_encodeext(
+   struct net_device *ndev,
+   struct iw_request_info *info,
+   union iwreq_data *wrqu,
+   char *extra)
+{
+   int result;
+   FN_ENTER;
+   result = ieee80211_wx_get_encodeext(netdev_priv(ndev), info, wrqu, 
extra);
+   FN_EXIT1(result);
+   return result;
+}
 /***
 */
 static int
@@ -2502,6 +2537,8 @@ static const iw_handler acx_ioctl_handle
/* Encoding */
WX(SIOCSIWENCODE)   = acx_ioctl_set_encode,
WX(SIOCGIWENCODE)   = acx_ioctl_get_encode,
+   WX(SIOCSIWENCODEEXT)= acx_ioctl_set_encodeext,
+   WX(SIOCGIWENCODEEXT)= acx_ioctl_get_encodeext,
/* Power saving */
WX(SIOCSIWPOWER)= acx_ioctl_set_power,
WX(SIOCGIWPOWER)= acx_ioctl_get_power,

-- 
Carlos Martín Nieto|   http://www.cmartin.tk

Erdbeben? Sicherlich etwas, das mit Erdberen zu tun hat. -- me, paraphrased
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Strange IPsec freeze/partial fix

2006-02-07 Thread Herbert Xu
Olaf Kirch [EMAIL PROTECTED] wrote:
 
 People have been testing with the patch below, which seems to fix the
 problem partially. They still see connection hangs however (things
 only clear up when they start a new ping or new ssh). So the patch
 is obviously not sufficient, and something else seems to go wrong.

I suggest that we simply bail out always.  If the dst decides to die
on us later on, the packet will be dropped anyway.  So there is no
great urgency to retry here.  Once we have the proper resolution
queueing, we can then do the retry again.

Signed-off-by: Herbert Xu [EMAIL PROTECTED]

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu ~{PmVHI~} [EMAIL PROTECTED]
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c
--- a/net/xfrm/xfrm_policy.c
+++ b/net/xfrm/xfrm_policy.c
@@ -890,7 +890,9 @@ restart:
xfrm_pol_put(policy);
if (dst)
dst_free(dst);
-   goto restart;
+
+   err = -EHOSTUNREACH;
+   goto error;
}
dst-next = policy-bundles;
policy-bundles = dst;
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Fix softmac scan

2006-02-07 Thread Larry Finger

John,

Softmac scanning fails because the stop flag is not cleared before scanning is started. The attached 
one-line patch fixes this.


Signed-Off-By: Larry Finger [EMAIL PROTECTED]

diff --git a/net/ieee80211/softmac/ieee80211softmac_scan.c 
b/net/ieee80211/softmac/ieee80211softmac_scan.c
index d90d31f..1cdd9f1 100644
--- a/net/ieee80211/softmac/ieee80211softmac_scan.c
+++ b/net/ieee80211/softmac/ieee80211softmac_scan.c
@@ -178,6 +178,7 @@ int ieee80211softmac_start_scan_implemen
dprintk(PFX Scanning %d channels\n, sm-scaninfo-number_channels);
sm-scaninfo-current_channel_idx = 0;
sm-scaninfo-started = 1;
+   sm-scaninfo-stop = 0;
INIT_COMPLETION(sm-scaninfo-finished);
schedule_work(sm-scaninfo-softmac_scan);
spin_unlock_irqrestore(sm-lock, flags);


Re: [Patch] 2.4.32 - Neighbour Cache (ARP) State machine bug Fixed

2006-02-07 Thread Willy Tarreau
Hi,

On Tue, Feb 07, 2006 at 12:57:43AM -0700, Pradeep Vincent wrote:
 In 2.4.21, arp code uses gc_timer to check for stale arp cache
 entries. In 2.6, each entry has its own timer to check for stale arp
 cache. 2.4.29 to 2.4.32 kernels (at least) use neither of these timers.
 This causes problems in environments where IPs or MACs are reassigned
 - saw this problem on load balancing router based networks that use
 VMACs. Tested this code on load balancing router based networks as
 well as peer-linux systems.
 
 
 Thanks,
 
 
 Signed off by: Pradeep Vincent [EMAIL PROTECTED]
 
 diff -Naur old/net/core/neighbour.c new/net/core/neighbour.c
 --- old/net/core/neighbour.cWed Nov 23 17:15:30 2005
 +++ new/net/core/neighbour.cWed Nov 23 17:26:01 2005
 @@ -14,6 +14,7 @@
 * Vitaly E. Lavrovreleasing NULL neighbor in neigh_add.
 * Harald WelteAdd neighbour cache statistics like rtstat
 * Harald Welteport neighbour cache rework from 2.6.9-rcX
 + *  Pradeep Vincent Move neighbour cache entry to stale state
 */

As you can see above, your mailer is still broken. Leading spaces get
removed and it seems like tabs are replaced with spaces. This makes it
really annoying to fix by hand because we all have to do your work again.
You should try to fix your mailer options, possibly by sending a few
mails to yourself or someone else (if you send *a few* mails to me, I
can confirm which one looks OK). If your mailer is definitely broken,
then you may send it as plain text first (for review), with a text
attachment for people to apply it without trouble.

Thanks,
Willy

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


IBM_EMAC_PHY_RX_CLK_FIX depends on non-existing option 440GR

2006-02-07 Thread Adrian Bunk
Jean-Luc Leger [EMAIL PROTECTED] reported the following:

from drivers/net/Kconfig:
config IBM_EMAC_PHY_RX_CLK_FIX
bool "PHY Rx clock workaround"
depends on IBM_EMAC && (405EP || 440GX || 440EP || 440GR)
- maybe this is 440GP ?


The non-existing CONFIG_440GR is also present in the driver itself.

Is this a typo or a not yet merged platform?

cu
Adrian

-- 

   Is there not promise of rain? Ling Tan asked suddenly out
of the darkness. There had been need of rain for many days.
   Only a promise, Lao Er said.
   Pearl S. Buck - Dragon Seed

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: IBM_EMAC_PHY_RX_CLK_FIX depends on non-existing option 440GR

2006-02-07 Thread Eugene Surovegin
On Tue, Feb 07, 2006 at 11:14:49PM +0100, Adrian Bunk wrote:
 Jean-Luc Leger [EMAIL PROTECTED] reported the following:
 
 from drivers/net/Kconfig:
 config IBM_EMAC_PHY_RX_CLK_FIX
 bool PHY Rx clock workaround
 depends on IBM_EMAC  (405EP || 440GX || 440EP || 440GR)
 - maybe this is 440GP ?
 
 
 The non-existing CONFIG_440GR is also present in the driver itself.
 
 Is this a typo or a not yet merged platform?

Not yet merged platform.

-- 
Eugene

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [BUG] recent commit breaks multi-descriptor receives with ip fragments

2006-02-07 Thread David S. Miller
From: Jesse Brandeburg [EMAIL PROTECTED]
Date: Tue, 7 Feb 2006 14:11:46 -0800 (Pacific Standard Time)

 A recent commit in 2.6.14 broke this, see this git commit: 
 http://git.kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=bc8dfcb93970ad7139c976356bfc99d7e251deaf
  
 Or for a shorter version http://tinyurl.com/drpu8

I think we should revert that thing, it's caused more grief than
anything else.  I thought it was a complete waste of time from the
get-go even assuming that fraglists within fraglists never occur...
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] NET : SMP optimization of netdevice refcount

2006-02-07 Thread Eric Dumazet
Struct net_device's atomic refcnt is probably one of the hottest memory spots 
in an SMP/NUMA network router or network server.


This counter is constantly incremented/decremented each time a network packet 
is handled, or an IP route is added/deleted in the route cache. This is neither 
SMP nor NUMA friendly (because of the locked ops, which are expensive, and the 
memory ping-pong between CPUs).


But as a matter of fact, the counter is *never* read: It's only read when the 
device must be unregistered.


Some devices are *never* unregistered : loopback, or statically linked 
drivers, thus we are refcounting them for nothing.


This patch tries to avoid atomic ops on SMP for the cases where the device won't 
be unregistered.


A 'int static_dev' integer is added next to 'atomic_t refcnt', and may be set 
to one by drivers that are statically linked.


I changed SET_MODULE_OWNER(dev) macro to avoid changing all network drivers, 
but I'm open to other suggestions.


All drivers that are currently using this macro automatically benefit from 
this SMP optimization : It's better to perform a test/conditional branch (even 
if badly predicted) than an atomic_{inc|dec}()


Signed-off-by: Eric Dumazet [EMAIL PROTECTED]
--- a/include/linux/netdevice.h 2006-02-07 11:55:42.0 +0100
+++ b/include/linux/netdevice.h 2006-02-07 13:06:14.0 +0100
@@ -417,10 +417,14 @@
struct timer_list   watchdog_timer;
 
 /*
- * refcnt is a very hot point, so align it on SMP
+ * {static_dev,refcnt} is a very hot point, so align it on SMP
  */
/* Number of references to this device */
-   atomic_trefcnt cacheline_aligned_in_smp;
+#ifdef CONFIG_SMP
+   /* SMP optimization : if dev is static, no need to modify refcnt */
+   int static_dev cacheline_aligned_in_smp;
+#endif
+   atomic_trefcnt;
 
/* delayed register/unregister */
struct list_headtodo_list;
@@ -514,7 +518,29 @@
 ~NETDEV_ALIGN_CONST);
 }
 
-#define SET_MODULE_OWNER(dev) do { } while (0)
+static inline int netif_static(const struct net_device *dev)
+{
+#if defined(CONFIG_SMP)
+   return dev-static_dev;
+#else
+   return 0;
+#endif
+}
+
+static inline void netif_setstatic(struct net_device *dev, int v)
+{
+#if defined(CONFIG_SMP)
+#if defined(MODULE)
+   v = 0;
+#endif
+   dev-static_dev = v;
+#endif
+}
+/*
+ * If a driver is a not a module, dev can be marked as static
+ */
+#define SET_MODULE_OWNER(dev) do { netif_setstatic(dev, 1); } while (0)
+
 /* Set the sysfs physical device reference for the network logical device
  * if set prior to registration will cause a symlink during initialization.
  */
@@ -705,11 +731,12 @@
 
 static inline void dev_put(struct net_device *dev)
 {
-   atomic_dec(dev-refcnt);
+   if (!netif_static(dev))
+   atomic_dec(dev-refcnt);
 }
 
-#define __dev_put(dev) atomic_dec((dev)-refcnt)
-#define dev_hold(dev) atomic_inc((dev)-refcnt)
+#define __dev_put(dev) if (!netif_static(dev)) atomic_dec((dev)-refcnt)
+#define dev_hold(dev) if (!netif_static(dev)) atomic_inc((dev)-refcnt)
 
 /* Carrier loss detection, dial on demand. The functions netif_carrier_on
  * and _off may be called from IRQ context, but it is caller
--- a/net/core/dev.c2006-02-07 11:59:53.0 +0100
+++ b/net/core/dev.c2006-02-07 12:52:27.0 +0100
@@ -2990,12 +2990,11 @@
alloc_size = (sizeof(*dev) + NETDEV_ALIGN_CONST)  ~NETDEV_ALIGN_CONST;
alloc_size += sizeof_priv + NETDEV_ALIGN_CONST;
 
-   p = kmalloc(alloc_size, GFP_KERNEL);
+   p = kzalloc(alloc_size, GFP_KERNEL);
if (!p) {
printk(KERN_ERR alloc_dev: Unable to allocate device.\n);
return NULL;
}
-   memset(p, 0, alloc_size);
 
dev = (struct net_device *)
(((long)p + NETDEV_ALIGN_CONST)  ~NETDEV_ALIGN_CONST);
@@ -3248,6 +3247,7 @@
queue-backlog_dev.weight = weight_p;
queue-backlog_dev.poll = process_backlog;
atomic_set(queue-backlog_dev.refcnt, 1);
+   netif_setstatic(queue-backlog_dev, 1);
}
 
dev_boot_phase = 0;
--- a/drivers/net/loopback.c2006-02-07 12:10:55.0 +0100
+++ b/drivers/net/loopback.c2006-02-07 12:37:49.0 +0100
@@ -224,16 +224,18 @@
 int __init loopback_init(void)
 {
struct net_device_stats *stats;
+   int res;
 
/* Can survive without statistics */
-   stats = kmalloc(sizeof(struct net_device_stats), GFP_KERNEL);
+   stats = kzalloc(sizeof(struct net_device_stats), GFP_KERNEL);
if (stats) {
-   memset(stats, 0, sizeof(struct net_device_stats));
loopback_dev.priv = stats;
loopback_dev.get_stats = get_stats;
}

-   return register_netdev(loopback_dev);
+   res = register_netdev(loopback_dev);
+   

Re: [PATCH] NET : SMP optimization of netdevice refcount

2006-02-07 Thread David S. Miller
From: Eric Dumazet [EMAIL PROTECTED]
Date: Wed, 08 Feb 2006 00:23:45 +0100

 Some devices are *never* unregistered : loopback, or statically linked 
 drivers, thus we are refcounting them for nothing.

Statically linked drivers can have netdev's that get unregistered
and free'd up.  For example we have a few cases where configuration
calls allocate/register and deallocate/unregister net devices.

I understand what you're trying to do, but I don't think this is
the way to do it.

I once thought we could play some games because of the invariant that
if we have a route attached to the SKB, that holds an implicit
reference for the netdevice too.  But I know there are cases where
the route attached to the SKB is to a different device than the
one that skb-dev is and should be set to.

There is also a temptation to deal with this using per-cpu (or
per-node) counters, and that's a too bloated solution.  We should
be making datastructures smaller not larger.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] NET : SMP optimization of netdevice refcount

2006-02-07 Thread Eric Dumazet

David S. Miller a écrit :

From: Eric Dumazet [EMAIL PROTECTED]
Date: Wed, 08 Feb 2006 00:23:45 +0100

Some devices are *never* unregistered : loopback, or statically linked 
drivers, thus we are refcounting them for nothing.


Statically linked drivers can have netdev's that get unregistered
and free'd up.  For example we have a few cases where configuration
calls allocate/register and deallocate/unregister net devices.



Yes, e1000 currently does this, and I actually am using this patch on a 
machine with an e1000 card : I didn't change the e1000 source.



I understand what you're trying to do, but I don't think this is
the way to do it.

I once thought we could play some games because of the invariant that
if we have a route attached to the SKB, that holds an implicit
reference for the netdevice too.  But I know there are cases where
the route attached to the SKB is to a different device than the
one that skb-dev is and should be set to.

There is also a temptation to deal with this using per-cpu (or
per-node) counters, and that's a too bloated solution.  We should
be making datastructures smaller not larger.


Yes, I played with a structure I called a llref (Long Lived Reference count), 
that is suitable for netdevice refcount and struct vfsmount 'refcnt'


It uses a per cpu local_t, but current implementation of alloc_percpu(local_t) 
uses at least 32 bytes per cpu...


(sock_mnt, mqueue_mnt, shm_mnt, pipe_mnt, inotify_mnt, bd_mnt, devpts_mnt) are 
 other examples of objects having unnecessary refcounters.



-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] NET : SMP optimization of netdevice refcount

2006-02-07 Thread Ben Greear

Eric Dumazet wrote:

David S. Miller a écrit :


From: Eric Dumazet [EMAIL PROTECTED]
Date: Wed, 08 Feb 2006 00:23:45 +0100

Some devices are *never* unregistered : loopback, or statically 
linked drivers, thus we are refcounting them for nothing.



Statically linked drivers can have netdev's that get unregistered
and free'd up.  For example we have a few cases where configuration
calls allocate/register and deallocate/unregister net devices.



Yes, e1000 currently does this, and I actually am using this patch on a 
machine with a e1000 card : I didnt change e1000 source.



I understand what you're trying to do, but I don't think this is
the way to do it.


What do you think about having no ref counting, and upon removal of
a network device, we notify each logic unit that deals with skbs
or other things that link to the netdev and ask it to clean all
references to the NIC in question?

I'm not sure how much fun this would be to code..but it would
at least force us to understand exactly what code holds netdev references,
and get rid of an atomic op or two in the hot path...

Ben

--
Ben Greear [EMAIL PROTECTED]
Candela Technologies Inc  http://www.candelatech.com

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] NET : SMP optimization of netdevice refcount

2006-02-07 Thread David S. Miller
From: Ben Greear [EMAIL PROTECTED]
Date: Tue, 07 Feb 2006 15:54:06 -0800

 What do you think about having no ref counting, and upon removal of
 a network device, we notify each logic unit that deals with skbs
 or other things that link to the netdev and ask it to clean all
 references to the NIC in question?

That's a lot of notifiers.  Routes and neighbour cache entries are
relatively easy, and we do that today, but sockets and netfilter
can get really messy.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] NET : SMP optimization of netdevice refcount

2006-02-07 Thread Stephen Hemminger
On Tue, 07 Feb 2006 16:11:51 -0800 (PST)
David S. Miller [EMAIL PROTECTED] wrote:

 From: Ben Greear [EMAIL PROTECTED]
 Date: Tue, 07 Feb 2006 15:54:06 -0800
 
  What do you think about having no ref counting, and upon removal of
  a network device, we notify each logic unit that deals with skbs
  or other things that link to the netdev and ask it to clean all
  references to the NIC in question?
 
 That's a lot of notifiers.  Routes and neighbour cache entries are
 relatively easy, and we do that today, but sockets and netfilter
 can get really messy.

How bad would per-cpu'ish counting be?  There are always users
with 1000s of vlans, etc., so it would have to scale.

Even a counter that is hashed on cpu # would be better (assuming they
weren't all in the same cache line).

Also, isn't a lot of the problem reduced if network devices
are affinitied?

-- 
Stephen Hemminger [EMAIL PROTECTED]
OSDL http://developer.osdl.org/~shemminger
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] NET : SMP optimization of netdevice refcount

2006-02-07 Thread David S. Miller
From: Stephen Hemminger [EMAIL PROTECTED]
Date: Tue, 7 Feb 2006 16:19:42 -0800

 Also, isn't a lot of the problem reduced if network devices
 are affinitied?

Not for routing/firewalling, we touch the destination device's
counters on input softing of the source device.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] NET : SMP optimization of netdevice refcount

2006-02-07 Thread David S. Miller
From: Rick Jones [EMAIL PROTECTED]
Date: Tue, 07 Feb 2006 16:29:34 -0800

 In the realm of straw ideas, how often are netdevs added and
 removed, and would leaving a tombstone behind consume too much
 memory?

That could work.

Another idea is to revisit the scheme of storing just the
ifindex in the SKB instead of the device pointer.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] NET : SMP optimization of netdevice refcount

2006-02-07 Thread Rick Jones

David S. Miller wrote:

From: Rick Jones [EMAIL PROTECTED]
Date: Tue, 07 Feb 2006 16:29:34 -0800



In the realm of straw ideas, how often are netdevs added and
removed, and would leaving a tombstone behind consume too much
memory?



That could work.

Another idea is to revisit the scheme of storing just the
ifindex in the SKB instead of the device pointer.


That means extra lookups of interface name to device pointer right? Would that 
even be sufficient to keep a reference-count free netdev from being yanked out 
from under someone?


The only thing that worries me about tombstones is someone with a QA test that 
adds and removes a device over and over again, and fills the cemetery as it 
were.  It is after all something of a deliberate memory leak.


Is there some way for a daemon or somesuch to try to garbage collect the 
tombstones?  I guess that gets back to knowing where things that might have a 
reference to the netdev happen to be, which would be just about the same as the 
suggestion to call into them somehow so it becomes known that none of them have 
a reference...


Unless there is some sort of event where it is known that when it happens all 
netdev references have been refreshed - something short of reboot of course.


What sort of pain and suffering would happen if an old tombstone were brought 
back to life for a new device?  Would that cause code referencing it to get all 
bent out of shape?  Is there anything that would naturally inform the entity 
dereferencing to the newly undead that they were resurrected?


just thinking while typing

rick  jones
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] NET : SMP optimization of netdevice refcount

2006-02-07 Thread Ben Greear

David S. Miller wrote:

From: Ben Greear [EMAIL PROTECTED]
Date: Tue, 07 Feb 2006 15:54:06 -0800



What do you think about having no ref counting, and upon removal of
a network device, we notify each logic unit that deals with skbs
or other things that link to the netdev and ask it to clean all
references to the NIC in question?



That's a lot of notifiers.  Routes and neighbour cache entries are
relatively easy, and we do that today, but sockets and netfilter
can get really messy.


I ended up touching every part of the tree that grabbed or released
a netdevice when chasing down that arp-cache bug some months ago.  It
is a bit of work, but it's not insurmountable.  Allowing a notifier to
clean the references would probably be a similar amount of work, and
as you say, many modules already listen for the notifiers and clean up
their references accordingly.

At least some sockets (packet-sockets, for example)
don't keep actual netdev references, but just
use the ifindex.  I'd personally prefer that they actually
hold a reference and listen to notifiers appropriately, but in this case,
it could make moving to a notifier scheme quicker.

I'm less sure about netfilter

Thanks,
Ben

--
Ben Greear [EMAIL PROTECTED]
Candela Technologies Inc  http://www.candelatech.com

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] NET : SMP optimization of netdevice refcount

2006-02-07 Thread Rick Jones

Ben Greear wrote:

Rick Jones wrote:

In the realm of straw ideas, how often are netdevs added and removed, 
and would leaving a tombstone behind consume too much memory?



In certain cases...say, with vlans, you could very often create and
destroy net devices.  I think that giving up and leaking the memory
is not a good idea.


What makes vlans more likely to be more dynamic in that way?

rick
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] NET : SMP optimization of netdevice refcount

2006-02-07 Thread Rick Jones

David S. Miller wrote:

From: Ben Greear [EMAIL PROTECTED]
Date: Tue, 07 Feb 2006 16:39:52 -0800



Rick Jones wrote:

In the realm of straw ideas, how often are netdevs added and removed, 
and would leaving a tombstone behind consume too much memory?


In certain cases...say, with vlans, you could very often create and
destroy net devices.  I think that giving up and leaking the memory
is not a good idea.



I think he's suggesting another thing.  Reattach the skb-dev
to some dummy device that always persists, when a device goes
down.


Actually, I think that Ben had me pegged right the first time - I was not going 
to chase down all the skb's (assuming there isn't already a list of all skb's) I 
was just going to leave the minimum of a device's structures out there so it 
would still be safe to follow the pointer from the skb's etc.



The only problem is finding all SKB's that reference the
dev going down.


Indeed.

rick jones
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] NET : SMP optimization of netdevice refcount

2006-02-07 Thread Ben Greear

Rick Jones wrote:

Ben Greear wrote:


Rick Jones wrote:

In the realm of straw ideas, how often are netdevs added and removed, 
and would leaving a tombstone behind consume too much memory?




In certain cases...say, with vlans, you could very often create and
destroy net devices.  I think that giving up and leaking the memory
is not a good idea.


What makes vlans more likely to be more dynamic in that way?


They are easy to create and destroy..and in mass.  You can be sure
that someone has realized this and has made a cool application
on top of this feature.  If we suddenly start leaking memory to
gain a small bit of performance in the 1+Gbps speeds, then people
will be upset.

At the least, I have applications that do this..and whether they
are cool or not is open to contention, but I will certainly be upset
anyway :)

Ben

--
Ben Greear [EMAIL PROTECTED]
Candela Technologies Inc  http://www.candelatech.com

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] [IPVS] Shrink ip_vs_*.c includes

2006-02-07 Thread Herbert Xu
Horms [EMAIL PROTECTED] wrote:
 Dave, 
 
 please apply.

Looks bogus to me.  Why are we removing linux/modules.h from ip_vs_app.c
when it uses things like EXPORT_SYMBOL?
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu 许志壬 [EMAIL PROTECTED]
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [BUG] recent commit breaks multi-descriptor receives with ip fragments

2006-02-07 Thread Jesse Brandeburg

On Tue, 7 Feb 2006, Herbert Xu wrote:

David S. Miller [EMAIL PROTECTED] wrote:

 I think we should revert that thing, it's caused more grief than
 anything else.  I thought it was a complete waste of time from the
 get-go even assuming that fraglists within fraglists never occur...

I share your feelings towards this patch.  However, what e1000 is doing
is broken.  It should be filling in the frags array, not frag_list.


so we generally call dev_alloc_skb to get the receive buffers to give to 
our hardware.  When we use multiple receive buffers what is the right way 
to allocate memory to give buffers to the hardware and then later, to 
chain the descriptors together to make the packet?  Using skb's is the 
common way as far as I've understood it.


Your input on the correct way to do these things is greatly appreciated.

Jesse
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [BUG] recent commit breaks multi-descriptor receives with ip fragments

2006-02-07 Thread David S. Miller
From: Jesse Brandeburg [EMAIL PROTECTED]
Date: Tue, 7 Feb 2006 17:41:28 -0800 (Pacific Standard Time)

 so we generally call dev_alloc_skb to get the receive buffers to give to 
 our hardware.  When we use multiple receive buffers what is the right way 
 to allocate memory to give buffers to the hardware and then later, to 
 chain the descriptors together to make the packet?  Using skb's is the 
 common way as far as I've understood it.
 
 Your input on the correct way to do these things is greatly appreciated.

Allocate a single SKB and fill in the skb_shared_info() page/offset/len
pairs, making sure to take proper references to the pages you add.
Coalesce when possible.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Patch] 2.4.32 - Neighbour Cache (ARP) State machine bug Fixed

2006-02-07 Thread Pradeep Vincent
One more attempt. Attaching the diff file as well.

Signed-off-by: Pradeep Vincent [EMAIL PROTECTED]

--- old/net/core/neighbour.cWed Nov  9 16:48:10 2005
+++ new/net/core/neighbour.cTue Feb  7 17:38:26 2006
@@ -14,6 +14,7 @@
  * Vitaly E. Lavrovreleasing NULL neighbor in neigh_add.
  * Harald WelteAdd neighbour cache statistics like rtstat
  * Harald Welteport neighbour cache rework from 2.6.9-rcX
+ * Pradeep Vincent fix neighbour cache state machine
  */

 #include linux/config.h
@@ -705,6 +706,13 @@
neigh_release(n);
continue;
}
+   /* Move to NUD_STALE state */
+   if (n-nud_stateNUD_REACHABLE 
+   now - n-confirmed  n-parms-reachable_time) {
+   n-nud_state = NUD_STALE;
+   neigh_suspect(n);
+   }
+
write_unlock(n-lock);

 next_elt:

Thanks,

Pradeep
On 2/7/06, Willy Tarreau [EMAIL PROTECTED] wrote:
 Hi,

 On Tue, Feb 07, 2006 at 12:57:43AM -0700, Pradeep Vincent wrote:
  In 2.4.21, arp code uses gc_timer to check for stale arp cache
  entries. In 2.6, each entry has its own timer to check for stale arp
  cache. 2.4.29 to 2.4.32 kernels (at least) use neither of these timers.
  This causes problems in environments where IPs or MACs are reassigned
  - saw this problem on load balancing router based networks that use
  VMACs. Tested this code on load balancing router based networks as
  well as peer-linux systems.
 
 
  Thanks,
 
 
  Signed off by: Pradeep Vincent [EMAIL PROTECTED]
 
  diff -Naur old/net/core/neighbour.c new/net/core/neighbour.c
  --- old/net/core/neighbour.cWed Nov 23 17:15:30 2005
  +++ new/net/core/neighbour.cWed Nov 23 17:26:01 2005
  @@ -14,6 +14,7 @@
  * Vitaly E. Lavrovreleasing NULL neighbor in neigh_add.
  * Harald WelteAdd neighbour cache statistics like rtstat
  * Harald Welteport neighbour cache rework from 2.6.9-rcX
  + *  Pradeep Vincent Move neighbour cache entry to stale state
  */

 As you can see above, your mailer is still broken. Leading spaces get
 removed and it seems like tabs are replaced with spaces. This makes it
 really annoying to fix by hand because we all have to do your work again.
 You should try to fix your mailer options, possibly by sending a few
 mails to yourself or someone else (if you send *a few* mails to me, I
 can confirm which one looks OK). If your mailer is definitely broken,
 then you may send it as plain text first (for review), with a text
 attachment for people to apply it without trouble.

 Thanks,
 Willy




linux-2.4.29-arp-fix.patch
Description: Binary data


Re: [PATCH] acxsm: merge from acx 0.3.32

2006-02-07 Thread John W. Linville
On Tue, Feb 07, 2006 at 05:41:45PM +0200, Denis Vlasenko wrote:
 On Friday 03 February 2006 14:14, Denis Vlasenko wrote:
  Standalone acx driver had several fixes since
  acxsm fork, this patch merges them:
  - initial support for new TNETW1450 USB chip
  - support for firmware 2.3.1.31
  
  Also we had one report that acxsm is actually working.
  That's quite unexpected.
  
  Signed-off-by: Denis Vlasenko [EMAIL PROTECTED]
 
 What is the status of this patch? Accepted? Rejected?
 Other (please specify): 

I intend to merge it.  I had a busy week last week, with some
personal obligations.  I apologize for my slow speed.  I'll try to
do better! :-)

John
-- 
John W. Linville
[EMAIL PROTECTED]
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] [IPVS] Shrink ip_vs_*.c includes

2006-02-07 Thread Horms
On Wed, Feb 08, 2006 at 12:19:32PM +1100, Herbert Xu wrote:
 Horms [EMAIL PROTECTED] wrote:
  Dave, 
  
  please apply.
 
 Looks bogus to me.  Why are we removing linux/modules.h from ip_vs_app.c
 when it uses things like EXPORT_SYMBOL?

Given that the code still compiles, I guess linux/modules.h is included
in some other header that is included. I'm happy to put linux/modules.h
back in. Do you have any more suggestions?

-- 
Horms
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] [IPVS] Shrink ip_vs_*.c includes

2006-02-07 Thread Herbert Xu
Horms [EMAIL PROTECTED] wrote:

 Looks bogus to me.  Why are we removing linux/modules.h from ip_vs_app.c
 when it uses things like EXPORT_SYMBOL?
 
 Given that the code still compiles, I guess linux/modules.h is included
 in some other header that is included. I'm happy to put linux/modules.h
 back in. Do you have any more suggestions?

This is the wrong way to go about it.  You should never rely on indirect
inclusions because they might be removed one day and your file will not
compile anymore.

The correct way to go about this is to go through each included header
file and check if any of its symbols are used in the source file.

Or if this is too tedious just leave it alone.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu 许志壬 [EMAIL PROTECTED]
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] [IPVS] Shrink ip_vs_*.c includes

2006-02-07 Thread Horms
On Wed, Feb 08, 2006 at 01:36:11PM +1100, Herbert Xu wrote:
 Horms [EMAIL PROTECTED] wrote:
 
  Looks bogus to me.  Why are we removing linux/modules.h from ip_vs_app.c
  when it uses things like EXPORT_SYMBOL?
  
  Given that the code still compiles, I guess linux/modules.h is included
  in some other header that is included. I'm happy to put linux/modules.h
  back in. Do you have any more suggestions?
 
 This is the wrong way to go about it.  You should never rely on indirect
 inclusions because they might be removed one day and your file will not
 compile anymore.
 
 The correct way to go about this is to go through each included header
 file and check if any of its symbols are used in the source file.
 
 Or if this is too tedious just leave it alone.

Hi Herbert, 

thanks for your feedback. 

Dave, 

please discard this patch for now.

Ratz,

Unfortunately this seems like it is going to be more tedious than 
we first thought. I would guess writing some sort of tool to analyse
symbols and headers is the way to go. Else it seems more or less
impossible to clean up headers, even on a small scale.

-- 
Horms
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] [IPVS] Shrink ip_vs_*.c includes

2006-02-07 Thread Ian McDonald
 Unfortunately this seems like it is going to be more tedious than
 we first thought. I would guess writing some sort of tool to analyse
 symbols and headers is the way to go. Else it seems more or less
 impossible to clean up headers, even on a small scale.

Search the netdev archives or look at Arnaldo's kernel.org space as he
has done some scripts to do this once.

--
Ian McDonald
http://wand.net.nz/~iam4
WAND Network Research Group
University of Waikato
New Zealand
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] [IPVS] Shrink ip_vs_*.c includes

2006-02-07 Thread David S. Miller
From: Horms [EMAIL PROTECTED]
Date: Wed, 8 Feb 2006 12:09:29 +0900

 Unfortunately this seems like it is going to be more tedious than 
 we first thought. I would guess writing some sort of tool to analyse
 symbols and headers is the way to go. Else it seems more or less
 impossible to clean up headers, even on a small scale.

It's doable on a small scale, you just have to approach the problem
from the other direction.  Ie. pick a header file and audit the use of
that specific header file across the tree.

Folks have done this with headers like linux/sched.h and friends in
the past.

And it's worthwhile because anything that minimises kernel rebuild
when touching a header file helps streamline development.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH linux-2.6.16-rc2] bonding: fix a locking bug in bond_release

2006-02-07 Thread Jay Vosburgh

bond_release returns EINVAL without releasing the bond lock if the
slave device is not being bonded by the bond.  The following patch
ensures that the lock is released in this case.

Signed-off-by: Stephen J. Bevan [EMAIL PROTECTED]
Acked-by: Jay Vosburgh [EMAIL PROTECTED]

---

--- linux-2.6.16-rc2/drivers/net/bonding/bond_main.c.orig   2006-02-07 
20:26:08.0 -0800
+++ linux-2.6.16-rc2/drivers/net/bonding/bond_main.c2006-02-07 
20:33:20.447899952 -0800
@@ -1570,6 +1570,7 @@ int bond_release(struct net_device *bond
printk(KERN_INFO DRV_NAME
   : %s: %s not enslaved\n,
   bond_dev-name, slave_dev-name);
+   write_unlock_bh(bond-lock);
return -EINVAL;
}
 


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Packet socket: fix for 64-bit kernel and 32-bit userland

2006-02-07 Thread FUJITA Tomonori
The tpacket_hdr structure includes an 'unsigned long' even though the
kernel and userland share it in the mmapped ring buffer.

Seems it would be better to fix all data structures in the header file
than fixing only tpacket_hdr structure.

Signed-off-by: FUJITA Tomonori [EMAIL PROTECTED]
Signed-off-by: Mike Christie [EMAIL PROTECTED]
---

 include/linux/if_packet.h |   54 +++--
 1 files changed, 27 insertions(+), 27 deletions(-)

b8afaafd2a40e36daa030a852a5c92f0b6cd7531
diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h
index b925585..bdd1e1e 100644
--- a/include/linux/if_packet.h
+++ b/include/linux/if_packet.h
@@ -3,20 +3,20 @@
 
 struct sockaddr_pkt
 {
-   unsigned short spkt_family;
-   unsigned char spkt_device[14];
-   unsigned short spkt_protocol;
+   __u16   spkt_family;
+   __u8spkt_device[14];
+   __u16   spkt_protocol;
 };
 
 struct sockaddr_ll
 {
-   unsigned short  sll_family;
-   unsigned short  sll_protocol;
-   int sll_ifindex;
-   unsigned short  sll_hatype;
-   unsigned char   sll_pkttype;
-   unsigned char   sll_halen;
-   unsigned char   sll_addr[8];
+   __u16   sll_family;
+   __u16   sll_protocol;
+   __s32   sll_ifindex;
+   __u16   sll_hatype;
+   __u8sll_pkttype;
+   __u8sll_halen;
+   __u8sll_addr[8];
 };
 
 /* Packet types */
@@ -42,24 +42,24 @@ struct sockaddr_ll
 
 struct tpacket_stats
 {
-   unsigned inttp_packets;
-   unsigned inttp_drops;
+   __u32   tp_packets;
+   __u32   tp_drops;
 };
 
 struct tpacket_hdr
 {
-   unsigned long   tp_status;
+   __u32   tp_status;
 #define TP_STATUS_KERNEL   0
 #define TP_STATUS_USER 1
 #define TP_STATUS_COPY 2
 #define TP_STATUS_LOSING   4
 #define TP_STATUS_CSUMNOTREADY 8
-   unsigned inttp_len;
-   unsigned inttp_snaplen;
-   unsigned short  tp_mac;
-   unsigned short  tp_net;
-   unsigned inttp_sec;
-   unsigned inttp_usec;
+   __u32   tp_len;
+   __u32   tp_snaplen;
+   __u16   tp_mac;
+   __u16   tp_net;
+   __u32   tp_sec;
+   __u32   tp_usec;
 };
 
 #define TPACKET_ALIGNMENT  16
@@ -81,18 +81,18 @@ struct tpacket_hdr
 
 struct tpacket_req
 {
-   unsigned inttp_block_size;  /* Minimal size of contiguous block */
-   unsigned inttp_block_nr;/* Number of blocks */
-   unsigned inttp_frame_size;  /* Size of frame */
-   unsigned inttp_frame_nr;/* Total number of frames */
+   __u32   tp_block_size;  /* Minimal size of contiguous block */
+   __u32   tp_block_nr;/* Number of blocks */
+   __u32   tp_frame_size;  /* Size of frame */
+   __u32   tp_frame_nr;/* Total number of frames */
 };
 
 struct packet_mreq
 {
-   int mr_ifindex;
-   unsigned short  mr_type;
-   unsigned short  mr_alen;
-   unsigned char   mr_address[8];
+   __s32   mr_ifindex;
+   __u16   mr_type;
+   __u16   mr_alen;
+   __u8mr_address[8];
 };
 
 #define PACKET_MR_MULTICAST0
-- 
1.1.3
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Packet socket: directly access the mmapped ring buffer

2006-02-07 Thread FUJITA Tomonori
Mike Christie and I've developed the SCSI Userspace target
framework. Target LLDs (for Fibre Channel, iSCSI HBAs, etc.) pass SCSI
commands to the user-space daemon. The daemon
executes the commands and sends the results back to the LLDs.

Please refer to scsi-ml for further details.

http://thread.gmane.org/gmane.linux.scsi/22409

We need an efficient kernel and user-space communication interface and
used netlink. Jeff Garzik suggested the packet socket mmap'd ring
buffer.

The mmap'd ring buffer is really nice, but we want to access the ring
buffer directly, without going through the networking stack, to avoid
memory allocation and overhead.


Signed-off-by: FUJITA Tomonori [EMAIL PROTECTED]
Signed-off-by: Mike Christie [EMAIL PROTECTED]
---

 include/net/af_packet.h |6 ++
 net/packet/af_packet.c  |   17 +
 2 files changed, 23 insertions(+), 0 deletions(-)
 create mode 100644 include/net/af_packet.h

c627f3a1da6e5e7e9e46d58401adcf168ea45787
diff --git a/include/net/af_packet.h b/include/net/af_packet.h
new file mode 100644
index 000..5a75e07
--- /dev/null
+++ b/include/net/af_packet.h
@@ -0,0 +1,6 @@
+#ifndef __LINUX_NET_AFPACKET_H
+#define __LINUX_NET_AFPACKET_H
+
+extern struct tpacket_hdr *packet_socket_frame(struct sock *sk);
+
+#endif
diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c
index 9db7dbd..b5fbd74 100644
--- a/net/packet/af_packet.c
+++ b/net/packet/af_packet.c
@@ -562,6 +562,23 @@ drop:
 }
 
 #ifdef CONFIG_PACKET_MMAP
+struct tpacket_hdr *packet_socket_frame(struct sock *sk)
+{
+   struct packet_sock *po;
+   struct tpacket_hdr *h;
+
+   po = pkt_sk(sk);
+   spin_lock(sk-sk_receive_queue.lock);
+   h = (struct tpacket_hdr *) packet_lookup_frame(po, po-head);
+   if (h-tp_status)
+   h = ERR_PTR(-ENOBUFS);
+   else
+   po-head = po-head != po-frame_max ? po-head+1 : 0;
+   spin_unlock(sk-sk_receive_queue.lock);
+   return h;
+}
+EXPORT_SYMBOL_GPL(packet_socket_frame);
+
 static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct 
packet_type *pt, struct net_device *orig_dev)
 {
struct sock *sk;
-- 
1.1.3
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Packet socket: fix for 64-bit kernel and 32-bit userland

2006-02-07 Thread David S. Miller
From: FUJITA Tomonori [EMAIL PROTECTED]
Date: Wed, 08 Feb 2006 14:24:49 +0900

 tpacket_hdr structure includes 'unsigned long' though kernel and
 userland shares it in the mmapped ring buffer.
 
 Seems it would be better to fix all data structures in the header file
 than fixing only tpacket_hdr structure.
 
 Signed-off-by: FUJITA Tomonori [EMAIL PROTECTED]
 Signed-off-by: Mike Christie [EMAIL PROTECTED]

You broke 64-bit userland by changing that unsigned long
to a __u32.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Packet socket: fix for 64-bit kernel and 32-bit userland

2006-02-07 Thread FUJITA Tomonori
From: David S. Miller [EMAIL PROTECTED]
Subject: Re: [PATCH] Packet socket: fix for 64-bit kernel and 32-bit userland
Date: Tue, 07 Feb 2006 21:36:06 -0800 (PST)

  tpacket_hdr structure includes 'unsigned long' though kernel and
  userland shares it in the mmapped ring buffer.
  
  Seems it would be better to fix all data structures in the header file
  than fixing only tpacket_hdr structure.
  
  Signed-off-by: FUJITA Tomonori [EMAIL PROTECTED]
  Signed-off-by: Mike Christie [EMAIL PROTECTED]
 
 You broke 64-bit userland by changing that unsigned long
 to a __u32.

You mean that currently it's broken in 64-bit kernel and 32-bit
userland anyway so use __u64 instead of __u32?
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Packet socket: fix for 64-bit kernel and 32-bit userland

2006-02-07 Thread FUJITA Tomonori
From: David S. Miller [EMAIL PROTECTED]
Subject: Re: [PATCH] Packet socket: fix for 64-bit kernel and 32-bit userland
Date: Tue, 07 Feb 2006 21:48:02 -0800 (PST)

 From: FUJITA Tomonori [EMAIL PROTECTED]
 Date: Wed, 08 Feb 2006 14:41:41 +0900
 
  From: David S. Miller [EMAIL PROTECTED]
  Subject: Re: [PATCH] Packet socket: fix for 64-bit kernel and 32-bit 
  userland
  Date: Tue, 07 Feb 2006 21:36:06 -0800 (PST)
  
tpacket_hdr structure includes 'unsigned long' though kernel and
userland shares it in the mmapped ring buffer.

Seems it would be better to fix all data structures in the header file
than fixing only tpacket_hdr structure.

Signed-off-by: FUJITA Tomonori [EMAIL PROTECTED]
Signed-off-by: Mike Christie [EMAIL PROTECTED]
   
   You broke 64-bit userland by changing that unsigned long
   to a __u32.
  
  You mean that currently it's broken in 64-bit kernel and 32-bit
  userland anyway so use __u64 instead of __u32?
 
 I mean that unsigned long is 64-bit in a 64-bit kernel, and thus
 your changes break packet mmap() ring buffers for native 64-bit
 binaries.

I see. The following patch is OK?


diff --git a/include/linux/if_packet.h b/include/linux/if_packet.h
index b925585..0fc6998 100644
--- a/include/linux/if_packet.h
+++ b/include/linux/if_packet.h
@@ -3,20 +3,20 @@
 
 struct sockaddr_pkt
 {
-   unsigned short spkt_family;
-   unsigned char spkt_device[14];
-   unsigned short spkt_protocol;
+   __u16   spkt_family;
+   __u8spkt_device[14];
+   __u16   spkt_protocol;
 };
 
 struct sockaddr_ll
 {
-   unsigned short  sll_family;
-   unsigned short  sll_protocol;
-   int sll_ifindex;
-   unsigned short  sll_hatype;
-   unsigned char   sll_pkttype;
-   unsigned char   sll_halen;
-   unsigned char   sll_addr[8];
+   __u16   sll_family;
+   __u16   sll_protocol;
+   __s32   sll_ifindex;
+   __u16   sll_hatype;
+   __u8sll_pkttype;
+   __u8sll_halen;
+   __u8sll_addr[8];
 };
 
 /* Packet types */
@@ -42,24 +42,24 @@ struct sockaddr_ll
 
 struct tpacket_stats
 {
-   unsigned inttp_packets;
-   unsigned inttp_drops;
+   __u32   tp_packets;
+   __u32   tp_drops;
 };
 
 struct tpacket_hdr
 {
-   unsigned long   tp_status;
+   __u64   tp_status;
 #define TP_STATUS_KERNEL   0
 #define TP_STATUS_USER 1
 #define TP_STATUS_COPY 2
 #define TP_STATUS_LOSING   4
 #define TP_STATUS_CSUMNOTREADY 8
-   unsigned inttp_len;
-   unsigned inttp_snaplen;
-   unsigned short  tp_mac;
-   unsigned short  tp_net;
-   unsigned inttp_sec;
-   unsigned inttp_usec;
+   __u32   tp_len;
+   __u32   tp_snaplen;
+   __u16   tp_mac;
+   __u16   tp_net;
+   __u32   tp_sec;
+   __u32   tp_usec;
 };
 
 #define TPACKET_ALIGNMENT  16
@@ -81,18 +81,18 @@ struct tpacket_hdr
 
 struct tpacket_req
 {
-   unsigned inttp_block_size;  /* Minimal size of contiguous block */
-   unsigned inttp_block_nr;/* Number of blocks */
-   unsigned inttp_frame_size;  /* Size of frame */
-   unsigned inttp_frame_nr;/* Total number of frames */
+   __u32   tp_block_size;  /* Minimal size of contiguous block */
+   __u32   tp_block_nr;/* Number of blocks */
+   __u32   tp_frame_size;  /* Size of frame */
+   __u32   tp_frame_nr;/* Total number of frames */
 };
 
 struct packet_mreq
 {
-   int mr_ifindex;
-   unsigned short  mr_type;
-   unsigned short  mr_alen;
-   unsigned char   mr_address[8];
+   __s32   mr_ifindex;
+   __u16   mr_type;
+   __u16   mr_alen;
+   __u8mr_address[8];
 };
 
 #define PACKET_MR_MULTICAST0
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Packet socket: fix for 64-bit kernel and 32-bit userland

2006-02-07 Thread FUJITA Tomonori
From: FUJITA Tomonori [EMAIL PROTECTED]
Subject: Re: [PATCH] Packet socket: fix for 64-bit kernel and 32-bit userland
Date: Wed, 08 Feb 2006 14:59:06 +0900

 From: David S. Miller [EMAIL PROTECTED]
 Subject: Re: [PATCH] Packet socket: fix for 64-bit kernel and 32-bit userland
 Date: Tue, 07 Feb 2006 21:48:02 -0800 (PST)
 
  From: FUJITA Tomonori [EMAIL PROTECTED]
  Date: Wed, 08 Feb 2006 14:41:41 +0900
  
   From: David S. Miller [EMAIL PROTECTED]
   Subject: Re: [PATCH] Packet socket: fix for 64-bit kernel and 32-bit 
   userland
   Date: Tue, 07 Feb 2006 21:36:06 -0800 (PST)
   
 tpacket_hdr structure includes 'unsigned long' though kernel and
 userland shares it in the mmapped ring buffer.
 
 Seems it would be better to fix all data structures in the header file
 than fixing only tpacket_hdr structure.
 
 Signed-off-by: FUJITA Tomonori [EMAIL PROTECTED]
 Signed-off-by: Mike Christie [EMAIL PROTECTED]

You broke 64-bit userland by changing that unsigned long
to a __u32.
   
   You mean that currently it's broken in 64-bit kernel and 32-bit
   userland anyway so use __u64 instead of __u32?
  
  I mean that unsigned long is 64-bit in a 64-bit kernel, and thus
  your changes break packet mmap() ring buffers for native 64-bit
  binaries.
 
 I see. The following patch is OK?

Sorry, it seems I don't see that at all. Is there any good way to solve
this problem?
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Packet socket: fix for 64-bit kernel and 32-bit userland

2006-02-07 Thread David S. Miller
From: FUJITA Tomonori [EMAIL PROTECTED]
Date: Wed, 08 Feb 2006 14:59:06 +0900

 I see. The following patch is OK?

This breaks existing 32-bit programs which really want a 32-bit value
there.

Please sit and think about this problem for some time before proposing
more patches.

We have a whole compatibility layer designed to handle the differing
size of data types when running a 32-bit program on a 64-bit kernel.
It is not allowed to change data structures which exist already (and
are thus compiled into existing binaries) in order to fix this
problem.  Instead we must give the application what it expects.

If we are a 64-bit kernel running a 32-bit binary, this means giving
32-bit compatible data structures.  If we are a 64-bit kernel running
a 64-bit binary, this means giving native 64-bit data structures.

Thank you.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 19/23] [PATCH] bridge: netfilter races on device removal

2006-02-07 Thread Chris Wright
-stable review patch.  If anyone has any objections, please let us know.
--

Fix bridge netfilter to handle case where interface is deleted
from bridge while packet is being processed (on other CPU).

Fixes: http://bugzilla.kernel.org/show_bug.cgi?id=5803

Signed-off-by: Stephen Hemminger [EMAIL PROTECTED]
Signed-off-by: Chris Wright [EMAIL PROTECTED]
---

 net/bridge/br_netfilter.c |   55 +++---
 1 files changed, 38 insertions(+), 17 deletions(-)

Index: linux-2.6.15.3/net/bridge/br_netfilter.c
===
--- linux-2.6.15.3.orig/net/bridge/br_netfilter.c
+++ linux-2.6.15.3/net/bridge/br_netfilter.c
@@ -47,9 +47,6 @@
 #define store_orig_dstaddr(skb) (skb_origaddr(skb) = 
(skb)-nh.iph-daddr)
 #define dnat_took_place(skb)(skb_origaddr(skb) != (skb)-nh.iph-daddr)
 
-#define has_bridge_parent(device)  ((device)-br_port != NULL)
-#define bridge_parent(device)  ((device)-br_port-br-dev)
-
 #ifdef CONFIG_SYSCTL
 static struct ctl_table_header *brnf_sysctl_header;
 static int brnf_call_iptables = 1;
@@ -94,6 +91,12 @@ static struct rtable __fake_rtable = {
.rt_flags   = 0,
 };
 
+static inline struct net_device *bridge_parent(const struct net_device *dev)
+{
+   struct net_bridge_port *port = rcu_dereference(dev-br_port);
+
+   return port ? port-br-dev : NULL;
+}
 
 /* PF_BRIDGE/PRE_ROUTING */
 /* Undo the changes made for ip6tables PREROUTING and continue the
@@ -185,11 +188,15 @@ static int br_nf_pre_routing_finish_brid
skb-nf_bridge-mask ^= BRNF_NF_BRIDGE_PREROUTING;
 
skb-dev = bridge_parent(skb-dev);
-   if (skb-protocol == __constant_htons(ETH_P_8021Q)) {
-   skb_pull(skb, VLAN_HLEN);
-   skb-nh.raw += VLAN_HLEN;
+   if (!skb-dev)
+   kfree_skb(skb);
+   else {
+   if (skb-protocol == __constant_htons(ETH_P_8021Q)) {
+   skb_pull(skb, VLAN_HLEN);
+   skb-nh.raw += VLAN_HLEN;
+   }
+   skb-dst-output(skb);
}
-   skb-dst-output(skb);
return 0;
 }
 
@@ -266,7 +273,7 @@ bridged_dnat:
 }
 
 /* Some common code for IPv4/IPv6 */
-static void setup_pre_routing(struct sk_buff *skb)
+static struct net_device *setup_pre_routing(struct sk_buff *skb)
 {
struct nf_bridge_info *nf_bridge = skb-nf_bridge;
 
@@ -278,6 +285,8 @@ static void setup_pre_routing(struct sk_
nf_bridge-mask |= BRNF_NF_BRIDGE_PREROUTING;
nf_bridge-physindev = skb-dev;
skb-dev = bridge_parent(skb-dev);
+
+   return skb-dev;
 }
 
 /* We only check the length. A bridge shouldn't do any hop-by-hop stuff anyway 
*/
@@ -372,7 +381,8 @@ static unsigned int br_nf_pre_routing_ip
nf_bridge_put(skb-nf_bridge);
if ((nf_bridge = nf_bridge_alloc(skb)) == NULL)
return NF_DROP;
-   setup_pre_routing(skb);
+   if (!setup_pre_routing(skb))
+   return NF_DROP;
 
NF_HOOK(PF_INET6, NF_IP6_PRE_ROUTING, skb, skb-dev, NULL,
br_nf_pre_routing_finish_ipv6);
@@ -409,7 +419,6 @@ static unsigned int br_nf_pre_routing(un
 
if (skb-protocol == __constant_htons(ETH_P_8021Q)) {
skb_pull(skb, VLAN_HLEN);
-   (skb)-nh.raw += VLAN_HLEN;
}
return br_nf_pre_routing_ipv6(hook, skb, in, out, okfn);
}
@@ -426,7 +435,6 @@ static unsigned int br_nf_pre_routing(un
 
if (skb-protocol == __constant_htons(ETH_P_8021Q)) {
skb_pull(skb, VLAN_HLEN);
-   (skb)-nh.raw += VLAN_HLEN;
}
 
if (!pskb_may_pull(skb, sizeof(struct iphdr)))
@@ -456,7 +464,8 @@ static unsigned int br_nf_pre_routing(un
nf_bridge_put(skb-nf_bridge);
if ((nf_bridge = nf_bridge_alloc(skb)) == NULL)
return NF_DROP;
-   setup_pre_routing(skb);
+   if (!setup_pre_routing(skb))
+   return NF_DROP;
store_orig_dstaddr(skb);
 
NF_HOOK(PF_INET, NF_IP_PRE_ROUTING, skb, skb-dev, NULL,
@@ -530,11 +539,16 @@ static unsigned int br_nf_forward_ip(uns
struct sk_buff *skb = *pskb;
struct nf_bridge_info *nf_bridge;
struct vlan_ethhdr *hdr = vlan_eth_hdr(skb);
+   struct net_device *parent;
int pf;
 
if (!skb-nf_bridge)
return NF_ACCEPT;
 
+   parent = bridge_parent(out);
+   if (!parent)
+   return NF_DROP;
+
if (skb-protocol == __constant_htons(ETH_P_IP) || IS_VLAN_IP)
pf = PF_INET;
else
@@ -555,8 +569,8 @@ static unsigned int br_nf_forward_ip(uns
nf_bridge-mask |= BRNF_BRIDGED;
nf_bridge-physoutdev = skb-dev;
 
-   NF_HOOK(pf, NF_IP_FORWARD, skb, bridge_parent(in),
-   bridge_parent(out), br_nf_forward_finish);
+