date:20070118

On Wed, Jan 17, 2007 at 06:19:07PM -0500, Jeff Garzik wrote:
 Christoph Hellwig wrote:
 On Wed, Jan 17, 2007 at 01:00:47PM -0500, Dan Williams wrote:

 allows the 8388 to continue routing other laptops' packets over the mesh
 *while the host CPU is asleep*.
 
 We're not going to put a lot of junk into the kernel just because the OLPC
 folks decide to do odd power management schemes.
 
 We're not going to ignore useful power management schemes just because 
 they don't fit neatly into a pre-existing category.
 
 I think the request to determine how all this maps into MLME is fair, 
 though.

Definitely.  Also, I wonder if there was any attempt to evaluate how
the ieee80211 (or d80211) code might be extended in order to eliminate
the need for some of the libertas wlan_* files?

John
-- 
John W. Linville
[EMAIL PROTECTED]
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: watchdog timeout panic in e1000 driver

2007-01-18 Thread Kenzo Iwami

Hi,

 My patch may seem like a huge change, but in essence the change is
 pretty simple.
 
 In my patch, the interrupt handler code will check whether the interrupted
 code is holding the swfw semaphore. If it is held, the watchdog function
 is deferred until swfw semaphore is released.
 The modification is for the interrupted code which is holding the
 semaphore, and the interrupt handler, so they are both directly related
 to this problem.
 
 I will try to add some comments to my code to make it more readable.

I have rebased this patch for 2.6.20-rc5 and added some comments to this
patch to make it more readable.

Does this patch have problems? Or do you have any other ideas?

--
  Kenzo Iwami ([EMAIL PROTECTED])

Signed-off-by: Kenzo Iwami [EMAIL PROTECTED]

diff -urpN linux-2.6.20-rc5_org/drivers/net/e1000/e1000_hw.c 
linux-2.6.20-rc5_fix/drivers/net/e1000/e1000_hw.c
--- linux-2.6.20-rc5_org/drivers/net/e1000/e1000_hw.c   2007-01-13 
03:54:26.0 +0900
+++ linux-2.6.20-rc5_fix/drivers/net/e1000/e1000_hw.c   2007-01-18 
18:04:04.0 +0900
@@ -35,6 +35,7 @@

 static int32_t e1000_swfw_sync_acquire(struct e1000_hw *hw, uint16_t mask);
 static void e1000_swfw_sync_release(struct e1000_hw *hw, uint16_t mask);
+static void e1000_check_watchdog_deferred(struct e1000_hw *hw);
 static int32_t e1000_read_kmrn_reg(struct e1000_hw *hw, uint32_t reg_addr, 
uint16_t *data);
 static int32_t e1000_write_kmrn_reg(struct e1000_hw *hw, uint32_t reg_addr, 
uint16_t data);
 static int32_t e1000_get_software_semaphore(struct e1000_hw *hw);
@@ -3396,6 +3397,29 @@ e1000_shift_in_mdi_bits(struct e1000_hw
 return data;
 }

+static void
+e1000_check_watchdog_deferred(struct e1000_hw *hw)
+{
+/* If watchdog interrupt was deferred while this process was holding
+ * the swfw semaphore, process the watchdog now.
+ */
+if (atomic_xchg(&hw->watchdog_deferred, 0)) {
+retry:
+e1000_do_watchdog(hw);
+}
+atomic_dec(&hw->swfw_sem_count);
+
+/* Check watchdog_deferred once more just in case watchdog interrupt
+ * occurred between atomic_xchg and atomic_dec.
+ * By nature of the watchdog interrupt, we shouldn't go through this
+ * more than once.
+ */
+if (atomic_xchg(&hw->watchdog_deferred, 0)) {
+atomic_inc(&hw->swfw_sem_count);
+goto retry;
+}
+}
+
 static int32_t
 e1000_swfw_sync_acquire(struct e1000_hw *hw, uint16_t mask)
 {
@@ -3413,8 +3437,11 @@ e1000_swfw_sync_acquire(struct e1000_hw
 return e1000_get_hw_eeprom_semaphore(hw);

 while (timeout) {
-if (e1000_get_hw_eeprom_semaphore(hw))
+atomic_inc(&hw->swfw_sem_count);
+if (e1000_get_hw_eeprom_semaphore(hw)) {
+e1000_check_watchdog_deferred(hw);
 return -E1000_ERR_SWFW_SYNC;
+}

 swfw_sync = E1000_READ_REG(hw, SW_FW_SYNC);
 if (!(swfw_sync & (fwmask | swmask))) {
@@ -3424,6 +3451,7 @@ e1000_swfw_sync_acquire(struct e1000_hw
 /* firmware currently using resource (fwmask) */
 /* or other software thread currently using resource (swmask) */
 e1000_put_hw_eeprom_semaphore(hw);
+e1000_check_watchdog_deferred(hw);
 mdelay(5);
 timeout--;
 }
@@ -3437,6 +3465,8 @@ e1000_swfw_sync_acquire(struct e1000_hw
 E1000_WRITE_REG(hw, SW_FW_SYNC, swfw_sync);

 e1000_put_hw_eeprom_semaphore(hw);
+e1000_check_watchdog_deferred(hw);
+
 return E1000_SUCCESS;
 }

@@ -3458,6 +3488,7 @@ e1000_swfw_sync_release(struct e1000_hw
 return;
 }

+atomic_inc(&hw->swfw_sem_count);
 /* if (e1000_get_hw_eeprom_semaphore(hw))
  *return -E1000_ERR_SWFW_SYNC; */
 while (e1000_get_hw_eeprom_semaphore(hw) != E1000_SUCCESS);
@@ -3468,6 +3499,7 @@ e1000_swfw_sync_release(struct e1000_hw
 E1000_WRITE_REG(hw, SW_FW_SYNC, swfw_sync);

 e1000_put_hw_eeprom_semaphore(hw);
+e1000_check_watchdog_deferred(hw);
 }

 /*
diff -urpN linux-2.6.20-rc5_org/drivers/net/e1000/e1000_hw.h 
linux-2.6.20-rc5_fix/drivers/net/e1000/e1000_hw.h
--- linux-2.6.20-rc5_org/drivers/net/e1000/e1000_hw.h   2007-01-13 
03:54:26.0 +0900
+++ linux-2.6.20-rc5_fix/drivers/net/e1000/e1000_hw.h   2007-01-18 
18:04:42.0 +0900
@@ -306,6 +306,7 @@ typedef enum {
 #define E1000_BYTE_SWAP_WORD(_value) ((((_value) & 0x00ff) << 8) | \
  (((_value) & 0xff00) >> 8))

+extern void e1000_do_watchdog(struct e1000_hw *hw);
 /* Function prototypes */
 /* Initialization */
 int32_t e1000_reset_hw(struct e1000_hw *hw);
@@ -1465,6 +1466,8 @@ struct e1000_hw {
boolean_t   has_manc2h;
boolean_t   rx_needs_kicking;
boolean_t   has_smbus;
+   atomic_tswfw_sem_count;/* 0 if swfw_sem held on 
ESB2 */
+   atomic_twatchdog_deferred; /*

Re: [PATCH 1/12] L2 network namespace (v3): current network namespace operations

2007-01-18 Thread Dmitry Mishin

On Wednesday 17 January 2007 23:16, Eric W. Biederman wrote:
 Dmitry Mishin [EMAIL PROTECTED] writes:
 
  Added functions and macros required to operate with network namespaces.
  They are required in order to switch network namespace for incoming packets 
  and
  to not extend current network interface by additional network namespace 
  argue.
 
 Is exec_net only used in interrupt context?
I tried to do so.

 Or how do you ensure a sleeping function does not get called and the
 kernel process comes back on another cpu?
Seems that I forgot to remove its usage at least in one place - in
clone_net_ns(). If you caught more, please, let me know. 

-- 
Thanks,
Dmitry.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Can someone please try...

2007-01-18 Thread Pavel Roskin

On Wed, 2007-01-17 at 10:52 +0100, Michael Buesch wrote:

 Doesn't happen for me. I have no idea what's happening.
 Care to debug it?
 But it's weird that _killing_ the supplicant calls add_interface.
 I'd expect it to call remove_interface.

I'm sorry, I was actually running wpa_supplicant again at the time of
the crash.

What I have now is very different behavior in two configurations on the
same machine.

The i386 kernel without SMP with most debug enabled and serial console.
wpa_supplicant times out.  If I restart it, the kernel oopses, every
time in a different place.

The x86_64 kernel with SMP and with very few debug options.
wpa_supplicant connects.  Killing and restarting wpa_supplicant doesn't
cause any problems.  In fact, wpa_supplicant reconnects quickly.  I can
even ping the station from the AP, but the packet loss is horrible.  It
appears that most loss is on the receiving side.

I'll try to debug the problem when I have time.  At least I'll try to
find out if it's specific to the architecture or to another kernel
option.

Anyway, it's exciting that I could send first packets today!

-- 
Regards,
Pavel Roskin


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 9/9] net: vm deadlock avoidance core

On Wed, Jan 17, 2007 at 10:07:28AM +0100, Peter Zijlstra ([EMAIL PROTECTED]) 
wrote:
  You operate with 'current' in different contexts without any locks which
  looks racy and even is not allowed. What will be 'current' for
  netif_rx() case, which schedules softirq from hard irq context -
  ksoftirqd, why do you want to set its flags?
 
 I don't touch current in hardirq context, do I (if I did, that is indeed
 a mistake)?
 
 In all other contexts, current is valid.

Well, if you think that setting PF_MEMALLOC flag for keventd and
ksoftirqd is valid, then probably yes...

I meant that you can just mark process which created such socket as
PF_MEMALLOC, and clone that flag on forks and other related calls 
without 
all that checks for 'current' in different places.
   
   Ah, thats the wrong level to think here, these processes never reach
   user-space - nor should these sockets.
  
  You limit this just to send an ack?
  What about 'level-7' ack as you described in introduction?
 
 Take NFS, it does full data traffic in kernel.

NFS case is exactly the situation, when you only need to generate an ACK.

   Also, I only want the processing of the actual network packet to be able
   to eat the reserves, not any other thing that might happen in that
   context.
   
   And since network processing is mostly done in softirq context I must
   mark these sections like I did.
  
  You artificially limit system to just add a reserve to generate one ack.
  For that purpose you do not need to have all those flags - just reserve
  some data in network core and use it when system is in OOM (or reclaim)
  for critical data paths.
 
 How would that end up being different, I would have to replace all
 allocations done in the full network processing path.
 
 This seems a much less invasive method, all the (allocation) code can
 stay the way it is and use the normal allocation functions.

Ack is only generated in one place in TCP.

And actually we are starting to talk about different approach - having
separated allocator for network, which will be turned on on OOM (reclaim
or at any other time). If you do not mind, I would like to refresh a
discussion about network tree allocator, which utilizes own pool of
pages, performs self-defragmentation of the memory, is very SMP
friendly in that regard that it is per-cpu like slab and never free
objects on different CPUs, so they always stay in the same cache.
Among other goodies it allows to have full sending/receiving zero-copy.

Here is a link:
http://tservice.net.ru/~s0mbre/old/?section=projectsitem=nta

   + /*
   +decrease window size..
   +tcp_enter_quickack_mode(sk);
   + */
  
  How does this decrease window size?
  Maybe ack scheduling would be better handled by 
  inet_csk_schedule_ack()
  or just directly send an ack, which in turn requires allocation, 
  which
  can be bound to this received frame processing...
 
 It doesn't, I thought that it might be a good idea doing that, but 
 never
 got around to actually figuring out how to do it.

tcp_send_ack()?

   
   does that shrink the window automagically?
  
  Yes, it updates window, but having ack generated in that place is
  actually very wrong. In that place system has not processed incoming
  packet yet, so it can not generate correct ACK for received frame at
  all. And it seems that the only purpose of the whole patchset is to
  generate that poor ack - reseve 2007 ack packets (MAX_TCP_HEADER) 
  in system startup and reuse them when you are under memory pressure.
 
 Right, I suspected something like that; hence I wanted to just shrink
 the window. Anyway, this is not a very important issue.

tcp_enter_quickack_mode() does not update window, it allows to send ack
immediately after packet has been processed, window can be changed in
any way TCP state machine and congestion control want.

-- 
Evgeniy Polyakov
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 9/9] net: vm deadlock avoidance core

2007-01-18 Thread Peter Zijlstra

On Thu, 2007-01-18 at 13:41 +0300, Evgeniy Polyakov wrote:

   What about 'level-7' ack as you described in introduction?
  
  Take NFS, it does full data traffic in kernel.
 
 NFS case is exactly the situation, when you only need to generate an ACK.

No it is not, it needs the full RPC response.

   You artificially limit system to just add a reserve to generate one ack.
   For that purpose you do not need to have all those flags - just reserve
   some data in network core and use it when system is in OOM (or reclaim)
   for critical data paths.
  
  How would that end up being different, I would have to replace all
  allocations done in the full network processing path.
  
  This seems a much less invasive method, all the (allocation) code can
  stay the way it is and use the normal allocation functions.

 And actually we are starting to talk about different approach - having
 separated allocator for network, which will be turned on on OOM (reclaim
 or at any other time).

I think we might be, I'm more talking about requirements on the
allocator, while you seem to talk about implementations.

Replacing the allocator, or splitting it in two based on a condition are
all fine as long as they observe the requirements.

The requirement I add is that there is a reserve nobody touches unless
given express permission.

You could implement this by modifying each reachable allocator call site
and stick a branch in and use an alternate allocator when the normal
route fails and we do have permission; much like:

   foo = kmalloc(size, gfp_mask);
+  if (!foo && special)
+foo = my_alloc(size)

And earlier versions of this work did something like that. But it
litters the code quite badly and its quite easy to miss spots. There can
be quite a few allocations in processing network data.

Hence my work on integrating this into the regular memory allocators.

FYI; 'special' evaluates to something like:
  !(gfp_mask & __GFP_NOMEMALLOC) &&
  ((gfp_mask & __GFP_EMERGENCY) ||
   (!in_irq() && (current->flags & PF_MEMALLOC)))


  If you do not mind, I would like to refresh a
 discussion about network tree allocator,

  which utilizes own pool of
 pages, 

very high order pages, no?

This means that you have to either allocate at boot time and cannot
resize/add pools; which means you waste all that memory if the network
load never comes near using the reserved amount.

Or, you get into all the same trouble the hugepages folks are trying so
very hard to solve.

 performs self-defragmentation of the memory, 

Does it move memory about? 

All it does is try to avoid fragmentation by policy - a problem
impossible to solve in general; but can achieve good results in view of
practical limitations on program behaviour.

Does your policy work for the given workload? we'll see.

Also, on what level, each level has both internal and external
fragmentation. I can argue that having large immovable objects in memory
adds to the fragmentation issues on the page-allocator level.

 is very SMP
 friendly in that regard that it is per-cpu like slab and never free
 objects on different CPUs, so they always stay in the same cache.

This makes it very hard to guarantee a reserve limit. (Not impossible,
just more difficult)

 Among other goodies it allows to have full sending/receiving zero-copy.

That won't ever work unless you have page aligned objects, otherwise you
cannot map them into user-space. Which seems to be at odds with your
tight packing/reduce internal fragmentation goals.

Zero-copy entails mapping the page the hardware writes the packet in
into user-space, right?

Since it's impossible to predict to whom the next packet is addressed
the packets must be written (by hardware) to different pages.


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH V2] bcm43xx: Fix failure to deliver PCI-E interrupts

2007-01-18 Thread evan foss


One thing I forgot is that the LED will be on only if the switch is in the on 
position _AND_ the
interface is UP.


Yes I figured that out. (eventually)


 dhcpcd eth1   -low transmit power???
 Most likely. How far are you from the AP?

 This is a timeout issue. I tried to apply the timeout patch which I
 assume is for this and it didn't take. I have logs for both patch
 applications below. To the distance issue my ap died so I am trying to
 just connect to one next door. (with permission) The distance is about
 50ft. So this could be the main factor. This is why I said it might be
 my fault. The thing is I can connect to it in windows most of the time
 so I know it is in range. I will be going to school on Tuesday and
 will report back then. They have a much better AP.

With the current transmit levels of the 4311, 50 feet through at least one wall 
is probably too much.


I have tried 3 more AP at much closer distances 9ft and 4ft and it
still won't work. I either get Network Down or a timeout.



 Presario1600 bcm43xx # patch -p0 patch_2.6.18.1_signal_quality

...

 Presario1600 bcm43xx # patch -p0 patch_2.6.18.1_watchdog_timeout2

...

Those patches are already applied to your code.


Thanks for the clarification, and assistance.

--
http://www.coe.neu.edu/~efoss/
http://evanfoss.googlepages.com/
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Please pull 'upstream' branch of wireless-2.6

On Tue, Jan 02, 2007 at 09:42:47PM -0500, John W. Linville wrote:
 The following changes since commit fe5f8e2a1c5c040209c598a28e19c55f30e1040d:
   Zhu Yi (1):
 ipw2100: Fix dropping fragmented small packet problem
 
 are found in the git repository at:
 
   git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-2.6.git 
 upstream
 
 Daniel Drake (5):
   zd1211rw: Generic HMAC initialization
   zd1211rw: 2 new ZD1211B device ID's
   zd1211rw: Consistency for address space constants
   zd1211rw: Remove addressing abstraction
   zd1211rw: Add ID for Linksys WUSBF54G
 
 John W. Linville (1):
   softmac: avoid assert in ieee80211softmac_wx_get_rate
 
 Kai Engert (1):
   prism54: add ethtool -i interface
 
 Larry Finger (1):
   bcm43xx: Interrogate hardware-enable switch and update LEDs
 
 Michael Buesch (1):
   Update Prism54 MAINTAINERS entry

Jeff, it looks like you didn't pull this one yet.  I'm going to rebase
my upstream branch on top of my additions to my upstream-fixes branch.
So, just ignore this request and wait for the next round of pull
requests.

The next round is ready to go, except I can't login to
master.kernel.org today.  As soon as that is resolved, I'll push and
send the pull requests.

Thanks,

John

P.S.  If you haven't pulled due to some objection, please send me a
note to let me know why.
-- 
John W. Linville
[EMAIL PROTECTED]
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/12] L2 network namespace (v3): current network namespace operations

2007-01-18 Thread Eric W. Biederman

Dmitry Mishin [EMAIL PROTECTED] writes:

 On Wednesday 17 January 2007 23:16, Eric W. Biederman wrote:
 Dmitry Mishin [EMAIL PROTECTED] writes:
 
  Added functions and macros required to operate with network namespaces.
  They are required in order to switch network namespace for incoming packets
 and
  to not extend current network interface by additional network namespace
 argue.
 
 Is exec_net only used in interrupt context?
 I tried to do so.

 Or how do you ensure a sleeping function does not get called and the
 kernel process comes back on another cpu?
 Seems that I forgot to remove it's usage at least in one place - in
 clone_net_ns(). If you caught more, please, let me know. 

Sure.  It was not clear from what I saw of the patch it was intended to
be restricted to only interrupt context.  So I figured I would ask if
that was what you meant.

Eric
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: TKIP encryption should allocate enough tailroom

2007-01-18 Thread Pekka Pietikainen

On Wed, Jan 17, 2007 at 11:46:35AM -0500, Brandon Craig Rhodes wrote:
 Having further reviewed my code, I have changed my mind; the
 ieee80211_crypt_tkip routines are not designed to be responsible for
 creating enough headroom and tailroom.  The hostap driver should be
 doing this.  In fact, I now see that the hostap driver actually
 attempts to create enough headroom and tailroom, but computes them
 incorrectly.
Even then, if ieee80211_tkip_encrypt() didn't produce debug output for the
not enough space-case, should that be added to catch other
potentially broken drivers?
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: TKIP encryption should allocate enough tailroom

2007-01-18 Thread Brandon Craig Rhodes

Pekka Pietikainen [EMAIL PROTECTED] writes:

 On Wed, Jan 17, 2007 at 11:46:35AM -0500, Brandon Craig Rhodes wrote:
 Having further reviewed my code, I have changed my mind; the
 ieee80211_crypt_tkip routines are not designed to be responsible for
 creating enough headroom and tailroom.

 Even then, if ieee80211_tkip_encrypt() didn't produce debug output
 for the not enough space-case, should that be added to catch other
 potentially broken drivers?

I think your idea is an excellent one, and would have prevented my
having to add a half-dozen printk()'s to the code myself to discover
what was going on!

I would be happy to submit such a patch myself, but am not sure what
the local kernel conventions are regarding error messages - and the
ieee80211_crypt_tkip.c functions seem wildly inconsistent with regard
to debugging messages!  In some circumstances, debug messages are
always produced; in several others, net_ratelimit() is called to
decide whether to print an error (but why in these cases and not
others?); and in many cases, nothing is printed at all (is this
because convention would dictate that the caller discover the error
and print something out?).

If I want to generate a patch that festoons the ieee80211 functions
with informative error messages, what are the guidelines?

-- 
Brandon Craig Rhodes   [EMAIL PROTECTED]   http://rhodesmill.org/brandon
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] Marvell Libertas 8388 802.11b/g USB driver (v2)

On Wed, 2007-01-17 at 23:09 +, Christoph Hellwig wrote:
 On Wed, Jan 17, 2007 at 01:00:47PM -0500, Dan Williams wrote:
  Furthermore, I might add that the entire reason this part was chosen for
  OLPC was because it _is_ fullmac.
  
  To drive down power consumption, the OLPC puts the host CPU to sleep but
  keeps the 8388 powered on.  That consumes around 400mW max.  But it
  allows the 8388 to continue routing other laptops' packets over the mesh
  *while the host CPU is asleep*.
 
 We're not going to put a lot of junk into the kernel just because the OLPC
 folks decide to do odd powermanagment schemes.

Nor are we going to try to push highly OLPC-specific changes into this
driver in a non-modular manner.  This chip is also used in, for example,
the X-Box 360 wireless dongle, and other non-OLPC people are working on
adding support for the 8385 SDIO/CF variant.

It would be arrogant of us to think that the driver would _just_ be
useful for OLPC, and that's why nobody ever said that it was going to be
an OLPC specific driver, not I, nor Marcelo.  Give us some credit here
before you start jumping all over something neither of us have said or
implied.

If we do make any OLPC specific changes (there are currently none [1]),
they will be properly abstracted, conditionalized, and/or generalized.
We will not be throwing random crap into this driver.

Cheers,
Dan

[1] The code for the 802.11s mesh interface is in the driver, but that
will/should automatically turn itself off if the firmware doesn't
support that functionality.  Furthermore, none of the mesh bits are
OLPC-specific and may be used with any platform on which the driver
runs.

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] Marvell Libertas 8388 802.11b/g USB driver (v2)

On Wed, 2007-01-17 at 23:06 +, Christoph Hellwig wrote:
 On Wed, Jan 17, 2007 at 04:42:04PM -0200, Marcelo Tosatti wrote:
  And using the Marvell provided firmware is a requirement for OLPC
  machines, where the CPU will be shut down but the chip+firmware will
  continue to forward packets in the mesh network (there are extreme power
  saving constraints on these machines).
 
 Well, then it probably doesn't go into mainline if you want to continue
 doing this stupid layering violation.

In the future we'll likely need some layering to support the 8385
SDIO/CF variant, but most likely not in the way the USB support is currently
excessively layered and abstracted.

Dan


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Please pull 'upstream-fixes' branch of wireless-2.6

The following changes since commit a8b3485287731978899ced11f24628c927890e78:
  Linus Torvalds (1):
Linux v2.6.20-rc5

are found in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-2.6.git 
upstream-fixes

Larry Finger (1):
  bcm43xx: Fix failure to deliver PCI-E interrupts

 drivers/net/wireless/bcm43xx/bcm43xx_main.c |   11 +++
 1 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/bcm43xx/bcm43xx_main.c 
b/drivers/net/wireless/bcm43xx/bcm43xx_main.c
index 2ec2e5a..91b752e 100644
--- a/drivers/net/wireless/bcm43xx/bcm43xx_main.c
+++ b/drivers/net/wireless/bcm43xx/bcm43xx_main.c
@@ -2701,8 +2701,8 @@ static int bcm43xx_probe_cores(struct bcm43xx_private 
*bcm)
sb_id_hi = bcm43xx_read32(bcm, BCM43xx_CIR_SB_ID_HI);
 
/* extract core_id, core_rev, core_vendor */
-   core_id = (sb_id_hi & 0xFFF0) >> 4;
-   core_rev = (sb_id_hi & 0xF);
+   core_id = (sb_id_hi & 0x8FF0) >> 4;
+   core_rev = ((sb_id_hi & 0xF) | ((sb_id_hi & 0x7000) >> 8));
core_vendor = (sb_id_hi & 0xFFFF0000) >> 16;
 
dprintk(KERN_INFO PFX Core %d: ID 0x%x, rev 0x%x, vendor 
0x%x\n,
@@ -2873,7 +2873,10 @@ static int bcm43xx_wireless_core_init(struct 
bcm43xx_private *bcm,
sbimconfiglow = bcm43xx_read32(bcm, BCM43xx_CIR_SBIMCONFIGLOW);
sbimconfiglow &= ~ BCM43xx_SBIMCONFIGLOW_REQUEST_TOUT_MASK;
sbimconfiglow &= ~ BCM43xx_SBIMCONFIGLOW_SERVICE_TOUT_MASK;
-   sbimconfiglow |= 0x32;
+   if (bcm->bustype == BCM43xx_BUSTYPE_PCI)
+   sbimconfiglow |= 0x32;
+   else
+   sbimconfiglow |= 0x53;
bcm43xx_write32(bcm, BCM43xx_CIR_SBIMCONFIGLOW, sbimconfiglow);
}
 
@@ -3077,7 +3080,7 @@ static int bcm43xx_setup_backplane_pci_connection(struct 
bcm43xx_private *bcm,
if (err)
goto out;
 
-   if (bcm->current_core->rev < 6 ||
+   if (bcm->current_core->rev < 6 &&
bcm->current_core->id == BCM43xx_COREID_PCI) {
value = bcm43xx_read32(bcm, BCM43xx_CIR_SBINTVEC);
value |= (1 << backplane_flag_nr);
-- 
John W. Linville
[EMAIL PROTECTED]
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Please pull 'upstream' branch of wireless-2.6

The following changes since commit 10764889c6355cbb335cf0578ce12427475d1a65:
  Larry Finger (1):
bcm43xx: Fix failure to deliver PCI-E interrupts

are found in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-2.6.git 
upstream

Daniel Drake (6):
  zd1211rw: Generic HMAC initialization
  zd1211rw: 2 new ZD1211B device ID's
  zd1211rw: Consistency for address space constants
  zd1211rw: Remove addressing abstraction
  zd1211rw: Add ID for Linksys WUSBF54G
  zd1211rw: Add ID for ZyXEL ZyAIR G-220 v2

John W. Linville (1):
  softmac: avoid assert in ieee80211softmac_wx_get_rate

Kai Engert (1):
  prism54: add ethtool -i interface

Larry Finger (1):
  bcm43xx: Interrogate hardware-enable switch and update LEDs

Michael Buesch (1):
  Update Prism54 MAINTAINERS entry

Zhu Yi (1):
  ipw2200: add iwconfig rts/frag auto support

 MAINTAINERS   |2 +-
 drivers/net/wireless/bcm43xx/bcm43xx.h|7 +-
 drivers/net/wireless/bcm43xx/bcm43xx_leds.c   |   11 +-
 drivers/net/wireless/bcm43xx/bcm43xx_main.c   |   36 --
 drivers/net/wireless/bcm43xx/bcm43xx_radio.c  |2 +
 drivers/net/wireless/bcm43xx/bcm43xx_radio.h  |   16 +++
 drivers/net/wireless/ipw2200.c|4 +-
 drivers/net/wireless/prism54/islpci_dev.c |   13 ++
 drivers/net/wireless/prism54/islpci_dev.h |4 +
 drivers/net/wireless/prism54/islpci_hotplug.c |3 -
 drivers/net/wireless/zd1211rw/zd_chip.c   |  126 ++--
 drivers/net/wireless/zd1211rw/zd_chip.h   |  158 ++---
 drivers/net/wireless/zd1211rw/zd_def.h|2 +
 drivers/net/wireless/zd1211rw/zd_ieee80211.h  |1 -
 drivers/net/wireless/zd1211rw/zd_rf.h |2 -
 drivers/net/wireless/zd1211rw/zd_types.h  |   71 ---
 drivers/net/wireless/zd1211rw/zd_usb.c|  128 ++--
 drivers/net/wireless/zd1211rw/zd_usb.h|6 +-
 net/ieee80211/softmac/ieee80211softmac_wx.c   |6 +
 19 files changed, 256 insertions(+), 342 deletions(-)
 delete mode 100644 drivers/net/wireless/zd1211rw/zd_types.h

diff --git a/MAINTAINERS b/MAINTAINERS
index 1b1491d..42b57cf 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -2654,7 +2654,7 @@ S:Supported
 
 PRISM54 WIRELESS DRIVER
 P: Prism54 Development Team
-M: [EMAIL PROTECTED]
+M: [EMAIL PROTECTED]
 L: netdev@vger.kernel.org
 W: http://prism54.org
 S: Maintained
diff --git a/drivers/net/wireless/bcm43xx/bcm43xx.h 
b/drivers/net/wireless/bcm43xx/bcm43xx.h
index 8286678..3a064de 100644
--- a/drivers/net/wireless/bcm43xx/bcm43xx.h
+++ b/drivers/net/wireless/bcm43xx/bcm43xx.h
@@ -352,6 +352,10 @@
 #define BCM43xx_UCODEFLAG_UNKPACTRL0x0040
 #define BCM43xx_UCODEFLAG_JAPAN0x0080
 
+/* Hardware Radio Enable masks */
+#define BCM43xx_MMIO_RADIO_HWENABLED_HI_MASK (1 << 16)
+#define BCM43xx_MMIO_RADIO_HWENABLED_LO_MASK (1 << 4)
+
 /* Generic-Interrupt reasons. */
 #define BCM43xx_IRQ_READY  (1 << 0)
 #define BCM43xx_IRQ_BEACON (1 << 1)
@@ -758,7 +762,8 @@ struct bcm43xx_private {
bad_frames_preempt:1,   /* Use Bad Frames Preemption (default 
off) */
reg124_set_0x4:1,   /* Some variable to keep track of IRQ 
stuff. */
short_preamble:1,   /* TRUE, if short preamble is enabled. 
*/
-   firmware_norelease:1;   /* Do not release the firmware. Used on 
suspend. */
+   firmware_norelease:1,   /* Do not release the firmware. Used on 
suspend. */
+   radio_hw_enable:1;  /* TRUE if radio is hardware enabled */
 
struct bcm43xx_stats stats;
 
diff --git a/drivers/net/wireless/bcm43xx/bcm43xx_leds.c 
b/drivers/net/wireless/bcm43xx/bcm43xx_leds.c
index 7d383a2..8f198be 100644
--- a/drivers/net/wireless/bcm43xx/bcm43xx_leds.c
+++ b/drivers/net/wireless/bcm43xx/bcm43xx_leds.c
@@ -26,6 +26,7 @@
 */
 
 #include bcm43xx_leds.h
+#include bcm43xx_radio.h
 #include bcm43xx.h
 
 #include asm/bitops.h
@@ -108,6 +109,7 @@ static void bcm43xx_led_init_hardcoded(struct 
bcm43xx_private *bcm,
switch (led_index) {
case 0:
led-behaviour = BCM43xx_LED_ACTIVITY;
+   led-activelow = 1;
if (bcm-board_vendor == PCI_VENDOR_ID_COMPAQ)
led-behaviour = BCM43xx_LED_RADIO_ALL;
break;
@@ -199,20 +201,21 @@ void bcm43xx_leds_update(struct bcm43xx_private *bcm, int 
activity)
turn_on = activity;
break;
case BCM43xx_LED_RADIO_ALL:
-   turn_on = radio-enabled;
+   turn_on = radio-enabled  
bcm43xx_is_hw_radio_enabled(bcm);
break;
case BCM43xx_LED_RADIO_A:
case BCM43xx_LED_BCM4303_2:
-   turn_on = (radio-enabled  phy-type ==

Re: [PATCH] [IPV6] fixed the size of the netlink message notified by inet6_rt_notify().

2007-01-18 Thread Noriaki TAKAMIYA

Hi,

 Wed, 17 Jan 2007 13:33:22 +0100
 [Subject: Re: [PATCH] [IPV6] fixed the size of the netlink message notified 
 by inet6_rt_notify().]
 Patrick McHardy [EMAIL PROTECTED] wrote...

 Somewhat related: I have this patch for 2.6.21 to get rid of the
 BUG_ON()s.

  I think the problem is the return value of inet6_rt_notify() is less
  than expected.

--
Noriaki TAKAMIYA
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Please pull 'upstream-fixes' branch of wireless-2.6


John W. Linville wrote:

The following changes since commit a8b3485287731978899ced11f24628c927890e78:
  Linus Torvalds (1):
Linux v2.6.20-rc5

are found in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-2.6.git 
upstream-fixes

Larry Finger (1):
  bcm43xx: Fix failure to deliver PCI-E interrupts

 drivers/net/wireless/bcm43xx/bcm43xx_main.c |   11 +++
 1 files changed, 7 insertions(+), 4 deletions(-)


pulled


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 0/8] partial resend: e1000 fixes and updates


Kok, Auke wrote:

Hi,

This patch series contains exclusively fixes for e1000. Some of these patches 
were
already sent in December, but didn't make it into any upstream tree yet. Most
importantly, it addresses two issues in the recently merged msi interrupt
handler and dynamic itr code. A performance fix and some minor cleanups are also
added. This brings the driver up to version 7.3.20-k2.

The summary below lists all patches. Ones that were previously acked are 
annotated
with (*)

These patches apply against netdev-2.6 #upstream-linus commit
77aab8bf22042d1658d4adbca8b71779e7f2d0ff. Please pull:

git pull git://lost.foo-projects.org/~ahkok/git/netdev-2.6 upstream-linus


Sigh.  I /know/ I've told you this before, but let's review the branches 
again.  NEVER EVER use branch 'upstream-linus'.  That is for Linus only.


There are three branches you should care about:

master  Vanilla upstream Linus tree, as of my last pull
upstream-fixes  netdev fixes being sent to Linus for -rc
upstreamEverything else pending, but not yet merged

The decision tree is as follows:

* If there are no dependent e1000 changes, send patches against master 
(or torvalds/linux-2.6.git, if you prefer)
* If there are dependent e1000 changes in #upstream-fixes, send patches 
against #upstream-fixes
* If there are dependent e1000 changes in #upstream, send patches 
against #upstream


In the current tree there are no e1000 changes, so please regenerate 
your tree against vanilla linux-2.6.git and resubmit.


I ACK patches 1-8.

Jeff




-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 2.6.20 1/5] s2io: updates for s2io driver.


Ananda Raju wrote:
Hello, 


List of changes in this patch:

This patch adds two load parameters, napi and ufo. Previously NAPI was a
compile-time option; with these changes we can enable or disable NAPI using a
load parameter. We are also introducing the ufo load parameter to enable/disable
the ufo feature.

Signed-off-by: Sivakumar Subramani [EMAIL PROTECTED]


OK, you're getting closer :)

Problems that need correcting:

1) Your email subject line is a one-line summary of the patch.  "s2io: 
updates for s2io driver" is useless, because it tells us nothing about 
the patch itself.  When applied in a series,


git log master..upstream-fixes | git shortlog

will produce

Ananda Raju (5):
s2io: updates for s2io driver
s2io: updates for s2io driver
s2io: updates for s2io driver
s2io: updates for s2io driver
s2io: updates for s2io driver

which clearly makes it impossible to distinguish between changesets. 
Please re-read Rule #1 of http://linux.yyz.us/patch-format.html


Also, re-read Rule #2.  Everything in your email body before the "---" 
terminator is copied DIRECTLY into the kernel changelog.  As such, 
comments like "Hello," and "List of changes in this patch:" must be 
hand-edited out of your email, before applying the patch.


Please fix these problems and resubmit.

Jeff


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/3] myri10ge: make wc_fifo usage load-time tunable


Brice Goglin wrote:

Under some circumstances, using WC without the WC fifo is faster.
So we make it possible to tune wc_fifo with a module parameter.

Signed-off-by: Brice Goglin [EMAIL PROTECTED]
---
 drivers/net/myri10ge/myri10ge.c |6 +-
 1 file changed, 5 insertions(+), 1 deletion(-)


applied 1-3


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] Update ucc_geth.c for new workqueue structure


[EMAIL PROTECTED] wrote:

From: Timur Tabi [EMAIL PROTECTED]

The workqueue interface changed with David Howells' patch on 11/22/2006
(SHA 65f27f38446e1976cc98fd3004b110fedcddd189).  Several drivers were
updated with that patch to handle the new interface, but ucc_geth.c
was not one of them.  This patch updates ucc_geth.c to support the new
model.

A compiler warning in set_mac_addr() was also fixed.

Signed-off-by: Timur Tabi [EMAIL PROTECTED]
Signed-off-by: Kumar Gala [EMAIL PROTECTED]

---
 drivers/net/ucc_geth.c |   12 +++-
 1 files changed, 7 insertions(+), 5 deletions(-)


applied


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 8/12] net_device seq_file

2007-01-18 Thread Eric W. Biederman

Stephen Hemminger [EMAIL PROTECTED] writes:

 On Wed, 17 Jan 2007 19:11:52 +0300
 Dmitry Mishin [EMAIL PROTECTED] wrote:

 Library function to create a seq_file in proc filesystem,
 showing some information for each netdevice.
 This code is present in the kernel in about 10 instances, and
 all of them can be converted to using introduced library function.
 
 Signed-off-by: Andrey Savochkin [EMAIL PROTECTED]

 But the implementations can use static data rather than allocation,
 also the long term desire is to get rid of supporting most of these proc
 interfaces.

While we do have the interfaces we need to support them, and not
let them bit rot.

That said so long as we have to individually register and unregister
proc entries it is an active measure by the caller and they should
be able to handle allocation and freeing of data structures as well.

Eric
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 0/8] partial resend: e1000 fixes and updates

2007-01-18 Thread Auke Kok


Jeff Garzik wrote:

Kok, Auke wrote:

Hi,

This patch series contains exclusively fixes for e1000. Some of these 
patches were
already sent in December, but didn't make it into any upstream tree 
yet. Most

importantly, it addresses two issues in the recently merged msi interrupt
handler and dynamic itr code. A performance fix and some minor 
cleanups are also

added. This brings the driver up to version 7.3.20-k2.

The summary below lists all patches. Ones that were previously acked 
are annotated

with (*)

These patches apply against netdev-2.6 #upstream-linus commit
77aab8bf22042d1658d4adbca8b71779e7f2d0ff. Please pull:

git pull git://lost.foo-projects.org/~ahkok/git/netdev-2.6 upstream-linus


Sigh.  I /know/ I've told you this before, but let's review the branches 
again.  NEVER EVER use branch 'upstream-linus'.  That is for Linus only.


gah, sorry about that.

I've moved it all over to #master-e1000 which applies against 
bf81b46482c0fa8ea638e409d39768ea92a6b0f0 (Linux 2.6.20-rc4 --Linus Torvalds).


Please pull:

git pull git://lost.foo-projects.org/~ahkok/git/netdev-2.6 master-e1000


Thanks,

Auke


* If there are no dependent e1000 changes, send patches against master 
(or torvalds/linux-2.6.git, if you prefer)
* If there are dependent e1000 changes in #upstream-fixes, send patches 
against #upstream-fixes
* If there are dependent e1000 changes in #upstream, send patches 
against #upstream


*writes cheatsheet*
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH REPOST 1/2] NET: Accurate packet scheduling for ATM/ADSL (kernel)

2007-01-18 Thread Russell Stuart

On Thu, 2007-01-18 at 05:05 +0100, Patrick McHardy wrote:
  Yesterday I was chatting about this at LCA 2007, and 
  it dawned on me that there is a problem with the dual
  RTAB/STAB approach.
  
  Currently the lookup in the kernel is
time_to_transmit_a_packet = RTAB[packet_length_seen_by_kernel]
  
  As I understand it, you are proposing to change that to
time_to_transmit_a_packet
  = RTAB[STAB[packet_length_seen_by_kernel]]
  = RTAB[packet_length_seen_on_the_wire]
  
  Given RTAB is the same in both cases the results of the
  calculation will be different (and ergo wrong in one case
  or the other).  RTAB can't change and remain compatible 
  with old kernels.  Ergo this approach breaks backward 
  compatibility.
 
 RTABs don't change, they continue to work as before. But when
 an STAB is present the lookup is based on the STAB size mapping.
 Neither one is wrong, RTABs calculate the transmission time based
 only on the specified rate, RTABs + STABs calculate the
 transmission time based on the rate, but include external overhead.

No argument with "RTAB works as before".  But aren't
you proposing to feed it the accurate packet lengths
calculated by STAB?  For example, if the VOIP IP datagram
is 60 bytes, older kernels will index RTAB by 
(60 + ethernet_header_size), STAB kernels will index
it by 159 bytes (3 ATM cells - say).  If tc doesn't
do a uname call or something, then it will send the
same RTAB to both kernels.  Obviously the results
returned by the RTAB lookup will be different.

If you aren't proposing to feed the RTAB lookup with
the output of STAB, then I still don't understand
why the current ATM patch isn't needed.

Or are you proposing tc behave differently on different
kernel versions.  (I have no problem with that, but
isn't it officially frowned upon?)

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] sis190: failure to set the MAC address from EEPROM

2007-01-18 Thread Francois Romieu

Fix from http://bugzilla.kernel.org/show_bug.cgi?id=7747

Signed-off-by: Andrew Morton [EMAIL PROTECTED]
Cc: [EMAIL PROTECTED]
Signed-off-by: Francois Romieu [EMAIL PROTECTED]
---
 drivers/net/sis190.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/net/sis190.c b/drivers/net/sis190.c
index b70ed79..45d91b1 100644
--- a/drivers/net/sis190.c
+++ b/drivers/net/sis190.c
@@ -1562,7 +1562,7 @@ static int __devinit 
sis190_get_mac_addr_from_eeprom(struct pci_dev *pdev,
for (i = 0; i  MAC_ADDR_LEN / 2; i++) {
__le16 w = sis190_read_eeprom(ioaddr, EEPROMMACAddr + i);
 
-   ((u16 *)dev-dev_addr)[0] = le16_to_cpu(w);
+   ((u16 *)dev-dev_addr)[i] = le16_to_cpu(w);
}
 
sis190_set_rgmii(tp, sis190_read_eeprom(ioaddr, EEPROMInfo));
-- 
1.4.4.4

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] Marvell Libertas 8388 802.11b/g USB driver (v2)

2007-01-18 Thread Christoph Hellwig

On Thu, Jan 18, 2007 at 10:41:45AM -0500, Dan Williams wrote:
 In the future we'll likely need some layering to support the 8385
 SDIO/CF variant, but most likely not in way the USB support is currently
 excessively layered and abstracted.

Yeah.  Let's summarize my unfortunately a bit too nasty comments and
your more helpful replies :-)

This driver still needs a lot more work, both to clean up a lot of crap
and integrate it better with the wireless stack.  And "OLPC needs this"
is not going to be an excuse of its own.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] Marvell Libertas 8388 802.11b/g USB driver (v2)

2007-01-18 Thread Jon Smirl


On 1/18/07, Christoph Hellwig [EMAIL PROTECTED] wrote:

On Thu, Jan 18, 2007 at 10:41:45AM -0500, Dan Williams wrote:
 In the future we'll likely need some layering to support the 8385
 SDIO/CF variant, but most likely not in way the USB support is currently
 excessively layered and abstracted.

Yeah.  Let's summarize my unfortunately a bit too nasty comments and
your more helpful replies :-)

This driver still needs a lot more work, both to cleanup a lot of crap
and integrate it better with the wireless stack.  And OLPC needs this
is not going to be an excuse of it's own.


The main feature of this chip is the on-chip support for 802.11s in
firmware. What is the plan for integrating 802.11s into the existing
wireless stacks? Does it make sense to do a softmac type 802.11s
implementation first to figure out the right places to put the hooks
for the 8388 hardware implementation?

--
Jon Smirl
[EMAIL PROTECTED]
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 0/12] L2 network namespace (v3)

2007-01-18 Thread YOSHIFUJI Hideaki / 吉藤英明

In article [EMAIL PROTECTED] (at Wed, 17 Jan 2007 18:51:14 +0300), Dmitry 
Mishin [EMAIL PROTECTED] says:

 ===
 L2 network namespaces
 
 The most straightforward concept of network virtualization is complete
 separation of namespaces, covering device list, routing tables, netfilter
 tables, socket hashes, and everything else.
 
 On input path, each packet is tagged with namespace right from the
 place where it appears from a device, and is processed by each layer
 in the context of this namespace.
 Non-root namespaces communicate with the outside world in two ways: by
 owning hardware devices, or receiving packets forwarded to them by their parent
 namespace via pass-through device.

Can you handle multicast / broadcast and IPv6, which are very important?

--yoshfuji
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 2/12] forcedeth: ring access


Ayaz Abdulla wrote:
This patch modifies ring access by using pointers. This avoids computing 
the current index and avoids accessing the base address of the rings.


Signed-Off-By: Ayaz Abdulla [EMAIL PROTECTED]


ACK patches 1-9.

Applied patches 1-4 successfully, patch #5 failed to apply.  Please 
resend patches 5+ against netdev-2.6.git#upstream.


Also, git-am (the git component used to apply yours and others' patches) 
complains that your patches have spaces prior to the tab indentations. 
Fixing that in patches 5+, or appending a final whitespace cleanup 
patch, would be appreciated.


Jeff



-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 10/12] forcedeth: tx max work


Ayaz Abdulla wrote:
This patch adds a limit to how much tx work can be done in each 
iteration of tx processing.


Signed-Off-By: Ayaz Abdulla [EMAIL PROTECTED]


What about the tail end of the work, when the limit is reached?

Remember that delaying the completion of TX's too long increases latency.

It seems to me that this patch needs a timer or somesuch, to guarantee 
that TX completions are not delayed too long in the worst case.


Jeff



-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 11/12] forcedeth: statistics supported


Ayaz Abdulla wrote:
This patch introduces hw statistics for older devices that supported it. 
It breaks up the counters supported into separate versions.


Signed-Off-By: Ayaz Abdulla [EMAIL PROTECTED]


ACK patches 11-12


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 2/3] chelsio: NAPI speed improvement


Stephen Hemminger wrote:

Speedup and cleanup the receive processing by eliminating the
mmio read and a lock round trip.

Signed-off-by: Stephen Hemminger [EMAIL PROTECTED]


applied

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 3/3] chelsio: more rx speedup


Stephen Hemminger wrote:

Cleanup receive processing some more:
   * do the reserve padding of skb during setup
   * don't pass constants to get_packet
   * do smart prefetch of skb
   * make copybreak a module parameter

Signed-off-by: Stephen Hemminger [EMAIL PROTECTED]


applied


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [2.6 patch] bonding.h: extern inline - static inline


Adrian Bunk wrote:

extern inline generates a warning with -Wmissing-prototypes and I'm
currently working on getting the kernel cleaned up for adding this to
the CFLAGS since it will help us to avoid a nasty class of runtime
errors.

If there are places that really need a forced inline, __always_inline
would be the correct solution.

Signed-off-by: Adrian Bunk [EMAIL PROTECTED]


applied


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Please pull 'upstream' branch of wireless-2.6


John W. Linville wrote:

The following changes since commit 10764889c6355cbb335cf0578ce12427475d1a65:
  Larry Finger (1):
bcm43xx: Fix failure to deliver PCI-E interrupts

are found in the git repository at:

  git://git.kernel.org/pub/scm/linux/kernel/git/linville/wireless-2.6.git 
upstream


ACK.  Open question of parentage, though:  I just rebased 
netdev-2.6.git#upstream.  Is your wireless-2.6 affected by this rebase?


If not, I will go ahead and pull.

Jeff



-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH REPOST 1/2] NET: Accurate packet scheduling for ATM/ADSL (kernel)

2007-01-18 Thread Russell Stuart

On Thu, 2007-01-18 at 12:37 +0100, Patrick McHardy wrote:
  Or are you proposing tc behave differently on different
  kernel versions.  (I have no problem with that, but
  isn't it officially frowned upon?)
 
 Yes. There is no way you can make this work on old kernels,
 nobody expects that. The important part is that everything
 continues to work as before and that both old and new iproute
 binaries work properly on both old and new kernels (new
 iproute on old kernels without STABs obviously).

I thought that some degree of compatibility was 
expected.  At the very least the newest version 
of tc must work on _any_ kernel at least as 
well as the version it replaces did.

I also thought newer kernels should work with older
versions of iproute2, albeit without the features
added in the newer versions.

Are you saying this is not so?



-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] Marvell Libertas 8388 802.11b/g USB driver (v2)

On Thu, 2007-01-18 at 22:40 +, Christoph Hellwig wrote:
 On Thu, Jan 18, 2007 at 10:41:45AM -0500, Dan Williams wrote:
  In the future we'll likely need some layering to support the 8385
  SDIO/CF variant, but most likely not in way the USB support is currently
  excessively layered and abstracted.
 
 Yeah.  Let's summarize my unfortunately a bit too nasty comments and
 your more helpful replies :-)
 
 This driver still needs a lot more work, both to cleanup a lot of crap
 and integrate it better with the wireless stack.  And OLPC needs this
 is not going to be an excuse of it's own.

Of course; the we-need-it-now-so-you-take-it-now attitude never gets
vendors or anyone else anywhere, so there's no reason to expect it would
work for OLPC either.  OLPC does not need, nor does any of us who work
on it ask for, special treatment.  Stuff will go through the correct
channels and will follow the normal kernel process.  That's all we can
expect, and that's all we ask.

Cheers,
Dan


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] Marvell Libertas 8388 802.11b/g USB driver (v2)

On Thu, 2007-01-18 at 17:54 -0500, Jon Smirl wrote:
 On 1/18/07, Christoph Hellwig [EMAIL PROTECTED] wrote:
  On Thu, Jan 18, 2007 at 10:41:45AM -0500, Dan Williams wrote:
   In the future we'll likely need some layering to support the 8385
   SDIO/CF variant, but most likely not in way the USB support is currently
   excessively layered and abstracted.
 
  Yeah.  Let's summarize my unfortunately a bit too nasty comments and
  your more helpful replies :-)
 
  This driver still needs a lot more work, both to cleanup a lot of crap
  and integrate it better with the wireless stack.  And OLPC needs this
  is not going to be an excuse of it's own.
 
 The main feature of this chip is the on-chip support for 802.11s in
 firmware. What is the plan for integrating 802.11s into the existing
 wireless stacks? Does it make sense to do a softmac type 802.11s
 implementation first to figure out the right places to put the hooks
 for the 8388 hardware implementation?

I believe Javier Cordona (who will also be at the Linux Wireless Summit
this weekend) is going to do a d80211-based implementation alongside the
Libertas 8388 firmware and driver bits too.  802.11s networking in Linux
is still quite immature, and we need to get people interested in a
standard stack talking to each other.

Dan


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] [IPV6] fixed the size of the netlink message notified by inet6_rt_notify().

2007-01-18 Thread YOSHIFUJI Hideaki / 吉藤英明

In article [EMAIL PROTECTED] (at Fri, 19 Jan 2007 01:31:08 +0900 (JST)), 
Noriaki TAKAMIYA [EMAIL PROTECTED] says:

  Wed, 17 Jan 2007 13:33:22 +0100
  [Subject: Re: [PATCH] [IPV6] fixed the size of the netlink message 
  notified by inet6_rt_notify().]
  Patrick McHardy [EMAIL PROTECTED] wrote...
 
  Somewhat related: I have this patch for 2.6.21 to get rid of the
  BUG_ON()s.
 
   I think the problem is the return value of inet6_rt_notify() is less
   than expected.

I guess Patrick meant that using BUG_ON() would be an overkill.

--yoshfuji
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Possible ways of dealing with OOM conditions.

On Thu, Jan 18, 2007 at 01:18:44PM +0100, Peter Zijlstra ([EMAIL PROTECTED]) 
wrote:
   How would that end up being different, I would have to replace all
   allocations done in the full network processing path.
   
   This seems a much less invasive method, all the (allocation) code can
   stay the way it is and use the normal allocation functions.
 
  And actually we are starting to talk about a different approach - having
  a separate allocator for network, which will be turned on on OOM (reclaim
  or at any other time).
 
 I think we might be, I'm more talking about requirements on the
 allocator, while you seem to talk about implementations.
 
 Replacing the allocator, or splitting it in two based on a condition are
 all fine as long as they observe the requirements.
 
 The requirement I add is that there is a reserve nobody touches unless
 given express permission.
 
 You could implement this by modifying each reachable allocator call site
 and stick a branch in and use an alternate allocator when the normal
 route fails and we do have permission; much like:
 
foo = kmalloc(size, gfp_mask);
 +  if (!foo  special)
 +foo = my_alloc(size)

Network is special in this regard, since it only has one allocation path
(actually it has one cache for skb, and usual kmalloc, but they are
called from only two functions).

So it would become 
ptr = network_alloc();
and network_alloc() would be usual kmalloc or call for own allocator in
case of deadlock.

 And earlier versions of this work did something like that. But it
 litters the code quite badly and its quite easy to miss spots. There can
 be quite a few allocations in processing network data.
 
 Hence my work on integrating this into the regular memory allocators.
 
 FYI; 'special' evaluates to something like:
   !(gfp_mask  __GFP_NOMEMALLOC) 
   ((gfp_mask  __GFP_EMERGENCY) || 
(!in_irq()  (current-flags  PF_MEMALLOC)))
 
 
   If you do not mind, I would like to refresh a
  discussion about network tree allocator,
 
   which utilizes own pool of
  pages, 
 
 very high order pages, no?

 This means that you have to either allocate at boot time and cannot
 resize/add pools; which means you waste all that memory if the network
 load never comes near using the reserved amount.
 
 Or, you get into all the same trouble the hugepages folks are trying so
 very hard to solve.

It is configurable - by default it takes pool of 32k pages for allocations for
jumbo-frames (e1000 requires such allocations for 9k frames
unfortunately), without jumbo-frame support it works with pool of 0-order
pages, which grows dynamically when needed.

  performs self-defragmentation of the memory, 
 
 Does it move memory about? 

It works in a page, not as pages - when neighbour regions are freed,
they are combined into a single one of bigger size - it could be
extended to move pages around to combine them into bigger ones
too, but the network stack requires high-order allocations in extremely rare
cases of broken design (Intel folks, sorry, but your hardware sucks in
that regard - a jumbo frame of 9k should not require 16k of mem plus
network overhead).

NTA also does not align buffers to the power of two - the extremely significant 
win of that approach can be found on the project's homepage with graphs of
failed allocations and state of the mem for different sizes of
allocations. Power-of-two overhead of SLAB is extremely high.

 All it does is try to avoid fragmentation by policy - a problem
 impossible to solve in general; but can achieve good results in view of
 practical limitations on program behaviour.
 
 Does your policy work for the given workload? we'll see.

 Also, on what level, each level has both internal and external
 fragmentation. I can argue that having large immovable objects in memory
 adds to the fragmentation issues on the page-allocator level.

NTA works with pages, not with contiguous memory, it reduces
fragmentation inside pages, which can not be solved in SLAB, where
objects from the same page can live in different caches and thus _never_
can be combined. Thus, the only solution for SLAB is copy, which is not a
good one for big sizes and is just wrong for big pages.
It is not about page moving and VM tricks, which are generally described
as fragmentation avoidance technique, but about how fragmentation
problem is solved in one page.

  is very SMP
  friendly in that regard that it is per-cpu like slab and never free
  objects on different CPUs, so they always stay in the same cache.
 
 This makes it very hard to guarantee a reserve limit. (Not impossible,
 just more difficult)

The whole pool of pages becomes reserve, since no one (and mainly VFS)
can consume that reserve.

  Among other goodies it allows to have full sending/receiving zero-copy.
 
 That won't ever work unless you have page aligned objects, otherwise you
 cannot map them into user-space. Which seems to be at odds with your
 tight packing/reduce internal fragmentation goals.
 
 Zero-copy entails

Re: Possible ways of dealing with OOM conditions.

2007-01-18 Thread Peter Zijlstra

On Thu, 2007-01-18 at 16:58 +0300, Evgeniy Polyakov wrote:

 Network is special in this regard, since it only has one allocation path
 (actually it has one cache for skb, and usual kmalloc, but they are
 called from only two functions).
 
 So it would become 
 ptr = network_alloc();
 and network_alloc() would be usual kmalloc or call for own allocator in
 case of deadlock.

There is more to networking than skbs only, what about route cache,
there is quite a lot of allocs in this fib_* stuff, IGMP etc...

  very high order pages, no?
 
  This means that you have to either allocate at boot time and cannot
  resize/add pools; which means you waste all that memory if the network
  load never comes near using the reserved amount.
  
  Or, you get into all the same trouble the hugepages folks are trying so
  very hard to solve.
 
 It is configurable - by default it takes pool of 32k pages for allocations for
 jumbo-frames (e1000 requires such allocations for 9k frames
 unfortunately), without jumbo-frame support it works with pool of 0-order
 pages, which grows dynamically when needed.

With 0-order pages, you can only fit 2 1500 byte packets in there, you
could perhaps stick some small skb heads in there as well, but why
bother, the waste isn't _that_ high.

Esp if you would make a slab for 1500 mtu packets (5*1638  2*4096; and
1638 should be enough, right?)

It would make sense to pack related objects into a page so you could
free all together.

   performs self-defragmentation of the memory, 
  
  Does it move memory about? 
 
 It works in a page, not as pages - when neighbour regions are freed,
 they are combined into single one with bigger size

Yeah, that is not defragmentation, defragmentation is moving active
regions about to create contiguous free space. What you do is free space
coalescence.

  but network stack requires high-order allocations in extremely rare
 cases of broken design (Intel folks, sorry, but your hardware sucks in
 that regard - jumbo frame of 9k should not require 16k of mem plus
 network overhead).

Well, if you have such hardware it's not rare at all. But yeah, that
sucks.

 NTA also does not align buffers to the power of two - extremely significant 
 win of that approach can be found on project's homepage with graphs of
 failed allocations and state of the mem for different sizes of
 allocations. Power-of-two overhead of SLAB is extremely high.

Sure you can pack the page a little better(*), but I thought the main
advantage was a speed increase.

(*) memory is generally cheaper than engineering efforts, esp on this
scale. The only advantage in the manual packing is that (with the fancy
hardware stream engine mentioned below) you could ensure they are
grouped together (then again, the hardware stream engine would, together
with a SG-DMA engine, take care of that).

 
  All it does is try to avoid fragmentation by policy - a problem
  impossible to solve in general; but can achieve good results in view of
  practical limitations on program behaviour.
  
  Does your policy work for the given workload? we'll see.
 
  Also, on what level, each level has both internal and external
  fragmentation. I can argue that having large immovable objects in memory
  adds to the fragmentation issues on the page-allocator level.
 
 NTA works with pages, not with contiguous memory, it reduces
 fragmentation inside pages, which can not be solved in SLAB, where
 objects from the same page can live in different caches and thus _never_
 can be combined. Thus, the only solution for SLAB is copy, which is not a
 good one for big sizes and is just wrong for big pages.

By allocating, and never returning the page to the page-allocator you've
increased the fragmentation on the page-allocator level significantly.
It will avoid a super page ever forming around that page.

 It is not about page moving and VM tricks, which are generally described
 as fragmentation avoidance technique, but about how fragmentation
 problem is solved in one page.

Short of defragmentation (move active regions about) fragmentation is an
unsolved problem. For any heuristic there is a pattern that will defeat
it. 

Luckily program allocation behaviour is usually very regular (or
decomposable in well behaved groups).

   is very SMP
   friendly in that regard that it is per-cpu like slab and never free
   objects on different CPUs, so they always stay in the same cache.
  
  This makes it very hard to guarantee a reserve limit. (Not impossible,
  just more difficult)
 
 The whole pool of pages becomes reserve, since no one (and mainly VFS)
 can consume that reserve.

Ah, but there you violate my requirement, any network allocation can
claim the last bit of memory. The whole idea was that the reserve is
explicitly managed.

It not only needs protection from other users but also from itself.

   Among other goodies it allows to have full sending/receiving zero-copy.
  
  That won't ever work unless you have page aligned objects, otherwise

Re: Possible ways of dealing with OOM conditions.

On Thu, Jan 18, 2007 at 04:10:52PM +0100, Peter Zijlstra ([EMAIL PROTECTED]) 
wrote:
 On Thu, 2007-01-18 at 16:58 +0300, Evgeniy Polyakov wrote:
 
  Network is special in this regard, since it only has one allocation path
  (actually it has one cache for skb, and usual kmalloc, but they are
  called from only two functions).
  
  So it would become 
  ptr = network_alloc();
  and network_alloc() would be usual kmalloc or call for own allocator in
  case of deadlock.
 
 There is more to networking than skbs only, what about route cache,
 there is quite a lot of allocs in this fib_* stuff, IGMP etc...

skbs are the most extensively used path.
Actually the same is applied to route - dst_entries and rtable are
allocated through own wrappers.

   very high order pages, no?
  
   This means that you have to either allocate at boot time and cannot
   resize/add pools; which means you waste all that memory if the network
   load never comes near using the reserved amount.
   
   Or, you get into all the same trouble the hugepages folks are trying so
   very hard to solve.
  
  It is configurable - by default it takes pool of 32k pages for allocations 
  for
  jumbo-frames (e1000 requires such allocations for 9k frames
  unfortunately), without jumbo-frame support it works with pool of 0-order
  pages, which grows dynamically when needed.
 
 With 0-order pages, you can only fit 2 1500 byte packets in there, you
 could perhaps stick some small skb heads in there as well, but why
 bother, the waste isn't _that_ high.
 
 Esp if you would make a slab for 1500 mtu packets (5*1638  2*4096; and
 1638 should be enough, right?)
 
 It would make sense to pack related objects into a page so you could
 free all together.

With power-of-two allocation SLAB wastes 500 bytes for each 1500 MTU
packet (roughly), it is actually one ACK packet - and I hear it from
person who develops a system, which is aimed to guarantee ACK
allocation in OOM :)

SLAB overhead is _very_ expensive for network - what if jumbo frame is
used? It becomes incredible in that case, although modern NICs allows
scatter-gather, which is aimed to fix the problem.

Cache misses for small packet flow due to the fact, that the same data
is allocated and freed  and accessed on different CPUs will become an
issue soon, not right now, since two-four core CPUs are not yet to be
very popular and price for the cache miss is not _that_ high.

performs self-defragmentation of the memory, 
   
   Does it move memory about? 
  
  It works in a page, not as pages - when neighbour regions are freed,
  they are combined into single one with bigger size
 
 Yeah, that is not defragmentation, defragmentation is moving active
 regions about to create contiguous free space. What you do is free space
 coalescence.

That is wrong definition just because no one developed different system.
Defragmentation is a result of broken system.

Existing design _does_not_ allow to have the situation when whole page
belongs to the same cache after it was actively used, the same is
applied to the situation when several pages, which create contiguous
region, are used by different users, so people start develop VM tricks
to move pages around so they would be placed near in address space.

Do not fix the result, fix the reason.

   but network stack requires high-order allocations in extremely rare
  cases of broken design (Intel folks, sorry, but your hardware sucks in
  that regard - jumbo frame of 9k should not require 16k of mem plus
  network overhead).
 
 Well, if you have such hardware its not rare at all, But yeah that
 sucks.

They do a good job developing different approaches to work around that
hardware 'feature', but this is still wrong situation.

  NTA also does not align buffers to the power of two - extremely significant 
  win of that approach can be found on project's homepage with graphs of
  failed allocations and state of the mem for different sizes of
  allocations. Power-of-two overhead of SLAB is extremely high.
 
 Sure you can pack the page a little better(*), but I thought the main
 advantage was a speed increase.
 
 (*) memory is generally cheaper than engineering efforts, esp on this
 scale. The only advantage in the manual packing is that (with the fancy
 hardware stream engine mentioned below) you could ensure they are
 grouped together (then again, the hardware stream engine would, together
 with a SG-DMA engine, take care of that).

Extensive way of doing things.
That is wrong.

   All it does is try to avoid fragmentation by policy - a problem
   impossible to solve in general; but can achieve good results in view of
   practical limitations on program behaviour.
   
   Does your policy work for the given workload? we'll see.
  
   Also, on what level, each level has both internal and external
   fragmentation. I can argue that having large immovable objects in memory
   adds to the fragmentation issues on the page-allocator level.
  
  NTA works with pages, not with

Re: [PATCH 2.6.20-rc3]: 8139cp: Don't blindly enable interrupts

Chris Lalancette wrote:

Francois Romieu wrote:

Chris Lalancette [EMAIL PROTECTED] :
[...]

Thanks for the comments. While the patch you sent will help, there are
still other places that will have problems. For example, in netpoll_send_skb,
we call local_irq_save(flags), then call dev-hard_start_xmit(), and then call
local_irq_restore(flags). This is a similar situation to what I described
above; we will re-enable interrupts in cp_start_xmit(), when netpoll_send_skb
doesn't expect that, and will probably run into issues.
Is there a problem with changing cp_start_xmit to use the
spin_lock_irqsave(), besides the extra instructions it needs?

No. Given the history of locking in netpoll and the content of
Documentation/networking/netdevices.txt, asking Herbert which rule(s)
the code is supposed to follow seemed safer to me.

You can forget my patch.

Please resend your patch inlined to Jeff as described in
http://linux.yyz.us/patch-format.html.

Francois,
Great. Resending mail, shortening subject to 65 characters and
inlining the patch.

Thanks,
Chris Lalancette

Similar to this commit:

http://kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=d15e9c4d9a75702b30e00cdf95c71c88e3f3f51e

It's not safe in cp_start_xmit to blindly call spin_lock_irq and then
spin_unlock_irq, since it may very well be the case that cp_start_xmit
was called with interrupts already disabled (I came across this bug in
the context of netdump in RedHat kernels, but the same issue holds, for
example, in netconsole). Therefore, replace all instances of
spin_lock_irq and spin_unlock_irq with spin_lock_irqsave and
spin_unlock_irqrestore, respectively, in cp_start_xmit(). I tested this
against a fully-virtualized Xen guest using netdump, which happens to
use the 8139cp driver to talk to the emulated hardware. I don't have a
real piece of 8139cp hardware to test on, so someone else will have to
do that.

Signed-off-by: Chris Lalancette [EMAIL PROTECTED]

applied.

In the future, please remove the quoted emails stuff, and anything else
that does not belong in the kernel changelog. It must be hand-edited
out, before using git-am to merge your patch into the kernel tree.

Jeff

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at http://vger.kernel.org/majordomo-info.html

Re: Possible ways of dealing with OOM conditions.

2007-01-18 Thread Peter Zijlstra

On Thu, 2007-01-18 at 18:50 +0300, Evgeniy Polyakov wrote:
 On Thu, Jan 18, 2007 at 04:10:52PM +0100, Peter Zijlstra ([EMAIL PROTECTED]) 
 wrote:
  On Thu, 2007-01-18 at 16:58 +0300, Evgeniy Polyakov wrote:
  
   Network is special in this regard, since it only has one allocation path
   (actually it has one cache for skb, and usual kmalloc, but they are
   called from only two functions).
   
   So it would become 
   ptr = network_alloc();
   and network_alloc() would be usual kmalloc or call for own allocator in
   case of deadlock.
  
  There is more to networking than skbs only, what about route cache,
  there is quite a lot of allocs in this fib_* stuff, IGMP etc...
 
 skbs are the most extensively used path.
 Actually the same is applied to route - dst_entries and rtable are
 allocated through own wrappers.

Still, edit all places and perhaps forget one and make sure all new code
doesn't forget about it, or pick a solution that covers everything.

 With power-of-two allocation SLAB wastes 500 bytes for each 1500 MTU
 packet (roughly), it is actually one ACK packet - and I hear it from
 person who develops a system, which is aimed to guarantee ACK
 allocation in OOM :)

I need full data traffic during OOM, not just a single ACK.

 SLAB overhead is _very_ expensive for network - what if jumbo frame is
 used? It becomes incredible in that case, although modern NICs allows
 scatter-gather, which is aimed to fix the problem.

Jumbo frames are fine if the hardware can do SG-DMA..

 Cache misses for small packet flow due to the fact, that the same data
 is allocated and freed  and accessed on different CPUs will become an
 issue soon, not right now, since two-four core CPUs are not yet to be
 very popular and price for the cache miss is not _that_ high.

SGI does networking too, right?

 performs self-defragmentation of the memory, 

Does it move memory about? 
   
   It works in a page, not as pages - when neighbour regions are freed,
   they are combined into single one with bigger size
  
  Yeah, that is not defragmentation, defragmentation is moving active
  regions about to create contiguous free space. What you do is free space
  coalescence.
 
 That is wrong definition just because no one developed different system.
 Defragmentation is a result of broken system.
 
 Existing design _does_not_ allow to have the situation when whole page
 belongs to the same cache after it was actively used, the same is
 applied to the situation when several pages, which create contiguous
 region, are used by different users, so people start develop VM tricks
 to move pages around so they would be placed near in address space.
 
 Do not fix the result, fix the reason.

*plonk* 30+yrs of research ignored.

   The whole pool of pages becomes reserve, since no one (and mainly VFS)
   can consume that reserve.
  
  Ah, but there you violate my requirement, any network allocation can
  claim the last bit of memory. The whole idea was that the reserve is
  explicitly managed.
  
  It not only needs protection from other users but also from itself.
 
 Specifying some users as good and others as bad generally tends to very
 bad behaviour. Your approach only covers some users, mine does not
 differentiate between users,

The kernel is special, right? It has priority over whatever user-land
does.

  but prevents system from such situation at all.

I'm not seeing that, with your approach nobody stops the kernel from
filling up the memory with user-space network traffic.

swapping is not some random user process, its a fundamental kernel task,
if this fails the machine is history.

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Possible ways of dealing with OOM conditions.

On Thu, Jan 18, 2007 at 06:31:53PM +0100, Peter Zijlstra ([EMAIL PROTECTED]) 
wrote:

  skbs are the most extensively used path.
  Actually the same is applied to route - dst_entries and rtable are
  allocated through own wrappers.
 
 Still, edit all places and perhaps forget one and make sure all new code
 doesn't forget about it, or pick a solution that covers everything.

There is _one_ place for allocation of any kind of object.
skb path has two places.

  With power-of-two allocation SLAB wastes 500 bytes for each 1500 MTU
  packet (roughly), it is actually one ACK packet - and I hear it from
  person who develops a system, which is aimed to guarantee ACK
  allocation in OOM :)
 
 I need full data traffic during OOM, not just a single ACK.

But your code exactly limits codepath to several allocations, which must
be ACK. You do not have enough reserve to support whole traffic.
So the right solution, IMO, is to _prevent_ such situation, which means
that allocation is not allowed to depend on external conditions like
VFS.

Actually my above sentences were about the case, when only having
different allocator, it is possible to dramatically change memory usage
model, which suffers greatly from power-of-two allocations. OOM
condition is one of the results which has big SLAB overhead among other
roots. Actually all paths which work with kmem_cache are safe against
it, since kernel cache packs objects, but those who use raw kmalloc have
problems.

  SLAB overhead is _very_ expensive for network - what if jumbo frame is
  used? It becomes incredible in that case, although modern NICs allows
  scatter-gather, which is aimed to fix the problem.
 
 Jumbo frames are fine if the hardware can do SG-DMA..

Notice word _IF_ in your sentence. e1000 for example can not (or it can,
but driver is not developed for such scenario).

  Cache misses for small packet flow due to the fact, that the same data
  is allocated and freed  and accessed on different CPUs will become an
  issue soon, not right now, since two-four core CPUs are not yet to be
  very popular and price for the cache miss is not _that_ high.
 
 SGI does networking too, right?

Yep, Christoph Lameter developed own allocator too.

I agree with you, that if that price is too high already, then it is an
additional sign to look into network tree allocator (yep, name is bad)
again.

  That is wrong definition just because no one developed different system.
  Defragmentation is a result of broken system.
  
  Existing design _does_not_ allow to have the situation when whole page
  belongs to the same cache after it was actively used, the same is
  applied to the situation when several pages, which create contiguous
  region, are used by different users, so people start develop VM tricks
  to move pages around so they would be placed near in address space.
  
  Do not fix the result, fix the reason.
 
 *plonk* 30+yrs of research ignored.

30 years to develop SLAB allocator? In what universe that is all about?

The whole pool of pages becomes reserve, since no one (and mainly VFS)
can consume that reserve.
   
   Ah, but there you violate my requirement, any network allocation can
   claim the last bit of memory. The whole idea was that the reserve is
   explicitly managed.
   
   It not only needs protection from other users but also from itself.
  
  Specifying some users as good and others as bad generally tends to very
  bad behaviour. Your approach only covers some users, mine does not
  differentiate between users,
 
 The kernel is special, right? It has priority over whatever user-land
 does.

Kernel only does ACK generation and allocation for userspace.
Kernel does not know that some of users are potentially good or bad, and
if you will export this socket option to the userspace, everyone will
think that his application is good enough to use reserve.

So, for kernel-only side you just need to preallocate pool of packets
and use them when system is in OOM (reclaim). For the long direction,
new approach of memory allocation should be developed, and there are
different works in that direction - NTA is one of them and not the only
one, for the best results it must be combined with vm-tricks
defragmentation too.

   but prevents system from such situation at all.
 
 I'm not seeing that, with your approach nobody stops the kernel from
 filling up the memory with user-space network traffic.
 
 swapping is not some random user process, its a fundamental kernel task,
 if this fails the machine is history.

You completely miss the point. The main goal is to
1. reduce fragmentation and/or enable self defragmentation (which is
done in NTA), this also reduces memory usage.
2. perform correct recover steps in OOM - reduce memory usage, use
different allocator and/or reserve (which is the case, where NTA can be
used)
3. do not allow OOM condition - unfortunately it is not always possible,
but having separated allocation allows to not depend on external
conditions

Re: [PATCH 2.6.20-rc3]: 8139cp: Don't blindly enable interrupts

2007-01-18 Thread Chris Lalancette

Jeff Garzik wrote:

Chris Lalancette wrote:

Francois Romieu wrote:

Chris Lalancette [EMAIL PROTECTED] :
[...]

Thanks for the comments. While the patch you sent will help,
there are
still other places that will have problems. For example, in
netpoll_send_skb,
we call local_irq_save(flags), then call dev-hard_start_xmit(),
and then call
local_irq_restore(flags). This is a similar situation to what I
described
above; we will re-enable interrupts in cp_start_xmit(), when
netpoll_send_skb
doesn't expect that, and will probably run into issues.
Is there a problem with changing cp_start_xmit to use the
spin_lock_irqsave(), besides the extra instructions it needs?

No. Given the history of locking in netpoll and the content of
Documentation/networking/netdevices.txt, asking Herbert which rule(s)
the code is supposed to follow seemed safer to me.

You can forget my patch.

Please resend your patch inlined to Jeff as described in
http://linux.yyz.us/patch-format.html.

Francois,
Great. Resending mail, shortening subject to 65 characters and
inlining the patch.

Thanks,
Chris Lalancette

Similar to this commit:

http://kernel.org/git/?p=linux/kernel/git/torvalds/linux-2.6.git;a=commitdiff;h=d15e9c4d9a75702b30e00cdf95c71c88e3f3f51e

Signed-off-by: Chris Lalancette [EMAIL PROTECTED]

applied.

In the future, please remove the quoted emails stuff, and anything
else that does not belong in the kernel changelog. It must be
hand-edited out, before using git-am to merge your patch into the
kernel tree.

Jeff

Jeff,
Ah, I see. Noted. Thanks.

Chris Lalancette
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at http://vger.kernel.org/majordomo-info.html

[2.6 patch] drivers/net/irda/vlsi_ir.{h,c}: remove kernel 2.4 code

2007-01-18 Thread Adrian Bunk

This patch removes kernel 2.4 compatibility code.

Signed-off-by: Adrian Bunk [EMAIL PROTECTED]

---

 drivers/net/irda/vlsi_ir.c |   16 
 drivers/net/irda/vlsi_ir.h |   33 -
 2 files changed, 8 insertions(+), 41 deletions(-)

--- linux-2.6.20-rc4-mm1/drivers/net/irda/vlsi_ir.h.old 2007-01-18 
21:50:43.0 +0100
+++ linux-2.6.20-rc4-mm1/drivers/net/irda/vlsi_ir.h 2007-01-18 
21:53:54.0 +0100
@@ -41,39 +41,6 @@
 #define PCI_CLASS_SUBCLASS_MASK0x
 #endif
 
-/* in recent 2.5 interrupt handlers have non-void return value */
-#ifndef IRQ_RETVAL
-typedef void irqreturn_t;
-#define IRQ_NONE
-#define IRQ_HANDLED
-#define IRQ_RETVAL(x)
-#endif
-
-/* some stuff need to check kernelversion. Not all 2.5 stuff was present
- * in early 2.5.x - the test is merely to separate 2.4 from 2.5
- */
-#include linux/version.h
-
-#if LINUX_VERSION_CODE  KERNEL_VERSION(2,5,0)
-
-/* PDE() introduced in 2.5.4 */
-#ifdef CONFIG_PROC_FS
-#define PDE(inode) ((inode)-i_private)
-#endif
-
-/* irda crc16 calculation exported in 2.5.42 */
-#define irda_calc_crc16(fcs,buf,len)   (GOOD_FCS)
-
-/* we use this for unified pci device name access */
-#define PCIDEV_NAME(pdev)  ((pdev)-name)
-
-#else /* 2.5 or later */
-
-/* whatever we get from the associated struct device - bus:slot:dev.fn id */
-#define PCIDEV_NAME(pdev)  (pci_name(pdev))
-
-#endif
-
 /*  */
 
 /* non-standard PCI registers */
--- linux-2.6.20-rc4-mm1/drivers/net/irda/vlsi_ir.c.old 2007-01-18 
21:53:58.0 +0100
+++ linux-2.6.20-rc4-mm1/drivers/net/irda/vlsi_ir.c 2007-01-18 
21:54:56.0 +0100
@@ -166,7 +166,7 @@
unsigned i;
 
seq_printf(seq, \n%s (vid/did: %04x/%04x)\n,
-  PCIDEV_NAME(pdev), (int)pdev-vendor, (int)pdev-device);
+  pci_name(pdev), (int)pdev-vendor, (int)pdev-device);
seq_printf(seq, pci-power-state: %u\n, (unsigned) 
pdev-current_state);
seq_printf(seq, resources: irq=%u / io=0x%04x / dma_mask=0x%016Lx\n,
   pdev-irq, (unsigned)pci_resource_start(pdev, 0), (unsigned 
long long)pdev-dma_mask);
@@ -1401,7 +1401,7 @@
 
if (vlsi_start_hw(idev))
IRDA_ERROR(%s: failed to restart hw - %s(%s) unusable!\n,
-  __FUNCTION__, PCIDEV_NAME(idev-pdev), ndev-name);
+  __FUNCTION__, pci_name(idev-pdev), ndev-name);
else
netif_start_queue(ndev);
 }
@@ -1643,7 +1643,7 @@
pdev-current_state = 0; /* hw must be running now */
 
IRDA_MESSAGE(%s: IrDA PCI controller %s detected\n,
-drivername, PCIDEV_NAME(pdev));
+drivername, pci_name(pdev));
 
if ( !pci_resource_start(pdev,0)
 || !(pci_resource_flags(pdev,0)  IORESOURCE_IO) ) {
@@ -1728,7 +1728,7 @@
 
pci_set_drvdata(pdev, NULL);
 
-   IRDA_MESSAGE(%s: %s removed\n, drivername, PCIDEV_NAME(pdev));
+   IRDA_MESSAGE(%s: %s removed\n, drivername, pci_name(pdev));
 }
 
 #ifdef CONFIG_PM
@@ -1748,7 +1748,7 @@
 
if (!ndev) {
IRDA_ERROR(%s - %s: no netdevice \n,
-  __FUNCTION__, PCIDEV_NAME(pdev));
+  __FUNCTION__, pci_name(pdev));
return 0;
}
idev = ndev-priv;  
@@ -1759,7 +1759,7 @@
pdev-current_state = state.event;
}
else
-   IRDA_ERROR(%s - %s: invalid suspend request %u - 
%u\n, __FUNCTION__, PCIDEV_NAME(pdev), pdev-current_state, state.event);
+   IRDA_ERROR(%s - %s: invalid suspend request %u - 
%u\n, __FUNCTION__, pci_name(pdev), pdev-current_state, state.event);
up(idev-sem);
return 0;
}
@@ -1787,7 +1787,7 @@
 
if (!ndev) {
IRDA_ERROR(%s - %s: no netdevice \n,
-  __FUNCTION__, PCIDEV_NAME(pdev));
+  __FUNCTION__, pci_name(pdev));
return 0;
}
idev = ndev-priv;  
@@ -1795,7 +1795,7 @@
if (pdev-current_state == 0) {
up(idev-sem);
IRDA_WARNING(%s - %s: already resumed\n,
-__FUNCTION__, PCIDEV_NAME(pdev));
+__FUNCTION__, pci_name(pdev));
return 0;
}


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[BUG] 2.6.20-rc4-mm1: Panic in e1000_write_vfta_82543()

2007-01-18 Thread Sukadev Bhattiprolu

I get following panic on 2.6.20-rc4-mm1 on a 2-cpu AMD Opteron system.

Same basic config file seems to work with 2.6.20-rc2-mm1 on this same
system. Have not tried -rc3-mm1 yet.

Attached are config file and lspci -vv output. Let me know if you need
more info.

Suka

---

[  168.925840] Freeing unused kernel memory: 320k freed
 * INIT: version 2.86 booting
 * Starting basic networking...  [ ok ]
 * Starting kernel event manager...  [ ok ]
 * Loading hardware drivers...   [ ok ]
 * Starting PCMCIA services... * PCMCIA not present
 * Loading manual drivers... [ ok ]
[  171.575122] Unable to handle kernel paging request at c20100ec55fc RIP:
[  171.584632]  [804a9858] e1000_write_vfta_82543+0x58/0xd0
[  171.614833] PGD 114e067 PUD 0
[  171.633943] Oops:  [1] PREEMPT SMP
[  171.654614] last sysfs file: 
/devices/pci:00/:00:06.0/:01:06.0/host0/target0:0:0/0:0:0:0/rev
[  171.698158] CPU 1
[  171.698161] Modules linked in:
[  171.698164] Pid: 2173, comm: ifconfig Not tainted 2.6.20-rc4-mm1 #1
 * Checking root[  171.698166] RIP: 0010:[804a9858]  
[804a9858] e1000_write_vfta_82543+0x58/0xd0
 file system...[  171.698171] RSP: 0018:81007dfc5cf8  EFLAGS: 00010286
[  171.698174] RAX: c20100ec55fc RBX: 81003e4c8948 RCX: 
/dev/shm/root: clean, 662056/2443200 files, 3121107/4883752 bloc[  171.698176] 
RDX:  RSI:  RDI: 81003e4c8948
ks
 [ ok ]
[  171.698178] RBP:  R08: 0010 R09: 
[  171.698181] R10: 0002 R11: 804a9800 R12: c2ec
[  171.698183] R13: fffc R14:  R15: 81003e4c8000
 * Setting up LVM Volume Groups...[  171.698186] FS:  2ab17ce3f6d0() 
GS:81007f0b0bc0() knlGS:
[  171.698188] CS:  0010 DS:  ES:  CR0: 8005003b
[  171.698191] CR2: c20100ec55fc CR3: 7d828000 CR4: 06e0
[  171.698194] Process ifconfig (pid: 2173, threadinfo 81007dfc4000, task 
81007dd957e0)
[  171.698196] Stack:  8000 81003e4c8680 c2ec 
81003e4c8000
[  171.698202]  7fff2dfba960 804c103d 81003e4c8680 

[  171.698205]  81003e4c8680   
804c3195
[  171.698208] Call Trace:
[  171.698216]  [804c103d] e1000_vlan_rx_register+0x1dd/0x210
[  171.698219]  [804c3195] e1000_up+0x35/0x4b0
[  171.698222]  [804c3724] e1000_open+0x74/0x100
[  171.698227]  [805626fe] dev_open+0x3e/0xa0
[  171.698230]  [8056184f] dev_change_flags+0x6f/0x160
[  171.698234]  [805a5174] devinet_ioctl+0x2d4/0x6e0
[  171.698238]  [803f7e01] __up_read+0x21/0xb0
[  171.698243]  [8055664c] sock_ioctl+0x1fc/0x230
[  171.698247]  [8029dc0f] do_ioctl+0x2f/0xa0
[  171.698249]  [8029df3b] vfs_ioctl+0x2bb/0x2f0
[  171.698252]  [8029dfb9] sys_ioctl+0x49/0x80
[  171.698256]  [805e375d] error_exit+0x0/0x84
[  171.698259]  [802098be] system_call+0x7e/0x83
[  171.698261]
[  171.698262]
[  171.698262] Code: 44 8b 20 e8 30 7e 00 00 83 bb 94 01 00 00 03 75 3c 83 e5 01
[  171.698268] RIP  [804a9858] e1000_write_vfta_82543+0x58/0xd0
[  171.698273]  RSP 81007dfc5cf8
[  171.698274] CR2: c20100ec55fc
[  171.698276]  6EXT3 FS on sda1, internal journal
#
# Automatically generated make config: don't edit
# Linux kernel version: 2.6.20-rc4-mm1
# Thu Jan 18 15:00:14 2007
#
CONFIG_X86_64=y
CONFIG_64BIT=y
CONFIG_X86=y
CONFIG_GENERIC_TIME=y
CONFIG_GENERIC_TIME_VSYSCALL=y
CONFIG_ZONE_DMA32=y
CONFIG_LOCKDEP_SUPPORT=y
CONFIG_STACKTRACE_SUPPORT=y
CONFIG_SEMAPHORE_SLEEPERS=y
CONFIG_MMU=y
CONFIG_ZONE_DMA=y
CONFIG_RWSEM_GENERIC_SPINLOCK=y
CONFIG_GENERIC_HWEIGHT=y
CONFIG_GENERIC_CALIBRATE_DELAY=y
CONFIG_X86_CMPXCHG=y
CONFIG_EARLY_PRINTK=y
CONFIG_GENERIC_ISA_DMA=y
CONFIG_GENERIC_IOMAP=y
CONFIG_ARCH_MAY_HAVE_PC_FDC=y
CONFIG_ARCH_POPULATES_NODE_MAP=y
CONFIG_DMI=y
CONFIG_AUDIT_ARCH=y
CONFIG_GENERIC_BUG=y
# CONFIG_ARCH_HAS_ILOG2_U32 is not set
# CONFIG_ARCH_HAS_ILOG2_U64 is not set
CONFIG_DEFCONFIG_LIST=/lib/modules/$UNAME_RELEASE/.config

#
# Code maturity level options
#
CONFIG_EXPERIMENTAL=y
CONFIG_LOCK_KERNEL=y
CONFIG_INIT_ENV_ARG_LIMIT=32

#
# General setup
#
CONFIG_LOCALVERSION=
# CONFIG_LOCALVERSION_AUTO is not set
CONFIG_SWAP=y
# CONFIG_SWAP_PREFETCH is not set
CONFIG_SYSVIPC=y
# CONFIG_IPC_NS is not set
CONFIG_POSIX_MQUEUE=y
CONFIG_BSD_PROCESS_ACCT=y
CONFIG_BSD_PROCESS_ACCT_V3=y
# CONFIG_TASKSTATS is not set
# CONFIG_UTS_NS is not set
CONFIG_USER_NS=y
CONFIG_AUDIT=y
# CONFIG_AUDITSYSCALL is not set
CONFIG_IKCONFIG=y
CONFIG_IKCONFIG_PROC=y
CONFIG_CPUSETS=y
CONFIG_SYSFS_DEPRECATED=y
#

Re: [BUG] 2.6.20-rc4-mm1: Panic in e1000_write_vfta_82543()

2007-01-18 Thread Sukadev Bhattiprolu

Auke Kok [EMAIL PROTECTED] wrote:
| Sukadev Bhattiprolu wrote:
| I get following panic on 2.6.20-rc4-mm1 on a 2-cpu AMD Opteron system.
| 
| Same basic config file seems to work with 2.6.20-rc2-mm1 on this same
| system. Have not tried -rc3-mm1 yet.
| 
| Attached are config file and lspci -vv output. Let me know if you need
| more info.
| 
| Suka
| 
| ---
| 
| [  168.925840] Freeing unused kernel memory: 320k freed
|  * INIT: version 2.86 booting
|  * Starting basic networking...  [ 
|  ok ]
|  * Starting kernel event manager...  [ 
|  ok ]
|  * Loading hardware drivers...   [ 
|  ok ]
|  * Starting PCMCIA services... * PCMCIA not present
|  * Loading manual drivers... [ 
|  ok ]
| [  171.575122] Unable to handle kernel paging request at c20100ec55fc 
| RIP:
| [  171.584632]  [804a9858] e1000_write_vfta_82543+0x58/0xd0
...
| [  171.698208] Call Trace:
| [  171.698216]  [804c103d] e1000_vlan_rx_register+0x1dd/0x210
| [  171.698219]  [804c3195] e1000_up+0x35/0x4b0
| [  171.698222]  [804c3724] e1000_open+0x74/0x100
| [  171.698227]  [805626fe] dev_open+0x3e/0xa0
| [  171.698230]  [8056184f] dev_change_flags+0x6f/0x160
| [  171.698234]  [805a5174] devinet_ioctl+0x2d4/0x6e0
| [  171.698238]  [803f7e01] __up_read+0x21/0xb0
| [  171.698243]  [8055664c] sock_ioctl+0x1fc/0x230
| [  171.698247]  [8029dc0f] do_ioctl+0x2f/0xa0
| [  171.698249]  [8029df3b] vfs_ioctl+0x2bb/0x2f0
| [  171.698252]  [8029dfb9] sys_ioctl+0x49/0x80
| [  171.698256]  [805e375d] error_exit+0x0/0x84
| [  171.698259]  [802098be] system_call+0x7e/0x83
| [  171.698261]
| [  171.698262]
| [  171.698262] Code: 44 8b 20 e8 30 7e 00 00 83 bb 94 01 00 00 03 75 3c 83 
| e5 01
| [  171.698268] RIP  [804a9858] e1000_write_vfta_82543+0x58/0xd0
| [  171.698273]  RSP 81007dfc5cf8
| [  171.698274] CR2: c20100ec55fc
| [  171.698276]  6EXT3 FS on sda1, internal journal
| 
| Hi,
| 
| I believe this is one of the bugs that is fixed in the patch that I sent 
| monday. Please
| try again with the patch applied to your tree and re-test. Thanks. I didn't 
| see Andrew
| merge the patch yet.
| 
| see: http://lkml.org/lkml/2007/1/16/226
| 
| I can mail the patch if you can't find it. Just ping me privately.

Yep. Seems to fix the crash. Thanks !

Suka
| 
| Cheers,
| 
| Auke
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [BUG] 2.6.20-rc4-mm1: Panic in e1000_write_vfta_82543()

2007-01-18 Thread Auke Kok


Sukadev Bhattiprolu wrote:

I get following panic on 2.6.20-rc4-mm1 on a 2-cpu AMD Opteron system.

Same basic config file seems to work with 2.6.20-rc2-mm1 on this same
system. Have not tried -rc3-mm1 yet.

Attached are config file and lspci -vv output. Let me know if you need
more info.

Suka

---

[  168.925840] Freeing unused kernel memory: 320k freed
 * INIT: version 2.86 booting
 * Starting basic networking...  [ ok ]
 * Starting kernel event manager...  [ ok ]
 * Loading hardware drivers...   [ ok ]
 * Starting PCMCIA services... * PCMCIA not present
 * Loading manual drivers... [ ok ]
[  171.575122] Unable to handle kernel paging request at c20100ec55fc RIP:
[  171.584632]  [804a9858] e1000_write_vfta_82543+0x58/0xd0
[  171.614833] PGD 114e067 PUD 0
[  171.633943] Oops:  [1] PREEMPT SMP
[  171.654614] last sysfs file: 
/devices/pci:00/:00:06.0/:01:06.0/host0/target0:0:0/0:0:0:0/rev
[  171.698158] CPU 1
[  171.698161] Modules linked in:
[  171.698164] Pid: 2173, comm: ifconfig Not tainted 2.6.20-rc4-mm1 #1
 * Checking root[  171.698166] RIP: 0010:[804a9858]  
[804a9858] e1000_write_vfta_82543+0x58/0xd0
 file system...[  171.698171] RSP: 0018:81007dfc5cf8  EFLAGS: 00010286
[  171.698174] RAX: c20100ec55fc RBX: 81003e4c8948 RCX: 
/dev/shm/root: clean, 662056/2443200 files, 3121107/4883752 bloc[  171.698176] 
RDX:  RSI:  RDI: 81003e4c8948
ks
 [ ok ]
[  171.698178] RBP:  R08: 0010 R09: 
[  171.698181] R10: 0002 R11: 804a9800 R12: c2ec
[  171.698183] R13: fffc R14:  R15: 81003e4c8000
 * Setting up LVM Volume Groups...[  171.698186] FS:  2ab17ce3f6d0() 
GS:81007f0b0bc0() knlGS:
[  171.698188] CS:  0010 DS:  ES:  CR0: 8005003b
[  171.698191] CR2: c20100ec55fc CR3: 7d828000 CR4: 06e0
[  171.698194] Process ifconfig (pid: 2173, threadinfo 81007dfc4000, task 
81007dd957e0)
[  171.698196] Stack:  8000 81003e4c8680 c2ec 
81003e4c8000
[  171.698202]  7fff2dfba960 804c103d 81003e4c8680 

[  171.698205]  81003e4c8680   
804c3195
[  171.698208] Call Trace:
[  171.698216]  [804c103d] e1000_vlan_rx_register+0x1dd/0x210
[  171.698219]  [804c3195] e1000_up+0x35/0x4b0
[  171.698222]  [804c3724] e1000_open+0x74/0x100
[  171.698227]  [805626fe] dev_open+0x3e/0xa0
[  171.698230]  [8056184f] dev_change_flags+0x6f/0x160
[  171.698234]  [805a5174] devinet_ioctl+0x2d4/0x6e0
[  171.698238]  [803f7e01] __up_read+0x21/0xb0
[  171.698243]  [8055664c] sock_ioctl+0x1fc/0x230
[  171.698247]  [8029dc0f] do_ioctl+0x2f/0xa0
[  171.698249]  [8029df3b] vfs_ioctl+0x2bb/0x2f0
[  171.698252]  [8029dfb9] sys_ioctl+0x49/0x80
[  171.698256]  [805e375d] error_exit+0x0/0x84
[  171.698259]  [802098be] system_call+0x7e/0x83
[  171.698261]
[  171.698262]
[  171.698262] Code: 44 8b 20 e8 30 7e 00 00 83 bb 94 01 00 00 03 75 3c 83 e5 01
[  171.698268] RIP  [804a9858] e1000_write_vfta_82543+0x58/0xd0
[  171.698273]  RSP 81007dfc5cf8
[  171.698274] CR2: c20100ec55fc
[  171.698276]  6EXT3 FS on sda1, internal journal


Hi,

I believe this is one of the bugs that is fixed in the patch that I sent monday. Please 
try again with the patch applied to your tree and re-test. Thanks. I didn't see Andrew 
merge the patch yet.


see: http://lkml.org/lkml/2007/1/16/226

I can mail the patch if you can't find it. Just ping me privately.

Cheers,

Auke
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 4/4] atl1: Ancillary C files for Attansic L1 driver


Jay Cliburn wrote:

+static u32 atl1_get_tx_csum(struct net_device *netdev)
+{
+   return (netdev-features  NETIF_F_HW_CSUM) != 0;
+}
+
+static int atl1_set_tx_csum(struct net_device *netdev, u32 data)
+{
+   if (data)
+   netdev-features |= NETIF_F_HW_CSUM;
+   else
+   netdev-features = ~NETIF_F_HW_CSUM;
+
+   return 0;
+}
+
+static int atl1_set_tso(struct net_device *netdev, u32 data)
+{
+   if (data)
+   netdev-features |= NETIF_F_TSO;
+   else
+   netdev-features = ~NETIF_F_TSO;
+   return 0;
+}


There should be generic functions covering this.


+static u32 ether_crc_le(int length, unsigned char *data)
+{
+   u32 crc = ~0;   /* Initial value. */
+   while (--length = 0) {
+   unsigned char current_octet = *data++;
+   int bit;
+   for (bit = 8; --bit = 0; current_octet = 1) {
+   if ((crc ^ current_octet)  1) {
+   crc = 1;
+   crc ^= 0xedb88320;
+   } else
+   crc = 1;
+   }
+   }
+   return ~crc;
+}


this duplicates a library function



+/**
+ * Reset the transmit and receive units; mask and clear all interrupts.
+ * hw - Struct containing variables accessed by shared code
+ * return : ATL1_SUCCESS  or  idle status (if error)
+ **/
+s32 atl1_reset_hw(struct atl1_hw * hw)
+{
+   u32 icr;
+   u16 pci_cfg_cmd_word;
+   int i;
+
+   /* Workaround for PCI problem when BIOS sets MMRBC incorrectly. */
+   atl1_read_pci_cfg(hw, PCI_REG_COMMAND, pci_cfg_cmd_word);
+   if ((pci_cfg_cmd_word 
+(CMD_IO_SPACE | CMD_MEMORY_SPACE | CMD_BUS_MASTER))
+   != (CMD_IO_SPACE | CMD_MEMORY_SPACE | CMD_BUS_MASTER)) {
+   pci_cfg_cmd_word |=
+   (CMD_IO_SPACE | CMD_MEMORY_SPACE | CMD_BUS_MASTER);
+   atl1_write_pci_cfg(hw, PCI_REG_COMMAND, pci_cfg_cmd_word);
+   }


This duplicates bits set by pci_enable_device() and pci_set_master()


+	/** 
+	 * Clear Interrupt mask to stop board from generating
+	 * interrupts  Clear any pending interrupt events 
+	 **/

+   /**
+* atl1_write32(hw, REG_IMR, 0);
+* atl1_write32(hw, REG_ISR, 0x);
+**/
+
+   /**
+* Issue Soft Reset to the MAC.  This will reset the chip's
+* transmit, receive, DMA.  It will not effect
+* the current PCI configuration.  The global reset bit is self-
+* clearing, and should clear within a microsecond.
+**/
+   /*atl1_write32(hw, REG_MASTER_CTRL, MASTER_CTRL_SOFT_RST);*/
+   atl1_write32(hw, REG_MASTER_CTRL, MASTER_CTRL_SOFT_RST);
+   wmb();


PCI posting (need to read a register before delaying to guarantee flush; 
probably makes wmb superfluous)




+   atl1_write16(hw, REG_GPHY_ENABLE, 1);
+
+   msec_delay(1);  /* delay about 1ms */


ditto



+   /* Wait at least 10ms for All module to be Idle */
+   for (i = 0; i  10; i++) {
+   icr = atl1_read32(hw, REG_IDLE_STATUS);
+   if (!icr)
+   break;
+   msec_delay(1);  /* delay 1 ms */
+   cpu_relax();/* FIXME: is this still the right way to do 
this? */
+   }
+
+   if (icr)
+   return icr;
+
+   return ATL1_SUCCESS;
+}
+
+static inline bool atl1_eth_address_valid(u8 * p_addr)
+{
+   /* Invalid PermanentAddress ? */
+   if (((p_addr[0] == 0) 
+(p_addr[1] == 0) 
+(p_addr[2] == 0) 
+(p_addr[3] == 0)  (p_addr[4] == 0)  (p_addr[5] == 0)
+	) || (p_addr[0]  1)) 
+		/* Multicast address or Broadcast Address */

+   return false;
+
+   return true;


look at is_valid_ether_addr() lib function



+   if (atl1_get_permanent_address(hw)) {
+   hw-perm_mac_addr[0] = 0x00;
+   hw-perm_mac_addr[1] = 0x13;
+   hw-perm_mac_addr[2] = 0x74;
+   hw-perm_mac_addr[3] = 0x00;
+   hw-perm_mac_addr[4] = 0x5c;
+   hw-perm_mac_addr[5] = 0x38;
+   }


standard technique is to use random bytes, not a fixed address like 
this, when MAC address is otherwise unavailable.  consider what happens 
when two MACs are present.





+   for (i = 0; i  NODE_ADDRESS_SIZE; i++)
+   hw-mac_addr[i] = hw-perm_mac_addr[i];
+   return ATL1_SUCCESS;
+}
+
+/**
+ * Hashes an address to determine its location in the multicast table
+ * hw - Struct containing variables accessed by shared code
+ * mc_addr - the multicast address to hash
+ *
+ * atl1_hash_mc_addr
+ *  purpose
+ *  set hash value for a multicast address
+ *  hash calcu processing :
+ *  1. calcu 32bit CRC for multicast address
+ *  2. reverse crc with MSB to LSB
+ **/
+u32 atl1_hash_mc_addr(struct atl1_hw * hw, u8 * mc_addr)
+{
+   u32 crc32,

Re: [2.6 patch] make hdlc_setup() static again


Adrian Bunk wrote:

hdlc_setup was exported, but this export was never used.

If a driver using it actually shows up it can still be exported again.

Signed-off-by: Adrian Bunk [EMAIL PROTECTED]
Acked-by: Krzysztof Halasa [EMAIL PROTECTED]


applied


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/10] cxgb3 - main header files


Divy Le Ray wrote:

Jeff Garzik wrote:

Divy Le Ray wrote:

From: Divy Le Ray [EMAIL PROTECTED]

This patch implements the main header files of
the Chelsio T3 network driver.

Signed-off-by: Divy Le Ray [EMAIL PROTECTED]


Once you think it's ready, email me a URL to a single patch that adds 
the driver to the latest linux-2.6.git kernel.  Include in the email a 
description of the driver and signed-off-by line, which will get 
directly included in the git changelog.


Adding new drivers is a bit special, because we want to merge it as a 
single changeset, but that would create a patch too large to review on 
the common kernel mailing lists.

Jeff,

You can grab the monolithic patch at this URL:
http://service.chelsio.com/kernel.org/cxgb3.patch.bz2


applied to netdev-2.6.git#upstream

I'm really counting on Chelsio to actively maintain this driver, unlike 
the abandonware you guys first submitted.


Jeff



-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [RFC: 2.6 patch] remove the broken OAKNET driver


Adrian Bunk wrote:

The OAKNET driver:
- has been marked as BROKEN for more than two years and
- is still marked as BROKEN.

Drivers that had been marked as BROKEN for such a long time seem to be
unlikely to be revived in the foreseeable future.

But if anyone wants to ever revive this driver, the code is still
present in the older kernel releases.

Signed-off-by: Adrian Bunk [EMAIL PROTECTED]


applied


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

2.6.20-rc5: known regressions with patches (v2)

2007-01-18 Thread Adrian Bunk

This email lists some known regressions in 2.6.20-rc5 compared to 2.6.19
with patches available

If you find your name in the Cc header, you are either a submitter of one
of the bugs, a maintainer of an affected subsystem or driver, a patch
of yours caused a breakage, or I'm considering you in any other way possibly
involved with one or more of these issues.

Due to the huge number of recipients, please trim the Cc when answering.


Subject: does not pickup ipv6 addresses
References : http://bugzilla.kernel.org/show_bug.cgi?id=7817
 http://lkml.org/lkml/2007/1/14/146
Submitter  : Michael Gernoth [EMAIL PROTECTED]
 Daniel Drake [EMAIL PROTECTED]
Caused-By  : David L Stevens [EMAIL PROTECTED]
 commit 30c4cf577fb5b68c16e5750d6bdbd7072e42b279
Handled-By : YOSHIFUJI Hideaki [EMAIL PROTECTED]
Patch  : http://bugzilla.kernel.org/show_bug.cgi?id=7817
Status : patch available


Subject: ACPI: fix cpufreq regression
References : http://lkml.org/lkml/2007/1/16/120
Submitter  : Ingo Molnar [EMAIL PROTECTED]
Caused-By  : Dave Jones [EMAIL PROTECTED]
 commit 0916bd3ebb7cefdd0f432e8491abe24f4b5a101e
Handled-By : Ingo Molnar [EMAIL PROTECTED]
Patch  : http://lkml.org/lkml/2007/1/16/120
Status : patch available


Subject: CONFIG_JFFS2_FS_DEBUG=2 compile error
References : http://lkml.org/lkml/2007/1/12/161
Submitter  : Russell King [EMAIL PROTECTED]
Caused-By  : Al Viro [EMAIL PROTECTED]
 commit 914e26379decf1fd984b22e51fd2e4209b7a7f1b
Handled-By : David Woodhouse [EMAIL PROTECTED]
Status : patch available


Subject: WARNING: profile_hits [drivers/kvm/kvm-intel.ko] undefined!
References : http://lkml.org/lkml/2007/1/12/16
Submitter  : Miles Lane [EMAIL PROTECTED]
Caused-By  : Ingo Molnar [EMAIL PROTECTED]
 commit 07031e14c1127fc7e1a5b98dfcc59f434e025104
Handled-By : Andrew Morton [EMAIL PROTECTED]
Patch  : http://lkml.org/lkml/2007/1/12/18
Status : patch available


Subject: KVM: guest crash
References : http://lkml.org/lkml/2007/1/8/163
Submitter  : Roland Dreier [EMAIL PROTECTED]
Handled-By : Avi Kivity [EMAIL PROTECTED]
Patch  : http://lkml.org/lkml/2007/1/9/280
Status : patch available


Subject: compile error: USB_HID must depend on INPUT
References : http://lkml.org/lkml/2007/1/12/157
Submitter  : Russell King [EMAIL PROTECTED]
Handled-By : Russell King [EMAIL PROTECTED]
Patch  : http://lkml.org/lkml/2007/1/12/177
Status : patch available
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: 2.6.20-rc5: known regressions with patches (v2)

2007-01-18 Thread David Woodhouse

On Thu, 2007-01-18 at 20:59 +0100, Adrian Bunk wrote:
 Subject: CONFIG_JFFS2_FS_DEBUG=2 compile error
 References : http://lkml.org/lkml/2007/1/12/161
 Submitter  : Russell King [EMAIL PROTECTED]
 Caused-By  : Al Viro [EMAIL PROTECTED]
  commit 914e26379decf1fd984b22e51fd2e4209b7a7f1b
 Handled-By : David Woodhouse [EMAIL PROTECTED]
 Status : patch available

Linus, please pull from git://git.infradead.org/mtd-2.6.git

This fixes the above bug along with a few others. It does also contain a
small amount of new code which has been waiting for a while (including
the driver for the CAFÉ NAND controller which we use on OLPC).

My apologies for missing the merge window and first asking you to pull
this a few hours after 2.6.20-rc1 was cut; I'd been waiting for the
bitrev stuff to land, and had waited too long.

Adrian Bunk (3):
  [MTD] SSFDC must depend on BLOCK
  [MTD] [NAND] rtc_from4.c: use lib/bitrev.c
  [MTD] make drivers/mtd/cmdlinepart.c:mtdpart_setup() static

Adrian Hunter (2):
  [MTD] OneNAND: Implement read-while-load
  [MTD] OneNAND: Handle DDP chip boundary during read-while-load

Akinobu Mita (1):
  [JFFS2] Use rb_first() and rb_last() cleanup

Alan Cox (1):
  [MTD] MAPS: esb2rom: use hotplug safe interfaces

Alexey Dobriyan (1):
  [MTD] JEDEC probe: fix comment typo (devic)

Amit Choudhary (1):
  [JFFS2] Fix error-path leak in summary scan

Andrew Morton (1):
  [MTD] Tidy bitrev usage in rtc_from4.c

Andrew Victor (2):
  [MTD] NAND: AT91 NAND driver
  [MTD] NAND: Support for 16-bit bus-width on AT91.

Artem Bityutskiy (10):
  [MTD] core: trivial comments fix
  [MTD] NAND: nandsim: support subpage write
  [MTD] increase MAX_MTD_DEVICES
  [MTD] add get_mtd_device_nm() function
  [MTD] add get and put methods
  [MTD] return error code from get_mtd_device()
  [MTD] nandsim: bugfix in page addressing
  [JFFS2] add cond_resched() when garbage collecting deletion dirent
  [JFFS2] Reschedule in loops
  [MTD] OneNAND: release CPU in cycles

Burman Yan (1):
  [MTD] replace kmalloc+memset with kzalloc

Dave Olsen (1):
  [MTD] [MAPS] Support for BIOS flash chips on the nvidia ck804 southbridge

David Anders (1):
  [MTD] NOR: leave Intel chips in read-array mode on suspend

David Woodhouse (29):
  [MTD NAND] Initial import of CAFÉ NAND driver.
  [MTD NAND] OLPC CAFÉ driver update
  Merge branch 'master' of git://git.kernel.org/.../torvalds/linux-2.6
  [MTD] NAND: Combined oob buffer so it's contiguous with data
  [MTD] NAND: Correct setting of chip-oob_poi OOB buffer
  Merge git://git.infradead.org/~dwmw2/cafe-2.6
  [MTD] NAND: Add hardware ECC correction support to CAFÉ NAND driver
  [MTD] NAND: CAFÉ NAND driver cleanup, fix ECC on reading empty flash
  [MTD] NAND: Disable ECC checking on CAFÉ since it's broken for now
  [MTD] NAND: Café ECC -- remove spurious BUG_ON() in err_pos()
  [MTD] NAND: Reset Café controller before initialising.
  [MTD] CAFÉ NAND: Add 'slowtiming' parameter, default usedma and checkecc 
on
  [MTD] NAND: Add ECC debugging for CAFÉ
  [MTD] NAND: Remove empty block ECC workaround
  [MTD] NAND: Fix timing calculation in CAFÉ debugging message
  [MTD] NAND: Use register #defines throughout CAFÉ driver, not numbers
  [MTD] NAND: Add register debugging spew option to CAFÉ driver
  [MTD] NAND: Fix ECC settings in CAFÉ controller driver.
  Merge git://git.infradead.org/~dwmw2/cafe-2.6
  Merge git://git.infradead.org/~kmpark/onenand-mtd-2.6
  [MTD] [NAND] Update CAFÉ driver interrupt handler prototype
  [MTD] Use EXPORT_SYMBOL_GPL() for exported symbols.
  [MTD] Remove trailing whitespace
  Merge branch 'master' of git://git.kernel.org/.../torvalds/linux-2.6
  [MTD] Fix SSFDC build for variable blocksize.
  [MTD] Fix ssfdc blksize typo
  Merge branch 'master' of git://git.infradead.org/~kmpark/onenand-mtd-2.6
  [JFFS2] debug.h: include linux/sched.h for current-pid
  Merge branch 'master' of git://git.kernel.org/.../torvalds/linux-2.6

Haavard Skinnemoen (1):
  [MTD] bugfix: DataFlash is not bit writable

Jeff Garzik (1):
  [JFFS2] kill warning RE debug-only variables

Josh Boyer (1):
  [MTD] add MTD_BLKDEVS Kconfig option

Kyungmin Park (9):
  MTD: OneNAND: interrupt based wait support
  [MTD] OneNAND: lock support
  [MTD] OneNAND: Single bit error detection
  [MTD] OneNAND: fix oob handling in recent oob patch
  [JFFS2] use the ref_offset macro
  [MTD] OneNAND: fix onenand_wait bug
  [MTD] OneNAND: add subpage write support
  [MTD] OneNAND: fix onenand_wait bug in read ecc error
  [MTD] OneNAND: return ecc error code only when 2-bit ecc occurs

Lew Glendenning (1):
  [MTD] MAPS: Support for BIOS flash chips on Intel ESB2 southbridge

Mariusz Kozlowski (1):
  [MTD] [NAND] Compile fix in rfc_from4.c

Qi

Re: [take33 10/10] kevent: Kevent based AIO (aio_sendfile()/aio_sendfile_path()).

2007-01-18 Thread Suparna Bhattacharya

On Wed, Jan 17, 2007 at 05:39:51PM +0300, Evgeniy Polyakov wrote:
 On Wed, Jan 17, 2007 at 07:21:42PM +0530, Suparna Bhattacharya ([EMAIL 
 PROTECTED]) wrote:
 
  Since you are implementing new APIs here, have you considered doing an
  aio_sendfilev to be able to send a header with the data ?
 
 It is doable, but why people do not like corking?
 With Linux less than microsecond syscall overhead it is better and more
 flexible solution, doesn't it?

That is what I used to think as well. However ...

The problem as I understand it now is not about bunching data together, but
of ensuring some sort of atomicity between the header and the data, when
there can be multiple outstanding aio requests on the same socket - i.e.
ensuring strict ordering without other data coming in between, when data
to be sent is not already in cache, and in the meantime another sendfile
or aio write request comes in for the same socket - all without having to lock
the socket when reading data from disk.

There are alternate ways to address this, aio_sendfilev is one of the options
I have heard people requesting.

Regards
Suparna

 
 I'm not saying - 'no, there will not be any *v variants', just getting
 more info.
 
  Regards
  Suparna
 
 --
   Evgeniy Polyakov

-- 
Suparna Bhattacharya ([EMAIL PROTECTED])
Linux Technology Center
IBM Software Lab, India

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 0/12] L2 network namespace (v3)

2007-01-18 Thread Eric W. Biederman

YOSHIFUJI Hideaki / 吉藤英明 [EMAIL PROTECTED] writes:

 In article [EMAIL PROTECTED] (at Wed, 17 Jan 2007 18:51:14
 +0300), Dmitry Mishin [EMAIL PROTECTED] says:

 ===
 L2 network namespaces
 
 The most straightforward concept of network virtualization is complete
 separation of namespaces, covering device list, routing tables, netfilter
 tables, socket hashes, and everything else.
 
 On input path, each packet is tagged with namespace right from the
 place where it appears from a device, and is processed by each layer
 in the context of this namespace.
 Non-root namespaces communicate with the outside world in two ways: by
 owning hardware devices, or receiving packets forwarded them by their parent
 namespace via pass-through device.

 Can you handle multicast / broadcast and IPv6, which are very important?

The basic idea here is very simple.

Each network namespace appears to user space as a separate network stack,
with its own set of routing tables etc.

All sockets and all network devices (the sources of packets) belong
to exactly one network namespace.  

From the socket or the network device a packet enters the network stack
you can infer the network namespace that it will be processed in.
Each network namespace should get its own complement of the data structures
necessary to process packets, and everything should work.

Talking between namespaces is accomplished either through an external network,
or through a special pseudo network device.  The simplest to implement
is two network devices where all packets transmitted on one are received
on the other.  Then by placing one network device in one namespace and
the other in another namespace it looks like two machines connected by
a crossover cable.

Once you have that in one namespace you can connect other namespaces
with the existing ethernet bridging or by configuring one of the
namespaces as a router and routing traffic between them.


Supporting IPv6 is roughly as difficult as supporting IPv4.  

What needs to happen to convert code is all variables either need
a per network namespace instance or the data structures needs to be
modified to have a network namespace tag.  For hash tables which
are hard to allocate dynamically tagging is the preferred conversion
method, for anything that is small enough duplication is preferred
as it allows the existing logic to be kept.

In the fast path the impact of all of the conversions should be very light
to non-existent.  In network stack initialization and cleanup there
is work to do because you are initializing and cleaning up variables more often
than at module insertion and removal.

So my expectation is that once we get a framework established and merged
to allow network namespaces eventually the entire network stack will be
converted.  Not just ipv4 and ipv6 but decnet, ipx, iptables, fair scheduling,
ethernet bridging and all of the other weird and twisty bits of the
linux network stack.

The primary practical hurdle is there is a lot of networking code in
the kernel.

I think I know a path by which we can incrementally merge support for
network namespaces without breaking anything.  More to come on this
when I finish up my demonstration patchset in a week or so that
is complete enough to show what I am talking about.

I hope this helps put the concept into perspective.

As for Dmitry's patchset in particular it currently does not support
IPv6 and I don't know where it is with respect to the broadcast and
multicast but I don't see any immediate problems that would preclude
those from working.  But any incompleteness is exactly that:
incompleteness, and an implementation problem, not a fundamental design
issue.

Eric
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 2.6.20 1/5] s2io: Making LRO and UFO as module loadable parameter.

This patch adds two load parameters, napi and ufo. Previously NAPI was
a compilation option; with these changes we can enable/disable NAPI using a load
parameter. Also we are introducing the ufo load parameter to enable/disable
the ufo feature.

Signed-off-by: Sivakumar Subramani [EMAIL PROTECTED]
---
diff -urpN orig/drivers/net/s2io.c patch1/drivers/net/s2io.c
--- orig/drivers/net/s2io.c 2006-12-21 10:06:58.0 +0530
+++ patch1/drivers/net/s2io.c   2007-01-08 11:56:23.0 +0530
@@ -401,9 +401,10 @@ S2IO_PARM_INT(lro, 0);
  * aggregation happens until we hit max IP pkt size(64K)
  */
 S2IO_PARM_INT(lro_max_pkts, 0x);
-#ifndef CONFIG_S2IO_NAPI
 S2IO_PARM_INT(indicate_max_pkts, 0);
-#endif
+
+S2IO_PARM_INT(napi, 1);
+S2IO_PARM_INT(ufo, 0);
 
 static unsigned int tx_fifo_len[MAX_TX_FIFOS] =
 {DEFAULT_FIFO_0_LEN, [1 ...(MAX_TX_FIFOS - 1)] = DEFAULT_FIFO_1_7_LEN};
@@ -2275,9 +2276,7 @@ static int fill_rx_buffers(struct s2io_n
struct config_param *config;
u64 tmp;
buffAdd_t *ba;
-#ifndef CONFIG_S2IO_NAPI
unsigned long flags;
-#endif
RxD_t *first_rxdp = NULL;
 
mac_control = nic-mac_control;
@@ -2321,12 +2320,15 @@ static int fill_rx_buffers(struct s2io_n
DBG_PRINT(INTR_DBG, %s: Next block at: %p\n,
  dev-name, rxdp);
}
-#ifndef CONFIG_S2IO_NAPI
-   spin_lock_irqsave(nic-put_lock, flags);
-   mac_control-rings[ring_no].put_pos =
-   (block_no * (rxd_count[nic-rxd_mode] + 1)) + off;
-   spin_unlock_irqrestore(nic-put_lock, flags);
-#endif
+   if(!napi) {
+   spin_lock_irqsave(nic-put_lock, flags);
+   mac_control-rings[ring_no].put_pos =
+   (block_no * (rxd_count[nic-rxd_mode] + 1)) + off;
+   spin_unlock_irqrestore(nic-put_lock, flags);
+   } else {
+   mac_control-rings[ring_no].put_pos =
+   (block_no * (rxd_count[nic-rxd_mode] + 1)) + off;
+   }   
if ((rxdp-Control_1  RXD_OWN_XENA) 
((nic-rxd_mode = RXD_MODE_3A) 
(rxdp-Control_2  BIT(0 {
@@ -2569,7 +2571,7 @@ static void free_rx_buffers(struct s2io_
  * 0 on success and 1 if there are No Rx packets to be processed.
  */
 
-#if defined(CONFIG_S2IO_NAPI)
+#if defined(HAVE_NETDEV_POLL)
 static int s2io_poll(struct net_device *dev, int *budget)
 {
nic_t *nic = dev-priv;
@@ -2708,9 +2710,7 @@ static void rx_intr_handler(ring_info_t 
rx_curr_get_info_t get_info, put_info;
RxD_t *rxdp;
struct sk_buff *skb;
-#ifndef CONFIG_S2IO_NAPI
int pkt_cnt = 0;
-#endif
int i;
 
spin_lock(nic-rx_lock);
@@ -2726,16 +2726,16 @@ static void rx_intr_handler(ring_info_t 
put_info = ring_data-rx_curr_put_info;
put_block = put_info.block_index;
rxdp = ring_data-rx_blocks[get_block].rxds[get_info.offset].virt_addr;
-#ifndef CONFIG_S2IO_NAPI
-   spin_lock(nic-put_lock);
-   put_offset = ring_data-put_pos;
-   spin_unlock(nic-put_lock);
-#else
-   put_offset = (put_block * (rxd_count[nic-rxd_mode] + 1)) +
-   put_info.offset;
-#endif
+   if (!napi) {
+   spin_lock(nic-put_lock);
+   put_offset = ring_data-put_pos;
+   spin_unlock(nic-put_lock);
+   } else
+   put_offset = ring_data-put_pos;
+
while (RXD_IS_UP2DT(rxdp)) {
-   /* If your are next to put index then it's FIFO full condition 
*/
+   /* If your are next to put index then it's
+  FIFO full condition */
if ((get_block == put_block) 
(get_info.offset + 1) == put_info.offset) {
DBG_PRINT(INTR_DBG, %s: Ring Full\n,dev-name);
@@ -2793,15 +2793,12 @@ static void rx_intr_handler(ring_info_t 
rxdp = ring_data-rx_blocks[get_block].block_virt_addr;
}
 
-#ifdef CONFIG_S2IO_NAPI
nic-pkts_to_process -= 1;
-   if (!nic-pkts_to_process)
+   if ((napi)  (!nic-pkts_to_process))
break;
-#else
pkt_cnt++;
if ((indicate_max_pkts)  (pkt_cnt  indicate_max_pkts))
break;
-#endif
}
if (nic-lro) {
/* Clear all LRO sessions before exiting */
@@ -4196,26 +4193,26 @@ static irqreturn_t s2io_isr(int irq, voi
org_mask = readq(bar0-general_int_mask);
writeq(val64, bar0-general_int_mask);
 
-#ifdef CONFIG_S2IO_NAPI
-   if (reason  GEN_INTR_RXTRAFFIC) {
-   if (netif_rx_schedule_prep(dev)) {
-   writeq(val64, bar0-rx_traffic_mask);
-   __netif_rx_schedule(dev);
+   if (napi) {
+   if (reason  GEN_INTR_RXTRAFFIC) {

[PATCH 2.6.20 2/5] S2IO: Fixes for reset and link handling.

1. Fix for reset and link handling.
2. Allow for promiscuous mode and multicast state to be maintained through
   ifconfig up and down.
3. Support to print adapter serial number.

Signed-off-by: Sivakumar Subramani [EMAIL PROTECTED]
---
diff -urpN patch1/drivers/net/s2io.c patch2/drivers/net/s2io.c
--- patch1/drivers/net/s2io.c   2007-01-08 11:56:23.0 +0530
+++ patch2/drivers/net/s2io.c   2007-01-08 17:51:20.0 +0530
@@ -1416,7 +1416,7 @@ static int init_nic(struct s2io_nic *nic
 
val64 = TTI_DATA2_MEM_TX_UFC_A(0x10) |
TTI_DATA2_MEM_TX_UFC_B(0x20) |
-   TTI_DATA2_MEM_TX_UFC_C(0x70) | TTI_DATA2_MEM_TX_UFC_D(0x80);
+   TTI_DATA2_MEM_TX_UFC_C(0x40) | TTI_DATA2_MEM_TX_UFC_D(0x80);
writeq(val64, bar0-tti_data2_mem);
 
val64 = TTI_CMD_MEM_WE | TTI_CMD_MEM_STROBE_NEW_CMD;
@@ -1612,7 +1612,8 @@ static int init_nic(struct s2io_nic *nic
 * that does not start on an ADB to reduce disconnects.
 */
if (nic-device_type == XFRAME_II_DEVICE) {
-   val64 = EXT_REQ_EN | MISC_LINK_STABILITY_PRD(3);
+   val64 = FAULT_BEHAVIOUR | EXT_REQ_EN |
+   MISC_LINK_STABILITY_PRD(3);
writeq(val64, bar0-misc_control);
val64 = readq(bar0-pic_control2);
val64 = ~(BIT(13)|BIT(14)|BIT(15));
@@ -1879,41 +1880,36 @@ static void en_dis_able_nic_intrs(struct
}
 }
 
-static int check_prc_pcc_state(u64 val64, int flag, int rev_id, int herc)
+/**
+ *  verify_pcc_quiescent- Checks for PCC quiescent state
+ *  Return: 1 If PCC is quiescence
+ *  0 If PCC is not quiescence
+ */
+static int verify_pcc_quiescent(nic_t *sp, int flag)
 {
-   int ret = 0;
+   int ret = 0, herc;
+   XENA_dev_config_t __iomem *bar0 = sp-bar0;
+   u64 val64 = readq(bar0-adapter_status);
+   
+   herc = (sp-device_type == XFRAME_II_DEVICE);
 
if (flag == FALSE) {
-   if ((!herc  (rev_id = 4)) || herc) {
-   if (!(val64  ADAPTER_STATUS_RMAC_PCC_IDLE) 
-   ((val64  ADAPTER_STATUS_RC_PRC_QUIESCENT) ==
-ADAPTER_STATUS_RC_PRC_QUIESCENT)) {
+   if ((!herc  (get_xena_rev_id(sp-pdev) = 4)) || herc) {
+   if (!(val64  ADAPTER_STATUS_RMAC_PCC_IDLE)) 
ret = 1;
-   }
-   }else {
-   if (!(val64  ADAPTER_STATUS_RMAC_PCC_FOUR_IDLE) 
-   ((val64  ADAPTER_STATUS_RC_PRC_QUIESCENT) ==
-ADAPTER_STATUS_RC_PRC_QUIESCENT)) {
+   } else {
+   if (!(val64  ADAPTER_STATUS_RMAC_PCC_FOUR_IDLE))
ret = 1;
-   }
}
} else {
-   if ((!herc  (rev_id = 4)) || herc) {
+   if ((!herc  (get_xena_rev_id(sp-pdev) = 4)) || herc) {
if (((val64  ADAPTER_STATUS_RMAC_PCC_IDLE) ==
-ADAPTER_STATUS_RMAC_PCC_IDLE) 
-   (!(val64  ADAPTER_STATUS_RC_PRC_QUIESCENT) ||
-((val64  ADAPTER_STATUS_RC_PRC_QUIESCENT) ==
- ADAPTER_STATUS_RC_PRC_QUIESCENT))) {
+ADAPTER_STATUS_RMAC_PCC_IDLE))
ret = 1;
-   }
} else {
if (((val64  ADAPTER_STATUS_RMAC_PCC_FOUR_IDLE) ==
-ADAPTER_STATUS_RMAC_PCC_FOUR_IDLE) 
-   (!(val64  ADAPTER_STATUS_RC_PRC_QUIESCENT) ||
-((val64  ADAPTER_STATUS_RC_PRC_QUIESCENT) ==
- ADAPTER_STATUS_RC_PRC_QUIESCENT))) {
+ADAPTER_STATUS_RMAC_PCC_FOUR_IDLE))
ret = 1;
-   }
}
}
 
@@ -1921,9 +1917,6 @@ static int check_prc_pcc_state(u64 val64
 }
 /**
  *  verify_xena_quiescence - Checks whether the H/W is ready
- *  @val64 :  Value read from adapter status register.
- *  @flag : indicates if the adapter enable bit was ever written once
- *  before.
  *  Description: Returns whether the H/W is ready to go or not. Depending
  *  on whether adapter enable bit was written or not the comparison
  *  differs and the calling function passes the input argument flag to
@@ -1932,24 +1925,63 @@ static int check_prc_pcc_state(u64 val64
  *  0 If Xena is not quiescence
  */
 
-static int verify_xena_quiescence(nic_t *sp, u64 val64, int flag)
+static int verify_xena_quiescence(nic_t *sp)
 {
-   int ret = 0, herc;
-   u64 tmp64 = ~((u64) val64);
-   int rev_id = get_xena_rev_id(sp-pdev);
+   int  mode;
+   XENA_dev_config_t __iomem *bar0 = sp-bar0;
+   u64 val64 = readq(bar0-adapter_status);
+   mode = s2io_verify_pci_mode(sp);
 
-   herc =

[PATCH 2.6.20 3/5] s2io: Fixes in updating skb-truesize and code cleanup.

1. Fix for updating skb-truesize properly.
2. Disable NAPI only if more than one ring configured in case of MSI/MSI-X
   interrupts. Previously we were disabling NAPI irrespective of number of
   rings when MSI/MSI-X interrupts were used.
3. Code cleanup.

Signed-off-by: Sivakumar Subramani [EMAIL PROTECTED]
---
diff -urpN patch2/drivers/net/s2io.c patch3/drivers/net/s2io.c
--- patch2/drivers/net/s2io.c   2007-01-08 17:51:20.0 +0530
+++ patch3/drivers/net/s2io.c   2007-01-08 23:38:48.0 +0530
@@ -459,7 +459,7 @@ static int init_shared_mem(struct s2io_n
void *tmp_v_addr, *tmp_v_addr_next;
dma_addr_t tmp_p_addr, tmp_p_addr_next;
RxD_block_t *pre_rxd_blk = NULL;
-   int i, j, blk_cnt, rx_sz, tx_sz;
+   int i, j, blk_cnt;
int lst_size, lst_per_page;
struct net_device *dev = nic-dev;
unsigned long tmp;
@@ -484,7 +484,6 @@ static int init_shared_mem(struct s2io_n
}
 
lst_size = (sizeof(TxD_t) * config-max_txds);
-   tx_sz = lst_size * size;
lst_per_page = PAGE_SIZE / lst_size;
 
for (i = 0; i  config-tx_fifo_num; i++) {
@@ -584,7 +583,6 @@ static int init_shared_mem(struct s2io_n
size = (size * (sizeof(RxD1_t)));
else
size = (size * (sizeof(RxD3_t)));
-   rx_sz = size;
 
for (i = 0; i  config-rx_ring_num; i++) {
mac_control-rings[i].rx_curr_get_info.block_index = 0;
@@ -625,6 +623,8 @@ static int init_shared_mem(struct s2io_n
rx_blocks-rxds = kmalloc(sizeof(rxd_info_t)*
  rxd_count[nic-rxd_mode],
  GFP_KERNEL);
+   if (!rx_blocks-rxds)
+   return -ENOMEM;
for (l=0; lrxd_count[nic-rxd_mode];l++) {
rx_blocks-rxds[l].virt_addr =
rx_blocks-block_virt_addr +
@@ -2260,6 +2260,7 @@ static int fill_rxd_3buf(nic_t *nic, RxD
return -ENOMEM ;
}
frag_list = skb_shinfo(skb)-frag_list;
+   skb-truesize += frag_list-truesize; //updating skb-truesize
frag_list-next = NULL;
tmp = (void *)ALIGN((long)frag_list-data, ALIGN_SIZE + 1);
frag_list-data = tmp;
@@ -3186,6 +3187,8 @@ static void alarm_intr_handler(struct s2
register u64 val64 = 0, err_reg = 0;
u64 cnt;
int i;
+   if (atomic_read(nic-card_state) == CARD_DOWN)
+   return;
nic-mac_control.stats_info-sw_stat.ring_full_cnt = 0;
/* Handling the XPAK counters update */
if(nic-mac_control.stats_info-xpak_stat.xpak_timer_count  72000) {
@@ -6581,7 +6584,6 @@ static int rx_osm_handler(ring_info_t *r
skb_put(skb, buf1_len);
skb-len += buf2_len;
skb-data_len += buf2_len;
-   skb-truesize += buf2_len;
skb_put(skb_shinfo(skb)-frag_list, buf2_len);
sp-stats.rx_bytes += buf1_len;
 
@@ -6803,6 +6805,8 @@ static int s2io_verify_parm(struct pci_d
Defaulting to INTA\n);
*dev_intr_type = INTA;
}
+   if ( (rx_ring_num  1)  (*dev_intr_type != INTA) )
+   napi = 0;
if (rx_ring_mode  3) {
DBG_PRINT(ERR_DBG, s2io: Requested ring mode not supported\n);
DBG_PRINT(ERR_DBG, s2io: Defaulting to 3-buffer mode\n);
@@ -6999,7 +7003,7 @@ s2io_init_nic(struct pci_dev *pdev, cons
goto mem_alloc_failed;
}
 
-   sp-bar0 = ioremap(pci_resource_start(pdev, 0),
+   sp-bar0 = (caddr_t) ioremap(pci_resource_start(pdev, 0),
 pci_resource_len(pdev, 0));
if (!sp-bar0) {
DBG_PRINT(ERR_DBG, %s: Neterion: cannot remap io mem1\n,
@@ -7008,7 +7012,7 @@ s2io_init_nic(struct pci_dev *pdev, cons
goto bar0_remap_failed;
}
 
-   sp-bar1 = ioremap(pci_resource_start(pdev, 2),
+   sp-bar1 = (caddr_t) ioremap(pci_resource_start(pdev, 2),
 pci_resource_len(pdev, 2));
if (!sp-bar1) {
DBG_PRINT(ERR_DBG, %s: Neterion: cannot remap io mem2\n,
@@ -7324,7 +7328,7 @@ int __init s2io_starter(void)
  * Description: This function is the cleanup routine for the driver. It 
unregist * ers the driver.
  */
 
-static void s2io_closer(void)
+static __exit void s2io_closer(void)
 {
pci_unregister_driver(s2io_driver);
DBG_PRINT(INIT_DBG, cleanup done\n);
@@ -7645,6 +7649,7 @@ static void lro_append_pkt(nic_t *sp, lr
lro-last_frag-next = skb;
else
skb_shinfo(first)-frag_list = skb;
+   first-truesize += skb-truesize;//updating skb-truesize
lro-last_frag = skb;

[PATCH 2.6.20 4/5] s2io: Removed enabling of some of the unused interrupts.