date:20070802

strange tcp behavior

2007-08-02 Thread john

1186035057.207629127.0.0.1 - 127.0.0.1TCP 5  smtp [SYN]
Seq=0 Len=0
1186035057.207632127.0.0.1 - 127.0.0.1TCP smtp  5 [SYN, ACK]
Seq=0 Ack=1 Win=32792 Len=0 MSS=16396
1186035057.207666127.0.0.1 - 127.0.0.1TCP 5  smtp [ACK]
Seq=1 Ack=1 Win=1500 Len=0
1186035057.207699127.0.0.1 - 127.0.0.1SMTP Command: EHLO localhost
1186035057.207718127.0.0.1 - 127.0.0.1TCP smtp  5 [ACK]
Seq=1 Ack=17 Win=32792 Len=0
1186035057.207736127.0.0.1 - 127.0.0.1TCP 5  smtp [RST]
Seq=17 Len=0
1186035057.223934127.0.0.1 - 127.0.0.1TCP 33787  5 [RST,
ACK] Seq=0 Ack=0 Win=32792 Len=0



Can someone please comment on why the TCP stack sends the RST packet from the
wrong source port in this situation?

This is the same problem that was described in my first two posts, which
unfortunately nobody seemed to notice.

Here is source code which can reproduce the behavior described; the client-side
code is a complete mess, but with a little bit of effort it works.

Server:

#include <sys/types.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <arpa/inet.h>
#include <fcntl.h>
#include <poll.h>
#include <string.h>
#include <unistd.h>

/*
 * Server half of the reproducer.
 *
 * Opens a non-blocking TCP listening socket on port 25 (all local addresses)
 * and then loops: whenever poll() returns non-zero it accepts one connection,
 * sleeps for one microsecond, sends the string "HELLO FRIEND" and closes the
 * connection.
 *
 * Returns 1 if the listening socket cannot be created, bound or put into the
 * listening state; returns 0 only if poll() ever returns 0.
 */
int main(void) {
	int ms;                         /* listening socket */
	int ss;                         /* socket for one accepted connection */
	struct sockaddr_in sa;
	char *str = "HELLO FRIEND";
	struct pollfd fd;
	int flags;

	ms = socket(PF_INET, SOCK_STREAM, IPPROTO_TCP);
	if (ms < 0)
		return 1;

	/* accept() below must never block, so mark the listener non-blocking. */
	flags = fcntl(ms, F_GETFL, 0);
	fcntl(ms, F_SETFL, flags | O_NONBLOCK);

	memset(&sa, 0, sizeof(sa));
	sa.sin_family = AF_INET;
	sa.sin_addr.s_addr = htonl(INADDR_ANY);
	sa.sin_port = htons(25);

	if (bind(ms, (struct sockaddr *) &sa, sizeof(sa)) < 0)
		return 1;

	/* Backlog of 0 is kept exactly as in the posted reproducer. */
	if (listen(ms, 0) < 0)
		return 1;

	fd.fd = ms;
	fd.events = POLLIN;

	/*
	 * poll() with an infinite timeout returns non-zero both on readiness
	 * and on error, so the loop keeps running in either case.
	 */
	while (poll(&fd, 1, -1)) {
		ss = accept(ms, NULL, NULL);

		/* Skip the send/close sequence when nothing was accepted. */
		if (ss >= 0) {
			usleep(1);
			send(ss, str, strlen(str), MSG_NOSIGNAL);
			close(ss);
		}

		memset(&fd, 0, sizeof(fd));
		fd.fd = ms;
		fd.events = POLLIN;
	}

	return 0;
}

Client:


#include stdio.h
#include sys/types.h
#include sys/socket.h
#include netinet/in.h
#include netinet/tcp.h
#include linux/if_ether.h
//#include arpa/inet.h

//#include linux/if_ether.h

/*
 * Socket state shared by the client's main() and mysend().
 *
 * remoteaddr is the destination passed to sendto(); localaddr is declared
 * but not referenced in the visible part of the client.
 */
struct sockaddr_in localaddr, remoteaddr;

/* Address used to bind the capture socket; sa_data carries the device name. */
struct sockaddr rawaddr;

int sdl;        /* SOCK_PACKET socket the replies are read from */
int sdr;        /* raw IPPROTO_TCP socket the crafted segments are sent on */

/* TCP header that main() fills in and mysend() checksums and transmits. */
struct tcphdr header;

/*
 * Block that mysend() places in front of the TCP header while computing the
 * checksum; it is summed together with the header and payload but is not
 * part of what is handed to sendto().
 *
 * The single file-scope instance, pheader, is rebuilt on every mysend() call.
 */
struct pheader_t {
	uint32_t saddr, daddr;  /* source / destination IPv4 address, as returned by inet_addr() */
	uint8_t r, protocol;    /* r stays zero; protocol is set to 6 by mysend() */
	uint16_t length;        /* htons(TCP header size + payload length) */
} pheader;

/*
 * Scratch buffer in which mysend() assembles pheader + TCP header + payload.
 * Declared as unsigned short so the checksum loop can sum it 16 bits at a time.
 */
unsigned short tbuf[2048];

/* Receive buffer for frames read from the capture socket. */
unsigned char buf[2048];

/* Payload sent when the `data` flag is set. */
char *msg = "EHLO localhost\r\n";

/* Cursor into buf; main() points it at the TCP header of a received frame. */
unsigned char *p;

/* Dotted-quad source and destination addresses used for the checksum block. */
char *src_addr = "127.0.0.1";
char *dst_addr = "127.0.0.1";

/* Source and destination TCP ports, in host byte order. */
unsigned short sprt = 5;
unsigned short dprt = 25;


/* Wall-clock time read by main() to seed rand(). */
struct timeval tv;

/* Sequence / acknowledgement numbers in host byte order. */
unsigned seq, ack_seq;

/* Non-zero makes mysend() append msg as the segment payload. */
int data;

/*
 * Checksum and transmit one TCP segment built from the global `header`.
 *
 * Layout assembled in tbuf:  [ pheader | header | payload (| pad byte) ]
 * The checksum is the folded 16-bit sum over that whole layout; only the
 * part starting at the TCP header is handed to sendto() on the raw socket
 * `sdr`, addressed to `remoteaddr`.
 *
 * When the global `data` is non-zero, `msg` is appended as payload.
 * Side effects: rewrites `pheader`, `tbuf` and `header.check`.
 * The segment is silently not sent if the payload would not fit in tbuf.
 */
void mysend(void) {
	const size_t hdr_len = sizeof(pheader) + sizeof(header);
	size_t len = 0;
	size_t words;
	size_t i;
	uint32_t sum = 0;

	if (data) {
		len = strlen(msg);
		/* +1 leaves room for the pad byte written below. */
		if (hdr_len + len + 1 > sizeof(tbuf))
			return;
		memcpy((char *) tbuf + hdr_len, msg, len);
		/*
		 * Zero the byte after the payload so an odd-length payload is
		 * summed as a full 16-bit word instead of losing its last byte.
		 */
		((char *) tbuf)[hdr_len + len] = 0;
	}

	bzero(&pheader, sizeof(pheader));
	pheader.saddr = (in_addr_t) inet_addr(src_addr);
	pheader.daddr = (in_addr_t) inet_addr(dst_addr);
	pheader.protocol = 6;
	pheader.length = htons(sizeof(header) + len);

	/*
	 * The checksum field must be zero while the sum is taken; without this
	 * a second call would fold the previous segment's checksum into the
	 * new one.
	 */
	header.check = 0;

	memcpy(tbuf, &pheader, sizeof(pheader));
	memcpy((char *) tbuf + sizeof(pheader), &header, sizeof(header));

	/* Round up so a trailing odd byte (plus its zero pad) is included. */
	words = (hdr_len + len + 1) / 2;
	for (i = 0; i < words; i++) {
		sum += tbuf[i];
		/* Fold the carry back in after every addition. */
		sum = (sum & 0xffff) + (sum >> 16);
	}

	header.check = (uint16_t) ~sum;

	/* Re-copy the header so the transmitted bytes carry the checksum. */
	memcpy((char *) tbuf + sizeof(pheader), &header, sizeof(header));

	sendto(sdr, (char *) tbuf + sizeof(pheader), sizeof(header) + len,
	       0, (struct sockaddr *) &remoteaddr, sizeof(remoteaddr));
}


void main(void)
{
gettimeofday(tv, NULL);
srand(tv.tv_sec  tv.tv_usec);

remoteaddr.sin_family = AF_INET;
remoteaddr.sin_addr.s_addr = (in_addr_t) inet_addr(dst_addr);


sdl = socket(PF_INET, SOCK_PACKET, htons(ETH_P_ALL));
strcpy(rawaddr.sa_data, lo);
bind(sdl, (struct sockaddr *) rawaddr, sizeof(rawaddr));

sdr = socket(AF_INET, SOCK_RAW, IPPROTO_TCP);


bzero(header, sizeof(header));
header.source = htons(sprt);
header.dest = htons(dprt);

seq = rand();
ack_seq = 0;

header.seq = htonl(seq);
header.ack_seq = htonl(ack_seq);

header.doff = sizeof(header) / 4;

header.syn = 1;

header.window = htons(1500);

mysend();

while(1) {
recvfrom(sdl, buf, sizeof(buf), 0, NULL, NULL);
//  p = buf + (*buf  0x0f) * 4;
p = (buf + 14) + (*(buf + 14)  0x0f) * 4;
if(ntohs(((struct tcphdr *)p)-source) == dprt 
ntohs(((struct tcphdr *)p)-dest) == sprt  ((struct
tcphdr *)p)-syn == 1  ((struct tcphdr *)p)-ack == 1)
break;
}

Re: [REGRESSION] tg3 dead after s2ram

2007-08-02 Thread Joachim Deguara

On Wednesday 01 August 2007 23:00:23 Michael Chan wrote:
 On Wed, 2007-08-01 at 10:47 -0700, Michael Chan wrote:
  You have 2 Broadcom devices in your system.  07:00.0 is a wireless
  device, I think.  8:4.0 is the tg3 device.
 
  It's clear that the tg3 device is still in D3 state after resume and
  that explains why all register accesses fail.  tg3_resume() should put
  the device back in D0 state in a very straight forward way and I don't
  see how that can fail.  It worked for me when I tested it last night.
  Can you add some printk() to tg3_resume() to see what's happening?  Let
  me know if you want me to send you some debug patches to do that.

 I misread the PCI registers below.  The power state was ok.

 The problem is that memory enable and bus master were not set in PCI
 register 4 after resume.  This also explains the register access
 failures.

 In tg3_resume(), we call pci_restore_state() which should re-enable
 those 2 bits in PCI register 4.  Can you add some printk() to see why
 those bits are not restored after pci_restore_state()?

Reading pci_restore_state() it looks already instrumented.  Sorry about the 
wrong pci device, looking at my BCM5788 it is pci device 08:04.0 and looking 
at the log from my first post there is nothing restored in the first 64 
bytes!  Otherwise it would have said PM: Writing back...

Is this what you are looking for or should I do other printk instrumentation?

-Joachim

 


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: NETPOLL=y , NETDEVICES=n compile error ( Re: 2.6.23-rc1-mm1 )

2007-08-02 Thread Jarek Poplawski

On Wed, Aug 01, 2007 at 09:02:19PM -0500, Matt Mackall wrote:
 On Wed, Aug 01, 2007 at 11:59:21AM +0200, Jarek Poplawski wrote:
  On Tue, Jul 31, 2007 at 05:05:00PM +0200, Gabriel C wrote:
   Jarek Poplawski wrote:
On Tue, Jul 31, 2007 at 12:14:36PM +0200, Gabriel C wrote:
Jarek Poplawski wrote:
On 28-07-2007 20:42, Gabriel C wrote:
Andrew Morton wrote:
On Sat, 28 Jul 2007 17:44:45 +0200 Gabriel C [EMAIL PROTECTED] 
wrote:
...
net/core/netpoll.c:155: error: 'struct net_device' has no member 
named 'poll_controller'
...
I think is because KGDBOE selects just NETPOLL.
   
Looks like it.
   
Select went and selected NETPOLL and NETPOLL_TRAP but things like
CONFIG_NETDEVICES and CONFIG_NET_POLL_CONTROLLER remain unset.  
`select'
remains evil.
...
  seems to select NET_POLL_CONTROLLER after selecting NETPOLL, but
  still doesn't check for NETDEVICES dependency.
 
 That's odd. Adding Sam to the cc:.

Looks right, but after reading Andrew's opinion about select I'd be
astonished if he doesn't know this problem already.

 
Now KGDBOE just selects NETPOLL and NETPOLL_TRAP.
Adding 'select CONFIG_NET_POLL_CONTROLLER' lets kgdboe compile, but the
question is: does it work without any ethernet card?

Why kgdboe should care what netpoll needs? So, I hope, you are adding
this select under config NETPOLL. On the other hand, if NETPOLL should
depend on NET_POLL_CONTROLLER there is probably no reason to have them
both.
   
   NET_POLL_CONTROLLER has def_bool NETPOLL if NETDEVICES .
   
   Net peoples ping ?:)
 
 How about cc:ing the netpoll maintainer?

Is there a new one or do you suggest possibility of abusing the
authority of the netpoll's author with such trifles...?!

BTW, I can't find any official meaning of def_bool, but its name
suggests only a default value, so logically it should not be enough
to ensure NET_POLL_CONTROLLER=y, and netpoll should use depends,
require or select (IMHO more readable too), but on the other
hand this could be practically wrong...

  
  OK, I wasn't right here: there is no visible reason for both in the
  kernel code, but I can imagine there could be some external users of
  NET_POLL_CONTROLLER without NETPOLL.
 
 I don't know of any. As far as I can tell at this point,
 NET_POLL_CONTROLLER == NETPOLL.

There are some notions about other diagnostic tools in some
net drivers, eg. 3c509.c, so there would be a little bit of
work if, after changing this, they really exist (and even if
not - maybe it's reasonable to save such possibility for the
future?).

Best regards,
Jarek P.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [REGRESSION] tg3 dead after s2ram

2007-08-02 Thread Joachim Deguara

On Thursday 02 August 2007 10:05:44 Joachim Deguara wrote:
 On Wednesday 01 August 2007 23:00:23 Michael Chan wrote:
  On Wed, 2007-08-01 at 10:47 -0700, Michael Chan wrote:
  The problem is that memory enable and bus master were not set in PCI
  register 4 after resume.  This also explains the register access
  failures.

Found it! I did the instrumentation and found pci_restore was not being 
called. Why, because this code failed:

printk(KERN_INFO tg3_resume: entered\n);
if (!netif_running(dev))
return 0;
printk(KERN_INFO tg3_resume: before restore\n);

Bad dmesg:
[0.581236] PM: Writing back config space on device :07:00.0 at offset 
4
(was 4, writing d024)
[0.581259] tg3_resume: entered
[0.581276] PM: Writing back config space on device :08:09.0 at offset 
f
(was c001ff, writing 1c0010b)

why is this, oh damn it is because I was calling openSUSE's powersave -u and 
not s2ram directly.  powersave is disabling the network and the tg3 will not 
restore the pci device like this! right?

Here is a good dmesg, obtained by calling s2ram directly:
[0.577085] PM: Writing back config space on device :07:00.0 at offset 
4
(was 4, writing d024)
[0.577108] tg3_resume: entered
[0.577109] tg3_resume: before restore
[0.577140] PM: Writing back config space on device :08:04.0 at offset 
c
(was 0, writing )
[0.577171] PM: Writing back config space on device :08:04.0 at offset 
1
(was 2b0, writing 2b6)
[0.577304] tg3_resume: after set_power_state
[0.579119] tg3: eth0: Link is down.
[0.786266] ata2: SATA link down (SStatus 0 SControl 310)
[0.848176] tg3_resume: after tg3_restart_hw
[0.848265] PM: Writing back config space on device :08:09.0 at offset 
f
(was c001ff, writing 1c0010b)

Seems like even if powersave shuts down the network, the device should
still work after a suspend to RAM, so who is at fault here?

-Joachim


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: TCP SACK issue, hung connection, tcpdump included

2007-08-02 Thread David Miller

From: Ilpo_Järvinen [EMAIL PROTECTED]
Date: Thu, 2 Aug 2007 12:23:23 +0300 (EEST)

 ...Seriously, somebody else than me is probably better in suggesting what 
 could cause the discarding at the SERVER in this case. SNMP stuff Dave was 
 asking could help, you can find them from /proc/net/{netstat,snmp}...

That will also tell us if TCP discarded the packet due to
timestamps tests or similar.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [RFC][PATCH] Removal of FASTROUTE definition include/linux/if_packet.h

2007-08-02 Thread David Miller

From: Rami Rosen [EMAIL PROTECTED]
Date: Thu, 2 Aug 2007 12:23:41 +0300

 Hi,
   It seems that PACKET_FASTROUTE definition should be removed due to that
   fastroute is no longer supported.

It's a value exported to userland, so removing it could
break application compilation, so we can't remove it.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: TCP SACK issue, hung connection, tcpdump included

2007-08-02 Thread Ilpo Järvinen

On Tue, 31 Jul 2007, Darryl L. Miles wrote:

 I've been able to capture a tcpdump from both ends during the problem and its
 my belief there is a bug in 2.6.20.1 (at the client side) in that it issues a
 SACK option for an old sequence which the current window being advertised is
 beyond it.  This is the most concerning issue as the integrity of the sequence
 numbers doesn't seem right (to my limited understanding anyhow).

You probably didn't check the reference I explicitly gave to those who 
are not familiar how DSACK works, just in case you didn't pick it up last 
time, here it is again for you: RFC2883... However, if DSACKs really 
bother you still (though it shouldn't :-)), IIRC I also told you how 
you're able to turn it off (tcp_dsack sysctl) but I assure you that it's 
not a bug but feature called DSACK [RFC2883], there's _absolutely_ nothing 
wrong with it, instead, it would be wrong to _not_ send the below snd_una 
SACK in this scenario when tcp_dsack set to 1.

 There is another concern of why the SERVER performed a retransmission in the
 first place, when the tcpdump shows the ack covering it has been seen.

There are only three possible reasons to this thing:
1) The ACK didn't reach the SERVER (your logs prove this to not be the 
case)
2) The ACK got discarded by the SERVER
3) The SERVER (not the client) is buggy and sends an incorrect 
retransmission

...So we have just two options remaining...

 I have made available the full dumps at:
 
 http://darrylmiles.org/snippets/lkml/20070731/

Thanks about these... Based on a quick check, it is rather clear that the 
SERVER is for some reason discarding the packets it's receiving:

04:11:26.833935 IP CLIENT.43726  SERVER.ssh: P 4239:4287(48) ack 28176 win 501 
nop,nop,timestamp 819646456 16345815
04:11:27.132425 IP SERVER.ssh  CLIENT.43726: . 26016:27464(1448) ack 4239 win 
2728 nop,nop,timestamp 17096579 819458864
04:11:27.230081 IP CLIENT.43726  SERVER.ssh: . ack 28176 win 501 
nop,nop,timestamp 819646555 16345815,nop,nop

Notice, (cumulative) ack field didn't advance though new data arrived, and 
for the record, it's in advertised window too. There are no DSACK in here 
so your theory about below snd_una SACK won't help to explain this one 
at all... We'll just have to figure out why it's discarding it. And 
there's even more to prove this...

 This sequence is interesting from the client side:

 03:58:56.419034 IP SERVER.ssh  CLIENT.43726: . 26016:27464(1448) ack 4239
 win 2728 nop,nop,timestamp 16345815 819458859 # S1
 03:58:56.419100 IP CLIENT.43726  SERVER.ssh: . ack 27464 win 501
 nop,nop,timestamp 819458884 16345815 # C1
 03:58:56.422019 IP SERVER.ssh  CLIENT.43726: P 27464:28176(712) ack 4239
 win 2728 nop,nop,timestamp 16345815 819458859 # S2
 03:58:56.422078 IP CLIENT.43726  SERVER.ssh: . ack 28176 win 501
 nop,nop,timestamp 819458884 16345815 # C2

 The above 4 packets look as expect to me.  Then we suddenly see a
 retransmission of 26016:27464.
 
 03:58:56.731597 IP SERVER.ssh  CLIENT.43726: . 26016:27464(1448) ack 4239
 win 2728 nop,nop,timestamp 16346128 819458864 # S3

...Look at this on the retransmission:
   ... timestamp 16346128 819458864

...it tells us what really got received by the TCP. The corresponding ACK 
with matching timestamp is, surprise, surprise, this one:

 03:58:56.340180 IP CLIENT.43726  SERVER.ssh: . ack 26016 win 501 
 nop,nop,timestamp 819458864 16345734

...thus the SERVER has _not_ received but discarded the subsequent 
cumulative ACKs!!! Therefore it's retransmitting from 26016 onward but 
never receives any reply as everything seems to get discarded...

There was one bad checksum btw:

 03:58:56.365662 IP (tos 0x10, ttl  64, id 28685, offset 0, flags [DF], 
 proto 6, length: 764) SERVER.ssh  CLIENT.43726: P [bad tcp cksum 6662 
 (-ef2b)!] 617734888:617735600(712) ack 2634113543 win 2728 
 nop,nop,timestamp 16345815 819458859


 There are some changes in 2.6.22 that appear to affect TCP SACK handling
 does this fix a known issue ?

There is no such known issue :-)... This issue has nothing to do with 
TCP SACK handling, since that code _won't_ be reached... We could verify 
that from the timestamps. But if you still insist that SACK under snd_una 
is the issue, please turn tcp_dsack to 0 on the CLIENT, you will not get 
them after that and you can be happy as your out-of-window SACK issue
is then fixed :-)... 

...Seriously, somebody else than me is probably better in suggesting what 
could cause the discarding at the SERVER in this case. SNMP stuff Dave was 
asking could help, you can find them from /proc/net/{netstat,snmp}...


-- 
 i.

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] SCTP: drop SACK if ctsn is not less than the next tsn of assoc

2007-08-02 Thread Wei Yongjun


Vlad Yasevich wrote:

This is a little better.

One suggestion.  The new function you create is almost exactly like
sctp_sf_violation_chunklen() with the exception of the error string.
Can you extract the common parts into a single function so that
we don't have duplication of code.

Thanks
-vlad


  
  
This is an interesting case, but I am not sure that simply discarding

the SACK is the right thing.

The peer in this case is violating the protocol whereby he is trying to
advance the cumulative tsn ack to a point beyond the max tsn currently
sent. I would vote for terminating the association in this case since
either the peer is a mis-behaved implementation, or the association is
under attack.
  

The patch has been modified based on the comments.
Thanks.

Signed-off-by: Wei Yongjun [EMAIL PROTECTED]

--- net/sctp/sm_statefuns.c.orig2007-07-29 18:11:01.0 -0400
+++ net/sctp/sm_statefuns.c 2007-07-31 17:49:22.0 -0400
@@ -97,6 +97,13 @@
   const struct sctp_association *asoc,
   struct sctp_transport *transport);

+static sctp_disposition_t sctp_sf_abort_violation(
+const struct sctp_association *asoc,
+void *arg,
+sctp_cmd_seq_t *commands,
+const __u8 *payload,
+const size_t paylen);
+
static sctp_disposition_t sctp_sf_violation_chunklen(
 const struct sctp_endpoint *ep,
 const struct sctp_association *asoc,
@@ -104,6 +111,13 @@
 void *arg,
 sctp_cmd_seq_t *commands);

+static sctp_disposition_t sctp_sf_violation_ctsn(
+const struct sctp_endpoint *ep,
+const struct sctp_association *asoc,
+const sctp_subtype_t type,
+void *arg,
+sctp_cmd_seq_t *commands);
+
/* Small helper function that checks if the chunk length
 * is of the appropriate length.  The 'required_length' argument
 * is set to be the size of a specific chunk we are testing.
@@ -2880,6 +2894,13 @@
return SCTP_DISPOSITION_DISCARD;
}

+   /* If Cumulative TSN Ack beyond the max tsn currently
+* send, terminating the association and respond to the
+* sender with an ABORT.
+*/
+   if (!TSN_lt(ctsn, asoc-next_tsn))
+   return sctp_sf_violation_ctsn(ep, asoc, type, arg, commands);
+
/* Return this SACK for further processing.  */
sctp_add_cmd_sf(commands, SCTP_CMD_PROCESS_SACK, SCTP_SACKH(sackh));

@@ -3691,40 +3712,21 @@
return SCTP_DISPOSITION_VIOLATION;
}

-
/*
- * Handle a protocol violation when the chunk length is invalid.
- * Invalid length is identified as smaller then the minimal length a
- * given chunk can be.  For example, a SACK chunk has invalid length
- * if it's length is set to be smaller then the size of sctp_sack_chunk_t.
- *
- * We inform the other end by sending an ABORT with a Protocol Violation
- * error code.
- *
- * Section: Not specified
- * Verification Tag:  Nothing to do
- * Inputs
- * (endpoint, asoc, chunk)
- *
- * Outputs
- * (reply_msg, msg_up, counters)
- *
- * Generate an  ABORT chunk and terminate the association.
+ * Common function to handle a protocol violation.
 */
-static sctp_disposition_t sctp_sf_violation_chunklen(
-const struct sctp_endpoint *ep,
+static sctp_disposition_t sctp_sf_abort_violation(
 const struct sctp_association *asoc,
-const sctp_subtype_t type,
 void *arg,
-sctp_cmd_seq_t *commands)
+sctp_cmd_seq_t *commands,
+const __u8 *payload,
+const size_t paylen)
{
struct sctp_chunk *chunk =  arg;
struct sctp_chunk *abort = NULL;
-   char   err_str[]=The following chunk had invalid length:;

/* Make the abort chunk. */
-   abort = sctp_make_abort_violation(asoc, chunk, err_str,
- sizeof(err_str));
+   abort = sctp_make_abort_violation(asoc, chunk, payload, paylen);
if (!abort)
goto nomem;

@@ -3756,6 +3758,57 @@
return SCTP_DISPOSITION_NOMEM;
}

+/*
+ * Handle a protocol violation when the chunk length is invalid.
+ * Invalid length is identified as smaller then the minimal length a
+ * given chunk can be.  For example, a SACK chunk has invalid length
+ * if it's length is set to be smaller then the size of

Re: [REGRESSION] tg3 dead after s2ram

2007-08-02 Thread David Miller

From: Joachim Deguara [EMAIL PROTECTED]
Date: Thu, 2 Aug 2007 11:15:05 +0200

 Seems like even if powersave shuts down the network, the device should
 still work after a suspend to RAM, so who is at fault here?

It's a good question.

The pci_enable() is done on the PCI device at probe time, at least in
the tg3 driver, and with such a model restoring and saving of PCI
config space should not be dependent upon whether the netdev is
running or not.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[RFC][PATCH] Removal of FASTROUTE definition include/linux/if_packet.h

2007-08-02 Thread Rami Rosen

Hi,
  It seems that the PACKET_FASTROUTE definition should be removed, because
fastroute is no longer supported.

Regards,
Rami Rosen
--


Signed-off-by: Rami Rosen [EMAIL PROTECTED]


--- linux-2.6.23-rc1-clean/include/linux/if_packet.h2007-05-03
12:07:59.0 +0300
+++ linux-2.6.23-rc1/include/linux/if_packet.h  2007-08-02
11:37:16.0 +0300
@@ -30,7 +30,6 @@ struct sockaddr_ll
 #define PACKET_OUTGOING4   /* Outgoing of any type 
*/
 /* These ones are invisible by user level */
 #define PACKET_LOOPBACK5   /* MC/BRD frame looped 
back */
-#define PACKET_FASTROUTE   6   /* Fastrouted frame */

 /* Packet socket options */
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: strange tcp behavior

2007-08-02 Thread Evgeniy Polyakov

On Thu, Aug 02, 2007 at 09:19:06AM +0300, [EMAIL PROTECTED] ([EMAIL PROTECTED]) 
wrote:
 1186035057.207629127.0.0.1 - 127.0.0.1TCP 5  smtp [SYN]
 Seq=0 Len=0
 1186035057.207632127.0.0.1 - 127.0.0.1TCP smtp  5 [SYN, ACK]
 Seq=0 Ack=1 Win=32792 Len=0 MSS=16396
 1186035057.207666127.0.0.1 - 127.0.0.1TCP 5  smtp [ACK]
 Seq=1 Ack=1 Win=1500 Len=0
 1186035057.207699127.0.0.1 - 127.0.0.1SMTP Command: EHLO localhost
 1186035057.207718127.0.0.1 - 127.0.0.1TCP smtp  5 [ACK]
 Seq=1 Ack=17 Win=32792 Len=0
 1186035057.207736127.0.0.1 - 127.0.0.1TCP 5  smtp [RST]
 Seq=17 Len=0
 1186035057.223934127.0.0.1 - 127.0.0.1TCP 33787  5 [RST,
 ACK] Seq=0 Ack=0 Win=32792 Len=0
 
 Can someone please comment as to why, tcp  stack sends rst packet from the
 wrong source port in this situation.

Besides the fact that the test applications do not run unless started as
root, I got this:

13:51:12.180241 IP localhost.localdomain.5  localhost.localdomain.10250: S 
906222067:906222067(0) win 1500
13:51:12.180279 IP localhost.localdomain.10250  localhost.localdomain.5: S 
2011233747:2011233747(0) ack 906222068
win 32792 mss 16396
13:51:12.180293 IP localhost.localdomain.5  localhost.localdomain.10250: R 
906222068:906222068(0) win 0
13:51:12.180320 IP localhost.localdomain.5  localhost.localdomain.10250: . 
ack 1 win 1500
13:51:12.180329 IP localhost.localdomain.10250  localhost.localdomain.5: R 
2011233748:2011233748(0) win 0
13:51:12.180341 IP localhost.localdomain.5  localhost.localdomain.10250: P 
1:17(16) ack 1 win 1500
13:51:12.180349 IP localhost.localdomain.10250  localhost.localdomain.5: R 
2011233748:2011233748(0) win 0
13:51:12.180361 IP localhost.localdomain.5  localhost.localdomain.10250: R 
906222084:906222084(0) win 1500

I.e. there is no bug in this session.
FC7 2.6.22.1-27.fc7 kernel.

Here is vanilla (with my patches, unrelated to the problem though)
2.6.22-rc5:

09:33:37.650279 IP localhost.5  localhost.10250: S 
1326688203:1326688203(0) win 1500
09:33:37.664391 IP localhost.10250  localhost.5: S 
3637551175:3637551175(0) ack 1326688204 win 32792 mss 16396
09:33:37.664417 IP localhost.5  localhost.10250: R 
1326688204:1326688204(0) win 0
09:33:37.650451 IP localhost.5  localhost.10250: . ack 1 win 1500
09:33:37.650467 IP localhost.10250  localhost.5: R 
3637551176:3637551176(0) win 0
09:33:37.650481 IP localhost.5  localhost.10250: P 1:17(16) ack 1 win 1500
09:33:37.650493 IP localhost.10250  localhost.5: R 
3637551176:3637551176(0) win 0
09:33:37.650507 IP localhost.5  localhost.10250: R 
1326688220:1326688220(0) win 1500


Is it possible that your tcpdump is screwed?

-- 
Evgeniy Polyakov
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: NETPOLL=y , NETDEVICES=n compile error ( Re: 2.6.23-rc1-mm1 )

2007-08-02 Thread Sam Ravnborg

  
  ...
  endif # NETDEVICES
  
  config NETPOLL
  depends on NETDEVICES
  def_bool NETCONSOLE
  
  config NETPOLL_TRAP
  bool Netpoll traffic trapping
  default n
  depends on NETPOLL
  
  config NET_POLL_CONTROLLER
  def_bool NETPOLL
  depends on NETPOLL
  
  
  seems to select NET_POLL_CONTROLLER after selecting NETPOLL, but
  still doesn't check for NETDEVICES dependency.
 
 That's odd. Adding Sam to the cc:.

select is evil
select will by brute force set a symbol equal to 'y' without
visiting the dependencies.
So abusing select you are able to select a symbol FOO even 
if FOO depends on BAR that is not set.

In general use select only for non-visible symbols (no prompts anywhere)
and for symbols with no dependencies.
That will limit the usefulness but on the other hand avoid the illegal
configurations all over.

kconfig should one day warn about such things but I have not felt inclined
to dive into the matters hoping that Roman does one day.

Sam
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: strange tcp behavior

2007-08-02 Thread Evgeniy Polyakov

On Thu, Aug 02, 2007 at 01:55:50PM +0400, Evgeniy Polyakov ([EMAIL PROTECTED]) 
wrote:
 On Thu, Aug 02, 2007 at 09:19:06AM +0300, [EMAIL PROTECTED] ([EMAIL 
 PROTECTED]) wrote:
  1186035057.207629127.0.0.1 - 127.0.0.1TCP 5  smtp [SYN]
  Seq=0 Len=0
  1186035057.207632127.0.0.1 - 127.0.0.1TCP smtp  5 [SYN, ACK]
  Seq=0 Ack=1 Win=32792 Len=0 MSS=16396
  1186035057.207666127.0.0.1 - 127.0.0.1TCP 5  smtp [ACK]
  Seq=1 Ack=1 Win=1500 Len=0
  1186035057.207699127.0.0.1 - 127.0.0.1SMTP Command: EHLO localhost
  1186035057.207718127.0.0.1 - 127.0.0.1TCP smtp  5 [ACK]
  Seq=1 Ack=17 Win=32792 Len=0
  1186035057.207736127.0.0.1 - 127.0.0.1TCP 5  smtp [RST]
  Seq=17 Len=0
  1186035057.223934127.0.0.1 - 127.0.0.1TCP 33787  5 [RST,
  ACK] Seq=0 Ack=0 Win=32792 Len=0
  
  Can someone please comment as to why, tcp  stack sends rst packet from the
  wrong source port in this situation.
 
 Besides the fact, that test applications do not run if started not as
 root, I got this:

And it actually does not initialize a session, since the third line below
shows RST, not ACK. The same happens with the sendmail SMTP server (i.e. port
25, like in your server) and the unmodified client.
Please provide application which can trigger the issue and I will help
to debug this issue. If it will help you to debug client, I can run
tcpdump on public server (say 194.85.82.65, please tell me your source 
address) to collect dumps. Current code does not trigger the issue on my
machines (and works not like was intended by you). Ugh, and code really
looks horrible...

-- 
Evgeniy Polyakov
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: NETPOLL=y , NETDEVICES=n compile error ( Re: 2.6.23-rc1-mm1 )

2007-08-02 Thread Satyam Sharma

Hi,


On Thu, 2 Aug 2007, Sam Ravnborg wrote:

   
   ...
   endif # NETDEVICES
   
   config NETPOLL
   depends on NETDEVICES
   def_bool NETCONSOLE
   
   config NETPOLL_TRAP
   bool Netpoll traffic trapping
   default n
   depends on NETPOLL
   
   config NET_POLL_CONTROLLER
   def_bool NETPOLL
   depends on NETPOLL
   
   
   seems to select NET_POLL_CONTROLLER after selecting NETPOLL, but
   still doesn't check for NETDEVICES dependency.
  
  That's odd. Adding Sam to the cc:.

I just noticed this thread, but I wonder what the fuss is all
about :-) Kconfig dependencies are easy, really -- any code that
pulls in code from elsewhere must explicitly depend on it.
It is possible to use select as well, but could lead to breakages
as discussed to death on at least 64592 other threads on LKML already
and hence should only be used for library-like code that does not
have any dependencies itself.


 select is evil
 select will by brute force set a symbol equal to 'y' without
 visiting the dependencies.
 So abusing select you are able to select a symbol FOO even 
 if FOO depends on BAR that is not set.
 
 In general use select only for non-visible symbols (no promts anywhere)
 and for symbols with no dependencies.
 That will limit the suefullness but on the other hand avoid the illegal
 configurations all over.

The problem with using depends on is that your config symbol becomes
invisible unless the dependency has already been selected.

So, there's a workaround: make the ultimate config symbol itself depend
upon the grand-dependency (excuse the nomenclature) and just select
the immediate-parent-dependency, i.e. the following:

CONFIG_BAZ
...

CONFIG BAR
depends on BAZ

CONFIG_FOO
depends on BAZ
select BAR

is perfectly legal, and doesn't cause any build problems. Perhaps such a
solution makes sense here as well?


 kconfig should one day warn about such things but I have not fel inclined
 to dive into the matters hoping that Roman does one day.

Yup, I've wanted to do this myself, in fact I wanted to implement an idea
I had in mind ( http://lkml.org/lkml/2007/5/16/257 ) but for some reason
I tend to stay away from stuff in scripts/ :-)


Satyam
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/2] [TCP]: Also handle snd_una changes in tcp_cwnd_down

2007-08-02 Thread Ilpo Järvinen

On Wed, 1 Aug 2007, Ilpo Järvinen wrote:

 
 tcp_cwnd_down must check for it too as it should be conservative
 in case of collapse stuff and also when receiver is trying to
 lie (though that wouldn't be very successful/useful anyway).
 
 Note:
 - Separated also is_dupack and do_lost in fast_retransalert
   * Much cleaner look-and-feel now
   * This time it really fixes cumulative ACK with many new
 SACK blocks recovery entry (I claimed this fixes with
 last patch but it wasn't). TCP will now call
 tcp_update_scoreboard regardless of is_dupack when
 in recovery as long as there is enough fackets_out.
 - Introduce FLAG_SND_UNA_ADVANCED
   * Some prior_snd_una arguments are unnecessary after it
 - Added helper FLAG_ANY_PROGRESS to avoid long FLAG...|FLAG...
   constructs
 
 Signed-off-by: Ilpo Järvinen [EMAIL PROTECTED]
 ---
 
 Dave, BEWARE, I wasn't able to do anything else but compile test
 because Linus' tree didn't seem to boot on the machine I was
 trying to test it... :-(
 
 I think that to stable version only a small part of this change
 is necessary, not the full changeset. That should keep stable
 folks much happier... :-) I'll soon put my reduced proposal to:
   http://www.cs.helsinki.fi/u/ijjarvin/patches/stable-0001.patch
 The other patch (DSACK) can go to stable as is.

I placed those two earlier sent bidir fixes and these two additional fixes 
on top of 2.6.22 and was finally able to have them tested on a bootable 
kernel (I had a boot failure on another machine too with 2.6.23-rc1 
stuff). FACKNewReno/bidir and FACK/unidir tested, time-seq graphs were 
ok.

Dave, please put these two patches to net-2.6 to complete bidir fix 
series. ...And please push to stable as well, take just the minimized 
fix portion of this [TCP]: Also handle snd_una changes in 
tcp_cwnd_down patch as I described above. Other cleanups in it can be
put just to net-2.6.

-- 
 i.

Re: NETPOLL=y , NETDEVICES=n compile error ( Re: 2.6.23-rc1-mm1 )

2007-08-02 Thread Satyam Sharma

[ Read through the thread, looked at Kconfig files,
  did some tests. Adding Kconfig experts to Cc: list. ]


 On Thu, 2 Aug 2007, Sam Ravnborg wrote:
 

...
endif # NETDEVICES

config NETPOLL
depends on NETDEVICES
def_bool NETCONSOLE

config NETPOLL_TRAP
bool Netpoll traffic trapping
default n
depends on NETPOLL

config NET_POLL_CONTROLLER
def_bool NETPOLL
depends on NETPOLL


Gargh, what we're seeing here is a whole bunch of bugs, I think. First
I thought this must be one of those randconfig-producing-wrong-configs
issues, but surprisingly, running make oldconfig on this .config on
a fresh 2.6.23-rc1-mm1 tree didn't change anything in the .config.


Kconfig bug #1:
===

Which means, although:

*
menuconfig BAZ

if BAZ

config BAR

endif
*

is widely believed (by most folks, I've heard this on several threads,
and as written in the comment in drivers/net/Kconfig) to be equivalent to:

*
menuconfig BAZ

if BAZ
endif

config BAR
depends on BAZ
*

this is *not* enforced by make oldconfig! And hence, the NETPOLL &&
!NETDEVICES situation we're seeing here.

[ We could also categorize this as a bug in Kconfig's if, fwiw. ]


Kconfig bug #2:
===

config FOO
def_bool BAR

is supposed to ensure that FOO == BAR (as Matt mentioned earlier).

However, even this is *not* enforced by make oldconfig. And hence,
the NETPOLL && !NET_POLL_CONTROLLER situation we're seeing here.

In fact, I believe it's possible to even pass a NETCONSOLE but
!NETPOLL kind of .config through make oldconfig but it still won't
catch it, and build breakages *will* occur.

[ We could also categorize this as a bug in Kconfig's def_bool, fwiw. ]

Possibly, we could also decide to just blame randconfig for the whole
issue, and forget about these, because I think it's highly unlikely
(though not impossible) for people with real .configs to hit the
problems we saw above.


KGDBOE bug #1:
==

config KGDBOE in lib/Kconfig.kgdb must also depend on NETDEVICES,
and select NET_POLL_CONTROLLER also.


KGDBOE bug #2:
==

config KGDBOE_NOMODULE is a sad, sad option, and must be killed. The
if !KGDBOE_NOMODULE in KGDBOE must be removed, and it must lose its
dependency on m.


Satyam
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: NETPOLL=y , NETDEVICES=n compile error ( Re: 2.6.23-rc1-mm1 )

2007-08-02 Thread Jarek Poplawski

On Thu, Aug 02, 2007 at 04:02:21PM +0530, Satyam Sharma wrote:
 Hi,
 
 
 On Thu, 2 Aug 2007, Sam Ravnborg wrote:
 

...
endif # NETDEVICES

config NETPOLL
depends on NETDEVICES
def_bool NETCONSOLE

config NETPOLL_TRAP
bool Netpoll traffic trapping
default n
depends on NETPOLL

config NET_POLL_CONTROLLER
def_bool NETPOLL
depends on NETPOLL


seems to select NET_POLL_CONTROLLER after selecting NETPOLL, but
still doesn't check for NETDEVICES dependency.
   
   That's odd. Adding Sam to the cc:.
 
 I just noticed this thread, but I wonder what the fuss is all
 about :-) Kconfig dependencies are easy, really -- any code that
 pulls in code from elsewhere, must explicitly depends on it.
 It is possible to use select as well, but could lead to breakages
 as discussed to death on at least 64592 other threads on LKML already
 and hence should only be used for library-like code that does not
 have any dependencies itself.

So, it seems at least one time not enough (or maybe it would be better
to write this 1 time only, but in Documentation/).

 
 
  select is evil
  select will by brute force set a symbol equal to 'y' without
  visiting the dependencies.
  So abusing select you are able to select a symbol FOO even 
  if FOO depends on BAR that is not set.
  
  In general use select only for non-visible symbols (no prompts anywhere)
  and for symbols with no dependencies.
  That will limit the usefulness but on the other hand avoid the illegal
  configurations all over.
 
 The problem with using depends on is that your config symbol becomes
 invisible unless the dependency has already been selected.
 
 So, there's a workaround: make the ultimate config symbol itself depend
 upon the grand-dependency (excuse the nomenclature) and just select
 the immediate-parent-dependency, i.e. the following:
 
 CONFIG_BAZ
   ...
 
 CONFIG BAR
   depends on BAZ
 
 CONFIG_FOO
   depends on BAZ
   select BAR
 
 is perfectly legal, and doesn't cause any build problems. Perhaps such a
 solution makes sense here as well?
 
 
  kconfig should one day warn about such things but I have not felt inclined
  to dive into the matters hoping that Roman does one day.
 
 Yup, I've wanted to do this myself, in fact I wanted to implement an idea
 I had in mind ( http://lkml.org/lkml/2007/5/16/257 ) but for some reason
 I tend to stay away from stuff in scripts/ :-)

How often common developer has to make such decisions in Kconfig?
Probably no more than once per year. So, it's fair to blame anybody
for not reading lkml to find if there are some bugs or
recommendations before using apparently simple tool? I think there
is usually some README for such things (maybe in Documentation/)?

Thanks,
Jarek P.

PS: if it's so easy and it's enough to read only 64592 lkml messages,
I wonder why Andrew, who knows all lkml, and reads more messages per
hour, cared to remember mainly one short conclusion...
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: strange tcp behavior

2007-08-02 Thread Simon Arlott


On Thu, August 2, 2007 11:16, Evgeniy Polyakov wrote:
 On Thu, Aug 02, 2007 at 01:55:50PM +0400, Evgeniy Polyakov ([EMAIL 
 PROTECTED]) wrote:
 On Thu, Aug 02, 2007 at 09:19:06AM +0300, [EMAIL PROTECTED] ([EMAIL 
 PROTECTED]) wrote:
  1186035057.207629127.0.0.1 - 127.0.0.1TCP 5  smtp [SYN]
  Seq=0 Len=0
  1186035057.207632127.0.0.1 - 127.0.0.1TCP smtp  5 [SYN, ACK]
  Seq=0 Ack=1 Win=32792 Len=0 MSS=16396
  1186035057.207666127.0.0.1 - 127.0.0.1TCP 5  smtp [ACK]
  Seq=1 Ack=1 Win=1500 Len=0
  1186035057.207699127.0.0.1 - 127.0.0.1SMTP Command: EHLO localhost
  1186035057.207718127.0.0.1 - 127.0.0.1TCP smtp  5 [ACK]
  Seq=1 Ack=17 Win=32792 Len=0
  1186035057.207736127.0.0.1 - 127.0.0.1TCP 5  smtp [RST]
  Seq=17 Len=0
  1186035057.223934127.0.0.1 - 127.0.0.1TCP 33787  5 [RST,
  ACK] Seq=0 Ack=0 Win=32792 Len=0
 
  Can someone please comment as to why, tcp  stack sends rst packet from the
  wrong source port in this situation.

 Besides the fact, that test applications do not run if started not as
 root, I got this:

 And it actually does not initialize a session, since third line below
 shows RST, but not ack. The same with sendmail smtp server (i.e. 25 port
 like in your server) and unmodified client.
 Please provide application which can trigger the issue and I will help
 to debug this issue. If it will help you to debug client, I can run
 tcpdump on public server (say 194.85.82.65, please tell me your source
 address) to collect dumps. Current code does not trigger the issue on my
 machines (and works not like was intended by you). Ugh, and code really
 looks horrible...


I just got multiple RSTs instead of a connection too. The second RST looks
like it's from another connection - and a RST for a RST is wrong...

-- 
Simon Arlott
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: strange tcp behavior

2007-08-02 Thread Simon Arlott

(Don't remove CC:s, don't top post)
 On Thu, August 2, 2007 11:16, Evgeniy Polyakov wrote:
 On Thu, Aug 02, 2007 at 01:55:50PM +0400, Evgeniy Polyakov
 ([EMAIL PROTECTED]) wrote:
 On Thu, Aug 02, 2007 at 09:19:06AM +0300, [EMAIL PROTECTED]
 ([EMAIL PROTECTED]) wrote:
  1186035057.207629127.0.0.1 - 127.0.0.1TCP 5  smtp [SYN]
  Seq=0 Len=0
  1186035057.207632127.0.0.1 - 127.0.0.1TCP smtp  5 [SYN,
 ACK]
  Seq=0 Ack=1 Win=32792 Len=0 MSS=16396
  1186035057.207666127.0.0.1 - 127.0.0.1TCP 5  smtp [ACK]
  Seq=1 Ack=1 Win=1500 Len=0
  1186035057.207699127.0.0.1 - 127.0.0.1SMTP Command: EHLO
 localhost
  1186035057.207718127.0.0.1 - 127.0.0.1TCP smtp  5 [ACK]
  Seq=1 Ack=17 Win=32792 Len=0
  1186035057.207736127.0.0.1 - 127.0.0.1TCP 5  smtp [RST]
  Seq=17 Len=0
  1186035057.223934127.0.0.1 - 127.0.0.1TCP 33787  5
 [RST,
  ACK] Seq=0 Ack=0 Win=32792 Len=0
 
  Can someone please comment as to why, tcp  stack sends rst packet
 from the
  wrong source port in this situation.

 Besides the fact, that test applications do not run if started not as
 root, I got this:

 And it actually does not initialize a session, since third line below
 shows RST, but not ack. The same with sendmail smtp server (i.e. 25 port
 like in your server) and unmodified client.
 Please provide application which can trigger the issue and I will help
 to debug this issue. If it will help you to debug client, I can run
 tcpdump on public server (say 194.85.82.65, please tell me your source
 address) to collect dumps. Current code does not trigger the issue on my
 machines (and works not like was intended by you). Ugh, and code really
 looks horrible...


 I just got multiple RSTs instead of a connection too. The second RST looks
 like it's from another connection - and a RST for a RST is wrong...

On Thu, August 2, 2007 12:45, [EMAIL PROTECTED] wrote:
 you need to add iptables rule for this to
 work, or else the tcp resets connection too early because it does not know
 that something is listening on 5 port.

 iptables -I INPUT -p tcp --dport 5 -j DROP should do the job.

You didn't mention this before.

Without the server running:

13:02:23.314352 IP 127.0.0.1.5  127.0.0.1.2500: S 53123695:53123695(0) win 
1500
13:02:23.314442 IP 127.0.0.1.2500  127.0.0.1.5: R 0:0(0) ack 53123696 win 0
13:02:25.906975 IP 127.0.0.1.3315  127.0.0.1.49197: P 
1285306902:1285307318(416) ack 1267361915 win 1024
nop,nop,timestamp 3575709021 3575672670
13:02:25.907060 IP 127.0.0.1.49197  127.0.0.1.3315: . ack 416 win 1541 
nop,nop,timestamp 3575709021
3575709021

With the server running:

13:05:55.234696 IP 127.0.0.1.5  127.0.0.1.2500: S 1960601450:1960601450(0) 
win 1500
13:05:55.234799 IP 127.0.0.1.2500  127.0.0.1.5: S 2171862150:2171862150(0) 
ack 1960601451 win 32792
mss 16396
13:05:55.238271 IP 127.0.0.1.5  127.0.0.1.2500: . ack 1 win 1500
13:05:55.240034 IP 127.0.0.1.5  127.0.0.1.2500: P 1:17(16) ack 1 win 1500
13:05:55.240132 IP 127.0.0.1.2500  127.0.0.1.5: . ack 17 win 32792
13:05:55.242251 IP 127.0.0.1.5  127.0.0.1.2500: R 1960601467:1960601467(0) 
win 1500
13:05:55.253884 IP 127.0.0.1.56434  127.0.0.1.5: R 
2171862151:2171862151(0) ack 1960601467 win 32792

Weird. I resent your final RST a few times with a delay:

13:13:05.199275 IP 127.0.0.1.5  127.0.0.1.2500: S 83018811:83018811(0) win 
1500
13:13:05.199378 IP 127.0.0.1.2500  127.0.0.1.5: S 2627922927:2627922927(0) 
ack 83018812 win 32792 mss
16396
13:13:05.203368 IP 127.0.0.1.5  127.0.0.1.2500: . ack 1 win 1500
13:13:05.205049 IP 127.0.0.1.5  127.0.0.1.2500: P 1:17(16) ack 1 win 1500
13:13:05.205173 IP 127.0.0.1.2500  127.0.0.1.5: . ack 17 win 32792
13:13:05.206463 IP 127.0.0.1.5  127.0.0.1.2500: R 83018828:83018828(0) win 
1500
13:13:05.207656 IP 127.0.0.1.5  127.0.0.1.2500: R 83018828:83018828(0) win 
1500
13:13:05.217664 IP 127.0.0.1.55271  127.0.0.1.5: R 
2627922928:2627922928(0) ack 83018828 win 32792
13:13:05.510239 IP 127.0.0.1.5  127.0.0.1.2500: R 83018828:83018828(0) win 
1500
13:13:05.511644 IP 127.0.0.1.5  127.0.0.1.2500: R 83018828:83018828(0) win 
1500
13:13:05.512764 IP 127.0.0.1.5  127.0.0.1.2500: R 83018828:83018828(0) win 
1500

I don't know where that extra RST is coming from.
This test would be more convincing between two hosts, since your bizarre
client is using raw sockets as root and could be doing anything.

-- 
Simon Arlott
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: strange tcp behavior

2007-08-02 Thread Evgeniy Polyakov

On Thu, Aug 02, 2007 at 12:38:59PM +0100, Simon Arlott ([EMAIL PROTECTED]) 
wrote:
 I just got multiple RSTs instead of a connection too. The second RST looks
 like it's from another connection - and a RST for a RST is wrong...

You should use iptables rule to block non-raw access:
iptables -I INPUT -p tcp --dport 5 -j DROP

but even in that case I got valid session.

 -- 
 Simon Arlott

-- 
Evgeniy Polyakov
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: NETPOLL=y , NETDEVICES=n compile error ( Re: 2.6.23-rc1-mm1 )

2007-08-02 Thread Jarek Poplawski

On Thu, Aug 02, 2007 at 05:26:12PM +0530, Satyam Sharma wrote:
...
 Whoops, I only said that in humour, probably should've snuck in a
 smiley or two. Definitely not blaming anybody. Apologies to anyone
 who felt offended, sorry, nothing such was intended, I assure.

I see you probably didn't notice my smileys too. I need them so often
that I've to abbreviate them with something like this: ,.?!
But, I'm also sorry if you felt confused I felt offended etc...

Jarek P.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: strange tcp behavior

2007-08-02 Thread Evgeniy Polyakov

On Thu, Aug 02, 2007 at 04:04:53PM +0400, Evgeniy Polyakov ([EMAIL PROTECTED]) 
wrote:
 On Thu, Aug 02, 2007 at 12:38:59PM +0100, Simon Arlott ([EMAIL PROTECTED]) 
 wrote:
  I just got multiple RSTs instead of a connection too. The second RST looks
  like it's from another connection - and a RST for a RST is wrong...
 
 You should use iptables rule to block non-raw access:
 iptables -I INPUT -p tcp --dport 5 -j DROP
 
 but even in that case I got valid session.

Ok, I can now reproduce the problem.
I will try to debug it further.

-- 
Evgeniy Polyakov
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] Merge the Sonics Silicon Backplane subsystem

2007-08-02 Thread Michael Buesch

On Thursday 02 August 2007, Geert Uytterhoeven wrote:
 On Fri, 27 Jul 2007, Michael Buesch wrote:
  The Sonics Silicon Backplane is a mini-bus used on
  various Broadcom chips and embedded devices.
  Devices using the SSB include b44, bcm43xx and various
  Broadcom based wireless routers.
  A b44 and bcm43xx port and a SSB based OHCI driver is available.
 
  --- a/drivers/Kconfig
  +++ b/drivers/Kconfig
  @@ -58,6 +58,8 @@ source drivers/power/Kconfig
   
   source drivers/hwmon/Kconfig
   
  +source drivers/ssb/Kconfig
  +
   source drivers/mfd/Kconfig
   
   source drivers/media/Kconfig
 
  --- /dev/null
  +++ b/drivers/ssb/Kconfig
  @@ -0,0 +1,92 @@
  +menu Sonics Silicon Backplane
  +
  +config SSB
  +   tristate Sonics Silicon Backplane support
  +   depends on EXPERIMENTAL
 
 Hence this will show up on all platforms?

So?
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] Merge the Sonics Silicon Backplane subsystem

2007-08-02 Thread Geert Uytterhoeven

On Fri, 27 Jul 2007, Michael Buesch wrote:
 The Sonics Silicon Backplane is a mini-bus used on
 various Broadcom chips and embedded devices.
 Devices using the SSB include b44, bcm43xx and various
 Broadcom based wireless routers.
 A b44 and bcm43xx port and a SSB based OHCI driver is available.

 --- a/drivers/Kconfig
 +++ b/drivers/Kconfig
 @@ -58,6 +58,8 @@ source drivers/power/Kconfig
  
  source drivers/hwmon/Kconfig
  
 +source drivers/ssb/Kconfig
 +
  source drivers/mfd/Kconfig
  
  source drivers/media/Kconfig

 --- /dev/null
 +++ b/drivers/ssb/Kconfig
 @@ -0,0 +1,92 @@
 +menu Sonics Silicon Backplane
 +
 +config SSB
 + tristate Sonics Silicon Backplane support
 + depends on EXPERIMENTAL

Hence this will show up on all platforms?

Gr{oetje,eeting}s,

Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- [EMAIL PROTECTED]

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say programmer or something like that.
-- Linus Torvalds
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] SCTP: drop SACK if ctsn is not less than the next tsn of assoc

2007-08-02 Thread Vlad Yasevich

Wei Yongjun wrote:
 Patch has been modified base on comment.
 Thanks.
 
 Signed-off-by: Wei Yongjun [EMAIL PROTECTED]
 

Ok, I've applied this patch, but in the future, please
generate patches so that they can be applied
with a -p1 flag.

Please see Documentation/SubmittingPatches for proper
format.

Thanks
-vlad
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] Merge the Sonics Silicon Backplane subsystem

2007-08-02 Thread Geert Uytterhoeven

On Thu, 2 Aug 2007, Michael Buesch wrote:
 On Thursday 02 August 2007, Geert Uytterhoeven wrote:
  On Fri, 27 Jul 2007, Michael Buesch wrote:
   The Sonics Silicon Backplane is a mini-bus used on
   various Broadcom chips and embedded devices.
   Devices using the SSB include b44, bcm43xx and various
   Broadcom based wireless routers.
   A b44 and bcm43xx port and a SSB based OHCI driver is available.
  
   --- a/drivers/Kconfig
   +++ b/drivers/Kconfig
   @@ -58,6 +58,8 @@ source drivers/power/Kconfig

source drivers/hwmon/Kconfig

   +source drivers/ssb/Kconfig
   +
source drivers/mfd/Kconfig

source drivers/media/Kconfig
  
   --- /dev/null
   +++ b/drivers/ssb/Kconfig
   @@ -0,0 +1,92 @@
   +menu Sonics Silicon Backplane
   +
   +config SSB
   + tristate Sonics Silicon Backplane support
   + depends on EXPERIMENTAL
  
  Hence this will show up on all platforms?
 
 So?

Shouldn't you add a dependency for platforms where it makes sense to have SSB?

Gr{oetje,eeting}s,

Geert

--
Geert Uytterhoeven -- There's lots of Linux beyond ia32 -- [EMAIL PROTECTED]

In personal conversations with technical people, I call myself a hacker. But
when I'm talking to journalists I just say programmer or something like that.
-- Linus Torvalds
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] Merge the Sonics Silicon Backplane subsystem

2007-08-02 Thread Michael Buesch

On Thursday 02 August 2007, Geert Uytterhoeven wrote:
 On Thu, 2 Aug 2007, Michael Buesch wrote:
  On Thursday 02 August 2007, Geert Uytterhoeven wrote:
   On Fri, 27 Jul 2007, Michael Buesch wrote:
The Sonics Silicon Backplane is a mini-bus used on
various Broadcom chips and embedded devices.
Devices using the SSB include b44, bcm43xx and various
Broadcom based wireless routers.
A b44 and bcm43xx port and a SSB based OHCI driver is available.
   
--- a/drivers/Kconfig
+++ b/drivers/Kconfig
@@ -58,6 +58,8 @@ source drivers/power/Kconfig
 
 source drivers/hwmon/Kconfig
 
+source drivers/ssb/Kconfig
+
 source drivers/mfd/Kconfig
 
 source drivers/media/Kconfig
   
--- /dev/null
+++ b/drivers/ssb/Kconfig
@@ -0,0 +1,92 @@
+menu Sonics Silicon Backplane
+
+config SSB
+   tristate Sonics Silicon Backplane support
+   depends on EXPERIMENTAL
   
   Hence this will show up on all platforms?
  
  So?
 
 Shouldn't you add a dependency for platforms where it makes sense to have SSB?

Well, that's everything where you can stick a PCI, PCMCIA, PC-Card or CF-Card
into, plus the MIPS platform, where we have the embedded SSB.
That's basically everything, no? Except these strange !HAS_IOMEM platforms,
which we already take care of in a followup patch.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: NETPOLL=y , NETDEVICES=n compile error ( Re: 2.6.23-rc1-mm1 )

2007-08-02 Thread Matt Mackall

On Thu, Aug 02, 2007 at 11:00:08AM +0200, Jarek Poplawski wrote:
 On Wed, Aug 01, 2007 at 09:02:19PM -0500, Matt Mackall wrote:
  On Wed, Aug 01, 2007 at 11:59:21AM +0200, Jarek Poplawski wrote:
   On Tue, Jul 31, 2007 at 05:05:00PM +0200, Gabriel C wrote:
Jarek Poplawski wrote:
 On Tue, Jul 31, 2007 at 12:14:36PM +0200, Gabriel C wrote:
 Jarek Poplawski wrote:
 On 28-07-2007 20:42, Gabriel C wrote:
 Andrew Morton wrote:
 On Sat, 28 Jul 2007 17:44:45 +0200 Gabriel C [EMAIL PROTECTED] 
 wrote:
 ...
 net/core/netpoll.c:155: error: 'struct net_device' has no member 
 named 'poll_controller'
 ...
 I think is because KGDBOE selects just NETPOLL.

 Looks like it.

 Select went and selected NETPOLL and NETPOLL_TRAP but things like
 CONFIG_NETDEVICES and CONFIG_NET_POLL_CONTROLLER remain unset.  
 `select'
 remains evil.
 ...
   seems to select NET_POLL_CONTROLLER after selecting NETPOLL, but
   still doesn't check for NETDEVICES dependency.
  
  That's odd. Adding Sam to the cc:.
 
 Looks right, but after reading Andrew's opinion about select I'd be
 astonished if he doesn't know this problem already.
 
  
 Now KGDBOE just selects NETPOLL and NETPOLL_TRAP.
 Adding 'select CONFIG_NET_POLL_CONTROLLER' let kgdboe compiles but 
 the question is does it work without any ethernet card ?
 
 Why kgdboe should care what netpoll needs? So, I hope, you are adding
 this select under config NETPOLL. On the other hand, if NETPOLL should
 depend on NET_POLL_CONTROLLER there is probably no reason to have them
 both.

NET_POLL_CONTROLLER has def_bool NETPOLL if NETDEVICES .

Net peoples ping ?:)
  
  How about cc:ing the netpoll maintainer?
 
 Is there a new one or do you suggest possibility of abusing the
 authority of the netpoll's author with such trifles...?!

I'm just subtly suggesting that if you're going to have a discussion
about netpoll, you ought to cc: me.

 There are some notions about other diagnostic tools in some
 net drivers, eg. 3c509.c, so there would be a little bit of
 work if, after changing this, they really exist (and even if
 not - maybe it's reasonable to save such possibility for the
 future?).

I created it for netpoll, only netpoll clients have ever cared.

-- 
Mathematics is the supreme nostalgia of our time.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

b44 compile error on !PCI

2007-08-02 Thread Meelis Roos

Trying to compile today's git on SBus-only Sparc64 (Ultra 1), no PCI. b44 
is selectable but fails to compile:

  CC [M]  drivers/net/b44.o
drivers/net/b44.c: In function 'b44_sync_dma_desc_for_device':
drivers/net/b44.c:134: error: implicit declaration of function 
'dma_sync_single_range_for_device'
drivers/net/b44.c: In function 'b44_sync_dma_desc_for_cpu':
drivers/net/b44.c:144: error: implicit declaration of function 
'dma_sync_single_range_for_cpu'

-- 
Meelis Roos ([EMAIL PROTECTED])
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: TCP SACK issue, hung connection, tcpdump included

2007-08-02 Thread Darryl Miles


Ilpo Järvinen wrote:

On Tue, 31 Jul 2007, Darryl L. Miles wrote:


I've been able to capture a tcpdump from both ends during the problem and its
my belief there is a bug in 2.6.20.1 (at the client side) in that it issues a
SACK option for an old sequence which the current window being advertised is
beyond it.  This is the most concerning issue as the integrity of the sequence
numbers doesn't seem right (to my limited understanding anyhow).


You probably didn't check the reference I explicitly gave to those who 
are not familiar how DSACK works, just in case you didn't pick it up last 
time, here it is again for you: RFC2883... 


I've now squinted the D-SACK RFC and understand a little about this,
however the RFC does make the claim This extension is compatible with
current implementations of the SACK option in TCP.  That is, if one of
the TCP end-nodes does not implement this D-SACK extension and the other
TCP end-node does, we believe that this use of the D-SACK extension by
one of the end nodes will not introduce problems.

What if it turns out that is not true for a large enough number of SACK
implementations out there; in the timeframe that SACK was supported but
D-SACK was not supported.  Would it be possible to clearly categorise an
implementation to be:

 * 100% SACK RFC compliant.  SACK works and by virtue of the mandatory
requirements written into the previous SACK RFCs then this
implementation would never see a problem with receiving D-SACK even
through the stack itself does not support D-SACK.

 * Mostly SACK RFC compliant.  SACK works but if it saw D-SACK it would
have problems dealing with it, possibly resulting in fatal TCP
lockups.  Are there mandatory SACK implementation requirements in place
that make it possible to clearly draw the line and state that the 2.6.9 SACK
implementation was not RFC compliant?

 * 100% SACK and D-SACK RFC compliant.  Such an implementation was 
written to support D-SACK on top of SACK.




So if there is a problem, whose fault would it be:

 * The original SACK RFCs for not specifying a mandatory course of
action to take which D-SACK exploits.  Thus making the claim in RFC2883 
unsound.


 * The older linux kernel for not being 100% SACK RFC compliant in its
implementation ?  Not a lot we can do about this now, but if we're able
to identify there maybe backward compatibility issues with the same
implementation that's a useful point to take forward.

 * The newer linux kernel for enabling D-SACK by default when RFC2883
doesn't even claim a cast iron case for D-SACK to be compatible with any
100% RFC compliant SACK implementation.


Does TCP support the concept of vendor dependent options, that would be
TCP options which are in a special range that would both identify the
vendor and the vendors-specific option id.  Such a system would allow
Linux to implement a D-SACK Ok option, even if the RFC claims one is
not needed.  This would allow moving forward through this era until such
point in time when it was officially agreed it was just a linux problem 
or an RFC problem.  If its an RFC problem then IANA (or whoever) would 
issue a generic TCP option for it.


If the dump of this problem really does identify a risk/problem then, as
it's between 2 versions of Linux, a vendor-specific option also makes sense.

I don't really want to switch new useful stuff off by default (so it
never gets used), I'm all for experimentation but not to the point of
failure between default configurations of widely distributed version of 
the kernel.



So thats the technical approaches I can come up with to discuss.  Does
Ilpo have a particular vested interest in D-SACK that should be disclosed?



However, if DSACKs really
bother you still (though it shouldn't :-)), IIRC I also told you how
you're able to turn it off (tcp_dsack sysctl) but I assure you that it's
not a bug but feature called DSACK [RFC2883], there's _absolutely_ 

nothing
wrong with it, instead, it would be wrong to _not_ send the below 

snd_una

SACK in this scenario when tcp_dsack set to 1.


So it is necessary to turn off a TCP option (that is enabled by default)
to be sure to have reliable TCP connections (that don't lock up) in the
bugfree Linux networking stack ?  This is absurd.

If such an option causes such a problem; then that option should not be
enabled by default.  If however the problem is because of a bug then let
us continue to try to isolate the cause rather than wallpaper over the
cracks with the voodoo of turning things that are enabled by default off.

It only makes sense to turn options off when there is a 3rd party
involved (or other means beyond your control) which is affecting
function, the case here is that two Linux kernel stacks are affected and
no 3rd party device has been shown to be affecting function.



There is another concern of why the SERVER performed a retransmission in the
first place, when the tcpdump shows the ack covering it has been seen.


There are only three possible reasons to this

Re: strange tcp behavior

2007-08-02 Thread Simon Arlott

On 02/08/07 13:15, Simon Arlott wrote:
 (Don't remove CC:s, don't top post)
 On Thu, August 2, 2007 11:16, Evgeniy Polyakov wrote:
 On Thu, Aug 02, 2007 at 01:55:50PM +0400, Evgeniy Polyakov
 ([EMAIL PROTECTED]) wrote:
 On Thu, Aug 02, 2007 at 09:19:06AM +0300, [EMAIL PROTECTED]
 ([EMAIL PROTECTED]) wrote:
  1186035057.207629127.0.0.1 - 127.0.0.1TCP 5  smtp [SYN]
  Seq=0 Len=0
  1186035057.207632127.0.0.1 - 127.0.0.1TCP smtp  5 [SYN,
 ACK]
  Seq=0 Ack=1 Win=32792 Len=0 MSS=16396
  1186035057.207666127.0.0.1 - 127.0.0.1TCP 5  smtp [ACK]
  Seq=1 Ack=1 Win=1500 Len=0
  1186035057.207699127.0.0.1 - 127.0.0.1SMTP Command: EHLO
 localhost
  1186035057.207718127.0.0.1 - 127.0.0.1TCP smtp  5 [ACK]
  Seq=1 Ack=17 Win=32792 Len=0
  1186035057.207736127.0.0.1 - 127.0.0.1TCP 5  smtp [RST]
  Seq=17 Len=0
  1186035057.223934127.0.0.1 - 127.0.0.1TCP 33787  5
 [RST,
  ACK] Seq=0 Ack=0 Win=32792 Len=0
 
  Can someone please comment as to why, tcp  stack sends rst packet
 from the
  wrong source port in this situation.

 I don't know where that extra RST is coming from.
 This test would be more convincing between two hosts, since your bizarre
 client is using raw sockets as root and could be doing anything.

Server 192.168.7.8 (2.6.23)
Client 192.168.7.4 (2.6.20)

17:33:45.326246 IP 192.168.7.4.5  192.168.7.8.2500: S 
1385353579:1385353579(0) win 1500
17:33:45.326418 IP 192.168.7.8.2500  192.168.7.4.5: S 
1388203102:1388203102(0) ack 1385353580 win 14360 mss 7180
17:33:45.348833 IP 192.168.7.4.5  192.168.7.8.2500: . ack 1 win 1500
17:33:45.349977 IP 192.168.7.4.5  192.168.7.8.2500: P 1:17(16) ack 1 win 
1500
17:33:45.350117 IP 192.168.7.8.2500  192.168.7.4.5: . ack 17 win 14360
17:33:45.351273 IP 192.168.7.4.5  192.168.7.8.2500: R 
1385353596:1385353596(0) win 1500
17:33:45.360878 IP 192.168.7.8.48186  192.168.7.4.5: R 
1388203103:1388203103(0) ack 1385353596 win 14360

Seems to be losing the source port information when it decides to send 
that final RST|ACK. It's going through the TCPAbortOnClose path:

tcp_close:
- tcp_set_state(sk, TCP_CLOSE)
- inet_put_port(tcp_hashinfo, sk)
Perhaps it's losing the port information here?
- tcp_send_active_reset(sk, GFP_KERNEL)

TCP_CLOSE   socket is finished
Should these two calls be the other way round?


Also, I don't think it should be sending a RST after the other side has 
sent one - the connection no longer exists so there is nothing on the 
other side to reset.

-- 
Simon Arlott
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: b44 compile error on !PCI

2007-08-02 Thread Michael Buesch

On Thursday 02 August 2007, Meelis Roos wrote:
 Trying to compile today's git on SBus-only Sparc64 (Ultra 1), no PCI. b44 
 is selectable but fails to compile:
 
   CC [M]  drivers/net/b44.o
 drivers/net/b44.c: In function 'b44_sync_dma_desc_for_device':
 drivers/net/b44.c:134: error: implicit declaration of function 
 'dma_sync_single_range_for_device'
 drivers/net/b44.c: In function 'b44_sync_dma_desc_for_cpu':
 drivers/net/b44.c:144: error: implicit declaration of function 
 'dma_sync_single_range_for_cpu'

Are you sure this is related to !PCI ?
If I grep include/asm-sparc64/dma-mapping.h I don't
find dma_sync_single_range_for_*
So I'd rather call this a bug in the arch code of sparc64.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [patch] genirq: temporary fix for level-triggered IRQ resend

2007-08-02 Thread Gabriel C

Ingo Molnar wrote:
 Linus,
 
 with -rc2 approaching i think we should apply the minimal fix below to 
 get Marcin's ne2k-pci networking back in working order. The 
 WARN_ON_ONCE() will not prevent the system from working and it will be a 
 reminder.
 
 a better workaround would be to inhibit the resent vector via the 
 IO-APIC irqchip - but i'd still like to have the patch below because the 
 ne2k driver _should_ be able to survive the spurious irq that happens. 
 (even on Marcin's system that ne2k-pci irq line is shared with another 
 networking card, so an irq could happen at any moment - it's just that 
 with the delayed-disable logic it happens _all the time_.)
 

I get a warning on each boot now with this patch .. 

[   63.686613] WARNING: at kernel/irq/resend.c:70 check_irq_resend()
[   63.686636]  [c013c55c] check_irq_resend+0x8c/0xa0
[   63.686653]  [c013c15f] enable_irq+0xad/0xb3
[   63.686662]  [e886481e] vortex_timer+0x20c/0x3d5 [3c59x]
[   63.686675]  [c01164b9] scheduler_tick+0x154/0x273
[   63.686685]  [c012fed1] getnstimeofday+0x34/0xe3
[   63.686697]  [c0121f4a] run_timer_softirq+0x137/0x197
[   63.686709]  [e8864612] vortex_timer+0x0/0x3d5 [3c59x]
[   63.686720]  [c011ed09] __do_softirq+0x75/0xe1
[   63.686729]  [c011edac] do_softirq+0x37/0x3d
[   63.686735]  [c011ef85] irq_exit+0x7c/0x7e
[   63.686740]  [c010e013] smp_apic_timer_interrupt+0x59/0x84
[   63.686751]  [c0103428] apic_timer_interrupt+0x28/0x30
[   63.686759]  [c0101355] default_idle+0x0/0x3f
[   63.686767]  [c0101385] default_idle+0x30/0x3f
[   63.686773]  [c0100c19] cpu_idle+0x5e/0x8e
[   63.686779]  [c03fdc5f] start_kernel+0x2d7/0x368


That means ?:)


   Ingo
 


Gabriel
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[GIT PULL] sctp updates

2007-08-02 Thread Vlad Yasevich

Hi David

Please pull the following changes since commit 
fc34f6c617bf2a845d793af12b96bcc0afd472c4:
 Andrew Morton (1):
   Fix up remove the arm26 port

which are found in branch 'master' of the git repository at:

 master.kernel.org:/pub/scm/linux/kernel/git/vxy/lksctp-dev.git

Dave Johnson (1):
  SCTP: IPv4 mapped addr not returned in SCTPv6 accept()

Sebastian Siewior (2):
  sctp: try to fix readlock
  sctp: fix shadow symbol in net/sctp/tsnmap.c

Vlad Yasevich (1):
  SCTP: IPv4 mapped addr not returned in SCTPv6 accept()

Wei Yongjun (2):
  SCTP: drop SACK if ctsn is not less than the next tsn of assoc
  SCTP: remove useless code in function sctp_init_cause

[EMAIL PROTECTED] (3):
  sctp: make locally used function static
  sctp: move global declaration to header file.
  sctp: remove shadowed symbols

 include/net/sctp/sctp.h  |   10 
 net/sctp/input.c |2 +-
 net/sctp/ipv6.c  |2 +
 net/sctp/sm_make_chunk.c |6 ---
 net/sctp/sm_statefuns.c  |  103 ++---
 net/sctp/socket.c|   45 +++-
 net/sctp/tsnmap.c|   14 +++---
 7 files changed, 123 insertions(+), 59 deletions(-)

Thanks
-vlad


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [RFC][BNX2X]: New driver for Broadcom 10Gb Ethernet.

2007-08-02 Thread Eliezer Tamir


Jeff, Roland,

Thanks for taking a look.

Jeff Garzik wrote:

Roland Dreier wrote:

   +{ PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_NX2_5710,
   +PCI_ANY_ID, PCI_ANY_ID, 0, 0, BCM5710 },

FWIW, this could be neater as

{ PCI_VDEVICE(BROADCOM, PCI_DEVICE_ID_NX2_5710), BCM5710 }


OK

Yes.  And additionally, I prefer (but not require) that people directly 
use a hexadecimal constant in the PCI ID table for device ID, if that is 
the only place in the entire codebase referring to that PCI device ID.


Using a named constant for a single-use PCI device ID merely aggravates 
include/linux/pci_ids.h patching headache for what is ultimately an 
arbitrary number [usually] picked out of thin air by the hw vendor.


Jeff


For now it is the only place used, but this will change very soon, once 
we have several flavors of the chip out. then we will have to handle 
each in a slightly different manner.


Eliezer



-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [REGRESSION] tg3 dead after s2ram

2007-08-02 Thread Michael Chan

On Thu, 2007-08-02 at 02:23 -0700, David Miller wrote:
 From: Joachim Deguara [EMAIL PROTECTED]
 Date: Thu, 2 Aug 2007 11:15:05 +0200

  Seems like even if powersave shuts down the network that the device should 
  still work after a suspend to ram, so who is at fault here?

 It's a good question.

 The pci_enable() is done on the PCI device at probe time, at least in
 the tg3 driver, and with such a model restoring and saving of PCI
 config space should not be dependent upon whether the netdev is
 running or not.

Alternatively, we can also fix it by calling pci_enable_device() again
in tg3_open().  But I think it is better to just always save and restore
in suspend/resume.  bnx2.c will also require the same fix.

Thanks Joachim for helping to debug this problem.  Please try this
patch:

[TG3]: Fix suspend/resume problem.

Joachim Deguara [EMAIL PROTECTED] reported that tg3 devices
would not resume properly if the device was shut down before the system
was suspended.  In such a scenario where the netif_running state is 0,
tg3_suspend() would not save the PCI state and so the memory enable bit
and bus master enable bit would be lost.

We fix this by always saving and restoring the PCI state in
tg3_suspend() and tg3_resume() regardless of netif_running() state.

Signed-off-by: Michael Chan [EMAIL PROTECTED]

diff --git a/drivers/net/tg3.c b/drivers/net/tg3.c
index dc41c05..5874042 100644
--- a/drivers/net/tg3.c
+++ b/drivers/net/tg3.c
@@ -64,8 +64,8 @@

 #define DRV_MODULE_NAMEtg3
 #define PFX DRV_MODULE_NAME: 
-#define DRV_MODULE_VERSION 3.79
-#define DRV_MODULE_RELDATE July 18, 2007
+#define DRV_MODULE_VERSION 3.80
+#define DRV_MODULE_RELDATE August 2, 2007

 #define TG3_DEF_MAC_MODE   0
 #define TG3_DEF_RX_MODE0
@@ -12111,6 +12111,12 @@ static int tg3_suspend(struct pci_dev *pdev, 
pm_message_t state)
struct tg3 *tp = netdev_priv(dev);
int err;

+   /* PCI register 4 needs to be saved whether netif_running() or not.
+* MSI address and data need to be saved if using MSI and
+* netif_running().
+*/
+   pci_save_state(pdev);
+
if (!netif_running(dev))
return 0;

@@ -12130,9 +12136,6 @@ static int tg3_suspend(struct pci_dev *pdev, 
pm_message_t state)
tp-tg3_flags = ~TG3_FLAG_INIT_COMPLETE;
tg3_full_unlock(tp);

-   /* Save MSI address and data for resume.  */
-   pci_save_state(pdev);
-
err = tg3_set_power_state(tp, pci_choose_state(pdev, state));
if (err) {
tg3_full_lock(tp, 0);
@@ -12160,11 +12163,11 @@ static int tg3_resume(struct pci_dev *pdev)
struct tg3 *tp = netdev_priv(dev);
int err;

+   pci_restore_state(tp-pdev);
+
if (!netif_running(dev))
return 0;

-   pci_restore_state(tp-pdev);
-
err = tg3_set_power_state(tp, PCI_D0);
if (err)
return err;

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: strange tcp behavior

2007-08-02 Thread Evgeniy Polyakov

On Thu, Aug 02, 2007 at 06:15:52PM +0100, Simon Arlott ([EMAIL PROTECTED]) 
wrote:
 17:33:45.351273 IP 192.168.7.4.5  192.168.7.8.2500: R 
 1385353596:1385353596(0) win 1500
 17:33:45.360878 IP 192.168.7.8.48186  192.168.7.4.5: R 
 1388203103:1388203103(0) ack 1385353596 win 14360
 
 Seems to be losing the source port information when it decides to send 
 that final RST|ACK. It's going through the TCPAbortOnClose path:
 
 tcp_close:
   - tcp_set_state(sk, TCP_CLOSE)
   - inet_put_port(tcp_hashinfo, sk)
   Perhaps it's losing the port information here?
   - tcp_send_active_reset(sk, GFP_KERNEL)
 
 TCP_CLOSE   socket is finished
 Should these two calls be the other way round?
 
 
 Also, I don't think it should be sending a RST after the other side has 
 sent one - the connection no longer exists so there is nothing on the 
 other side to reset.

Problem is not in tcp_send_active_reset(), when socket is being released
it is already damaged.
Problem is that inet_autobind() function is called for socket, which is
already dead, but not yet completely - it smells bad (since it has its
port freed), but still alive (accessible via send()), so for its last
word inet_sendmsg() tries to bind it again, and only after that time it
will be eventually closed and freed completely.

So, following patch fixes problem for me.
Another solution might be not to release the port until the socket is being
released, but that can lead to performance degradation.
Correct me if sk_err can be reset.

Signed-off-by: Evgeniy Polyakov [EMAIL PROTECTED]

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 06c08e5..6790b23 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -168,8 +169,14 @@ void inet_sock_destruct(struct sock *sk)
 static int inet_autobind(struct sock *sk)
 {
struct inet_sock *inet;
+
/* We may need to bind the socket. */
lock_sock(sk);
+   if (sk-sk_err) {
+   release_sock(sk);
+   return sk-sk_err;
+   }
+
inet = inet_sk(sk);
if (!inet-num) {
if (sk-sk_prot-get_port(sk, 0)) {
@@ -686,8 +703,11 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, 
struct msghdr *msg,
struct sock *sk = sock-sk;
 
/* We may need to bind the socket. */
-   if (!inet_sk(sk)-num  inet_autobind(sk))
-   return -EAGAIN;
+   if (!inet_sk(sk)-num) {
+   int err = inet_autobind(sk);
+   if (err)
+   return err;
+   }
 
return sk-sk_prot-sendmsg(iocb, sk, msg, size);
 }
@@ -698,8 +718,11 @@ static ssize_t inet_sendpage(struct socket *sock, struct 
page *page, int offset,
struct sock *sk = sock-sk;
 
/* We may need to bind the socket. */
-   if (!inet_sk(sk)-num  inet_autobind(sk))
-   return -EAGAIN;
+   if (!inet_sk(sk)-num) {
+   int err = inet_autobind(sk);
+   if (err)
+   return err;
+   }
 
if (sk-sk_prot-sendpage)
return sk-sk_prot-sendpage(sk, page, offset, size, flags);

-- 
Evgeniy Polyakov
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [RFC][BNX2X]: New driver for Broadcom 10Gb Ethernet.

2007-08-02 Thread Eliezer Tamir


Michal,

Thanks for going over the code.
My responses are inline.

Eliezer

Michael Buesch wrote:

On Wednesday 01 August 2007 10:31:17 Michael Chan wrote:


+typedef struct {
+   u8 reserved[64];
+} license_key_t;


No typedef.
What is a license key used for, anyway?

This will be removed.
This is just a placeholder that has the right size.




+#define RUN_AT(x)  (jiffies + (x))


That macro does only obfuscate code, in my opinion.
If you want jiffies+x, better opencode it.

OK




+typedef enum {
+   BCM5710 = 0,
+} board_t;


No typedef. Do
enum bnx2x_board {
BCM5710 = 0,
};
Or something like that.

OK




+static struct pci_device_id bnx2x_pci_tbl[] = {


static const struct...


OK


+   { PCI_VENDOR_ID_BROADCOM, PCI_DEVICE_ID_NX2_5710,
+   PCI_ANY_ID, PCI_ANY_ID, 0, 0, BCM5710 },
+   { 0 }
+};



+static inline u32 bnx2x_bits_en(struct bnx2x *bp, u32 block, u32 reg,
+   u32 bits)


Does that really need to be inline? I'd suggest dropping inline.


Does not need to be inlined. Will change.




+{
+   u32 val = REG_RD(bp, block, reg);
+
+   val |= bits;
+   REG_WR(bp, block, reg, val);
+   return val;
+}
+
+static inline u32 bnx2x_bits_dis(struct bnx2x *bp, u32 block, u32 reg,
+u32 bits)


Same here.

Same



+{
+   u32 val = REG_RD(bp, block, reg);
+
+   val = ~bits;
+   REG_WR(bp, block, reg, val);
+   return val;
+}
+
+static int bnx2x_mdio22_write(struct bnx2x *bp, u32 reg, u32 val)
+{
+   int rc;
+   u32 tmp, i;
+   int port = bp-port;
+   u32 emac_base = port ? GRCBASE_EMAC1 : GRCBASE_EMAC0;
+
+   if (bp-phy_flags  PHY_INT_MODE_AUTO_POLLING_FLAG) {
+
+   tmp = REG_RD(bp, emac_base, EMAC_REG_EMAC_MDIO_MODE);
+   tmp = ~EMAC_MDIO_MODE_AUTO_POLL;
+   EMAC_WR(EMAC_REG_EMAC_MDIO_MODE, tmp);
+   REG_RD(bp, emac_base, EMAC_REG_EMAC_MDIO_MODE);
+   udelay(40);
+   }
+
+   tmp = ((bp-phy_addr  21) | (reg  16) |
+  (val  EMAC_MDIO_COMM_DATA) |
+  EMAC_MDIO_COMM_COMMAND_WRITE_22 |
+  EMAC_MDIO_COMM_START_BUSY);
+   EMAC_WR(EMAC_REG_EMAC_MDIO_COMM, tmp);
+
+   for (i = 0; i  50; i++) {
+   udelay(10);
+
+   tmp = REG_RD(bp, emac_base, EMAC_REG_EMAC_MDIO_COMM);
+   if (!(tmp  EMAC_MDIO_COMM_START_BUSY)) {
+   udelay(5);
+   break;
+   }
+   }
+
+   if (tmp  EMAC_MDIO_COMM_START_BUSY) {
+   BNX2X_ERR(write phy register failed\n);
+
+   rc = -EBUSY;
+   } else {
+   rc = 0;
+   }
+
+   if (bp-phy_flags  PHY_INT_MODE_AUTO_POLLING_FLAG) {
+
+   tmp = REG_RD(bp, emac_base, EMAC_REG_EMAC_MDIO_MODE);
+   tmp |= EMAC_MDIO_MODE_AUTO_POLL;
+   EMAC_WR(EMAC_REG_EMAC_MDIO_MODE, tmp);
+   }
+
+   return rc;
+}
+
+static int bnx2x_mdio22_read(struct bnx2x *bp, u32 reg, u32 *ret_val)
+{
+   int rc;
+   u32 val, i;
+   int port = bp-port;
+   u32 emac_base = port ? GRCBASE_EMAC1 : GRCBASE_EMAC0;
+
+   if (bp-phy_flags  PHY_INT_MODE_AUTO_POLLING_FLAG) {
+
+   val = REG_RD(bp, emac_base, EMAC_REG_EMAC_MDIO_MODE);
+   val = ~EMAC_MDIO_MODE_AUTO_POLL;
+   EMAC_WR(EMAC_REG_EMAC_MDIO_MODE, val);
+   REG_RD(bp, emac_base, EMAC_REG_EMAC_MDIO_MODE);
+   udelay(40);
+   }
+
+   val = ((bp-phy_addr  21) | (reg  16) |
+  EMAC_MDIO_COMM_COMMAND_READ_22 |
+  EMAC_MDIO_COMM_START_BUSY);
+   EMAC_WR(EMAC_REG_EMAC_MDIO_COMM, val);
+
+   for (i = 0; i  50; i++) {
+   udelay(10);
+
+   val = REG_RD(bp, emac_base, EMAC_REG_EMAC_MDIO_COMM);
+   if (!(val  EMAC_MDIO_COMM_START_BUSY)) {


No udelay(5) here, like in write above?

There is a udelay.




+   val = EMAC_MDIO_COMM_DATA;
+   break;
+   }
+   }



+static int bnx2x_mdio45_vwrite(struct bnx2x *bp, u32 reg, u32 addr, u32 val)
+{
+   int i;
+   u32 rd_val;
+
+   for (i = 0; i  10; i++) {
+   bnx2x_mdio45_write(bp, reg, addr, val);
+   mdelay(5);


Can you msleep(5) here?

Can't sleep, this can happen from a sleepless context.
Maybe we can break it down to smaller mdelays, will ask the HW guys.



+   bnx2x_mdio45_read(bp, reg, addr, rd_val);
+   /* if the read value is not the same as the value we wrote,
+  we should write it again */
+   if (rd_val == val) {
+   return 0;
+   }
+   }
+   BNX2X_ERR(MDIO write in CL45 failed\n);
+   return -EBUSY;
+}



+/* DMAE command positions used
+ * Port0 14
+ * Port1 15
+ */
+static void bnx2x_wb_write_dmae(struct bnx2x *bp, u32 wb_addr, u32 *wb_write,
+

Re: strange tcp behavior

2007-08-02 Thread Evgeniy Polyakov

On Thu, Aug 02, 2007 at 10:08:42PM +0400, Evgeniy Polyakov ([EMAIL PROTECTED]) 
wrote:
 So, following patch fixes problem for me.

Or this one. Essentially the same though.

Signed-off-by: Evgeniy Polyakov [EMAIL PROTECTED]

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 06c08e5..7c47ef5 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -168,8 +168,14 @@ void inet_sock_destruct(struct sock *sk)
 static int inet_autobind(struct sock *sk)
 {
struct inet_sock *inet;
+
/* We may need to bind the socket. */
lock_sock(sk);
+   if (sk-sk_err || (sk-sk_state == TCP_CLOSE)) {
+   release_sock(sk);
+   return sk-sk_err;
+   }
+
inet = inet_sk(sk);
if (!inet-num) {
if (sk-sk_prot-get_port(sk, 0)) {
@@ -686,8 +692,11 @@ int inet_sendmsg(struct kiocb *iocb, struct socket *sock, 
struct msghdr *msg,
struct sock *sk = sock-sk;
 
/* We may need to bind the socket. */
-   if (!inet_sk(sk)-num  inet_autobind(sk))
-   return -EAGAIN;
+   if (!inet_sk(sk)-num) {
+   int err = inet_autobind(sk);
+   if (err)
+   return err;
+   }
 
return sk-sk_prot-sendmsg(iocb, sk, msg, size);
 }
@@ -698,8 +707,11 @@ static ssize_t inet_sendpage(struct socket *sock, struct 
page *page, int offset,
struct sock *sk = sock-sk;
 
/* We may need to bind the socket. */
-   if (!inet_sk(sk)-num  inet_autobind(sk))
-   return -EAGAIN;
+   if (!inet_sk(sk)-num) {
+   int err = inet_autobind(sk);
+   if (err)
+   return err;
+   }
 
if (sk-sk_prot-sendpage)
return sk-sk_prot-sendpage(sk, page, offset, size, flags);

-- 
Evgeniy Polyakov
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: strange tcp behavior

2007-08-02 Thread Simon Arlott

On 02/08/07 19:08, Evgeniy Polyakov wrote:
 On Thu, Aug 02, 2007 at 06:15:52PM +0100, Simon Arlott ([EMAIL PROTECTED]) 
 wrote:
 17:33:45.351273 IP 192.168.7.4.5  192.168.7.8.2500: R 
 1385353596:1385353596(0) win 1500
 17:33:45.360878 IP 192.168.7.8.48186  192.168.7.4.5: R 
 1388203103:1388203103(0) ack 1385353596 win 14360
 
 Problem is not in tcp_send_active_reset(), when socket is being released
 it is already damaged.
 Problem is that inet_autobind() function is called for socket, which is
 already dead, but not yet completely - it smells bad (since it has its
 port freed), but still alive (accessible via send()), so for its last
 word inet_sendmsg() tries to bind it again, and only after that time it
 will be eventually closed and freed completely.
 
 So, following patch fixes problem for me.
 Another solution might be not to release the port until the socket is being
 released, but that can lead to performance degradation.
 Correct me if sk_err can be reset.

19:24:32.897071 IP 192.168.7.4.5  192.168.7.8.2500: S 
705362199:705362199(0) win 1500
19:24:32.897211 IP 192.168.7.8.2500  192.168.7.4.5: S 
4159455228:4159455228(0) ack 705362200 win 14360 mss 7180
19:24:32.920784 IP 192.168.7.4.5  192.168.7.8.2500: . ack 1 win 1500
19:24:32.921732 IP 192.168.7.4.5  192.168.7.8.2500: P 1:17(16) ack 1 win 
1500
19:24:32.921795 IP 192.168.7.8.2500  192.168.7.4.5: . ack 17 win 14360
19:24:32.922881 IP 192.168.7.4.5  192.168.7.8.2500: R 
705362216:705362216(0) win 1500
19:24:34.927717 IP 192.168.7.8.2500  192.168.7.4.5: R 1:1(0) ack 17 win 
14360

According to RFC 793, the RST from .4 means that the connection 
is CLOSED.

  Reset Processing

  The receiver of a RST first validates it, then changes state.  If the
  receiver was in the LISTEN state, it ignores it.  If the receiver was
  in SYN-RECEIVED state and had previously been in the LISTEN state,
  then the receiver returns to the LISTEN state, otherwise the receiver
  aborts the connection and goes to the CLOSED state.  If the receiver
  was in any other state, it aborts the connection and advises the user
  and goes to the CLOSED state.


So when the call to close() is made without reading:

  Abort

Format:  ABORT (local connection name)

This command causes all pending SENDs and RECEIVES to be
aborted, the TCB to be removed, and a special RESET message to
be sent to the TCP on the other side of the connection.

Isn't there no other side of the connection to send the RESET to?


 Signed-off-by: Evgeniy Polyakov [EMAIL PROTECTED]
 
 diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
 index 06c08e5..6790b23 100644
 --- a/net/ipv4/af_inet.c
 +++ b/net/ipv4/af_inet.c
 @@ -168,8 +169,14 @@ void inet_sock_destruct(struct sock *sk)
  static int inet_autobind(struct sock *sk)
  {
   struct inet_sock *inet;
 +
   /* We may need to bind the socket. */
   lock_sock(sk);
 + if (sk-sk_err) {
 + release_sock(sk);
 + return sk-sk_err;
 + }
 +
   inet = inet_sk(sk);
   if (!inet-num) {
   if (sk-sk_prot-get_port(sk, 0)) {
 @@ -686,8 +703,11 @@ int inet_sendmsg(struct kiocb *iocb, struct socket 
 *sock, struct msghdr *msg,
   struct sock *sk = sock-sk;
  
   /* We may need to bind the socket. */
 - if (!inet_sk(sk)-num  inet_autobind(sk))
 - return -EAGAIN;
 + if (!inet_sk(sk)-num) {
 + int err = inet_autobind(sk);
 + if (err)
 + return err;
 + }
  
   return sk-sk_prot-sendmsg(iocb, sk, msg, size);
  }
 @@ -698,8 +718,11 @@ static ssize_t inet_sendpage(struct socket *sock, struct 
 page *page, int offset,
   struct sock *sk = sock-sk;
  
   /* We may need to bind the socket. */
 - if (!inet_sk(sk)-num  inet_autobind(sk))
 - return -EAGAIN;
 + if (!inet_sk(sk)-num) {
 + int err = inet_autobind(sk);
 + if (err)
 + return err;
 + }
  
   if (sk-sk_prot-sendpage)
   return sk-sk_prot-sendpage(sk, page, offset, size, flags);
 


-- 
Simon Arlott


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

ipsec not working in 2.6.23-rc1-git10 when using pfkey

2007-08-02 Thread Joy Latten


Although an ipsec SA was established, kernel couldn't seem to find it.

I think since we are now using x->sel.family instead of family 
in the xfrm_selector_match() called in xfrm_state_find(), af_key 
needs to set this field too, just as xfrm_user does. 

In af_key.c, x->sel.family only gets set when there's an 
ext_hdrs[SADB_EXT_ADDRESS_PROXY-1] which I think is for tunnel.

I think pfkey needs to also set the x->sel.family field when it is 0.

Tested with below patch, and ipsec worked when using pfkey. 
Let me know if this is correct approach or not.

Regards,
Joy


diff -urpN linux-2.6.22/net/key/af_key.c linux-2.6.22.fp/net/key/af_key.c
--- linux-2.6.22/net/key/af_key.c   2007-08-02 12:32:02.0 -0500
+++ linux-2.6.22.fp/net/key/af_key.c2007-08-02 12:40:57.0 -0500
@@ -1205,6 +1205,9 @@ static struct xfrm_state * pfkey_msg2xfr
x-sel.family = pfkey_sadb_addr2xfrm_addr(addr, x-sel.saddr);
x-sel.prefixlen_s = addr-sadb_address_prefixlen;
}
+   
+   if (!x-sel.family)
+   x-sel.family = x-props.family;
 
if (ext_hdrs[SADB_X_EXT_NAT_T_TYPE-1]) {
struct sadb_x_nat_t_type* n_type;
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Fwd: source interface ping bug ?

2007-08-02 Thread nano bug

-- Forwarded message --
From: nano bug [EMAIL PROTECTED]
Date: Aug 2, 2007 10:58 PM
Subject: Re: source interface ping bug ?
To: Patrick McHardy [EMAIL PROTECTED]


Hello,

Yes, I'm running NAT. I have attached the output of the iptables -t nat
-vxnL command and the routing tables

On 7/30/07, Patrick McHardy [EMAIL PROTECTED] wrote:
 nano bug wrote:
  [...]
  using source interface :
 
  [EMAIL PROTECTED]:~/iputils# ./ping -I eth2 87.248.113.14
  PING 87.248.113.14 (87.248.113.14) from 86.106.19.75 eth2: 56(84) bytes of 
  data.
 From 86.106.19.75 icmp_seq=1 Destination Host Unreachable

  [EMAIL PROTECTED]:~# tcpdump -i eth2 -vvv -n host 87.248.113.14 and host
  86.106.19.75
  tcpdump: listening on eth2, link-type EN10MB (Ethernet), capture size 96 
  bytes
  01:19:24.292911 arp who-has 87.248.113.14 tell 86.106.19.75


 Are you using (or running) NAT locally? What do your routing tables look
 like?



route_tables
Description: Binary data

Re: [patch] genirq: temporary fix for level-triggered IRQ resend

2007-08-02 Thread Ingo Molnar


* Gabriel C [EMAIL PROTECTED] wrote:

 I get a warning on each boot now with this patch ..
 
 [   63.686613] WARNING: at kernel/irq/resend.c:70 check_irq_resend()
 [   63.686636]  [c013c55c] check_irq_resend+0x8c/0xa0
 [   63.686653]  [c013c15f] enable_irq+0xad/0xb3
 [   63.686662]  [e886481e] vortex_timer+0x20c/0x3d5 [3c59x]
 [   63.686675]  [c01164b9] scheduler_tick+0x154/0x273
 [   63.686685]  [c012fed1] getnstimeofday+0x34/0xe3
 [   63.686697]  [c0121f4a] run_timer_softirq+0x137/0x197
 [   63.686709]  [e8864612] vortex_timer+0x0/0x3d5 [3c59x]
 [   63.686720]  [c011ed09] __do_softirq+0x75/0xe1
 [   63.686729]  [c011edac] do_softirq+0x37/0x3d
 [   63.686735]  [c011ef85] irq_exit+0x7c/0x7e
 [   63.686740]  [c010e013] smp_apic_timer_interrupt+0x59/0x84
 [   63.686751]  [c0103428] apic_timer_interrupt+0x28/0x30
 [   63.686759]  [c0101355] default_idle+0x0/0x3f
 [   63.686767]  [c0101385] default_idle+0x30/0x3f
 [   63.686773]  [c0100c19] cpu_idle+0x5e/0x8e
 [   63.686779]  [c03fdc5f] start_kernel+0x2d7/0x368
 
 
 That means ?:)

if your network still works fine then you can ignore it :-)

we are still trying to figure out what happens with ne2k-pci. The 
message will vanish soon.

Ingo
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

net driver error accounting

2007-08-02 Thread Andrew Morton


Looking at http://bugzilla.kernel.org/show_bug.cgi?id=8106

Guys, could we please have a ruling here?

When a net driver encounters a tx_fifo_error, should this also contribute
to the tx_error count, or should it not?

More generally, should netdev drivers accumulate all the detailed
rx_errors into net_device_stats.rx_errors in real time, or should they not?

Thanks.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [TULIP] Need new maintainer

2007-08-02 Thread Jeff Garzik


Valerie Henson wrote:

On Mon, Jul 30, 2007 at 03:31:58PM -0400, Kyle McMartin wrote:

On Mon, Jul 30, 2007 at 01:04:13PM -0600, Valerie Henson wrote:

The Tulip network driver needs a new maintainer!  I no longer have
time to maintain the Tulip network driver and I'm stepping down.  Jeff
Garzik would be happy to get volunteers.


Since I already take care of a major consumer of these devices (parisc,
which pretty much all have tulip) I'm willing to take care of this.
Alternately, Grant is probably willing.


And I coulda handed you a suitcase full of cards and I missed my
chance!

It's fine by me, although Jeff is the final arbiter.


No objections here...

Jeff



-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: net driver error accounting

2007-08-02 Thread Jeff Garzik


Andrew Morton wrote:

Looking at http://bugzilla.kernel.org/show_bug.cgi?id=8106

Guys, could we please have a ruling here?

When a net driver encounters a tx_fifo_error, should this also contribute
to the tx_error count, or should it not?


For each TX error, (a) tx_error is incremented and (b) a more-specific 
TX error stat is also potentially incremented.  So, yes, tx_error 
accumulates.


See cp_tx() in 8139cp.



More generally, should netdev drivers accumulate all the detailed
rx_errors into net_device_stats.rx_errors in real time, or should they not?


For each RX error, (a) rx_error is incremented and (b) a more-specific 
RX error stat is also potentially incremented.  So, yes, rx_error 
accumulates.


See cp_rx_err_acct() in 8139cp.

Jeff


-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: net driver error accounting

2007-08-02 Thread Andrew Morton

On Thu, 02 Aug 2007 16:38:09 -0400
Jeff Garzik [EMAIL PROTECTED] wrote:

 Andrew Morton wrote:
  Looking at http://bugzilla.kernel.org/show_bug.cgi?id=8106
  
  Guys, could we please have a ruling here?
  
  When a net driver encounters a tx_fifo_error, should this also contribute
  to the tx_error count, or should it not?
 
 For each TX error, (a) tx_error is incremented and (b) a more-specific 
 TX error stat is also potentially incremented.  So, yes, tx_error 
 accumulates.
 
 See cp_tx() in 8139cp.
 
 
  More generally, should netdev drivers accumulate all the detailed
  rx_errors into net_device_stats.rx_errors in real time, or should they not?
 
 For each RX error, (a) rx_error is incremented and (b) a more-specific 
 RX error stat is also potentially incremented.  So, yes, rx_error 
 accumulates.
 
 See cp_rx_err_acct() in 8139cp.
 

OK, thanks.

One does wonder why the overall rx_error exists at all, but whatever.  The
main thing is to get all the net drivers doing the same thing.


So I guess bug 8106 wants something like this?


diff -puN drivers/net/natsemi.c~a drivers/net/natsemi.c
--- a/drivers/net/natsemi.c~a
+++ a/drivers/net/natsemi.c
@@ -2438,13 +2438,16 @@ static void netdev_error(struct net_devi
dev-name);
}
np-stats.rx_fifo_errors++;
+   np-stats.rx_errors++;
}
/* Hm, it's not clear how to recover from PCI faults. */
if (intr_status  IntrPCIErr) {
printk(KERN_NOTICE %s: PCI error %#08x\n, dev-name,
intr_status  IntrPCIErr);
np-stats.tx_fifo_errors++;
+   np-stats.tx_errors++;
np-stats.rx_fifo_errors++;
+   np-stats.rx_errors++;
}
spin_unlock(np-lock);
 }
_

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [RFC][BNX2X]: New driver for Broadcom 10Gb Ethernet.

2007-08-02 Thread Michael Chan

On Thu, 2007-08-02 at 00:06 +0200, Michael Buesch wrote:
 +static inline u32 bnx2x_tx_avail(struct bnx2x_fastpath *fp)
 
 Too big for inlining.
 
  +{
  + u16 used;
  + u32 prod = fp-tx_bd_prod;
  + u32 cons = fp-tx_bd_cons;
  +
  + smp_mb();
 
 This barrier needs a comment. Why is it there? And why SMP only?

bnx2 and tg3 have similar logic to tell the compiler that prod and cons
can change.  Strictly speaking, we can just use barrier().  The barrier
is also not placed correctly and should be:

/* Tell compiler that prod and cons can change. */
barrier();
prod = fp-tx_bd_prod;
cons = fp-tx_bd_cons;
 
...
 
  + fp-tx_pkt_cons = sw_cons;
  + fp-tx_bd_cons = bd_cons;
  +
  + smp_mb();

 Please add a comment why we need a SMP MB here.

This is again similar to logic in tg3 and bnx2 and the comments in tg3
are:

/* Need to make the tx_cons update visible to tg3_start_xmit()
 * before checking for netif_queue_stopped().  Without the
 * memory barrier, there is a small possibility that tg3_start_xmit()
 * will miss it and cause the queue to be stopped forever.
 */



-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] improved xfrm_audit_log() patch

2007-08-02 Thread Joy Latten

Sorry for delay, here is xfrm_audit_log() modification with 
recommended changes. Let me know if this looks better. 

Regards,
Joy

Signed-off-by: Joy Latten [EMAIL PROTECTED]


diff -urpN linux-2.6.22/include/linux/audit.h 
linux-2.6.22.patch10/include/linux/audit.h
--- linux-2.6.22/include/linux/audit.h  2007-08-01 11:49:23.0 -0500
+++ linux-2.6.22.patch10/include/linux/audit.h  2007-08-01 13:11:14.0 
-0500
@@ -112,6 +112,7 @@
 #define AUDIT_MAC_IPSEC_DELSA  1412/* Delete a XFRM state */
 #define AUDIT_MAC_IPSEC_ADDSPD 1413/* Add a XFRM policy */
 #define AUDIT_MAC_IPSEC_DELSPD 1414/* Delete a XFRM policy */
+#define AUDIT_MAC_IPSEC_EVENT  1415/* Audit IPSec events */
 
 #define AUDIT_FIRST_KERN_ANOM_MSG   1700
 #define AUDIT_LAST_KERN_ANOM_MSG1799
diff -urpN linux-2.6.22/include/net/xfrm.h 
linux-2.6.22.patch10/include/net/xfrm.h
--- linux-2.6.22/include/net/xfrm.h 2007-08-01 11:49:24.0 -0500
+++ linux-2.6.22.patch10/include/net/xfrm.h 2007-08-01 13:11:14.0 
-0500
@@ -426,10 +426,15 @@ struct xfrm_audit
 };
 
 #ifdef CONFIG_AUDITSYSCALL
-extern void xfrm_audit_log(uid_t auid, u32 secid, int type, int result,
-   struct xfrm_policy *xp, struct xfrm_state *x);
+extern void xfrm_audit_log(struct xfrm_audit audit_info, int result,
+  __be32 flowid, struct xfrm_policy *xp, 
+  struct xfrm_state *x, char *buf);
+
+extern void xfrm_get_auditinfo(struct sk_buff *skb, 
+  struct xfrm_audit *audit_info);
 #else
-#define xfrm_audit_log(a,s,t,r,p,x) do { ; } while (0)
+#define xfrm_audit_log(a,r,f,p,s,b) do { ; } while (0)
+#define xfrm_get_auditinfo(s, a) do { ; } while (0)
 #endif /* CONFIG_AUDITSYSCALL */
 
 static inline void xfrm_pol_hold(struct xfrm_policy *policy)
@@ -975,7 +980,7 @@ struct xfrmk_spdinfo {
 
 extern struct xfrm_state *xfrm_find_acq_byseq(u32 seq);
 extern int xfrm_state_delete(struct xfrm_state *x);
-extern int xfrm_state_flush(u8 proto, struct xfrm_audit *audit_info);
+extern int xfrm_state_flush(u8 proto, struct xfrm_audit audit_info);
 extern void xfrm_sad_getinfo(struct xfrmk_sadinfo *si);
 extern void xfrm_spd_getinfo(struct xfrmk_spdinfo *si);
 extern int xfrm_replay_check(struct xfrm_state *x, __be32 seq);
@@ -1032,13 +1037,13 @@ struct xfrm_policy *xfrm_policy_bysel_ct
  struct xfrm_sec_ctx *ctx, int delete,
  int *err);
 struct xfrm_policy *xfrm_policy_byid(u8, int dir, u32 id, int delete, int 
*err);
-int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info);
+int xfrm_policy_flush(u8 type, struct xfrm_audit audit_info);
 u32 xfrm_get_acqseq(void);
 void xfrm_alloc_spi(struct xfrm_state *x, __be32 minspi, __be32 maxspi);
 struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto,
  xfrm_address_t *daddr, xfrm_address_t *saddr,
  int create, unsigned short family);
-extern int xfrm_policy_flush(u8 type, struct xfrm_audit *audit_info);
+extern int xfrm_policy_flush(u8 type, struct xfrm_audit audit_info);
 extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy 
*pol);
 extern int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *xdst,
  struct flowi *fl, int family, int strict);
diff -urpN linux-2.6.22/net/key/af_key.c linux-2.6.22.patch10/net/key/af_key.c
--- linux-2.6.22/net/key/af_key.c   2007-08-01 11:49:42.0 -0500
+++ linux-2.6.22.patch10/net/key/af_key.c   2007-08-01 13:14:01.0 
-0500
@@ -1447,6 +1447,7 @@ static int pfkey_add(struct sock *sk, st
struct xfrm_state *x;
int err;
struct km_event c;
+   struct xfrm_audit audit_info;
 
x = pfkey_msg2xfrm_state(hdr, ext_hdrs);
if (IS_ERR(x))
@@ -1458,8 +1459,8 @@ static int pfkey_add(struct sock *sk, st
else
err = xfrm_state_update(x);
 
-   xfrm_audit_log(audit_get_loginuid(current-audit_context), 0,
-  AUDIT_MAC_IPSEC_ADDSA, err ? 0 : 1, NULL, x);
+   xfrm_get_auditinfo(0, audit_info);
+   xfrm_audit_log(audit_info, err ? 0 : 1, 0, 0, x, SAD-add);
 
if (err  0) {
x-km.state = XFRM_STATE_DEAD;
@@ -1484,6 +1485,7 @@ static int pfkey_delete(struct sock *sk,
struct xfrm_state *x;
struct km_event c;
int err;
+   struct xfrm_audit audit_info;
 
if (!ext_hdrs[SADB_EXT_SA-1] ||
!present_and_same_family(ext_hdrs[SADB_EXT_ADDRESS_SRC-1],
@@ -1512,8 +1514,9 @@ static int pfkey_delete(struct sock *sk,
c.event = XFRM_MSG_DELSA;
km_state_notify(x, c);
 out:
-   xfrm_audit_log(audit_get_loginuid(current-audit_context), 0,
-  AUDIT_MAC_IPSEC_DELSA, err ? 0 : 1, NULL, x);
+   xfrm_get_auditinfo(0, audit_info);
+   xfrm_audit_log(audit_info, err ? 0 : 1, 0,

Re: [PATCH 2.6.23 2/2] iw_cxgb3: Always call low level send function via cxgb3_ofld_send().

2007-08-02 Thread Roland Dreier

thanks, applied.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 2.6.23 1/2] Make the iw_cxgb3 module parameters writable.

2007-08-02 Thread Roland Dreier

thanks... I actually applied this for 2.6.24, since it's not really a
fix for anything, and the 2.6.23 window is closed.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

include/linux/netfilter/xt_statistic.h isn't installed?

2007-08-02 Thread Chuck Ebbert

Apparently xt_statistic.h needs to be added to
include/linux/netfilter/Kbuild for iptables 1.3.6 to build.

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Distributed storage.

2007-08-02 Thread Daniel Phillips

On Tuesday 31 July 2007 10:13, Evgeniy Polyakov wrote:
 Hi.

 I'm pleased to announce first release of the distributed storage
 subsystem, which allows to form a storage on top of remote and local
 nodes, which in turn can be exported to another storage as a node to
 form tree-like storages.

Excellent!  This is precisely what the doctor ordered for the 
OCFS2-based distributed storage system I have been mumbling about for 
some time.  In fact the dd in ddsnap and ddraid stands for distributed 
data.  The ddsnap/raid devices do not include an actual network 
transport, that is expected to be provided by a specialized block 
device, which up till now has been NBD.  But NBD has various 
deficiencies as you note, in addition to its tendency to deadlock when 
accessed locally.  Your new code base may be just the thing we always 
wanted.  We (zumastor et al) will take it for a drive and see if 
anything breaks.

Memory deadlock is a concern of course.  From a cursory glance through, 
it looks like this code is pretty vm-friendly and you have thought 
quite a lot about it, however I respectfully invite peterz 
(obsessive/compulsive memory deadlock hunter) to help give it a good 
going over with me.

I see bits that worry me, e.g.:

+   req = mempool_alloc(st->w->req_pool, GFP_NOIO);

which seems to be callable in response to a local request, just the case 
where NBD deadlocks.  Your mempool strategy can work reliably only if 
you can prove that the pool allocations of the maximum number of 
requests you can have in flight do not exceed the size of the pool.  In 
other words, if you ever take the pool's fallback path to normal 
allocation, you risk deadlock.

Anyway, if this is as grand as it seems then I would think we ought to 
factor out a common transfer core that can be used by all of NBD, 
iSCSI, ATAoE and your own kernel server, in place of the roll-yer-own 
code those things have now.

Regards,

Daniel
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: ipsec not working in 2.6.23-rc1-git10 when using pfkey

2007-08-02 Thread David Miller

From: Joy Latten [EMAIL PROTECTED]
Date: Thu, 2 Aug 2007 13:58:38 -0500

 Although an ipsec SA was established, kernel couldn't seem to find it.

 I think since we are now using x->sel.family instead of family 
 in the xfrm_selector_match() called in xfrm_state_find(), af_key 
 needs to set this field too, just as xfrm_user does. 

 In af_key.c, x->sel.family only gets set when there's an 
 ext_hdrs[SADB_EXT_ADDRESS_PROXY-1] which I think is for tunnel.

 I think pfkey needs to also set the x->sel.family field when it is 0.

Thanks for finding this bug Joy.

It basically proves that this inner address change was 100% not tested
in any reasonable way by the patch submitter.

Originally Herbert and I thought I only saw problems because XFRM_USER
cases such as openswan did not set the x->sel.family field, but now
we see that PF_KEY also has the same exact problem, and as a
result I am very annoyed.

Joakim, TEST YOUR PATCHES, and not just with your BEET test cases,
before submitting them in the future.  Having normal configurations of
both PF_KEY and XFRM_USER ipsec totally break as a result of your
changes is totally unacceptable and I will doubly scrutinize your
patch submissions in the future because of what has happened here.

Thanks.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: strange tcp behavior

2007-08-02 Thread David Miller

From: Evgeniy Polyakov [EMAIL PROTECTED]
Date: Thu, 2 Aug 2007 22:48:42 +0400

 On Thu, Aug 02, 2007 at 10:08:42PM +0400, Evgeniy Polyakov ([EMAIL 
 PROTECTED]) wrote:
  So, following patch fixes problem for me.

 Or this one. Essentially the same though.

Thanks a lot for figuring out this bug Evgeniy, I'll look at
this later.  I'm very surprised autobind isn't guarded properly
as this is a case that Alexey Kuznetsov and I used to audit from
time to time.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [REGRESSION] tg3 dead after s2ram

2007-08-02 Thread David Miller

From: Michael Chan [EMAIL PROTECTED]
Date: Thu, 02 Aug 2007 12:10:29 -0700

 On Thu, 2007-08-02 at 02:23 -0700, David Miller wrote:
  From: Joachim Deguara [EMAIL PROTECTED]
  Date: Thu, 2 Aug 2007 11:15:05 +0200

   Seems like even if powersave shuts down the network that the device 
   should 
   still work after a suspend to ram, so who is at fault here?

  It's a good question.

  The pci_enable() is done on the PCI device at probe time, at least in
  the tg3 driver, and with such a model restoring and saving of PCI
  config space should not be dependent upon whether the netdev is
  running or not.

 Alternatively, we can also fix it by calling pci_enable_device() again
 in tg3_open().  But I think it is better to just always save and restore
 in suspend/resume.  bnx2.c will also require the same fix.

We could do it that way.  But don't you think it's more reliable to
save and restore around the event we know will be what clobbers the
PCI config space on us? :-)

Other things might happen between ->resume() and ->open() that could
modify PCI config space, and we could overwrite such changes if we do
the PCI restore in ->open().

One thing that's interesting to me is that, essentially, every PCI
driver with very few if any exceptions needs to do this sequence on
suspend and resume.  It would be nice if there was a way to get this
to happen transparently by default, with some reasonable override
mechanism, for PCI device drivers.

Anyways, once your patch is tested feel free to send me the bnx2
one too.

Thanks!
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] TIPC: make function tipc_nameseq_subscribe static

2007-08-02 Thread Florian Westphal

make needlessly global function tipc_nameseq_subscribe static.

Signed-off-by: Florian Westphal [EMAIL PROTECTED]
---
diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c
index d8473ee..ac7dfdd 100644
--- a/net/tipc/name_table.c
+++ b/net/tipc/name_table.c
@@ -501,7 +501,7 @@ end_node:
  * sequence overlapping with the requested sequence
  */
 
-void tipc_nameseq_subscribe(struct name_seq *nseq, struct subscription *s)
+static void tipc_nameseq_subscribe(struct name_seq *nseq, struct subscription 
*s)
 {
struct sub_seq *sseq = nseq-sseqs;
 

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[patch 5/5][RFC] Update e1000 driver to use devres.

2007-08-02 Thread Brandon Philips

Conversion of e1000 probe() and remove() to devres.

Signed-off-by: Brandon Philips [EMAIL PROTECTED]
---
 drivers/net/e1000/e1000.h  |1 
 drivers/net/e1000/e1000_main.c |   79 -
 2 files changed, 26 insertions(+), 54 deletions(-)

Index: linux-2.6/drivers/net/e1000/e1000_main.c
===
--- linux-2.6.orig/drivers/net/e1000/e1000_main.c
+++ linux-2.6/drivers/net/e1000/e1000_main.c
@@ -868,7 +868,7 @@ e1000_probe(struct pci_dev *pdev,
int i, err, pci_using_dac;
uint16_t eeprom_data = 0;
uint16_t eeprom_apme_mask = E1000_EEPROM_APME;
-   if ((err = pci_enable_device(pdev)))
+   if ((err = pcim_enable_device(pdev)))
return err;
 
if (!(err = pci_set_dma_mask(pdev, DMA_64BIT_MASK)) 
@@ -884,14 +884,14 @@ e1000_probe(struct pci_dev *pdev,
}
 
if ((err = pci_request_regions(pdev, e1000_driver_name)))
-   goto err_pci_reg;
+   goto err_dma;
 
pci_set_master(pdev);
 
err = -ENOMEM;
-   netdev = alloc_etherdev(sizeof(struct e1000_adapter));
+   netdev = devm_alloc_etherdev(pdev-dev, sizeof(struct e1000_adapter));
if (!netdev)
-   goto err_alloc_etherdev;
+   goto err_dma;
 
SET_MODULE_OWNER(netdev);
SET_NETDEV_DEV(netdev, pdev-dev);
@@ -907,9 +907,9 @@ e1000_probe(struct pci_dev *pdev,
mmio_len = pci_resource_len(pdev, BAR_0);
 
err = -EIO;
-   adapter-hw.hw_addr = ioremap(mmio_start, mmio_len);
+   adapter-hw.hw_addr = devm_ioremap(pdev-dev, mmio_start, mmio_len);
if (!adapter-hw.hw_addr)
-   goto err_ioremap;
+   goto err_dma;
 
for (i = BAR_1; i = BAR_5; i++) {
if (pci_resource_len(pdev, i) == 0)
@@ -952,7 +952,7 @@ e1000_probe(struct pci_dev *pdev,
/* setup the private structure */
 
if ((err = e1000_sw_init(adapter)))
-   goto err_sw_init;
+   goto err_dma;
 
err = -EIO;
/* Flash BAR mapping must happen after e1000_sw_init
@@ -961,7 +961,9 @@ e1000_probe(struct pci_dev *pdev,
   (pci_resource_flags(pdev, 1)  IORESOURCE_MEM)) {
flash_start = pci_resource_start(pdev, 1);
flash_len = pci_resource_len(pdev, 1);
-   adapter-hw.flash_address = ioremap(flash_start, flash_len);
+   adapter-hw.flash_address = devm_ioremap(pdev-dev,
+   flash_start,
+   flash_len);
if (!adapter-hw.flash_address)
goto err_flashmap;
}
@@ -1163,27 +1165,11 @@ err_register:
 err_eeprom:
if (!e1000_check_phy_reset_block(adapter-hw))
e1000_phy_hw_reset(adapter-hw);
-
-   if (adapter-hw.flash_address)
-   iounmap(adapter-hw.flash_address);
 err_flashmap:
 #ifdef CONFIG_E1000_NAPI
for (i = 0; i  adapter-num_rx_queues; i++)
dev_put(adapter-polling_netdev[i]);
 #endif
-
-   kfree(adapter-tx_ring);
-   kfree(adapter-rx_ring);
-#ifdef CONFIG_E1000_NAPI
-   kfree(adapter-polling_netdev);
-#endif
-err_sw_init:
-   iounmap(adapter-hw.hw_addr);
-err_ioremap:
-   free_netdev(netdev);
-err_alloc_etherdev:
-   pci_release_regions(pdev);
-err_pci_reg:
 err_dma:
pci_disable_device(pdev);
return err;
@@ -1224,21 +1210,6 @@ e1000_remove(struct pci_dev *pdev)
 
if (!e1000_check_phy_reset_block(adapter-hw))
e1000_phy_hw_reset(adapter-hw);
-
-   kfree(adapter-tx_ring);
-   kfree(adapter-rx_ring);
-#ifdef CONFIG_E1000_NAPI
-   kfree(adapter-polling_netdev);
-#endif
-
-   iounmap(adapter-hw.hw_addr);
-   if (adapter-hw.flash_address)
-   iounmap(adapter-hw.flash_address);
-   pci_release_regions(pdev);
-
-   free_netdev(netdev);
-
-   pci_disable_device(pdev);
 }
 
 /**
@@ -1350,27 +1321,27 @@ e1000_sw_init(struct e1000_adapter *adap
 static int __devinit
 e1000_alloc_queues(struct e1000_adapter *adapter)
 {
-   adapter-tx_ring = kcalloc(adapter-num_tx_queues,
-  sizeof(struct e1000_tx_ring), GFP_KERNEL);
+   adapter-tx_ring = devm_kcalloc(adapter-pdev-dev,
+   adapter-num_tx_queues,
+   sizeof(struct e1000_tx_ring),
+   GFP_KERNEL);
if (!adapter-tx_ring)
return -ENOMEM;
 
-   adapter-rx_ring = kcalloc(adapter-num_rx_queues,
-  sizeof(struct e1000_rx_ring), GFP_KERNEL);
-   if (!adapter-rx_ring) {
-   kfree(adapter-tx_ring);
+   adapter-rx_ring = devm_kcalloc(adapter-pdev-dev,
+   adapter-num_rx_queues,
+

[patch 2/5][RFC] Update net core to use devres.

2007-08-02 Thread Brandon Philips

* netdev_pci_remove_one() can replace simple pci device remove
  functions

* devm_alloc_netdev() is like alloc_netdev but allocates memory using devres.

Signed-off-by: Brandon Philips [EMAIL PROTECTED]

---
 include/linux/etherdevice.h |5 ++
 include/linux/netdevice.h   |7 ++
 net/core/dev.c  |  109 +++-
 net/ethernet/eth.c  |8 +++
 4 files changed, 119 insertions(+), 10 deletions(-)

Index: linux-2.6/include/linux/netdevice.h
===
--- linux-2.6.orig/include/linux/netdevice.h
+++ linux-2.6/include/linux/netdevice.h
@@ -656,6 +656,7 @@ extern int  dev_queue_xmit(struct sk_buf
 extern int register_netdevice(struct net_device *dev);
 extern voidunregister_netdevice(struct net_device *dev);
 extern voidfree_netdev(struct net_device *dev);
+extern voidnetdev_pci_remove_one(struct pci_dev *pdev);
 extern voidsynchronize_net(void);
 extern int register_netdevice_notifier(struct notifier_block *nb);
 extern int unregister_netdevice_notifier(struct notifier_block 
*nb);
@@ -1085,8 +1086,14 @@ extern void  ether_setup(struct net_devi
 extern struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
   void (*setup)(struct net_device *),
   unsigned int queue_count);
+extern struct net_device *devm_alloc_netdev_mq(struct device *dev,
+  int sizeof_priv, const char *name,
+  void (*setup)(struct net_device *),
+  unsigned int queue_count);
 #define alloc_netdev(sizeof_priv, name, setup) \
alloc_netdev_mq(sizeof_priv, name, setup, 1)
+#define devm_alloc_netdev(dev, sizeof_priv, name, setup) \
+   devm_alloc_netdev_mq(dev, sizeof_priv, name, setup, 1)
 extern int register_netdev(struct net_device *dev);
 extern voidunregister_netdev(struct net_device *dev);
 /* Functions used for secondary unicast and multicast support */
Index: linux-2.6/net/core/dev.c
===
--- linux-2.6.orig/net/core/dev.c
+++ linux-2.6/net/core/dev.c
@@ -89,6 +89,7 @@
 #include linux/interrupt.h
 #include linux/if_ether.h
 #include linux/netdevice.h
+#include linux/pci.h
 #include linux/etherdevice.h
 #include linux/notifier.h
 #include linux/skbuff.h
@@ -3658,18 +3659,51 @@ static struct net_device_stats *internal
 }
 
 /**
- * alloc_netdev_mq - allocate network device
- * @sizeof_priv:   size of private data to allocate space for
- * @name:  device name format string
- * @setup: callback to initialize device
- * @queue_count:   the number of subqueues to allocate
+ * devm_free_netdev - wrapper around free_netdev for devres
+ */
+static void devm_free_netdev(struct device *gendev, void *res)
+{
+   struct net_device *dev = dev_get_drvdata(gendev);
+   free_netdev(dev);
+}
+
+/**
+ * register_netdev_devres - register netdev with a managed device
+ * @dev:   devres managed device responsible for the memory
+ * @netdev:pointer to netdev to be managed
  *
- * Allocates a struct net_device with private data area for driver use
- * and performs basic initialization.  Also allocates subquue structs
- * for each queue on the device at the end of the netdevice.
+ * Registers @netdev to the device @dev and calls free_netdev automatically 
when the
+ * device disappears
  */
-struct net_device *alloc_netdev_mq(int sizeof_priv, const char *name,
-   void (*setup)(struct net_device *), unsigned int queue_count)
+static inline void * register_netdev_devres(struct device *gendev,
+   struct net_device *dev)
+{
+   struct net_device **p;
+
+   /* 0 size because we don't need it. The net_device is already alloc'd
+* in alloc_netdev_mq.  We can't use devm_kzalloc in alloc_netdeev_mq
+* because a net_device cannot be free'd directly as it can be a
+* kobject.  See free_netdev.
+*/
+   p = devres_alloc(devm_free_netdev, 0, GFP_KERNEL);
+
+   if (unlikely(!p))
+   return NULL;
+
+   *p = dev;
+   devres_add(gendev, p);
+
+   return dev;
+}
+
+/**
+ * __alloc_netdev_mq - does the work to allocate a network device
+ * @dev:   devres managed device responsible for mem.
+ * NULL if unmanaged
+ */
+struct net_device *__alloc_netdev_mq(struct device *gendev, int sizeof_priv,
+   const char *name, void (*setup)(struct net_device *),
+   unsigned int queue_count)
 {
void *p;
struct net_device *dev;
@@ -3706,8 +3740,43 @@ struct net_device *alloc_netdev_mq(int s
dev-get_stats =

Re: [REGRESSION] tg3 dead after s2ram

2007-08-02 Thread Michael Chan

On Thu, 2007-08-02 at 15:06 -0700, David Miller wrote:
 From: Michael Chan [EMAIL PROTECTED]
 Date: Thu, 02 Aug 2007 12:10:29 -0700

  Alternatively, we can also fix it by calling pci_enable_device() again
  in tg3_open().  But I think it is better to just always save and restore
  in suspend/resume.  bnx2.c will also require the same fix.

 We could do it that way.  But don't you think it's more reliable to
 save and restore around the event we know will be what clobbers the
 PCI config space on us? :-)

Yes for sure when netif state is running and we were already doing that.

 Other things might happen between ->resume() and ->open() that could
 modify PCI config space, and we could overwrite such changes if we do
 the PCI restore in ->open().

I suggested calling pci_enable_device() in ->open(), not calling
pci_restore_state() in ->open().  I ultimately decided against it
because some devices do not enable memory as a workaround and it would
be messy to deal with it again in tg3_open().

I definitely agree that calling PCI restore in ->open() is a bad idea.
We used to save PCI state in ->probe() once and restore PCI state after
every chip reset.  This sequence caused many subtle problems.

-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[patch 0/5][RFC] Update network drivers to use devres

2007-08-02 Thread Brandon Philips

This patch set adds support for devres in the net core and converts the
e100 and e1000 drivers to devres.  Devres is a simple resource manager
for device drivers, see Documentation/driver-model/devres.txt for more
information.

The use of devres will remain optional for drivers with this patch set.
Drivers can be converted when it makes sense.

Builds on top of f0a664bbd1839fbe9f57564983f39bfc6c6f931d in Linus' tree
which renames __pci_reenable_device() to pci_reenable_device()

-- 
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[patch 1/5][RFC] NET: Change pci_enable_device to pci_reenable_device to keep device enable balance

2007-08-02 Thread Brandon Philips

On a slot_reset event pci_disable_device() is never called so calling
pci_enable_device() will unbalance the enable count.

Signed-off-by: Brandon Philips [EMAIL PROTECTED]

---
 drivers/net/e100.c |2 +-
 drivers/net/e1000/e1000_main.c |2 +-
 drivers/net/ixgb/ixgb_main.c   |2 +-
 drivers/net/s2io.c |2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

Index: linux-2.6/drivers/net/e100.c
===
--- linux-2.6.orig/drivers/net/e100.c
+++ linux-2.6/drivers/net/e100.c
@@ -2828,7 +2828,7 @@ static pci_ers_result_t e100_io_slot_res
struct net_device *netdev = pci_get_drvdata(pdev);
struct nic *nic = netdev_priv(netdev);
 
-   if (pci_enable_device(pdev)) {
+   if (pci_reenable_device(pdev)) {
printk(KERN_ERR e100: Cannot re-enable PCI device after 
reset.\n);
return PCI_ERS_RESULT_DISCONNECT;
}
Index: linux-2.6/drivers/net/e1000/e1000_main.c
===
--- linux-2.6.orig/drivers/net/e1000/e1000_main.c
+++ linux-2.6/drivers/net/e1000/e1000_main.c
@@ -5270,7 +5270,7 @@ static pci_ers_result_t e1000_io_slot_re
struct net_device *netdev = pci_get_drvdata(pdev);
struct e1000_adapter *adapter = netdev-priv;
 
-   if (pci_enable_device(pdev)) {
+   if (pci_reenable_device(pdev)) {
printk(KERN_ERR e1000: Cannot re-enable PCI device after 
reset.\n);
return PCI_ERS_RESULT_DISCONNECT;
}
Index: linux-2.6/drivers/net/ixgb/ixgb_main.c
===
--- linux-2.6.orig/drivers/net/ixgb/ixgb_main.c
+++ linux-2.6/drivers/net/ixgb/ixgb_main.c
@@ -2294,7 +2294,7 @@ static pci_ers_result_t ixgb_io_slot_res
struct net_device *netdev = pci_get_drvdata(pdev);
struct ixgb_adapter *adapter = netdev_priv(netdev);
 
-   if(pci_enable_device(pdev)) {
+   if(pci_reenable_device(pdev)) {
DPRINTK(PROBE, ERR, Cannot re-enable PCI device after 
reset.\n);
return PCI_ERS_RESULT_DISCONNECT;
}
Index: linux-2.6/drivers/net/s2io.c
===
--- linux-2.6.orig/drivers/net/s2io.c
+++ linux-2.6/drivers/net/s2io.c
@@ -7833,7 +7833,7 @@ static pci_ers_result_t s2io_io_slot_res
struct net_device *netdev = pci_get_drvdata(pdev);
struct s2io_nic *sp = netdev-priv;
 
-   if (pci_enable_device(pdev)) {
+   if (pci_reenable_device(pdev)) {
printk(KERN_ERR s2io: 
   Cannot re-enable PCI device after reset.\n);
return PCI_ERS_RESULT_DISCONNECT;

-- 
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[patch 3/5][RFC] Update e100 driver to use devres.

2007-08-02 Thread Brandon Philips

devres manages device resources and is currently used by all libata low level
drivers.   It can greatly reduce the complexity of the error handling on probe
and the device removal functions.

For example the e100_free() function and all of the gotos in e100_probe have
been removed.  Also, e100_remove() has been deleted and replaced with a much
simpler netdev_pci_remove_one() function that can handle PCI net devices that
don't require teardown besides resource deallocation.

Signed-off-by: Brandon Philips [EMAIL PROTECTED]

---
 drivers/net/e100.c |   70 -
 1 file changed, 17 insertions(+), 53 deletions(-)

Index: linux-2.6/drivers/net/e100.c
===
--- linux-2.6.orig/drivers/net/e100.c
+++ linux-2.6/drivers/net/e100.c
@@ -2517,18 +2517,11 @@ static int e100_do_ioctl(struct net_devi
 
 static int e100_alloc(struct nic *nic)
 {
-   nic-mem = pci_alloc_consistent(nic-pdev, sizeof(struct mem),
-   nic-dma_addr);
-   return nic-mem ? 0 : -ENOMEM;
-}
+   struct device *dev = nic-pdev-dev;
 
-static void e100_free(struct nic *nic)
-{
-   if(nic-mem) {
-   pci_free_consistent(nic-pdev, sizeof(struct mem),
-   nic-mem, nic-dma_addr);
-   nic-mem = NULL;
-   }
+   nic-mem = dmam_alloc_coherent(dev, sizeof(struct mem),
+   nic-dma_addr, GFP_ATOMIC);
+   return nic-mem ? 0 : -ENOMEM;
 }
 
 static int e100_open(struct net_device *netdev)
@@ -2555,7 +2548,7 @@ static int __devinit e100_probe(struct p
struct nic *nic;
int err;
 
-   if(!(netdev = alloc_etherdev(sizeof(struct nic {
+   if (!(netdev = devm_alloc_etherdev(pdev-dev, sizeof(struct nic {
if(((1  debug) - 1)  NETIF_MSG_PROBE)
printk(KERN_ERR PFX Etherdev alloc failed, abort.\n);
return -ENOMEM;
@@ -2585,26 +2578,26 @@ static int __devinit e100_probe(struct p
nic-msg_enable = (1  debug) - 1;
pci_set_drvdata(pdev, netdev);
 
-   if((err = pci_enable_device(pdev))) {
+   if ((err = pcim_enable_device(pdev))) {
DPRINTK(PROBE, ERR, Cannot enable PCI device, aborting.\n);
-   goto err_out_free_dev;
+   return err;
}
 
if(!(pci_resource_flags(pdev, 0)  IORESOURCE_MEM)) {
DPRINTK(PROBE, ERR, Cannot find proper PCI device 
base address, aborting.\n);
err = -ENODEV;
-   goto err_out_disable_pdev;
+   return err;
}
 
if((err = pci_request_regions(pdev, DRV_NAME))) {
DPRINTK(PROBE, ERR, Cannot obtain PCI resources, aborting.\n);
-   goto err_out_disable_pdev;
+   return err;
}
 
if((err = pci_set_dma_mask(pdev, DMA_32BIT_MASK))) {
DPRINTK(PROBE, ERR, No usable DMA configuration, aborting.\n);
-   goto err_out_free_res;
+   return err;
}
 
SET_MODULE_OWNER(netdev);
@@ -2613,11 +2606,11 @@ static int __devinit e100_probe(struct p
if (use_io)
DPRINTK(PROBE, INFO, using i/o access mode\n);
 
-   nic-csr = pci_iomap(pdev, (use_io ? 1 : 0), sizeof(struct csr));
+   nic-csr = pcim_iomap(pdev, (use_io ? 1 : 0), sizeof(struct csr));
if(!nic-csr) {
DPRINTK(PROBE, ERR, Cannot map device registers, aborting.\n);
err = -ENOMEM;
-   goto err_out_free_res;
+   return err;
}
 
if(ent-driver_data)
@@ -2650,11 +2643,11 @@ static int __devinit e100_probe(struct p
 
if((err = e100_alloc(nic))) {
DPRINTK(PROBE, ERR, Cannot alloc driver memory, aborting.\n);
-   goto err_out_iounmap;
+   return err;
}
 
if((err = e100_eeprom_load(nic)))
-   goto err_out_free;
+   return err;
 
e100_phy_init(nic);
 
@@ -2664,8 +2657,7 @@ static int __devinit e100_probe(struct p
if (!eeprom_bad_csum_allow) {
DPRINTK(PROBE, ERR, Invalid MAC address from 
EEPROM, aborting.\n);
-   err = -EAGAIN;
-   goto err_out_free;
+   return -EAGAIN;
} else {
DPRINTK(PROBE, ERR, Invalid MAC address from EEPROM, 
you MUST configure one.\n);
@@ -2685,7 +2677,7 @@ static int __devinit e100_probe(struct p
strcpy(netdev-name, eth%d);
if((err = register_netdev(netdev))) {
DPRINTK(PROBE, ERR, Cannot register net device, aborting.\n);
-   goto err_out_free;
+   return err;
}
 
DPRINTK(PROBE, INFO, addr 0x%llx, irq %d, 
@@ -2695,36 +2687,8 @@ static int __devinit e100_probe(struct p

[patch 4/5][RFC] Implement devm_kcalloc

2007-08-02 Thread Brandon Philips

devm_kcalloc is a simple wrapper around devm_kzalloc for arrays.  This is
needed because kcalloc is often used in network devices. 

Signed-off-by: Brandon Philips [EMAIL PROTECTED]

---
 drivers/base/devres.c  |   16 
 include/linux/device.h |1 +
 2 files changed, 17 insertions(+)

Index: linux-2.6/drivers/base/devres.c
===
--- linux-2.6.orig/drivers/base/devres.c
+++ linux-2.6/drivers/base/devres.c
@@ -630,6 +630,22 @@ void * devm_kzalloc(struct device *dev, 
 EXPORT_SYMBOL_GPL(devm_kzalloc);
 
 /**
+ * devm_kcalloc - resource-managed kcalloc
+ * @dev: Device to allocate memory for
+ * @n: number of elements.
+ * @size: element size.
+ * @flags: the type of memory to allocate.
+ */
+inline void * devm_kcalloc(struct device * dev, size_t n, size_t size,
+  gfp_t flags)
+{
+if (n != 0  size  ULONG_MAX / n)
+return NULL;
+return devm_kzalloc(dev, n * size, flags);
+}
+EXPORT_SYMBOL_GPL(devm_kcalloc);
+
+/**
  * devm_kfree - Resource-managed kfree
  * @dev: Device this memory belongs to
  * @p: Memory to free
Index: linux-2.6/include/linux/device.h
===
--- linux-2.6.orig/include/linux/device.h
+++ linux-2.6/include/linux/device.h
@@ -402,6 +402,7 @@ extern int devres_release_group(struct d
 
 /* managed kzalloc/kfree for device drivers, no kmalloc, always use kzalloc */
 extern void *devm_kzalloc(struct device *dev, size_t size, gfp_t gfp);
+extern void *devm_kcalloc(struct device *dev, size_t n, size_t size, gfp_t 
flags);
 extern void devm_kfree(struct device *dev, void *p);
 
 struct device {

-- 
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] TIPC: fix two minor sparse warnings

2007-08-02 Thread Florian Westphal

fix two warnings generated by sparse:

link.c:2386 symbol 'msgcount' shadows an earlier one
node.c:244 symbol 'addr_string' shadows an earlier one

Signed-off-by: Florian Westphal [EMAIL PROTECTED]
---
 net/tipc/link.c |2 +-
 net/tipc/node.c |2 --
 2 files changed, 1 insertions(+), 3 deletions(-)

diff --git a/net/tipc/link.c b/net/tipc/link.c
index 1d674e0..1b17fec 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -2383,10 +2383,10 @@ void tipc_link_changeover(struct link *l_ptr)
struct tipc_msg *msg = buf_msg(crs);
 
if ((msg_user(msg) == MSG_BUNDLER)  split_bundles) {
-   u32 msgcount = msg_msgcnt(msg);
struct tipc_msg *m = msg_get_wrapped(msg);
unchar* pos = (unchar*)m;
 
+   msgcount = msg_msgcnt(msg);
while (msgcount--) {
msg_set_seqno(m,msg_seqno(msg));
tipc_link_tunnel(l_ptr, tunnel_hdr, m,
diff --git a/net/tipc/node.c b/net/tipc/node.c
index e2e452a..598f4d3 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -241,8 +241,6 @@ struct node *tipc_node_attach_link(struct link *l_ptr)
char addr_string[16];
 
if (n_ptr-link_cnt = 2) {
-   char addr_string[16];
-
err(Attempt to create third link to %s\n,
addr_string_fill(addr_string, n_ptr-addr));
return NULL;
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: TCP SACK issue, hung connection, tcpdump included

2007-08-02 Thread Ilpo Järvinen

...I dropped lkml, it's useless to bother them with this network related 
stuff...

On Thu, 2 Aug 2007, Darryl Miles wrote:
 Ilpo Järvinen wrote:
  On Tue, 31 Jul 2007, Darryl L. Miles wrote:

[...RFC bashing, snip...] 

  * The older linux kernel for not being 100% SACK RFC compliant in its
 implementation ?  Not a lot we can do about this now, but if we're able
 to identify there may be backward compatibility issues with the same
 implementation that's a useful point to take forward.

  * The newer linux kernel for enabling D-SACK by default when RFC2883
 doesn't even claim a cast iron case for D-SACK to be compatible with any
 100% RFC compliant SACK implementation.

Are you aware that D-SACK processing and generation has been part of the 
linux kernel TCP far before 2.6 series even began... ...and it goes far 
beyond that, 2.4.0 had it too (2.2.0 didn't seem to have it, never before 
have I read that one IIRC :-) ).

 Does Ilpo have a particular vested interest in D-SACK that should be 
 disclosed?

Sure :-). ...my interest was to show you that it's not a bug :-).

 So it is necessary to turn off a TCP option (that is enabled by default)
 to be sure to have reliable TCP connections (that don't lock up) in the
 bugfree Linux networking stack ?  This is absurd.

...You'll have to turn a lot off to be compatible with everything around 
Internet, and still you would probably fail. Some people have to, e.g., 
turn off window scaling to work around buggy intermediate nodes (nat boxes 
or some firewalls), there's even a sysctl to work around signed 16-bit 
window arithmetic bugs that's mostly legacy but I bet you can find hosts 
broken in that area too. Etc. Yet we don't turn those off by default.

 If such an option causes such a problem; then that option should not be
 enabled by default. 

...Linux TCP has options enabled by default which are _known_ (at least 
nowadays) to cause bad problems and many of them are _still_ enabled... 
Browse archives if you don't believe me... And I'm relatively sure it will 
do so also in future though I'm not the maintainer nor anybody to make 
such decisions...

 rather than wallpaper over the cracks with the voodoo of turning things 
 that are enabled by default off.

...I said that because it felt like you kept repeating that the generated 
DSACK block is a bug even though, like you now know, it's a feature, not a 
bug. :-)

  2) The ACK got discarded by the SERVER
 
 I'd thought about that one, it's a possibility.  The server in question
 does have period of high UDP load on a fair number of UDP sockets at
 once (a few 1000).  Both systems have 2Gb of RAM.  The server has maybe
 just 250Mb of RSS of all apps combined.

...There are three independent signs in the log to indicate discard out
of these 3 reasons. Whereas your theory _fails_ to explain some behavior 
in the log you presented, e.g., not updated timestamp which happen even 
_before_ the DSACK stuff?!?... I'll formulate this question: why didn't 
snd_una advance nor timestamp update though a cumulative ACK arrived?
You can check for yourself (in server log):

03:58:56.384503
03:58:56.462583
03:58:56.465707
03:58:56.678546

...I'm hoping SNMPs provide explanation to it.

 The client sent a SACK.  But from understanding more about D-SACK, this
 is a valid D-SACK response, but it appears to confuse the older Linux
 kernel at the server end.

...Are you saying that it's confused by _DSACK_ just because it's the only 
strange thing you seem to find in the log? I see other things in your 
log which are exceptional and point elsewhere... Please don't neglect 
them... ...Problems occur already before that DSACK is received by the 
server end.

 Agreed on this.  However discarding data is allowed (providing it is
 intentional discarding not a bug where the TCP stack is discarding segments it
 shouldn't), TCP should recover providing sufficient packets get through.

But if one end decides to discard everything after time t, TCP _will
not_ recover because sufficient packets won't get through... And 
that's what your log is telling me. Yes discarding is allowed but that 
wasn't the point, we're more interested here on why it got discarded
rather than allowance of discarding.

 Forgive me if I am mistaken, but while the server reports a checksum
 error, the client did not.  I took this to be a misreporting by tcpdump
 at the server, probably due to the e1000 network card checksum
 offloading

...That's probably the reason, I agree, these show up. Thought that also 
myself, besides, it wouldn't cause that kind of breakage anyway.

 So the SNMP data would show up intentional discards (due to memory/resource
 issues).  So I'll get some of those too.
 
 The SNMP stats aren't so useful right now as
 the box has been rebooted since then but I shall attempt to capture
 /proc/net/* data, cause the problem, then capture /proc/net/* data again
 if those numbers can help.

Good, thanks. 

-- 
 i.

Re: [PATCH] ethtool: Add support for setting multiple rx/tx queues

2007-08-02 Thread Kok, Auke


Auke Kok wrote:

Signed-off-by: Auke Kok [EMAIL PROTECTED]
---

@@ -496,6 +516,14 @@ static void parse_cmdline(int argc, char **argp)
i = argc;
break;
}
+   if (mode == MODE_SQUEUE) {
+   parse_generic_cmdline(argc, argp, i,
+   gqueue_changed,
+   cmdline_ring,


Nick pointed out the obvious typo here... I'll wait for some (more) comments 
before reposting.


Auke
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] TIPC: fix two minor sparse warnings

2007-08-02 Thread David Miller

From: Florian Westphal [EMAIL PROTECTED]
Date: Fri, 3 Aug 2007 00:57:56 +0200

 fix two warnings generated by sparse:

 link.c:2386 symbol 'msgcount' shadows an earlier one
 node.c:244 symbol 'addr_string' shadows an earlier one

 Signed-off-by: Florian Westphal [EMAIL PROTECTED]

Looks good, applied, thanks Florian.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] TIPC: make function tipc_nameseq_subscribe static

2007-08-02 Thread David Miller

From: Florian Westphal [EMAIL PROTECTED]
Date: Fri, 3 Aug 2007 00:56:38 +0200

 make needlessly global function tipc_nameseq_subscribe static.

 Signed-off-by: Florian Westphal [EMAIL PROTECTED]

Patch applied, thanks Florian.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: strange tcp behavior

2007-08-02 Thread David Miller

From: Evgeniy Polyakov [EMAIL PROTECTED]
Date: Thu, 2 Aug 2007 22:48:42 +0400

 On Thu, Aug 02, 2007 at 10:08:42PM +0400, Evgeniy Polyakov ([EMAIL 
 PROTECTED]) wrote:
  So, following patch fixes problem for me.

 Or this one. Essentially the same though.

 Signed-off-by: Evgeniy Polyakov [EMAIL PROTECTED]

So, this bug got introduced partly in 2.3.15, which is when
we SMP threaded the networking stack.

The error check was present in inet_sendmsg() previously, it
looked like this:

int inet_sendmsg(struct socket *sock, struct msghdr *msg, int size,
 struct scm_cookie *scm)
{
struct sock *sk = sock-sk;

if (sk-shutdown  SEND_SHUTDOWN) {
if (!(msg-msg_flagsMSG_NOSIGNAL))
send_sig(SIGPIPE, current, 1);
return(-EPIPE);
}
if (sk-prot-sendmsg == NULL) 
return(-EOPNOTSUPP);
if(sk-err)
return sock_error(sk);

/* We may need to bind the socket. */
if (inet_autobind(sk) != 0)
return -EAGAIN;

return sk-prot-sendmsg(sk, msg, size);
}

I believe the idea was to move the sk->err check down into
tcp_sendmsg().

But this raises a major issue.

What in the world are we doing allowing stream sockets to autobind?
That is totally bogus.  Even if we autobind, that won't make a connect
happen.

There is logic down in TCP to handle all of these details properly
as long as we don't do this bogus autobind stuff.

do_tcp_sendpages() and tcp_sendmsg() both invoke sk_stream_wait_connect()
if TCP is in a state where data sending is not possible.  Inside of
sk_stream_wait_connect() it handles socket errors as first priority,
then if no socket errors are pending it checks if we are trying to
connect currently and if not returns -EPIPE.  It is exactly what we
want under these circumstances.

So the bug is purely that autobind is attempted for TCP sockets at
all.

TCP's sendpage handles this correctly already, it calls directly down
into tcp_sendpage(), inet_sendpage() is not used at all.

So the fix is to make tcp_sendmsg() direct as well, that bypasses all
of this autobind madness.  The error checking and state verification
in TCP's sendmsg() and sendpage() implementations will do the right
thing.

Comments?

Signed-off-by: David S. Miller [EMAIL PROTECTED]

diff --git a/include/net/tcp.h b/include/net/tcp.h
index c209361..185c7ec 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -281,7 +281,7 @@ extern int  tcp_v4_remember_stamp(struct 
sock *sk);

 extern int tcp_v4_tw_remember_stamp(struct 
inet_timewait_sock *tw);

-extern int tcp_sendmsg(struct kiocb *iocb, struct sock *sk,
+extern int tcp_sendmsg(struct kiocb *iocb, struct socket 
*sock,
struct msghdr *msg, size_t size);
 extern ssize_t tcp_sendpage(struct socket *sock, struct page 
*page, int offset, size_t size, int flags);

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 06c08e5..e681034 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -831,7 +831,7 @@ const struct proto_ops inet_stream_ops = {
.shutdown  = inet_shutdown,
.setsockopt= sock_common_setsockopt,
.getsockopt= sock_common_getsockopt,
-   .sendmsg   = inet_sendmsg,
+   .sendmsg   = tcp_sendmsg,
.recvmsg   = sock_common_recvmsg,
.mmap  = sock_no_mmap,
.sendpage  = tcp_sendpage,
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index da4c0b6..7e74011 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -658,9 +658,10 @@ static inline int select_size(struct sock *sk)
return tmp;
 }

-int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg,
+int tcp_sendmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg,
size_t size)
 {
+   struct sock *sk = sock-sk;
struct iovec *iov;
struct tcp_sock *tp = tcp_sk(sk);
struct sk_buff *skb;
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 3f5f742..9c94627 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2425,7 +2425,6 @@ struct proto tcp_prot = {
.shutdown   = tcp_shutdown,
.setsockopt = tcp_setsockopt,
.getsockopt = tcp_getsockopt,
-   .sendmsg= tcp_sendmsg,
.recvmsg= tcp_recvmsg,
.backlog_rcv= tcp_v4_do_rcv,
.hash   = tcp_v4_hash,
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index eed0937..b5f9637 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -484,7 +484,7 @@ const struct proto_ops inet6_stream_ops = {
.shutdown  = inet_shutdown, /* ok   */
.setsockopt= sock_common_setsockopt,/* ok

Re: [GIT PULL] sctp updates

2007-08-02 Thread David Miller

From: Vlad Yasevich [EMAIL PROTECTED]
Date: Thu, 02 Aug 2007 13:55:33 -0400

 Hi David

 Please pull the following changes since commit 
 fc34f6c617bf2a845d793af12b96bcc0afd472c4:
  Andrew Morton (1):
Fix up remove the arm26 port

 which are found in branch 'master' of the git repository at:

  master.kernel.org:/pub/scm/linux/kernel/git/vxy/lksctp-dev.git

Pulled, thanks a lot Vlad.
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/2] [TCP]: Also handle snd_una changes in tcp_cwnd_down

2007-08-02 Thread David Miller

From: Ilpo_Järvinen [EMAIL PROTECTED]
Date: Thu, 2 Aug 2007 14:18:59 +0300 (EEST)

 Dave, please put these two patches to net-2.6 to complete bidir fix 
 series. ...And please push to stable as well, take just the minimized 
 fix portion of this [TCP]: Also handle snd_una changes in 
 tcp_cwnd_down patch as I described above. Other cleanups in it can be
 put just to net-2.6.

Ok, I will, thanks Ilpo!
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Distributed storage.

2007-08-02 Thread Mike Snitzer

On 7/31/07, Evgeniy Polyakov [EMAIL PROTECTED] wrote:
 Hi.

 I'm pleased to announce first release of the distributed storage
 subsystem, which allows to form a storage on top of remote and local
 nodes, which in turn can be exported to another storage as a node to
 form tree-like storages.

Very interesting work, I read through your blog for the project and it
is amazing how quickly you developed/tested this code.  Thanks for
capturing the evolution of DST like you have.

 Compared to other similar approaches namely iSCSI and NBD,
 there are following advantages:
 * non-blocking processing without busy loops (compared to both above)
 * small, pluggable architecture
 * failover recovery (reconnect to remote target)
 * autoconfiguration (full absence in NBD and/or device mapper on top of 
 it)
 * no additional allocations (not including network part) - at least two in
 device mapper for fast path
 * very simple - try to compare with iSCSI
 * works with different network protocols
 * storage can be formed on top of remote nodes and be exported
 simultaneously (iSCSI is peer-to-peer only, NBD requires device
 mapper and is synchronous)

Having the in-kernel export is a great improvement over NBD's
userspace nbd-server (extra copy, etc).

But NBD's synchronous nature is actually an asset when coupled with MD
raid1 as it provides guarantees that the data has _really_ been
mirrored remotely.

 TODO list currently includes following main items:
 * redundancy algorithm (drop me a request of your own, but it is highly
 unlikely that Reed-Solomon based will ever be used - it is too slow
 for distributed RAID, I consider WEAVER codes)

I'd like to better understand where you see DST heading in the area of
redundancy.  Based on your blog entries:
http://tservice.net.ru/~s0mbre/blog/devel/dst/2007_07_24_1.html
http://tservice.net.ru/~s0mbre/blog/devel/dst/2007_07_31_2.html
(and your todo above) implementing a mirroring algorithm appears to be
a near-term goal for you.  Can you comment on how your intended
implementation would compare, in terms of correctness and efficiency,
to say MD (raid1) + NBD?  MD raid1 has a write intent bitmap that is
useful to speed resyncs; what if any mechanisms do you see DST
embracing to provide similar and/or better reconstruction
infrastructure?  Do you intend to embrace any existing MD or DM
infrastructure?

BTW, you have definitely published some very compelling work and it's
sad that you're predisposed to think DST won't be received well if you
pushed for inclusion (for others, as much was said in the 7.31.2007
blog post I referenced above).  Clearly others need to embrace DST to
help inclusion become a reality.  To that end, it's great to see that
Daniel Phillips and the other zumastor folks will be putting DST
through its paces.

regards,
Mike
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Distributed storage.

2007-08-02 Thread Manu Abraham

On 7/31/07, Evgeniy Polyakov [EMAIL PROTECTED] wrote:

 TODO list currently includes following main items:
 * redundancy algorithm (drop me a request of your own, but it is highly
 unlikely that Reed-Solomon based will ever be used - it is too slow
 for distributed RAID, I consider WEAVER codes)


LDPC codes[1][2] have been replacing Turbo code[3] with regards to
communication links and we have been seeing that transition. (maybe
helpful, came to mind seeing the mention of Turbo code) Don't know how
Weaver compares to LDPC, though I found some comparisons [4][5]. But
looking at fault tolerance figures, I guess Weaver is much better.

[1] http://www.ldpc-codes.com/
[2] http://portal.acm.org/citation.cfm?id=1240497
[3] http://en.wikipedia.org/wiki/Turbo_code
[4] 
http://domino.research.ibm.com/library/cyberdig.nsf/papers/BD559022A190D41C85257212006CEC11/$File/rj10391.pdf
[5] http://hplabs.hp.com/personal/Jay_Wylie/publications/wylie_dsn2007.pdf
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[patch] genirq: fix simple and fasteoi irq handlers

2007-08-02 Thread Jarek Poplawski

On Thu, Aug 02, 2007 at 10:11:26PM +0200, Ingo Molnar wrote:
 
 * Gabriel C [EMAIL PROTECTED] wrote:
 
  I get a warning on each boot now with this patch ..
  
  [   63.686613] WARNING: at kernel/irq/resend.c:70 check_irq_resend()
...
 we are still trying to figure out what happens with ne2k-pci. The 
 message will vanish soon.

Hi,

I can't guarantee this is all needed to fix this bug, but I think
this patch is necessary here.

Regards,
Jarek P.


Subject: genirq: fix simple and fasteoi irq handlers

After the "genirq: do not mask interrupts by default" patch, interrupts
should be disabled not immediately upon request, but after they happen.
But, handle_simple_irq() and handle_fasteoi_irq() can skip this once or
more if an irq is just serviced (IRQ_INPROGRESS), possibly disrupting a
driver's work.

Finding the main reason for the problems here, pointing out the broken
patch, and making the first patch which can fix this were all done by
Marcin Slusarz. Additional test patches of Thomas Gleixner and Ingo Molnar,
tested by Marcin Slusarz, helped to narrow possible reasons even more. Thanks.

PS: this patch fixes only one evident error here, but there could be
more places affected by above-mentioned change in irq handling.


Signed-off-by: Jarek Poplawski [EMAIL PROTECTED]

---

diff -Nurp 2.6.23-rc1-/kernel/irq/chip.c 2.6.23-rc1/kernel/irq/chip.c
--- 2.6.23-rc1-/kernel/irq/chip.c   2007-07-09 01:32:17.0 +0200
+++ 2.6.23-rc1/kernel/irq/chip.c2007-08-02 20:42:38.0 +0200
@@ -295,12 +295,11 @@ handle_simple_irq(unsigned int irq, stru
 
spin_lock(desc-lock);
 
-   if (unlikely(desc-status  IRQ_INPROGRESS))
-   goto out_unlock;
kstat_cpu(cpu).irqs[irq]++;
 
action = desc-action;
-   if (unlikely(!action || (desc-status  IRQ_DISABLED))) {
+   if (unlikely(!action || (desc-status  (IRQ_INPROGRESS |
+IRQ_DISABLED {
if (desc-chip-mask)
desc-chip-mask(irq);
desc-status = ~(IRQ_REPLAY | IRQ_WAITING);
@@ -392,18 +391,16 @@ handle_fasteoi_irq(unsigned int irq, str
 
spin_lock(desc-lock);
 
-   if (unlikely(desc-status  IRQ_INPROGRESS))
-   goto out;
-
desc-status = ~(IRQ_REPLAY | IRQ_WAITING);
kstat_cpu(cpu).irqs[irq]++;
 
/*
-* If its disabled or no action available
+* If it's running, disabled or no action available
 * then mask it and get out of here:
 */
action = desc-action;
-   if (unlikely(!action || (desc-status  IRQ_DISABLED))) {
+   if (unlikely(!action || (desc-status  (IRQ_INPROGRESS |
+IRQ_DISABLED {
desc-status |= IRQ_PENDING;
if (desc-chip-mask)
desc-chip-mask(irq);
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

75 matches

Mail list logo