Re: PROBLEM: kernel BUG at net/ipv6/ip6_output.c:718

2006-10-03 Thread Herbert Xu
On Tue, Oct 03, 2006 at 03:49:35PM +1000, Herbert Xu wrote:

 OK, I think I've got the right bug this time.

Here is the patch for the other bug that I found along the way:

[UDP6]: Fix MSG_PROBE crash

UDP tracks corking status through the pending variable.  The
IP layer also tracks it through the socket write queue.  It
is possible for the two to get out of sync when MSG_PROBE is
used.

This patch changes UDP to check the write queue to ensure
that the two stay in sync.

Signed-off-by: Herbert Xu [EMAIL PROTECTED]

Cheers,
--
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu 许志壬 [EMAIL PROTECTED]
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -675,6 +675,8 @@ do_append_data:
udp_flush_pending_frames(sk);
else if (!corkreq)
err = udp_push_pending_frames(sk, up);
+   else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
+   up->pending = 0;
release_sock(sk);
 
 out:
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -770,6 +770,8 @@ do_append_data:
udp_v6_flush_pending_frames(sk);
else if (!corkreq)
err = udp_v6_push_pending_frames(sk, up);
+   else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
+   up->pending = 0;
 
if (dst) {
if (connected) {
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: PROBLEM: kernel BUG at net/ipv6/ip6_output.c:718

2006-10-03 Thread James Morris
On Tue, 3 Oct 2006, Herbert Xu wrote:

 On Thu, Sep 28, 2006 at 10:40:18AM +0200, cagri coltekin wrote:
 
  No. Bug is the first after boot:
 
 OK, I think I've got the right bug this time.
 
 [UDP6]: Fix flowi clobbering
 
 The udp6_sendmsg function uses a shared buffer to store the
 flow without taking any locks.  This leads to races with SMP.
 This patch moves the flowi object onto the stack.
 
 Signed-off-by: Herbert Xu [EMAIL PROTECTED]

Nice catch.

Acked-by: James Morris [EMAIL PROTECTED]



-- 
James Morris
[EMAIL PROTECTED]
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: PROBLEM: kernel BUG at net/ipv6/ip6_output.c:718

2006-10-03 Thread cagri coltekin
On Tue, Oct 03, 2006 at 04:28:20PM +1000, Herbert Xu wrote:
 On Tue, Oct 03, 2006 at 03:49:35PM +1000, Herbert Xu wrote:
 
  OK, I think I've got the right bug this time.
 
 Here is the patch for the other bug that I found along the way:
 
 [UDP6]: Fix MSG_PROBE crash
 

This one fixes it. Thanks!

The patch does not apply cleanly to 2.6.18; it needed some manual
tweaking (a version that applies cleanly to vanilla 2.6.18 is
below in case it is of any use).

Cheers,
-- 
cagri


--- linux-2.6.18/net/ipv6/udp.c 2006-09-20 05:42:06.0 +0200
+++ linux-2.6.18-p4/net/ipv6/udp.c  2006-10-03 08:57:31.0 +0200
@@ -613,7 +613,7 @@
struct in6_addr *daddr, *final_p = NULL, final;
struct ipv6_txoptions *opt = NULL;
struct ip6_flowlabel *flowlabel = NULL;
-   struct flowi *fl = inet-cork.fl;
+   struct flowi fl;
struct dst_entry *dst;
int addr_len = msg-msg_namelen;
int ulen = len;
@@ -693,19 +693,19 @@
}
ulen += sizeof(struct udphdr);
 
-   memset(fl, 0, sizeof(*fl));
+   memset(fl, 0, sizeof(fl));
 
if (sin6) {
if (sin6-sin6_port == 0)
return -EINVAL;
 
-   fl-fl_ip_dport = sin6-sin6_port;
+   fl.fl_ip_dport = sin6-sin6_port;
daddr = sin6-sin6_addr;
 
if (np-sndflow) {
-   fl-fl6_flowlabel = 
sin6-sin6_flowinfoIPV6_FLOWINFO_MASK;
-   if (fl-fl6_flowlabelIPV6_FLOWLABEL_MASK) {
-   flowlabel = fl6_sock_lookup(sk, 
fl-fl6_flowlabel);
+   fl.fl6_flowlabel = 
sin6-sin6_flowinfoIPV6_FLOWINFO_MASK;
+   if (fl.fl6_flowlabelIPV6_FLOWLABEL_MASK) {
+   flowlabel = fl6_sock_lookup(sk, 
fl.fl6_flowlabel);
if (flowlabel == NULL)
return -EINVAL;
daddr = flowlabel-dst;
@@ -723,32 +723,32 @@
if (addr_len = sizeof(struct sockaddr_in6) 
sin6-sin6_scope_id 
ipv6_addr_type(daddr)IPV6_ADDR_LINKLOCAL)
-   fl-oif = sin6-sin6_scope_id;
+   fl.oif = sin6-sin6_scope_id;
} else {
if (sk-sk_state != TCP_ESTABLISHED)
return -EDESTADDRREQ;
 
-   fl-fl_ip_dport = inet-dport;
+   fl.fl_ip_dport = inet-dport;
daddr = np-daddr;
-   fl-fl6_flowlabel = np-flow_label;
+   fl.fl6_flowlabel = np-flow_label;
connected = 1;
}
 
-   if (!fl-oif)
-   fl-oif = sk-sk_bound_dev_if;
+   if (!fl.oif)
+   fl.oif = sk-sk_bound_dev_if;
 
if (msg-msg_controllen) {
opt = opt_space;
memset(opt, 0, sizeof(struct ipv6_txoptions));
opt-tot_len = sizeof(*opt);
 
-   err = datagram_send_ctl(msg, fl, opt, hlimit, tclass);
+   err = datagram_send_ctl(msg, fl, opt, hlimit, tclass);
if (err  0) {
fl6_sock_release(flowlabel);
return err;
}
-   if ((fl-fl6_flowlabelIPV6_FLOWLABEL_MASK)  !flowlabel) {
-   flowlabel = fl6_sock_lookup(sk, fl-fl6_flowlabel);
+   if ((fl.fl6_flowlabelIPV6_FLOWLABEL_MASK)  !flowlabel) {
+   flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
if (flowlabel == NULL)
return -EINVAL;
}
@@ -762,37 +762,37 @@
opt = fl6_merge_options(opt_space, flowlabel, opt);
opt = ipv6_fixup_options(opt_space, opt);
 
-   fl-proto = IPPROTO_UDP;
-   ipv6_addr_copy(fl-fl6_dst, daddr);
-   if (ipv6_addr_any(fl-fl6_src)  !ipv6_addr_any(np-saddr))
-   ipv6_addr_copy(fl-fl6_src, np-saddr);
-   fl-fl_ip_sport = inet-sport;
+   fl.proto = IPPROTO_UDP;
+   ipv6_addr_copy(fl.fl6_dst, daddr);
+   if (ipv6_addr_any(fl.fl6_src)  !ipv6_addr_any(np-saddr))
+   ipv6_addr_copy(fl.fl6_src, np-saddr);
+   fl.fl_ip_sport = inet-sport;

/* merge ip6_build_xmit from ip6_output */
if (opt  opt-srcrt) {
struct rt0_hdr *rt0 = (struct rt0_hdr *) opt-srcrt;
-   ipv6_addr_copy(final, fl-fl6_dst);
-   ipv6_addr_copy(fl-fl6_dst, rt0-addr);
+   ipv6_addr_copy(final, fl.fl6_dst);
+   ipv6_addr_copy(fl.fl6_dst, rt0-addr);
final_p = final;
connected = 0;
}
 
-   if (!fl-oif  ipv6_addr_is_multicast(fl-fl6_dst)) {
-   fl-oif = np-mcast_oif;
+   if (!fl.oif  ipv6_addr_is_multicast(fl.fl6_dst)) {
+   fl.oif = np-mcast_oif;
connected = 0;
}
 
-   err = ip6_sk_dst_lookup(sk, 

Re: PROBLEM: kernel BUG at net/ipv6/ip6_output.c:718

2006-10-02 Thread Herbert Xu
On Thu, Sep 28, 2006 at 10:40:18AM +0200, cagri coltekin wrote:

 No. Bug is the first after boot:

OK, I think I've got the right bug this time.

[UDP6]: Fix flowi clobbering

The udp6_sendmsg function uses a shared buffer to store the
flow without taking any locks.  This leads to races with SMP.
This patch moves the flowi object onto the stack.

Signed-off-by: Herbert Xu [EMAIL PROTECTED]

This bug is pretty old so we need the fix for 2.6.18 too.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu 许志壬 [EMAIL PROTECTED]
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -546,7 +546,7 @@ static int udpv6_sendmsg(struct kiocb *i
struct in6_addr *daddr, *final_p = NULL, final;
struct ipv6_txoptions *opt = NULL;
struct ip6_flowlabel *flowlabel = NULL;
-   struct flowi *fl = inet-cork.fl;
+   struct flowi fl;
struct dst_entry *dst;
int addr_len = msg-msg_namelen;
int ulen = len;
@@ -626,19 +626,19 @@ do_udp_sendmsg:
}
ulen += sizeof(struct udphdr);
 
-   memset(fl, 0, sizeof(*fl));
+   memset(fl, 0, sizeof(fl));
 
if (sin6) {
if (sin6-sin6_port == 0)
return -EINVAL;
 
-   fl-fl_ip_dport = sin6-sin6_port;
+   fl.fl_ip_dport = sin6-sin6_port;
daddr = sin6-sin6_addr;
 
if (np-sndflow) {
-   fl-fl6_flowlabel = 
sin6-sin6_flowinfoIPV6_FLOWINFO_MASK;
-   if (fl-fl6_flowlabelIPV6_FLOWLABEL_MASK) {
-   flowlabel = fl6_sock_lookup(sk, 
fl-fl6_flowlabel);
+   fl.fl6_flowlabel = 
sin6-sin6_flowinfoIPV6_FLOWINFO_MASK;
+   if (fl.fl6_flowlabelIPV6_FLOWLABEL_MASK) {
+   flowlabel = fl6_sock_lookup(sk, 
fl.fl6_flowlabel);
if (flowlabel == NULL)
return -EINVAL;
daddr = flowlabel-dst;
@@ -656,32 +656,32 @@ do_udp_sendmsg:
if (addr_len = sizeof(struct sockaddr_in6) 
sin6-sin6_scope_id 
ipv6_addr_type(daddr)IPV6_ADDR_LINKLOCAL)
-   fl-oif = sin6-sin6_scope_id;
+   fl.oif = sin6-sin6_scope_id;
} else {
if (sk-sk_state != TCP_ESTABLISHED)
return -EDESTADDRREQ;
 
-   fl-fl_ip_dport = inet-dport;
+   fl.fl_ip_dport = inet-dport;
daddr = np-daddr;
-   fl-fl6_flowlabel = np-flow_label;
+   fl.fl6_flowlabel = np-flow_label;
connected = 1;
}
 
-   if (!fl-oif)
-   fl-oif = sk-sk_bound_dev_if;
+   if (!fl.oif)
+   fl.oif = sk-sk_bound_dev_if;
 
if (msg-msg_controllen) {
opt = opt_space;
memset(opt, 0, sizeof(struct ipv6_txoptions));
opt-tot_len = sizeof(*opt);
 
-   err = datagram_send_ctl(msg, fl, opt, hlimit, tclass);
+   err = datagram_send_ctl(msg, fl, opt, hlimit, tclass);
if (err  0) {
fl6_sock_release(flowlabel);
return err;
}
-   if ((fl-fl6_flowlabelIPV6_FLOWLABEL_MASK)  !flowlabel) {
-   flowlabel = fl6_sock_lookup(sk, fl-fl6_flowlabel);
+   if ((fl.fl6_flowlabelIPV6_FLOWLABEL_MASK)  !flowlabel) {
+   flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
if (flowlabel == NULL)
return -EINVAL;
}
@@ -695,39 +695,39 @@ do_udp_sendmsg:
opt = fl6_merge_options(opt_space, flowlabel, opt);
opt = ipv6_fixup_options(opt_space, opt);
 
-   fl-proto = IPPROTO_UDP;
-   ipv6_addr_copy(fl-fl6_dst, daddr);
-   if (ipv6_addr_any(fl-fl6_src)  !ipv6_addr_any(np-saddr))
-   ipv6_addr_copy(fl-fl6_src, np-saddr);
-   fl-fl_ip_sport = inet-sport;
+   fl.proto = IPPROTO_UDP;
+   ipv6_addr_copy(fl.fl6_dst, daddr);
+   if (ipv6_addr_any(fl.fl6_src)  !ipv6_addr_any(np-saddr))
+   ipv6_addr_copy(fl.fl6_src, np-saddr);
+   fl.fl_ip_sport = inet-sport;

/* merge ip6_build_xmit from ip6_output */
if (opt  opt-srcrt) {
struct rt0_hdr *rt0 = (struct rt0_hdr *) opt-srcrt;
-   ipv6_addr_copy(final, fl-fl6_dst);
-   ipv6_addr_copy(fl-fl6_dst, rt0-addr);
+   ipv6_addr_copy(final, fl.fl6_dst);
+   ipv6_addr_copy(fl.fl6_dst, rt0-addr);
final_p = final;
connected = 0;
}
 
-   if (!fl-oif  ipv6_addr_is_multicast(fl-fl6_dst)) 

Re: PROBLEM: kernel BUG at net/ipv6/ip6_output.c:718

2006-09-28 Thread cagri coltekin
On Thu, Sep 28, 2006 at 10:38:29AM +1000, Herbert Xu wrote:
 On Tue, Sep 26, 2006 at 01:21:22PM +0200, cagri coltekin wrote:
 
  --
  [ 1395.890897] [ cut here ]
  [ 1395.946093] kernel BUG at net/ipv6/ip6_output.c:940!
 
 Could you go further back in the logs to see if there was a
 warning message? Either that or turn the WARN_ON into a BUG.

No. Bug is the first after boot:

[   34.042841] ADDRCONF(NETDEV_CHANGE): eth0: link becomes ready
[   44.110469] eth0: no IPv6 routers present
[   80.968012] process `syslogd' is using obsolete setsockopt SO_BSDCOMPAT
[   81.452248] process `named' is using obsolete setsockopt SO_BSDCOMPAT
[  110.559560] process `lwresd' is using obsolete setsockopt SO_BSDCOMPAT
[  140.568831] process `named' is using obsolete setsockopt SO_BSDCOMPAT
[ 1395.890897] [ cut here ]
[ 1395.946093] kernel BUG at net/ipv6/ip6_output.c:940!
[ 1396.005441] invalid opcode:  [#1]

Cheers,
-- 
cagri
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: PROBLEM: kernel BUG at net/ipv6/ip6_output.c:718

2006-09-27 Thread Herbert Xu
On Tue, Sep 26, 2006 at 01:21:22PM +0200, cagri coltekin wrote:

 --
 [ 1395.890897] [ cut here ]
 [ 1395.946093] kernel BUG at net/ipv6/ip6_output.c:940!

Could you go further back in the logs to see if there was a
warning message? Either that or turn the WARN_ON into a BUG.

Thanks,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu 许志壬 [EMAIL PROTECTED]
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: PROBLEM: kernel BUG at net/ipv6/ip6_output.c:718

2006-09-26 Thread cagri coltekin
Hi,

On Mon, Sep 25, 2006 at 10:15:30PM +1000, Herbert Xu wrote:
 On Fri, Sep 01, 2006 at 06:22:48PM +0200, cagri coltekin wrote:
 
  The second causes the system to give the bug a couple of seconds
  after bind starts, and loads the zones, without any traffic going
  on. BTW, patch applied with some offset difference (3 for the
  first -48 for the other two changes), on a pristine 2.6.17.11
  source tree.
 
 Well the good news is that I found a bug with MSG_PROBE that can
 cause exactly what you're seeing.  The bad news is that bind doesn't
 use MSG_PROBE :)
 
 So please try this patch to narrow the problem down further.

This time I applied the patch to 2.6.18. It applied with some
offset difference. I can stick to a version you suggest if 2.6.18
is not a good one. Here is the new bug message:

--
[ 1395.890897] [ cut here ]
[ 1395.946093] kernel BUG at net/ipv6/ip6_output.c:940!
[ 1396.005441] invalid opcode:  [#1]
[ 1396.049225] SMP 
[ 1396.071419] Modules linked in: ipmi_si ipmi_msghandler ide_cd cdrom
[ 1396.146853] CPU:2
[ 1396.146854] EIP:0060:[c02c6148]Not tainted VLI
[ 1396.146855] EFLAGS: 00010246   (2.6.18-ns-pri-debug-p3 #2) 
[ 1396.304174] EIP is at ip6_append_data+0xaf8/0xbd6
[ 1396.360405] eax: f7534d00   ebx:    ecx: f7534e9c   edx: f68f4480
[ 1396.441552] esi: f7534ee4   edi: f7534ee4   ebp: f7534ef0   esp: f742bc20
[ 1396.522691] ds: 007b   es: 007b   ss: 0068
[ 1396.571655] Process named (pid: 1897, ti=f742a000 task=c2b2c030 task.ti=f742)
[ 1396.659026] Stack: f68f4480 c03c3cb4 f742bf00 c02ef7e2 c02ce658 c02ce658 c03 
[ 1396.759947]0002 c02ef7e2 f7534eb4 f7534d70   f74 
[ 1396.860803]f742bce4 c02c55c5 f7534d00 f7534e9c f7534d00 0286 f74 
[ 1396.961659] Call Trace:
[ 1396.993128]  [c02ef7e2] _read_unlock_bh+0x12/0x16
[ 1397.051544]  [c02ce658] ip6_route_output+0xeb/0x1e9
[ 1397.112038]  [c02ce658] ip6_route_output+0xeb/0x1e9
[ 1397.172535]  [c02ef7e2] _read_unlock_bh+0x12/0x16
[ 1397.230952]  [c02c55c5] ip6_dst_lookup_tail+0xc6/0xd0
[ 1397.293524]  [c02d7e29] udpv6_sendmsg+0x3d4/0x9ac
[ 1397.351936]  [c028b4a2] ip_generic_getfrag+0x0/0xaf
[ 1397.412431]  [c02d6e22] udpv6_recvmsg+0x20c/0x303
[ 1397.470846]  [c02ae7b3] inet_sendmsg+0x4a/0x56
[ 1397.526148]  [c02682f4] sock_sendmsg+0xe8/0x101
[ 1397.582494]  [c01306ca] autoremove_wake_function+0x0/0x57
[ 1397.649214]  [c01cadc4] copy_from_user+0x46/0x7e
[ 1397.706594]  [c0269e4b] sys_sendmsg+0x191/0x1f8
[ 1397.762941]  [c014fe63] find_extend_vma+0x29/0x7e
[ 1397.821357]  [c0133bca] get_futex_key+0x4c/0x126
[ 1397.878740]  [c0135be8] do_futex+0x6c/0x10a
[ 1397.930928]  [c01cadc4] copy_from_user+0x46/0x7e
[ 1397.988307]  [c026a2f1] sys_socketcall+0x236/0x254
[ 1398.047762]  [c0102cdf] syscall_call+0x7/0xb
[ 1398.100989] Code: 34 c7 44 24 04 5a 00 00 00 89 4c 24 0c e8 89 02 02 00 b8 a 
[ 1398.333299] EIP: [c02c6148] ip6_append_data+0xaf8/0xbd6 SS:ESP 0068:f742bc0
--

Cheers,
-- 
cagri
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: PROBLEM: kernel BUG at net/ipv6/ip6_output.c:718

2006-09-25 Thread Herbert Xu
On Fri, Sep 01, 2006 at 06:22:48PM +0200, cagri coltekin wrote:

 The second causes the system to give the bug a couple of seconds
 after bind starts, and loads the zones, without any traffic going
 on. BTW, patch applied with some offset difference (3 for the
 first -48 for the other two changes), on a pristine 2.6.17.11
 source tree.

Well the good news is that I found a bug with MSG_PROBE that can
cause exactly what you're seeing.  The bad news is that bind doesn't
use MSG_PROBE :)

So please try this patch to narrow the problem down further.

Thanks,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu 许志壬 [EMAIL PROTECTED]
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 6671691..637b5c4 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -990,8 +990,10 @@ int ip6_append_data(struct sock *sk, int
int offset = 0;
int csummode = CHECKSUM_NONE;
 
-   if (flags&MSG_PROBE)
+   if (flags&MSG_PROBE) {
+   WARN_ON(1);
return 0;
+   }
if (skb_queue_empty(&sk->sk_write_queue)) {
/*
 * setup for corking
@@ -1013,6 +1015,7 @@ int ip6_append_data(struct sock *sk, int
dst_hold(rt-u.dst);
np-cork.rt = rt;
inet-cork.fl = *fl;
+   BUG_ON(!fl-proto);
np-cork.hop_limit = hlimit;
np-cork.tclass = tclass;
mtu = dst_mtu(rt-u.dst.path);
@@ -1032,6 +1035,7 @@ int ip6_append_data(struct sock *sk, int
} else {
rt = np-cork.rt;
fl = inet-cork.fl;
+   BUG_ON(!fl-proto);
if (inet-cork.flags  IPCORK_OPT)
opt = np-cork.opt;
transhdrlen = 0;
@@ -1285,6 +1289,7 @@ int ip6_push_pending_frames(struct sock 
 
if ((skb = __skb_dequeue(sk-sk_write_queue)) == NULL)
goto out;
+   BUG_ON(!proto);
tail_skb = (skb_shinfo(skb)-frag_list);
 
/* move skb-data to ip header from ext header */
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: PROBLEM: kernel BUG at net/ipv6/ip6_output.c:718

2006-08-31 Thread cagri coltekin
Hi Again,

It took a while to find equipment for a test environment, but now I
have one that I can test on.

Here is the result:

---
[17180051.768000] ip6_fragment: hlen = 0x818, len = 0x7ce, nexthdr=4
[17180051.84] [ cut here ]
[17180051.84] kernel BUG at net/ipv6/ip6_output.c:510!
[17180051.84] invalid opcode:  [#1]
[17180051.84] SMP 
[17180051.84] Modules linked in: ipmi_si ipmi_msghandler ide_cd cdrom
[17180051.84] CPU:0
[17180051.84] EIP:0060:[c02bc6bd]Not tainted VLI
[17180051.84] EFLAGS: 00010296   (2.6.17.11-ns-pri-debug-p1 #6) 
[17180051.84] EIP is at ip6_fragment+0x7f6/0x803
[17180051.84] eax: 0048   ebx: f75c4c5c   ecx: c038f5bc   edx: 0286
[17180051.84] esi: f7605c50   edi:    ebp: f76e2c80   esp: f7605bb8
[17180051.84] ds: 007b   es: 007b   ss: 0068
[17180051.84] Process named (pid: 1899, threadinfo=f7604000 task=f75cead0)
[17180051.84] Stack: c0324600 0818 07ce 0004  f7605bdc 
0400  
[17180051.84]ffd14ca4  f7605ea8 0818 f77a4040 01fe 
f755d080 f7976048 
[17180051.84]f76e2c80 f7605c50 f7976040 f75c4a80 c02bb612 f76e2c80 
c02bb40e c02bd66a 
[17180051.84] Call Trace:
[17180051.84]  c02bb612 ip6_output+0x3c/0x4c  c02bb40e 
ip6_output2+0x0/0x1c8
[17180051.84]  c02bd66a ip6_push_pending_frames+0x250/0x390  c02ce38e 
udp_v6_push_pending_frames+0x13d/0x1a4
[17180051.84]  c02ce97f udpv6_sendmsg+0x58a/0x953  c02cd7c2 
udpv6_recvmsg+0x20c/0x303
[17180051.84]  c02a6032 inet_sendmsg+0x4a/0x56  c0260b82 
sock_sendmsg+0xeb/0x105
[17180051.84]  c01c18cc __next_cpu+0x22/0x31  c01167c7 
find_busiest_group+0xd6/0x305
[17180051.84]  c012f91e autoremove_wake_function+0x0/0x57  c01c662e 
copy_from_user+0x46/0x7c
[17180051.84]  c01c662e copy_from_user+0x46/0x7c  c02626c9 
sys_sendmsg+0x191/0x1f8
[17180051.84]  c01334c6 futex_wait+0x129/0x238  c014b75c 
find_extend_vma+0x29/0x7e
[17180051.84]  c0117927 default_wake_function+0x0/0x12  c0132b91 
futex_wake+0x4a/0xba
[17180051.84]  c01c662e copy_from_user+0x46/0x7c  c0262b6f 
sys_socketcall+0x236/0x254
[17180051.84]  c0102be3 syscall_call+0x7/0xb 
[17180051.84] Code: 50 60 e9 36 f9 ff ff 0f b6 44 24 1b 8b 54 24 2c 89 44 
24 0c 8b 45 60 c7 04 24 00 46 32 c0 89 54 24 04 89 44 24 08 e8 50 07 e6 ff 0f 
0b fe 01 41 13 32 c0 e9 68 f8 ff ff 55 57 56 31 f6 53 83 ec 
[17180051.84] EIP: [c02bc6bd] ip6_fragment+0x7f6/0x803 SS:ESP 
0068:f7605bb8
---

I hope this helps.

Cheers,
-- 
cagri

On Tue, Aug 29, 2006 at 06:28:28PM +1000, Herbert Xu wrote:
 
 Thanks.  Please try this patch and tell me if it prints anything out.
 
 Cheers,
 -- 
 Visit Openswan at http://www.openswan.org/
 Email: Herbert Xu 许志壬 [EMAIL PROTECTED]
 Home Page: http://gondor.apana.org.au/~herbert/
 PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
 --
 diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
 index 4fb47a2..5e2e4ea 100644
 --- a/net/ipv6/ip6_output.c
 +++ b/net/ipv6/ip6_output.c
 @@ -508,6 +508,10 @@ static int ip6_fragment(struct sk_buff *
   dev = rt-u.dst.dev;
   hlen = ip6_find_1stfragopt(skb, prevhdr);
   nexthdr = *prevhdr;
 + if (unlikely(hlen  skb-len)) {
 + printk(KERN_CRIT ip6_fragment: hlen = 0x%x, len = 0x%x, 
 nexthdr=%d\n, hlen, skb-len, nexthdr);
 + BUG();
 + }
  
   mtu = dst_mtu(rt-u.dst);
   if (np  np-frag_size  mtu) {
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: PROBLEM: kernel BUG at net/ipv6/ip6_output.c:718

2006-08-29 Thread Herbert Xu
On Mon, Aug 28, 2006 at 02:49:07AM +0200, cagri coltekin wrote:
 
 Ooops, sorry for the confusion. It happens with 2.6.17 too (see
 below), cut&paste from wrong log. The rest of the data provided
 in the previous message is actually fresh.

Thanks.  Please try this patch and tell me if it prints anything out.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu 许志壬 [EMAIL PROTECTED]
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 4fb47a2..5e2e4ea 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -508,6 +508,10 @@ static int ip6_fragment(struct sk_buff *
dev = rt->u.dst.dev;
hlen = ip6_find_1stfragopt(skb, &prevhdr);
nexthdr = *prevhdr;
+   if (unlikely(hlen > skb->len)) {
+   printk(KERN_CRIT "ip6_fragment: hlen = 0x%x, len = 0x%x, 
nexthdr=%d\n", hlen, skb->len, nexthdr);
+   BUG();
+   }
 
mtu = dst_mtu(&rt->u.dst);
if (np && np->frag_size < mtu) {
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


PROBLEM: kernel BUG at net/ipv6/ip6_output.c:718

2006-08-27 Thread cagri coltekin
Hi,

[ Apologies for possible duplicates, and if I'm addressing the wrong
  people. ]

The following is the standard bug report form. I believe I have
included enough information for starters. I'd be happy to try
to provide more if you need it. Please let me know.

Kind Regards,
-- 
Cagri Coltekin

--

[1.] Kernel message: kernel BUG at net/ipv6/ip6_output.c:718

[2.] Full description of the problem/report:

 This is on a busy DNS server (serving about 5k queries per
 second). The problem started after we switched to a 2.6
 kernel, since it gives better performance. After the kernel
 bug message below, the system continues to run.  However, bind
 gets stuck and is completely unresponsive; killing it turns it into a
 zombie (parent is init). A new instance of bind can be started, and
 works fine until the next time.

 The problem does not occur in 2.4; we were running kernel.org 2.4.29
 before.

 I've tried the patch at http://lkml.org/lkml/2006/8/13/56, just
 in case. It does not make any difference.

 The system is a dual CPU (Hyperthreading) Dell Poweredge 2650.

[3.] Keywords: Kernel, networking, IPv6, UDP, DNS 

[4.] Kernel version (from /proc/version):

 Linux version 2.6.17.11-ns-debug ([EMAIL PROTECTED]) (gcc version 3.3.5 
(Debian 1:3.3.5-8ubuntu2.1)) #6 SMP Sat Aug 26 05:06:53 CEST 2006

 It is vanilla 2.6.17.11 with all unnecessary functionality
 removed during compilation.

 Please note that the kernel is compiled on a different machine,
 I'll provide information on both system below where appropriate.

[5.] Output of Oops.. 

  NOTE that there are bug messages from two consecutive events.

Aug 25 04:03:35 ns kernel: [ cut here ]
Aug 25 04:03:35 ns kernel: kernel BUG at net/ipv6/ip6_output.c:718!
Aug 25 04:03:35 ns kernel: invalid operand:  [#1]
Aug 25 04:03:35 ns kernel: SMP 
Aug 25 04:03:35 ns kernel: Modules linked in: uhci_hcd ehci_hcd ohci_hcd 
aic7xxx ide_cd
Aug 25 04:03:35 ns kernel: CPU:3
Aug 25 04:03:35 ns kernel: EIP:0060:[svc_create_socket+189/416]Not 
tainted VLI
Aug 25 04:03:35 ns kernel: EFLAGS: 00010282   (2.6.12.6-ncc-server) 
Aug 25 04:03:35 ns kernel: EIP is at ip6_fragment+0x24d/0x880
Aug 25 04:03:35 ns kernel: eax: fff2   ebx: f5391ff8   ecx: 07e8   edx: 
f792a600
Aug 25 04:03:35 ns kernel: esi: f792a800   edi: f5391ff8   ebp: f513d180   esp: 
f6defbac
Aug 25 04:03:35 ns kernel: ds: 007b   es: 007b   ss: 0068
Aug 25 04:03:35 ns kernel: Process named (pid: 1942, threadinfo=f6dee000 
task=f7959a20)
Aug 25 04:03:35 ns kernel: Stack: f31a8480 07e8 f5392000 fdd0 f6dee000 
 b6346cc8 821e 
Aug 25 04:03:35 ns kernel: 07e8  fdd0 ffee 
07e8 fdd4 f7075780 
Aug 25 04:03:35 ns kernel:f792a048 f792a018 f31a8480 f792a040 f7b18080 
c031fd50 f31a8480 c031fa30 
Aug 25 04:03:35 ns kernel: Call Trace:
Aug 25 04:03:35 ns kernel:  [svc_tcp_accept+960/992] ip6_output+0x30/0x40
Aug 25 04:03:35 ns kernel:  [svc_tcp_accept+160/992] ip6_output2+0x0/0x2f0
Aug 25 04:03:35 ns kernel:  [svcauth_unix_accept+43/496] 
ip6_push_pending_frames+0x29b/0x470
Aug 25 04:03:35 ns kernel:  [__func__.2+898/986] 
udp_v6_push_pending_frames+0x148/0x1b0
Aug 25 04:03:35 ns kernel:  [ip_send_reply+144/592] ip_generic_getfrag+0x0/0xc0
Aug 25 04:03:35 ns kernel:  [__func__.1+0/17] udpv6_sendmsg+0x52c/0x920
Aug 25 04:03:35 ns kernel:  [arp_solicit+356/464] udp_recvmsg+0x54/0x300
Aug 25 04:03:35 ns kernel:  [igmp_rcv+125/336] inet_sendmsg+0x4d/0x60
Aug 25 04:03:35 ns kernel:  [sys_connect+90/176] sock_sendmsg+0xda/0x100
Aug 25 04:03:35 ns kernel:  [find_busiest_group+209/736] 
find_busiest_group+0xd1/0x2e0
Aug 25 04:03:35 ns kernel:  [pci_bus_read_config_dword+38/144] 
copy_from_user+0x46/0x80
Aug 25 04:03:35 ns kernel:  [autoremove_wake_function+0/96] 
autoremove_wake_function+0x0/0x60
Aug 25 04:03:35 ns kernel:  [sock_wfree+15/96] sys_sendmsg+0x18f/0x1f0
Aug 25 04:03:35 ns kernel:  [futex_wait+292/576] futex_wait+0x124/0x240
Aug 25 04:03:35 ns kernel:  [find_extend_vma+41/144] find_extend_vma+0x29/0x90
Aug 25 04:03:35 ns kernel:  [default_wake_function+0/32] 
default_wake_function+0x0/0x20
Aug 25 04:03:35 ns kernel:  [futex_wake+123/208] futex_wake+0x7b/0xd0
Aug 25 04:03:35 ns kernel:  [pci_bus_read_config_dword+38/144] 
copy_from_user+0x46/0x80
Aug 25 04:03:35 ns kernel:  [sock_alloc_send_pskb+370/480] 
sys_socketcall+0x242/0x260
Aug 25 04:03:35 ns kernel:  [syscall_call+7/11] syscall_call+0x7/0xb
Aug 25 04:03:35 ns kernel: Code: 00 00 00 00 8b 54 24 2c 8b 4c 24 24 89 54 24 
0c 8b 45 24 89 4c 24 04 89 44 24 08 8b 44 24 58 89 04 24 e8 a7 44 fa ff 85 c0 
74 08 0f 0b ce 02 86 07 3a c0 0f b7 44 24 20 0f b6 d0 c1 e2 08 c1 e8 
Aug 25 05:49:07 ns kernel:  7UDP: bad checksum. From 213.147.0.92:53 to 
193.0.0.195:2101 ulen 52
Aug 25 06:30:02 ns kernel: [ cut here ]
Aug 25 06:30:02 ns kernel: kernel BUG at 

Re: PROBLEM: kernel BUG at net/ipv6/ip6_output.c:718

2006-08-27 Thread cagri coltekin
On Mon, Aug 28, 2006 at 10:16:56AM +1000, Herbert Xu wrote:
 cagri coltekin [EMAIL PROTECTED] wrote:
  
  Aug 25 04:03:35 ns kernel: [ cut here ]
  Aug 25 04:03:35 ns kernel: kernel BUG at net/ipv6/ip6_output.c:718!
  Aug 25 04:03:35 ns kernel: invalid operand:  [#1]
  Aug 25 04:03:35 ns kernel: SMP 
  Aug 25 04:03:35 ns kernel: Modules linked in: uhci_hcd ehci_hcd ohci_hcd 
  aic7xxx ide_cd
  Aug 25 04:03:35 ns kernel: CPU:3
  Aug 25 04:03:35 ns kernel: EIP:0060:[svc_create_socket+189/416]Not 
  tainted VLI
  Aug 25 04:03:35 ns kernel: EFLAGS: 00010282   (2.6.12.6-ncc-server) 
 
 This is an ancient kernel.  Please really try 2.6.17 instead of just
 talking about it (the line number confirms that it is 2.6.12).

Ooops, sorry for the confusion. It happens with 2.6.17 too (see
below); I cut&pasted from the wrong log. The rest of the data provided
in the previous message is actually fresh.

Aug 26 07:09:36 ns kernel: [17180077.732000] [ cut here 
]
Aug 26 07:09:36 ns kernel: [17180077.792000] kernel BUG at 
net/ipv6/ip6_output.c:693!
Aug 26 07:09:36 ns kernel: [17180077.856000] invalid opcode:  [#1]
Aug 26 07:09:36 ns kernel: [17180077.90] SMP 
Aug 26 07:09:36 ns kernel: [17180077.928000] Modules linked in: ide_cd cdrom
Aug 26 07:09:36 ns kernel: [17180077.98] CPU:2
Aug 26 07:09:36 ns kernel: [17180077.98] EIP:
0060:[ip6_fragment+619/1981]Not tainted VLI
Aug 26 07:09:36 ns kernel: [17180077.98] EFLAGS: 00010282   
(2.6.17.11-ns-debug #6) 
Aug 26 07:09:36 ns kernel: [17180078.148000] EIP is at ip6_fragment+0x26b/0x7bd
Aug 26 07:09:36 ns kernel: [17180078.204000] eax: fff2   ebx: fdd8   
ecx: 05b8   edx: f5ecc600
Aug 26 07:09:36 ns kernel: [17180078.288000] esi: f5ecc7f8   edi: f5e7bff0   
ebp: c2ff6780   esp: f71f5bb8
Aug 26 07:09:36 ns kernel: [17180078.376000] ds: 007b   es: 007b   ss: 0068
Aug 26 07:09:36 ns kernel: [17180078.428000] Process named (pid: 1811, 
threadinfo=f71f4000 task=f7470a10)
Aug 26 07:09:36 ns kernel: [17180078.508000] Stack: f7208880 07e0 f5e7bff8 
fdd8 f71f4000 f71f5bdc 5d00  
Aug 26 07:09:36 ns kernel: [17180078.612000]07e0 0e03 ffee 
07e0 fddc f5e7bff0 f7fd7880 f5ecc048 
Aug 26 07:09:36 ns kernel: [17180078.72]f7208880 f7fd7880 f5ecc040 
f774c080 c02adcc6 f7208880 c02adac2 c02afcc6 
Aug 26 07:09:36 ns kernel: [17180078.824000] Call Trace:
Aug 26 07:09:36 ns kernel: [17180078.86]  c02adcc6 ip6_output+0x3c/0x4c  
c02adac2 ip6_output2+0x0/0x1c8
Aug 26 07:09:36 ns kernel: [17180078.948000]  c02afcc6 
ip6_push_pending_frames+0x250/0x390  c02c09ea 
udp_v6_push_pending_frames+0x13d/0x1a4
Aug 26 07:09:36 ns kernel: [17180079.072000]  c02c0fdb 
udpv6_sendmsg+0x58a/0x953  c0291d36 udp_recvmsg+0x56/0x24c
Aug 26 07:09:36 ns kernel: [17180079.172000]  c02986e6 inet_sendmsg+0x4a/0x56 
 c0253256 sock_sendmsg+0xeb/0x105
Aug 26 07:09:36 ns kernel: [17180079.264000]  c01c18cc __next_cpu+0x22/0x31  
c01167c7 find_busiest_group+0xd6/0x305
Aug 26 07:09:36 ns kernel: [17180079.364000]  c01177e6 
dependent_sleeper+0x1ec/0x32d  c012f91e autoremove_wake_function+0x0/0x57
Aug 26 07:09:36 ns kernel: [17180079.476000]  c01c662e 
copy_from_user+0x46/0x7c  c01c662e copy_from_user+0x46/0x7c
Aug 26 07:09:36 ns kernel: [17180079.576000]  c0254d9d 
sys_sendmsg+0x191/0x1f8  c01334c6 futex_wait+0x129/0x238
Aug 26 07:09:36 ns kernel: [17180079.672000]  c014b75c 
find_extend_vma+0x29/0x7e  c0117927 default_wake_function+0x0/0x12
Aug 26 07:09:36 ns kernel: [17180079.776000]  c0132b91 futex_wake+0x4a/0xba  
c01667a8 pipe_write+0x0/0x3b
Aug 26 07:09:36 ns kernel: [17180079.864000]  c01c662e 
copy_from_user+0x46/0x7c  c0255243 sys_socketcall+0x236/0x254
Aug 26 07:09:36 ns kernel: [17180079.964000]  c0102be3 syscall_call+0x7/0xb 
Aug 26 07:09:36 ns kernel: [17180080.02] Code: 24 8b 44 24 34 89 50 04 89 
5c 24 0c 8b 4c 24 20 8b 45 1c 89 4c 24 04 89 44 24 08 8b 44 24 54 89 04 24 e8 
25 a6 fa ff 85 c0 74 08 0f 0b b5 02 21 fb 30 c0 0f b7 44 24 1c 8b 4c 24 34 89 
c2 c1 e8 
Aug 26 07:09:36 ns kernel: [17180080.264000] EIP: [ip6_fragment+619/1981] 
ip6_fragment+0x26b/0x7bd SS:ESP 0068:f71f5bb8
--


-- 
cagri
-
To unsubscribe from this list: send the line unsubscribe netdev in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html