Re: PROBLEM: kernel BUG at net/ipv6/ip6_output.c:718
On Tue, Oct 03, 2006 at 03:49:35PM +1000, Herbert Xu wrote: OK, I think I've got the right bug this time. Here is the patch for the other bug that I found along the way: [UDP6]: Fix MSG_PROBE crash UDP tracks corking status through the pending variable. The IP layer also tracks it through the socket write queue. It is possible for the two to get out of sync when MSG_PROBE is used. This patch changes UDP to check the write queue to ensure that the two stay in sync. Signed-off-by: Herbert Xu [EMAIL PROTECTED] Cheers, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu 许志壬 [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt -- diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -675,6 +675,8 @@ do_append_data: udp_flush_pending_frames(sk); else if (!corkreq) err = udp_push_pending_frames(sk, up); + else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) + up->pending = 0; release_sock(sk); out: diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -770,6 +770,8 @@ do_append_data: udp_v6_flush_pending_frames(sk); else if (!corkreq) err = udp_v6_push_pending_frames(sk, up); + else if (unlikely(skb_queue_empty(&sk->sk_write_queue))) + up->pending = 0; if (dst) { if (connected) { - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: PROBLEM: kernel BUG at net/ipv6/ip6_output.c:718
On Tue, 3 Oct 2006, Herbert Xu wrote: On Thu, Sep 28, 2006 at 10:40:18AM +0200, cagri coltekin wrote: No. Bug is the first after boot: OK, I think I've got the right bug this time. [UDP6]: Fix flowi clobbering The udp6_sendmsg function uses a shared buffer to store the flow without taking any locks. This leads to races with SMP. This patch moves the flowi object onto the stack. Signed-off-by: Herbert Xu [EMAIL PROTECTED] Nice catch. Acked-by: James Morris [EMAIL PROTECTED] -- James Morris [EMAIL PROTECTED] - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: PROBLEM: kernel BUG at net/ipv6/ip6_output.c:718
On Tue, Oct 03, 2006 at 04:28:20PM +1000, Herbert Xu wrote: On Tue, Oct 03, 2006 at 03:49:35PM +1000, Herbert Xu wrote: OK, I think I've got the right bug this time. Here is the patch for the other bug that I found along the way: [UDP6]: Fix MSG_PROBE crash This one fixes. Thanks! The patch does not cleanly apply to 2.6.18, needed some manual tweaking (the patch that applies cleanly to vanilla 2.6.18 is below in case it has any use). Cheers, -- cagri --- linux-2.6.18/net/ipv6/udp.c 2006-09-20 05:42:06.0 +0200 +++ linux-2.6.18-p4/net/ipv6/udp.c 2006-10-03 08:57:31.0 +0200 @@ -613,7 +613,7 @@ struct in6_addr *daddr, *final_p = NULL, final; struct ipv6_txoptions *opt = NULL; struct ip6_flowlabel *flowlabel = NULL; - struct flowi *fl = &inet->cork.fl; + struct flowi fl; struct dst_entry *dst; int addr_len = msg->msg_namelen; int ulen = len; @@ -693,19 +693,19 @@ } ulen += sizeof(struct udphdr); - memset(fl, 0, sizeof(*fl)); + memset(&fl, 0, sizeof(fl)); if (sin6) { if (sin6->sin6_port == 0) return -EINVAL; - fl->fl_ip_dport = sin6->sin6_port; + fl.fl_ip_dport = sin6->sin6_port; daddr = &sin6->sin6_addr; if (np->sndflow) { - fl->fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; - if (fl->fl6_flowlabel&IPV6_FLOWLABEL_MASK) { - flowlabel = fl6_sock_lookup(sk, fl->fl6_flowlabel); + fl.fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; + if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) { + flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); if (flowlabel == NULL) return -EINVAL; daddr = &flowlabel->dst; @@ -723,32 +723,32 @@ if (addr_len >= sizeof(struct sockaddr_in6) && sin6->sin6_scope_id && ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL) - fl->oif = sin6->sin6_scope_id; + fl.oif = sin6->sin6_scope_id; } else { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; - fl->fl_ip_dport = inet->dport; + fl.fl_ip_dport = inet->dport; daddr = &np->daddr; - fl->fl6_flowlabel = np->flow_label; + fl.fl6_flowlabel = np->flow_label; connected = 1; } - if (!fl->oif) - fl->oif = sk->sk_bound_dev_if; + if (!fl.oif) + fl.oif = 
sk->sk_bound_dev_if; if (msg->msg_controllen) { opt = &opt_space; memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(*opt); - err = datagram_send_ctl(msg, fl, opt, &hlimit, &tclass); + err = datagram_send_ctl(msg, &fl, opt, &hlimit, &tclass); if (err < 0) { fl6_sock_release(flowlabel); return err; } - if ((fl->fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { - flowlabel = fl6_sock_lookup(sk, fl->fl6_flowlabel); + if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { + flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); if (flowlabel == NULL) return -EINVAL; } @@ -762,37 +762,37 @@ opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); - fl->proto = IPPROTO_UDP; - ipv6_addr_copy(&fl->fl6_dst, daddr); - if (ipv6_addr_any(&fl->fl6_src) && !ipv6_addr_any(&np->saddr)) - ipv6_addr_copy(&fl->fl6_src, &np->saddr); - fl->fl_ip_sport = inet->sport; + fl.proto = IPPROTO_UDP; + ipv6_addr_copy(&fl.fl6_dst, daddr); + if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr)) + ipv6_addr_copy(&fl.fl6_src, &np->saddr); + fl.fl_ip_sport = inet->sport; /* merge ip6_build_xmit from ip6_output */ if (opt && opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; - ipv6_addr_copy(&final, &fl->fl6_dst); - ipv6_addr_copy(&fl->fl6_dst, rt0->addr); + ipv6_addr_copy(&final, &fl.fl6_dst); + ipv6_addr_copy(&fl.fl6_dst, rt0->addr); final_p = &final; connected = 0; } - if (!fl->oif && ipv6_addr_is_multicast(&fl->fl6_dst)) { - fl->oif = np->mcast_oif; + if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) { + fl.oif = np->mcast_oif; connected = 0; } err = ip6_sk_dst_lookup(sk,
Re: PROBLEM: kernel BUG at net/ipv6/ip6_output.c:718
On Thu, Sep 28, 2006 at 10:40:18AM +0200, cagri coltekin wrote: No. Bug is the first after boot: OK, I think I've got the right bug this time. [UDP6]: Fix flowi clobbering The udp6_sendmsg function uses a shared buffer to store the flow without taking any locks. This leads to races with SMP. This patch moves the flowi object onto the stack. Signed-off-by: Herbert Xu [EMAIL PROTECTED] This bug is pretty old so we need the fix for 2.6.18 too. Cheers, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu 许志壬 [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt -- diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -546,7 +546,7 @@ static int udpv6_sendmsg(struct kiocb *i struct in6_addr *daddr, *final_p = NULL, final; struct ipv6_txoptions *opt = NULL; struct ip6_flowlabel *flowlabel = NULL; - struct flowi *fl = &inet->cork.fl; + struct flowi fl; struct dst_entry *dst; int addr_len = msg->msg_namelen; int ulen = len; @@ -626,19 +626,19 @@ do_udp_sendmsg: } ulen += sizeof(struct udphdr); - memset(fl, 0, sizeof(*fl)); + memset(&fl, 0, sizeof(fl)); if (sin6) { if (sin6->sin6_port == 0) return -EINVAL; - fl->fl_ip_dport = sin6->sin6_port; + fl.fl_ip_dport = sin6->sin6_port; daddr = &sin6->sin6_addr; if (np->sndflow) { - fl->fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; - if (fl->fl6_flowlabel&IPV6_FLOWLABEL_MASK) { - flowlabel = fl6_sock_lookup(sk, fl->fl6_flowlabel); + fl.fl6_flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; + if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) { + flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); if (flowlabel == NULL) return -EINVAL; daddr = &flowlabel->dst; @@ -656,32 +656,32 @@ do_udp_sendmsg: if (addr_len >= sizeof(struct sockaddr_in6) && sin6->sin6_scope_id && ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL) - fl->oif = sin6->sin6_scope_id; + fl.oif = sin6->sin6_scope_id; } else { if (sk->sk_state != TCP_ESTABLISHED) return -EDESTADDRREQ; - 
fl->fl_ip_dport = inet->dport; + fl.fl_ip_dport = inet->dport; daddr = &np->daddr; - fl->fl6_flowlabel = np->flow_label; + fl.fl6_flowlabel = np->flow_label; connected = 1; } - if (!fl->oif) - fl->oif = sk->sk_bound_dev_if; + if (!fl.oif) + fl.oif = sk->sk_bound_dev_if; if (msg->msg_controllen) { opt = &opt_space; memset(opt, 0, sizeof(struct ipv6_txoptions)); opt->tot_len = sizeof(*opt); - err = datagram_send_ctl(msg, fl, opt, &hlimit, &tclass); + err = datagram_send_ctl(msg, &fl, opt, &hlimit, &tclass); if (err < 0) { fl6_sock_release(flowlabel); return err; } - if ((fl->fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { - flowlabel = fl6_sock_lookup(sk, fl->fl6_flowlabel); + if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { + flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel); if (flowlabel == NULL) return -EINVAL; } @@ -695,39 +695,39 @@ do_udp_sendmsg: opt = fl6_merge_options(&opt_space, flowlabel, opt); opt = ipv6_fixup_options(&opt_space, opt); - fl->proto = IPPROTO_UDP; - ipv6_addr_copy(&fl->fl6_dst, daddr); - if (ipv6_addr_any(&fl->fl6_src) && !ipv6_addr_any(&np->saddr)) - ipv6_addr_copy(&fl->fl6_src, &np->saddr); - fl->fl_ip_sport = inet->sport; + fl.proto = IPPROTO_UDP; + ipv6_addr_copy(&fl.fl6_dst, daddr); + if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr)) + ipv6_addr_copy(&fl.fl6_src, &np->saddr); + fl.fl_ip_sport = inet->sport; /* merge ip6_build_xmit from ip6_output */ if (opt && opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt; - ipv6_addr_copy(&final, &fl->fl6_dst); - ipv6_addr_copy(&fl->fl6_dst, rt0->addr); + ipv6_addr_copy(&final, &fl.fl6_dst); + ipv6_addr_copy(&fl.fl6_dst, rt0->addr); final_p = &final; connected = 0; } - if (!fl->oif && ipv6_addr_is_multicast(&fl->fl6_dst))
Re: PROBLEM: kernel BUG at net/ipv6/ip6_output.c:718
On Thu, Sep 28, 2006 at 10:38:29AM +1000, Herbert Xu wrote: On Tue, Sep 26, 2006 at 01:21:22PM +0200, cagri coltekin wrote: -- [ 1395.890897] [ cut here ] [ 1395.946093] kernel BUG at net/ipv6/ip6_output.c:940! Could you go further back in the logs to see if there was a warning message? Either that or turn the WARN_ON into a BUG. No. Bug is the first after boot: [ 34.042841] ADDRCONF(NETDEV_CHANGE): eth0: link becomes ready [ 44.110469] eth0: no IPv6 routers present [ 80.968012] process `syslogd' is using obsolete setsockopt SO_BSDCOMPAT [ 81.452248] process `named' is using obsolete setsockopt SO_BSDCOMPAT [ 110.559560] process `lwresd' is using obsolete setsockopt SO_BSDCOMPAT [ 140.568831] process `named' is using obsolete setsockopt SO_BSDCOMPAT [ 1395.890897] [ cut here ] [ 1395.946093] kernel BUG at net/ipv6/ip6_output.c:940! [ 1396.005441] invalid opcode: [#1] Cheers, -- cagri - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: PROBLEM: kernel BUG at net/ipv6/ip6_output.c:718
On Tue, Sep 26, 2006 at 01:21:22PM +0200, cagri coltekin wrote: -- [ 1395.890897] [ cut here ] [ 1395.946093] kernel BUG at net/ipv6/ip6_output.c:940! Could you go further back in the logs to see if there was a warning message? Either that or turn the WARN_ON into a BUG. Thanks, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu 许志壬 [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: PROBLEM: kernel BUG at net/ipv6/ip6_output.c:718
Hi, On Mon, Sep 25, 2006 at 10:15:30PM +1000, Herbert Xu wrote: On Fri, Sep 01, 2006 at 06:22:48PM +0200, cagri coltekin wrote: The second causes the system to give the bug a couple of seconds after bind starts, and loads the zones, without any traffic going on. BTW, patch applied with some offset difference (3 for the first -48 for the other two changes), on a pristine 2.6.17.11 source tree. Well the good news is that I found a bug with MSG_PROBE that can cause exactly what you're seeing. The bad news is that bind doesn't use MSG_PROBE :) So please try this patch to narrow the problem down further. This time I applied patch to 2.6.18. The patch applied with some offset difference. I can stick to a version you suggest if 2.6.18 is not a good. Here is the new bug message: -- [ 1395.890897] [ cut here ] [ 1395.946093] kernel BUG at net/ipv6/ip6_output.c:940! [ 1396.005441] invalid opcode: [#1] [ 1396.049225] SMP [ 1396.071419] Modules linked in: ipmi_si ipmi_msghandler ide_cd cdrom [ 1396.146853] CPU:2 [ 1396.146854] EIP:0060:[c02c6148]Not tainted VLI [ 1396.146855] EFLAGS: 00010246 (2.6.18-ns-pri-debug-p3 #2) [ 1396.304174] EIP is at ip6_append_data+0xaf8/0xbd6 [ 1396.360405] eax: f7534d00 ebx: ecx: f7534e9c edx: f68f4480 [ 1396.441552] esi: f7534ee4 edi: f7534ee4 ebp: f7534ef0 esp: f742bc20 [ 1396.522691] ds: 007b es: 007b ss: 0068 [ 1396.571655] Process named (pid: 1897, ti=f742a000 task=c2b2c030 task.ti=f742) [ 1396.659026] Stack: f68f4480 c03c3cb4 f742bf00 c02ef7e2 c02ce658 c02ce658 c03 [ 1396.759947]0002 c02ef7e2 f7534eb4 f7534d70 f74 [ 1396.860803]f742bce4 c02c55c5 f7534d00 f7534e9c f7534d00 0286 f74 [ 1396.961659] Call Trace: [ 1396.993128] [c02ef7e2] _read_unlock_bh+0x12/0x16 [ 1397.051544] [c02ce658] ip6_route_output+0xeb/0x1e9 [ 1397.112038] [c02ce658] ip6_route_output+0xeb/0x1e9 [ 1397.172535] [c02ef7e2] _read_unlock_bh+0x12/0x16 [ 1397.230952] [c02c55c5] ip6_dst_lookup_tail+0xc6/0xd0 [ 1397.293524] [c02d7e29] udpv6_sendmsg+0x3d4/0x9ac [ 1397.351936] 
[c028b4a2] ip_generic_getfrag+0x0/0xaf [ 1397.412431] [c02d6e22] udpv6_recvmsg+0x20c/0x303 [ 1397.470846] [c02ae7b3] inet_sendmsg+0x4a/0x56 [ 1397.526148] [c02682f4] sock_sendmsg+0xe8/0x101 [ 1397.582494] [c01306ca] autoremove_wake_function+0x0/0x57 [ 1397.649214] [c01cadc4] copy_from_user+0x46/0x7e [ 1397.706594] [c0269e4b] sys_sendmsg+0x191/0x1f8 [ 1397.762941] [c014fe63] find_extend_vma+0x29/0x7e [ 1397.821357] [c0133bca] get_futex_key+0x4c/0x126 [ 1397.878740] [c0135be8] do_futex+0x6c/0x10a [ 1397.930928] [c01cadc4] copy_from_user+0x46/0x7e [ 1397.988307] [c026a2f1] sys_socketcall+0x236/0x254 [ 1398.047762] [c0102cdf] syscall_call+0x7/0xb [ 1398.100989] Code: 34 c7 44 24 04 5a 00 00 00 89 4c 24 0c e8 89 02 02 00 b8 a [ 1398.333299] EIP: [c02c6148] ip6_append_data+0xaf8/0xbd6 SS:ESP 0068:f742bc0 -- Cheers, -- cagri - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: PROBLEM: kernel BUG at net/ipv6/ip6_output.c:718
On Fri, Sep 01, 2006 at 06:22:48PM +0200, cagri coltekin wrote: The second causes the system to give the bug a couple of seconds after bind starts, and loads the zones, without any traffic going on. BTW, patch applied with some offset difference (3 for the first -48 for the other two changes), on a pristine 2.6.17.11 source tree. Well the good news is that I found a bug with MSG_PROBE that can cause exactly what you're seeing. The bad news is that bind doesn't use MSG_PROBE :) So please try this patch to narrow the problem down further. Thanks, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu 许志壬 [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt -- diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 6671691..637b5c4 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -990,8 +990,10 @@ int ip6_append_data(struct sock *sk, int int offset = 0; int csummode = CHECKSUM_NONE; - if (flags&MSG_PROBE) + if (flags&MSG_PROBE) { + WARN_ON(1); return 0; + } if (skb_queue_empty(&sk->sk_write_queue)) { /* * setup for corking @@ -1013,6 +1015,7 @@ int ip6_append_data(struct sock *sk, int dst_hold(&rt->u.dst); np->cork.rt = rt; inet->cork.fl = *fl; + BUG_ON(!fl->proto); np->cork.hop_limit = hlimit; np->cork.tclass = tclass; mtu = dst_mtu(rt->u.dst.path); @@ -1032,6 +1035,7 @@ int ip6_append_data(struct sock *sk, int } else { rt = np->cork.rt; fl = &inet->cork.fl; + BUG_ON(!fl->proto); if (inet->cork.flags & IPCORK_OPT) opt = np->cork.opt; transhdrlen = 0; @@ -1285,6 +1289,7 @@ int ip6_push_pending_frames(struct sock if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL) goto out; + BUG_ON(!proto); tail_skb = &(skb_shinfo(skb)->frag_list); /* move skb->data to ip header from ext header */ - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: PROBLEM: kernel BUG at net/ipv6/ip6_output.c:718
Hi Again, It took a while to find equipment for test environment, but now I have a test environment that I can test. Here is the result: --- [17180051.768000] ip6_fragment: hlen = 0x818, len = 0x7ce, nexthdr=4 [17180051.84] [ cut here ] [17180051.84] kernel BUG at net/ipv6/ip6_output.c:510! [17180051.84] invalid opcode: [#1] [17180051.84] SMP [17180051.84] Modules linked in: ipmi_si ipmi_msghandler ide_cd cdrom [17180051.84] CPU:0 [17180051.84] EIP:0060:[c02bc6bd]Not tainted VLI [17180051.84] EFLAGS: 00010296 (2.6.17.11-ns-pri-debug-p1 #6) [17180051.84] EIP is at ip6_fragment+0x7f6/0x803 [17180051.84] eax: 0048 ebx: f75c4c5c ecx: c038f5bc edx: 0286 [17180051.84] esi: f7605c50 edi: ebp: f76e2c80 esp: f7605bb8 [17180051.84] ds: 007b es: 007b ss: 0068 [17180051.84] Process named (pid: 1899, threadinfo=f7604000 task=f75cead0) [17180051.84] Stack: c0324600 0818 07ce 0004 f7605bdc 0400 [17180051.84]ffd14ca4 f7605ea8 0818 f77a4040 01fe f755d080 f7976048 [17180051.84]f76e2c80 f7605c50 f7976040 f75c4a80 c02bb612 f76e2c80 c02bb40e c02bd66a [17180051.84] Call Trace: [17180051.84] c02bb612 ip6_output+0x3c/0x4c c02bb40e ip6_output2+0x0/0x1c8 [17180051.84] c02bd66a ip6_push_pending_frames+0x250/0x390 c02ce38e udp_v6_push_pending_frames+0x13d/0x1a4 [17180051.84] c02ce97f udpv6_sendmsg+0x58a/0x953 c02cd7c2 udpv6_recvmsg+0x20c/0x303 [17180051.84] c02a6032 inet_sendmsg+0x4a/0x56 c0260b82 sock_sendmsg+0xeb/0x105 [17180051.84] c01c18cc __next_cpu+0x22/0x31 c01167c7 find_busiest_group+0xd6/0x305 [17180051.84] c012f91e autoremove_wake_function+0x0/0x57 c01c662e copy_from_user+0x46/0x7c [17180051.84] c01c662e copy_from_user+0x46/0x7c c02626c9 sys_sendmsg+0x191/0x1f8 [17180051.84] c01334c6 futex_wait+0x129/0x238 c014b75c find_extend_vma+0x29/0x7e [17180051.84] c0117927 default_wake_function+0x0/0x12 c0132b91 futex_wake+0x4a/0xba [17180051.84] c01c662e copy_from_user+0x46/0x7c c0262b6f sys_socketcall+0x236/0x254 [17180051.84] c0102be3 syscall_call+0x7/0xb [17180051.84] Code: 50 60 e9 36 f9 
ff ff 0f b6 44 24 1b 8b 54 24 2c 89 44 24 0c 8b 45 60 c7 04 24 00 46 32 c0 89 54 24 04 89 44 24 08 e8 50 07 e6 ff 0f 0b fe 01 41 13 32 c0 e9 68 f8 ff ff 55 57 56 31 f6 53 83 ec [17180051.84] EIP: [c02bc6bd] ip6_fragment+0x7f6/0x803 SS:ESP 0068:f7605bb8 --- I hope this helps. Cheers, -- cagri On Tue, Aug 29, 2006 at 06:28:28PM +1000, Herbert Xu wrote: Thanks. Please try this patch and tell me if it prints anything out. Cheers, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu 许志壬 [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt -- diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 4fb47a2..5e2e4ea 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -508,6 +508,10 @@ static int ip6_fragment(struct sk_buff * dev = rt->u.dst.dev; hlen = ip6_find_1stfragopt(skb, &prevhdr); nexthdr = *prevhdr; + if (unlikely(hlen > skb->len)) { + printk(KERN_CRIT "ip6_fragment: hlen = 0x%x, len = 0x%x, nexthdr=%d\n", hlen, skb->len, nexthdr); + BUG(); + } mtu = dst_mtu(&rt->u.dst); if (np && np->frag_size < mtu) { - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: PROBLEM: kernel BUG at net/ipv6/ip6_output.c:718
On Mon, Aug 28, 2006 at 02:49:07AM +0200, cagri coltekin wrote: Ooops, sorry for the confusion. It happens with 2.6.17 too (see below), cut&paste from wrong log. The rest of the data provided in the previous message is actually fresh. Thanks. Please try this patch and tell me if it prints anything out. Cheers, -- Visit Openswan at http://www.openswan.org/ Email: Herbert Xu 许志壬 [EMAIL PROTECTED] Home Page: http://gondor.apana.org.au/~herbert/ PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt -- diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 4fb47a2..5e2e4ea 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -508,6 +508,10 @@ static int ip6_fragment(struct sk_buff * dev = rt->u.dst.dev; hlen = ip6_find_1stfragopt(skb, &prevhdr); nexthdr = *prevhdr; + if (unlikely(hlen > skb->len)) { + printk(KERN_CRIT "ip6_fragment: hlen = 0x%x, len = 0x%x, nexthdr=%d\n", hlen, skb->len, nexthdr); + BUG(); + } mtu = dst_mtu(&rt->u.dst); if (np && np->frag_size < mtu) { - To unsubscribe from this list: send the line unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html
PROBLEM: kernel BUG at net/ipv6/ip6_output.c:718
Hi, [ Apologies for possible duplicates, and if I'm addressing wrong people. ] The following is the standard bug report form. I believe I have included enough information for starters. I'd be happy to try to provide more if you need it. Please let me know. Kind Regards, -- Cagri Coltekin -- [1.] Kernel message: kernel BUG at net/ipv6/ip6_output.c:718 [2.] Full description of the problem/report: This is on a busy DNS server (serving about 5k queries per second). The problem started after we have switched to 2.6 kernel since it gives better performance. After the kernel bug message below, the system continues to run. However, bind gets stuck, completely unresponsive, killing it makes it zombie (parent is init). A new instance of bind can be started, and works fine until next time. The problem is not in 2.4, we were running kernel.org 2.4.29 before. I've tried the patch at http://lkml.org/lkml/2006/8/13/56, just in case. It does not make any difference. The system is a dual CPU (Hyperthreading) Dell Poweredge 2650. [3.] Keywords: Kernel, networking, IPv6, UDP, DNS [4.] Kernel version (from /proc/version): Linux version 2.6.17.11-ns-debug ([EMAIL PROTECTED]) (gcc version 3.3.5 (Debian 1:3.3.5-8ubuntu2.1)) #6 SMP Sat Aug 26 05:06:53 CEST 2006 It is vanilla 2.6.17.11 with all unnecessary functionality removed during compilation. Please note that the kernel is compiled on a different machine, I'll provide information on both systems below where appropriate. [5.] Output of Oops.. NOTE that there are bug messages from two consecutive events. Aug 25 04:03:35 ns kernel: [ cut here ] Aug 25 04:03:35 ns kernel: kernel BUG at net/ipv6/ip6_output.c:718! 
Aug 25 04:03:35 ns kernel: invalid operand: [#1] Aug 25 04:03:35 ns kernel: SMP Aug 25 04:03:35 ns kernel: Modules linked in: uhci_hcd ehci_hcd ohci_hcd aic7xxx ide_cd Aug 25 04:03:35 ns kernel: CPU:3 Aug 25 04:03:35 ns kernel: EIP:0060:[svc_create_socket+189/416]Not tainted VLI Aug 25 04:03:35 ns kernel: EFLAGS: 00010282 (2.6.12.6-ncc-server) Aug 25 04:03:35 ns kernel: EIP is at ip6_fragment+0x24d/0x880 Aug 25 04:03:35 ns kernel: eax: fff2 ebx: f5391ff8 ecx: 07e8 edx: f792a600 Aug 25 04:03:35 ns kernel: esi: f792a800 edi: f5391ff8 ebp: f513d180 esp: f6defbac Aug 25 04:03:35 ns kernel: ds: 007b es: 007b ss: 0068 Aug 25 04:03:35 ns kernel: Process named (pid: 1942, threadinfo=f6dee000 task=f7959a20) Aug 25 04:03:35 ns kernel: Stack: f31a8480 07e8 f5392000 fdd0 f6dee000 b6346cc8 821e Aug 25 04:03:35 ns kernel: 07e8 fdd0 ffee 07e8 fdd4 f7075780 Aug 25 04:03:35 ns kernel:f792a048 f792a018 f31a8480 f792a040 f7b18080 c031fd50 f31a8480 c031fa30 Aug 25 04:03:35 ns kernel: Call Trace: Aug 25 04:03:35 ns kernel: [svc_tcp_accept+960/992] ip6_output+0x30/0x40 Aug 25 04:03:35 ns kernel: [svc_tcp_accept+160/992] ip6_output2+0x0/0x2f0 Aug 25 04:03:35 ns kernel: [svcauth_unix_accept+43/496] ip6_push_pending_frames+0x29b/0x470 Aug 25 04:03:35 ns kernel: [__func__.2+898/986] udp_v6_push_pending_frames+0x148/0x1b0 Aug 25 04:03:35 ns kernel: [ip_send_reply+144/592] ip_generic_getfrag+0x0/0xc0 Aug 25 04:03:35 ns kernel: [__func__.1+0/17] udpv6_sendmsg+0x52c/0x920 Aug 25 04:03:35 ns kernel: [arp_solicit+356/464] udp_recvmsg+0x54/0x300 Aug 25 04:03:35 ns kernel: [igmp_rcv+125/336] inet_sendmsg+0x4d/0x60 Aug 25 04:03:35 ns kernel: [sys_connect+90/176] sock_sendmsg+0xda/0x100 Aug 25 04:03:35 ns kernel: [find_busiest_group+209/736] find_busiest_group+0xd1/0x2e0 Aug 25 04:03:35 ns kernel: [pci_bus_read_config_dword+38/144] copy_from_user+0x46/0x80 Aug 25 04:03:35 ns kernel: [autoremove_wake_function+0/96] autoremove_wake_function+0x0/0x60 Aug 25 04:03:35 ns kernel: [sock_wfree+15/96] 
sys_sendmsg+0x18f/0x1f0 Aug 25 04:03:35 ns kernel: [futex_wait+292/576] futex_wait+0x124/0x240 Aug 25 04:03:35 ns kernel: [find_extend_vma+41/144] find_extend_vma+0x29/0x90 Aug 25 04:03:35 ns kernel: [default_wake_function+0/32] default_wake_function+0x0/0x20 Aug 25 04:03:35 ns kernel: [futex_wake+123/208] futex_wake+0x7b/0xd0 Aug 25 04:03:35 ns kernel: [pci_bus_read_config_dword+38/144] copy_from_user+0x46/0x80 Aug 25 04:03:35 ns kernel: [sock_alloc_send_pskb+370/480] sys_socketcall+0x242/0x260 Aug 25 04:03:35 ns kernel: [syscall_call+7/11] syscall_call+0x7/0xb Aug 25 04:03:35 ns kernel: Code: 00 00 00 00 8b 54 24 2c 8b 4c 24 24 89 54 24 0c 8b 45 24 89 4c 24 04 89 44 24 08 8b 44 24 58 89 04 24 e8 a7 44 fa ff 85 c0 74 08 0f 0b ce 02 86 07 3a c0 0f b7 44 24 20 0f b6 d0 c1 e2 08 c1 e8 Aug 25 05:49:07 ns kernel: 7UDP: bad checksum. From 213.147.0.92:53 to 193.0.0.195:2101 ulen 52 Aug 25 06:30:02 ns kernel: [ cut here ] Aug 25 06:30:02 ns kernel: kernel BUG at
Re: PROBLEM: kernel BUG at net/ipv6/ip6_output.c:718
On Mon, Aug 28, 2006 at 10:16:56AM +1000, Herbert Xu wrote: cagri coltekin [EMAIL PROTECTED] wrote: Aug 25 04:03:35 ns kernel: [ cut here ] Aug 25 04:03:35 ns kernel: kernel BUG at net/ipv6/ip6_output.c:718! Aug 25 04:03:35 ns kernel: invalid operand: [#1] Aug 25 04:03:35 ns kernel: SMP Aug 25 04:03:35 ns kernel: Modules linked in: uhci_hcd ehci_hcd ohci_hcd aic7xxx ide_cd Aug 25 04:03:35 ns kernel: CPU:3 Aug 25 04:03:35 ns kernel: EIP:0060:[svc_create_socket+189/416]Not tainted VLI Aug 25 04:03:35 ns kernel: EFLAGS: 00010282 (2.6.12.6-ncc-server) This is an ancient kernel. Please really try 2.6.17 instead of just talking about it (the line number confirms that it is 2.6.12). Ooops, sorry for the confusion. It happens with 2.6.17 too (see below), cutpaste from wrong log. The rest of the data provided in the previous message is actually fresh. Aug 26 07:09:36 ns kernel: [17180077.732000] [ cut here ] Aug 26 07:09:36 ns kernel: [17180077.792000] kernel BUG at net/ipv6/ip6_output.c:693! Aug 26 07:09:36 ns kernel: [17180077.856000] invalid opcode: [#1] Aug 26 07:09:36 ns kernel: [17180077.90] SMP Aug 26 07:09:36 ns kernel: [17180077.928000] Modules linked in: ide_cd cdrom Aug 26 07:09:36 ns kernel: [17180077.98] CPU:2 Aug 26 07:09:36 ns kernel: [17180077.98] EIP: 0060:[ip6_fragment+619/1981]Not tainted VLI Aug 26 07:09:36 ns kernel: [17180077.98] EFLAGS: 00010282 (2.6.17.11-ns-debug #6) Aug 26 07:09:36 ns kernel: [17180078.148000] EIP is at ip6_fragment+0x26b/0x7bd Aug 26 07:09:36 ns kernel: [17180078.204000] eax: fff2 ebx: fdd8 ecx: 05b8 edx: f5ecc600 Aug 26 07:09:36 ns kernel: [17180078.288000] esi: f5ecc7f8 edi: f5e7bff0 ebp: c2ff6780 esp: f71f5bb8 Aug 26 07:09:36 ns kernel: [17180078.376000] ds: 007b es: 007b ss: 0068 Aug 26 07:09:36 ns kernel: [17180078.428000] Process named (pid: 1811, threadinfo=f71f4000 task=f7470a10) Aug 26 07:09:36 ns kernel: [17180078.508000] Stack: f7208880 07e0 f5e7bff8 fdd8 f71f4000 f71f5bdc 5d00 Aug 26 07:09:36 ns kernel: 
[17180078.612000]07e0 0e03 ffee 07e0 fddc f5e7bff0 f7fd7880 f5ecc048 Aug 26 07:09:36 ns kernel: [17180078.72]f7208880 f7fd7880 f5ecc040 f774c080 c02adcc6 f7208880 c02adac2 c02afcc6 Aug 26 07:09:36 ns kernel: [17180078.824000] Call Trace: Aug 26 07:09:36 ns kernel: [17180078.86] c02adcc6 ip6_output+0x3c/0x4c c02adac2 ip6_output2+0x0/0x1c8 Aug 26 07:09:36 ns kernel: [17180078.948000] c02afcc6 ip6_push_pending_frames+0x250/0x390 c02c09ea udp_v6_push_pending_frames+0x13d/0x1a4 Aug 26 07:09:36 ns kernel: [17180079.072000] c02c0fdb udpv6_sendmsg+0x58a/0x953 c0291d36 udp_recvmsg+0x56/0x24c Aug 26 07:09:36 ns kernel: [17180079.172000] c02986e6 inet_sendmsg+0x4a/0x56 c0253256 sock_sendmsg+0xeb/0x105 Aug 26 07:09:36 ns kernel: [17180079.264000] c01c18cc __next_cpu+0x22/0x31 c01167c7 find_busiest_group+0xd6/0x305 Aug 26 07:09:36 ns kernel: [17180079.364000] c01177e6 dependent_sleeper+0x1ec/0x32d c012f91e autoremove_wake_function+0x0/0x57 Aug 26 07:09:36 ns kernel: [17180079.476000] c01c662e copy_from_user+0x46/0x7c c01c662e copy_from_user+0x46/0x7c Aug 26 07:09:36 ns kernel: [17180079.576000] c0254d9d sys_sendmsg+0x191/0x1f8 c01334c6 futex_wait+0x129/0x238 Aug 26 07:09:36 ns kernel: [17180079.672000] c014b75c find_extend_vma+0x29/0x7e c0117927 default_wake_function+0x0/0x12 Aug 26 07:09:36 ns kernel: [17180079.776000] c0132b91 futex_wake+0x4a/0xba c01667a8 pipe_write+0x0/0x3b Aug 26 07:09:36 ns kernel: [17180079.864000] c01c662e copy_from_user+0x46/0x7c c0255243 sys_socketcall+0x236/0x254 Aug 26 07:09:36 ns kernel: [17180079.964000] c0102be3 syscall_call+0x7/0xb Aug 26 07:09:36 ns kernel: [17180080.02] Code: 24 8b 44 24 34 89 50 04 89 5c 24 0c 8b 4c 24 20 8b 45 1c 89 4c 24 04 89 44 24 08 8b 44 24 54 89 04 24 e8 25 a6 fa ff 85 c0 74 08 0f 0b b5 02 21 fb 30 c0 0f b7 44 24 1c 8b 4c 24 34 89 c2 c1 e8 Aug 26 07:09:36 ns kernel: [17180080.264000] EIP: [ip6_fragment+619/1981] ip6_fragment+0x26b/0x7bd SS:ESP 0068:f71f5bb8 -- -- cagri - To unsubscribe from this list: send the line 
unsubscribe netdev in the body of a message to [EMAIL PROTECTED] More majordomo info at http://vger.kernel.org/majordomo-info.html