Re: PROBLEM: kernel BUG at net/ipv6/ip6_output.c:718

2006-10-03 Thread cagri coltekin
On Tue, Oct 03, 2006 at 04:28:20PM +1000, Herbert Xu wrote:
> On Tue, Oct 03, 2006 at 03:49:35PM +1000, Herbert Xu wrote:
> >
> > OK, I think I've got the right bug this time.
> 
> Here is the patch for the other bug that I found along the way:
> 
> [UDP6]: Fix MSG_PROBE crash
> 

This one fixes it. Thanks!

The patch does not apply cleanly to 2.6.18; it needed some manual
tweaking (a version that applies cleanly to vanilla 2.6.18 is
below in case it is of any use).

Cheers,
-- 
cagri


--- linux-2.6.18/net/ipv6/udp.c 2006-09-20 05:42:06.0 +0200
+++ linux-2.6.18-p4/net/ipv6/udp.c  2006-10-03 08:57:31.0 +0200
@@ -613,7 +613,7 @@
struct in6_addr *daddr, *final_p = NULL, final;
struct ipv6_txoptions *opt = NULL;
struct ip6_flowlabel *flowlabel = NULL;
-   struct flowi *fl = &inet->cork.fl;
+   struct flowi fl;
struct dst_entry *dst;
int addr_len = msg->msg_namelen;
int ulen = len;
@@ -693,19 +693,19 @@
}
ulen += sizeof(struct udphdr);
 
-   memset(fl, 0, sizeof(*fl));
+   memset(&fl, 0, sizeof(fl));
 
if (sin6) {
if (sin6->sin6_port == 0)
return -EINVAL;
 
-   fl->fl_ip_dport = sin6->sin6_port;
+   fl.fl_ip_dport = sin6->sin6_port;
daddr = &sin6->sin6_addr;
 
if (np->sndflow) {
-   fl->fl6_flowlabel = 
sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
-   if (fl->fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
-   flowlabel = fl6_sock_lookup(sk, 
fl->fl6_flowlabel);
+   fl.fl6_flowlabel = 
sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
+   if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
+   flowlabel = fl6_sock_lookup(sk, 
fl.fl6_flowlabel);
if (flowlabel == NULL)
return -EINVAL;
daddr = &flowlabel->dst;
@@ -723,32 +723,32 @@
if (addr_len >= sizeof(struct sockaddr_in6) &&
sin6->sin6_scope_id &&
ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL)
-   fl->oif = sin6->sin6_scope_id;
+   fl.oif = sin6->sin6_scope_id;
} else {
if (sk->sk_state != TCP_ESTABLISHED)
return -EDESTADDRREQ;
 
-   fl->fl_ip_dport = inet->dport;
+   fl.fl_ip_dport = inet->dport;
daddr = &np->daddr;
-   fl->fl6_flowlabel = np->flow_label;
+   fl.fl6_flowlabel = np->flow_label;
connected = 1;
}
 
-   if (!fl->oif)
-   fl->oif = sk->sk_bound_dev_if;
+   if (!fl.oif)
+   fl.oif = sk->sk_bound_dev_if;
 
if (msg->msg_controllen) {
opt = &opt_space;
memset(opt, 0, sizeof(struct ipv6_txoptions));
opt->tot_len = sizeof(*opt);
 
-   err = datagram_send_ctl(msg, fl, opt, &hlimit, &tclass);
+   err = datagram_send_ctl(msg, &fl, opt, &hlimit, &tclass);
if (err < 0) {
fl6_sock_release(flowlabel);
return err;
}
-   if ((fl->fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
-   flowlabel = fl6_sock_lookup(sk, fl->fl6_flowlabel);
+   if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
+   flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
if (flowlabel == NULL)
return -EINVAL;
}
@@ -762,37 +762,37 @@
opt = fl6_merge_options(&opt_space, flowlabel, opt);
opt = ipv6_fixup_options(&opt_space, opt);
 
-   fl->proto = IPPROTO_UDP;
-   ipv6_addr_copy(&fl->fl6_dst, daddr);
-   if (ipv6_addr_any(&fl->fl6_src) && !ipv6_addr_any(&np->saddr))
-   ipv6_addr_copy(&fl->fl6_src, &np->saddr);
-   fl->fl_ip_sport = inet->sport;
+   fl.proto = IPPROTO_UDP;
+   ipv6_addr_copy(&fl.fl6_dst, daddr);
+   if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr))
+   ipv6_addr_copy(&fl.fl6_src, &np->saddr);
+   fl.fl_ip_sport = inet->sport;

/* merge ip6_build_xmit from ip6_output */
if (opt && opt->srcrt) {
struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
-   ipv6_addr_copy(&final, &fl->fl6_dst);
-   ipv6_addr_copy(&fl->fl6_dst, rt0->addr);
+   ipv6_addr_copy(&final, &fl.fl6_dst);
+   ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
final_p = &final;
connected = 0;
}
 
-   if (!fl->oif && ipv6_addr_is_multicast(&fl->fl6_dst)) {
-   fl->oif = np->mcast_oif;
+   if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst))

Re: PROBLEM: kernel BUG at net/ipv6/ip6_output.c:718

2006-10-03 Thread James Morris
On Tue, 3 Oct 2006, Herbert Xu wrote:

> On Thu, Sep 28, 2006 at 10:40:18AM +0200, cagri coltekin wrote:
> >
> > No. Bug is the first after boot:
> 
> OK, I think I've got the right bug this time.
> 
> [UDP6]: Fix flowi clobbering
> 
> The udp6_sendmsg function uses a shared buffer to store the
> flow without taking any locks.  This leads to races with SMP.
> This patch moves the flowi object onto the stack.
> 
> Signed-off-by: Herbert Xu <[EMAIL PROTECTED]>

Nice catch.

Acked-by: James Morris <[EMAIL PROTECTED]>



-- 
James Morris
<[EMAIL PROTECTED]>
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: PROBLEM: kernel BUG at net/ipv6/ip6_output.c:718

2006-10-02 Thread Herbert Xu
On Tue, Oct 03, 2006 at 03:49:35PM +1000, Herbert Xu wrote:
>
> OK, I think I've got the right bug this time.

Here is the patch for the other bug that I found along the way:

[UDP6]: Fix MSG_PROBE crash

UDP tracks corking status through the pending variable.  The
IP layer also tracks it through the socket write queue.  It
is possible for the two to get out of sync when MSG_PROBE is
used.

This patch changes UDP to check the write queue to ensure
that the two stay in sync.

Signed-off-by: Herbert Xu <[EMAIL PROTECTED]>

Cheers,
--
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu 许志壬 <[EMAIL PROTECTED]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -675,6 +675,8 @@ do_append_data:
udp_flush_pending_frames(sk);
else if (!corkreq)
err = udp_push_pending_frames(sk, up);
+   else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
+   up->pending = 0;
release_sock(sk);
 
 out:
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -770,6 +770,8 @@ do_append_data:
udp_v6_flush_pending_frames(sk);
else if (!corkreq)
err = udp_v6_push_pending_frames(sk, up);
+   else if (unlikely(skb_queue_empty(&sk->sk_write_queue)))
+   up->pending = 0;
 
if (dst) {
if (connected) {
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: PROBLEM: kernel BUG at net/ipv6/ip6_output.c:718

2006-10-02 Thread Herbert Xu
On Thu, Sep 28, 2006 at 10:40:18AM +0200, cagri coltekin wrote:
>
> No. Bug is the first after boot:

OK, I think I've got the right bug this time.

[UDP6]: Fix flowi clobbering

The udp6_sendmsg function uses a shared buffer to store the
flow without taking any locks.  This leads to races with SMP.
This patch moves the flowi object onto the stack.

Signed-off-by: Herbert Xu <[EMAIL PROTECTED]>

This bug is pretty old so we need the fix for 2.6.18 too.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu 许志壬 <[EMAIL PROTECTED]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -546,7 +546,7 @@ static int udpv6_sendmsg(struct kiocb *i
struct in6_addr *daddr, *final_p = NULL, final;
struct ipv6_txoptions *opt = NULL;
struct ip6_flowlabel *flowlabel = NULL;
-   struct flowi *fl = &inet->cork.fl;
+   struct flowi fl;
struct dst_entry *dst;
int addr_len = msg->msg_namelen;
int ulen = len;
@@ -626,19 +626,19 @@ do_udp_sendmsg:
}
ulen += sizeof(struct udphdr);
 
-   memset(fl, 0, sizeof(*fl));
+   memset(&fl, 0, sizeof(fl));
 
if (sin6) {
if (sin6->sin6_port == 0)
return -EINVAL;
 
-   fl->fl_ip_dport = sin6->sin6_port;
+   fl.fl_ip_dport = sin6->sin6_port;
daddr = &sin6->sin6_addr;
 
if (np->sndflow) {
-   fl->fl6_flowlabel = 
sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
-   if (fl->fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
-   flowlabel = fl6_sock_lookup(sk, 
fl->fl6_flowlabel);
+   fl.fl6_flowlabel = 
sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK;
+   if (fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) {
+   flowlabel = fl6_sock_lookup(sk, 
fl.fl6_flowlabel);
if (flowlabel == NULL)
return -EINVAL;
daddr = &flowlabel->dst;
@@ -656,32 +656,32 @@ do_udp_sendmsg:
if (addr_len >= sizeof(struct sockaddr_in6) &&
sin6->sin6_scope_id &&
ipv6_addr_type(daddr)&IPV6_ADDR_LINKLOCAL)
-   fl->oif = sin6->sin6_scope_id;
+   fl.oif = sin6->sin6_scope_id;
} else {
if (sk->sk_state != TCP_ESTABLISHED)
return -EDESTADDRREQ;
 
-   fl->fl_ip_dport = inet->dport;
+   fl.fl_ip_dport = inet->dport;
daddr = &np->daddr;
-   fl->fl6_flowlabel = np->flow_label;
+   fl.fl6_flowlabel = np->flow_label;
connected = 1;
}
 
-   if (!fl->oif)
-   fl->oif = sk->sk_bound_dev_if;
+   if (!fl.oif)
+   fl.oif = sk->sk_bound_dev_if;
 
if (msg->msg_controllen) {
opt = &opt_space;
memset(opt, 0, sizeof(struct ipv6_txoptions));
opt->tot_len = sizeof(*opt);
 
-   err = datagram_send_ctl(msg, fl, opt, &hlimit, &tclass);
+   err = datagram_send_ctl(msg, &fl, opt, &hlimit, &tclass);
if (err < 0) {
fl6_sock_release(flowlabel);
return err;
}
-   if ((fl->fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
-   flowlabel = fl6_sock_lookup(sk, fl->fl6_flowlabel);
+   if ((fl.fl6_flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) {
+   flowlabel = fl6_sock_lookup(sk, fl.fl6_flowlabel);
if (flowlabel == NULL)
return -EINVAL;
}
@@ -695,39 +695,39 @@ do_udp_sendmsg:
opt = fl6_merge_options(&opt_space, flowlabel, opt);
opt = ipv6_fixup_options(&opt_space, opt);
 
-   fl->proto = IPPROTO_UDP;
-   ipv6_addr_copy(&fl->fl6_dst, daddr);
-   if (ipv6_addr_any(&fl->fl6_src) && !ipv6_addr_any(&np->saddr))
-   ipv6_addr_copy(&fl->fl6_src, &np->saddr);
-   fl->fl_ip_sport = inet->sport;
+   fl.proto = IPPROTO_UDP;
+   ipv6_addr_copy(&fl.fl6_dst, daddr);
+   if (ipv6_addr_any(&fl.fl6_src) && !ipv6_addr_any(&np->saddr))
+   ipv6_addr_copy(&fl.fl6_src, &np->saddr);
+   fl.fl_ip_sport = inet->sport;

/* merge ip6_build_xmit from ip6_output */
if (opt && opt->srcrt) {
struct rt0_hdr *rt0 = (struct rt0_hdr *) opt->srcrt;
-   ipv6_addr_copy(&final, &fl->fl6_dst);
-   ipv6_addr_copy(&fl->fl6_dst, rt0->addr);
+   ipv6_addr_copy(&final, &fl.fl6_dst);
+   ipv6_addr_copy(&fl.fl6_dst, rt0->addr);
final_p 

Re: PROBLEM: kernel BUG at net/ipv6/ip6_output.c:718

2006-09-28 Thread cagri coltekin
On Thu, Sep 28, 2006 at 10:38:29AM +1000, Herbert Xu wrote:
> On Tue, Sep 26, 2006 at 01:21:22PM +0200, cagri coltekin wrote:
> >
> > --
> > [ 1395.890897] [ cut here ]
> > [ 1395.946093] kernel BUG at net/ipv6/ip6_output.c:940!
> 
> Could you go further back in the logs to see if there was a
> warning message? Either that or turn the WARN_ON into a BUG.

No. The bug is the first thing logged after boot:

[   34.042841] ADDRCONF(NETDEV_CHANGE): eth0: link becomes ready
[   44.110469] eth0: no IPv6 routers present
[   80.968012] process `syslogd' is using obsolete setsockopt SO_BSDCOMPAT
[   81.452248] process `named' is using obsolete setsockopt SO_BSDCOMPAT
[  110.559560] process `lwresd' is using obsolete setsockopt SO_BSDCOMPAT
[  140.568831] process `named' is using obsolete setsockopt SO_BSDCOMPAT
[ 1395.890897] ------------[ cut here ]------------
[ 1395.946093] kernel BUG at net/ipv6/ip6_output.c:940!
[ 1396.005441] invalid opcode: 0000 [#1]

Cheers,
-- 
cagri
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: PROBLEM: kernel BUG at net/ipv6/ip6_output.c:718

2006-09-27 Thread Herbert Xu
On Tue, Sep 26, 2006 at 01:21:22PM +0200, cagri coltekin wrote:
>
> --
> [ 1395.890897] [ cut here ]
> [ 1395.946093] kernel BUG at net/ipv6/ip6_output.c:940!

Could you go further back in the logs to see if there was a
warning message? Either that or turn the WARN_ON into a BUG.

Thanks,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu 许志壬 <[EMAIL PROTECTED]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: PROBLEM: kernel BUG at net/ipv6/ip6_output.c:718

2006-09-26 Thread cagri coltekin
Hi,

On Mon, Sep 25, 2006 at 10:15:30PM +1000, Herbert Xu wrote:
> On Fri, Sep 01, 2006 at 06:22:48PM +0200, cagri coltekin wrote:
> >
> > The second causes the system to give the bug a couple of seconds
> > after bind starts, and loads the zones, without any traffic going
> > on. BTW, patch applied with some offset difference (3 for the
> > first -48 for the other two changes), on a pristine 2.6.17.11
> > source tree.
> 
> Well the good news is that I found a bug with MSG_PROBE that can
> cause exactly what you're seeing.  The bad news is that bind doesn't
> use MSG_PROBE :)
> 
> So please try this patch to narrow the problem down further.

This time I applied the patch to 2.6.18. The patch applied with some
offset differences. I can stick to a version you suggest if 2.6.18
is not a good one. Here is the new bug message:

--
[ 1395.890897] [ cut here ]
[ 1395.946093] kernel BUG at net/ipv6/ip6_output.c:940!
[ 1396.005441] invalid opcode:  [#1]
[ 1396.049225] SMP 
[ 1396.071419] Modules linked in: ipmi_si ipmi_msghandler ide_cd cdrom
[ 1396.146853] CPU:2
[ 1396.146854] EIP:0060:[]Not tainted VLI
[ 1396.146855] EFLAGS: 00010246   (2.6.18-ns-pri-debug-p3 #2) 
[ 1396.304174] EIP is at ip6_append_data+0xaf8/0xbd6
[ 1396.360405] eax: f7534d00   ebx:    ecx: f7534e9c   edx: f68f4480
[ 1396.441552] esi: f7534ee4   edi: f7534ee4   ebp: f7534ef0   esp: f742bc20
[ 1396.522691] ds: 007b   es: 007b   ss: 0068
[ 1396.571655] Process named (pid: 1897, ti=f742a000 task=c2b2c030 task.ti=f742)
[ 1396.659026] Stack: f68f4480 c03c3cb4 f742bf00 c02ef7e2 c02ce658 c02ce658 c03 
[ 1396.759947]0002 c02ef7e2 f7534eb4 f7534d70   f74 
[ 1396.860803]f742bce4 c02c55c5 f7534d00 f7534e9c f7534d00 0286 f74 
[ 1396.961659] Call Trace:
[ 1396.993128]  [] _read_unlock_bh+0x12/0x16
[ 1397.051544]  [] ip6_route_output+0xeb/0x1e9
[ 1397.112038]  [] ip6_route_output+0xeb/0x1e9
[ 1397.172535]  [] _read_unlock_bh+0x12/0x16
[ 1397.230952]  [] ip6_dst_lookup_tail+0xc6/0xd0
[ 1397.293524]  [] udpv6_sendmsg+0x3d4/0x9ac
[ 1397.351936]  [] ip_generic_getfrag+0x0/0xaf
[ 1397.412431]  [] udpv6_recvmsg+0x20c/0x303
[ 1397.470846]  [] inet_sendmsg+0x4a/0x56
[ 1397.526148]  [] sock_sendmsg+0xe8/0x101
[ 1397.582494]  [] autoremove_wake_function+0x0/0x57
[ 1397.649214]  [] copy_from_user+0x46/0x7e
[ 1397.706594]  [] sys_sendmsg+0x191/0x1f8
[ 1397.762941]  [] find_extend_vma+0x29/0x7e
[ 1397.821357]  [] get_futex_key+0x4c/0x126
[ 1397.878740]  [] do_futex+0x6c/0x10a
[ 1397.930928]  [] copy_from_user+0x46/0x7e
[ 1397.988307]  [] sys_socketcall+0x236/0x254
[ 1398.047762]  [] syscall_call+0x7/0xb
[ 1398.100989] Code: 34 c7 44 24 04 5a 00 00 00 89 4c 24 0c e8 89 02 02 00 b8 a 
[ 1398.333299] EIP: [] ip6_append_data+0xaf8/0xbd6 SS:ESP 0068:f742bc0
--

Cheers,
-- 
cagri
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: PROBLEM: kernel BUG at net/ipv6/ip6_output.c:718

2006-09-25 Thread Herbert Xu
On Fri, Sep 01, 2006 at 06:22:48PM +0200, cagri coltekin wrote:
>
> The second causes the system to give the bug a couple of seconds
> after bind starts, and loads the zones, without any traffic going
> on. BTW, patch applied with some offset difference (3 for the
> first -48 for the other two changes), on a pristine 2.6.17.11
> source tree.

Well the good news is that I found a bug with MSG_PROBE that can
cause exactly what you're seeing.  The bad news is that bind doesn't
use MSG_PROBE :)

So please try this patch to narrow the problem down further.

Thanks,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu 许志壬 <[EMAIL PROTECTED]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 6671691..637b5c4 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -990,8 +990,10 @@ int ip6_append_data(struct sock *sk, int
int offset = 0;
int csummode = CHECKSUM_NONE;
 
-   if (flags&MSG_PROBE)
+   if (flags&MSG_PROBE) {
+   WARN_ON(1);
return 0;
+   }
if (skb_queue_empty(&sk->sk_write_queue)) {
/*
 * setup for corking
@@ -1013,6 +1015,7 @@ int ip6_append_data(struct sock *sk, int
dst_hold(&rt->u.dst);
np->cork.rt = rt;
inet->cork.fl = *fl;
+   BUG_ON(!fl->proto);
np->cork.hop_limit = hlimit;
np->cork.tclass = tclass;
mtu = dst_mtu(rt->u.dst.path);
@@ -1032,6 +1035,7 @@ int ip6_append_data(struct sock *sk, int
} else {
rt = np->cork.rt;
fl = &inet->cork.fl;
+   BUG_ON(!fl->proto);
if (inet->cork.flags & IPCORK_OPT)
opt = np->cork.opt;
transhdrlen = 0;
@@ -1285,6 +1289,7 @@ int ip6_push_pending_frames(struct sock 
 
if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
goto out;
+   BUG_ON(!proto);
tail_skb = &(skb_shinfo(skb)->frag_list);
 
/* move skb->data to ip header from ext header */
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: PROBLEM: kernel BUG at net/ipv6/ip6_output.c:718

2006-09-01 Thread cagri coltekin
On Fri, Sep 01, 2006 at 05:05:57PM +1000, Herbert Xu wrote:
> On Thu, Aug 31, 2006 at 05:12:43PM +0200, cagri coltekin wrote:
> > 
> > It took a while to find equipment for test environment, but now I
> > have a test environment that I can test.
> > 
> > Here is the result:
> > 
> > ---
> > [17180051.768000] ip6_fragment: hlen = 0x818, len = 0x7ce, nexthdr=4
> 
> Thanks for the result.  It looks like something is screwed up with the
> extension headers.  What version of bind are you using?

It's bind 9.3.2; the version we were using had a specific patch.
However, I've just tested with non-patched bind 9.3.2, and it does it
too. The system has a large number of zones, with most of them
DNSSEC-enabled. That may be the reason for the peculiarity. I can
send configuration/zone files etc. if it would be helpful.

> Please try the following patch instead to see if we can further isolate
> the problem.

The second causes the system to hit the bug a couple of seconds
after bind starts and loads the zones, without any traffic going
on. BTW, the patch applied with some offset differences (3 for the
first, -48 for the other two changes), on a pristine 2.6.17.11
source tree.

Here is the new result:

-
[17199663.616000] ip6_push: hlen = 0x388, len = 0x8f, nexthdr1 = 0, nexthdr2 = 
162, proto = 0
[17199663.712000] ip6_push: opt = 0x0, flen = 0, nflen = 0
[17199663.776000] [ cut here ]
[17199663.836000] kernel BUG at net/ipv6/ip6_output.c:1215!
[17199663.896000] invalid opcode:  [#1]
[17199663.944000] SMP 
[17199663.972000] Modules linked in: ipmi_si ipmi_msghandler ide_cd cdrom
[17199664.048000] CPU:1
[17199664.048000] EIP:0060:[]Not tainted VLI
[17199664.048000] EFLAGS: 00010282   (2.6.17.11-ns-pri-debug-p2 #1) 
[17199664.22] EIP is at ip6_push_pending_frames+0x39d/0x42e
[17199664.288000] eax: 003e   ebx: f60fae80   ecx: c038f5bc   edx: 0286
[17199664.372000] esi: f7258d80   edi: f782ea40   ebp: f6171d00   esp: f60f7c0c
[17199664.456000] ds: 007b   es: 007b   ss: 0068
[17199664.508000] Process named (pid: 15561, threadinfo=f60f6000 task=f7ae9030)
[17199664.592000] Stack: c03246e0    00a2  
f6171e88 f7258d80 
[17199664.696000] f6171edc f782ea48 f60f7c40   
  
[17199664.80] f6171e90 f6171ea0 f6171e88 f782ea40 c02ce42e 
f6171d00 0008 
[17199664.904000] Call Trace:
[17199664.936000]   udp_v6_push_pending_frames+0x13d/0x1a4  
 udpv6_sendmsg+0x58a/0x953
[17199665.048000]   inet_sendmsg+0x4a/0x56   
sock_sendmsg+0xeb/0x105
[17199665.144000]   __next_cpu+0x22/0x31   
find_busiest_group+0xd6/0x305
[17199665.244000]   file_update_time+0x48/0xcb   
dependent_sleeper+0x1ec/0x32d
[17199665.348000]   autoremove_wake_function+0x0/0x57   
copy_from_user+0x46/0x7c
[17199665.456000]   verify_iovec+0x3c/0x94   
sys_sendmsg+0x191/0x1f8
[17199665.548000]   schedule_timeout+0xa8/0xaa   
unqueue_me+0x56/0x9d
[17199665.644000]   add_wait_queue+0x1a/0x46   
futex_wait+0x1cd/0x238
[17199665.74]   find_extend_vma+0x29/0x7e   
__next_cpu+0x22/0x31
[17199665.832000]   dependent_sleeper+0x1ec/0x32d   
copy_from_user+0x46/0x7c
[17199665.936000]   sys_socketcall+0x236/0x254   
syscall_call+0x7/0xb
[17199666.032000] Code: 20 89 44 24 0c 31 c0 85 d2 74 08 8b 54 24 20 0f b7 42 
04 89 44 24 08 c7 04 24 e0 46 32 c0 8b 44 24 20 89 44 24 04 e8 54 f6 e5 ff <0f> 
0b bf 04 41 13 32 c0 e9 b8 fe ff ff 66 c7 41 04 00 00 e9 21 
[17199666.268000] EIP: [] ip6_push_pending_frames+0x39d/0x42e SS:ESP 
0068:f60f7c0c
-

Cheers,
-- 
cagri
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: PROBLEM: kernel BUG at net/ipv6/ip6_output.c:718

2006-09-01 Thread Herbert Xu
On Thu, Aug 31, 2006 at 05:12:43PM +0200, cagri coltekin wrote:
> 
> It took a while to find equipment for test environment, but now I
> have a test environment that I can test.
> 
> Here is the result:
> 
> ---
> [17180051.768000] ip6_fragment: hlen = 0x818, len = 0x7ce, nexthdr=4

Thanks for the result.  It looks like something is screwed up with the
extension headers.  What version of bind are you using?

Please try the following patch instead to see if we can further isolate
the problem.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu 许志壬 <[EMAIL PROTECTED]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 4fb47a2..e5ba216 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -508,6 +508,10 @@ static int ip6_fragment(struct sk_buff *
dev = rt->u.dst.dev;
hlen = ip6_find_1stfragopt(skb, &prevhdr);
nexthdr = *prevhdr;
+   if (unlikely(hlen > skb->len)) {
+   printk(KERN_CRIT "ip6_fragment: hlen = 0x%x, len = 0x%x, 
nexthdr = %d\n", hlen, skb->len, skb->nh.ipv6h->nexthdr);
+   BUG();
+   }
 
mtu = dst_mtu(&rt->u.dst);
if (np && np->frag_size < mtu) {
@@ -1204,6 +1208,8 @@ int ip6_push_pending_frames(struct sock 
struct flowi *fl = &inet->cork.fl;
unsigned char proto = fl->proto;
int err = 0;
+   u8 *prevhdr;
+   unsigned int hlen;
 
if ((skb = __skb_dequeue(&sk->sk_write_queue)) == NULL)
goto out;
@@ -1249,6 +1255,14 @@ int ip6_push_pending_frames(struct sock 
 
skb->dst = dst_clone(&rt->u.dst);
IP6_INC_STATS(IPSTATS_MIB_OUTREQUESTS); 
+
+   hlen = ip6_find_1stfragopt(skb, &prevhdr);
+   if (unlikely(hlen > skb->len)) {
+   printk(KERN_CRIT "ip6_push: hlen = 0x%x, len = 0x%x, nexthdr1 = 
%d, nexthdr2 = %d, proto = %d\n", hlen, skb->len, skb->nh.ipv6h->nexthdr, 
*prevhdr, proto);
+   printk(KERN_CRIT "ip6_push: opt = 0x%x, flen = %d, nflen = 
%d\n", (unsigned int)opt, opt ? opt->opt_flen : 0, opt ? opt->opt_nflen : 0);
+   BUG();
+   }
+
err = NF_HOOK(PF_INET6, NF_IP6_LOCAL_OUT, skb, NULL, skb->dst->dev, 
dst_output);
if (err) {
if (err > 0)
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: PROBLEM: kernel BUG at net/ipv6/ip6_output.c:718

2006-08-31 Thread cagri coltekin
Hi Again,

It took a while to find equipment for a test environment, but now I
have one that I can test on.

Here is the result:

---
[17180051.768000] ip6_fragment: hlen = 0x818, len = 0x7ce, nexthdr=4
[17180051.84] [ cut here ]
[17180051.84] kernel BUG at net/ipv6/ip6_output.c:510!
[17180051.84] invalid opcode:  [#1]
[17180051.84] SMP 
[17180051.84] Modules linked in: ipmi_si ipmi_msghandler ide_cd cdrom
[17180051.84] CPU:0
[17180051.84] EIP:0060:[]Not tainted VLI
[17180051.84] EFLAGS: 00010296   (2.6.17.11-ns-pri-debug-p1 #6) 
[17180051.84] EIP is at ip6_fragment+0x7f6/0x803
[17180051.84] eax: 0048   ebx: f75c4c5c   ecx: c038f5bc   edx: 0286
[17180051.84] esi: f7605c50   edi:    ebp: f76e2c80   esp: f7605bb8
[17180051.84] ds: 007b   es: 007b   ss: 0068
[17180051.84] Process named (pid: 1899, threadinfo=f7604000 task=f75cead0)
[17180051.84] Stack: c0324600 0818 07ce 0004  f7605bdc 
0400  
[17180051.84]ffd14ca4  f7605ea8 0818 f77a4040 01fe 
f755d080 f7976048 
[17180051.84]f76e2c80 f7605c50 f7976040 f75c4a80 c02bb612 f76e2c80 
c02bb40e c02bd66a 
[17180051.84] Call Trace:
[17180051.84]   ip6_output+0x3c/0x4c   
ip6_output2+0x0/0x1c8
[17180051.84]   ip6_push_pending_frames+0x250/0x390   
udp_v6_push_pending_frames+0x13d/0x1a4
[17180051.84]   udpv6_sendmsg+0x58a/0x953   
udpv6_recvmsg+0x20c/0x303
[17180051.84]   inet_sendmsg+0x4a/0x56   
sock_sendmsg+0xeb/0x105
[17180051.84]   __next_cpu+0x22/0x31   
find_busiest_group+0xd6/0x305
[17180051.84]   autoremove_wake_function+0x0/0x57   
copy_from_user+0x46/0x7c
[17180051.84]   copy_from_user+0x46/0x7c   
sys_sendmsg+0x191/0x1f8
[17180051.84]   futex_wait+0x129/0x238   
find_extend_vma+0x29/0x7e
[17180051.84]   default_wake_function+0x0/0x12   
futex_wake+0x4a/0xba
[17180051.84]   copy_from_user+0x46/0x7c   
sys_socketcall+0x236/0x254
[17180051.84]   syscall_call+0x7/0xb 
[17180051.84] Code: 50 60 e9 36 f9 ff ff 0f b6 44 24 1b 8b 54 24 2c 89 44 
24 0c 8b 45 60 c7 04 24 00 46 32 c0 89 54 24 04 89 44 24 08 e8 50 07 e6 ff <0f> 
0b fe 01 41 13 32 c0 e9 68 f8 ff ff 55 57 56 31 f6 53 83 ec 
[17180051.84] EIP: [] ip6_fragment+0x7f6/0x803 SS:ESP 
0068:f7605bb8
---

I hope this helps.

Cheers,
-- 
cagri

On Tue, Aug 29, 2006 at 06:28:28PM +1000, Herbert Xu wrote:
> 
> Thanks.  Please try this patch and tell me if it prints anything out.
> 
> Cheers,
> -- 
> Visit Openswan at http://www.openswan.org/
> Email: Herbert Xu 许志壬 <[EMAIL PROTECTED]>
> Home Page: http://gondor.apana.org.au/~herbert/
> PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
> --
> diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
> index 4fb47a2..5e2e4ea 100644
> --- a/net/ipv6/ip6_output.c
> +++ b/net/ipv6/ip6_output.c
> @@ -508,6 +508,10 @@ static int ip6_fragment(struct sk_buff *
>   dev = rt->u.dst.dev;
>   hlen = ip6_find_1stfragopt(skb, &prevhdr);
>   nexthdr = *prevhdr;
> + if (unlikely(hlen > skb->len)) {
> + printk(KERN_CRIT "ip6_fragment: hlen = 0x%x, len = 0x%x, 
> nexthdr=%d\n", hlen, skb->len, nexthdr);
> + BUG();
> + }
>  
>   mtu = dst_mtu(&rt->u.dst);
>   if (np && np->frag_size < mtu) {
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: PROBLEM: kernel BUG at net/ipv6/ip6_output.c:718

2006-08-29 Thread Herbert Xu
On Mon, Aug 28, 2006 at 02:49:07AM +0200, cagri coltekin wrote:
> 
> Ooops, sorry for the confusion. It happens with 2.6.17 too (see
> below), cut&paste from wrong log. The rest of the data provided
> in the previous message is actually fresh.

Thanks.  Please try this patch and tell me if it prints anything out.

Cheers,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu 许志壬 <[EMAIL PROTECTED]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
--
diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c
index 4fb47a2..5e2e4ea 100644
--- a/net/ipv6/ip6_output.c
+++ b/net/ipv6/ip6_output.c
@@ -508,6 +508,10 @@ static int ip6_fragment(struct sk_buff *
dev = rt->u.dst.dev;
hlen = ip6_find_1stfragopt(skb, &prevhdr);
nexthdr = *prevhdr;
+   if (unlikely(hlen > skb->len)) {
+   printk(KERN_CRIT "ip6_fragment: hlen = 0x%x, len = 0x%x, 
nexthdr=%d\n", hlen, skb->len, nexthdr);
+   BUG();
+   }
 
mtu = dst_mtu(&rt->u.dst);
if (np && np->frag_size < mtu) {
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: PROBLEM: kernel BUG at net/ipv6/ip6_output.c:718

2006-08-27 Thread cagri coltekin
On Mon, Aug 28, 2006 at 10:16:56AM +1000, Herbert Xu wrote:
> cagri coltekin <[EMAIL PROTECTED]> wrote:
> > 
> > Aug 25 04:03:35 ns kernel: [ cut here ]
> > Aug 25 04:03:35 ns kernel: kernel BUG at net/ipv6/ip6_output.c:718!
> > Aug 25 04:03:35 ns kernel: invalid operand:  [#1]
> > Aug 25 04:03:35 ns kernel: SMP 
> > Aug 25 04:03:35 ns kernel: Modules linked in: uhci_hcd ehci_hcd ohci_hcd 
> > aic7xxx ide_cd
> > Aug 25 04:03:35 ns kernel: CPU:3
> > Aug 25 04:03:35 ns kernel: EIP:0060:[svc_create_socket+189/416]Not 
> > tainted VLI
> > Aug 25 04:03:35 ns kernel: EFLAGS: 00010282   (2.6.12.6-ncc-server) 
> 
> This is an ancient kernel.  Please really try 2.6.17 instead of just
> talking about it (the line number confirms that it is 2.6.12).

Oops, sorry for the confusion. It happens with 2.6.17 too (see
below); I cut and pasted from the wrong log. The rest of the data
provided in the previous message is actually fresh.

Aug 26 07:09:36 ns kernel: [17180077.732000] [ cut here 
]
Aug 26 07:09:36 ns kernel: [17180077.792000] kernel BUG at 
net/ipv6/ip6_output.c:693!
Aug 26 07:09:36 ns kernel: [17180077.856000] invalid opcode:  [#1]
Aug 26 07:09:36 ns kernel: [17180077.90] SMP 
Aug 26 07:09:36 ns kernel: [17180077.928000] Modules linked in: ide_cd cdrom
Aug 26 07:09:36 ns kernel: [17180077.98] CPU:2
Aug 26 07:09:36 ns kernel: [17180077.98] EIP:
0060:[ip6_fragment+619/1981]Not tainted VLI
Aug 26 07:09:36 ns kernel: [17180077.98] EFLAGS: 00010282   
(2.6.17.11-ns-debug #6) 
Aug 26 07:09:36 ns kernel: [17180078.148000] EIP is at ip6_fragment+0x26b/0x7bd
Aug 26 07:09:36 ns kernel: [17180078.204000] eax: fff2   ebx: fdd8   
ecx: 05b8   edx: f5ecc600
Aug 26 07:09:36 ns kernel: [17180078.288000] esi: f5ecc7f8   edi: f5e7bff0   
ebp: c2ff6780   esp: f71f5bb8
Aug 26 07:09:36 ns kernel: [17180078.376000] ds: 007b   es: 007b   ss: 0068
Aug 26 07:09:36 ns kernel: [17180078.428000] Process named (pid: 1811, 
threadinfo=f71f4000 task=f7470a10)
Aug 26 07:09:36 ns kernel: [17180078.508000] Stack: f7208880 07e0 f5e7bff8 
fdd8 f71f4000 f71f5bdc 5d00  
Aug 26 07:09:36 ns kernel: [17180078.612000]07e0 0e03 ffee 
07e0 fddc f5e7bff0 f7fd7880 f5ecc048 
Aug 26 07:09:36 ns kernel: [17180078.72]f7208880 f7fd7880 f5ecc040 
f774c080 c02adcc6 f7208880 c02adac2 c02afcc6 
Aug 26 07:09:36 ns kernel: [17180078.824000] Call Trace:
Aug 26 07:09:36 ns kernel: [17180078.86]   ip6_output+0x3c/0x4c  
 ip6_output2+0x0/0x1c8
Aug 26 07:09:36 ns kernel: [17180078.948000]   
ip6_push_pending_frames+0x250/0x390   
udp_v6_push_pending_frames+0x13d/0x1a4
Aug 26 07:09:36 ns kernel: [17180079.072000]   
udpv6_sendmsg+0x58a/0x953   udp_recvmsg+0x56/0x24c
Aug 26 07:09:36 ns kernel: [17180079.172000]   inet_sendmsg+0x4a/0x56 
  sock_sendmsg+0xeb/0x105
Aug 26 07:09:36 ns kernel: [17180079.264000]   __next_cpu+0x22/0x31  
 find_busiest_group+0xd6/0x305
Aug 26 07:09:36 ns kernel: [17180079.364000]   
dependent_sleeper+0x1ec/0x32d   autoremove_wake_function+0x0/0x57
Aug 26 07:09:36 ns kernel: [17180079.476000]   
copy_from_user+0x46/0x7c   copy_from_user+0x46/0x7c
Aug 26 07:09:36 ns kernel: [17180079.576000]   
sys_sendmsg+0x191/0x1f8   futex_wait+0x129/0x238
Aug 26 07:09:36 ns kernel: [17180079.672000]   
find_extend_vma+0x29/0x7e   default_wake_function+0x0/0x12
Aug 26 07:09:36 ns kernel: [17180079.776000]   futex_wake+0x4a/0xba  
 pipe_write+0x0/0x3b
Aug 26 07:09:36 ns kernel: [17180079.864000]   
copy_from_user+0x46/0x7c   sys_socketcall+0x236/0x254
Aug 26 07:09:36 ns kernel: [17180079.964000]   syscall_call+0x7/0xb 
Aug 26 07:09:36 ns kernel: [17180080.02] Code: 24 8b 44 24 34 89 50 04 89 
5c 24 0c 8b 4c 24 20 8b 45 1c 89 4c 24 04 89 44 24 08 8b 44 24 54 89 04 24 e8 
25 a6 fa ff 85 c0 74 08 <0f> 0b b5 02 21 fb 30 c0 0f b7 44 24 1c 8b 4c 24 34 89 
c2 c1 e8 
Aug 26 07:09:36 ns kernel: [17180080.264000] EIP: [ip6_fragment+619/1981] 
ip6_fragment+0x26b/0x7bd SS:ESP 0068:f71f5bb8
--


-- 
cagri
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: PROBLEM: kernel BUG at net/ipv6/ip6_output.c:718

2006-08-27 Thread Herbert Xu
cagri coltekin <[EMAIL PROTECTED]> wrote:
> 
> Aug 25 04:03:35 ns kernel: [ cut here ]
> Aug 25 04:03:35 ns kernel: kernel BUG at net/ipv6/ip6_output.c:718!
> Aug 25 04:03:35 ns kernel: invalid operand:  [#1]
> Aug 25 04:03:35 ns kernel: SMP 
> Aug 25 04:03:35 ns kernel: Modules linked in: uhci_hcd ehci_hcd ohci_hcd 
> aic7xxx ide_cd
> Aug 25 04:03:35 ns kernel: CPU:3
> Aug 25 04:03:35 ns kernel: EIP:0060:[svc_create_socket+189/416]Not 
> tainted VLI
> Aug 25 04:03:35 ns kernel: EFLAGS: 00010282   (2.6.12.6-ncc-server) 

This is an ancient kernel.  Please really try 2.6.17 instead of just
talking about it (the line number confirms that it is 2.6.12).

Thanks,
-- 
Visit Openswan at http://www.openswan.org/
Email: Herbert Xu 许志壬 <[EMAIL PROTECTED]>
Home Page: http://gondor.apana.org.au/~herbert/
PGP Key: http://gondor.apana.org.au/~herbert/pubkey.txt
-
To unsubscribe from this list: send the line "unsubscribe netdev" in
the body of a message to [EMAIL PROTECTED]
More majordomo info at  http://vger.kernel.org/majordomo-info.html