Re: net/atm: warning in alloc_tx/__might_sleep
On Mon, 2017-03-13 at 18:43 +0100, Andrey Konovalov wrote: > On Thu, Jan 12, 2017 at 11:40 AM, Chas Williams <3ch...@gmail.com> wrote: > > On Wed, 2017-01-11 at 20:36 -0800, Cong Wang wrote: > >> On Wed, Jan 11, 2017 at 11:46 AM, Michal Hocko wrote: > >> > On Wed 11-01-17 20:45:25, Michal Hocko wrote: > >> >> On Wed 11-01-17 09:37:06, Chas Williams wrote: > >> >> > On Mon, 2017-01-09 at 18:20 +0100, Andrey Konovalov wrote: > >> >> > > Hi! > >> >> > > > >> >> > > I've got the following error report while running the syzkaller > >> >> > > fuzzer. > >> >> > > > >> >> > > On commit a121103c922847ba5010819a3f250f1f7fc84ab8 (4.10-rc3). > >> >> > > > >> >> > > A reproducer is attached. > >> >> > > > >> >> > > [ cut here ] > >> >> > > WARNING: CPU: 0 PID: 4114 at kernel/sched/core.c:7737 > >> >> > > __might_sleep+0x149/0x1a0 > >> >> > > do not call blocking ops when !TASK_RUNNING; state=1 set at > >> >> > > [] prepare_to_wait+0x182/0x530 > >> >> > > Modules linked in: > >> >> > > CPU: 0 PID: 4114 Comm: a.out Not tainted 4.10.0-rc3+ #59 > >> >> > > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs > >> >> > > 01/01/2011 > >> >> > > Call Trace: > >> >> > > __dump_stack lib/dump_stack.c:15 > >> >> > > dump_stack+0x292/0x398 lib/dump_stack.c:51 > >> >> > > __warn+0x19f/0x1e0 kernel/panic.c:547 > >> >> > > warn_slowpath_fmt+0xc5/0x110 kernel/panic.c:562 > >> >> > > __might_sleep+0x149/0x1a0 kernel/sched/core.c:7732 > >> >> > > slab_pre_alloc_hook mm/slab.h:408 > >> >> > > slab_alloc_node mm/slub.c:2634 > >> >> > > kmem_cache_alloc_node+0x14a/0x280 mm/slub.c:2744 > >> >> > > __alloc_skb+0x10f/0x800 net/core/skbuff.c:219 > >> >> > > alloc_skb ./include/linux/skbuff.h:926 > >> >> > > alloc_tx net/atm/common.c:75 > >> >> > > >> >> > This is likely alloc_skb(..., GFP_KERNEL) in alloc_tx(). The simplest > >> >> > fix for this would be simply to switch this GFP_ATOMIC. See if this > >> >> > is > >> >> > any better. > >> >> > > >> >> > diff --git a/net/atm/common.c b/net/atm/common.c > >> >> > index a3ca922..d84220c 100644 > >> >> > --- a/net/atm/common.c > >> >> > +++ b/net/atm/common.c > >> >> > @@ -72,7 +72,7 @@ static struct sk_buff *alloc_tx(struct atm_vcc > >> >> > *vcc, unsigned int size) > >> >> > sk_wmem_alloc_get(sk), size, sk->sk_sndbuf); > >> >> > return NULL; > >> >> > } > >> >> > - while (!(skb = alloc_skb(size, GFP_KERNEL))) > >> >> > + while (!(skb = alloc_skb(size, GFP_ATOMIC))) > >> >> > schedule(); > >> >> > pr_debug("%d += %d\n", sk_wmem_alloc_get(sk), skb->truesize); > >> >> > atomic_add(skb->truesize, &sk->sk_wmem_alloc); > >> >> > >> >> Blee, this code is just horrendous. But the "fix" is obviously broken! > >> >> schedule() is just a noop if you do not change the task state and what > >> >> you are just asking for is a never failing non sleeping allocation - aka > >> >> a busy loop in the kernel! > >> > > >> > And btw. this while loop should be really turned into GFP_KERNEL | > >> > __GFP_NOFAIL with and explanation why this allocation cannot possibly > >> > fail. > >> > >> I think a nested loop is quite unnecessary, probably due to the code itself > >> is pretty old. The alloc_tx() is in the outer loop, the alloc_skb() is > >> in the inner > >> loop, both seem to wait for a successful GFP allocation. The inner one > >> is even more unnecessary. > >> > >> Of course, I am not surprised MM may already have a mechanism to do > >> the similar logic. > >> > >> There maybe some reason ATM needs such a logic, although other proto > >> could handle skb allocation failure quite well in ->sendmsg(). > > > > > > I can't think of any particular reason that it needs this loop here. I > > suspect > > that the loop for alloc_tx() predates the wait logic in ->sendmsg() and > > that the > > original looping was in alloc_tx() initially and was simply never removed. > > Changes > > here would date back to before the git conversion. > > > > Hi, > > I'm still seeing this on 4495c08e84729385774601b5146d51d9e5849f81 (4.11-rc2). > > Thanks! David Miller just accepted a patch for net-next that should resolve this issue.
Re: net/atm: warning in alloc_tx/__might_sleep
On Thu, Jan 12, 2017 at 11:40 AM, Chas Williams <3ch...@gmail.com> wrote: > On Wed, 2017-01-11 at 20:36 -0800, Cong Wang wrote: >> On Wed, Jan 11, 2017 at 11:46 AM, Michal Hocko wrote: >> > On Wed 11-01-17 20:45:25, Michal Hocko wrote: >> >> On Wed 11-01-17 09:37:06, Chas Williams wrote: >> >> > On Mon, 2017-01-09 at 18:20 +0100, Andrey Konovalov wrote: >> >> > > Hi! >> >> > > >> >> > > I've got the following error report while running the syzkaller >> >> > > fuzzer. >> >> > > >> >> > > On commit a121103c922847ba5010819a3f250f1f7fc84ab8 (4.10-rc3). >> >> > > >> >> > > A reproducer is attached. >> >> > > >> >> > > [ cut here ] >> >> > > WARNING: CPU: 0 PID: 4114 at kernel/sched/core.c:7737 >> >> > > __might_sleep+0x149/0x1a0 >> >> > > do not call blocking ops when !TASK_RUNNING; state=1 set at >> >> > > [] prepare_to_wait+0x182/0x530 >> >> > > Modules linked in: >> >> > > CPU: 0 PID: 4114 Comm: a.out Not tainted 4.10.0-rc3+ #59 >> >> > > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs >> >> > > 01/01/2011 >> >> > > Call Trace: >> >> > > __dump_stack lib/dump_stack.c:15 >> >> > > dump_stack+0x292/0x398 lib/dump_stack.c:51 >> >> > > __warn+0x19f/0x1e0 kernel/panic.c:547 >> >> > > warn_slowpath_fmt+0xc5/0x110 kernel/panic.c:562 >> >> > > __might_sleep+0x149/0x1a0 kernel/sched/core.c:7732 >> >> > > slab_pre_alloc_hook mm/slab.h:408 >> >> > > slab_alloc_node mm/slub.c:2634 >> >> > > kmem_cache_alloc_node+0x14a/0x280 mm/slub.c:2744 >> >> > > __alloc_skb+0x10f/0x800 net/core/skbuff.c:219 >> >> > > alloc_skb ./include/linux/skbuff.h:926 >> >> > > alloc_tx net/atm/common.c:75 >> >> > >> >> > This is likely alloc_skb(..., GFP_KERNEL) in alloc_tx(). The simplest >> >> > fix for this would be simply to switch this GFP_ATOMIC. See if this is >> >> > any better. >> >> > >> >> > diff --git a/net/atm/common.c b/net/atm/common.c >> >> > index a3ca922..d84220c 100644 >> >> > --- a/net/atm/common.c >> >> > +++ b/net/atm/common.c >> >> > @@ -72,7 +72,7 @@ static struct sk_buff *alloc_tx(struct atm_vcc *vcc, >> >> > unsigned int size) >> >> > sk_wmem_alloc_get(sk), size, sk->sk_sndbuf); >> >> > return NULL; >> >> > } >> >> > - while (!(skb = alloc_skb(size, GFP_KERNEL))) >> >> > + while (!(skb = alloc_skb(size, GFP_ATOMIC))) >> >> > schedule(); >> >> > pr_debug("%d += %d\n", sk_wmem_alloc_get(sk), skb->truesize); >> >> > atomic_add(skb->truesize, &sk->sk_wmem_alloc); >> >> >> >> Blee, this code is just horrendous. But the "fix" is obviously broken! >> >> schedule() is just a noop if you do not change the task state and what >> >> you are just asking for is a never failing non sleeping allocation - aka >> >> a busy loop in the kernel! >> > >> > And btw. this while loop should be really turned into GFP_KERNEL | >> > __GFP_NOFAIL with and explanation why this allocation cannot possibly >> > fail. >> >> I think a nested loop is quite unnecessary, probably due to the code itself >> is pretty old. The alloc_tx() is in the outer loop, the alloc_skb() is >> in the inner >> loop, both seem to wait for a successful GFP allocation. The inner one >> is even more unnecessary. >> >> Of course, I am not surprised MM may already have a mechanism to do >> the similar logic. >> >> There maybe some reason ATM needs such a logic, although other proto >> could handle skb allocation failure quite well in ->sendmsg(). > > > I can't think of any particular reason that it needs this loop here. I > suspect > that the loop for alloc_tx() predates the wait logic in ->sendmsg() and that > the > original looping was in alloc_tx() initially and was simply never removed. > Changes > here would date back to before the git conversion. > Hi, I'm still seeing this on 4495c08e84729385774601b5146d51d9e5849f81 (4.11-rc2). Thanks!
Re: net/atm: warning in alloc_tx/__might_sleep
On Wed, 2017-01-11 at 20:36 -0800, Cong Wang wrote: > On Wed, Jan 11, 2017 at 11:46 AM, Michal Hocko wrote: > > On Wed 11-01-17 20:45:25, Michal Hocko wrote: > >> On Wed 11-01-17 09:37:06, Chas Williams wrote: > >> > On Mon, 2017-01-09 at 18:20 +0100, Andrey Konovalov wrote: > >> > > Hi! > >> > > > >> > > I've got the following error report while running the syzkaller fuzzer. > >> > > > >> > > On commit a121103c922847ba5010819a3f250f1f7fc84ab8 (4.10-rc3). > >> > > > >> > > A reproducer is attached. > >> > > > >> > > [ cut here ] > >> > > WARNING: CPU: 0 PID: 4114 at kernel/sched/core.c:7737 > >> > > __might_sleep+0x149/0x1a0 > >> > > do not call blocking ops when !TASK_RUNNING; state=1 set at > >> > > [] prepare_to_wait+0x182/0x530 > >> > > Modules linked in: > >> > > CPU: 0 PID: 4114 Comm: a.out Not tainted 4.10.0-rc3+ #59 > >> > > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs > >> > > 01/01/2011 > >> > > Call Trace: > >> > > __dump_stack lib/dump_stack.c:15 > >> > > dump_stack+0x292/0x398 lib/dump_stack.c:51 > >> > > __warn+0x19f/0x1e0 kernel/panic.c:547 > >> > > warn_slowpath_fmt+0xc5/0x110 kernel/panic.c:562 > >> > > __might_sleep+0x149/0x1a0 kernel/sched/core.c:7732 > >> > > slab_pre_alloc_hook mm/slab.h:408 > >> > > slab_alloc_node mm/slub.c:2634 > >> > > kmem_cache_alloc_node+0x14a/0x280 mm/slub.c:2744 > >> > > __alloc_skb+0x10f/0x800 net/core/skbuff.c:219 > >> > > alloc_skb ./include/linux/skbuff.h:926 > >> > > alloc_tx net/atm/common.c:75 > >> > > >> > This is likely alloc_skb(..., GFP_KERNEL) in alloc_tx(). The simplest > >> > fix for this would be simply to switch this GFP_ATOMIC. See if this is > >> > any better. > >> > > >> > diff --git a/net/atm/common.c b/net/atm/common.c > >> > index a3ca922..d84220c 100644 > >> > --- a/net/atm/common.c > >> > +++ b/net/atm/common.c > >> > @@ -72,7 +72,7 @@ static struct sk_buff *alloc_tx(struct atm_vcc *vcc, > >> > unsigned int size) > >> > sk_wmem_alloc_get(sk), size, sk->sk_sndbuf); > >> > return NULL; > >> > } > >> > - while (!(skb = alloc_skb(size, GFP_KERNEL))) > >> > + while (!(skb = alloc_skb(size, GFP_ATOMIC))) > >> > schedule(); > >> > pr_debug("%d += %d\n", sk_wmem_alloc_get(sk), skb->truesize); > >> > atomic_add(skb->truesize, &sk->sk_wmem_alloc); > >> > >> Blee, this code is just horrendous. But the "fix" is obviously broken! > >> schedule() is just a noop if you do not change the task state and what > >> you are just asking for is a never failing non sleeping allocation - aka > >> a busy loop in the kernel! > > > > And btw. this while loop should be really turned into GFP_KERNEL | > > __GFP_NOFAIL with and explanation why this allocation cannot possibly > > fail. > > I think a nested loop is quite unnecessary, probably due to the code itself > is pretty old. The alloc_tx() is in the outer loop, the alloc_skb() is > in the inner > loop, both seem to wait for a successful GFP allocation. The inner one > is even more unnecessary. > > Of course, I am not surprised MM may already have a mechanism to do > the similar logic. > > There maybe some reason ATM needs such a logic, although other proto > could handle skb allocation failure quite well in ->sendmsg(). I can't think of any particular reason that it needs this loop here. I suspect that the loop for alloc_tx() predates the wait logic in ->sendmsg() and that the original looping was in alloc_tx() initially and was simply never removed. Changes here would date back to before the git conversion.
Re: net/atm: warning in alloc_tx/__might_sleep
On Wed, Jan 11, 2017 at 11:46 AM, Michal Hocko wrote: > On Wed 11-01-17 20:45:25, Michal Hocko wrote: >> On Wed 11-01-17 09:37:06, Chas Williams wrote: >> > On Mon, 2017-01-09 at 18:20 +0100, Andrey Konovalov wrote: >> > > Hi! >> > > >> > > I've got the following error report while running the syzkaller fuzzer. >> > > >> > > On commit a121103c922847ba5010819a3f250f1f7fc84ab8 (4.10-rc3). >> > > >> > > A reproducer is attached. >> > > >> > > [ cut here ] >> > > WARNING: CPU: 0 PID: 4114 at kernel/sched/core.c:7737 >> > > __might_sleep+0x149/0x1a0 >> > > do not call blocking ops when !TASK_RUNNING; state=1 set at >> > > [] prepare_to_wait+0x182/0x530 >> > > Modules linked in: >> > > CPU: 0 PID: 4114 Comm: a.out Not tainted 4.10.0-rc3+ #59 >> > > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs >> > > 01/01/2011 >> > > Call Trace: >> > > __dump_stack lib/dump_stack.c:15 >> > > dump_stack+0x292/0x398 lib/dump_stack.c:51 >> > > __warn+0x19f/0x1e0 kernel/panic.c:547 >> > > warn_slowpath_fmt+0xc5/0x110 kernel/panic.c:562 >> > > __might_sleep+0x149/0x1a0 kernel/sched/core.c:7732 >> > > slab_pre_alloc_hook mm/slab.h:408 >> > > slab_alloc_node mm/slub.c:2634 >> > > kmem_cache_alloc_node+0x14a/0x280 mm/slub.c:2744 >> > > __alloc_skb+0x10f/0x800 net/core/skbuff.c:219 >> > > alloc_skb ./include/linux/skbuff.h:926 >> > > alloc_tx net/atm/common.c:75 >> > >> > This is likely alloc_skb(..., GFP_KERNEL) in alloc_tx(). The simplest >> > fix for this would be simply to switch this GFP_ATOMIC. See if this is >> > any better. >> > >> > diff --git a/net/atm/common.c b/net/atm/common.c >> > index a3ca922..d84220c 100644 >> > --- a/net/atm/common.c >> > +++ b/net/atm/common.c >> > @@ -72,7 +72,7 @@ static struct sk_buff *alloc_tx(struct atm_vcc *vcc, >> > unsigned int size) >> > sk_wmem_alloc_get(sk), size, sk->sk_sndbuf); >> > return NULL; >> > } >> > - while (!(skb = alloc_skb(size, GFP_KERNEL))) >> > + while (!(skb = alloc_skb(size, GFP_ATOMIC))) >> > schedule(); >> > pr_debug("%d += %d\n", sk_wmem_alloc_get(sk), skb->truesize); >> > atomic_add(skb->truesize, &sk->sk_wmem_alloc); >> >> Blee, this code is just horrendous. But the "fix" is obviously broken! >> schedule() is just a noop if you do not change the task state and what >> you are just asking for is a never failing non sleeping allocation - aka >> a busy loop in the kernel! > > And btw. this while loop should be really turned into GFP_KERNEL | > __GFP_NOFAIL with and explanation why this allocation cannot possibly > fail. I think a nested loop is quite unnecessary, probably due to the code itself is pretty old. The alloc_tx() is in the outer loop, the alloc_skb() is in the inner loop, both seem to wait for a successful GFP allocation. The inner one is even more unnecessary. Of course, I am not surprised MM may already have a mechanism to do the similar logic. There maybe some reason ATM needs such a logic, although other proto could handle skb allocation failure quite well in ->sendmsg().
Re: net/atm: warning in alloc_tx/__might_sleep
On Wed 11-01-17 20:45:25, Michal Hocko wrote: > On Wed 11-01-17 09:37:06, Chas Williams wrote: > > On Mon, 2017-01-09 at 18:20 +0100, Andrey Konovalov wrote: > > > Hi! > > > > > > I've got the following error report while running the syzkaller fuzzer. > > > > > > On commit a121103c922847ba5010819a3f250f1f7fc84ab8 (4.10-rc3). > > > > > > A reproducer is attached. > > > > > > [ cut here ] > > > WARNING: CPU: 0 PID: 4114 at kernel/sched/core.c:7737 > > > __might_sleep+0x149/0x1a0 > > > do not call blocking ops when !TASK_RUNNING; state=1 set at > > > [] prepare_to_wait+0x182/0x530 > > > Modules linked in: > > > CPU: 0 PID: 4114 Comm: a.out Not tainted 4.10.0-rc3+ #59 > > > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs > > > 01/01/2011 > > > Call Trace: > > > __dump_stack lib/dump_stack.c:15 > > > dump_stack+0x292/0x398 lib/dump_stack.c:51 > > > __warn+0x19f/0x1e0 kernel/panic.c:547 > > > warn_slowpath_fmt+0xc5/0x110 kernel/panic.c:562 > > > __might_sleep+0x149/0x1a0 kernel/sched/core.c:7732 > > > slab_pre_alloc_hook mm/slab.h:408 > > > slab_alloc_node mm/slub.c:2634 > > > kmem_cache_alloc_node+0x14a/0x280 mm/slub.c:2744 > > > __alloc_skb+0x10f/0x800 net/core/skbuff.c:219 > > > alloc_skb ./include/linux/skbuff.h:926 > > > alloc_tx net/atm/common.c:75 > > > > This is likely alloc_skb(..., GFP_KERNEL) in alloc_tx(). The simplest > > fix for this would be simply to switch this GFP_ATOMIC. See if this is > > any better. > > > > diff --git a/net/atm/common.c b/net/atm/common.c > > index a3ca922..d84220c 100644 > > --- a/net/atm/common.c > > +++ b/net/atm/common.c > > @@ -72,7 +72,7 @@ static struct sk_buff *alloc_tx(struct atm_vcc *vcc, > > unsigned int size) > > sk_wmem_alloc_get(sk), size, sk->sk_sndbuf); > > return NULL; > > } > > - while (!(skb = alloc_skb(size, GFP_KERNEL))) > > + while (!(skb = alloc_skb(size, GFP_ATOMIC))) > > schedule(); > > pr_debug("%d += %d\n", sk_wmem_alloc_get(sk), skb->truesize); > > atomic_add(skb->truesize, &sk->sk_wmem_alloc); > > Blee, this code is just horrendous. But the "fix" is obviously broken! > schedule() is just a noop if you do not change the task state and what > you are just asking for is a never failing non sleeping allocation - aka > a busy loop in the kernel! And btw. this while loop should be really turned into GFP_KERNEL | __GFP_NOFAIL with and explanation why this allocation cannot possibly fail. -- Michal Hocko SUSE Labs
Re: net/atm: warning in alloc_tx/__might_sleep
On Wed 11-01-17 09:37:06, Chas Williams wrote: > On Mon, 2017-01-09 at 18:20 +0100, Andrey Konovalov wrote: > > Hi! > > > > I've got the following error report while running the syzkaller fuzzer. > > > > On commit a121103c922847ba5010819a3f250f1f7fc84ab8 (4.10-rc3). > > > > A reproducer is attached. > > > > [ cut here ] > > WARNING: CPU: 0 PID: 4114 at kernel/sched/core.c:7737 > > __might_sleep+0x149/0x1a0 > > do not call blocking ops when !TASK_RUNNING; state=1 set at > > [] prepare_to_wait+0x182/0x530 > > Modules linked in: > > CPU: 0 PID: 4114 Comm: a.out Not tainted 4.10.0-rc3+ #59 > > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 > > Call Trace: > > __dump_stack lib/dump_stack.c:15 > > dump_stack+0x292/0x398 lib/dump_stack.c:51 > > __warn+0x19f/0x1e0 kernel/panic.c:547 > > warn_slowpath_fmt+0xc5/0x110 kernel/panic.c:562 > > __might_sleep+0x149/0x1a0 kernel/sched/core.c:7732 > > slab_pre_alloc_hook mm/slab.h:408 > > slab_alloc_node mm/slub.c:2634 > > kmem_cache_alloc_node+0x14a/0x280 mm/slub.c:2744 > > __alloc_skb+0x10f/0x800 net/core/skbuff.c:219 > > alloc_skb ./include/linux/skbuff.h:926 > > alloc_tx net/atm/common.c:75 > > This is likely alloc_skb(..., GFP_KERNEL) in alloc_tx(). The simplest > fix for this would be simply to switch this GFP_ATOMIC. See if this is > any better. > > diff --git a/net/atm/common.c b/net/atm/common.c > index a3ca922..d84220c 100644 > --- a/net/atm/common.c > +++ b/net/atm/common.c > @@ -72,7 +72,7 @@ static struct sk_buff *alloc_tx(struct atm_vcc *vcc, > unsigned int size) >sk_wmem_alloc_get(sk), size, sk->sk_sndbuf); > return NULL; > } > - while (!(skb = alloc_skb(size, GFP_KERNEL))) > + while (!(skb = alloc_skb(size, GFP_ATOMIC))) > schedule(); > pr_debug("%d += %d\n", sk_wmem_alloc_get(sk), skb->truesize); > atomic_add(skb->truesize, &sk->sk_wmem_alloc); Blee, this code is just horrendous. But the "fix" is obviously broken! schedule() is just a noop if you do not change the task state and what you are just asking for is a never failing non sleeping allocation - aka a busy loop in the kernel! -- Michal Hocko SUSE Labs
Re: net/atm: warning in alloc_tx/__might_sleep
On Tue, 2017-01-10 at 23:47 +0100, Francois Romieu wrote: > Eric Dumazet : > > On Tue, Jan 10, 2017 at 9:35 AM, Cong Wang wrote: > > > On Mon, Jan 9, 2017 at 9:20 AM, Andrey Konovalov > > > wrote: > > > > > > The fix should be straight-forward. Mind to try the attached patch? > > > > > > You forgot to remove schedule() ? > > It may be clearer to split alloc_tx in two parts: only the unsleepable > "if (sk_wmem_alloc_get(sk) && !atm_may_send(vcc, size)) {" part of it > contributes to the inner "while (!(skb = alloc_tx(vcc, eff))) {" block. > > See net/atm/common.c > [...] > static struct sk_buff *alloc_tx(struct atm_vcc *vcc, unsigned int size) > { > struct sk_buff *skb; > struct sock *sk = sk_atm(vcc); > > if (sk_wmem_alloc_get(sk) && !atm_may_send(vcc, size)) { > pr_debug("Sorry: wmem_alloc = %d, size = %d, sndbuf = %d\n", > sk_wmem_alloc_get(sk), size, sk->sk_sndbuf); > return NULL; > } > while (!(skb = alloc_skb(size, GFP_KERNEL))) > schedule(); Yeah, this code looks quite wrong anyway. We can read it as an infinite loop in some stress conditions or memcg constraints. > The waiting stuff is related to vcc drain but the code makes it look as > if it were also related to skb alloc (it isn't). > > It may be obvious for you but it took me a while to figure what the > code is supposed to achieve. >
Re: net/atm: warning in alloc_tx/__might_sleep
On Mon, 2017-01-09 at 18:20 +0100, Andrey Konovalov wrote: > Hi! > > I've got the following error report while running the syzkaller fuzzer. > > On commit a121103c922847ba5010819a3f250f1f7fc84ab8 (4.10-rc3). > > A reproducer is attached. > > [ cut here ] > WARNING: CPU: 0 PID: 4114 at kernel/sched/core.c:7737 > __might_sleep+0x149/0x1a0 > do not call blocking ops when !TASK_RUNNING; state=1 set at > [] prepare_to_wait+0x182/0x530 > Modules linked in: > CPU: 0 PID: 4114 Comm: a.out Not tainted 4.10.0-rc3+ #59 > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 > Call Trace: > __dump_stack lib/dump_stack.c:15 > dump_stack+0x292/0x398 lib/dump_stack.c:51 > __warn+0x19f/0x1e0 kernel/panic.c:547 > warn_slowpath_fmt+0xc5/0x110 kernel/panic.c:562 > __might_sleep+0x149/0x1a0 kernel/sched/core.c:7732 > slab_pre_alloc_hook mm/slab.h:408 > slab_alloc_node mm/slub.c:2634 > kmem_cache_alloc_node+0x14a/0x280 mm/slub.c:2744 > __alloc_skb+0x10f/0x800 net/core/skbuff.c:219 > alloc_skb ./include/linux/skbuff.h:926 > alloc_tx net/atm/common.c:75 This is likely alloc_skb(..., GFP_KERNEL) in alloc_tx(). The simplest fix for this would be simply to switch this GFP_ATOMIC. See if this is any better. diff --git a/net/atm/common.c b/net/atm/common.c index a3ca922..d84220c 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -72,7 +72,7 @@ static struct sk_buff *alloc_tx(struct atm_vcc *vcc, unsigned int size) sk_wmem_alloc_get(sk), size, sk->sk_sndbuf); return NULL; } - while (!(skb = alloc_skb(size, GFP_KERNEL))) + while (!(skb = alloc_skb(size, GFP_ATOMIC))) schedule(); pr_debug("%d += %d\n", sk_wmem_alloc_get(sk), skb->truesize); atomic_add(skb->truesize, &sk->sk_wmem_alloc);
Re: net/atm: warning in alloc_tx/__might_sleep
On Tue, Jan 10, 2017 at 6:40 PM, Eric Dumazet wrote: > On Tue, Jan 10, 2017 at 9:35 AM, Cong Wang wrote: >> On Mon, Jan 9, 2017 at 9:20 AM, Andrey Konovalov >> wrote: >> >> The fix should be straight-forward. Mind to try the attached patch? Hi Cong, Your patch with schedule() removed as suggested by Eric fixes the issue. Tested-by: Andrey Konovalov Thanks! > > > You forgot to remove schedule() ? > > schedule(); > + wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
Re: net/atm: warning in alloc_tx/__might_sleep
Eric Dumazet : > On Tue, Jan 10, 2017 at 9:35 AM, Cong Wang wrote: > > On Mon, Jan 9, 2017 at 9:20 AM, Andrey Konovalov > > wrote: > > > > The fix should be straight-forward. Mind to try the attached patch? > > > You forgot to remove schedule() ? It may be clearer to split alloc_tx in two parts: only the unsleepable "if (sk_wmem_alloc_get(sk) && !atm_may_send(vcc, size)) {" part of it contributes to the inner "while (!(skb = alloc_tx(vcc, eff))) {" block. See net/atm/common.c [...] static struct sk_buff *alloc_tx(struct atm_vcc *vcc, unsigned int size) { struct sk_buff *skb; struct sock *sk = sk_atm(vcc); if (sk_wmem_alloc_get(sk) && !atm_may_send(vcc, size)) { pr_debug("Sorry: wmem_alloc = %d, size = %d, sndbuf = %d\n", sk_wmem_alloc_get(sk), size, sk->sk_sndbuf); return NULL; } while (!(skb = alloc_skb(size, GFP_KERNEL))) schedule(); pr_debug("%d += %d\n", sk_wmem_alloc_get(sk), skb->truesize); atomic_add(skb->truesize, &sk->sk_wmem_alloc); return skb; } The waiting stuff is related to vcc drain but the code makes it look as if it were also related to skb alloc (it isn't). It may be obvious for you but it took me a while to figure what the code is supposed to achieve. -- Ueimor
Re: net/atm: warning in alloc_tx/__might_sleep
On Tue, Jan 10, 2017 at 9:40 AM, Eric Dumazet wrote: > On Tue, Jan 10, 2017 at 9:35 AM, Cong Wang wrote: >> On Mon, Jan 9, 2017 at 9:20 AM, Andrey Konovalov >> wrote: >> >> The fix should be straight-forward. Mind to try the attached patch? > > > You forgot to remove schedule() ? > > schedule(); > + wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); Ah, of course, never even compile it... :-/
Re: net/atm: warning in alloc_tx/__might_sleep
On Tue, Jan 10, 2017 at 9:35 AM, Cong Wang wrote: > On Mon, Jan 9, 2017 at 9:20 AM, Andrey Konovalov > wrote: > > The fix should be straight-forward. Mind to try the attached patch? You forgot to remove schedule() ? schedule(); + wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT);
Re: net/atm: warning in alloc_tx/__might_sleep
On Mon, Jan 9, 2017 at 9:20 AM, Andrey Konovalov wrote: > Hi! > > I've got the following error report while running the syzkaller fuzzer. > > On commit a121103c922847ba5010819a3f250f1f7fc84ab8 (4.10-rc3). > > A reproducer is attached. > > [ cut here ] > WARNING: CPU: 0 PID: 4114 at kernel/sched/core.c:7737 > __might_sleep+0x149/0x1a0 > do not call blocking ops when !TASK_RUNNING; state=1 set at > [] prepare_to_wait+0x182/0x530 > Modules linked in: > CPU: 0 PID: 4114 Comm: a.out Not tainted 4.10.0-rc3+ #59 > Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 > Call Trace: > __dump_stack lib/dump_stack.c:15 > dump_stack+0x292/0x398 lib/dump_stack.c:51 > __warn+0x19f/0x1e0 kernel/panic.c:547 > warn_slowpath_fmt+0xc5/0x110 kernel/panic.c:562 > __might_sleep+0x149/0x1a0 kernel/sched/core.c:7732 > slab_pre_alloc_hook mm/slab.h:408 > slab_alloc_node mm/slub.c:2634 > kmem_cache_alloc_node+0x14a/0x280 mm/slub.c:2744 > __alloc_skb+0x10f/0x800 net/core/skbuff.c:219 > alloc_skb ./include/linux/skbuff.h:926 > alloc_tx net/atm/common.c:75 > vcc_sendmsg+0x5e8/0x1010 net/atm/common.c:609 > sock_sendmsg_nosec net/socket.c:635 > sock_sendmsg+0xca/0x110 net/socket.c:645 > ___sys_sendmsg+0x9d2/0xae0 net/socket.c:1985 > __sys_sendmsg+0x138/0x320 net/socket.c:2019 > SYSC_sendmsg net/socket.c:2030 > SyS_sendmsg+0x2d/0x50 net/socket.c:2026 > entry_SYSCALL_64_fastpath+0x1f/0xc2 arch/x86/entry/entry_64.S:203 > RIP: 0033:0x7fcbacfddb79 > RSP: 002b:7ffed8b5a7b8 EFLAGS: 0206 ORIG_RAX: 002e > RAX: ffda RBX: 7ffed8b5a950 RCX: 7fcbacfddb79 > RDX: c000 RSI: 20002000 RDI: 0003 > RBP: 00400af0 R08: R09: > R10: R11: 0206 R12: > R13: 7ffed8b5a950 R14: R15: > ---[ end trace 9edf2da84d8112da ]--- > atm:sigd_send: bad message type 0 The fix should be straight-forward. Mind to try the attached patch? diff --git a/net/atm/common.c b/net/atm/common.c index a3ca922..b7d9661 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -571,8 +571,8 @@ int vcc_recvmsg(struct socket *sock, struct msghdr *msg, size_t size, int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size) { + DEFINE_WAIT_FUNC(wait, woken_wake_function); struct sock *sk = sock->sk; - DEFINE_WAIT(wait); struct atm_vcc *vcc; struct sk_buff *skb; int eff, error; @@ -604,7 +604,7 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size) } eff = (size+3) & ~3; /* align to word boundary */ - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); + add_wait_queue(sk_sleep(sk), &wait); error = 0; while (!(skb = alloc_tx(vcc, eff))) { if (m->msg_flags & MSG_DONTWAIT) { @@ -612,6 +612,7 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size) break; } schedule(); + wait_woken(&wait, TASK_INTERRUPTIBLE, MAX_SCHEDULE_TIMEOUT); if (signal_pending(current)) { error = -ERESTARTSYS; break; @@ -623,9 +624,8 @@ int vcc_sendmsg(struct socket *sock, struct msghdr *m, size_t size) send_sig(SIGPIPE, current, 0); break; } - prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); } - finish_wait(sk_sleep(sk), &wait); + remove_wait_queue(sk_sleep(sk), &wait); if (error) goto out; skb->dev = NULL; /* for paths shared with net_device interfaces */
net/atm: warning in alloc_tx/__might_sleep
Hi! I've got the following error report while running the syzkaller fuzzer. On commit a121103c922847ba5010819a3f250f1f7fc84ab8 (4.10-rc3). A reproducer is attached. [ cut here ] WARNING: CPU: 0 PID: 4114 at kernel/sched/core.c:7737 __might_sleep+0x149/0x1a0 do not call blocking ops when !TASK_RUNNING; state=1 set at [] prepare_to_wait+0x182/0x530 Modules linked in: CPU: 0 PID: 4114 Comm: a.out Not tainted 4.10.0-rc3+ #59 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:15 dump_stack+0x292/0x398 lib/dump_stack.c:51 __warn+0x19f/0x1e0 kernel/panic.c:547 warn_slowpath_fmt+0xc5/0x110 kernel/panic.c:562 __might_sleep+0x149/0x1a0 kernel/sched/core.c:7732 slab_pre_alloc_hook mm/slab.h:408 slab_alloc_node mm/slub.c:2634 kmem_cache_alloc_node+0x14a/0x280 mm/slub.c:2744 __alloc_skb+0x10f/0x800 net/core/skbuff.c:219 alloc_skb ./include/linux/skbuff.h:926 alloc_tx net/atm/common.c:75 vcc_sendmsg+0x5e8/0x1010 net/atm/common.c:609 sock_sendmsg_nosec net/socket.c:635 sock_sendmsg+0xca/0x110 net/socket.c:645 ___sys_sendmsg+0x9d2/0xae0 net/socket.c:1985 __sys_sendmsg+0x138/0x320 net/socket.c:2019 SYSC_sendmsg net/socket.c:2030 SyS_sendmsg+0x2d/0x50 net/socket.c:2026 entry_SYSCALL_64_fastpath+0x1f/0xc2 arch/x86/entry/entry_64.S:203 RIP: 0033:0x7fcbacfddb79 RSP: 002b:7ffed8b5a7b8 EFLAGS: 0206 ORIG_RAX: 002e RAX: ffda RBX: 7ffed8b5a950 RCX: 7fcbacfddb79 RDX: c000 RSI: 20002000 RDI: 0003 RBP: 00400af0 R08: R09: R10: R11: 0206 R12: R13: 7ffed8b5a950 R14: R15: ---[ end trace 9edf2da84d8112da ]--- atm:sigd_send: bad message type 0 // autogenerated by syzkaller (http://github.com/google/syzkaller) #ifndef __NR_mmap #define __NR_mmap 9 #endif #ifndef __NR_socket #define __NR_socket 41 #endif #ifndef __NR_ioctl #define __NR_ioctl 16 #endif #ifndef __NR_sendmsg #define __NR_sendmsg 46 #endif #define _GNU_SOURCE #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include #include const int kFailStatus = 67; const int kErrorStatus = 68; const int kRetryStatus = 69; __attribute__((noreturn)) void doexit(int status) { syscall(__NR_exit_group, status); volatile unsigned i; for (i = 0;; i++) { } } __attribute__((noreturn)) void fail(const char* msg, ...) { int e = errno; fflush(stdout); va_list args; va_start(args, msg); vfprintf(stderr, msg, args); va_end(args); fprintf(stderr, " (errno %d)\n", e); doexit(e == ENOMEM ? kRetryStatus : kFailStatus); } __attribute__((noreturn)) void exitf(const char* msg, ...) { int e = errno; fflush(stdout); va_list args; va_start(args, msg); vfprintf(stderr, msg, args); va_end(args); fprintf(stderr, " (errno %d)\n", e); doexit(kRetryStatus); } static int flag_debug; void debug(const char* msg, ...) { if (!flag_debug) return; va_list args; va_start(args, msg); vfprintf(stdout, msg, args); va_end(args); fflush(stdout); } __thread int skip_segv; __thread jmp_buf segv_env; static void segv_handler(int sig, siginfo_t* info, void* uctx) { if (__atomic_load_n(&skip_segv, __ATOMIC_RELAXED)) _longjmp(segv_env, 1); doexit(sig); for (;;) { } } static void install_segv_handler() { struct sigaction sa; memset(&sa, 0, sizeof(sa)); sa.sa_sigaction = segv_handler; sa.sa_flags = SA_NODEFER | SA_SIGINFO; sigaction(SIGSEGV, &sa, NULL); sigaction(SIGBUS, &sa, NULL); } #define NONFAILING(...)\ {\ __atomic_fetch_add(&skip_segv, 1, __ATOMIC_SEQ_CST); \ if (_setjmp(segv_env) == 0) { \ __VA_ARGS__; \ } \ __atomic_fetch_sub(&skip_segv, 1, __ATOMIC_SEQ_CST); \ } static uintptr_t execute_syscall(int nr, uintptr_t a0, uintptr_t a1, uintptr_t a2, uintptr_t a3, uintptr_t a4, uintptr_t a5, uintptr_t a6, uintptr_t a7, uintptr_t a8) { switch (nr) { default: return syscall(nr, a0, a1, a2, a3, a4, a5); } } static void setup_main_process(uint64_t pid, bool enable_tun) { struct sigaction sa; memset(&sa, 0, sizeof(sa)); sa.sa_handler = SIG_IGN; syscall(SYS_rt_sigaction, 0x20, &sa, NULL, 8); syscall(SYS_rt_sigaction, 0x21, &sa,