Re: BUG: MAX_STACK_TRACE_ENTRIES too low! (2)

2019-07-12 Thread Bart Van Assche

On 7/12/19 1:55 AM, Peter Zijlstra wrote:

On Thu, Jul 11, 2019 at 11:53:12AM -0700, Bart Van Assche wrote:

On 7/10/19 3:09 PM, Peter Zijlstra wrote:

One thing I mentioned when Thomas did the unwinder API changes was
trying to move lockdep over to something like stackdepot.

We can't directly use stackdepot as is, because it uses locks and memory
allocation, but we could maybe add a lower level API to it and use that
under the graph_lock() on static storage or something.

Otherwise we'll have to (re)implement something like it.

I've not looked at it in detail.


Hi Peter,

Is something like the untested patch below perhaps what you had in mind?


Most excellent, yes! Now I suppose the $64000 question is if it actually
reduces the amount of storage we use for stack traces..

Seems to boot just fine.. :-)


Hi Peter,

On my setup after some time the space occupied by stack traces stabilizes
to the following:

# grep stack /proc/lockdep_stats
 stack-trace entries:169456 [max: 524288]
 number of stack traces:9073
 number of stack hash chains:   6980

I think these numbers show that there are some but not too many hash
collisions (the code for the hash statistics was added after I e-mailed my
patch).

Bart.


Re: BUG: MAX_STACK_TRACE_ENTRIES too low! (2)

2019-07-12 Thread Peter Zijlstra
On Thu, Jul 11, 2019 at 11:53:12AM -0700, Bart Van Assche wrote:
> On 7/10/19 3:09 PM, Peter Zijlstra wrote:
> > One thing I mentioned when Thomas did the unwinder API changes was
> > trying to move lockdep over to something like stackdepot.
> > 
> > We can't directly use stackdepot as is, because it uses locks and memory
> > allocation, but we could maybe add a lower level API to it and use that
> > under the graph_lock() on static storage or something.
> > 
> > Otherwise we'll have to (re)implement something like it.
> > 
> > I've not looked at it in detail.
> 
> Hi Peter,
> 
> Is something like the untested patch below perhaps what you had in mind?

Most excellent, yes! Now I suppose the $64000 question is if it actually
reduces the amount of storage we use for stack traces..

Seems to boot just fine.. :-)


Re: BUG: MAX_STACK_TRACE_ENTRIES too low! (2)

2019-07-11 Thread Bart Van Assche

On 7/10/19 3:09 PM, Peter Zijlstra wrote:

One thing I mentioned when Thomas did the unwinder API changes was
trying to move lockdep over to something like stackdepot.

We can't directly use stackdepot as is, because it uses locks and memory
allocation, but we could maybe add a lower level API to it and use that
under the graph_lock() on static storage or something.

Otherwise we'll have to (re)implement something like it.

I've not looked at it in detail.


Hi Peter,

Is something like the untested patch below perhaps what you had in mind?

Thanks,

Bart.

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 57baa27f238c..a5c7c1813d35 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -66,10 +66,7 @@ struct lock_class_key {

 extern struct lock_class_key __lockdep_no_validate__;

-struct lock_trace {
-   unsigned intnr_entries;
-   unsigned intoffset;
-};
+struct lock_trace;

 #define LOCKSTAT_POINTS4

@@ -105,7 +102,7 @@ struct lock_class {
 * IRQ/softirq usage tracking bits:
 */
unsigned long   usage_mask;
-   struct lock_trace   usage_traces[XXX_LOCK_USAGE_STATES];
+   const struct lock_trace *usage_traces[XXX_LOCK_USAGE_STATES];

/*
 * Generation counter, when doing certain classes of graph walking,
@@ -193,7 +190,7 @@ struct lock_list {
struct list_headentry;
struct lock_class   *class;
struct lock_class   *links_to;
-   struct lock_trace   trace;
+   const struct lock_trace *trace;
int distance;

/*
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 341f52117f88..c3022367ba2a 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -449,33 +449,72 @@ static void print_lockdep_off(const char *bug_msg)
 unsigned long nr_stack_trace_entries;

 #if defined(CONFIG_TRACE_IRQFLAGS) && defined(CONFIG_PROVE_LOCKING)
+/**
+ * struct lock_trace - single stack backtrace
+ * @hash_entry:Entry in a stack_trace_hash[] list.
+ * @hash:  jhash() of @entries.
+ * @nr_entries:Number of entries in @entries.
+ * @entries:   Actual stack backtrace.
+ */
+struct lock_trace {
+   struct hlist_node   hash_entry;
+   u32 hash;
+   u32 nr_entries;
+   unsigned long   entries[0] __aligned(sizeof(unsigned long));
+};
+#define LOCK_TRACE_SIZE_IN_LONGS   \
+   (sizeof(struct lock_trace) / sizeof(unsigned long))
 /*
- * Stack-trace: tightly packed array of stack backtrace
- * addresses. Protected by the graph_lock.
+ * Stack-trace: sequence of lock_trace structures. Protected by the graph_lock.
  */
 static unsigned long stack_trace[MAX_STACK_TRACE_ENTRIES];
+static struct hlist_head stack_trace_hash[STACK_TRACE_HASH_SIZE];
+
+static bool traces_identical(struct lock_trace *t1, struct lock_trace *t2)
+{
+   return t1->hash == t2->hash && t1->nr_entries == t2->nr_entries &&
+   memcmp(t1->entries, t2->entries,
+  t1->nr_entries * sizeof(t1->entries[0])) == 0;
+}

-static int save_trace(struct lock_trace *trace)
+static struct lock_trace *save_trace(void)
 {
-   unsigned long *entries = stack_trace + nr_stack_trace_entries;
+   struct lock_trace *trace, *t2;
+   struct hlist_head *hash_head;
+   u32 hash;
unsigned int max_entries;

-   trace->offset = nr_stack_trace_entries;
-   max_entries = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries;
-   trace->nr_entries = stack_trace_save(entries, max_entries, 3);
-   nr_stack_trace_entries += trace->nr_entries;
+   BUILD_BUG_ON_NOT_POWER_OF_2(STACK_TRACE_HASH_SIZE);
+   BUILD_BUG_ON(LOCK_TRACE_SIZE_IN_LONGS >= MAX_STACK_TRACE_ENTRIES);
+
+   trace = (struct lock_trace *)(stack_trace + nr_stack_trace_entries);
+   max_entries = MAX_STACK_TRACE_ENTRIES - nr_stack_trace_entries -
+   LOCK_TRACE_SIZE_IN_LONGS;
+   trace->nr_entries = stack_trace_save(trace->entries, max_entries, 3);

-   if (nr_stack_trace_entries >= MAX_STACK_TRACE_ENTRIES-1) {
+   if (nr_stack_trace_entries >= MAX_STACK_TRACE_ENTRIES -
+   LOCK_TRACE_SIZE_IN_LONGS - 1) {
if (!debug_locks_off_graph_unlock())
-   return 0;
+   return NULL;

print_lockdep_off("BUG: MAX_STACK_TRACE_ENTRIES too low!");
dump_stack();

-   return 0;
+   return NULL;
}

-   return 1;
+   hash = jhash(trace->entries, trace->nr_entries *
+sizeof(trace->entries[0]), 0);
+   trace->hash = hash;
+   hash_head = stack_trace_hash + (hash & (STACK_TRACE_HASH_SIZE - 1));
+   hlist_for_each_entry(t2, hash_head, hash_entry) {
+   

Re: BUG: MAX_STACK_TRACE_ENTRIES too low! (2)

2019-07-10 Thread Peter Zijlstra
On Wed, Jul 10, 2019 at 02:23:39PM -0700, Bart Van Assche wrote:
> As one can see in remove_class_from_lock_chain() there is already code
> present in lockdep for compacting the chain_hlocks[] array. Similar code
> is not yet available for the stack_trace[] array because I had not
> encountered any overflows of that array during my tests.

One thing I mentioned when Thomas did the unwinder API changes was
trying to move lockdep over to something like stackdepot.

We can't directly use stackdepot as is, because it uses locks and memory
allocation, but we could maybe add a lower level API to it and use that
under the graph_lock() on static storage or something.

Otherwise we'll have to (re)implement something like it.

I've not looked at it in detail.


Re: BUG: MAX_STACK_TRACE_ENTRIES too low! (2)

2019-07-10 Thread Bart Van Assche
On 7/10/19 1:47 PM, Eric Dumazet wrote:
> 
> 
> On 7/10/19 9:09 PM, Bart Van Assche wrote:
>> On 7/10/19 11:44 AM, Eric Dumazet wrote:
>>> If anything using workqueues in dynamically allocated objects can turn off 
>>> lockdep,
>>> we have a serious issue.
>>
>> As far as I know that issue is only hit by syzbot tests.
> 
> 
> 
>> Anyway, I see
>> two possible paths forward:
>> * Revert the patch that makes workqueues use dynamic lockdep keys and
>> thereby reintroduce the false positives that lockdep reports if
>> different workqueues share a lockdep key. I think there is agreement
>> that having to analyze lockdep false positives is annoying, time
>> consuming and something nobody likes.
>> * Modify lockdep such that space in its fixed size arrays that is no
>> longer in use gets reused. Since the stack traces saved in the
>> stack_trace[] array have a variable size that array will have to be
>> compacted to avoid fragmentation.
>>
> 
> Can not destroy_workqueue() undo what alloc_workqueue() did ?

destroy_workqueue() already calls lockdep_unregister_key(). If that
wouldn't happen then sooner or later one of the warning statements in
the lockdep code would be triggered, e.g. the WARN_ON_ONCE() statement
in lockdep_register_key(). I think the root cause is that
lockdep_unregister_key() does not free the stack traces recorded by the
lockdep save_trace() function. save_trace() is called every time a lock
is acquired and lockdep encounters a new lock dependency chain.

As one can see in remove_class_from_lock_chain() there is already code
present in lockdep for compacting the chain_hlocks[] array. Similar code
is not yet available for the stack_trace[] array because I had not
encountered any overflows of that array during my tests.

Bart.



Re: BUG: MAX_STACK_TRACE_ENTRIES too low! (2)

2019-07-10 Thread Eric Dumazet



On 7/10/19 9:09 PM, Bart Van Assche wrote:
> On 7/10/19 11:44 AM, Eric Dumazet wrote:
>> If anything using workqueues in dynamically allocated objects can turn off 
>> lockdep,
>> we have a serious issue.
> 
> As far as I know that issue is only hit by syzbot tests.



> Anyway, I see
> two possible paths forward:
> * Revert the patch that makes workqueues use dynamic lockdep keys and
> thereby reintroduce the false positives that lockdep reports if
> different workqueues share a lockdep key. I think there is agreement
> that having to analyze lockdep false positives is annoying, time
> consuming and something nobody likes.
> * Modify lockdep such that space in its fixed size arrays that is no
> longer in use gets reused. Since the stack traces saved in the
> stack_trace[] array have a variable size that array will have to be
> compacted to avoid fragmentation.
> 

Can not destroy_workqueue() undo what alloc_workqueue() did ?


Re: BUG: MAX_STACK_TRACE_ENTRIES too low! (2)

2019-07-10 Thread Bart Van Assche
On 7/10/19 11:44 AM, Eric Dumazet wrote:
> If anything using workqueues in dynamically allocated objects can turn off 
> lockdep,
> we have a serious issue.

As far as I know that issue is only hit by syzbot tests. Anyway, I see
two possible paths forward:
* Revert the patch that makes workqueues use dynamic lockdep keys and
thereby reintroduce the false positives that lockdep reports if
different workqueues share a lockdep key. I think there is agreement
that having to analyze lockdep false positives is annoying, time
consuming and something nobody likes.
* Modify lockdep such that space in its fixed size arrays that is no
longer in use gets reused. Since the stack traces saved in the
stack_trace[] array have a variable size that array will have to be
compacted to avoid fragmentation.

Did I overlook anything?

Bart.




Re: BUG: MAX_STACK_TRACE_ENTRIES too low! (2)

2019-07-10 Thread Eric Dumazet



On 7/10/19 8:36 PM, Bart Van Assche wrote:
> On 7/10/19 11:02 AM, Eric Biggers wrote:
>> I already mentioned that io_uring triggers it too.
>>
>> Those are just 2 cases that syzbot happened to generate reproducers for.  I
>> expect there are many others too, since many places in the kernel allocate
>> workqueues.  AFAICS most are placed in static or global variables which 
>> avoids
>> this issue, but there are still many cases where a workqueue is owned by some
>> dynamic structure that can have a much shorter lifetime.
>>
>> You can also check the other syzbot reports that look similar
>> (https://lore.kernel.org/lkml/20190710055838.GC2152@sol.localdomain/).
>> Two of them have C reproducers too.
> 
> As you may know lockdep cannot use dynamic memory allocation because
> doing so would introduce a circular dependency between lockdep and the
> memory allocator. Hence the fixed size arrays in the lockdep code.
> Additionally, as far as I know lockdep works fine for human kernel
> developers and only syzbot runs code that triggers the lockdep limits.
> So I think it's up to the syzbot authors to come up with a solution. I
> mean another solution than finger pointing at kernel developers.

This is silly, syzbot simply uses kernel standard system calls.

If anything using workqueues in dynamically allocated objects can turn off 
lockdep,
we have a serious issue.

Please work on this issue instead of trying to blame someone else.

Thank you.



Re: BUG: MAX_STACK_TRACE_ENTRIES too low! (2)

2019-07-10 Thread Bart Van Assche
On 7/10/19 11:02 AM, Eric Biggers wrote:
> I already mentioned that io_uring triggers it too.
> 
> Those are just 2 cases that syzbot happened to generate reproducers for.  I
> expect there are many others too, since many places in the kernel allocate
> workqueues.  AFAICS most are placed in static or global variables which avoids
> this issue, but there are still many cases where a workqueue is owned by some
> dynamic structure that can have a much shorter lifetime.
> 
> You can also check the other syzbot reports that look similar
> (https://lore.kernel.org/lkml/20190710055838.GC2152@sol.localdomain/).
> Two of them have C reproducers too.

As you may know lockdep cannot use dynamic memory allocation because
doing so would introduce a circular dependency between lockdep and the
memory allocator. Hence the fixed size arrays in the lockdep code.
Additionally, as far as I know lockdep works fine for human kernel
developers and only syzbot runs code that triggers the lockdep limits.
So I think it's up to the syzbot authors to come up with a solution. I
mean another solution than finger pointing at kernel developers.

Bart.




Re: BUG: MAX_STACK_TRACE_ENTRIES too low! (2)

2019-07-10 Thread Eric Biggers
On Wed, Jul 10, 2019 at 10:46:00AM -0700, Bart Van Assche wrote:
> On 7/10/19 10:21 AM, Eric Biggers wrote:
> > With my simplified reproducer, on commit 669de8bda87b ("kernel/workqueue: 
> > Use
> > dynamic lockdep keys for workqueues") I see:
> > 
> > WARNING: CPU: 3 PID: 189 at kernel/locking/lockdep.c:747 
> > register_lock_class+0x4f6/0x580
> > 
> > and then somewhat later:
> > 
> > BUG: MAX_LOCKDEP_KEYS too low!
> > 
> > If on top of that I cherry pick commit 28d49e282665 ("locking/lockdep: 
> > Shrink
> > struct lock_class_key"), I see instead:
> > 
> > BUG: MAX_STACK_TRACE_ENTRIES too low!
> > 
> > I also see that on mainline.
> > 
> > Alternatively, if I check out 669de8bda87b and revert it, I don't see 
> > anything.
> 
> Hi Eric,
> 
> Is the rdma_ucm code the only code that triggers the "BUG:
> MAX_STACK_TRACE_ENTRIES too low!" complaint or is this complaint also
> triggered by other kernel code? I'm asking this because I think that
> fixing this would require to implement garbage collection for the
> stack_trace[] array in the lockdep code. That would make the lockdep
> code slower. I don't think that making the lockdep code slower would be
> welcome.
> 
> Bart.

I already mentioned that io_uring triggers it too.

Those are just 2 cases that syzbot happened to generate reproducers for.  I
expect there are many others too, since many places in the kernel allocate
workqueues.  AFAICS most are placed in static or global variables which avoids
this issue, but there are still many cases where a workqueue is owned by some
dynamic structure that can have a much shorter lifetime.

You can also check the other syzbot reports that look similar
(https://lore.kernel.org/lkml/20190710055838.GC2152@sol.localdomain/).
Two of them have C reproducers too.

- Eric


Re: BUG: MAX_STACK_TRACE_ENTRIES too low! (2)

2019-07-10 Thread Bart Van Assche
On 7/10/19 10:21 AM, Eric Biggers wrote:
> With my simplified reproducer, on commit 669de8bda87b ("kernel/workqueue: Use
> dynamic lockdep keys for workqueues") I see:
> 
>   WARNING: CPU: 3 PID: 189 at kernel/locking/lockdep.c:747 
> register_lock_class+0x4f6/0x580
> 
> and then somewhat later:
> 
>   BUG: MAX_LOCKDEP_KEYS too low!
> 
> If on top of that I cherry pick commit 28d49e282665 ("locking/lockdep: Shrink
> struct lock_class_key"), I see instead:
> 
>   BUG: MAX_STACK_TRACE_ENTRIES too low!
> 
> I also see that on mainline.
> 
> Alternatively, if I check out 669de8bda87b and revert it, I don't see 
> anything.

Hi Eric,

Is the rdma_ucm code the only code that triggers the "BUG:
MAX_STACK_TRACE_ENTRIES too low!" complaint or is this complaint also
triggered by other kernel code? I'm asking this because I think that
fixing this would require to implement garbage collection for the
stack_trace[] array in the lockdep code. That would make the lockdep
code slower. I don't think that making the lockdep code slower would be
welcome.

Bart.



Re: BUG: MAX_STACK_TRACE_ENTRIES too low! (2)

2019-07-10 Thread Eric Biggers
On Wed, Jul 10, 2019 at 10:00:59AM -0700, Eric Biggers wrote:
> On Wed, Jul 10, 2019 at 07:19:55AM -0700, Bart Van Assche wrote:
> > On 7/9/19 10:30 PM, Eric Biggers wrote:
> > > [Moved most people to Bcc; syzbot added way too many random people to 
> > > this.]
> > > 
> > > Hi Bart,
> > > 
> > > On Sat, Mar 30, 2019 at 07:17:09PM -0700, Bart Van Assche wrote:
> > > > On 3/30/19 2:58 PM, syzbot wrote:
> > > > > syzbot has bisected this bug to:
> > > > > 
> > > > > commit 669de8bda87b92ab9a2fc663b3f5743c2ad1ae9f
> > > > > Author: Bart Van Assche 
> > > > > Date:   Thu Feb 14 23:00:54 2019 +
> > > > > 
> > > > >       kernel/workqueue: Use dynamic lockdep keys for workqueues
> > > > > 
> > > > > bisection log:  
> > > > > https://syzkaller.appspot.com/x/bisect.txt?x=17f1bacd20
> > > > > start commit:   0e40da3e Merge tag 'kbuild-fixes-v5.1' of
> > > > > git://git.kernel..
> > > > > git tree:   upstream
> > > > > final crash:    
> > > > > https://syzkaller.appspot.com/x/report.txt?x=1409bacd20
> > > > > console output: 
> > > > > https://syzkaller.appspot.com/x/log.txt?x=1009bacd20
> > > > > kernel config:  
> > > > > https://syzkaller.appspot.com/x/.config?x=8dcdce25ea72bedf
> > > > > dashboard link:
> > > > > https://syzkaller.appspot.com/bug?extid=6f39a9deb697359fe520
> > > > > syz repro:  
> > > > > https://syzkaller.appspot.com/x/repro.syz?x=10e1bacd20
> > > > > C reproducer:   
> > > > > https://syzkaller.appspot.com/x/repro.c?x=1120fe0f20
> > > > > 
> > > > > Reported-by: syzbot+6f39a9deb697359fe...@syzkaller.appspotmail.com
> > > > > Fixes: 669de8bda87b ("kernel/workqueue: Use dynamic lockdep keys for
> > > > > workqueues")
> > > > > 
> > > > > For information about bisection process see:
> > > > > https://goo.gl/tpsmEJ#bisection
> > > > 
> > > > Hi Dmitry,
> > > > 
> > > > This bisection result doesn't make sense to me. As one can see, the 
> > > > message
> > > > "BUG: MAX_STACK_TRACE_ENTRIES too low!" does not occur in the console 
> > > > output
> > > > the above console output URL points at.
> > > > 
> > > > Bart.
> > > 
> > > This is still happening on mainline, and I think this bisection result is
> > > probably correct.  syzbot did start hitting something different at the 
> > > very end
> > > of the bisection ("WARNING: CPU: 0 PID: 9153 at 
> > > kernel/locking/lockdep.c:747")
> > > but that seems to be just because your commit had a lot of bugs in it, 
> > > which had
> > > to be fixed by later commits.  In particular, the WARNING seems to have 
> > > been
> > > fixed by commit 28d49e282665e ("locking/lockdep: Shrink struct 
> > > lock_class_key").
> > > 
> > > What seems to still be happening is that the dynamic lockdep keys which 
> > > you
> > > added make it possible for an unbounded number of entries to be added to 
> > > the
> > > fixed length stack_trace[] array in kernel/locking/lockdep.c.  Hence the 
> > > "BUG:
> > > MAX_STACK_TRACE_ENTRIES too low!".
> > > 
> > > Am I understanding it correctly?  How did you intend this to work?
> > 
> > The last two paragraphs do not make sense to me. My changes do not increase
> > the number of stack traces that get recorded by the lockdep code.
> > 
> > Bart.
> > 
> 
> Interesting.  How do we explain that repeatedly allocating and freeing a
> workqueue is causing the number of lockdep stack trace entries to grow without
> bound, though?
> 
> This can be reproduced with the following (which I simplified from the C
> reproducer that syzbot generated and used for its bisection):
> 
>   #include 
>   #include 
> 
>   int main()
>   {
>   for (;;) {
>   int fd = open("/dev/infiniband/rdma_cm", O_RDWR);
> 
>   close(fd);
>   }
>   }
> 
> The workqueue is allocated in ucma_open() and freed in ucma_close().  If I run
> 'grep stack-trace /proc/lockdep_stats' while reproducer is running, I can see
> the number is growing continuously until it hits the limit.
> 
> There is also a reproducer using io_uring instead of rdma_cm
> (https://syzkaller.appspot.com/text?tag=ReproC=16483bf860).
> In both cases the workqueue is associated with a file descriptor; the 
> workqueue
> is allocated and freed as the file descriptor is opened and closed.
> 
> Anyone have any ideas?
> 
> - Eric

With my simplified reproducer, on commit 669de8bda87b ("kernel/workqueue: Use
dynamic lockdep keys for workqueues") I see:

WARNING: CPU: 3 PID: 189 at kernel/locking/lockdep.c:747 
register_lock_class+0x4f6/0x580

and then somewhat later:

BUG: MAX_LOCKDEP_KEYS too low!

If on top of that I cherry pick commit 28d49e282665 ("locking/lockdep: Shrink
struct lock_class_key"), I see instead:

BUG: MAX_STACK_TRACE_ENTRIES too low!

I also see that on mainline.

Alternatively, if I check out 669de8bda87b and revert it, I don't see anything.

- Eric


Re: BUG: MAX_STACK_TRACE_ENTRIES too low! (2)

2019-07-10 Thread Eric Biggers
On Wed, Jul 10, 2019 at 07:19:55AM -0700, Bart Van Assche wrote:
> On 7/9/19 10:30 PM, Eric Biggers wrote:
> > [Moved most people to Bcc; syzbot added way too many random people to this.]
> > 
> > Hi Bart,
> > 
> > On Sat, Mar 30, 2019 at 07:17:09PM -0700, Bart Van Assche wrote:
> > > On 3/30/19 2:58 PM, syzbot wrote:
> > > > syzbot has bisected this bug to:
> > > > 
> > > > commit 669de8bda87b92ab9a2fc663b3f5743c2ad1ae9f
> > > > Author: Bart Van Assche 
> > > > Date:   Thu Feb 14 23:00:54 2019 +
> > > > 
> > > >       kernel/workqueue: Use dynamic lockdep keys for workqueues
> > > > 
> > > > bisection log:  
> > > > https://syzkaller.appspot.com/x/bisect.txt?x=17f1bacd20
> > > > start commit:   0e40da3e Merge tag 'kbuild-fixes-v5.1' of
> > > > git://git.kernel..
> > > > git tree:   upstream
> > > > final crash:    
> > > > https://syzkaller.appspot.com/x/report.txt?x=1409bacd20
> > > > console output: https://syzkaller.appspot.com/x/log.txt?x=1009bacd20
> > > > kernel config:  
> > > > https://syzkaller.appspot.com/x/.config?x=8dcdce25ea72bedf
> > > > dashboard link:
> > > > https://syzkaller.appspot.com/bug?extid=6f39a9deb697359fe520
> > > > syz repro:  
> > > > https://syzkaller.appspot.com/x/repro.syz?x=10e1bacd20
> > > > C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=1120fe0f20
> > > > 
> > > > Reported-by: syzbot+6f39a9deb697359fe...@syzkaller.appspotmail.com
> > > > Fixes: 669de8bda87b ("kernel/workqueue: Use dynamic lockdep keys for
> > > > workqueues")
> > > > 
> > > > For information about bisection process see:
> > > > https://goo.gl/tpsmEJ#bisection
> > > 
> > > Hi Dmitry,
> > > 
> > > This bisection result doesn't make sense to me. As one can see, the 
> > > message
> > > "BUG: MAX_STACK_TRACE_ENTRIES too low!" does not occur in the console 
> > > output
> > > the above console output URL points at.
> > > 
> > > Bart.
> > 
> > This is still happening on mainline, and I think this bisection result is
> > probably correct.  syzbot did start hitting something different at the very 
> > end
> > of the bisection ("WARNING: CPU: 0 PID: 9153 at 
> > kernel/locking/lockdep.c:747")
> > but that seems to be just because your commit had a lot of bugs in it, 
> > which had
> > to be fixed by later commits.  In particular, the WARNING seems to have been
> > fixed by commit 28d49e282665e ("locking/lockdep: Shrink struct 
> > lock_class_key").
> > 
> > What seems to still be happening is that the dynamic lockdep keys which you
> > added make it possible for an unbounded number of entries to be added to the
> > fixed length stack_trace[] array in kernel/locking/lockdep.c.  Hence the 
> > "BUG:
> > MAX_STACK_TRACE_ENTRIES too low!".
> > 
> > Am I understanding it correctly?  How did you intend this to work?
> 
> The last two paragraphs do not make sense to me. My changes do not increase
> the number of stack traces that get recorded by the lockdep code.
> 
> Bart.
> 

Interesting.  How do we explain that repeatedly allocating and freeing a
workqueue is causing the number of lockdep stack trace entries to grow without
bound, though?

This can be reproduced with the following (which I simplified from the C
reproducer that syzbot generated and used for its bisection):

#include 
#include 

int main()
{
for (;;) {
int fd = open("/dev/infiniband/rdma_cm", O_RDWR);

close(fd);
}
}

The workqueue is allocated in ucma_open() and freed in ucma_close().  If I run
'grep stack-trace /proc/lockdep_stats' while reproducer is running, I can see
the number is growing continuously until it hits the limit.

There is also a reproducer using io_uring instead of rdma_cm
(https://syzkaller.appspot.com/text?tag=ReproC=16483bf860).
In both cases the workqueue is associated with a file descriptor; the workqueue
is allocated and freed as the file descriptor is opened and closed.

Anyone have any ideas?

- Eric


Re: BUG: MAX_STACK_TRACE_ENTRIES too low! (2)

2019-07-10 Thread Bart Van Assche

On 7/9/19 10:30 PM, Eric Biggers wrote:

[Moved most people to Bcc; syzbot added way too many random people to this.]

Hi Bart,

On Sat, Mar 30, 2019 at 07:17:09PM -0700, Bart Van Assche wrote:

On 3/30/19 2:58 PM, syzbot wrote:

syzbot has bisected this bug to:

commit 669de8bda87b92ab9a2fc663b3f5743c2ad1ae9f
Author: Bart Van Assche 
Date:   Thu Feb 14 23:00:54 2019 +

      kernel/workqueue: Use dynamic lockdep keys for workqueues

bisection log:  https://syzkaller.appspot.com/x/bisect.txt?x=17f1bacd20
start commit:   0e40da3e Merge tag 'kbuild-fixes-v5.1' of
git://git.kernel..
git tree:   upstream
final crash:    https://syzkaller.appspot.com/x/report.txt?x=1409bacd20
console output: https://syzkaller.appspot.com/x/log.txt?x=1009bacd20
kernel config:  https://syzkaller.appspot.com/x/.config?x=8dcdce25ea72bedf
dashboard link:
https://syzkaller.appspot.com/bug?extid=6f39a9deb697359fe520
syz repro:  https://syzkaller.appspot.com/x/repro.syz?x=10e1bacd20
C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=1120fe0f20

Reported-by: syzbot+6f39a9deb697359fe...@syzkaller.appspotmail.com
Fixes: 669de8bda87b ("kernel/workqueue: Use dynamic lockdep keys for
workqueues")

For information about bisection process see:
https://goo.gl/tpsmEJ#bisection


Hi Dmitry,

This bisection result doesn't make sense to me. As one can see, the message
"BUG: MAX_STACK_TRACE_ENTRIES too low!" does not occur in the console output
the above console output URL points at.

Bart.


This is still happening on mainline, and I think this bisection result is
probably correct.  syzbot did start hitting something different at the very end
of the bisection ("WARNING: CPU: 0 PID: 9153 at kernel/locking/lockdep.c:747")
but that seems to be just because your commit had a lot of bugs in it, which had
to be fixed by later commits.  In particular, the WARNING seems to have been
fixed by commit 28d49e282665e ("locking/lockdep: Shrink struct lock_class_key").

What seems to still be happening is that the dynamic lockdep keys which you
added make it possible for an unbounded number of entries to be added to the
fixed length stack_trace[] array in kernel/locking/lockdep.c.  Hence the "BUG:
MAX_STACK_TRACE_ENTRIES too low!".

Am I understanding it correctly?  How did you intend this to work?


The last two paragraphs do not make sense to me. My changes do not 
increase the number of stack traces that get recorded by the lockdep code.


Bart.


Re: BUG: MAX_STACK_TRACE_ENTRIES too low! (2)

2019-07-09 Thread Eric Biggers
[Moved most people to Bcc; syzbot added way too many random people to this.]

Hi Bart,

On Sat, Mar 30, 2019 at 07:17:09PM -0700, Bart Van Assche wrote:
> On 3/30/19 2:58 PM, syzbot wrote:
> > syzbot has bisected this bug to:
> > 
> > commit 669de8bda87b92ab9a2fc663b3f5743c2ad1ae9f
> > Author: Bart Van Assche 
> > Date:   Thu Feb 14 23:00:54 2019 +
> > 
> >      kernel/workqueue: Use dynamic lockdep keys for workqueues
> > 
> > bisection log:  https://syzkaller.appspot.com/x/bisect.txt?x=17f1bacd20
> > start commit:   0e40da3e Merge tag 'kbuild-fixes-v5.1' of
> > git://git.kernel..
> > git tree:   upstream
> > final crash:    https://syzkaller.appspot.com/x/report.txt?x=1409bacd20
> > console output: https://syzkaller.appspot.com/x/log.txt?x=1009bacd20
> > kernel config:  https://syzkaller.appspot.com/x/.config?x=8dcdce25ea72bedf
> > dashboard link:
> > https://syzkaller.appspot.com/bug?extid=6f39a9deb697359fe520
> > syz repro:  https://syzkaller.appspot.com/x/repro.syz?x=10e1bacd20
> > C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=1120fe0f20
> > 
> > Reported-by: syzbot+6f39a9deb697359fe...@syzkaller.appspotmail.com
> > Fixes: 669de8bda87b ("kernel/workqueue: Use dynamic lockdep keys for
> > workqueues")
> > 
> > For information about bisection process see:
> > https://goo.gl/tpsmEJ#bisection
> 
> Hi Dmitry,
> 
> This bisection result doesn't make sense to me. As one can see, the message
> "BUG: MAX_STACK_TRACE_ENTRIES too low!" does not occur in the console output
> the above console output URL points at.
> 
> Bart.

This is still happening on mainline, and I think this bisection result is
probably correct.  syzbot did start hitting something different at the very end
of the bisection ("WARNING: CPU: 0 PID: 9153 at kernel/locking/lockdep.c:747")
but that seems to be just because your commit had a lot of bugs in it, which had
to be fixed by later commits.  In particular, the WARNING seems to have been
fixed by commit 28d49e282665e ("locking/lockdep: Shrink struct lock_class_key").

What seems to still be happening is that the dynamic lockdep keys which you
added make it possible for an unbounded number of entries to be added to the
fixed length stack_trace[] array in kernel/locking/lockdep.c.  Hence the "BUG:
MAX_STACK_TRACE_ENTRIES too low!".

Am I understanding it correctly?  How did you intend this to work?

- Eric


Re: BUG: MAX_STACK_TRACE_ENTRIES too low! (2)

2019-03-30 Thread Bart Van Assche

On 3/30/19 2:58 PM, syzbot wrote:

syzbot has bisected this bug to:

commit 669de8bda87b92ab9a2fc663b3f5743c2ad1ae9f
Author: Bart Van Assche 
Date:   Thu Feb 14 23:00:54 2019 +

     kernel/workqueue: Use dynamic lockdep keys for workqueues

bisection log:  https://syzkaller.appspot.com/x/bisect.txt?x=17f1bacd20
start commit:   0e40da3e Merge tag 'kbuild-fixes-v5.1' of 
git://git.kernel..

git tree:   upstream
final crash:    https://syzkaller.appspot.com/x/report.txt?x=1409bacd20
console output: https://syzkaller.appspot.com/x/log.txt?x=1009bacd20
kernel config:  https://syzkaller.appspot.com/x/.config?x=8dcdce25ea72bedf
dashboard link: 
https://syzkaller.appspot.com/bug?extid=6f39a9deb697359fe520

syz repro:  https://syzkaller.appspot.com/x/repro.syz?x=10e1bacd20
C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=1120fe0f20

Reported-by: syzbot+6f39a9deb697359fe...@syzkaller.appspotmail.com
Fixes: 669de8bda87b ("kernel/workqueue: Use dynamic lockdep keys for 
workqueues")


For information about bisection process see: 
https://goo.gl/tpsmEJ#bisection


Hi Dmitry,

This bisection result doesn't make sense to me. As one can see, the 
message "BUG: MAX_STACK_TRACE_ENTRIES too low!" does not occur in the 
console output the above console output URL points at.


Bart.




Re: BUG: MAX_STACK_TRACE_ENTRIES too low! (2)

2019-03-30 Thread syzbot

syzbot has bisected this bug to:

commit 669de8bda87b92ab9a2fc663b3f5743c2ad1ae9f
Author: Bart Van Assche 
Date:   Thu Feb 14 23:00:54 2019 +

kernel/workqueue: Use dynamic lockdep keys for workqueues

bisection log:  https://syzkaller.appspot.com/x/bisect.txt?x=17f1bacd20
start commit:   0e40da3e Merge tag 'kbuild-fixes-v5.1' of git://git.kernel..
git tree:   upstream
final crash:https://syzkaller.appspot.com/x/report.txt?x=1409bacd20
console output: https://syzkaller.appspot.com/x/log.txt?x=1009bacd20
kernel config:  https://syzkaller.appspot.com/x/.config?x=8dcdce25ea72bedf
dashboard link: https://syzkaller.appspot.com/bug?extid=6f39a9deb697359fe520
syz repro:  https://syzkaller.appspot.com/x/repro.syz?x=10e1bacd20
C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=1120fe0f20

Reported-by: syzbot+6f39a9deb697359fe...@syzkaller.appspotmail.com
Fixes: 669de8bda87b ("kernel/workqueue: Use dynamic lockdep keys for  
workqueues")


For information about bisection process see: https://goo.gl/tpsmEJ#bisection


Re: BUG: MAX_STACK_TRACE_ENTRIES too low! (2)

2019-03-30 Thread syzbot

syzbot has found a reproducer for the following crash on:

HEAD commit:0e40da3e Merge tag 'kbuild-fixes-v5.1' of git://git.kernel..
git tree:   upstream
console output: https://syzkaller.appspot.com/x/log.txt?x=14d9123f20
kernel config:  https://syzkaller.appspot.com/x/.config?x=8dcdce25ea72bedf
dashboard link: https://syzkaller.appspot.com/bug?extid=6f39a9deb697359fe520
compiler:   gcc (GCC) 9.0.0 20181231 (experimental)
userspace arch: i386
syz repro:  https://syzkaller.appspot.com/x/repro.syz?x=1244591f20
C reproducer:   https://syzkaller.appspot.com/x/repro.c?x=12611c7320

IMPORTANT: if you fix the bug, please add the following tag to the commit:
Reported-by: syzbot+6f39a9deb697359fe...@syzkaller.appspotmail.com

BUG: MAX_STACK_TRACE_ENTRIES too low!
turning off the locking correctness validator.
CPU: 0 PID: 678 Comm: syz-executor519 Not tainted 5.1.0-rc2+ #43
Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS  
Google 01/01/2011

Call Trace:
 __dump_stack lib/dump_stack.c:77 [inline]
 dump_stack+0x172/0x1f0 lib/dump_stack.c:113
 save_trace kernel/locking/lockdep.c:467 [inline]
 save_trace.cold+0x14/0x19 kernel/locking/lockdep.c:437
 mark_lock+0x2fb/0x1380 kernel/locking/lockdep.c:3410
 __lock_acquire+0x548/0x3fb0 kernel/locking/lockdep.c:3657
 lock_acquire+0x16f/0x3f0 kernel/locking/lockdep.c:4211
 flush_workqueue+0x126/0x14c0 kernel/workqueue.c:2774
 drain_workqueue+0x1b4/0x470 kernel/workqueue.c:2939
 destroy_workqueue+0x21/0x700 kernel/workqueue.c:4320
 ucma_close+0x289/0x320 drivers/infiniband/core/ucma.c:1786
 __fput+0x2e5/0x8d0 fs/file_table.c:278
 fput+0x16/0x20 fs/file_table.c:309
 task_work_run+0x14a/0x1c0 kernel/task_work.c:113
 tracehook_notify_resume include/linux/tracehook.h:188 [inline]
 exit_to_usermode_loop+0x273/0x2c0 arch/x86/entry/common.c:166
 prepare_exit_to_usermode arch/x86/entry/common.c:197 [inline]
 syscall_return_slowpath arch/x86/entry/common.c:268 [inline]
 do_syscall_32_irqs_on arch/x86/entry/common.c:341 [inline]
 do_fast_syscall_32+0xa9d/0xc98 arch/x86/entry/common.c:397
 entry_SYSENTER_compat+0x70/0x7f arch/x86/entry/entry_64_compat.S:139
RIP: 0023:0xf7fc4869
Code: 85 d2 74 02 89 0a 5b 5d c3 8b 04 24 c3 8b 14 24 c3 8b 3c 24 c3 90 90  
90 90 90 90 90 90 90 90 90 90 51 52 55 89 e5 0f 34 cd 80 <5d> 5a 59 c3 90  
90 90 90 eb 0d 90 90 90 90 90 90 90 90 90 90 90 90

RSP: 002b:ffc382bc EFLAGS: 0246 ORIG_RAX: 0006
RAX:  RBX: 0003 RCX: 22c0
RDX: 0004 RSI: 080bc36e RDI: 22d8
RBP: ffc38308 R08:  R09: 
R10:  R11:  R12: 
R13:  R14:  R15: