On Wed, Sep 30, 2020 at 11:49:37AM +0200, Peter Zijlstra wrote: > On Wed, Sep 30, 2020 at 11:16:11AM +0200, Peter Zijlstra wrote: > > On Wed, Sep 30, 2020 at 07:08:23AM +0800, Boqun Feng wrote: > > > I think there are two problems here: > > > > > > 1) the "(null)" means we don't have the "usage_str" for a usage bit, > > > which I think is the LOCK_USED_READ bit introduced by Peter at > > > 23870f122768 ('locking/lockdep: Fix "USED" <- "IN-NMI" inversions'). > > > > > > 2) the next null-ptr-deref, and I think this is also caused by > > > LOCK_USED_READ bit, because in the loop inside > > > print_lock_class_header(), we iterate from 0 to LOCK_USAGE_STATES (which > > > is 4*2 + 3), however the class->usage_traces[] only has > > > XXX_LOCK_USAGE_STATES (which is 4*2 + 1) elements, so if we have > > > LOCK_USED_READ bit set in ->usage_mask, we will try to access an element > > > out of the ->usage_traces[] array. > > > > > > Probably the following helps? And another possible fix is to enlarge the > > > ->usage_trace[] array and record the call trace of LOCK_READ_USED. > > > > Urgh.. yeah, I wanted to avoid saving that trace; it's pretty useless :/ > > The existing USED trace is already mostly pointless, the consistent > > thing would be to remove both but that might be too radical. > > > > But you're right in that I made a right mess of it. Not sure what's > > best here. > > > > Let me have a play. > > How's something like this? It's bigger than I'd like, but I feel the > result is more consistent/readable. >
Looks good to me. For one thing, I do think that LOCK_READ_USED trace is helpful for better reporting, because if there is a read lock in the dependency path which causes the deadlock, it's better to have the LOCK_READ_USED trace to know at least the initial READ usage. For example, if we have void f1(...) { write_lock(&A); spin_lock(&C); // A -> C ... } void g(...) { read_lock(&A); ... } void f2(...) { spin_lock(&B); g(...); // B -> A } void f3(...) { spin_lock(&C); spin_lock(&B); // C -> B, trigger lockdep splat } when lockdep reports the deadlock (at the time f3() is called), it will be useful if we have a trace like: INITIAL READ usage at: g+0x.../0x... f2+0x.../0x... Thoughts? Regards, Boqun > --- > diff --git a/include/linux/lockdep_types.h b/include/linux/lockdep_types.h > index bb35b449f533..a55b1d314ae8 100644 > --- a/include/linux/lockdep_types.h > +++ b/include/linux/lockdep_types.h > @@ -35,8 +35,12 @@ enum lockdep_wait_type { > /* > * We'd rather not expose kernel/lockdep_states.h this wide, but we do need > * the total number of states... :-( > + * > + * XXX_LOCK_USAGE_STATES is the number of lines in lockdep_states.h, for each > + * of those we generates 4 states, Additionally we (for now) report on USED. > */ > -#define XXX_LOCK_USAGE_STATES (1+2*4) > +#define XXX_LOCK_USAGE_STATES 2 > +#define LOCK_TRACE_STATES (XXX_LOCK_USAGE_STATES*4 + 1) > > /* > * NR_LOCKDEP_CACHING_CLASSES ... Number of classes > @@ -106,7 +110,7 @@ struct lock_class { > * IRQ/softirq usage tracking bits: > */ > unsigned long usage_mask; > - const struct lock_trace *usage_traces[XXX_LOCK_USAGE_STATES]; > + const struct lock_trace *usage_traces[LOCK_TRACE_STATES]; > > /* > * Generation counter, when doing certain classes of graph walking, > diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c > index 454355c033d2..4f98ac8b4575 100644 > --- a/kernel/locking/lockdep.c > +++ b/kernel/locking/lockdep.c > @@ -600,6 +600,8 @@ static const char *usage_str[] = > #include "lockdep_states.h" > #undef LOCKDEP_STATE > [LOCK_USED] = "INITIAL USE", > + [LOCK_USED_READ] = "INITIAL READ USE", > + /* abused as string storage for verify_lock_unused() */ > [LOCK_USAGE_STATES] = "IN-NMI", > }; > #endif > @@ -2231,7 +2233,7 @@ static void print_lock_class_header(struct lock_class > *class, int depth) > #endif > printk(KERN_CONT " {\n"); > > - for (bit = 0; bit < LOCK_USAGE_STATES; bit++) { > + for (bit = 0; bit < LOCK_TRACE_STATES; bit++) { > if (class->usage_mask & (1 << bit)) { > int len = depth; > > @@ -4354,27 +4356,24 @@ static int mark_lock(struct task_struct *curr, struct > held_lock *this, > old_mask = hlock_class(this)->usage_mask; > hlock_class(this)->usage_mask |= new_mask; > > - /* > - * Save one usage_traces[] entry and map both LOCK_USED and > - * LOCK_USED_READ onto the same entry. > - */ > - if (new_bit == LOCK_USED || new_bit == LOCK_USED_READ) { > - if (old_mask & (LOCKF_USED | LOCKF_USED_READ)) > - goto unlock; > - new_bit = LOCK_USED; > + if (new_bit < LOCK_TRACE_STATES) { > + if (!(hlock_class(this)->usage_traces[new_bit] = save_trace())) > + return 0; > } > > - if (!(hlock_class(this)->usage_traces[new_bit] = save_trace())) > - return 0; > - > switch (new_bit) { > + case 0 ... LOCK_USED-1: > + ret = mark_lock_irq(curr, this, new_bit); > + if (!ret) > + return 0; > + break; > + > case LOCK_USED: > debug_atomic_dec(nr_unused_locks); > break; > + > default: > - ret = mark_lock_irq(curr, this, new_bit); > - if (!ret) > - return 0; > + break; > } > > unlock: > diff --git a/kernel/locking/lockdep_internals.h > b/kernel/locking/lockdep_internals.h > index b0be1560ed17..67dc46e46552 100644 > --- a/kernel/locking/lockdep_internals.h > +++ b/kernel/locking/lockdep_internals.h > @@ -20,9 +20,12 @@ enum lock_usage_bit { > #undef LOCKDEP_STATE > LOCK_USED, > LOCK_USED_READ, > - LOCK_USAGE_STATES > + LOCK_USAGE_STATES, > }; > > +/* states after LOCK_USED are not traced and printed */ > +static_assert(LOCK_TRACE_STATES == LOCK_USED_READ); > + > #define LOCK_USAGE_READ_MASK 1 > #define LOCK_USAGE_DIR_MASK 2 > #define LOCK_USAGE_STATE_MASK (~(LOCK_USAGE_READ_MASK | LOCK_USAGE_DIR_MASK)) > @@ -121,7 +124,7 @@ static const unsigned long LOCKF_USED_IN_IRQ_READ = > extern struct list_head all_lock_classes; > extern struct lock_chain lock_chains[]; > > -#define LOCK_USAGE_CHARS (1+LOCK_USAGE_STATES/2) > +#define LOCK_USAGE_CHARS (2*XXX_LOCK_USAGE_STATES + 1) > > extern void get_usage_chars(struct lock_class *class, > char usage[LOCK_USAGE_CHARS]);