Re: lockdep splats with blk-mq drivers

2018-05-07 Thread Sebastian Ott
Hi,

On Thu, 19 Apr 2018, Sebastian Ott wrote:
> Since commit 1d9bd5161ba3 ("blk-mq: replace timeout synchronization with a
> RCU and generation based scheme") I observe lockdep complaints (full
> message attached):
> 
> [   21.763369]        CPU0                    CPU1
> [   21.763370]        ----                    ----
> [   21.763371]   lock(&rq->gstate_seq);
> [   21.763374]                                local_irq_disable();
> [   21.763375]                                lock(&(&dq->lock)->rlock);
> [   21.763377]                                lock(&rq->gstate_seq);
> [   21.763379]   <Interrupt>
> [   21.763380]     lock(&(&dq->lock)->rlock);
> [   21.763382]
>                 *** DEADLOCK ***
> 
> 
> This only happens in combination with DASD (s390 specific) and another
> blk-mq driver (scsi, null_blk). The distinctive behavior of DASD is that
> it calls blk_mq_start_request with irqs disabled.
> 
> This is a false positive since gstate_seq on CPU0 is from a different
> request queue / block driver than gstate_seq on CPU1.
> 
> Possible fixes are to use local_irq_save/restore in blk_mq_start_request.
> Or, since it's a false positive, teach lockdep that these are different
> locks - like below:

Is there something we can do here? I've sent two proposals to address
this. Do they make sense?

Regards,
Sebastian



lockdep splats with blk-mq drivers

2018-04-19 Thread Sebastian Ott
Since commit 1d9bd5161ba3 ("blk-mq: replace timeout synchronization with a
RCU and generation based scheme") I observe lockdep complaints (full
message attached):

[   21.763369]        CPU0                    CPU1
[   21.763370]        ----                    ----
[   21.763371]   lock(&rq->gstate_seq);
[   21.763374]                                local_irq_disable();
[   21.763375]                                lock(&(&dq->lock)->rlock);
[   21.763377]                                lock(&rq->gstate_seq);
[   21.763379]   <Interrupt>
[   21.763380]     lock(&(&dq->lock)->rlock);
[   21.763382]
                *** DEADLOCK ***


This only happens in combination with DASD (s390 specific) and another
blk-mq driver (scsi, null_blk). The distinctive behavior of DASD is that
it calls blk_mq_start_request with irqs disabled.
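
For illustration, the DASD-side pattern boils down to roughly this (a
simplified sketch, not the actual driver code; sketch_queue and
sketch_queue_rq are made-up names):

/* Hypothetical stand-in for the driver-private per-queue data. */
struct sketch_queue {
	spinlock_t lock;
};

static blk_status_t sketch_queue_rq(struct blk_mq_hw_ctx *hctx,
				    const struct blk_mq_queue_data *bd)
{
	struct sketch_queue *sq = hctx->driver_data;

	spin_lock_irq(&sq->lock);	/* interrupts off from here on */
	blk_mq_start_request(bd->rq);	/* gstate_seq taken with irqs off */
	/* ... build and submit the channel program ... */
	spin_unlock_irq(&sq->lock);
	return BLK_STS_OK;
}

Other drivers (scsi, null_blk) call blk_mq_start_request() with
interrupts enabled, which is the inconsistency lockdep reports once it
treats all gstate_seq instances as a single lock.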

This is a false positive since gstate_seq on CPU0 is from a different
request queue / block driver than gstate_seq on CPU1.
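
The two end up in the same lockdep class because seqcount_init()
allocates one static lock_class_key per call site (from
include/linux/seqlock.h, CONFIG_DEBUG_LOCK_ALLOC variant):

# define seqcount_init(s)				\
	do {						\
		static struct lock_class_key __key;	\
		__seqcount_init((s), #s, &__key);	\
	} while (0)

Every request of every blk-mq driver is initialized at the single call
site in blk_mq_init_request(), so all rq->gstate_seq instances share
that one __key and hence one lockdep class.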

One possible fix is to use local_irq_save/restore around the seqcount
update in blk_mq_start_request, as sketched below. Or, since it's a
false positive, teach lockdep that these are different locks by
threading a caller-provided lock_class_key into the seqcount
initialization - like the patch after the sketch. The static key in the
resulting inline blk_mq_alloc_tag_set() wrapper is instantiated once
per translation unit, so each driver ends up with its own lockdep class.
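
A sketch of the first option, assuming the 4.16-era body of
blk_mq_start_request() (untested; the preempt_disable()/preempt_enable()
pair around the write section becomes an irq-save section):

	unsigned long flags;

	local_irq_save(flags);			/* was: preempt_disable() */
	write_seqcount_begin(&rq->gstate_seq);

	blk_mq_rq_update_state(rq, MQ_RQ_IN_FLIGHT);
	blk_add_timer(rq);

	write_seqcount_end(&rq->gstate_seq);
	local_irq_restore(flags);		/* was: preempt_enable() */

This makes the irq state at the gstate_seq write section consistent for
all callers, at the cost of disabling interrupts in every driver's
submission path.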

-->8
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 0dc9e341c2a7..d1a2c2fa015d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -2040,7 +2040,7 @@ static int blk_mq_init_request(struct blk_mq_tag_set *set, struct request *rq,
 			return ret;
 	}
 
-	seqcount_init(&rq->gstate_seq);
+	__seqcount_init(&rq->gstate_seq, "&rq->gstate_seq", set->rq_seqcount_key);
 	u64_stats_init(&rq->aborted_gstate_sync);
 	return 0;
 }
@@ -2774,7 +2774,7 @@ static int blk_mq_update_queue_map(struct blk_mq_tag_set *set)
  * requested depth down, if if it too large. In that case, the set
  * value will be stored in set->queue_depth.
  */
-int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
+int __blk_mq_alloc_tag_set(struct blk_mq_tag_set *set, struct lock_class_key *key)
 {
 	int ret;
 
@@ -2825,6 +2825,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 	if (!set->mq_map)
 		goto out_free_tags;
 
+	set->rq_seqcount_key = key;
 	ret = blk_mq_update_queue_map(set);
 	if (ret)
 		goto out_free_mq_map;
@@ -2846,7 +2847,7 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
 	set->tags = NULL;
 	return ret;
 }
-EXPORT_SYMBOL(blk_mq_alloc_tag_set);
+EXPORT_SYMBOL(__blk_mq_alloc_tag_set);
 
 void blk_mq_free_tag_set(struct blk_mq_tag_set *set)
 {
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index e3986f4b3461..1fb8bc1fdb07 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -85,6 +85,8 @@ struct blk_mq_tag_set {
 
 	struct mutex		tag_list_lock;
 	struct list_head	tag_list;
+
+	struct lock_class_key	*rq_seqcount_key;
 };
 
 struct blk_mq_queue_data {
@@ -201,7 +203,15 @@ struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set,
 int blk_mq_register_dev(struct device *, struct request_queue *);
 void blk_mq_unregister_dev(struct device *, struct request_queue *);
 
-int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set);
+int __blk_mq_alloc_tag_set(struct blk_mq_tag_set *set, struct lock_class_key *key);
+
+static inline int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set)
+{
+	static struct lock_class_key _seqcount_key;
+
+	return __blk_mq_alloc_tag_set(set, &_seqcount_key);
+}
+
 void blk_mq_free_tag_set(struct blk_mq_tag_set *set);
 
 void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule);

[   74.182295] WARNING: possible irq lock inversion dependency detected
[   74.182296] 4.16.0-11766-ge241e3f2bf97 #166 Not tainted
[   74.182297] --------------------------------------------------------
[   74.182298] kworker/u64:7/175 just changed the state of lock:
[   74.182301] 1b8db3f6 (&rq->gstate_seq){+.+.}, at: scsi_queue_rq+0x460/0x578
[   74.182308] but this lock was taken by another, SOFTIRQ-safe lock in the past:
[   74.182310]  (&(&dq->lock)->rlock){..-.}
[   74.182311] 
               and interrupts could create inverse lock ordering between them.

[   74.182315] 
               other info that might help us debug this:
[   74.182316]  Possible interrupt unsafe locking scenario:

[   74.182318]        CPU0                    CPU1
[   74.182319]        ----                    ----
[   74.182320]   lock(&rq->gstate_seq);
[   74.182340]                                local_irq_disable();
[   74.182341]                                lock(&(&dq->lock)->rlock);
[   74.182343]                                lock(&rq->gstate_seq);
[   74.182345]   <Interrupt>
[   74.182346]     lock(&(&dq->lock)->rlock);
[   74.182348] 
                *** DEADLOCK ***

[   74.182350] 4 locks held by kworker/u64:7/175:
[   74.182351]  #0: 1751423f ((wq_completion)"%s"shost->work_q_name){+.+.}, at: process_one_work+0x1a0/0x6f0
[   74.182359]  #1: 1c880b65 ((work_completion)(&rport->scan_work)){+.+.}, at: process_one_work+0x1a0/0x6f0
[