[PATCH V6 6/6] SCSI: set block queue at preempt only when SCSI device is put into quiesce

2017-09-26 Thread Ming Lei
Simply quiescing the SCSI device and waiting for completion of I/O
dispatched to the SCSI queue isn't safe: it is easy to use up the
request pool, because requests allocated before the device is put
into QUIESCE can't be dispatched. Then no request can be allocated
for RQF_PREEMPT and the system may hang somewhere, for example when
sending sync_cache or start_stop commands in the system suspend
path.

Before quiescing the SCSI device, this patch puts the block queue into
preempt-only mode first, so no new normal request can enter the queue
any more, and all pending requests are drained once
blk_set_preempt_only(true) returns. RQF_PREEMPT requests can then be
allocated successfully while the queue is in preempt-only mode.
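
For illustration, here is a minimal sketch (hypothetical helper, not part
of the patch) of how a caller is expected to combine this change with the
preempt-only mode from patch 5/6: the queue is switched to preempt-only
before the device state changes, so a later RQF_PREEMPT allocation (e.g.
for START STOP UNIT during suspend) still gets through while normal I/O
is held off.

#include <linux/blkdev.h>
#include <scsi/scsi_device.h>

static int example_send_while_quiesced(struct scsi_device *sdev)
{
	struct request *req;

	blk_set_preempt_only(sdev->request_queue, true);
	/* ... scsi_device_set_state(sdev, SDEV_QUIESCE) as in this patch ... */

	req = __blk_get_request(sdev->request_queue, REQ_OP_SCSI_OUT,
				__GFP_RECLAIM, BLK_REQ_PREEMPT);
	if (IS_ERR(req))
		return PTR_ERR(req);

	/* issue the command as scsi_execute() does, then release it */
	blk_put_request(req);
	return 0;
}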

Signed-off-by: Ming Lei 
---
 drivers/scsi/scsi_lib.c | 25 ++---
 1 file changed, 22 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 9cf6a80fe297..82c51619f1b7 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -252,9 +252,10 @@ int scsi_execute(struct scsi_device *sdev, const unsigned 
char *cmd,
struct scsi_request *rq;
int ret = DRIVER_ERROR << 24;
 
-   req = blk_get_request(sdev->request_queue,
+   req = __blk_get_request(sdev->request_queue,
data_direction == DMA_TO_DEVICE ?
-   REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, __GFP_RECLAIM);
+   REQ_OP_SCSI_OUT : REQ_OP_SCSI_IN, __GFP_RECLAIM,
+   BLK_REQ_PREEMPT);
if (IS_ERR(req))
return ret;
rq = scsi_req(req);
@@ -2928,12 +2929,28 @@ scsi_device_quiesce(struct scsi_device *sdev)
 {
int err;
 
+   /*
+* Simply quiescing the SCSI device isn't safe, it is easy
+* to use up requests because all these allocated requests
+* can't be dispatched when device is put in QUIESCE.
+* Then no request can be allocated and we may hang
+* somewhere, such as system suspend/resume.
+*
+* So we set block queue in preempt only first, no new
+* normal request can enter queue any more, and all pending
+* requests are drained once blk_set_preempt_only()
+* returns. Only RQF_PREEMPT is allowed in preempt only mode.
+*/
+   blk_set_preempt_only(sdev->request_queue, true);
+
	mutex_lock(&sdev->state_mutex);
	err = scsi_device_set_state(sdev, SDEV_QUIESCE);
	mutex_unlock(&sdev->state_mutex);
 
-   if (err)
+   if (err) {
+   blk_set_preempt_only(sdev->request_queue, false);
return err;
+   }
 
scsi_run_queue(sdev->request_queue);
	while (atomic_read(&sdev->device_busy)) {
@@ -2964,6 +2981,8 @@ void scsi_device_resume(struct scsi_device *sdev)
scsi_device_set_state(sdev, SDEV_RUNNING) == 0)
scsi_run_queue(sdev->request_queue);
	mutex_unlock(&sdev->state_mutex);
+
+   blk_set_preempt_only(sdev->request_queue, false);
 }
 EXPORT_SYMBOL(scsi_device_resume);
 
-- 
2.9.5



[PATCH V6 5/6] block: support PREEMPT_ONLY

2017-09-26 Thread Ming Lei
When the queue is in PREEMPT_ONLY mode, only RQF_PREEMPT requests
can be allocated and dispatched; other requests won't be allowed
to enter the I/O path.

This is useful for supporting safe SCSI quiesce.

Part of this patch is from Bart's '[PATCH v4 4/7] block: Add the
QUEUE_FLAG_PREEMPT_ONLY request queue flag'.

Signed-off-by: Ming Lei 
---
 block/blk-core.c   | 25 +++--
 include/linux/blkdev.h |  5 +
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index 0a8396e8e4ff..3c0a7e7f172f 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -346,6 +346,17 @@ void blk_sync_queue(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_sync_queue);
 
+void blk_set_preempt_only(struct request_queue *q, bool preempt_only)
+{
+   blk_mq_freeze_queue(q);
+   if (preempt_only)
+   queue_flag_set_unlocked(QUEUE_FLAG_PREEMPT_ONLY, q);
+   else
+   queue_flag_clear_unlocked(QUEUE_FLAG_PREEMPT_ONLY, q);
+   blk_mq_unfreeze_queue(q);
+}
+EXPORT_SYMBOL(blk_set_preempt_only);
+
 /**
  * __blk_run_queue_uncond - run a queue whether or not it has been stopped
  * @q: The queue to run
@@ -771,9 +782,18 @@ int blk_queue_enter(struct request_queue *q, unsigned 
flags)
while (true) {
int ret;
 
+   /*
+* preempt_only flag has to be set after queue is frozen,
+* so it can be checked here locklessly and safely
+*/
+   if (blk_queue_preempt_only(q)) {
+   if (!(flags & BLK_REQ_PREEMPT))
+   goto slow_path;
+   }
+
		if (percpu_ref_tryget_live(&q->q_usage_counter))
return 0;
-
+ slow_path:
if (flags & BLK_REQ_NOWAIT)
return -EBUSY;
 
@@ -787,7 +807,8 @@ int blk_queue_enter(struct request_queue *q, unsigned flags)
smp_rmb();
 
ret = wait_event_interruptible(q->mq_freeze_wq,
-   !atomic_read(&q->mq_freeze_depth) ||
+   (!atomic_read(&q->mq_freeze_depth) &&
+   !blk_queue_preempt_only(q)) ||
blk_queue_dying(q));
if (blk_queue_dying(q))
return -ENODEV;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index d1ab950a7f72..8f5b15b2bf06 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -630,6 +630,7 @@ struct request_queue {
 #define QUEUE_FLAG_REGISTERED  26  /* queue has been registered to a disk */
 #define QUEUE_FLAG_SCSI_PASSTHROUGH 27 /* queue supports SCSI commands */
 #define QUEUE_FLAG_QUIESCED28  /* queue has been quiesced */
+#define QUEUE_FLAG_PREEMPT_ONLY29  /* only process REQ_PREEMPT requests */
 
 #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) |\
 (1 << QUEUE_FLAG_STACKABLE)|   \
@@ -734,6 +735,10 @@ static inline void queue_flag_clear(unsigned int flag, 
struct request_queue *q)
((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \
 REQ_FAILFAST_DRIVER))
 #define blk_queue_quiesced(q)  test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags)
+#define blk_queue_preempt_only(q)  \
+   test_bit(QUEUE_FLAG_PREEMPT_ONLY, &(q)->queue_flags)
+
+extern void blk_set_preempt_only(struct request_queue *q, bool preempt_only);
 
 static inline bool blk_account_rq(struct request *rq)
 {
-- 
2.9.5



[PATCH V6 2/6] block: tracking request allocation with q_usage_counter

2017-09-26 Thread Ming Lei
This usage is basically the same as in blk-mq, so that we can
easily support freezing a legacy queue.

Also, 'wake_up_all(&q->mq_freeze_wq)' has to be moved
into blk_set_queue_dying() since both the legacy and blk-mq paths
may wait on the .mq_freeze_wq wait queue.
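
As a usage illustration (hypothetical caller, not part of the patch): the
allocation API is unchanged, but after this patch every legacy request
implicitly holds a q_usage_counter reference from allocation until
__blk_put_request(), which is what lets a queue freeze wait for legacy
requests as well.

#include <linux/blkdev.h>

static void example_legacy_user(struct request_queue *q)
{
	struct request *rq = blk_get_request(q, REQ_OP_DRV_IN, GFP_KERNEL);

	if (IS_ERR(rq))
		return;
	/* ... set up and issue the request ... */
	blk_put_request(rq);	/* drops the q_usage_counter reference too */
}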

Signed-off-by: Ming Lei 
---
 block/blk-core.c | 14 ++
 block/blk-mq.c   |  7 ---
 2 files changed, 14 insertions(+), 7 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index aebe676225e6..abfba798ee03 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -610,6 +610,12 @@ void blk_set_queue_dying(struct request_queue *q)
}
spin_unlock_irq(q->queue_lock);
}
+
+   /*
+* We need to ensure that processes currently waiting on
+* the queue are notified as well.
+*/
+   wake_up_all(&q->mq_freeze_wq);
 }
 EXPORT_SYMBOL_GPL(blk_set_queue_dying);
 
@@ -1392,16 +1398,21 @@ static struct request *blk_old_get_request(struct 
request_queue *q,
   unsigned int op, gfp_t gfp_mask)
 {
struct request *rq;
+   int ret = 0;
 
WARN_ON_ONCE(q->mq_ops);
 
/* create ioc upfront */
create_io_context(gfp_mask, q->node);
 
+   ret = blk_queue_enter(q, !(gfp_mask & __GFP_DIRECT_RECLAIM));
+   if (ret)
+   return ERR_PTR(ret);
spin_lock_irq(q->queue_lock);
rq = get_request(q, op, NULL, gfp_mask);
if (IS_ERR(rq)) {
spin_unlock_irq(q->queue_lock);
+   blk_queue_exit(q);
return rq;
}
 
@@ -1573,6 +1584,7 @@ void __blk_put_request(struct request_queue *q, struct 
request *req)
blk_free_request(rl, req);
freed_request(rl, sync, rq_flags);
blk_put_rl(rl);
+   blk_queue_exit(q);
}
 }
 EXPORT_SYMBOL_GPL(__blk_put_request);
@@ -1854,8 +1866,10 @@ static blk_qc_t blk_queue_bio(struct request_queue *q, 
struct bio *bio)
 * Grab a free request. This is might sleep but can not fail.
 * Returns with the queue unlocked.
 */
+   blk_queue_enter_live(q);
req = get_request(q, bio->bi_opf, bio, GFP_NOIO);
if (IS_ERR(req)) {
+   blk_queue_exit(q);
__wbt_done(q->rq_wb, wb_acct);
if (PTR_ERR(req) == -ENOMEM)
bio->bi_status = BLK_STS_RESOURCE;
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 6fd9f86fc86d..10c1f49f663d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -256,13 +256,6 @@ void blk_mq_wake_waiters(struct request_queue *q)
queue_for_each_hw_ctx(q, hctx, i)
if (blk_mq_hw_queue_mapped(hctx))
blk_mq_tag_wakeup_all(hctx->tags, true);
-
-   /*
-* If we are called because the queue has now been marked as
-* dying, we need to ensure that processes currently waiting on
-* the queue are notified as well.
-*/
-   wake_up_all(&q->mq_freeze_wq);
 }
 
 bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx)
-- 
2.9.5



[PATCH V6 1/6] blk-mq: only run hw queues for blk-mq

2017-09-26 Thread Ming Lei
This patch just makes it explicit.

Reviewed-by: Johannes Thumshirn 
Signed-off-by: Ming Lei 
---
 block/blk-mq.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/block/blk-mq.c b/block/blk-mq.c
index 98a18609755e..6fd9f86fc86d 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -125,7 +125,8 @@ void blk_freeze_queue_start(struct request_queue *q)
	freeze_depth = atomic_inc_return(&q->mq_freeze_depth);
	if (freeze_depth == 1) {
		percpu_ref_kill(&q->q_usage_counter);
-   blk_mq_run_hw_queues(q, false);
+   if (q->mq_ops)
+   blk_mq_run_hw_queues(q, false);
}
 }
 EXPORT_SYMBOL_GPL(blk_freeze_queue_start);
-- 
2.9.5



[PATCH V6 4/6] block: prepare for passing RQF_PREEMPT to request allocation

2017-09-26 Thread Ming Lei
RQF_PREEMPT is a bit special because such a request is required to be
dispatched to the LLD even when the SCSI device is quiesced.

So this patch introduces __blk_get_request(), which lets callers pass
the RQF_PREEMPT flag in, so that an RQF_PREEMPT request can still be
allocated while the queue is in PREEMPT_ONLY mode, which will be
introduced in the following patch.
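
The blkdev.h hunk that keeps the old entry point is truncated in this
archive, so the following wrapper is an assumption about its likely shape
rather than the literal patch content: existing blk_get_request() callers
keep working and simply pass no extra flags.

static inline struct request *blk_get_request(struct request_queue *q,
					      unsigned int op, gfp_t gfp_mask)
{
	return __blk_get_request(q, op, gfp_mask, 0);
}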

Signed-off-by: Ming Lei 
---
 block/blk-core.c   | 19 +--
 block/blk-mq.c |  3 +--
 include/linux/blk-mq.h |  7 ---
 include/linux/blkdev.h | 17 ++---
 4 files changed, 28 insertions(+), 18 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index be17b5bcf6e7..0a8396e8e4ff 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -1395,7 +1395,8 @@ static struct request *get_request(struct request_queue 
*q, unsigned int op,
 }
 
 static struct request *blk_old_get_request(struct request_queue *q,
-  unsigned int op, gfp_t gfp_mask)
+  unsigned int op, gfp_t gfp_mask,
+  unsigned int flags)
 {
struct request *rq;
int ret = 0;
@@ -1405,8 +1406,7 @@ static struct request *blk_old_get_request(struct 
request_queue *q,
/* create ioc upfront */
create_io_context(gfp_mask, q->node);
 
-   ret = blk_queue_enter(q, !(gfp_mask & __GFP_DIRECT_RECLAIM) ?
-   BLK_REQ_NOWAIT : 0);
+   ret = blk_queue_enter(q, flags & BLK_REQ_BITS_MASK);
if (ret)
return ERR_PTR(ret);
spin_lock_irq(q->queue_lock);
@@ -1424,26 +1424,25 @@ static struct request *blk_old_get_request(struct 
request_queue *q,
return rq;
 }
 
-struct request *blk_get_request(struct request_queue *q, unsigned int op,
-   gfp_t gfp_mask)
+struct request *__blk_get_request(struct request_queue *q, unsigned int op,
+ gfp_t gfp_mask, unsigned int flags)
 {
struct request *req;
 
+   flags |= gfp_mask & __GFP_DIRECT_RECLAIM ? 0 : BLK_REQ_NOWAIT;
if (q->mq_ops) {
-   req = blk_mq_alloc_request(q, op,
-   (gfp_mask & __GFP_DIRECT_RECLAIM) ?
-   0 : BLK_MQ_REQ_NOWAIT);
+   req = blk_mq_alloc_request(q, op, flags);
if (!IS_ERR(req) && q->mq_ops->initialize_rq_fn)
q->mq_ops->initialize_rq_fn(req);
} else {
-   req = blk_old_get_request(q, op, gfp_mask);
+   req = blk_old_get_request(q, op, gfp_mask, flags);
if (!IS_ERR(req) && q->initialize_rq_fn)
q->initialize_rq_fn(req);
}
 
return req;
 }
-EXPORT_SYMBOL(blk_get_request);
+EXPORT_SYMBOL(__blk_get_request);
 
 /**
  * blk_requeue_request - put a request back on queue
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 45bff90e08f7..90b43f607e3c 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -384,8 +384,7 @@ struct request *blk_mq_alloc_request(struct request_queue 
*q, unsigned int op,
struct request *rq;
int ret;
 
-   ret = blk_queue_enter(q, (flags & BLK_MQ_REQ_NOWAIT) ?
-   BLK_REQ_NOWAIT : 0);
+   ret = blk_queue_enter(q, flags & BLK_REQ_BITS_MASK);
if (ret)
return ERR_PTR(ret);
 
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index 50c6485cb04f..066a676d7749 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -197,9 +197,10 @@ void blk_mq_free_request(struct request *rq);
 bool blk_mq_can_queue(struct blk_mq_hw_ctx *);
 
 enum {
-   BLK_MQ_REQ_NOWAIT   = (1 << 0), /* return when out of requests */
-   BLK_MQ_REQ_RESERVED = (1 << 1), /* allocate from reserved pool */
-   BLK_MQ_REQ_INTERNAL = (1 << 2), /* allocate internal/sched tag */
+   BLK_MQ_REQ_NOWAIT   = BLK_REQ_NOWAIT, /* return when out of requests */
+   BLK_MQ_REQ_PREEMPT  = BLK_REQ_PREEMPT, /* allocate for RQF_PREEMPT */
+   BLK_MQ_REQ_RESERVED = (1 << BLK_REQ_MQ_START_BIT), /* allocate from reserved pool */
+   BLK_MQ_REQ_INTERNAL = (1 << (BLK_REQ_MQ_START_BIT + 1)), /* allocate internal/sched tag */
 };
 
 struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op,
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 107e2fd48486..d1ab950a7f72 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -859,7 +859,10 @@ enum {
 
 /* passed to blk_queue_enter */
 enum {
-   BLK_REQ_NOWAIT = (1 << 0),
+   BLK_REQ_NOWAIT  = (1 << 0),
+   BLK_REQ_PREEMPT = (1 << 1),
+   BLK_REQ_MQ_START_BIT= 2,
+   BLK_REQ_BITS_MASK   = (1U << BLK_REQ_MQ_START_BIT) - 1,
 };
 
 extern unsigned long blk_max_low_pfn, blk_max_pfn;
@@ -944,8 +947,9 @@ extern void blk_rq_init(struct request_queue *q, struct 
request *rq);
 

[PATCH V6 3/6] block: pass flags to blk_queue_enter()

2017-09-26 Thread Ming Lei
We need to pass a PREEMPT flag to blk_queue_enter()
so that a request with RQF_PREEMPT can be allocated in the
following patch.

Signed-off-by: Ming Lei 
---
 block/blk-core.c   | 10 ++
 block/blk-mq.c |  5 +++--
 block/blk-timeout.c|  2 +-
 fs/block_dev.c |  4 ++--
 include/linux/blkdev.h |  7 ++-
 5 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/block/blk-core.c b/block/blk-core.c
index abfba798ee03..be17b5bcf6e7 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -766,7 +766,7 @@ struct request_queue *blk_alloc_queue(gfp_t gfp_mask)
 }
 EXPORT_SYMBOL(blk_alloc_queue);
 
-int blk_queue_enter(struct request_queue *q, bool nowait)
+int blk_queue_enter(struct request_queue *q, unsigned flags)
 {
while (true) {
int ret;
@@ -774,7 +774,7 @@ int blk_queue_enter(struct request_queue *q, bool nowait)
		if (percpu_ref_tryget_live(&q->q_usage_counter))
return 0;
 
-   if (nowait)
+   if (flags & BLK_REQ_NOWAIT)
return -EBUSY;
 
/*
@@ -1405,7 +1405,8 @@ static struct request *blk_old_get_request(struct 
request_queue *q,
/* create ioc upfront */
create_io_context(gfp_mask, q->node);
 
-   ret = blk_queue_enter(q, !(gfp_mask & __GFP_DIRECT_RECLAIM));
+   ret = blk_queue_enter(q, !(gfp_mask & __GFP_DIRECT_RECLAIM) ?
+   BLK_REQ_NOWAIT : 0);
if (ret)
return ERR_PTR(ret);
spin_lock_irq(q->queue_lock);
@@ -2212,7 +2213,8 @@ blk_qc_t generic_make_request(struct bio *bio)
do {
struct request_queue *q = bio->bi_disk->queue;
 
-   if (likely(blk_queue_enter(q, bio->bi_opf & REQ_NOWAIT) == 0)) {
+   if (likely(blk_queue_enter(q, (bio->bi_opf & REQ_NOWAIT) ?
+   BLK_REQ_NOWAIT : 0) == 0)) {
struct bio_list lower, same;
 
/* Create a fresh bio_list for all subordinate requests 
*/
diff --git a/block/blk-mq.c b/block/blk-mq.c
index 10c1f49f663d..45bff90e08f7 100644
--- a/block/blk-mq.c
+++ b/block/blk-mq.c
@@ -384,7 +384,8 @@ struct request *blk_mq_alloc_request(struct request_queue 
*q, unsigned int op,
struct request *rq;
int ret;
 
-   ret = blk_queue_enter(q, flags & BLK_MQ_REQ_NOWAIT);
+   ret = blk_queue_enter(q, (flags & BLK_MQ_REQ_NOWAIT) ?
+   BLK_REQ_NOWAIT : 0);
if (ret)
return ERR_PTR(ret);
 
@@ -423,7 +424,7 @@ struct request *blk_mq_alloc_request_hctx(struct 
request_queue *q,
if (hctx_idx >= q->nr_hw_queues)
return ERR_PTR(-EIO);
 
-   ret = blk_queue_enter(q, true);
+   ret = blk_queue_enter(q, BLK_REQ_NOWAIT);
if (ret)
return ERR_PTR(ret);
 
diff --git a/block/blk-timeout.c b/block/blk-timeout.c
index 17ec83bb0900..e803106a5e5b 100644
--- a/block/blk-timeout.c
+++ b/block/blk-timeout.c
@@ -134,7 +134,7 @@ void blk_timeout_work(struct work_struct *work)
struct request *rq, *tmp;
int next_set = 0;
 
-   if (blk_queue_enter(q, true))
+   if (blk_queue_enter(q, BLK_REQ_NOWAIT))
return;
spin_lock_irqsave(q->queue_lock, flags);
 
diff --git a/fs/block_dev.c b/fs/block_dev.c
index 93d088ffc05c..98cf2d7ee9d3 100644
--- a/fs/block_dev.c
+++ b/fs/block_dev.c
@@ -674,7 +674,7 @@ int bdev_read_page(struct block_device *bdev, sector_t 
sector,
if (!ops->rw_page || bdev_get_integrity(bdev))
return result;
 
-   result = blk_queue_enter(bdev->bd_queue, false);
+   result = blk_queue_enter(bdev->bd_queue, 0);
if (result)
return result;
result = ops->rw_page(bdev, sector + get_start_sect(bdev), page, false);
@@ -710,7 +710,7 @@ int bdev_write_page(struct block_device *bdev, sector_t 
sector,
 
if (!ops->rw_page || bdev_get_integrity(bdev))
return -EOPNOTSUPP;
-   result = blk_queue_enter(bdev->bd_queue, false);
+   result = blk_queue_enter(bdev->bd_queue, 0);
if (result)
return result;
 
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 460294bb0fa5..107e2fd48486 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -857,6 +857,11 @@ enum {
BLKPREP_INVALID,/* invalid command, kill, return -EREMOTEIO */
 };
 
+/* passed to blk_queue_enter */
+enum {
+   BLK_REQ_NOWAIT = (1 << 0),
+};
+
 extern unsigned long blk_max_low_pfn, blk_max_pfn;
 
 /*
@@ -962,7 +967,7 @@ extern int scsi_cmd_ioctl(struct request_queue *, struct 
gendisk *, fmode_t,
 extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t,
 struct scsi_ioctl_command __user *);
 
-extern int blk_queue_enter(struct request_queue *q, bool nowait);
-extern int blk_queue_enter(struct request_queue *q, bool nowait);
+extern int blk_queue_enter(struct request_queue *q, unsigned flags);

[PATCH V6 0/6] block/scsi: safe SCSI quiescing

2017-09-26 Thread Ming Lei
Hi,

The current SCSI quiesce isn't safe and can easily trigger I/O deadlock.

Once the SCSI device is put into QUIESCE, no new request except for
RQF_PREEMPT can be dispatched to SCSI successfully, and
scsi_device_quiesce() simply waits for completion of the I/O already
dispatched to the SCSI stack. That isn't enough at all.

New requests can still come in, but none of the allocated requests can
be dispatched successfully, so the request pool can be used up easily.

Then the request with RQF_PREEMPT can't be allocated and waits forever;
meanwhile scsi_device_resume() waits for completion of the RQF_PREEMPT
request, so the system hangs forever, for example during system suspend
or SCSI domain validation.

Both the I/O hang during system suspend[1] and the one during SCSI
domain validation have been reported before.

This patchset introduces a preempt-only mode and solves the issue by
allowing only RQF_PREEMPT requests during SCSI quiesce.

Both SCSI and SCSI_MQ have this I/O deadlock issue; this patchset fixes
both.

V6:
- borrow Bart's idea of preempt only, with a clean
  implementation (patch 5/patch 6)
- no dependency on any external driver change, such as MD's

V5:
- fix one tiny race by introducing blk_queue_enter_preempt_freeze();
  given this change is small enough compared with V4, I added the
  tested-by tags directly

V4:
- reorganize patch order to make it more reasonable
- support nested preempt freeze, as required by SCSI transport spi
- check preempt freezing in the slow path of blk_queue_enter()
- add "SCSI: transport_spi: resume a quiesced device"
- wake up freeze queue in setting dying for both blk-mq and legacy
- rename blk_mq_[freeze|unfreeze]_queue() in one patch
- rename .mq_freeze_wq and .mq_freeze_depth
- improve comment

V3:
- introduce q->preempt_unfreezing to fix one bug of preempt freeze
- call blk_queue_enter_live() only when queue is preempt frozen
- cleanup a bit on the implementation of preempt freeze
- only patch 6 and 7 are changed

V2:
- drop the 1st patch in V1 because percpu_ref_is_dying() is
enough as pointed by Tejun
- introduce preempt version of blk_[freeze|unfreeze]_queue
- sync between preempt freeze and normal freeze
- fix warning from percpu-refcount as reported by Oleksandr


[1] https://marc.info/?t=150340250100013=3=2


Thanks,
Ming

Ming Lei (6):
  blk-mq: only run hw queues for blk-mq
  block: tracking request allocation with q_usage_counter
  block: pass flags to blk_queue_enter()
  block: prepare for passing RQF_PREEMPT to request allocation
  block: support PREEMPT_ONLY
  SCSI: set block queue at preempt only when SCSI device is put into
quiesce

 block/blk-core.c| 62 ++---
 block/blk-mq.c  | 14 ---
 block/blk-timeout.c |  2 +-
 drivers/scsi/scsi_lib.c | 25 +---
 fs/block_dev.c  |  4 ++--
 include/linux/blk-mq.h  |  7 +++---
 include/linux/blkdev.h  | 27 ++---
 7 files changed, 106 insertions(+), 35 deletions(-)

-- 
2.9.5



Re: [PATCH] scsi: ufs: add ufs a command complete time stamp

2017-09-26 Thread Zang Leigang
Hi, Subhash

I sent a new patch but forgot to tag it "PATCH V2"; please check.

On Tue, Sep 26, 2017 at 02:39:06PM -0700, Subhash Jadavani wrote:
> On 2017-09-20 03:30, Zang Leigang wrote:
> >Signed-off-by: Zang Leigang 
> >
> >diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
> >index 794a4600e952..2984f33095be 100644
> >--- a/drivers/scsi/ufs/ufshcd.c
> >+++ b/drivers/scsi/ufs/ufshcd.c
> >@@ -385,6 +385,8 @@ void ufshcd_print_trs(struct ufs_hba *hba,
> >unsigned long bitmap, bool pr_prdt)
> >
> > dev_err(hba->dev, "UPIU[%d] - issue time %lld us\n",
> > tag, ktime_to_us(lrbp->issue_time_stamp));
> >+dev_err(hba->dev, "UPIU[%d] - complete time %lld us\n",
> >+tag, ktime_to_us(lrbp->complete_time_stamp));
> > dev_err(hba->dev,
> > "UPIU[%d] - Transfer Request Descriptor phys@0x%llx\n",
> > tag, (u64)lrbp->utrd_dma_addr);
> >@@ -1746,6 +1748,7 @@ static inline
> > void ufshcd_send_command(struct ufs_hba *hba, unsigned int task_tag)
> > {
> > hba->lrb[task_tag].issue_time_stamp = ktime_get();
> >+hba->lrb[task_tag].complete_time_stamp.tv64 = 0UL;
> 
> can we use ktime_set(0, 0)?
> complete_time_stamp = ktime_set(0, 0);

Sure.

> 
> 
> > ufshcd_clk_scaling_start_busy(hba);
> > __set_bit(task_tag, &hba->outstanding_reqs);
> > ufshcd_writel(hba, 1 << task_tag, REG_UTP_TRANSFER_REQ_DOOR_BELL);
> >@@ -4627,6 +4630,8 @@ static void __ufshcd_transfer_req_compl(struct
> >ufs_hba *hba,
> > }
> > if (ufshcd_is_clkscaling_supported(hba))
> > hba->clk_scaling.active_reqs--;
> >+
> >+lrbp->complete_time_stamp = ktime_get();
> > }
> >
> > /* clear corresponding bits of completed commands */
> >diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
> >index cdc8bd05f7df..67a8f8ef1b06 100644
> >--- a/drivers/scsi/ufs/ufshcd.h
> >+++ b/drivers/scsi/ufs/ufshcd.h
> >@@ -166,6 +166,7 @@ struct ufs_pm_lvl_states {
> >  * @lun: LUN of the command
> >  * @intr_cmd: Interrupt command (doesn't participate in interrupt
> >aggregation)
> >  * @issue_time_stamp: time stamp for debug purposes
> >+ * @complete_time_stamp: time stamp for statistics
> >  * @req_abort_skip: skip request abort task flag
> >  */
> > struct ufshcd_lrb {
> >@@ -189,6 +190,7 @@ struct ufshcd_lrb {
> > u8 lun; /* UPIU LUN id field is only 8-bit wide */
> > bool intr_cmd;
> > ktime_t issue_time_stamp;
> >+ktime_t complete_time_stamp;
> 
> Just a suggestion for short name, s/complete/compl ? or s/complete/cmpl ?

Accepted, compl looks better.

> 
> >
> > bool req_abort_skip;
> > };
> 
> -- 
> The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
> a Linux Foundation Collaborative Project

-- 
Thanks,
Zang Leigang




[PATCH] scsi: ufs: add ufs a command complete time stamp

2017-09-26 Thread Zang Leigang
Signed-off-by: Zang Leigang 

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 794a460..7e8d583 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -385,6 +385,8 @@ void ufshcd_print_trs(struct ufs_hba *hba, unsigned long 
bitmap, bool pr_prdt)
 
dev_err(hba->dev, "UPIU[%d] - issue time %lld us\n",
tag, ktime_to_us(lrbp->issue_time_stamp));
+   dev_err(hba->dev, "UPIU[%d] - complete time %lld us\n",
+   tag, ktime_to_us(lrbp->compl_time_stamp));
dev_err(hba->dev,
"UPIU[%d] - Transfer Request Descriptor phys@0x%llx\n",
tag, (u64)lrbp->utrd_dma_addr);
@@ -1746,6 +1748,7 @@ static inline
 void ufshcd_send_command(struct ufs_hba *hba, unsigned int task_tag)
 {
hba->lrb[task_tag].issue_time_stamp = ktime_get();
+   hba->lrb[task_tag].compl_time_stamp = ktime_set(0, 0);
ufshcd_clk_scaling_start_busy(hba);
	__set_bit(task_tag, &hba->outstanding_reqs);
ufshcd_writel(hba, 1 << task_tag, REG_UTP_TRANSFER_REQ_DOOR_BELL);
@@ -4627,6 +4630,8 @@ static void __ufshcd_transfer_req_compl(struct ufs_hba 
*hba,
}
if (ufshcd_is_clkscaling_supported(hba))
hba->clk_scaling.active_reqs--;
+
+   lrbp->compl_time_stamp = ktime_get();
}
 
/* clear corresponding bits of completed commands */
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index cdc8bd0..40ea475 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -166,6 +166,7 @@ struct ufs_pm_lvl_states {
  * @lun: LUN of the command
  * @intr_cmd: Interrupt command (doesn't participate in interrupt aggregation)
  * @issue_time_stamp: time stamp for debug purposes
+ * @compl_time_stamp: time stamp for statistics
  * @req_abort_skip: skip request abort task flag
  */
 struct ufshcd_lrb {
@@ -189,6 +190,7 @@ struct ufshcd_lrb {
u8 lun; /* UPIU LUN id field is only 8-bit wide */
bool intr_cmd;
ktime_t issue_time_stamp;
+   ktime_t compl_time_stamp;
 
bool req_abort_skip;
 };
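
For reference, a small sketch (not part of the patch) of how the two
timestamps could be turned into a per-command latency, e.g. for the
statistics the kerneldoc mentions. It assumes the struct ufshcd_lrb
definition from drivers/scsi/ufs/ufshcd.h with the compl_time_stamp
field added above.

#include <linux/ktime.h>

static s64 example_cmd_latency_us(struct ufshcd_lrb *lrbp)
{
	/* compl_time_stamp is reset to 0 at issue time by this patch */
	if (!ktime_to_ns(lrbp->compl_time_stamp))
		return -1;	/* command not completed yet */

	return ktime_to_us(ktime_sub(lrbp->compl_time_stamp,
				     lrbp->issue_time_stamp));
}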
-- 
2.7.4



Re: [PATCH 5/5] ufs/phy: qcom: Refactor to use phy_init call

2017-09-26 Thread Subhash Jadavani

Hi Vivek,

Please find one comment inline below, rest look good.

Regards,
Subhash

On 2017-08-03 23:48, Vivek Gautam wrote:

Refactor ufs_qcom_power_up_sequence() to get rid of ugly
exported phy APIs and use the phy_init() and phy_power_on()
to do the phy initialization.

Signed-off-by: Vivek Gautam 
---
 drivers/phy/qualcomm/phy-qcom-ufs-i.h|  2 --
 drivers/phy/qualcomm/phy-qcom-ufs-qmp-14nm.c |  9 +--
 drivers/phy/qualcomm/phy-qcom-ufs-qmp-20nm.c |  9 +--
 drivers/phy/qualcomm/phy-qcom-ufs.c  | 38 

 drivers/scsi/ufs/ufs-qcom.c  | 36 
++

 include/linux/phy/phy-qcom-ufs.h |  3 ---
 6 files changed, 38 insertions(+), 59 deletions(-)

diff --git a/drivers/phy/qualcomm/phy-qcom-ufs-i.h
b/drivers/phy/qualcomm/phy-qcom-ufs-i.h
index 94326ed107c3..495fd5941231 100644
--- a/drivers/phy/qualcomm/phy-qcom-ufs-i.h
+++ b/drivers/phy/qualcomm/phy-qcom-ufs-i.h
@@ -123,7 +123,6 @@ struct ufs_qcom_phy {
  * struct ufs_qcom_phy_specific_ops - set of pointers to functions 
which have a

  * specific implementation per phy. Each UFS phy, should implement
  * those functions according to its spec and requirements
- * @calibrate_phy: pointer to a function that calibrate the phy
  * @start_serdes: pointer to a function that starts the serdes
  * @is_physical_coding_sublayer_ready: pointer to a function that
  * checks pcs readiness. returns 0 for success and non-zero for error.
@@ -132,7 +131,6 @@ struct ufs_qcom_phy {
  * and writes to QSERDES_RX_SIGDET_CNTRL attribute
  */
 struct ufs_qcom_phy_specific_ops {
-   int (*calibrate_phy)(struct ufs_qcom_phy *phy, bool is_rate_B);
void (*start_serdes)(struct ufs_qcom_phy *phy);
int (*is_physical_coding_sublayer_ready)(struct ufs_qcom_phy *phy);
void (*set_tx_lane_enable)(struct ufs_qcom_phy *phy, u32 val);
diff --git a/drivers/phy/qualcomm/phy-qcom-ufs-qmp-14nm.c
b/drivers/phy/qualcomm/phy-qcom-ufs-qmp-14nm.c
index af65785230b5..c39440b56b6d 100644
--- a/drivers/phy/qualcomm/phy-qcom-ufs-qmp-14nm.c
+++ b/drivers/phy/qualcomm/phy-qcom-ufs-qmp-14nm.c
@@ -44,7 +44,13 @@ void ufs_qcom_phy_qmp_14nm_advertise_quirks(struct
ufs_qcom_phy *phy_common)

 static int ufs_qcom_phy_qmp_14nm_init(struct phy *generic_phy)
 {
-   return 0;
+   struct ufs_qcom_phy *phy_common = get_ufs_qcom_phy(generic_phy);
+   bool is_rate_B = false;
+
+   if (phy_common->mode == PHY_MODE_UFS_HS_B)
+   is_rate_B = true;
+
+   return ufs_qcom_phy_qmp_14nm_phy_calibrate(phy_common, is_rate_B);
 }

 static int ufs_qcom_phy_qmp_14nm_exit(struct phy *generic_phy)
@@ -120,7 +126,6 @@ static int
ufs_qcom_phy_qmp_14nm_is_pcs_ready(struct ufs_qcom_phy *phy_common)
 };

 static struct ufs_qcom_phy_specific_ops phy_14nm_ops = {
-   .calibrate_phy  = ufs_qcom_phy_qmp_14nm_phy_calibrate,
.start_serdes   = ufs_qcom_phy_qmp_14nm_start_serdes,
 	.is_physical_coding_sublayer_ready = 
ufs_qcom_phy_qmp_14nm_is_pcs_ready,

.set_tx_lane_enable = ufs_qcom_phy_qmp_14nm_set_tx_lane_enable,
diff --git a/drivers/phy/qualcomm/phy-qcom-ufs-qmp-20nm.c
b/drivers/phy/qualcomm/phy-qcom-ufs-qmp-20nm.c
index 5c18c41dbdb4..5705a2d4c6d2 100644
--- a/drivers/phy/qualcomm/phy-qcom-ufs-qmp-20nm.c
+++ b/drivers/phy/qualcomm/phy-qcom-ufs-qmp-20nm.c
@@ -63,7 +63,13 @@ void ufs_qcom_phy_qmp_20nm_advertise_quirks(struct
ufs_qcom_phy *phy_common)

 static int ufs_qcom_phy_qmp_20nm_init(struct phy *generic_phy)
 {
-   return 0;
+   struct ufs_qcom_phy *phy_common = get_ufs_qcom_phy(generic_phy);
+   bool is_rate_B = false;
+
+   if (phy_common->mode == PHY_MODE_UFS_HS_B)
+   is_rate_B = true;
+
+   return ufs_qcom_phy_qmp_20nm_phy_calibrate(phy_common, is_rate_B);
 }

 static int ufs_qcom_phy_qmp_20nm_exit(struct phy *generic_phy)
@@ -178,7 +184,6 @@ static int
ufs_qcom_phy_qmp_20nm_is_pcs_ready(struct ufs_qcom_phy *phy_common)
 };

 static struct ufs_qcom_phy_specific_ops phy_20nm_ops = {
-   .calibrate_phy  = ufs_qcom_phy_qmp_20nm_phy_calibrate,
.start_serdes   = ufs_qcom_phy_qmp_20nm_start_serdes,
 	.is_physical_coding_sublayer_ready = 
ufs_qcom_phy_qmp_20nm_is_pcs_ready,

.set_tx_lane_enable = ufs_qcom_phy_qmp_20nm_set_tx_lane_enable,
diff --git a/drivers/phy/qualcomm/phy-qcom-ufs.c
b/drivers/phy/qualcomm/phy-qcom-ufs.c
index 43865ef340e2..1febe3294fe3 100644
--- a/drivers/phy/qualcomm/phy-qcom-ufs.c
+++ b/drivers/phy/qualcomm/phy-qcom-ufs.c
@@ -518,9 +518,8 @@ void ufs_qcom_phy_disable_iface_clk(struct
ufs_qcom_phy *phy)
}
 }

-int ufs_qcom_phy_start_serdes(struct phy *generic_phy)
+static int ufs_qcom_phy_start_serdes(struct ufs_qcom_phy 
*ufs_qcom_phy)

 {
-   struct ufs_qcom_phy *ufs_qcom_phy = get_ufs_qcom_phy(generic_phy);
int ret = 0;

if (!ufs_qcom_phy->phy_spec_ops->start_serdes) {
@@ -533,7 +532,6 @@ int 

Re: [PATCH 4/5] scsi/ufs: qcom: Set phy mode based on the controllers HS MODE

2017-09-26 Thread Subhash Jadavani

On 2017-08-03 23:48, Vivek Gautam wrote:

Set the phy mode based on the UFS HS PA mode. This lets the
controller tell the phy which mode the PHY Adapter is running in,
so the phy can set its rates accordingly.

Signed-off-by: Vivek Gautam 
---
 drivers/scsi/ufs/ufs-qcom.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/scsi/ufs/ufs-qcom.c b/drivers/scsi/ufs/ufs-qcom.c
index c87d770b519a..44c21d5818ee 100644
--- a/drivers/scsi/ufs/ufs-qcom.c
+++ b/drivers/scsi/ufs/ufs-qcom.c
@@ -273,6 +273,9 @@ static int ufs_qcom_power_up_sequence(struct 
ufs_hba *hba)

bool is_rate_B = (UFS_QCOM_LIMIT_HS_RATE == PA_HS_MODE_B)
? true : false;

+   if (is_rate_B)
+   phy_set_mode(phy, PHY_MODE_UFS_HS_B);
+
/* Assert PHY reset and apply PHY calibration values */
ufs_qcom_assert_reset(hba);
/* provide 1ms delay to let the reset pulse propagate */


Looks good to me.
Reviewed-by: Subhash Jadavani 

--
The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project


Re: [PATCH] scsi: libsas: remove unused variable sas_ha

2017-09-26 Thread John Garry

On 26/09/2017 20:17, Colin King wrote:

From: Colin Ian King 

Remove unused variable sas_ha to clean up build warning
"unused variable ‘sas_ha’ [-Wunused-variable]"

Fixes: 042ebd293b86 ("scsi: libsas: kill useless ha_event and do some cleanup")
Signed-off-by: Colin Ian King 


Acked-by: John Garry 


---
 drivers/scsi/hisi_sas/hisi_sas_main.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c 
b/drivers/scsi/hisi_sas/hisi_sas_main.c
index 29d6cb39299c..9e2990268f00 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_main.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
@@ -1046,7 +1046,6 @@ static void hisi_sas_rescan_topology(struct hisi_hba 
*hisi_hba, u32 old_state,

 static int hisi_sas_controller_reset(struct hisi_hba *hisi_hba)
 {
-   struct sas_ha_struct *sas_ha = &hisi_hba->sha;
struct device *dev = hisi_hba->dev;
struct Scsi_Host *shost = hisi_hba->shost;
u32 old_state, state;






Re: UFS maximum access unit is 8KB

2017-09-26 Thread Subhash Jadavani

On 2017-08-09 00:28, Bean Huo (beanhuo) wrote:

Hi,
I am now using a HiKey960, and the Linux kernel version is 4.4.77.
I found that with this UFS driver version the maximum transfer size
is 8KB.

That means even though I use a 128KB chunk size in user space,
ftrace/blktrace show that the kernel always issues 8KB transfers.
Please see the log below (and the queue-limit sketch after it) --
who knows what is wrong with it?

24992.855327 |   7)   |  vfs_read() {
24992.855340 |   7) + 89.584 us   |submit_bio();
24992.855432 |   7)   |  /* block_rq_insert: 8,48 R 0 ()
40676352 + 16 [fio] */
24992.855434 |   7)   |  /* block_rq_insert: 8,48 R 0 ()
40676368 + 16 [fio] */
24992.855435 |   7)   |  /* block_rq_insert: 8,48 R 0 ()
40676384 + 16 [fio] */
24992.855437 |   7)   |  /* block_rq_insert: 8,48 R 0 ()
40676400 + 16 [fio] */
24992.855438 |   7)   |  /* block_rq_insert: 8,48 R 0 ()
40676416 + 16 [fio] */
24992.855439 |   7)   |  /* block_rq_insert: 8,48 R 0 ()
40676432 + 16 [fio] */
24992.855440 |   7)   |  /* block_rq_insert: 8,48 R 0 ()
40676448 + 16 [fio] */
24992.855441 |   7)   |  /* block_rq_insert: 8,48 R 0 ()
40676464 + 16 [fio] */
24992.855443 |   7)   |  /* block_rq_insert: 8,48 R 0 ()
40676480 + 16 [fio] */
24992.855443 |   7)   |  /* block_rq_insert: 8,48 R 0 ()
40676496 + 16 [fio] */
24992.855444 |   7)   |  /* block_rq_insert: 8,48 R 0 ()
40676512 + 16 [fio] */
24992.855445 |   7)   |  /* block_rq_insert: 8,48 R 0 ()
40676528 + 16 [fio] */
24992.855446 |   7)   |  /* block_rq_insert: 8,48 R 0 ()
40676544 + 16 [fio] */
24992.855447 |   7)   |  /* block_rq_insert: 8,48 R 0 ()
40676560 + 16 [fio] */
24992.855448 |   7)   |  /* block_rq_insert: 8,48 R 0 ()
40676576 + 16 [fio] */
24992.855449 |   7)   |  /* block_rq_insert: 8,48 R 0 ()
40676592 + 16 [fio] */
24992.855451 |   7)   |__blk_run_queue() {
24992.855451 |   7)   |  scsi_request_fn() {
24992.855453 |   7)   |/* block_rq_issue: 8,48 R 0
() 40676352 + 16 [fio] */
24992.855456 |   7)   |scsi_dispatch_cmd() {
24992.855457 |   7)   |  /*
scsi_dispatch_cmd_start: host_no=0 channel=0 id=0 lun=3 data_sgl=2
prot_sgl=0 prot_op=SCSI_PROT_NORMAL cmnd=(READ_10 lba=5084544 txlen=2
protect=0 raw=28 00 00 4d 95 80 00 00 02 00) */
24992.855459 |   7) + 15.104 us   |  scsi_dma_map();
24992.855476 |   7) + 19.271 us   |}
24992.855478 |   7)   |/* block_rq_issue: 8,48 R 0
() 40676368 + 16 [fio] */
24992.855481 |   7)   |scsi_dispatch_cmd() {
24992.855481 |   7)   |  /*
scsi_dispatch_cmd_start: host_no=0 channel=0 id=0 lun=3 data_sgl=2
prot_sgl=0 prot_op=SCSI_PROT_NORMAL cmnd=(READ_10 lba=5084546 txlen=2
protect=0 raw=28 00 00 4d 95 82 00 00 02 00) */
24992.855483 |   7)   2.083 us|  scsi_dma_map();
24992.855486 |   7)   5.730 us|}
24992.855488 |   7)   |/* block_rq_issue: 8,48 R 0
() 40676384 + 16 [fio] */
24992.855489 |   7)   |scsi_dispatch_cmd() {
24992.855490 |   7)   |  /*
scsi_dispatch_cmd_start: host_no=0 channel=0 id=0 lun=3 data_sgl=2
prot_sgl=0 prot_op=SCSI_PROT_NORMAL cmnd=(READ_10 lba=5084548 txlen=2
protect=0 raw=28 00 00 4d 95 84 00 00 02 00) */
24992.855491 |   7)   2.083 us|  scsi_dma_map();
24992.855495 |   7)   5.209 us|}
24992.855497 |   7)   |/* block_rq_issue: 8,48 R 0
() 40676400 + 16 [fio] */
24992.855498 |   7)   |scsi_dispatch_cmd() {
24992.855499 |   7)   |  /*
scsi_dispatch_cmd_start: host_no=0 channel=0 id=0 lun=3 data_sgl=2
prot_sgl=0 prot_op=SCSI_PROT_NORMAL cmnd=(READ_10 lba=5084550 txlen=2
protect=0 raw=28 00 00 4d 95 86 00 00 02 00) */
24992.855500 |   7)   2.083 us|  scsi_dma_map();
24992.855503 |   7)   5.208 us|}
24992.855505 |   7)   |/* block_rq_issue: 8,48 R 0
() 40676416 + 16 [fio] */
24992.855507 |   7)   |scsi_dispatch_cmd() {
24992.855508 |   7)   |  /*
scsi_dispatch_cmd_start: host_no=0 channel=0 id=0 lun=3 data_sgl=2
prot_sgl=0 prot_op=SCSI_PROT_NORMAL cmnd=(READ_10 lba=5084552 txlen=2
protect=0 raw=28 00 00 4d 95 88 00 00 02 00) */
24992.855509 |   7)   2.084 us|  scsi_dma_map();
24992.855513 |   7)   5.208 us|}
24992.855514 |   7)   |/* block_rq_issue: 8,48 R 0
() 40676432 + 16 [fio] */
24992.855516 |   7)   |scsi_dispatch_cmd() {
24992.855516 |   7)   |  /*
scsi_dispatch_cmd_start: host_no=0 channel=0 id=0 lun=3 data_sgl=2
prot_sgl=0 prot_op=SCSI_PROT_NORMAL cmnd=(READ_10 lba=5084554 txlen=2
protect=0 raw=28 00 00 4d 95 8a 00 00 02 00) */

Re: [PATCH] scsi: ufs: fix wrong command type of UTRD for UFSHCI v2.1

2017-09-26 Thread Subhash Jadavani

On 2017-09-06 02:58, kehuanlin wrote:
The command type of the UTRD in the UFS 2.1 specification is the same
as in UFS 2.0, and it is assumed that future UFS specifications will
follow the same definition.

Signed-off-by: kehuanlin 
---
 drivers/scsi/ufs/ufshcd.c | 14 --
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 5bc9dc1..c33a2f8 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -2195,10 +2195,11 @@ static int ufshcd_comp_devman_upiu(struct
ufs_hba *hba, struct ufshcd_lrb *lrbp)
u32 upiu_flags;
int ret = 0;

-   if (hba->ufs_version == UFSHCI_VERSION_20)
-   lrbp->command_type = UTP_CMD_TYPE_UFS_STORAGE;
-   else
+   if ((hba->ufs_version == UFSHCI_VERSION_10) ||
+   (hba->ufs_version == UFSHCI_VERSION_11))
lrbp->command_type = UTP_CMD_TYPE_DEV_MANAGE;
+   else
+   lrbp->command_type = UTP_CMD_TYPE_UFS_STORAGE;

	ufshcd_prepare_req_desc_hdr(lrbp, &upiu_flags, DMA_NONE);
if (hba->dev_cmd.type == DEV_CMD_TYPE_QUERY)
@@ -2222,10 +2223,11 @@ static int ufshcd_comp_scsi_upiu(struct
ufs_hba *hba, struct ufshcd_lrb *lrbp)
u32 upiu_flags;
int ret = 0;

-   if (hba->ufs_version == UFSHCI_VERSION_20)
-   lrbp->command_type = UTP_CMD_TYPE_UFS_STORAGE;
-   else
+   if ((hba->ufs_version == UFSHCI_VERSION_10) ||
+   (hba->ufs_version == UFSHCI_VERSION_11))
lrbp->command_type = UTP_CMD_TYPE_SCSI;
+   else
+   lrbp->command_type = UTP_CMD_TYPE_UFS_STORAGE;

if (likely(lrbp->cmd)) {
		ufshcd_prepare_req_desc_hdr(lrbp, &upiu_flags,


Looks good to me.
Reviewed-by: Subhash Jadavani 



--
The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project


Re: [PATCH] scsi: ufs: fix a pclint warning

2017-09-26 Thread Subhash Jadavani

On 2017-09-19 01:50, Zang Leigang wrote:

Signed-off-by: Zang Leigang 

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 794a4600e952..deb77535b8c9 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -3586,7 +3586,7 @@ static int ufshcd_uic_pwr_ctrl(struct ufs_hba
*hba, struct uic_command *cmd)
status = ufshcd_get_upmcrs(hba);
if (status != PWR_LOCAL) {
dev_err(hba->dev,
-   "pwr ctrl cmd 0x%0x failed, host upmcrs:0x%x\n",
+   "pwr ctrl cmd 0x%x failed, host upmcrs:0x%x\n",
cmd->command, status);
ret = (status != PWR_OK) ? status : -1;
}



Looks good to me.
Reviewed-by: Subhash Jadavani 

--
The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project


Re: [PATCH] scsi: ufs: add ufs a command complete time stamp

2017-09-26 Thread Subhash Jadavani

On 2017-09-20 03:30, Zang Leigang wrote:

Signed-off-by: Zang Leigang 

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 794a4600e952..2984f33095be 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -385,6 +385,8 @@ void ufshcd_print_trs(struct ufs_hba *hba,
unsigned long bitmap, bool pr_prdt)

dev_err(hba->dev, "UPIU[%d] - issue time %lld us\n",
tag, ktime_to_us(lrbp->issue_time_stamp));
+   dev_err(hba->dev, "UPIU[%d] - complete time %lld us\n",
+   tag, ktime_to_us(lrbp->complete_time_stamp));
dev_err(hba->dev,
"UPIU[%d] - Transfer Request Descriptor phys@0x%llx\n",
tag, (u64)lrbp->utrd_dma_addr);
@@ -1746,6 +1748,7 @@ static inline
 void ufshcd_send_command(struct ufs_hba *hba, unsigned int task_tag)
 {
hba->lrb[task_tag].issue_time_stamp = ktime_get();
+   hba->lrb[task_tag].complete_time_stamp.tv64 = 0UL;


can we use ktime_set(0, 0)?
complete_time_stamp = ktime_set(0, 0);



ufshcd_clk_scaling_start_busy(hba);
	__set_bit(task_tag, &hba->outstanding_reqs);
ufshcd_writel(hba, 1 << task_tag, REG_UTP_TRANSFER_REQ_DOOR_BELL);
@@ -4627,6 +4630,8 @@ static void __ufshcd_transfer_req_compl(struct
ufs_hba *hba,
}
if (ufshcd_is_clkscaling_supported(hba))
hba->clk_scaling.active_reqs--;
+
+   lrbp->complete_time_stamp = ktime_get();
}

/* clear corresponding bits of completed commands */
diff --git a/drivers/scsi/ufs/ufshcd.h b/drivers/scsi/ufs/ufshcd.h
index cdc8bd05f7df..67a8f8ef1b06 100644
--- a/drivers/scsi/ufs/ufshcd.h
+++ b/drivers/scsi/ufs/ufshcd.h
@@ -166,6 +166,7 @@ struct ufs_pm_lvl_states {
  * @lun: LUN of the command
  * @intr_cmd: Interrupt command (doesn't participate in interrupt 
aggregation)

  * @issue_time_stamp: time stamp for debug purposes
+ * @complete_time_stamp: time stamp for statistics
  * @req_abort_skip: skip request abort task flag
  */
 struct ufshcd_lrb {
@@ -189,6 +190,7 @@ struct ufshcd_lrb {
u8 lun; /* UPIU LUN id field is only 8-bit wide */
bool intr_cmd;
ktime_t issue_time_stamp;
+   ktime_t complete_time_stamp;


Just a suggestion for short name, s/complete/compl ? or s/complete/cmpl 
?




bool req_abort_skip;
 };


--
The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project


Re: qla2xxx UBSAN warning in 4.14-rc1

2017-09-26 Thread Madhani, Himanshu

> On Sep 18, 2017, at 3:49 AM, Meelis Roos  wrote:
> 
> Hello, I decided to widen the coverage of my kernel testbed and put some 
> FC cards into servers. This one is a PCI-X QLA2340 in HP Proliant DL 380 
> G4 (first 64-bit generation of Proliants). I got a UBSAN warning from 
> qla2xxx before probing for the firmware.
> 
> This is reproducible with or without firmware being available.
> 
> [3.905570] qla2xxx [:00:00.0]-0005: : QLogic Fibre Channel HBA 
> Driver: 10.00.00.01-k.
> [3.905977] qla2xxx :06:02.0: PCI IRQ 78 -> rerouted to legacy IRQ 18
> [3.906172] qla2xxx [:06:02.0]-001d: : Found an ISP2312 irq 18 iobase 
> 0xc9139000.
> [...]
> [4.180117] 
> 
> [4.180300] UBSAN: Undefined behaviour in 
> drivers/scsi/qla2xxx/qla_isr.c:275:14
> [4.180464] shift exponent 32 is too large for 32-bit type 'int'
> [4.180576] CPU: 0 PID: 138 Comm: systemd-udevd Not tainted 
> 4.14.0-rc1-9-g0666f560b71b #27
> [4.180741] Hardware name: HP ProLiant DL380 G4, BIOS P51 07/19/2007
> [4.180849] Call Trace:
> [4.180961]  dump_stack+0x4e/0x6c
> [4.181072]  ubsan_epilogue+0xd/0x3b
> [4.181179]  __ubsan_handle_shift_out_of_bounds+0x112/0x14c
> [4.181290]  ? try_to_del_timer_sync+0x44/0x68
> [4.181440]  qla2x00_mbx_completion+0x1c5/0x25d [qla2xxx]
> [4.182683]  qla2300_intr_handler+0x1ea/0x3bb [qla2xxx]
> [4.182827]  qla2x00_mailbox_command+0x77b/0x139a [qla2xxx]
> [4.182935]  ? __const_udelay+0x3c/0x3e
> [4.183073]  qla2x00_mbx_reg_test+0x83/0x114 [qla2xxx]
> [4.183213]  ? qla2x00_read_nvram_data+0x5c/0xe1 [qla2xxx]
> [4.183349]  qla2x00_chip_diag+0x354/0x45f [qla2xxx]
> [4.183489]  ? qla25xx_read_optrom_data+0x401/0x401 [qla2xxx]
> [4.183628]  qla2x00_initialize_adapter+0x2c2/0xa4e [qla2xxx]
> [4.183767]  qla2x00_probe_one+0x1681/0x392e [qla2xxx]
> [4.183883]  ? kernfs_add_one+0x11c/0x1ca
> [4.183990]  pci_device_probe+0x10b/0x1f1
> [4.184102]  driver_probe_device+0x21f/0x3a4
> [4.184210]  __driver_attach+0xa9/0xe1
> [4.184317]  ? driver_probe_device+0x3a4/0x3a4
> [4.184424]  bus_for_each_dev+0x6e/0xb5
> [4.184530]  driver_attach+0x22/0x3c
> [4.184638]  bus_add_driver+0x1d1/0x2ae
> [4.184745]  driver_register+0x78/0x130
> [4.184851]  __pci_register_driver+0x75/0xa8
> [4.184953]  ? 0xa0227000
> [4.185099]  qla2x00_module_init+0x21b/0x267 [qla2xxx]
> [4.185211]  do_one_initcall+0x5a/0x1e2
> [4.185323]  ? kfree+0x164/0x27a
> [4.185435]  do_init_module+0x9d/0x285
> [4.185545]  load_module+0x20db/0x38e3
> [4.185654]  ? disable_ro_nx+0x8f/0x8f
> [4.185765]  ? kernel_read+0x60/0xe5
> [4.185875]  ? kernel_read_file_from_fd+0x44/0x6d
> [4.185988]  SYSC_finit_module+0xa8/0xbc
> [4.186104]  SyS_finit_module+0x9/0xb
> [4.186216]  do_syscall_64+0x77/0x271
> [4.186331]  entry_SYSCALL64_slow_path+0x25/0x25
> [4.186444] RIP: 0033:0x7f29e6783219
> [4.186552] RSP: 002b:7ffc8cb7c858 EFLAGS: 0246 ORIG_RAX: 
> 0139
> [4.186723] RAX: ffda RBX: 561d18660bd0 RCX: 
> 7f29e6783219
> [4.186837] RDX:  RSI: 7f29e64992d5 RDI: 
> 0007
> [4.186944] RBP: 7f29e64992d5 R08:  R09: 
> 7ffc8cb7cdd0
> [4.187055] R10: 0007 R11: 0246 R12: 
> 
> [4.187170] R13: 561d18666140 R14: 0002 R15: 
> 7ffc8cb7c970
> [4.187284] 
> 
> [...]
> [4.489060] scsi host4: qla2xxx
> [4.489875] qla2xxx [:06:02.0]-00fb:4: QLogic QLA2340 - .
> [4.489976] qla2xxx [:06:02.0]-00fc:4: ISP2312: PCI-X (100 MHz) @ 
> :06:02.0 hdma+ host#=4 fw=3.03.28 IPX.
> 
> 
> 
> -- 
> Meelis Roos (mr...@linux.ee)

we’ll take a look at this

Thanks,
- Himanshu
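
For context, this class of UBSAN report comes from shifting a 32-bit int
by 32 or more bits; a minimal illustration of the usual fix (generic code,
not the actual qla2xxx source) is to widen the shifted type and bound the
shift count:

#include <linux/bitops.h>
#include <linux/types.h>

/* "1 << cnt" is undefined behaviour once cnt reaches 32. */
static u64 example_make_mask(unsigned int cnt)
{
	return cnt < 64 ? BIT_ULL(cnt) : 0;
}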



Re: [PATCH] scsi: ufs: continue to boot even with Boot LUN is disabled

2017-09-26 Thread Subhash Jadavani

On 2017-09-22 03:31, Huanlin Ke wrote:

Several configurable fields of the Device Descriptor and the Unit
Descriptors determine the Boot LUN status. The bBootEnable field and
the bBootLunEn attribute are set to zero by default, so the Boot LUN is
disabled by default.

In that case adding the SCSI device for the Boot LUN will fail, but the
UFS device can in fact still be used, so this failure shouldn't abort
the device boot.

Signed-off-by: Huanlin Ke 
---
 drivers/scsi/ufs/ufshcd.c | 19 ---
 1 file changed, 8 insertions(+), 11 deletions(-)

diff --git a/drivers/scsi/ufs/ufshcd.c b/drivers/scsi/ufs/ufshcd.c
index 794a460..bff84be 100644
--- a/drivers/scsi/ufs/ufshcd.c
+++ b/drivers/scsi/ufs/ufshcd.c
@@ -5998,25 +5998,22 @@ static int ufshcd_scsi_add_wlus(struct ufs_hba 
*hba)

}
scsi_device_put(hba->sdev_ufs_device);

-   sdev_boot = __scsi_add_device(hba->host, 0, 0,
-   ufshcd_upiu_wlun_to_scsi_wlun(UFS_UPIU_BOOT_WLUN), NULL);
-   if (IS_ERR(sdev_boot)) {
-   ret = PTR_ERR(sdev_boot);
-   goto remove_sdev_ufs_device;
-   }
-   scsi_device_put(sdev_boot);
-
sdev_rpmb = __scsi_add_device(hba->host, 0, 0,
ufshcd_upiu_wlun_to_scsi_wlun(UFS_UPIU_RPMB_WLUN), NULL);
if (IS_ERR(sdev_rpmb)) {
ret = PTR_ERR(sdev_rpmb);
-   goto remove_sdev_boot;
+   goto remove_sdev_ufs_device;
}
scsi_device_put(sdev_rpmb);
+
+   sdev_boot = __scsi_add_device(hba->host, 0, 0,
+   ufshcd_upiu_wlun_to_scsi_wlun(UFS_UPIU_BOOT_WLUN), NULL);
+   if (IS_ERR(sdev_boot))
+   dev_err(hba->dev, "%s: BOOT WLUN not found\n", __func__);
+   else
+   scsi_device_put(sdev_boot);
goto out;

-remove_sdev_boot:
-   scsi_remove_device(sdev_boot);
 remove_sdev_ufs_device:
scsi_remove_device(hba->sdev_ufs_device);
 out:


Looks good to me.
Reviewed-by: Subhash Jadavani 

--
The Qualcomm Innovation Center, Inc. is a member of Code Aurora Forum,
a Linux Foundation Collaborative Project


[PATCH] scsi: libsas: remove unused variable sas_ha

2017-09-26 Thread Colin King
From: Colin Ian King 

Remove unused variable sas_ha to clean up build warning
"unused variable ‘sas_ha’ [-Wunused-variable]"

Fixes: 042ebd293b86 ("scsi: libsas: kill useless ha_event and do some cleanup")
Signed-off-by: Colin Ian King 
---
 drivers/scsi/hisi_sas/hisi_sas_main.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/scsi/hisi_sas/hisi_sas_main.c 
b/drivers/scsi/hisi_sas/hisi_sas_main.c
index 29d6cb39299c..9e2990268f00 100644
--- a/drivers/scsi/hisi_sas/hisi_sas_main.c
+++ b/drivers/scsi/hisi_sas/hisi_sas_main.c
@@ -1046,7 +1046,6 @@ static void hisi_sas_rescan_topology(struct hisi_hba 
*hisi_hba, u32 old_state,
 
 static int hisi_sas_controller_reset(struct hisi_hba *hisi_hba)
 {
-   struct sas_ha_struct *sas_ha = &hisi_hba->sha;
struct device *dev = hisi_hba->dev;
struct Scsi_Host *shost = hisi_hba->shost;
u32 old_state, state;
-- 
2.14.1



Re: [PATCH] scsi: ioctl reset should wait for IOs to complete

2017-09-26 Thread James Bottomley
On Tue, 2017-09-26 at 10:22 -0700, Lee Duncan wrote:
> The SCSI ioctl reset path is smart enough to set the
> flag tmf_in_progress when a user-requested reset is
> processed, but it does not wait for IO that is in
> flight. This can result in lost IOs and hung
> processes. We should wait for a reasonable amount
> of time for either the IOs to complete or to fail
> the request.

The idea of the SCSI ioctl was to be async: issue reset and return.  Do
we have nothing in userspace that expects async behaviour? (if we have,
you could still add a flag or a new ioctl).

However:

> Signed-off-by: Lee Duncan 
> ---
>  drivers/scsi/scsi_error.c | 26 ++
>  1 file changed, 26 insertions(+)
> 
> diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
> index 38942050b265..b964152611c3 100644
> --- a/drivers/scsi/scsi_error.c
> +++ b/drivers/scsi/scsi_error.c
> @@ -57,6 +57,14 @@
>  #define BUS_RESET_SETTLE_TIME   (10)
>  #define HOST_RESET_SETTLE_TIME  (10)
>  
> +/*
> + * Time to wait for outstanding IOs when about to send
> + * a device reset, e.g. sg_reset. The msecs to wait must
> + * be a multiple of the msecs to wait per try.
> + */
> +#define MSECS_PER_TRY_FOR_IO_ON_RESET	500
> +#define MSECS_TO_WAIT_FOR_IO_ON_RESET	(MSECS_PER_TRY_FOR_IO_ON_RESET * 10)
> +
>  static int scsi_eh_try_stu(struct scsi_cmnd *scmd);
>  static int scsi_try_to_abort_cmd(struct scsi_host_template *,
>    struct scsi_cmnd *);
> @@ -2269,6 +2277,7 @@ void scsi_report_device_reset(struct Scsi_Host
> *shost, int channel, int target)
>   struct request *rq;
>   unsigned long flags;
>   int error = 0, rtn, val;
> + unsigned int msecs_to_wait = MSECS_TO_WAIT_FOR_IO_ON_RESET;

scsi_report_device_reset is the wrong place to do the wait: that's used
by low level device drivers to report that they've detected a bus reset
and is expected to return immediately.

James



Re: [PATCH] scsi: ioctl reset should wait for IOs to complete

2017-09-26 Thread Bart Van Assche
On Tue, 2017-09-26 at 10:22 -0700, Lee Duncan wrote:
> The SCSI ioctl reset path is smart enough to set the
> flag tmf_in_progress when a user-requested reset is
> processed, but it does not wait for IO that is in
> flight. This can result in lost IOs and hung
> processes. We should wait for a reasonable amount
> of time for either the IOs to complete or to fail
> the request.

Hello Lee,

I'm using this functionality all the time to test how SCSI target code handles
TMFs while SCSI commands are in progress. So I would regret if the SCSI reset
ioctl code would be modified such that it waits for outstanding requests.
Isn't the behavior you described a SCSI LLD bug? Shouldn't such bugs be fixed
instead of implementing a work-around in the SCSI core?

Thanks,

Bart.



[PATCH] scsi: ioctl reset should wait for IOs to complete

2017-09-26 Thread Lee Duncan
The SCSI ioctl reset path is smart enough to set the
flag tmf_in_progress when a user-requested reset is
processed, but it does not wait for IO that is in
flight. This can result in lost IOs and hung
processes. We should wait for a reasonable amount
of time for either the IOs to complete or to fail
the request.

Signed-off-by: Lee Duncan 
---
 drivers/scsi/scsi_error.c | 26 ++
 1 file changed, 26 insertions(+)

diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 38942050b265..b964152611c3 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -57,6 +57,14 @@
 #define BUS_RESET_SETTLE_TIME   (10)
 #define HOST_RESET_SETTLE_TIME  (10)
 
+/*
+ * Time to wait for outstanding IOs when about to send
+ * a device reset, e.g. sg_reset. The msecs to wait must
+ * be a multiple of the msecs to wait per try.
+ */
+#define MSECS_PER_TRY_FOR_IO_ON_RESET  500
+#define MSECS_TO_WAIT_FOR_IO_ON_RESET  (MSECS_PER_TRY_FOR_IO_ON_RESET * 10)
+
 static int scsi_eh_try_stu(struct scsi_cmnd *scmd);
 static int scsi_try_to_abort_cmd(struct scsi_host_template *,
 struct scsi_cmnd *);
@@ -2269,6 +2277,7 @@ void scsi_report_device_reset(struct Scsi_Host *shost, 
int channel, int target)
struct request *rq;
unsigned long flags;
int error = 0, rtn, val;
+   unsigned int msecs_to_wait = MSECS_TO_WAIT_FOR_IO_ON_RESET;
 
if (!capable(CAP_SYS_ADMIN) || !capable(CAP_SYS_RAWIO))
return -EACCES;
@@ -2301,6 +2310,22 @@ void scsi_report_device_reset(struct Scsi_Host *shost, 
int channel, int target)
 
spin_lock_irqsave(shost->host_lock, flags);
shost->tmf_in_progress = 1;
+
+   /* if any IOs in progress wait for them a while */
+   while ((atomic_read(&shost->host_busy) > 0) && (msecs_to_wait > 0)) {
+   spin_unlock_irqrestore(shost->host_lock, flags);
+   msleep(MSECS_PER_TRY_FOR_IO_ON_RESET);
+   msecs_to_wait -= MSECS_PER_TRY_FOR_IO_ON_RESET;
+   spin_lock_irqsave(shost->host_lock, flags);
+   }
+   if (atomic_read(&shost->host_busy)) {
+   shost->tmf_in_progress = 0;
+   spin_unlock_irqrestore(shost->host_lock, flags);
+   SCSI_LOG_ERROR_RECOVERY(3,
+   printk("%s: device reset failed: outstanding IO\n", 
__func__));
+   goto out_put_scmd_and_free;
+   }
+
spin_unlock_irqrestore(shost->host_lock, flags);
 
switch (val & ~SG_SCSI_RESET_NO_ESCALATE) {
@@ -2349,6 +2374,7 @@ void scsi_report_device_reset(struct Scsi_Host *shost, 
int channel, int target)
	wake_up(&shost->host_wait);
scsi_run_host_queues(shost);
 
+out_put_scmd_and_free:
scsi_put_command(scmd);
kfree(rq);
 
-- 
1.8.5.6



Re: [PATCHv5 2/5] scsi: Export blacklist flags to sysfs

2017-09-26 Thread Bart Van Assche
On Fri, 2017-09-22 at 08:04 +0200, Hannes Reinecke wrote:
> +static ssize_t
> +sdev_show_blacklist(struct device *dev, struct device_attribute *attr,
> + char *buf)
> +{

Please make this function accept the output buffer size as fourth argument.

> + if (len)
> + len += snprintf(buf + len, 2, " ");
> +
> + if (name)
> + len += snprintf(buf + len, strlen(name) + 1,
> + "%s", name);
> + else
> + len += snprintf(buf + len, 67,
> + "INVALID_BIT(%d)", i);
> +   }
> + if (len)
> + len += snprintf(buf + len, 2, "\n");

Please adjust the snprintf() statements such that no buffer overflow can be
triggered.

Thanks,

Bart.
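
To illustrate the two suggestions above (hypothetical helper, not the
actual patch): pass the remaining buffer size down and use scnprintf(),
which truncates instead of running past the PAGE_SIZE sysfs buffer.

#include <linux/kernel.h>

static ssize_t example_append_flag(char *buf, size_t buf_len, ssize_t len,
				   const char *name, int bit)
{
	if (name)
		len += scnprintf(buf + len, buf_len - len, "%s%s",
				 len ? " " : "", name);
	else
		len += scnprintf(buf + len, buf_len - len, "%sINVALID_BIT(%d)",
				 len ? " " : "", bit);
	return len;
}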

Re: [PATCH v2 1/1] scsi: fc: check for rport presence in fc_block_scsi_eh

2017-09-26 Thread Steffen Maier

On 09/26/2017 08:58 AM, Johannes Thumshirn wrote:

Coverity-scan recently found a possible NULL pointer dereference in
fc_block_scsi_eh() as starget_to_rport() either returns the rport for
the starget or NULL.

While it is rather unlikely to have fc_block_scsi_eh() called without
an rport associated it's a good idea to catch potential misuses of the
API gracefully.

Signed-off-by: Johannes Thumshirn 
Reviewed-by: Bart Van Assche 
---

Changes since v1:
- s/WARN_ON/WARN_ON_ONCE/ (Bart)

---
  drivers/scsi/scsi_transport_fc.c | 3 +++
  1 file changed, 3 insertions(+)

diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index ba9d70f8a6a1..38abff7b5dbc 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -3328,6 +3328,9 @@ int fc_block_scsi_eh(struct scsi_cmnd *cmnd)
  {
struct fc_rport *rport = starget_to_rport(scsi_target(cmnd->device));

+   if (WARN_ON_ONCE(!rport))
+   return 0;


Good idea.

However, return 0 or FAST_IO_FAIL?
I mean the callchains to this function (and of fc_block_rport()) react 
differently depending on the return value.
Returning 0 means that the rport left the blocked state, i.e. is usable 
for traffic again.

If there is no rport at all, I suppose one cannot use it for traffic.
If there is any I/O pending on this scope and we return 0, scsi_eh 
escalates; and if this happens for a host_reset we end up with offlined 
scsi_devices.
I wonder if returning FAST_IO_FAIL would be more appropriate here in 
this case, in order to have scsi_eh let the pending I/O bubble up for a 
timely path failover?


--
Mit freundlichen Grüßen / Kind regards
Steffen Maier

Linux on z Systems Development

IBM Deutschland Research & Development GmbH
Vorsitzende des Aufsichtsrats: Martina Koederitz
Geschaeftsfuehrung: Dirk Wittkopp
Sitz der Gesellschaft: Boeblingen
Registergericht: Amtsgericht Stuttgart, HRB 243294



Re: [PATCH v2 1/1] scsi: fc: check for rport presence in fc_block_scsi_eh

2017-09-26 Thread Hannes Reinecke
On 09/26/2017 08:58 AM, Johannes Thumshirn wrote:
> Coverity-scan recently found a possible NULL pointer dereference in
> fc_block_scsi_eh() as starget_to_rport() either returns the rport for
> the starget or NULL.
> 
> While it is rather unlikely to have fc_block_scsi_eh() called without
> an rport associated it's a good idea to catch potential misuses of the
> API gracefully.
> 
> Signed-off-by: Johannes Thumshirn 
> Reviewed-by: Bart Van Assche 
> ---
> 
> Changes since v1:
> - s/WARN_ON/WARN_ON_ONCE/ (Bart)
> 
> ---
>  drivers/scsi/scsi_transport_fc.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/drivers/scsi/scsi_transport_fc.c 
> b/drivers/scsi/scsi_transport_fc.c
> index ba9d70f8a6a1..38abff7b5dbc 100644
> --- a/drivers/scsi/scsi_transport_fc.c
> +++ b/drivers/scsi/scsi_transport_fc.c
> @@ -3328,6 +3328,9 @@ int fc_block_scsi_eh(struct scsi_cmnd *cmnd)
>  {
>   struct fc_rport *rport = starget_to_rport(scsi_target(cmnd->device));
>  
> + if (WARN_ON_ONCE(!rport))
> + return 0;
> +
>   return fc_block_rport(rport);
>  }
>  EXPORT_SYMBOL(fc_block_scsi_eh);
> 
Reviewed-by: Hannes Reinecke 

Cheers,

Hannes
-- 
Dr. Hannes ReineckeTeamlead Storage & Networking
h...@suse.de   +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)


[PATCH v2 1/1] scsi: fc: check for rport presence in fc_block_scsi_eh

2017-09-26 Thread Johannes Thumshirn
Coverity-scan recently found a possible NULL pointer dereference in
fc_block_scsi_eh() as starget_to_rport() either returns the rport for
the starget or NULL.

While it is rather unlikely to have fc_block_scsi_eh() called without
an rport associated it's a good idea to catch potential misuses of the
API gracefully.

Signed-off-by: Johannes Thumshirn 
Reviewed-by: Bart Van Assche 
---

Changes since v1:
- s/WARN_ON/WARN_ON_ONCE/ (Bart)

---
 drivers/scsi/scsi_transport_fc.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/scsi/scsi_transport_fc.c b/drivers/scsi/scsi_transport_fc.c
index ba9d70f8a6a1..38abff7b5dbc 100644
--- a/drivers/scsi/scsi_transport_fc.c
+++ b/drivers/scsi/scsi_transport_fc.c
@@ -3328,6 +3328,9 @@ int fc_block_scsi_eh(struct scsi_cmnd *cmnd)
 {
struct fc_rport *rport = starget_to_rport(scsi_target(cmnd->device));
 
+   if (WARN_ON_ONCE(!rport))
+   return 0;
+
return fc_block_rport(rport);
 }
 EXPORT_SYMBOL(fc_block_scsi_eh);
-- 
2.13.5



Re: [PATCH] scsi: fc: check for rport presence in fc_block_scsi_eh

2017-09-26 Thread Johannes Thumshirn
On Mon, Sep 25, 2017 at 11:29:07PM +, Bart Van Assche wrote:
> Did you perhaps intend to use WARN_ON_ONCE() instead of WARN_ON()?

Now that you're saying it. I'll send a v2.

Thanks,
Johannes

-- 
Johannes Thumshirn  Storage
jthumsh...@suse.de+49 911 74053 689
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: Felix Imendörffer, Jane Smithard, Graham Norton
HRB 21284 (AG Nürnberg)
Key fingerprint = EC38 9CAB C2C4 F25D 8600 D0D0 0393 969D 2D76 0850


Re: [PATCH V7 1/2] dma-mapping: Rework dma_get_cache_alignment()

2017-09-26 Thread Christoph Hellwig
On Tue, Sep 26, 2017 at 09:48:01AM +0800, 陈华才 wrote:
> Hi, Christoph,
> 
> Can I put the declaration in asm/dma-coherence.h?

Generally something not exposed to the rest of the kernel (that is not
in arch/mips/include/) would be preferred, but in the end the architecture
maintainer will have to decide.

> And, last time you said it is OK to pass a NULL to dma_get_cache_alignment()
> and cc all driver maintainers. I have done so.

No, I asked you to convert everything to pass the struct device and
cc the driver maintainers.