Re: [RFC PATCH 1/6] bsg: fix kernel panic resulting from missing allocation of a reply-buffer

2017-08-14 Thread Benjamin Block
On Sun, Aug 13, 2017 at 04:39:40PM +0200, Christoph Hellwig wrote:
> On Fri, Aug 11, 2017 at 06:01:42PM +0200, Benjamin Block wrote:
> > When the BSG interface is used with bsg-lib, and the user sends a
> > Bidirectional command - so when he gives an input- and output-buffer
> > (most users of our interface will likely do that, if they wanna get the
> > transport-level response data) - bsg will allocate two requests from the
> > queue. The first request's bio is used to map the input and the second
> > request's bio for the output (see bsg_map_hdr() in the if-statement with
> > (op == REQ_OP_SCSI_OUT && hdr->din_xfer_len)).
> > 
> > When we now allocate the full space of bsg_job, sense, dd_data for each
> > request, these will be wasted on the (linked) second request. They will
> > go unused all the time, as only the first request's bsg_job, sense and
> > dd_data is used by the LLDs and BSG itself.
> > 
> > Right now, because we don't allocate this on each request, those spaces
> > are only allocated for the first request in bsg-lib.
> > 
> > Maybe we can ignore this, if it gets to complicated, I don't wanne
> > prolong this unnecessary.
> 
> We have the same 'issue' with bidirection scsi commands - it's a side
> effect of having two request structures for these commands, and the
> only real fix would be to have a single request structure, which would
> be nice especially if we can't do it without growing struct request.
> 

Alright, I was not aware of that. That is fair then. Thx.


Beste Grüße / Best regards,
  - Benjamin Block
-- 
Linux on z Systems Development / IBM Systems & Technology Group
  IBM Deutschland Research & Development GmbH 
Vorsitz. AufsR.: Martina Koederitz /Geschäftsführung: Dirk Wittkopp
Sitz der Gesellschaft: Böblingen / Registergericht: AmtsG Stuttgart, HRB 243294



Re: [RFC PATCH 1/6] bsg: fix kernel panic resulting from missing allocation of a reply-buffer

2017-08-14 Thread Benjamin Block
Hey Christoph,

I looked over the patch in detail, see below.

> From f5b03b82df0569c035022c1c2535696186907f1a Mon Sep 17 00:00:00 2001
> From: Christoph Hellwig 
> Date: Fri, 11 Aug 2017 11:03:29 +0200
> Subject: bsg-lib: allocate sense data for each request
>
> Since we split the scsi_request out of the request the driver is supposed
> to provide storage for the sense buffer.  The bsg-lib code failed to do so,
> though and will crash anytime it is used.
>
> This patch moves bsg-lib to allocate and setup the bsg_job ahead of time,
> and allocate the sense data, which is used as reply buffer in bsg.
>
> Reported-by: Steffen Maier 
> Signed-off-by: Benjamin Block 
> Fixes: 82ed4db499b8 ("block: split scsi_request out of struct request")
> Cc:  #4.11+
> ---
>  block/bsg-lib.c | 51 
> +++--
>  include/linux/blkdev.h  |  1 -
>  include/linux/bsg-lib.h |  2 ++
>  3 files changed, 30 insertions(+), 24 deletions(-)
>
> diff --git a/block/bsg-lib.c b/block/bsg-lib.c
> index c4513b23f57a..c07333c3b785 100644
> --- a/block/bsg-lib.c
> +++ b/block/bsg-lib.c
> @@ -37,13 +37,11 @@ static void bsg_destroy_job(struct kref *kref)
>   struct bsg_job *job = container_of(kref, struct bsg_job, kref);
>   struct request *rq = job->req;
>
> - blk_end_request_all(rq, BLK_STS_OK);
> -
>   put_device(job->dev);   /* release reference for the request */
>
>   kfree(job->request_payload.sg_list);
>   kfree(job->reply_payload.sg_list);
> - kfree(job);
> + blk_end_request_all(rq, BLK_STS_OK);

What is the reason for moving that last line? Just wondering whether
that might change the behavior somehow, although it doesn't look like it
from the code.

>  }
>
>  void bsg_job_put(struct bsg_job *job)
> @@ -100,7 +98,7 @@ EXPORT_SYMBOL_GPL(bsg_job_done);
>   */
>  static void bsg_softirq_done(struct request *rq)
>  {
> - struct bsg_job *job = rq->special;
> + struct bsg_job *job = blk_mq_rq_to_pdu(rq);
>
>   bsg_job_put(job);
>  }
> @@ -129,24 +127,11 @@ static int bsg_map_buffer(struct bsg_buffer *buf, 
> struct request *req)
>  static int bsg_create_job(struct device *dev, struct request *req)
>  {
>   struct request *rsp = req->next_rq;
> - struct request_queue *q = req->q;
> + struct bsg_job *job = blk_mq_rq_to_pdu(req);
>   struct scsi_request *rq = scsi_req(req);
> - struct bsg_job *job;
>   int ret;
>
> - BUG_ON(req->special);
> -
> - job = kzalloc(sizeof(struct bsg_job) + q->bsg_job_size, GFP_KERNEL);
> - if (!job)
> - return -ENOMEM;
> -
> - req->special = job;
> - job->req = req;
> - if (q->bsg_job_size)
> - job->dd_data = (void *)[1];
> - job->request = rq->cmd;
>   job->request_len = rq->cmd_len;
> - job->reply = rq->sense;
>   job->reply_len = SCSI_SENSE_BUFFERSIZE; /* Size of sense buffer
>* allocated */
>   if (req->bio) {
> @@ -187,7 +172,6 @@ static void bsg_request_fn(struct request_queue *q)
>  {
>   struct device *dev = q->queuedata;
>   struct request *req;
> - struct bsg_job *job;
>   int ret;
>
>   if (!get_device(dev))
> @@ -207,8 +191,7 @@ static void bsg_request_fn(struct request_queue *q)
>   continue;
>   }
>
> - job = req->special;
> - ret = q->bsg_job_fn(job);
> + ret = q->bsg_job_fn(blk_mq_rq_to_pdu(req));
>   spin_lock_irq(q->queue_lock);
>   if (ret)
>   break;
> @@ -219,6 +202,27 @@ static void bsg_request_fn(struct request_queue *q)
>   spin_lock_irq(q->queue_lock);
>  }
>
> +static int bsg_init_rq(struct request_queue *q, struct request *req, gfp_t 
> gfp)
> +{
> + struct bsg_job *job = blk_mq_rq_to_pdu(req);
> +
> + memset(job, 0, sizeof(*job));
> + job->req = req;
> + job->request = job->sreq.cmd;

That doesn't work with bsg.c if the request submitted by the user is
bigger than BLK_MAX_CDB. There is code in blk_fill_sgv4_hdr_rq() that
will reassign the req->cmd point in that case to something else.

This is maybe wrong in the same vein as my Patch 1 is. If not for the
legacy code in bsg.c, setting this here, will miss changes to that
pointer between request-allocation and job-submission.

So maybe just move this to bsg_create_job().

> + job->dd_data = job + 1;
> + job->reply = job->sreq.sense = kzalloc(job->reply_len, gfp);

job->reply_len will be 0 here, won't it? You probably have to pull the
"job->reply_len = SCSI_SENSE_BUFFERSIZE" here from bsg_create_job().

> + if (!job->reply)
> + return -ENOMEM;
> + return 0;
> +}
> +
> +static void bsg_exit_rq(struct request_queue *q, struct request *req)
> +{
> + struct bsg_job *job = blk_mq_rq_to_pdu(req);
> +
> + kfree(job->reply);

Don't we need to 

Re: [RFC PATCH 1/6] bsg: fix kernel panic resulting from missing allocation of a reply-buffer

2017-08-11 Thread Christoph Hellwig
On Fri, Aug 11, 2017 at 03:49:29PM +0200, Benjamin Block wrote:
> On Fri, Aug 11, 2017 at 11:14:15AM +0200, Christoph Hellwig wrote:
> > But patch 1 still creates an additional copy of the sense data for
> > all bsg users.
> >
> 
> Huh? What additional copy? There is one reply-buffer and that is copied
> into the user-buffer should it contain valid data. Just like in your
> patch, neither you, nor me touches any of the copy-code. There is also
> no changes to how the driver get their data into that buffer, it will
> still be copied in both cases.

You're right - I misread your patch.  But that does make it worse as
this means that with your patch we re-assign the scsi_request.sense
pointer when using bsg.  That will lead to crashes if using the bsg
code against e.g. a normal scsi device using bsg when that request
later gets reused for something that is not bsg.

> 
> > 
> > Can you test the patch below which implements my suggestion?  Your
> > other patches should still apply fine on top modulo minor context
> > changes.
> 
> Only your patch on top of 4.13-rc4. din_xferp (D) is also empty, which is
> not taken from the sense-buffer.

Can't parse this.

> =
> BUG kmalloc-1024 (Not tainted): Invalid object pointer 0x4ad9e0f0
> -

Oops - if we don't allocate the job separately we should not free it either.
Updated patch for that below:

---
>From 4cd32ee48e334b62b55bff0d380833b978454040 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig 
Date: Fri, 11 Aug 2017 11:03:29 +0200
Subject: bsg-lib: allocate sense data for each request

Since we split the scsi_request out of the request the driver is supposed
to provide storage for the sense buffer.  The bsg-lib code failed to do so,
though and will crash anytime it is used.

This patch moves bsg-lib to allocate and setup the bsg_job ahead of time,
and allocate the sense data, which is used as reply buffer in bsg.

Reported-by: Steffen Maier 
Signed-off-by: Benjamin Block 
Fixes: 82ed4db499b8 ("block: split scsi_request out of struct request")
Cc:  #4.11+
---
 block/bsg-lib.c | 53 +++--
 include/linux/blkdev.h  |  1 -
 include/linux/bsg-lib.h |  2 ++
 3 files changed, 31 insertions(+), 25 deletions(-)

diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index c4513b23f57a..215893dbd038 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -100,7 +100,7 @@ EXPORT_SYMBOL_GPL(bsg_job_done);
  */
 static void bsg_softirq_done(struct request *rq)
 {
-   struct bsg_job *job = rq->special;
+   struct bsg_job *job = blk_mq_rq_to_pdu(rq);
 
bsg_job_put(job);
 }
@@ -129,26 +129,9 @@ static int bsg_map_buffer(struct bsg_buffer *buf, struct 
request *req)
 static int bsg_create_job(struct device *dev, struct request *req)
 {
struct request *rsp = req->next_rq;
-   struct request_queue *q = req->q;
-   struct scsi_request *rq = scsi_req(req);
-   struct bsg_job *job;
+   struct bsg_job *job = blk_mq_rq_to_pdu(req);
int ret;
 
-   BUG_ON(req->special);
-
-   job = kzalloc(sizeof(struct bsg_job) + q->bsg_job_size, GFP_KERNEL);
-   if (!job)
-   return -ENOMEM;
-
-   req->special = job;
-   job->req = req;
-   if (q->bsg_job_size)
-   job->dd_data = (void *)[1];
-   job->request = rq->cmd;
-   job->request_len = rq->cmd_len;
-   job->reply = rq->sense;
-   job->reply_len = SCSI_SENSE_BUFFERSIZE; /* Size of sense buffer
-* allocated */
if (req->bio) {
ret = bsg_map_buffer(>request_payload, req);
if (ret)
@@ -187,7 +170,6 @@ static void bsg_request_fn(struct request_queue *q)
 {
struct device *dev = q->queuedata;
struct request *req;
-   struct bsg_job *job;
int ret;
 
if (!get_device(dev))
@@ -207,8 +189,7 @@ static void bsg_request_fn(struct request_queue *q)
continue;
}
 
-   job = req->special;
-   ret = q->bsg_job_fn(job);
+   ret = q->bsg_job_fn(blk_mq_rq_to_pdu(req));
spin_lock_irq(q->queue_lock);
if (ret)
break;
@@ -219,6 +200,29 @@ static void bsg_request_fn(struct request_queue *q)
spin_lock_irq(q->queue_lock);
 }
 
+static int bsg_init_rq(struct request_queue *q, struct request *req, gfp_t gfp)
+{
+   struct bsg_job *job = blk_mq_rq_to_pdu(req);
+
+   memset(job, 0, sizeof(*job));
+   job->req = req;
+   job->dd_data = job + 1;
+   job->request = job->sreq.cmd;
+   job->request_len = job->sreq.cmd_len;
+   job->reply_len = SCSI_SENSE_BUFFERSIZE;
+   job->reply = job->sreq.sense = 

Re: [RFC PATCH 1/6] bsg: fix kernel panic resulting from missing allocation of a reply-buffer

2017-08-11 Thread Benjamin Block
On Fri, Aug 11, 2017 at 11:14:15AM +0200, Christoph Hellwig wrote:
> But patch 1 still creates an additional copy of the sense data for
> all bsg users.
>

Huh? What additional copy? There is one reply-buffer and that is copied
into the user-buffer should it contain valid data. Just like in your
patch, neither you, nor me touches any of the copy-code. There is also
no changes to how the driver get their data into that buffer, it will
still be copied in both cases.

> 
> Can you test the patch below which implements my suggestion?  Your
> other patches should still apply fine on top modulo minor context
> changes.

Only your patch on top of 4.13-rc4. din_xferp (D) is also empty, which is
not taken from the sense-buffer.

=
BUG kmalloc-1024 (Not tainted): Invalid object pointer 0x4ad9e0f0
-

Disabling lock debugging due to kernel taint
INFO: Slab 0x03d1012b6600 objects=24 used=11 fp=0x4ad9da58 
flags=0x3fffc008101
CPU: 2 PID: 20 Comm: ksoftirqd/2 Tainted: GB   
4.13.0-rc4-bsg-regression+ #2
Hardware name: IBM 2964 N96 702 (z/VM 6.4.0)
Call Trace:
([<00117532>] show_stack+0x8a/0xe0)
 [<00bcbaee>] dump_stack+0x96/0xd8 
 [<003cd5fc>] slab_err+0xac/0xc0 
 [<003d68e4>] free_debug_processing+0x554/0x570 
 [<003d69ae>] __slab_free+0xae/0x618 
 [<003d7dce>] kfree+0x44e/0x4a0 
 [<00859b4e>] blk_done_softirq+0x146/0x160 
 [<00bf4ec0>] __do_softirq+0x3d0/0x840 
 [<001662a6>] run_ksoftirqd+0x3e/0xb8 
 [<001957fc>] smpboot_thread_fn+0x2f4/0x318 
 [<0018f6f6>] kthread+0x166/0x178 
 [<00bf3cf2>] kernel_thread_starter+0x6/0xc 
 [<00bf3cec>] kernel_thread_starter+0x0/0xc 
INFO: lockdep is turned off.
FIX kmalloc-1024: Object at 0x4ad9e0f0 not freed
=
BUG kmalloc-1024 (Tainted: GB  ): Invalid object pointer 
0x4ad9f630
-

INFO: Slab 0x03d1012b6600 objects=24 used=11 fp=0x4ad98558 
flags=0x3fffc008101
CPU: 2 PID: 20 Comm: ksoftirqd/2 Tainted: GB   
4.13.0-rc4-bsg-regression+ #2
Hardware name: IBM 2964 N96 702 (z/VM 6.4.0)
Call Trace:
([<00117532>] show_stack+0x8a/0xe0)
 [<00bcbaee>] dump_stack+0x96/0xd8 
 [<003cd5fc>] slab_err+0xac/0xc0 
 [<003d68e4>] free_debug_processing+0x554/0x570 
 [<003d69ae>] __slab_free+0xae/0x618 
 [<003d7dce>] kfree+0x44e/0x4a0 
 [<00859b4e>] blk_done_softirq+0x146/0x160 
 [<00bf4ec0>] __do_softirq+0x3d0/0x840 
 [<001662a6>] run_ksoftirqd+0x3e/0xb8 
 [<001957fc>] smpboot_thread_fn+0x2f4/0x318 
 [<0018f6f6>] kthread+0x166/0x178 
 [<00bf3cf2>] kernel_thread_starter+0x6/0xc 
 [<00bf3cec>] kernel_thread_starter+0x0/0xc 
INFO: lockdep is turned off.
FIX kmalloc-1024: Object at 0x4ad9f630 not freed
=
BUG kmalloc-1024 (Tainted: GB  ): Invalid object pointer 
0x4ad986a0
-

INFO: Slab 0x03d1012b6600 objects=24 used=13 fp=0x4ad9d508 
flags=0x3fffc008101
CPU: 2 PID: 20 Comm: ksoftirqd/2 Tainted: GB   
4.13.0-rc4-bsg-regression+ #2
Hardware name: IBM 2964 N96 702 (z/VM 6.4.0)
Call Trace:
([<00117532>] show_stack+0x8a/0xe0)
 [<00bcbaee>] dump_stack+0x96/0xd8 
 [<003cd5fc>] slab_err+0xac/0xc0 
 [<003d68e4>] free_debug_processing+0x554/0x570 
 [<003d69ae>] __slab_free+0xae/0x618 
 [<003d7dce>] kfree+0x44e/0x4a0
 [<00859b4e>] blk_done_softirq+0x146/0x160
 [<00bf4ec0>] __do_softirq+0x3d0/0x840
 [<001662a6>] run_ksoftirqd+0x3e/0xb8
 [<001957fc>] smpboot_thread_fn+0x2f4/0x318
 [<0018f6f6>] kthread+0x166/0x178
 [<00bf3cf2>] kernel_thread_starter+0x6/0xc
 [<00bf3cec>] kernel_thread_starter+0x0/0xc
INFO: lockdep is turned off.
FIX kmalloc-1024: Object at 0x4ad986a0 not freed
=
BUG kmalloc-1024 (Tainted: GB  ): Invalid object pointer 
0x4ad9d650
-

INFO: Slab 0x03d1012b6600 objects=24 used=15 fp=0x4ad9ea48 
flags=0x3fffc008101
CPU: 2 PID: 20 Comm: ksoftirqd/2 Tainted: GB   
4.13.0-rc4-bsg-regression+ #2
Hardware name: IBM 2964 N96 702 (z/VM 6.4.0)
Call Trace:
([<00117532>] show_stack+0x8a/0xe0)
 [<00bcbaee>] dump_stack+0x96/0xd8
 [<003cd5fc>] slab_err+0xac/0xc0
 [<003d68e4>] 

Re: [RFC PATCH 1/6] bsg: fix kernel panic resulting from missing allocation of a reply-buffer

2017-08-11 Thread Christoph Hellwig
But patch 1 still creates an additional copy of the sense data for
all bsg users.

Can you test the patch below which implements my suggestion?  Your
other patches should still apply fine on top modulo minor context
changes.

---
>From 4cd32ee48e334b62b55bff0d380833b978454040 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig 
Date: Fri, 11 Aug 2017 11:03:29 +0200
Subject: bsg-lib: allocate sense data for each request

Since we split the scsi_request out of the request the driver is supposed
to provide storage for the sense buffer.  The bsg-lib code failed to do so,
though and will crash anytime it is used.

This patch moves bsg-lib to allocate and setup the bsg_job ahead of time,
and allocate the sense data, which is used as reply buffer in bsg.

Reported-by: Steffen Maier 
Signed-off-by: Benjamin Block 
Fixes: 82ed4db499b8 ("block: split scsi_request out of struct request")
Cc:  #4.11+
---
 block/bsg-lib.c | 53 +++--
 include/linux/blkdev.h  |  1 -
 include/linux/bsg-lib.h |  2 ++
 3 files changed, 31 insertions(+), 25 deletions(-)

diff --git a/block/bsg-lib.c b/block/bsg-lib.c
index c4513b23f57a..215893dbd038 100644
--- a/block/bsg-lib.c
+++ b/block/bsg-lib.c
@@ -100,7 +100,7 @@ EXPORT_SYMBOL_GPL(bsg_job_done);
  */
 static void bsg_softirq_done(struct request *rq)
 {
-   struct bsg_job *job = rq->special;
+   struct bsg_job *job = blk_mq_rq_to_pdu(rq);
 
bsg_job_put(job);
 }
@@ -129,26 +129,9 @@ static int bsg_map_buffer(struct bsg_buffer *buf, struct 
request *req)
 static int bsg_create_job(struct device *dev, struct request *req)
 {
struct request *rsp = req->next_rq;
-   struct request_queue *q = req->q;
-   struct scsi_request *rq = scsi_req(req);
-   struct bsg_job *job;
+   struct bsg_job *job = blk_mq_rq_to_pdu(req);
int ret;
 
-   BUG_ON(req->special);
-
-   job = kzalloc(sizeof(struct bsg_job) + q->bsg_job_size, GFP_KERNEL);
-   if (!job)
-   return -ENOMEM;
-
-   req->special = job;
-   job->req = req;
-   if (q->bsg_job_size)
-   job->dd_data = (void *)[1];
-   job->request = rq->cmd;
-   job->request_len = rq->cmd_len;
-   job->reply = rq->sense;
-   job->reply_len = SCSI_SENSE_BUFFERSIZE; /* Size of sense buffer
-* allocated */
if (req->bio) {
ret = bsg_map_buffer(>request_payload, req);
if (ret)
@@ -187,7 +170,6 @@ static void bsg_request_fn(struct request_queue *q)
 {
struct device *dev = q->queuedata;
struct request *req;
-   struct bsg_job *job;
int ret;
 
if (!get_device(dev))
@@ -207,8 +189,7 @@ static void bsg_request_fn(struct request_queue *q)
continue;
}
 
-   job = req->special;
-   ret = q->bsg_job_fn(job);
+   ret = q->bsg_job_fn(blk_mq_rq_to_pdu(req));
spin_lock_irq(q->queue_lock);
if (ret)
break;
@@ -219,6 +200,29 @@ static void bsg_request_fn(struct request_queue *q)
spin_lock_irq(q->queue_lock);
 }
 
+static int bsg_init_rq(struct request_queue *q, struct request *req, gfp_t gfp)
+{
+   struct bsg_job *job = blk_mq_rq_to_pdu(req);
+
+   memset(job, 0, sizeof(*job));
+   job->req = req;
+   job->dd_data = job + 1;
+   job->request = job->sreq.cmd;
+   job->request_len = job->sreq.cmd_len;
+   job->reply_len = SCSI_SENSE_BUFFERSIZE;
+   job->reply = job->sreq.sense = kzalloc(job->reply_len, gfp);
+   if (!job->reply)
+   return -ENOMEM;
+   return 0;
+}
+
+static void bsg_exit_rq(struct request_queue *q, struct request *req)
+{
+   struct bsg_job *job = blk_mq_rq_to_pdu(req);
+
+   kfree(job->reply);
+}
+
 /**
  * bsg_setup_queue - Create and add the bsg hooks so we can receive requests
  * @dev: device to attach bsg device to
@@ -235,7 +239,9 @@ struct request_queue *bsg_setup_queue(struct device *dev, 
char *name,
q = blk_alloc_queue(GFP_KERNEL);
if (!q)
return ERR_PTR(-ENOMEM);
-   q->cmd_size = sizeof(struct scsi_request);
+   q->cmd_size = sizeof(struct bsg_job) + dd_job_size;
+   q->init_rq_fn = bsg_init_rq;
+   q->exit_rq_fn = bsg_exit_rq;
q->request_fn = bsg_request_fn;
 
ret = blk_init_allocated_queue(q);
@@ -243,7 +249,6 @@ struct request_queue *bsg_setup_queue(struct device *dev, 
char *name,
goto out_cleanup_queue;
 
q->queuedata = dev;
-   q->bsg_job_size = dd_job_size;
q->bsg_job_fn = job_fn;
queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q);
queue_flag_set_unlocked(QUEUE_FLAG_SCSI_PASSTHROUGH, q);
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 

Re: [RFC PATCH 1/6] bsg: fix kernel panic resulting from missing allocation of a reply-buffer

2017-08-11 Thread Christoph Hellwig
My point was that we now gurantee that that the sense data is not
a stack pointer an a driver can DMA to it.  Now for BSG the sense
data is "just" abused as reply, but the point still stands - we
don't want to pass a possible stack pointer to drivers in a data
buffer because we want to allow DMA to it.

That being said with your patch 4 that becomes a moot point as we'll
now always dynamically allocate it.  So maybe just reorder that to go
first and we should be fine.


Re: [RFC PATCH 1/6] bsg: fix kernel panic resulting from missing allocation of a reply-buffer

2017-08-10 Thread Benjamin Block
On Fri, Aug 11, 2017 at 12:10:38AM +0200, Benjamin Block wrote:
> On Thu, Aug 10, 2017 at 11:32:17AM +0200, Christoph Hellwig wrote:
> > We can't use an on-stack buffer for the sense data, as drivers will
> > dma to it.  So we should reuse the SCSI init_rq_fn() for the BSG
> > queues and/or implement the same scheme.
> > 
> 
...
> 
>  struct sg_io_v4
>  +--+
>  |  |  
>  | request>++
>  |   + _len |  ||
>  |   (A)|  | BSG Request|
>  |  |  | e.g. struct fc_bsg_request | Depends on BSG 
> implementation
>  |  |  || FC vs. iSCSI vs. ...
>  |  |  ++
>  |  |
>  | response--->++ Used as _Output_
>  |   + max_len  |  || User doesn't initialize
>  |   (B)|  | BSG Reply  | User provides (optional)
>  |  |  | e.g. struct fc_bsg_reply   |   memory; May be NULL.
>  |  |  ||
>  |  |  ++
>  |  |
>  | dout_xferp->+---+ Stuff send on the wire by
>  |   + _len |  |   |   the LLD
>  |   (C)|  | Transport Protocol IU | Aligned on PAGE_SIZE
>  |  |  | e.g. FC-GS-7 CT_IU|
>  |  |  |   |
>  |  |  +---+
>  |  |
>  | din_xferp-->+---+ Buffer for response data by
>  |   + _len |  |   |   the LLD
>  |   (D)|  | Transport Protocol IU | Aligned on PAGE_SIZE
>  |  |  | e.g. FC-GS-7 CT_IU|
>  |  |  |   |
>  |  |  +---+
>  +--+
> 
...
> 
>  struct request (E)
>  +--+
>  |  |  struct scsi_request
>  | scsi_request--->+-+
>  |  |  | |
>  |  |  | cmd-> Copy of (A)
>  |  |  |  + _len | Space in struct or kzalloc
>  |  |  |  (G)|
>  |  |  | |
>  |  |  | sense---> Space for BSG Reply
>  |  |  |  + _len | Same Data-Structure as (B)
>  |  |  |  (H)| NOT actually pointer (B)
>  |  |  | | 'reply_buffer' in my patch 
>  |  |  +-+
>  |  |
>  | bio> Mapped via blk_rq_map_user() to (C) dout_xferp
>  |  |
>  | next_rq-+
>  |  |  |
>  +--+  |
>|
>  struct request (F)|(if used)
>  +--+<-+
>  |  |
>  | scsi_request---> Unused here
>  |  |
>  | bio> Mapped via blk_rq_map_user() to (D) din_xferp
>  |  |
>  +--+
> 
...
> 
>  struct bsg_job
>  +-+
>  | |
>  | request---> (G) scsi_request->cmd -> Copy of (A)
>  |   + _len|   e.g. struct fc_bsg_request
>  | |
>  | reply-> (H) scsi_request->sense -> 'reply_buffer'
>  |   + _len|   e.g. struct fc_bsg_reply
>  | |
>  | request_payload---> struct scatterlist ... map (E)->bio
>  |   + _len|
>  |   (I)   |
>  | |
>  | reply_payload-> struct scatterlist ... map (F)->bio
>  |   + _len|
>  |   (J)   |
>  | |
>  +-+
> 

> 
> This worked till it broke. Right now every driver that tries to access
> (H) will panic the system, or cause very undefined behavior. I suspect
> no driver right now tries to do any DMA into (H); before the regression,
> this has been also an on-stack variable (I suspect since BSG was
> introduced, haven't checked though).
> 
> The asymmetries between the first struct request (E) and the following
> (F) also makes it hard to use the same scheme as in other drivers, where
> init_rq_fn() gets to initialize each request in the same'ish way. Or?
> Just looking at it right now, this would require some bigger rework that
> is not appropriate for a stable bug-fix.
> 

Just some more brain-dump here.

One more problem for direct DMA into (H) in the current BSG setup is
probably, that the transport classes have each their own private format
for the BSG reply (struct fc_bsg_reply and struct iscsi_bsg_reply right
now I think). The current stack doesn't take any precaution to properly
align this in accords to what the LLDs specifies for the blk-layer... so
lets assume struct fc_bsg_reply. This has fields for actual protocol IUs
(in contrast to iSCSI, where it only has some vendor-reply buffer [an
array with 0 length...]), but they start after 

Re: [RFC PATCH 1/6] bsg: fix kernel panic resulting from missing allocation of a reply-buffer

2017-08-10 Thread Christoph Hellwig
We can't use an on-stack buffer for the sense data, as drivers will
dma to it.  So we should reuse the SCSI init_rq_fn() for the BSG
queues and/or implement the same scheme.


[RFC PATCH 1/6] bsg: fix kernel panic resulting from missing allocation of a reply-buffer

2017-08-09 Thread Benjamin Block
In contrast to the normal SCSI-lib, the BSG block-queue doesn't make use of
any extra init_rq_fn() to make additional allocations during
request-creation, and the request sense-pointer is not used to transport
SCSI sense data, but is used as backing for the bsg_job->reply pointer;
that in turn is used in the LLDs to store protocol IUs or similar stuff.
This 're-purposing' of the sense-pointer is done in the BSG blk-lib
(bsg_create_job()), during the queue-processing.

Failing to allocate/assign it results in illegal dereferences because LLDs
use this pointer unquestioned, as can be seen in the various
BSG-implementations:

drivers/scsi/libfc/fc_lport.c:  fc_lport_bsg_request()
drivers/scsi/qla2xxx/qla_bsg.c: qla24xx_bsg_request()
drivers/scsi/qla4xxx/ql4_bsg.c: qla4xxx_process_vendor_specific()
drivers/s390/scsi/zfcp_fc.c:zfcp_fc_ct_els_job_handler()
...

An example panic on s390x, using the zFCP driver, looks like this (I had
debugging on, otherwise NULL-pointer dereferences wouldn't even panic on
s390x):

Unable to handle kernel pointer dereference in virtual kernel address space
Failing address: 6b6b6b6b6b6b6000 TEID: 6b6b6b6b6b6b6403
Fault in home space mode while using kernel ASCE.
AS:01590007 R3:0024
Oops: 0038 ilc:2 [#1] PREEMPT SMP DEBUG_PAGEALLOC
Modules linked in: 
CPU: 2 PID: 0 Comm: swapper/2 Not tainted 4.12.0-bsg-regression+ #3
Hardware name: IBM 2964 N96 702 (z/VM 6.4.0)
task: 65cb0100 task.stack: 65cb4000
Krnl PSW : 0704e0018000 03ff801e4156 
(zfcp_fc_ct_els_job_handler+0x16/0x58 [zfcp])
   R:0 T:1 IO:1 EX:1 Key:0 M:1 W:0 P:0 AS:3 CC:2 PM:0 RI:0 EA:3
Krnl GPRS: 0001 5fa9d0d0 5fa9d078 00e16866
   03ff0290 6b6b6b6b6b6b6b6b 59f78f00 000f
   593a0958 593a0958 60d88800 5ddd4c38
   58b50100 0700659cba08 03ff801e8556 659cb9a8
Krnl Code: 03ff801e4146: e3102054lg  %r1,80(%r2)
   03ff801e414c: 58402040   l   %r4,64(%r2)
  #03ff801e4150: e3502024   lg  %r5,32(%r2)
  >03ff801e4156: 50405004   st  %r4,4(%r5)
   03ff801e415a: e54c5008   mvhi8(%r5),0
   03ff801e4160: e33010280012   lt  %r3,40(%r1)
   03ff801e4166: a718fffb   lhi %r1,-5
   03ff801e416a: 1803   lr  %r0,%r3
Call Trace:
([<03ff801e8556>] zfcp_fsf_req_complete+0x726/0x768 [zfcp])
 [<03ff801ea82a>] zfcp_fsf_reqid_check+0x102/0x180 [zfcp]
 [<03ff801eb980>] zfcp_qdio_int_resp+0x230/0x278 [zfcp]
 [<009b91b6>] qdio_kick_handler+0x2ae/0x2c8
 [<009b9e3e>] __tiqdio_inbound_processing+0x406/0xc10
 [<001684c2>] tasklet_action+0x15a/0x1d8
 [<00bd28ec>] __do_softirq+0x3ec/0x848
 [<001675a4>] irq_exit+0x74/0xf8
 [<0010dd6a>] do_IRQ+0xba/0xf0
 [<00bd19e8>] io_int_handler+0x104/0x2d4
 [<001033b6>] enabled_wait+0xb6/0x188
([<0010339e>] enabled_wait+0x9e/0x188)
 [<0010396a>] arch_cpu_idle+0x32/0x50
 [<00bd0112>] default_idle_call+0x52/0x68
 [<001cd0fa>] do_idle+0x102/0x188
 [<001cd41e>] cpu_startup_entry+0x3e/0x48
 [<00118c64>] smp_start_secondary+0x11c/0x130
 [<00bd2016>] restart_int_handler+0x62/0x78
 [<>]   (null)
INFO: lockdep is turned off.
Last Breaking-Event-Address:
 [<03ff801e41d6>] zfcp_fc_ct_job_handler+0x3e/0x48 [zfcp]

Kernel panic - not syncing: Fatal exception in interrupt

To prevent this, allocate a buffer when the BSG blk-request is setup, and
before it is queued for LLD processing.

Reported-by: Steffen Maier 
Signed-off-by: Benjamin Block 
Fixes: 82ed4db499b8 ("block: split scsi_request out of struct request")
Cc:  #4.11+
---
 block/bsg.c | 22 +-
 1 file changed, 17 insertions(+), 5 deletions(-)

diff --git a/block/bsg.c b/block/bsg.c
index 37663b664666..285b1b8126c3 100644
--- a/block/bsg.c
+++ b/block/bsg.c
@@ -74,6 +74,8 @@ static int bsg_major;
 
 static struct kmem_cache *bsg_cmd_cachep;
 
+#define BSG_COMMAND_REPLY_BUFFERSIZE   SCSI_SENSE_BUFFERSIZE
+
 /*
  * our internal command type
  */
@@ -85,6 +87,7 @@ struct bsg_command {
struct bio *bidi_bio;
int err;
struct sg_io_v4 hdr;
+   u8 reply_buffer[BSG_COMMAND_REPLY_BUFFERSIZE];
 };
 
 static void bsg_free_command(struct bsg_command *bc)
@@ -137,7 +140,7 @@ static inline struct hlist_head *bsg_dev_idx_hash(int index)
 
 static int blk_fill_sgv4_hdr_rq(struct request_queue *q, struct request *rq,
struct sg_io_v4 *hdr, struct bsg_device *bd,
-   fmode_t has_write_perm)
+   u8 *reply_buffer, fmode_t has_write_perm)
 {
struct