date:20140206

Re: [PATCH 12/15] scsi: initial blk-mq support

2014-02-06 Thread Sagi Grimberg


On 2/5/2014 2:41 PM, Christoph Hellwig wrote:

Add support for using the blk-mq code to submit requests to SCSI
drivers.  There is very little blk-mq specific code, but that's
partially because important functionality like partial completions
and request requeueing is still missing in blk-mq.  I hope to keep
most of the additions for these in the blk-mq core instead of the
SCSI layer, though.

Based on the earlier scsi-mq prototype by Nicholas Bellinger, although
not a whole lot of actual code is left.

Not-quite-signed-off-yet-by: Christoph Hellwig h...@lst.de
---
  drivers/scsi/scsi.c  |   36 ++-
  drivers/scsi/scsi_lib.c  |  244 --
  drivers/scsi/scsi_priv.h |2 +
  drivers/scsi/scsi_scan.c |5 +-
  include/scsi/scsi_host.h |3 +
  5 files changed, 278 insertions(+), 12 deletions(-)

diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index adb8bfb..cf5c110 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -44,6 +44,7 @@
  #include linux/string.h
  #include linux/slab.h
  #include linux/blkdev.h
+#include linux/blk-mq.h
  #include linux/delay.h
  #include linux/init.h
  #include linux/completion.h
@@ -688,6 +689,33 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
return 0;
  }
  
+static void scsi_softirq_done_remote(void *data)

+{
+   return scsi_softirq_done(data);
+}
+
+static void scsi_mq_done(struct request *req)
+{
+   int cpu;
+
+#if 0
+   if (!ctx-ipi_redirect)
+   return scsi_softirq_done(cmd);
+#endif
+
+   cpu = get_cpu();
+   if (cpu != req-cpu  cpu_online(req-cpu)) {
+   req-csd.func = scsi_softirq_done_remote;
+   req-csd.info = req;
+   req-csd.flags = 0;
+   __smp_call_function_single(req-cpu, req-csd, 0);
+   } else {
+   scsi_softirq_done(req);
+   }
+
+   put_cpu();
+}
+
  /**
   * scsi_done - Invoke completion on finished SCSI command.
   * @cmd: The SCSI Command for which a low-level device driver (LLDD) gives
@@ -701,8 +729,14 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
   */
  static void scsi_done(struct scsi_cmnd *cmd)
  {
+   struct request *req = cmd-request;
+
trace_scsi_dispatch_cmd_done(cmd);
-   blk_complete_request(cmd-request);
+
+   if (req-mq_ctx)
+   scsi_mq_done(req);
+   else
+   blk_complete_request(req);
  }
  
  /**

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index e67950c..8dd8893 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -20,6 +20,7 @@
  #include linux/delay.h
  #include linux/hardirq.h
  #include linux/scatterlist.h
+#include linux/blk-mq.h
  
  #include scsi/scsi.h

  #include scsi/scsi_cmnd.h
@@ -554,6 +555,15 @@ static bool scsi_end_request(struct scsi_cmnd *cmd, int 
error, int bytes,
struct request *req = cmd-request;
  
  	/*

+* XXX: need to handle partial completions and retries here.
+*/
+   if (req-mq_ctx) {
+   blk_mq_end_io(req, error);
+   put_device(cmd-device-sdev_gendev);
+   return true;
+   }
+
+   /*
 * If there are blocks left over at the end, set up the command
 * to queue the remainder of them.
 */
@@ -1014,12 +1024,15 @@ static int scsi_init_sgtable(struct request *req, 
struct scsi_data_buffer *sdb,
  {
int count;
  
-	/*

-* If sg table allocation fails, requeue request later.
-*/
-   if (unlikely(scsi_alloc_sgtable(sdb, req-nr_phys_segments,
-   gfp_mask))) {
-   return BLKPREP_DEFER;
+   BUG_ON(req-nr_phys_segments  SCSI_MAX_SG_SEGMENTS);
+
+   if (!req-mq_ctx) {
+   /*
+* If sg table allocation fails, requeue request later.
+*/
+   if (unlikely(scsi_alloc_sgtable(sdb, req-nr_phys_segments,
+   gfp_mask)))
+   return BLKPREP_DEFER;
}
  
  	req-buffer = NULL;

@@ -1075,9 +1088,11 @@ int scsi_init_io(struct scsi_cmnd *cmd, gfp_t gfp_mask)
BUG_ON(prot_sdb == NULL);
ivecs = blk_rq_count_integrity_sg(rq-q, rq-bio);
  
-		if (scsi_alloc_sgtable(prot_sdb, ivecs, gfp_mask)) {

-   error = BLKPREP_DEFER;
-   goto err_exit;
+   if (!rq-mq_ctx) {
+   if (scsi_alloc_sgtable(prot_sdb, ivecs, gfp_mask)) {
+   error = BLKPREP_DEFER;
+   goto err_exit;
+   }
}
  
  		count = blk_rq_map_integrity_sg(rq-q, rq-bio,

@@ -1505,7 +1520,7 @@ static void scsi_kill_request(struct request *req, struct 
request_queue *q)
blk_complete_request(req);
  }
  
-static void scsi_softirq_done(struct request *rq)

+void scsi_softirq_done(struct request *rq)
  {
struct

[PATCH][RFC] Add EVPD page 0x83 entries to sysfs

2014-02-06 Thread Hannes Reinecke

EVPD page 0x83 is used to uniquely identify the device.
So instead of having each and every program issue a separate
SG_IO call to retrieve this information it does make far more
sense to display it in sysfs.

Cc: Jeremy Linton jlin...@tributary.com
Cc: Doug Gilbert dgilb...@interlog.com
Cc: Kai Makisara kai.makis...@kolumbus.fi
Signed-off-by: Hannes Reinecke h...@suse.de
---
 drivers/scsi/scsi_scan.c   |   3 +
 drivers/scsi/scsi_sysfs.c  | 166 -
 include/scsi/scsi_device.h |   3 +
 3 files changed, 171 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 307a811..073fb84 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -929,6 +929,9 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned 
char *inq_result,
if (*bflags  BLIST_SKIP_VPD_PAGES)
sdev-skip_vpd_pages = 1;
 
+   if (sdev-scsi_level = SCSI_3)
+   scsi_attach_vpd_ident(sdev);
+
transport_configure_device(sdev-sdev_gendev);
 
if (sdev-host-hostt-slave_configure) {
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 9117d0b..ffe7cb5 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -725,8 +725,170 @@ show_queue_type_field(struct device *dev, struct 
device_attribute *attr,
 
 static DEVICE_ATTR(queue_type, S_IRUGO, show_queue_type_field, NULL);
 
+void scsi_attach_vpd_ident(struct scsi_device *sdev)
+{
+   int ret;
+   int vpd_len = 255;
+   unsigned char *buffer;
+retry:
+   buffer = kmalloc(vpd_len, GFP_KERNEL);
+   if (buffer) {
+   ret = scsi_get_vpd_page(sdev, 0x83, buffer, vpd_len);
+   if (ret == 0) {
+   vpd_len = (buffer[2]  8) + buffer[3];
+   if (vpd_len  255) {
+   kfree(buffer);
+   goto retry;
+   }
+   sdev-vpd_ident_len = vpd_len;
+   sdev-vpd_ident = buffer;
+   }
+   }
+}
+
+#define SCSI_VPD_ASSOC_lun 0x0
+#define SCSI_VPD_ASSOC_port 0x10
+#define SCSI_VPD_ASSOC_target 0x20
+
+#define SCSI_VPD_DESIG_vendor 0x0
+#define SCSI_VPD_DESIG_t10 0x1
+#define SCSI_VPD_DESIG_eui 0x2
+#define SCSI_VPD_DESIG_naa 0x3
+#define SCSI_VPD_DESIG_relport 0x4
+#define SCSI_VPD_DESIG_tpgrp 0x5
+#define SCSI_VPD_DESIG_lugrp 0x6
+#define SCSI_VPD_DESIG_md5 0x7
+#define SCSI_VPD_DESIG_scsi_name 0x8
+
+int scsi_parse_vpd_ident(struct scsi_device *sdev,
+int assoc, int desig, char *buf)
+{
+   unsigned char *d;
+   int len = 0;
+
+   if (!sdev-vpd_ident)
+   return -EINVAL;
+
+   d = sdev-vpd_ident + 4;
+   while (d  sdev-vpd_ident + sdev-vpd_ident_len) {
+   if ((d[1]  0x30) == assoc 
+   (d[1]  0xf) == desig) {
+   switch (d[1]  0xf) {
+   case SCSI_VPD_DESIG_eui:
+   case SCSI_VPD_DESIG_naa:
+   case SCSI_VPD_DESIG_md5:
+   switch (d[3]) {
+   case 8:
+   len += sprintf(buf + len,
+ %8phN\n, d + 4);
+   break;
+   case 12:
+   len += sprintf(buf + len,
+ %12phN\n, d + 4);
+   break;
+   case 16:
+   len += sprintf(buf + len,
+ %16phN\n, d + 4);
+   break;
+   }
+   break;
+   case SCSI_VPD_DESIG_relport:
+   case SCSI_VPD_DESIG_tpgrp:
+   case SCSI_VPD_DESIG_lugrp:
+   len += sprintf(buf + len, %d\n,
+ (int)(d[6]  8) + d[7]);
+   break;
+   case SCSI_VPD_DESIG_vendor:
+   case SCSI_VPD_DESIG_t10:
+   case SCSI_VPD_DESIG_scsi_name:
+   len += snprintf(buf + len, d[3] + 2, %s\n,
+   d + 4);
+   break;
+   }
+   }
+   d += d[3] + 4;
+   }
+
+   return len ? len : -EINVAL;
+}
+
+#define sdev_evpd_ident(assoc,desig)   \
+static ssize_t \
+sdev_show_evpd_ident_##assoc##_##desig (struct device *dev,\
+   struct device_attribute *attr,  \
+

Re: [PATCH] st: Do not rewind for SG_IO

2014-02-06 Thread Hannes Reinecke

On 02/03/2014 10:58 PM, Jeremy Linton wrote:
 On 2/3/2014 2:51 PM, Kay Sievers wrote:
 This is not simple and not going to happen. Sibling devices in /sys cannot
 have a relationship in udev, there are only parent/child dependencies.
 
   Ok.. so if we can't ignore all the spare device nodes in a given /sys 
 entry
 for a given device. Why open the device to scan it?
 
   I've often wondered why the serial number isn't part of the data in /sys
 along with the manufacture/model. The last tape drive I saw that failed to
 respond to inquiry page 0x80 was over a decade ago (probably manufactured in
 the early 90s). So enabling it just for tape is pretty safe.
 
 
   Matching Manufacturer/model/serial is going to be better than anything 
 your
 going to get out of 0x83 anyway. That data is guaranteed to be there, but its
 also guaranteed to be unreliable (every device, and every port has a slightly
 different set of descriptors they choose to support).
 
   Plus, your not going to have issues accidentally rewinding a device, or
 resetting a tape density, or accidentally turning compression off if you don't
 open the device.
 
And indeed, I have been wondering about this, too.
And (again) it is something which has been on my To-Do list for a
long time.

Moving EVPD page 0x83 (and maybe 0x80, too) into sysfs will save
quite a lot of headache we have currently; udev won't have to
call 'sg_inq', information will be present even though the
device itself might be temporarily unavailable yadda yadda.

So I've decided to bite the bullet and sent out a patch,
check for 'Add EVPD page 0x83 entries to sysfs'.

Reviews are welcome.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke   zSeries  Storage
h...@suse.de  +49 911 74053 688
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: J. Hawn, J. Guild, F. Imendörffer, HRB 16746 (AG Nürnberg)
--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][RFC] Add EVPD page 0x83 entries to sysfs

2014-02-06 Thread Hannes Reinecke

On 02/06/2014 02:04 PM, Hannes Reinecke wrote:
 EVPD page 0x83 is used to uniquely identify the device.
 So instead of having each and every program issue a separate
 SG_IO call to retrieve this information it does make far more
 sense to display it in sysfs.
 
 Cc: Jeremy Linton jlin...@tributary.com
 Cc: Doug Gilbert dgilb...@interlog.com
 Cc: Kai Makisara kai.makis...@kolumbus.fi
 Signed-off-by: Hannes Reinecke h...@suse.de
 ---
  drivers/scsi/scsi_scan.c   |   3 +
  drivers/scsi/scsi_sysfs.c  | 166 
 -
  include/scsi/scsi_device.h |   3 +
  3 files changed, 171 insertions(+), 1 deletion(-)
 
 diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
 index 307a811..073fb84 100644
 --- a/drivers/scsi/scsi_scan.c
 +++ b/drivers/scsi/scsi_scan.c
 @@ -929,6 +929,9 @@ static int scsi_add_lun(struct scsi_device *sdev, 
 unsigned char *inq_result,
   if (*bflags  BLIST_SKIP_VPD_PAGES)
   sdev-skip_vpd_pages = 1;
  
 + if (sdev-scsi_level = SCSI_3)
 + scsi_attach_vpd_ident(sdev);
 +
   transport_configure_device(sdev-sdev_gendev);
  
   if (sdev-host-hostt-slave_configure) {
 diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
 index 9117d0b..ffe7cb5 100644
 --- a/drivers/scsi/scsi_sysfs.c
 +++ b/drivers/scsi/scsi_sysfs.c
 @@ -725,8 +725,170 @@ show_queue_type_field(struct device *dev, struct 
 device_attribute *attr,
  
  static DEVICE_ATTR(queue_type, S_IRUGO, show_queue_type_field, NULL);
  
 +void scsi_attach_vpd_ident(struct scsi_device *sdev)
 +{
 + int ret;
 + int vpd_len = 255;
 + unsigned char *buffer;
 +retry:
 + buffer = kmalloc(vpd_len, GFP_KERNEL);
 + if (buffer) {
 + ret = scsi_get_vpd_page(sdev, 0x83, buffer, vpd_len);
 + if (ret == 0) {
 + vpd_len = (buffer[2]  8) + buffer[3];
 + if (vpd_len  255) {
 + kfree(buffer);
 + goto retry;
 + }
 + sdev-vpd_ident_len = vpd_len;
 + sdev-vpd_ident = buffer;
 + }
 + }
 +}
 +
Bummer. A missing kfree() if scsi_get_vpd_page() returns an error.
But this is just an RFC for now, so I'll be fixing it up once
it looks as if it would be moving forward...

Cheers,

Hannes
-- 
Dr. Hannes Reinecke   zSeries  Storage
h...@suse.de  +49 911 74053 688
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: J. Hawn, J. Guild, F. Imendörffer, HRB 16746 (AG Nürnberg)
--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] st: Do not rewind for SG_IO

2014-02-06 Thread Martin K. Petersen

 Hannes == Hannes Reinecke h...@suse.de writes:

Hannes Moving EVPD page 0x83 (and maybe 0x80, too) into sysfs will save
Hannes quite a lot of headache we have currently; udev won't have to
Hannes call 'sg_inq', information will be present even though the
Hannes device itself might be temporarily unavailable yadda yadda.

Hannes So I've decided to bite the bullet and sent out a patch, check
Hannes for 'Add EVPD page 0x83 entries to sysfs'.

I'm already doing something similar since I need it for xcopy.

My patch provides both the original VPD 0x83 and 0x80 bits as well as a
handle identical to /sbin/scsi_id.

I'll take a look at your patch later today...

-- 
Martin K. Petersen  Oracle Linux Engineering
--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] st: Do not rewind for SG_IO

2014-02-06 Thread Hannes Reinecke

On 02/06/2014 02:21 PM, Martin K. Petersen wrote:
 Hannes == Hannes Reinecke h...@suse.de writes:
 
 Hannes Moving EVPD page 0x83 (and maybe 0x80, too) into sysfs will save
 Hannes quite a lot of headache we have currently; udev won't have to
 Hannes call 'sg_inq', information will be present even though the
 Hannes device itself might be temporarily unavailable yadda yadda.
 
 Hannes So I've decided to bite the bullet and sent out a patch, check
 Hannes for 'Add EVPD page 0x83 entries to sysfs'.
 
 I'm already doing something similar since I need it for xcopy.
 
Hehe. Beat you to it.

 My patch provides both the original VPD 0x83 and 0x80 bits as well as a
 handle identical to /sbin/scsi_id.
 
Bah, don't do that.
That should better be handled by udev rules.
I've got a set of patches moving from scsi_id to sg_inq, which can
be easily adapted to using sysfs directly.

Once we figure out if that's the direction we want to go ...

 I'll take a look at your patch later today...
 
Thanks.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke   zSeries  Storage
h...@suse.de  +49 911 74053 688
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: J. Hawn, J. Guild, F. Imendörffer, HRB 16746 (AG Nürnberg)
--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] st: Do not rewind for SG_IO

2014-02-06 Thread Martin K. Petersen

 Hannes == Hannes Reinecke h...@suse.de writes:

 My patch provides both the original VPD 0x83 and 0x80 bits as well as
 a handle identical to /sbin/scsi_id.
 
Hannes Bah, don't do that.  That should better be handled by udev
Hannes rules.  I've got a set of patches moving from scsi_id to sg_inq,
Hannes which can be easily adapted to using sysfs directly.

I just want to get out of the userland sending random SCSI commands
business. That is a world of pain right now.

I wanted to provide a handle that was guaranteed to be compatible with
existing tooling. If you have worked around that in udev rules I guess
that's OK with me.

-- 
Martin K. Petersen  Oracle Linux Engineering
--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

RE: [PATCH 10/22] isci: Use pci_enable_msix_range()

2014-02-06 Thread Dorau, Lukasz

On Tuesday, February 04, 2014 12:17 PM Alexander Gordeev agord...@redhat.com 
wrote:
 As result of deprecation of MSI-X/MSI enablement functions
 pci_enable_msix() and pci_enable_msi_block() all drivers
 using these two interfaces need to be updated to use the
 new pci_enable_msi_range() and pci_enable_msix_range()
 interfaces.
 
 Signed-off-by: Alexander Gordeev agord...@redhat.com
 Cc: Lukasz Dorau lukasz.do...@intel.com
 Cc: Maciej Patelczyk maciej.patelc...@intel.com
 Cc: Dave Jiang dave.ji...@intel.com
 Cc: intel-linux-...@intel.com
 Cc: linux-scsi@vger.kernel.org
 Cc: linux-...@vger.kernel.org
 ---
  drivers/scsi/isci/init.c |5 +++--
  1 files changed, 3 insertions(+), 2 deletions(-)
 
 diff --git a/drivers/scsi/isci/init.c b/drivers/scsi/isci/init.c
 index d25d0d8..b99f307 100644
 --- a/drivers/scsi/isci/init.c
 +++ b/drivers/scsi/isci/init.c
 @@ -356,8 +356,9 @@ static int isci_setup_interrupts(struct pci_dev *pdev)
   for (i = 0; i  num_msix; i++)
   pci_info-msix_entries[i].entry = i;
 
 - err = pci_enable_msix(pdev, pci_info-msix_entries, num_msix);
 - if (err)
 + err = pci_enable_msix_range(pdev,
 + pci_info-msix_entries, num_msix, num_msix);
 + if (err  0)
   goto intx;
 
   for (i = 0; i  num_msix; i++) {
 --
 1.7.7.6

Looks fine.

Acked-by: Lukasz Dorau lukasz.do...@intel.com

Thanks,
Lukasz

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] st: Do not rewind for SG_IO

2014-02-06 Thread James Bottomley

On Thu, 2014-02-06 at 08:50 -0500, Martin K. Petersen wrote:
  Hannes == Hannes Reinecke h...@suse.de writes:
 
  My patch provides both the original VPD 0x83 and 0x80 bits as well as
  a handle identical to /sbin/scsi_id.
  
 Hannes Bah, don't do that.  That should better be handled by udev
 Hannes rules.  I've got a set of patches moving from scsi_id to sg_inq,
 Hannes which can be easily adapted to using sysfs directly.
 
 I just want to get out of the userland sending random SCSI commands
 business. That is a world of pain right now.

Well, in the words of Bill Clinton I feel your pain.  However, I need
to know we won't pick up that same whole world of pain in the kernel.
Remember it's not just SCSI devices, it's also ATA devices and
potentially other block devices ... then there's blkid and all the weird
things it does. Then there's transport identifiers for multi-path
reservations and so on.

Convince me this path won't have us shifting a whole bunch of weird from
user space to the kernel without any reduction in the weird factor.
Remember the point is not what can we do for all the nicely behaved
SCSI-3 devices, it's what do we do for everything.

James

 I wanted to provide a handle that was guaranteed to be compatible with
 existing tooling. If you have worked around that in udev rules I guess
 that's OK with me.
 


--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] st: Do not rewind for SG_IO

2014-02-06 Thread Hannes Reinecke

On 02/06/2014 03:38 PM, James Bottomley wrote:
 On Thu, 2014-02-06 at 08:50 -0500, Martin K. Petersen wrote:
 Hannes == Hannes Reinecke h...@suse.de writes:

 My patch provides both the original VPD 0x83 and 0x80 bits as well as
 a handle identical to /sbin/scsi_id.

 Hannes Bah, don't do that.  That should better be handled by udev
 Hannes rules.  I've got a set of patches moving from scsi_id to sg_inq,
 Hannes which can be easily adapted to using sysfs directly.

 I just want to get out of the userland sending random SCSI commands
 business. That is a world of pain right now.
 
 Well, in the words of Bill Clinton I feel your pain.  However, I need
 to know we won't pick up that same whole world of pain in the kernel.
 Remember it's not just SCSI devices, it's also ATA devices and
 potentially other block devices ... then there's blkid and all the weird
 things it does. Then there's transport identifiers for multi-path
 reservations and so on.
 
blkid is actually not so bad, _if_ it would distinguish between
'metadata not found' and 'I/O error during metadata read'.
I made a patch which hopefully should find it's way upstream.

And blkid just issues plain READ commands, so _this_ behaviour won't
change ...

 Convince me this path won't have us shifting a whole bunch of weird from
 user space to the kernel without any reduction in the weird factor.
 Remember the point is not what can we do for all the nicely behaved
 SCSI-3 devices, it's what do we do for everything.
 
Well, first and foremost the patch should be limited to SCSI-3 (and
later devices). So we'd be insulated from the most obvious crap.

So that leaves only devices which claim to be SCSI-3 or something,
but still keel over when asked for VPD pages.
However, this type of devices will fail already, as 'sd.c' is
already asking for all sorts of VPD pages.
Which leaves only non-Disk devices, but those tend to have a better
SCSI implementation as you can't make quick bucks with, say, as SCSI
Enclosure device.

But the prime motivator behind this patch is that we can be
reasonably sure the device will answer at all.
When retrieving the EVPD pages from userspace you always have the
problem that the device might have gone away or being unresponsive
by the time you get around sending the SG_IO call.
So you always have these stuck user-space processes asking for
information which _had_ been present at one point. In particular
udev is prone for this; anyone who ever came across the message

udevd[]: worker unexpectedly returned with status 0x0100

knows what I'm talking about ...

And the more udev events for a device you get, the higher the
likelyhood for this to happen is.

Plus I could re-use this information for my scsi_dh_alua patchset,
and for xcopy and friends we'll be needing it, too.

Cheers,

Hannes
-- 
Dr. Hannes Reinecke   zSeries  Storage
h...@suse.de  +49 911 74053 688
SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: J. Hawn, J. Guild, F. Imendörffer, HRB 16746 (AG Nürnberg)
--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

I HAVE A PROPOSAL FOR YOU!

2014-02-06 Thread Yung Kyu Kim

Hello,

The Project is about the exportation of 100,000 barrels of Light Crude
Oil daily out from Iraq to Turkey through my client's company in Iraq
at the rate of $92.00 a barrel. This amount to $9,200,000 daily. I ask
for your support as a foreigner to handle this business project with my
client and you are not expected to invest in Iraq

If yes, let me know and we will discuss this project proper.

yung kim.

CONTACT MY PRIVATE EMAIL:yung@qq.com
--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 12/17] scsi: avoid taking host_lock in scsi_run_queue unless nessecary

2014-02-06 Thread Christoph Hellwig

On Wed, Feb 05, 2014 at 03:54:17PM -0800, James Bottomley wrote:
  -/*
  - * Function:   scsi_run_queue()
  - *
  - * Purpose:Select a proper request queue to serve next
  - *
  - * Arguments:  q   - last request's queue
  - *
  - * Returns: Nothing
  - *
  - * Notes:  The previous command was completely finished, start
  - * a new one if possible.
  - */
 
 Instead of dumping the description, how about converting it to docbook
 and making it match the new function?

I dropped it because there's nothing that isn't trivially obvious from
the code.  The point of comments should be to explain why things are
done and not have a redundant writeup of what is done.

Docbook comments make sense for exported functions but not so much for
a static function with a handful of callers.

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 02/15] blk-mq: support at_head inserations for blk_execute_rq

2014-02-06 Thread Christoph Hellwig

On Wed, Feb 05, 2014 at 06:27:38PM -0800, Muthu Kumar wrote:
 Currently its not used by any drivers. Are we sure we don't need it
 public? If sure, please remove the EXPORT_SYMBOL() for it.

Drivers shouldn't use it, it's a low-level interface.  As mentioned in
the intro this is not quite a coherent series yet, but I'll post the
blk-mq patches in a slightly nicer form agains Jens' tree soon.

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 04/17] scsi: avoid useless free_list lock roundtrips

2014-02-06 Thread Christoph Hellwig

On Wed, Feb 05, 2014 at 03:44:04PM -0800, James Bottomley wrote:
 Why do this?  cmd is still likely to be not NULL, which helps the
 compiler optimise.

Because testing for non-NULL pointers gets that hint implicitly from
gcc.

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 05/17] scsi: simplify command allocation and freeing a bit

2014-02-06 Thread Christoph Hellwig

On Wed, Feb 05, 2014 at 03:51:35PM -0800, James Bottomley wrote:
 On Wed, 2014-02-05 at 04:39 -0800, Christoph Hellwig wrote:
 
  Just have one level of alloc/free functions that take a host instead
  of two levels for the allocation and different calling conventions
  for the free.
  
  Signed-off-by: Christoph Hellwig h...@lst.de
  ---
   drivers/scsi/scsi.c |   77 
  +++
   1 file changed, 22 insertions(+), 55 deletions(-)
  
  diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
  index 4139486..5347f7d 100644
  --- a/drivers/scsi/scsi.c
  +++ b/drivers/scsi/scsi.c
  @@ -160,79 +160,46 @@ static struct scsi_host_cmd_pool scsi_cmd_dma_pool = {
   
   static DEFINE_MUTEX(host_cmd_pool_mutex);
   
  -/**
  - * scsi_pool_alloc_command - internal function to get a fully allocated 
  command
  - * @pool:  slab pool to allocate the command from
  - * @gfp_mask:  mask for the allocation
  - *
  - * Returns a fully allocated command (with the allied sense buffer) or
  - * NULL on failure
  - */
  -static struct scsi_cmnd *
  -scsi_pool_alloc_command(struct scsi_host_cmd_pool *pool, gfp_t gfp_mask)
  -{
  -   struct scsi_cmnd *cmd;
  -
  -   cmd = kmem_cache_zalloc(pool-cmd_slab, gfp_mask | pool-gfp_mask);
  -   if (!cmd)
  -   return NULL;
  -
  -   cmd-sense_buffer = kmem_cache_alloc(pool-sense_slab,
  -gfp_mask | pool-gfp_mask);
  -   if (!cmd-sense_buffer) {
  -   kmem_cache_free(pool-cmd_slab, cmd);
  -   return NULL;
  -   }
  -
  -   return cmd;
  -}
  -
  -/**
  - * scsi_pool_free_command - internal function to release a command
  - * @pool:  slab pool to allocate the command from
  - * @cmd:   command to release
  - *
  - * the command must previously have been allocated by
  - * scsi_pool_alloc_command.
  - */
   static void
  -scsi_pool_free_command(struct scsi_host_cmd_pool *pool,
  -struct scsi_cmnd *cmd)
  +scsi_host_free_command(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
 
 You lost the docbook function description here when you changed the name.

It's a static function with two callers, and it's more obvious from the
function than the comment what it does.

I'm probably one of the people with the highest comment to code ratios
in the kernel, but I'd rather explain in long comments when something
is non-obvious than putting useless boilerplates in.  If you insist on
the comment I'll put it back, but it seems utterly useless.

 This docbook elimination looks spurious; why do it?

Same as above.

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 00/17] SCSI data path micro-optimizations

2014-02-06 Thread Christoph Hellwig

On Wed, Feb 05, 2014 at 03:41:04PM -0800, James Bottomley wrote:
 I will say that you do make the series hard to review by including
 things that do code churn for no gain: like making all the error
 handling goto based instead of the current in-place if () clause
 based ... this doesn't do anything for optmisation, so why do it?  (I'm
 not going to take sides on which is better, since we do both).

You mean the command allocation bits?  When I inline one function into a
nother I might as well add coherent error handling.  And for more than
one level of unwinding gotos are always better because a humand actually
can grasp it.

 There's also a lot of extraneous stuff in here.  I'm assuming most of
 the performance stuff is get/put removal and locking efficiency, things
 like this

This was already cut down.   If it makes your life easier we can do the
cmd allocation part separate of the rest, but there's really no churn
left.

 
 2,3,5 are allocation simplification.  In general it doesn't look so bad,
 but it doesn't seem to be part of the series.  It needs an ACK from the
 megaraid people since they're the only consumer of the interface you're
 trying to eliminate

It's needed to not make the cmd_size addition not a complete mess.

 
 6,7 is a new interface for command allocation and a virtio consumer.
 OK, as it goes, but also separable ... and preferably, if, as you say,
 it's important for mq, some more users, also doesn't need to be part of
 the series.

Feel free to consider 1-7 and the rest separate series already, I'll
resend them that way next time.

 9,10 look like they aren't your patches, so they need an Author From:
 field for git (9 needs your signoff, if you're sending it).

The are, and they are properly attributed in the git tree.
Unfortunately quilt still hasn't learned to properly deal with From:
headers.

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 12/15] scsi: initial blk-mq support

2014-02-06 Thread Christoph Hellwig

On Thu, Feb 06, 2014 at 10:38:17AM +0200, Sagi Grimberg wrote:
 Both you and Nic offer a single HW queue per sdev.
 I'm wandering if that should be the LLD's decision (if chooses to
 use multiple queues)?
 
 Trying to understand how LLDs will fit in a way they exploit
 multi-queue and actually
 maintain multiple queues. SRP/iSER for example maintain a single
 queue per connection
 (or session in iSCSI). Now with multi-queue all requests of that
 shost will eventually
 boil-down to posting on a single queue which might transition the
 bottleneck to the LLDs.
 
 I noticed virtio_scsi implementation is choosing a queue per command
 based on current
 processor id without any explicit mapping (unless I missed it).
 
 I guess my question is where do (or should) LLDs plug-in to this mq scheme?

Just using blk-mq helps with lock contention and cacheline issues, while
being conceptually simple, that's why it's the priority.  See the
proposal I sent before the patch series for more details.

That being said if you have simple enough patches for real multiqueue
support I'd be more than happy to carry them along.
--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 01/15] block: rework flush sequencing for blk-mq

2014-02-06 Thread Christoph Hellwig

On Wed, Feb 05, 2014 at 06:08:37PM -0800, Muthu Kumar wrote:
  diff --git a/block/blk-core.c b/block/blk-core.c
  index c00e0bd..d3eb330 100644
  --- a/block/blk-core.c
  +++ b/block/blk-core.c
  @@ -693,11 +693,20 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t 
  *lock, int node_id)
  if (!uninit_q)
  return NULL;
 
  +   uninit_q-flush_rq = kzalloc(sizeof(struct request), GFP_KERNEL);
 
 
 Shouldn't this be kzalloc_node()?

Probably.  There's also various kinds of optimization potential like
allocating one per hw_ctx or similar.  But until we have a device that
has high enough IOPS to matter and needs cache flushes I wouldn't worry
about optimizing the flush path.

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] block/null_blk: Fix completion processing from LIFO to FIFO

2014-02-06 Thread Shlomo Pongratz

The completion queue is implemented using lockless list.

The llist_add is adds the events to the list head which is a push operation.
The processing of the completion elements is done by disconnecting all the
pushed elements and iterating over the disconnected list. The problem is
that the processing is done in reverse order w.r.t order of the insertion
i.e. LIFO processing. By reversing the disconnected list which is done in
linear time the desired FIFO processing is achieved.

Signed-off-by: Shlomo Pongratz shlo...@mellanox.com
---
 drivers/block/null_blk.c |2 ++
 1 files changed, 2 insertions(+), 0 deletions(-)

diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c
index 3107282..cc801cd 100644
--- a/drivers/block/null_blk.c
+++ b/drivers/block/null_blk.c
@@ -195,6 +195,7 @@ static enum hrtimer_restart null_cmd_timer_expired(struct 
hrtimer *timer)
cq = per_cpu(completion_queues, smp_processor_id());
 
while ((entry = llist_del_all(cq-list)) != NULL) {
+   entry = llist_reverse_order(entry);
do {
cmd = container_of(entry, struct nullb_cmd, ll_list);
end_cmd(cmd);
@@ -235,6 +236,7 @@ static void null_ipi_cmd_end_io(void *data)
cq = per_cpu(completion_queues, smp_processor_id());
 
entry = llist_del_all(cq-list);
+   entry = llist_reverse_order(entry);
 
while (entry) {
next = entry-next;
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] block/null_blk: Fix completion processing from LIFO to FIFO

2014-02-06 Thread Christoph Hellwig

On Thu, Feb 06, 2014 at 06:33:17PM +0200, Shlomo Pongratz wrote:
 The completion queue is implemented using lockless list.
 
 The llist_add is adds the events to the list head which is a push operation.
 The processing of the completion elements is done by disconnecting all the
 pushed elements and iterating over the disconnected list. The problem is
 that the processing is done in reverse order w.r.t order of the insertion
 i.e. LIFO processing. By reversing the disconnected list which is done in
 linear time the desired FIFO processing is achieved.

I think it should just switch to using __smp_call_function_single
directly, mirroring commit 3d6efbf62c797a2924785f482e4ce8aa8820ec72 for
the blk-mq core.

I've actually a patch in the queue that allows generic request
completion offloading similar to blk_complete_request() in the old
request code, but it'll need a bit more polishing first.

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 02/17] megaraid: simplify internal command handling

2014-02-06 Thread Christoph Hellwig

Hi Neela,

can you look over this from the megaraid perspective?

On Wed, Feb 05, 2014 at 04:39:32AM -0800, Christoph Hellwig wrote:
 We don't use the passed in scsi command for anything, so just add a
 adapter-wide internal status to go along with the internal scb that
 is used unter int_mtx to pass back the return value and get rid of
 all the complexities and abuse of the scsi_cmnd structure.
 
 Signed-off-by: Christoph Hellwig h...@lst.de
 ---
  drivers/scsi/megaraid.c |  120 
 ---
  drivers/scsi/megaraid.h |3 +-
  2 files changed, 32 insertions(+), 91 deletions(-)
 
 diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c
 index 816db12..8bca30f 100644
 --- a/drivers/scsi/megaraid.c
 +++ b/drivers/scsi/megaraid.c
 @@ -531,13 +531,6 @@ mega_build_cmd(adapter_t *adapter, Scsi_Cmnd *cmd, int 
 *busy)
   int target = 0;
   int ldrv_num = 0;   /* logical drive number */
  
 -
 - /*
 -  * filter the internal and ioctl commands
 -  */
 - if((cmd-cmnd[0] == MEGA_INTERNAL_CMD))
 - return (scb_t *)cmd-host_scribble;
 -
   /*
* We know what channels our logical drives are on - mega_find_card()
*/
 @@ -1439,19 +1432,22 @@ mega_cmd_done(adapter_t *adapter, u8 completed[], int 
 nstatus, int status)
  
   cmdid = completed[i];
  
 - if( cmdid == CMDID_INT_CMDS ) { /* internal command */
 + /*
 +  * Only free SCBs for the commands coming down from the
 +  * mid-layer, not for which were issued internally
 +  *
 +  * For internal command, restore the status returned by the
 +  * firmware so that user can interpret it.
 +  */
 + if (cmdid == CMDID_INT_CMDS) {
   scb = adapter-int_scb;
 - cmd = scb-cmd;
 - mbox = (mbox_t *)scb-raw_mbox;
  
 - /*
 -  * Internal command interface do not fire the extended
 -  * passthru or 64-bit passthru
 -  */
 - pthru = scb-pthru;
 + list_del_init(scb-list);
 + scb-state = SCB_FREE;
  
 - }
 - else {
 + adapter-int_status = status;
 + complete(adapter-int_waitq);
 + } else {
   scb = adapter-scb_list[cmdid];
  
   /*
 @@ -1640,25 +1636,7 @@ mega_cmd_done(adapter_t *adapter, u8 completed[], int 
 nstatus, int status)
   cmd-result |= (DID_BAD_TARGET  16)|status;
   }
  
 - /*
 -  * Only free SCBs for the commands coming down from the
 -  * mid-layer, not for which were issued internally
 -  *
 -  * For internal command, restore the status returned by the
 -  * firmware so that user can interpret it.
 -  */
 - if( cmdid == CMDID_INT_CMDS ) { /* internal command */
 - cmd-result = status;
 -
 - /*
 -  * Remove the internal command from the pending list
 -  */
 - list_del_init(scb-list);
 - scb-state = SCB_FREE;
 - }
 - else {
 - mega_free_scb(adapter, scb);
 - }
 + mega_free_scb(adapter, scb);
  
   /* Add Scsi_Command to end of completed queue */
   list_add_tail(SCSI_LIST(cmd), adapter-completed_list);
 @@ -4133,23 +4111,15 @@ mega_internal_dev_inquiry(adapter_t *adapter, u8 ch, 
 u8 tgt,
   * The last argument is the address of the passthru structure if the command
   * to be fired is a passthru command
   *
 - * lockscope specifies whether the caller has already acquired the lock. Of
 - * course, the caller must know which lock we are talking about.
 - *
   * Note: parameter 'pthru' is null for non-passthru commands.
   */
  static int
  mega_internal_command(adapter_t *adapter, megacmd_t *mc, mega_passthru 
 *pthru)
  {
 - Scsi_Cmnd   *scmd;
 - struct  scsi_device *sdev;
 + unsigned long flags;
   scb_t   *scb;
   int rval;
  
 - scmd = scsi_allocate_command(GFP_KERNEL);
 - if (!scmd)
 - return -ENOMEM;
 -
   /*
* The internal commands share one command id and hence are
* serialized. This is so because we want to reserve maximum number of
 @@ -4160,73 +4130,45 @@ mega_internal_command(adapter_t *adapter, megacmd_t 
 *mc, mega_passthru *pthru)
   scb = adapter-int_scb;
   memset(scb, 0, sizeof(scb_t));
  
 - sdev = kzalloc(sizeof(struct scsi_device), GFP_KERNEL);
 - scmd-device = sdev;
 -
 - memset(adapter-int_cdb, 0, sizeof(adapter-int_cdb));
 - scmd-cmnd = adapter-int_cdb;
 - scmd-device-host =

Re: [PATCH 13/17] scsi: push host_lock down into scsi_{host,target}_queue_ready

2014-02-06 Thread James Bottomley


On Wed, 2014-02-05 at 04:39 -0800, Christoph Hellwig wrote:
 Prepare for not taking a host-wide lock in the dispatch path by pushing
 the lock down into the places that actually need it.  Note that this
 patch is just a preparation step, as it will actually increase lock
 roundtrips and thus decrease performance on its own.

I'm not really convinced by the rest of the series.  I think patches
4,8,9,10,11,12 (lock elimination from fast path and get/put reduction)
are where the improvements are at and they mostly look ready to apply
modulo some author and signoff fixes.

I'm dubious about replacing a locked set of checks and increments with
atomics for the simple reason that atomics are pretty expensive on
non-x86, so you've likely slowed the critical path down for them.  Even
on x86, atomics can be very expensive because of the global bus lock.  I
think about three of them in a row is where you might as well stick with
the lock.

Could you benchmark this lot and show what the actual improvement is
just for this series, if any?

I also think we should be getting more utility out of threading
guarantees.  So, if there's only one thread active per device we don't
need any device counters to be atomic.  Likewise, u32 read/write is an
atomic operation, so we might be able to use sloppy counters for the
target and host stuff (one per CPU that are incremented/decremented on
that CPU ... this will only work using CPU locality ... completion on
same CPU but that seems to be an element of a lot of stuff nowadays).

I'm not saying this will all pan out, but I am saying I don't think just
making all the counters atomic to reduce locking will buy us a huge
amount, so I'd appreciate careful benchmarking to confirm or invalidate
this hypothesis first.

James


--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 02/15] blk-mq: support at_head inserations for blk_execute_rq

2014-02-06 Thread Muthu Kumar

On Thu, Feb 6, 2014 at 8:17 AM, Christoph Hellwig h...@infradead.org wrote:
 On Wed, Feb 05, 2014 at 06:27:38PM -0800, Muthu Kumar wrote:
 Currently its not used by any drivers. Are we sure we don't need it
 public? If sure, please remove the EXPORT_SYMBOL() for it.

 Drivers shouldn't use it, it's a low-level interface.  As mentioned in
 the intro this is not quite a coherent series yet, but I'll post the
 blk-mq patches in a slightly nicer form agains Jens' tree soon.


Alright then.. I will wait for that patch. BTW, is the scsi-mq work
done on your git tree only or anyone else has their own git tree
(Bart? NAB?)?
--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 02/15] blk-mq: support at_head inserations for blk_execute_rq

2014-02-06 Thread Christoph Hellwig

On Thu, Feb 06, 2014 at 09:05:10AM -0800, Muthu Kumar wrote:
 Alright then.. I will wait for that patch. BTW, is the scsi-mq work
 done on your git tree only or anyone else has their own git tree

For now I put out a git tree for those who don't like large patch
series.  Happy to take patches for it, but we'll have to see who
has the energy to keep the tree in the long run.

That being said I hope to feed patches into the upstream trees as
quickly as possible and thus only keep a small stack of patches around.
--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 13/17] scsi: push host_lock down into scsi_{host,target}_queue_ready

2014-02-06 Thread Bart Van Assche

On 02/06/14 17:56, James Bottomley wrote:
 Could you benchmark this lot and show what the actual improvement is
 just for this series, if any?

I see a performance improvement of 12% with the SRP protocol for the
SCSI core optimizations alone (I am still busy measuring the impact of
the blk-mq conversion but I can already see that it is really
significant). Please note that the performance impact depends a lot on
the workload (number of LUNs per SCSI host e.g.) so maybe the workload I
chose is not doing justice to Christoph's work. And it's also important
to mention that with the workload I ran I was saturating the target
system CPU (a quad core Intel i5). In other words, results might be
better with a more powerful target system.

Bart.
--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 13/17] scsi: push host_lock down into scsi_{host,target}_queue_ready

2014-02-06 Thread James Bottomley

On Thu, 2014-02-06 at 18:10 +0100, Bart Van Assche wrote:
 On 02/06/14 17:56, James Bottomley wrote:
  Could you benchmark this lot and show what the actual improvement is
  just for this series, if any?
 
 I see a performance improvement of 12% with the SRP protocol for the
 SCSI core optimizations alone (I am still busy measuring the impact of
 the blk-mq conversion but I can already see that it is really
 significant). Please note that the performance impact depends a lot on
 the workload (number of LUNs per SCSI host e.g.) so maybe the workload I
 chose is not doing justice to Christoph's work. And it's also important
 to mention that with the workload I ran I was saturating the target
 system CPU (a quad core Intel i5). In other words, results might be
 better with a more powerful target system.

On what?  Just the patches I indicated or the whole series?  My specific
concern is that swapping a critical section for atomics may not buy us
anything even on x86 and may slow down non-x86.  That's the bit I'd like
benchmarks to explore.

James


--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 5/6] scsi: remove a useless get/put_device pair in scsi_next_command

2014-02-06 Thread Christoph Hellwig

From: Bart Van Assche bvanass...@acm.org

Eliminate a get_device() / put_device() pair from scsi_next_command().
Both are atomic operations hence removing these slightly improves
performance.

[hch: slight changes due to different context]

Signed-off-by: Bart Van Assche bvanass...@acm.org
Signed-off-by: Christoph Hellwig h...@lst.de
---
 drivers/scsi/scsi_lib.c |5 -
 1 file changed, 5 deletions(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 7d35678..91ca414 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -526,14 +526,9 @@ void scsi_next_command(struct scsi_cmnd *cmd)
struct scsi_device *sdev = cmd-device;
struct request_queue *q = sdev-request_queue;
 
-   /* need to hold a reference on the device before we let go of the cmd */
-   get_device(sdev-sdev_gendev);
-
scsi_put_command(cmd);
-   put_device(sdev-sdev_gendev);
scsi_run_queue(q);
 
-   /* ok to remove device now */
put_device(sdev-sdev_gendev);
 }
 
-- 
1.7.10.4


--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 0/6] first batch of SCSI data path micro-optimizations

2014-02-06 Thread Christoph Hellwig

This series contains a few optimizations for the SCSI data I/O path.
These are the patches acceptable to James as-is from the previous series.
No separate benchmarking has been performed for these patches.

This work was sponsored by the ION division of Fusion IO.
--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 3/6] scsi: do not manipulate device reference counts in scsi_get/put_command

2014-02-06 Thread Christoph Hellwig

Many callers won't need this and we can optimize them away.  In addition
the handling in the __-prefixed variants was inconsistant to start with.

Based on an earlier patch from Bart Van Assche.

Signed-off-by: Christoph Hellwig h...@lst.de
---
 drivers/scsi/scsi.c |   36 
 drivers/scsi/scsi_error.c   |6 ++
 drivers/scsi/scsi_lib.c |   12 +++-
 drivers/scsi/scsi_tgt_lib.c |3 ++-
 include/scsi/scsi_cmnd.h|3 +--
 5 files changed, 32 insertions(+), 28 deletions(-)

diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index fb86479..843b4f1 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -284,27 +284,19 @@ EXPORT_SYMBOL_GPL(__scsi_get_command);
  */
 struct scsi_cmnd *scsi_get_command(struct scsi_device *dev, gfp_t gfp_mask)
 {
-   struct scsi_cmnd *cmd;
+   struct scsi_cmnd *cmd = __scsi_get_command(dev-host, gfp_mask);
+   unsigned long flags;
 
-   /* Bail if we can't get a reference to the device */
-   if (!get_device(dev-sdev_gendev))
+   if (unlikely(cmd == NULL))
return NULL;
 
-   cmd = __scsi_get_command(dev-host, gfp_mask);
-
-   if (likely(cmd != NULL)) {
-   unsigned long flags;
-
-   cmd-device = dev;
-   INIT_LIST_HEAD(cmd-list);
-   INIT_DELAYED_WORK(cmd-abort_work, scmd_eh_abort_handler);
-   spin_lock_irqsave(dev-list_lock, flags);
-   list_add_tail(cmd-list, dev-cmd_list);
-   spin_unlock_irqrestore(dev-list_lock, flags);
-   cmd-jiffies_at_alloc = jiffies;
-   } else
-   put_device(dev-sdev_gendev);
-
+   cmd-device = dev;
+   INIT_LIST_HEAD(cmd-list);
+   INIT_DELAYED_WORK(cmd-abort_work, scmd_eh_abort_handler);
+   spin_lock_irqsave(dev-list_lock, flags);
+   list_add_tail(cmd-list, dev-cmd_list);
+   spin_unlock_irqrestore(dev-list_lock, flags);
+   cmd-jiffies_at_alloc = jiffies;
return cmd;
 }
 EXPORT_SYMBOL(scsi_get_command);
@@ -315,8 +307,7 @@ EXPORT_SYMBOL(scsi_get_command);
  * @cmd: Command to free
  * @dev: parent scsi device
  */
-void __scsi_put_command(struct Scsi_Host *shost, struct scsi_cmnd *cmd,
-   struct device *dev)
+void __scsi_put_command(struct Scsi_Host *shost, struct scsi_cmnd *cmd)
 {
unsigned long flags;
 
@@ -331,8 +322,6 @@ void __scsi_put_command(struct Scsi_Host *shost, struct 
scsi_cmnd *cmd,
 
if (likely(cmd != NULL))
scsi_pool_free_command(shost-cmd_pool, cmd);
-
-   put_device(dev);
 }
 EXPORT_SYMBOL(__scsi_put_command);
 
@@ -346,7 +335,6 @@ EXPORT_SYMBOL(__scsi_put_command);
  */
 void scsi_put_command(struct scsi_cmnd *cmd)
 {
-   struct scsi_device *sdev = cmd-device;
unsigned long flags;
 
/* serious error if the command hasn't come from a device list */
@@ -357,7 +345,7 @@ void scsi_put_command(struct scsi_cmnd *cmd)
 
cancel_delayed_work(cmd-abort_work);
 
-   __scsi_put_command(cmd-device-host, cmd, sdev-sdev_gendev);
+   __scsi_put_command(cmd-device-host, cmd);
 }
 EXPORT_SYMBOL(scsi_put_command);
 
diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c
index 78b004d..771c16b 100644
--- a/drivers/scsi/scsi_error.c
+++ b/drivers/scsi/scsi_error.c
@@ -2288,6 +2288,11 @@ scsi_reset_provider(struct scsi_device *dev, int flag)
if (scsi_autopm_get_host(shost)  0)
return FAILED;
 
+   if (!get_device(dev-sdev_gendev)) {
+   rtn = FAILED;
+   goto out_put_autopm_host;
+   }
+
scmd = scsi_get_command(dev, GFP_KERNEL);
blk_rq_init(NULL, req);
scmd-request = req;
@@ -2345,6 +2350,7 @@ scsi_reset_provider(struct scsi_device *dev, int flag)
scsi_run_host_queues(shost);
 
scsi_next_command(scmd);
+out_put_autopm_host:
scsi_autopm_put_host(shost);
return rtn;
 }
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index ad516c0..500178c 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -95,6 +95,7 @@ static void scsi_unprep_request(struct request *req)
req-special = NULL;
 
scsi_put_command(cmd);
+   put_device(cmd-device-sdev_gendev);
 }
 
 /**
@@ -529,6 +530,7 @@ void scsi_next_command(struct scsi_cmnd *cmd)
get_device(sdev-sdev_gendev);
 
scsi_put_command(cmd);
+   put_device(sdev-sdev_gendev);
scsi_run_queue(q);
 
/* ok to remove device now */
@@ -1116,6 +1118,7 @@ err_exit:
scsi_release_buffers(cmd);
cmd-request-special = NULL;
scsi_put_command(cmd);
+   put_device(cmd-device-sdev_gendev);
return error;
 }
 EXPORT_SYMBOL(scsi_init_io);
@@ -1126,9 +1129,15 @@ static struct scsi_cmnd *scsi_get_cmd_from_req(struct 
scsi_device *sdev,
struct scsi_cmnd *cmd;
 
if (!req-special) {
+   /* Bail if we can't get a

[PATCH 1/6] scsi: avoid useless free_list lock roundtrips

2014-02-06 Thread Christoph Hellwig

Avoid hitting the host-wide free_list lock unless we need to put a command
back onto the freelist.

Signed-off-by: Christoph Hellwig h...@lst.de
---
 drivers/scsi/scsi.c |   11 ++-
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
index d8afec8..fb86479 100644
--- a/drivers/scsi/scsi.c
+++ b/drivers/scsi/scsi.c
@@ -320,13 +320,14 @@ void __scsi_put_command(struct Scsi_Host *shost, struct 
scsi_cmnd *cmd,
 {
unsigned long flags;
 
-   /* changing locks here, don't need to restore the irq state */
-   spin_lock_irqsave(shost-free_list_lock, flags);
if (unlikely(list_empty(shost-free_list))) {
-   list_add(cmd-list, shost-free_list);
-   cmd = NULL;
+   spin_lock_irqsave(shost-free_list_lock, flags);
+   if (list_empty(shost-free_list)) {
+   list_add(cmd-list, shost-free_list);
+   cmd = NULL;
+   }
+   spin_unlock_irqrestore(shost-free_list_lock, flags);
}
-   spin_unlock_irqrestore(shost-free_list_lock, flags);
 
if (likely(cmd != NULL))
scsi_pool_free_command(shost-cmd_pool, cmd);
-- 
1.7.10.4


--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 4/6] scsi: remove a useless get/put_device pair in scsi_request_fn

2014-02-06 Thread Christoph Hellwig

From: Bart Van Assche bvanass...@acm.org

SCSI devices may only be removed by calling scsi_remove_device().
That function must invoke blk_cleanup_queue() before the final put
of sdev-sdev_gendev. Since blk_cleanup_queue() waits for the
block queue to drain and then tears it down, scsi_request_fn cannot
be active anymore after blk_cleanup_queue() has returned and hence
the get_device()/put_device() pair in scsi_request_fn is unnecessary.

Signed-off-by: Bart Van Assche bvanass...@acm.org
Acked-by: Tejun Heo t...@kernel.org
Reviewed-by: Mike Christie micha...@cs.wisc.edu
---
 drivers/scsi/scsi_lib.c |   14 +++---
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 500178c..7d35678 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1558,16 +1558,14 @@ static void scsi_softirq_done(struct request *rq)
  * Lock status: IO request lock assumed to be held when called.
  */
 static void scsi_request_fn(struct request_queue *q)
+   __releases(q-queue_lock)
+   __acquires(q-queue_lock)
 {
struct scsi_device *sdev = q-queuedata;
struct Scsi_Host *shost;
struct scsi_cmnd *cmd;
struct request *req;
 
-   if(!get_device(sdev-sdev_gendev))
-   /* We must be tearing the block queue down already */
-   return;
-
/*
 * To start with, we keep looping until the queue is empty, or until
 * the host is no longer able to accept any more requests.
@@ -1656,7 +1654,7 @@ static void scsi_request_fn(struct request_queue *q)
goto out_delay;
}
 
-   goto out;
+   return;
 
  not_ready:
spin_unlock_irq(shost-host_lock);
@@ -1675,12 +1673,6 @@ static void scsi_request_fn(struct request_queue *q)
 out_delay:
if (sdev-device_busy == 0)
blk_delay_queue(q, SCSI_QUEUE_DELAY);
-out:
-   /* must be careful here...if we trigger the -remove() function
-* we cannot be holding the q lock */
-   spin_unlock_irq(q-queue_lock);
-   put_device(sdev-sdev_gendev);
-   spin_lock_irq(q-queue_lock);
 }
 
 u64 scsi_calculate_bounce_limit(struct Scsi_Host *shost)
-- 
1.7.10.4


--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 6/6] scsi: remove a useless get/put_device pair in scsi_requeue_command

2014-02-06 Thread Christoph Hellwig

Avoid a spurious device get/put cycle by using scsi_put_command and folding
scsi_unprep_request into scsi_requeue_command.

Signed-off-by: Christoph Hellwig h...@lst.de
---
 drivers/scsi/scsi_lib.c |   35 +++
 1 file changed, 3 insertions(+), 32 deletions(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 91ca414..9350691 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -75,29 +75,6 @@ struct kmem_cache *scsi_sdb_cache;
  */
 #define SCSI_QUEUE_DELAY   3
 
-/*
- * Function:   scsi_unprep_request()
- *
- * Purpose:Remove all preparation done for a request, including its
- * associated scsi_cmnd, so that it can be requeued.
- *
- * Arguments:  req - request to unprepare
- *
- * Lock status:Assumed that no locks are held upon entry.
- *
- * Returns:Nothing.
- */
-static void scsi_unprep_request(struct request *req)
-{
-   struct scsi_cmnd *cmd = req-special;
-
-   blk_unprep_request(req);
-   req-special = NULL;
-
-   scsi_put_command(cmd);
-   put_device(cmd-device-sdev_gendev);
-}
-
 /**
  * __scsi_queue_insert - private queue insertion
  * @cmd: The SCSI command being requeued
@@ -503,16 +480,10 @@ static void scsi_requeue_command(struct request_queue *q, 
struct scsi_cmnd *cmd)
struct request *req = cmd-request;
unsigned long flags;
 
-   /*
-* We need to hold a reference on the device to avoid the queue being
-* killed after the unlock and before scsi_run_queue is invoked which
-* may happen because scsi_unprep_request() puts the command which
-* releases its reference on the device.
-*/
-   get_device(sdev-sdev_gendev);
-
spin_lock_irqsave(q-queue_lock, flags);
-   scsi_unprep_request(req);
+   blk_unprep_request(req);
+   req-special = NULL;
+   scsi_put_command(cmd);
blk_requeue_request(q, req);
spin_unlock_irqrestore(q-queue_lock, flags);
 
-- 
1.7.10.4


--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 2/6] scsi: avoid taking host_lock in scsi_run_queue unless nessecary

2014-02-06 Thread Christoph Hellwig

If we don't have starved devices we don't need to take the host lock
to iterate over them.  Also split the function up to be more clear.

Signed-off-by: Christoph Hellwig h...@lst.de
---
 drivers/scsi/scsi_lib.c |   43 ---
 1 file changed, 24 insertions(+), 19 deletions(-)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 7bd7f0d..ad516c0 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -385,29 +385,12 @@ static inline int scsi_host_is_busy(struct Scsi_Host 
*shost)
return 0;
 }
 
-/*
- * Function:   scsi_run_queue()
- *
- * Purpose:Select a proper request queue to serve next
- *
- * Arguments:  q   - last request's queue
- *
- * Returns: Nothing
- *
- * Notes:  The previous command was completely finished, start
- * a new one if possible.
- */
-static void scsi_run_queue(struct request_queue *q)
+static void scsi_starved_list_run(struct Scsi_Host *shost)
 {
-   struct scsi_device *sdev = q-queuedata;
-   struct Scsi_Host *shost;
LIST_HEAD(starved_list);
+   struct scsi_device *sdev;
unsigned long flags;
 
-   shost = sdev-host;
-   if (scsi_target(sdev)-single_lun)
-   scsi_single_lun_run(sdev);
-
spin_lock_irqsave(shost-host_lock, flags);
list_splice_init(shost-starved_list, starved_list);
 
@@ -459,6 +442,28 @@ static void scsi_run_queue(struct request_queue *q)
/* put any unprocessed entries back */
list_splice(starved_list, shost-starved_list);
spin_unlock_irqrestore(shost-host_lock, flags);
+}
+
+/*
+ * Function:   scsi_run_queue()
+ *
+ * Purpose:Select a proper request queue to serve next
+ *
+ * Arguments:  q   - last request's queue
+ *
+ * Returns: Nothing
+ *
+ * Notes:  The previous command was completely finished, start
+ * a new one if possible.
+ */
+static void scsi_run_queue(struct request_queue *q)
+{
+   struct scsi_device *sdev = q-queuedata;
+
+   if (scsi_target(sdev)-single_lun)
+   scsi_single_lun_run(sdev);
+   if (!list_empty(sdev-host-starved_list))
+   scsi_starved_list_run(sdev-host);
 
blk_run_queue(q);
 }
-- 
1.7.10.4


--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH][RFC] Add EVPD page 0x83 entries to sysfs

2014-02-06 Thread Douglas Gilbert


On 14-02-06 08:04 AM, Hannes Reinecke wrote:

EVPD page 0x83 is used to uniquely identify the device.
So instead of having each and every program issue a separate
SG_IO call to retrieve this information it does make far more
sense to display it in sysfs.

Cc: Jeremy Linton jlin...@tributary.com
Cc: Doug Gilbert dgilb...@interlog.com


See below.


Cc: Kai Makisara kai.makis...@kolumbus.fi
Signed-off-by: Hannes Reinecke h...@suse.de
---
  drivers/scsi/scsi_scan.c   |   3 +
  drivers/scsi/scsi_sysfs.c  | 166 -
  include/scsi/scsi_device.h |   3 +
  3 files changed, 171 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 307a811..073fb84 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -929,6 +929,9 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned 
char *inq_result,
if (*bflags  BLIST_SKIP_VPD_PAGES)
sdev-skip_vpd_pages = 1;

+   if (sdev-scsi_level = SCSI_3)
+   scsi_attach_vpd_ident(sdev);
+
transport_configure_device(sdev-sdev_gendev);

if (sdev-host-hostt-slave_configure) {
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 9117d0b..ffe7cb5 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -725,8 +725,170 @@ show_queue_type_field(struct device *dev, struct 
device_attribute *attr,

  static DEVICE_ATTR(queue_type, S_IRUGO, show_queue_type_field, NULL);

+void scsi_attach_vpd_ident(struct scsi_device *sdev)
+{
+   int ret;
+   int vpd_len = 255;
+   unsigned char *buffer;
+retry:
+   buffer = kmalloc(vpd_len, GFP_KERNEL);
+   if (buffer) {
+   ret = scsi_get_vpd_page(sdev, 0x83, buffer, vpd_len);
+   if (ret == 0) {
+   vpd_len = (buffer[2]  8) + buffer[3];
+   if (vpd_len  255) {
+   kfree(buffer);
+   goto retry;
+   }
+   sdev-vpd_ident_len = vpd_len;
+   sdev-vpd_ident = buffer;
+   }
+   }
+}
+
+#define SCSI_VPD_ASSOC_lun 0x0
+#define SCSI_VPD_ASSOC_port 0x10
+#define SCSI_VPD_ASSOC_target 0x20
+
+#define SCSI_VPD_DESIG_vendor 0x0
+#define SCSI_VPD_DESIG_t10 0x1
+#define SCSI_VPD_DESIG_eui 0x2
+#define SCSI_VPD_DESIG_naa 0x3
+#define SCSI_VPD_DESIG_relport 0x4
+#define SCSI_VPD_DESIG_tpgrp 0x5
+#define SCSI_VPD_DESIG_lugrp 0x6
+#define SCSI_VPD_DESIG_md5 0x7
+#define SCSI_VPD_DESIG_scsi_name 0x8
+
+int scsi_parse_vpd_ident(struct scsi_device *sdev,
+int assoc, int desig, char *buf)
+{
+   unsigned char *d;
+   int len = 0;
+
+   if (!sdev-vpd_ident)
+   return -EINVAL;
+
+   d = sdev-vpd_ident + 4;
+   while (d  sdev-vpd_ident + sdev-vpd_ident_len) {
+   if ((d[1]  0x30) == assoc 


Wouldn't it be clearer if:
if (((d[1]  0x30)  4) == assoc 

and the lun, port and target defines were changed to agree
with T10?


+   (d[1]  0xf) == desig) {
+   switch (d[1]  0xf) {
+   case SCSI_VPD_DESIG_eui:
+   case SCSI_VPD_DESIG_naa:
+   case SCSI_VPD_DESIG_md5:
+   switch (d[3]) {
+   case 8:
+   len += sprintf(buf + len,
+ %8phN\n, d + 4);
+   break;
+   case 12:
+   len += sprintf(buf + len,
+ %12phN\n, d + 4);
+   break;
+   case 16:
+   len += sprintf(buf + len,
+ %16phN\n, d + 4);
+   break;
+   }
+   break;
+   case SCSI_VPD_DESIG_relport:
+   case SCSI_VPD_DESIG_tpgrp:
+   case SCSI_VPD_DESIG_lugrp:
+   len += sprintf(buf + len, %d\n,
+ (int)(d[6]  8) + d[7]);
+   break;
+   case SCSI_VPD_DESIG_vendor:
+   case SCSI_VPD_DESIG_t10:
+   case SCSI_VPD_DESIG_scsi_name:
+   len += snprintf(buf + len, d[3] + 2, %s\n,
+   d + 4);
+   break;
+   }
+   }
+   d += d[3] + 4;
+   }
+
+   return len ? len : -EINVAL;
+}
+
+#define sdev_evpd_ident(assoc,desig)   \
+static ssize_t

Re: [PATCH] st: Do not rewind for SG_IO

2014-02-06 Thread Douglas Gilbert


On 14-02-06 10:13 AM, Hannes Reinecke wrote:

On 02/06/2014 03:38 PM, James Bottomley wrote:

On Thu, 2014-02-06 at 08:50 -0500, Martin K. Petersen wrote:

Hannes == Hannes Reinecke h...@suse.de writes:



My patch provides both the original VPD 0x83 and 0x80 bits as well as
a handle identical to /sbin/scsi_id.


Hannes Bah, don't do that.  That should better be handled by udev
Hannes rules.  I've got a set of patches moving from scsi_id to sg_inq,
Hannes which can be easily adapted to using sysfs directly.

I just want to get out of the userland sending random SCSI commands
business. That is a world of pain right now.


Well, in the words of Bill Clinton I feel your pain.  However, I need
to know we won't pick up that same whole world of pain in the kernel.
Remember it's not just SCSI devices, it's also ATA devices and
potentially other block devices ... then there's blkid and all the weird
things it does. Then there's transport identifiers for multi-path
reservations and so on.


blkid is actually not so bad, _if_ it would distinguish between
'metadata not found' and 'I/O error during metadata read'.
I made a patch which hopefully should find it's way upstream.

And blkid just issues plain READ commands, so _this_ behaviour won't
change ...


Convince me this path won't have us shifting a whole bunch of weird from
user space to the kernel without any reduction in the weird factor.
Remember the point is not what can we do for all the nicely behaved
SCSI-3 devices, it's what do we do for everything.


Well, first and foremost the patch should be limited to SCSI-3 (and
later devices). So we'd be insulated from the most obvious crap.

So that leaves only devices which claim to be SCSI-3 or something,
but still keel over when asked for VPD pages.
However, this type of devices will fail already, as 'sd.c' is
already asking for all sorts of VPD pages.
Which leaves only non-Disk devices, but those tend to have a better
SCSI implementation as you can't make quick bucks with, say, as SCSI
Enclosure device.

But the prime motivator behind this patch is that we can be
reasonably sure the device will answer at all.
When retrieving the EVPD pages from userspace you always have the
problem that the device might have gone away or being unresponsive
by the time you get around sending the SG_IO call.
So you always have these stuck user-space processes asking for
information which _had_ been present at one point. In particular
udev is prone for this; anyone who ever came across the message

udevd[]: worker unexpectedly returned with status 0x0100

knows what I'm talking about ...


Running a small array with an external power supply which
is insufficient for the task of spinning up more than 1 or
2 disks is interesting to watch :-)

Doug Gilbert


And the more udev events for a device you get, the higher the
likelyhood for this to happen is.

Plus I could re-use this information for my scsi_dh_alua patchset,
and for xcopy and friends we'll be needing it, too.

Cheers,

Hannes



--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] block/null_blk: Fix completion processing from LIFO to FIFO

2014-02-06 Thread Jens Axboe

On Thu, Feb 06 2014, Christoph Hellwig wrote:
 On Thu, Feb 06, 2014 at 06:33:17PM +0200, Shlomo Pongratz wrote:
  The completion queue is implemented using lockless list.
  
  The llist_add is adds the events to the list head which is a push operation.
  The processing of the completion elements is done by disconnecting all the
  pushed elements and iterating over the disconnected list. The problem is
  that the processing is done in reverse order w.r.t order of the insertion
  i.e. LIFO processing. By reversing the disconnected list which is done in
  linear time the desired FIFO processing is achieved.
 
 I think it should just switch to using __smp_call_function_single
 directly, mirroring commit 3d6efbf62c797a2924785f482e4ce8aa8820ec72 for
 the blk-mq core.
 
 I've actually a patch in the queue that allows generic request
 completion offloading similar to blk_complete_request() in the old
 request code, but it'll need a bit more polishing first.

The patch is trivial enough that I may as well just apply it. It's not a
bug as such, but doing FIFO completions does make more sense. I was
aware of this issued, fwiw, it just didn't really matter for my testing
and I opted to avoid a list reversal to make it cheaper.

-- 
Jens Axboe

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] scsi/isci/port_config: Fix a infinite loop.

2014-02-06 Thread Dan Williams

On Tue, Jul 16, 2013 at 7:54 PM, Xinghai Yu yuxing...@cn.fujitsu.com wrote:
 It seems the phy_index++; have been placed in wrong place, without it
 the while circle up will do a infinite loop.

 Signed-off-by: Xinghai Yu yuxing...@cn.fujitsu.com
 ---
  drivers/scsi/isci/port_config.c |2 +-
  1 files changed, 1 insertions(+), 1 deletions(-)

 diff --git a/drivers/scsi/isci/port_config.c b/drivers/scsi/isci/port_config.c
 index cd962da..85c77f6 100644
 --- a/drivers/scsi/isci/port_config.c
 +++ b/drivers/scsi/isci/port_config.c
 @@ -311,9 +311,9 @@ sci_mpc_agent_validate_phy_configuration(struct isci_host 
 *ihost,
   ihost-phys[phy_index]);

 assigned_phy_mask |= (1  phy_index);
 +   phy_index++;
 }

 -   phy_index++;
 }

 return sci_port_configuration_agent_validate_ports(ihost, port_agent);

Hmm, I'm not aware of anyone using mpc mode.  Maybe we should just remove it?
--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 3/4] isci: fix needless ata reset escalations

2014-02-06 Thread Dan Williams

isci is needlessly tying libata's hands by returning
SAM_STAT_CHECK_CONDITION to some ata errors.  Instead, prefer
SAS_PROTO_RESPONSE to let libata (via sas_ata_task_done()) disposition
the device-to-host fis.

For example isci is triggering an HSM Violation where AHCI is showing a
simple media error for the same bus condition:

isci:
ata7.00: failed command: READ VERIFY SECTOR(S)
ata7.00: cmd 40/00:01:00:00:00/00:00:00:00:00/e0 tag 0
 res 01/04:00:00:00:00/00:00:00:00:00/e0 Emask 0x3 (HSM violation)

ahci:
ata6.00: failed command: READ VERIFY SECTOR(S)
ata6.00: cmd 40/00:01:00:00:00/00:00:00:00:00/e0 tag 0
 res 51/40:01:00:00:00/00:00:00:00:00/e0 Emask 0x9 (media error)

Note that the isci response matches this from sas_ata_task_done():
/* We saw a SAS error. Send a vague error. */
[..]
dev-sata_dev.fis[3] = 0x04; /* status err */
dev-sata_dev.fis[2] = ATA_ERR;

The end effect is that isci is needlessly triggering hard resets when
they are not necessary.

Cc: sta...@vger.kernel.org
Reported-by: Xun Ni xun...@intel.com
Tested-by: Nelson Cheng nelson.ch...@intel.com
Acked-by: Lukasz Dorau lukasz.do...@intel.com
Signed-off-by: Dan Williams dan.j.willi...@intel.com
---
 drivers/scsi/isci/request.c |8 ++--
 1 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/scsi/isci/request.c b/drivers/scsi/isci/request.c
index 99d2930b18c8..56e38096f0c4 100644
--- a/drivers/scsi/isci/request.c
+++ b/drivers/scsi/isci/request.c
@@ -2723,13 +2723,9 @@ static void isci_process_stp_response(struct sas_task 
*task, struct dev_to_host_
memcpy(resp-ending_fis, fis, sizeof(*fis));
ts-buf_valid_size = sizeof(*resp);
 
-   /* If the device fault bit is set in the status register, then
-* set the sense data and return.
-*/
-   if (fis-status  ATA_DF)
+   /* If an error is flagged let libata decode the fis */
+   if (ac_err_mask(fis-status))
ts-stat = SAS_PROTO_RESPONSE;
-   else if (fis-status  ATA_ERR)
-   ts-stat = SAM_STAT_CHECK_CONDITION;
else
ts-stat = SAM_STAT_GOOD;
 

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 0/4] isci, libsas fixes for 3.4-rc2

2014-02-06 Thread Dan Williams

Hi James,

Here are some collected fixes.  All but patch 2 are tagged for -stable.

Patch 1 and 4 have been on the list since before the 3.14 merge window,
patch 2 and 3 are new.

Please apply, thank you.

[PATCH 1/4] isci: fix reset timeout handling
[PATCH 2/4] scsi, libsas: introduce scmd_dbg() to quiet false positive 
timeout messages
[PATCH 3/4] isci: fix needless ata reset escalations
[PATCH 4/4] isci: correct erroneous for_each_isci_host macro

 drivers/scsi/isci/host.h|5 ++---
 drivers/scsi/isci/port_config.c |7 ---
 drivers/scsi/isci/request.c |8 ++--
 drivers/scsi/isci/task.c|2 +-
 drivers/scsi/libsas/sas_scsi_host.c |2 +-
 include/scsi/scsi_device.h  |   12 
 6 files changed, 18 insertions(+), 18 deletions(-)
--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 2/4] scsi, libsas: introduce scmd_dbg() to quiet false positive timeout messages

2014-02-06 Thread Dan Williams

libsas sometimes short circuits timeouts to force commands into error
recovery.  It is misleading to log that the command timed-out in
sas_scsi_timed_out() when in fact it was just queued for error handling.
It's also redundant in the case of a true timeout as libata eh will
detect and report timeouts via it's AC_ERR_TIMEOUT facility.

Given that some environments consider timeout errors to be indicative
of impending device failure demote the sas_scsi_timed_out() timeout
message to be disabled by default.  This parallels ata_scsi_timed_out().

Cc: Jack Wang jack_w...@usish.com
Cc: Anand Kumar Santhanam anandkumar.santha...@pmcs.com
Cc: Sangeetha Gnanasekaran sangeetha.gnanaseka...@pmcs.com
Cc: Nikith Ganigarakoppal nikith.ganigarakop...@pmcs.com
Reported-by: Xun Ni xun...@intel.com
Tested-by: Nelson Cheng nelson.ch...@intel.com
Acked-by: Lukasz Dorau lukasz.do...@intel.com
Signed-off-by: Dan Williams dan.j.willi...@intel.com
---
 drivers/scsi/libsas/sas_scsi_host.c |2 +-
 include/scsi/scsi_device.h  |   12 
 2 files changed, 13 insertions(+), 1 deletions(-)

diff --git a/drivers/scsi/libsas/sas_scsi_host.c 
b/drivers/scsi/libsas/sas_scsi_host.c
index da3aee17faa5..25d0f127424d 100644
--- a/drivers/scsi/libsas/sas_scsi_host.c
+++ b/drivers/scsi/libsas/sas_scsi_host.c
@@ -862,7 +862,7 @@ out:
 
 enum blk_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd)
 {
-   scmd_printk(KERN_DEBUG, cmd, command %p timed out\n, cmd);
+   scmd_dbg(cmd, command %p timed out\n, cmd);
 
return BLK_EH_NOT_HANDLED;
 }
diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h
index d65fbec2533d..067ac9f1607c 100644
--- a/include/scsi/scsi_device.h
+++ b/include/scsi/scsi_device.h
@@ -235,12 +235,24 @@ struct scsi_dh_data {
 #define sdev_printk(prefix, sdev, fmt, a...)   \
dev_printk(prefix, (sdev)-sdev_gendev, fmt, ##a)
 
+#define sdev_dbg(sdev, fmt, a...) \
+   dev_dbg((sdev)-sdev_gendev, fmt, ##a)
+
 #define scmd_printk(prefix, scmd, fmt, a...)   \
 (scmd)-request-rq_disk ? \
sdev_printk(prefix, (scmd)-device, [%s]  fmt,\
(scmd)-request-rq_disk-disk_name, ##a) : \
sdev_printk(prefix, (scmd)-device, fmt, ##a)
 
+#define scmd_dbg(scmd, fmt, a...) \
+do {  \
+   if ((scmd)-request-rq_disk)  \
+   sdev_dbg((scmd)-device, [%s]  fmt,  \
+(scmd)-request-rq_disk-disk_name, ##a);\
+   else   \
+   sdev_dbg((scmd)-device, fmt, ##a);\
+   } while (0)
+
 enum scsi_target_state {
STARGET_CREATED = 1,
STARGET_RUNNING,

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 4/4] isci: correct erroneous for_each_isci_host macro

2014-02-06 Thread Dan Williams

From: Lukasz Dorau lukasz.do...@intel.com

In the first place, the loop 'for' in the macro 'for_each_isci_host'
(drivers/scsi/isci/host.h:314) is incorrect, because it accesses
the 3rd element of 2 element array. After the 2nd iteration it executes
the instruction:
ihost = to_pci_info(pdev)-hosts[2]
(while the size of the 'hosts' array equals 2) and reads an
out of range element.

In the second place, this loop is incorrectly optimized by GCC v4.8
(see http://marc.info/?l=linux-kernelm=138998871911336w=2).
As a result, on platforms with two SCU controllers,
the loop is executed more times than it can be (for i=0,1 and 2).
It causes kernel panic during entering the S3 state
and the following oops after 'rmmod isci':

BUG: unable to handle kernel NULL pointer dereference at (null)
IP: [8131360b] __list_add+0x1b/0xc0
Oops:  [#1] SMP
RIP: 0010:[8131360b]  [8131360b] __list_add+0x1b/0xc0
Call Trace:
  [81661b84] __mutex_lock_slowpath+0x114/0x1b0
  [81661c3f] mutex_lock+0x1f/0x30
  [a03e97cb] sas_disable_events+0x1b/0x50 [libsas]
  [a03e9818] sas_unregister_ha+0x18/0x60 [libsas]
  [a040316e] isci_unregister+0x1e/0x40 [isci]
  [a0403efd] isci_pci_remove+0x5d/0x100 [isci]
  [813391cb] pci_device_remove+0x3b/0xb0
  [813fbf7f] __device_release_driver+0x7f/0xf0
  [813fc8f8] driver_detach+0xa8/0xb0
  [813fbb8b] bus_remove_driver+0x9b/0x120
  [813fcf2c] driver_unregister+0x2c/0x50
  [813381f3] pci_unregister_driver+0x23/0x80
  [a04152f8] isci_exit+0x10/0x1e [isci]
  [810d199b] SyS_delete_module+0x16b/0x2d0
  [81012a21] ? do_notify_resume+0x61/0xa0
  [8166ce29] system_call_fastpath+0x16/0x1b

The loop has been corrected.
This patch fixes kernel panic during entering the S3 state
and the above oops.

Signed-off-by: Lukasz Dorau lukasz.do...@intel.com
Reviewed-by: Maciej Patelczyk maciej.patelc...@intel.com
Tested-by: Lukasz Dorau lukasz.do...@intel.com
Cc: Dave Jiang dave.ji...@intel.com
Cc: sta...@vger.kernel.org
Signed-off-by: Dan Williams dan.j.willi...@intel.com
---
 drivers/scsi/isci/host.h |5 ++---
 1 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/scsi/isci/host.h b/drivers/scsi/isci/host.h
index 4911310a38f5..22a9bb1abae1 100644
--- a/drivers/scsi/isci/host.h
+++ b/drivers/scsi/isci/host.h
@@ -311,9 +311,8 @@ static inline struct Scsi_Host *to_shost(struct isci_host 
*ihost)
 }
 
 #define for_each_isci_host(id, ihost, pdev) \
-   for (id = 0, ihost = to_pci_info(pdev)-hosts[id]; \
-id  ARRAY_SIZE(to_pci_info(pdev)-hosts)  ihost; \
-ihost = to_pci_info(pdev)-hosts[++id])
+   for (id = 0; id  SCI_MAX_CONTROLLERS  \
+(ihost = to_pci_info(pdev)-hosts[id]); id++)
 
 static inline void wait_for_start(struct isci_host *ihost)
 {

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 1/4] isci: fix reset timeout handling

2014-02-06 Thread Dan Williams

Remove an erroneous BUG_ON() in the case of a hard reset timeout.  The
reset timeout handler puts the port into the awaiting link-up state.
The timeout causes the device to be disconnected and we need to be in
the awaiting link-up state to re-connect the port.  The BUG_ON() made
the incorrect assumption that resets never timeout and we always
complete the reset in the resetting state.

Testing this patch also uncovered that libata continues to attempt to
reset the port long after the driver has torn down the context.  Once
the driver has committed to abandoning the link it must indicate to
libata that recovery ends by returning -ENODEV from
-lldd_I_T_nexus_reset().

Cc: sta...@vger.kernel.org
Cc: Maciej Patelczyk maciej.patelc...@intel.com
Cc: Dave Jiang dave.ji...@intel.com
Acked-by: Lukasz Dorau lukasz.do...@intel.com
Reported-by: David Milburn dmilb...@redhat.com
Reported-by: Xun Ni xun...@intel.com
Tested-by: Xun Ni xun...@intel.com
Signed-off-by: Dan Williams dan.j.willi...@intel.com
---
 drivers/scsi/isci/port_config.c |7 ---
 drivers/scsi/isci/task.c|2 +-
 2 files changed, 1 insertions(+), 8 deletions(-)

diff --git a/drivers/scsi/isci/port_config.c b/drivers/scsi/isci/port_config.c
index 85c77f6b802b..ac879745ef80 100644
--- a/drivers/scsi/isci/port_config.c
+++ b/drivers/scsi/isci/port_config.c
@@ -615,13 +615,6 @@ static void sci_apc_agent_link_up(struct isci_host *ihost,
  
SCIC_SDS_APC_WAIT_LINK_UP_NOTIFICATION);
} else {
/* the phy is already the part of the port */
-   u32 port_state = iport-sm.current_state_id;
-
-   /* if the PORT'S state is resetting then the link up is from
-* port hard reset in this case, we need to tell the port
-* that link up is recieved
-*/
-   BUG_ON(port_state != SCI_PORT_RESETTING);
port_agent-phy_ready_mask |= 1  phy_index;
sci_port_link_up(iport, iphy);
}
diff --git a/drivers/scsi/isci/task.c b/drivers/scsi/isci/task.c
index 0d30ca849e8f..5d6fda72d659 100644
--- a/drivers/scsi/isci/task.c
+++ b/drivers/scsi/isci/task.c
@@ -801,7 +801,7 @@ int isci_task_I_T_nexus_reset(struct domain_device *dev)
/* XXX: need to cleanup any ireqs targeting this
 * domain_device
 */
-   ret = TMF_RESP_FUNC_COMPLETE;
+   ret = -ENODEV;
goto out;
}
 

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 13/17] scsi: push host_lock down into scsi_{host,target}_queue_ready

2014-02-06 Thread Nicholas A. Bellinger

On Thu, 2014-02-06 at 18:10 +0100, Bart Van Assche wrote:
 On 02/06/14 17:56, James Bottomley wrote:
  Could you benchmark this lot and show what the actual improvement is
  just for this series, if any?
 
 I see a performance improvement of 12% with the SRP protocol for the
 SCSI core optimizations alone (I am still busy measuring the impact of
 the blk-mq conversion but I can already see that it is really
 significant). Please note that the performance impact depends a lot on
 the workload (number of LUNs per SCSI host e.g.) so maybe the workload I
 chose is not doing justice to Christoph's work. And it's also important
 to mention that with the workload I ran I was saturating the target
 system CPU (a quad core Intel i5). In other words, results might be
 better with a more powerful target system.
 

Starting with a baseline using scsi_debug that NOPs REQ_TYPE_FS commands
to measure improvements would be a better baseline vs. scsi_request_fn()
existing code that what your doing above.

That way at least it's easier to measure specific scsi-core overhead
down to the LLD's queuecommand without everything else in the way.

--nab

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 12/15] scsi: initial blk-mq support

2014-02-06 Thread Nicholas A. Bellinger

On Wed, 2014-02-05 at 04:41 -0800, Christoph Hellwig wrote:
 plain text document attachment
 (0012-scsi-initial-blk-mq-support.patch)
 Add support for using the blk-mq code to submit requests to SCSI
 drivers.  There is very little blk-mq specific code, but that's
 partially because important functionality like partial completions
 and request requeueing is still missing in blk-mq.  I hope to keep
 most of the additions for these in the blk-mq core instead of the
 SCSI layer, though.
 
 Based on the earlier scsi-mq prototype by Nicholas Bellinger, although
 not a whole lot of actual code is left.
 
 Not-quite-signed-off-yet-by: Christoph Hellwig h...@lst.de
 ---
  drivers/scsi/scsi.c  |   36 ++-
  drivers/scsi/scsi_lib.c  |  244 
 --
  drivers/scsi/scsi_priv.h |2 +
  drivers/scsi/scsi_scan.c |5 +-
  include/scsi/scsi_host.h |3 +
  5 files changed, 278 insertions(+), 12 deletions(-)
 
 diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
 index adb8bfb..cf5c110 100644
 --- a/drivers/scsi/scsi.c
 +++ b/drivers/scsi/scsi.c
 @@ -44,6 +44,7 @@
  #include linux/string.h
  #include linux/slab.h
  #include linux/blkdev.h
 +#include linux/blk-mq.h
  #include linux/delay.h
  #include linux/init.h
  #include linux/completion.h
 @@ -688,6 +689,33 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
   return 0;
  }
  
 +static void scsi_softirq_done_remote(void *data)
 +{
 + return scsi_softirq_done(data);
 +}
 +
 +static void scsi_mq_done(struct request *req)
 +{
 + int cpu;
 +
 +#if 0
 + if (!ctx-ipi_redirect)
 + return scsi_softirq_done(cmd);
 +#endif
 +
 + cpu = get_cpu();
 + if (cpu != req-cpu  cpu_online(req-cpu)) {
 + req-csd.func = scsi_softirq_done_remote;
 + req-csd.info = req;
 + req-csd.flags = 0;
 + __smp_call_function_single(req-cpu, req-csd, 0);
 + } else {
 + scsi_softirq_done(req);
 + }
 +
 + put_cpu();
 +}
 +
  /**
   * scsi_done - Invoke completion on finished SCSI command.
   * @cmd: The SCSI Command for which a low-level device driver (LLDD) gives
 @@ -701,8 +729,14 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd)
   */
  static void scsi_done(struct scsi_cmnd *cmd)
  {
 + struct request *req = cmd-request;
 +
   trace_scsi_dispatch_cmd_done(cmd);
 - blk_complete_request(cmd-request);
 +
 + if (req-mq_ctx)
 + scsi_mq_done(req);
 + else
 + blk_complete_request(req);
  }
  

Is there extra scsi_mq_done() part that does IPI here even necessary
anymore..?

I was under the assumption that blk_mq_end_io() is already taking care
of this..?

  /**
 diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
 index e67950c..8dd8893 100644
 --- a/drivers/scsi/scsi_lib.c
 +++ b/drivers/scsi/scsi_lib.c
 @@ -20,6 +20,7 @@
  #include linux/delay.h
  #include linux/hardirq.h
  #include linux/scatterlist.h
 +#include linux/blk-mq.h
  
  #include scsi/scsi.h
  #include scsi/scsi_cmnd.h
 @@ -554,6 +555,15 @@ static bool scsi_end_request(struct scsi_cmnd *cmd, int 
 error, int bytes,
   struct request *req = cmd-request;
  
   /*
 +  * XXX: need to handle partial completions and retries here.
 +  */
 + if (req-mq_ctx) {
 + blk_mq_end_io(req, error);
 + put_device(cmd-device-sdev_gendev);
 + return true;
 + }
 +
 + /*
* If there are blocks left over at the end, set up the command
* to queue the remainder of them.
*/
 @@ -1014,12 +1024,15 @@ static int scsi_init_sgtable(struct request *req, 
 struct scsi_data_buffer *sdb,
  {
   int count;
  
 - /*
 -  * If sg table allocation fails, requeue request later.
 -  */
 - if (unlikely(scsi_alloc_sgtable(sdb, req-nr_phys_segments,
 - gfp_mask))) {
 - return BLKPREP_DEFER;
 + BUG_ON(req-nr_phys_segments  SCSI_MAX_SG_SEGMENTS);
 +
 + if (!req-mq_ctx) {
 + /*
 +  * If sg table allocation fails, requeue request later.
 +  */
 + if (unlikely(scsi_alloc_sgtable(sdb, req-nr_phys_segments,
 + gfp_mask)))
 + return BLKPREP_DEFER;
   }
  
   req-buffer = NULL;
 @@ -1075,9 +1088,11 @@ int scsi_init_io(struct scsi_cmnd *cmd, gfp_t gfp_mask)
   BUG_ON(prot_sdb == NULL);
   ivecs = blk_rq_count_integrity_sg(rq-q, rq-bio);
  
 - if (scsi_alloc_sgtable(prot_sdb, ivecs, gfp_mask)) {
 - error = BLKPREP_DEFER;
 - goto err_exit;
 + if (!rq-mq_ctx) {
 + if (scsi_alloc_sgtable(prot_sdb, ivecs, gfp_mask)) {
 + error = BLKPREP_DEFER;
 + goto err_exit;
 + }
   }
  
   count = blk_rq_map_integrity_sg(rq-q,

Re: [PATCH 06/32] target: Convert lu_gp_ref_cnt to kref

2014-02-06 Thread Nicholas A. Bellinger

On Wed, 2014-02-05 at 14:02 -0800, Andy Grover wrote:
 Hi nab, I'm getting back to looking at this patchset, but wanted to just
 discuss and understand this one first because all the kref ones are
 similar. see below.
 
 On 12/16/2013 12:52 PM, Nicholas A. Bellinger wrote:
  On Fri, 2013-12-13 at 15:58 -0800, Andy Grover wrote:
  Use kref to handle reference counting
 
  Signed-off-by: Andy Grover agro...@redhat.com
  ---
   drivers/target/target_core_alua.c |   37 
  -
   include/target/target_core_base.h |2 +-
   2 files changed, 21 insertions(+), 18 deletions(-)
 
  diff --git a/drivers/target/target_core_alua.c 
  b/drivers/target/target_core_alua.c
  index 2ac2f11..8c01ade 100644
  --- a/drivers/target/target_core_alua.c
  +++ b/drivers/target/target_core_alua.c
  @@ -54,6 +54,16 @@ static LIST_HEAD(lu_gps_list);
   
   struct t10_alua_lu_gp *default_lu_gp;
   
  +static void release_alua_lu_gp(struct kref *ref)
  +{
  +  struct t10_alua_lu_gp *lu_gp = container_of(ref, struct t10_alua_lu_gp, 
  refcount);
  +
  +  kmem_cache_free(t10_alua_lu_gp_cache, lu_gp);
  +}
  +
  +#define get_alua_lu_gp(x) kref_get(x-refcount)
  +#define put_alua_lu_gp(x) kref_put(x-refcount, release_alua_lu_gp)
  +
   /*
* REPORT_TARGET_PORT_GROUPS
*
  @@ -898,8 +908,7 @@ int core_alua_do_port_transition(
 local_lu_gp_mem = l_dev-dev_alua_lu_gp_mem;
 spin_lock(local_lu_gp_mem-lu_gp_mem_lock);
 lu_gp = local_lu_gp_mem-lu_gp;
  -  atomic_inc(lu_gp-lu_gp_ref_cnt);
  -  smp_mb__after_atomic_inc();
  +  get_alua_lu_gp(lu_gp);
 spin_unlock(local_lu_gp_mem-lu_gp_mem_lock);
 /*
  * For storage objects that are members of the 'default_lu_gp',
  @@ -913,8 +922,8 @@ int core_alua_do_port_transition(
  */
 core_alua_do_transition_tg_pt(l_tg_pt_gp, l_port, l_nacl,
 md_buf, new_state, explicit);
  -  atomic_dec(lu_gp-lu_gp_ref_cnt);
  -  smp_mb__after_atomic_dec();
  +
  +  put_alua_lu_gp(lu_gp);
 kfree(md_buf);
 return 0;
 }
  @@ -985,8 +994,7 @@ int core_alua_do_port_transition(
 l_tg_pt_gp-tg_pt_gp_id, (explicit) ? explicit : implicit,
 core_alua_dump_state(new_state));
   
  -  atomic_dec(lu_gp-lu_gp_ref_cnt);
  -  smp_mb__after_atomic_dec();
  +  put_alua_lu_gp(lu_gp);
 kfree(md_buf);
 return 0;
   }
  @@ -1107,7 +1115,8 @@ core_alua_allocate_lu_gp(const char *name, int 
  def_group)
 INIT_LIST_HEAD(lu_gp-lu_gp_node);
 INIT_LIST_HEAD(lu_gp-lu_gp_mem_list);
 spin_lock_init(lu_gp-lu_gp_lock);
  -  atomic_set(lu_gp-lu_gp_ref_cnt, 0);
  +
  +  kref_init(lu_gp-refcount);
   
 if (def_group) {
 lu_gp-lu_gp_id = alua_lu_gps_counter++;
  @@ -1200,13 +1209,7 @@ void core_alua_free_lu_gp(struct t10_alua_lu_gp 
  *lu_gp)
 list_del(lu_gp-lu_gp_node);
 alua_lu_gps_count--;
 spin_unlock(lu_gps_lock);
  -  /*
  -   * Allow struct t10_alua_lu_gp * referenced by 
  core_alua_get_lu_gp_by_name()
  -   * in target_core_configfs.c:target_core_store_alua_lu_gp() to be
  -   * released with core_alua_put_lu_gp_from_name()
  -   */
  -  while (atomic_read(lu_gp-lu_gp_ref_cnt))
  -  cpu_relax();
  +
 /*
  * Release reference to struct t10_alua_lu_gp * from all associated
  * struct se_device.
  @@ -1241,7 +1244,7 @@ void core_alua_free_lu_gp(struct t10_alua_lu_gp 
  *lu_gp)
 }
 spin_unlock(lu_gp-lu_gp_lock);
   
  -  kmem_cache_free(t10_alua_lu_gp_cache, lu_gp);
  +  put_alua_lu_gp(lu_gp);
   }
   
 
  The assumption that it's safe to 'Release reference to struct
  t10_alua_lu_gp * from all associated struct device' below the original
  cpu_relax(), while there are still other process contexts doing their
  respective put_alua_lu_gp() is totally wrong.
 
 The only other spot is core_alua_do_port_transition, afaics. I think if
 it races with free_lu_gp, lu_gp will either be the old lu_gp (which no
 longer will have anything on lu_gp_mem_list) or will be default_lu_gp.
 If it's the old lu_gp then it iterates over an empty list, and then the
 lu_gp gets finally freed by the put() at the bottom.
 
  Furthermore, allowing a configfs_group_ops-drop_item() to return while
  there are still active references from other process contexts means that
  the parent struct config_group is no longer referenced counted (eg:
  configfs child is removed), and introduces a whole host of potential
  bugs.
  
  So that said, NAK on this patch.
 
 I think some of the other patches used drop_item() and thus were bad,
 but in this one the existing code is already calling
 core_alua_free_lu_gp() from release().
 
 Thoughts?
 

The problem with this patch and all of the other patches that follow the
same logic is the false assumption that it's safe to return from
configfs_group_operations-drop_item() before all references to the
underlying data structure containing the associated struct config_group
have

LSI SAS2008 SATA TRIM not working

2014-02-06 Thread Kurt Miller

Various sources indicate that LSI's SAS2008 controllers support TRIM
when running their IT firmware (LSI [1] and this list [2]). However, I
have not been able to get it working with Dell PERC H200 or H310 cross
flashed into LSI IT firmware. Currently I'm testing with Samsung 840 EVO
SATA SSDs. I have tried various LSI IT firmware versions (P14, P16, P18)
and various Linux distributions (Ubuntu 13.10, Ubuntu 12.04, Ubuntu 14
beta, RHEL 7 beta, Fedora 19).

The SSDs report they support TRIM:

# hdparm -I /dev/sdc | grep TRIM supported
*   Data Set Management TRIM supported (limit 8 blocks)

Checking for support shows TRIM support is not making it through:

# cat /sys/block/sdc/queue/discard_granularity 
0

# fstrim /srv/node/disk2p1
fstrim: /srv/node/disk2p1: FITRIM ioctl failed: Operation not supported

I am not using LVM or crypto. I've tried both ext4 and xfs. Here is an
example fstab line:

LABEL=disk2p1 /srv/node/disk2p1 xfs
noatime,nodiratime,nobarrier,logbufs=8,discard 0 0

The dmesg lines for mpt2sas look as follows:

mpt2sas version 15.100.00.00 loaded
mpt2sas0: 64 BIT PCI BUS DMA ADDRESSING SUPPORTED, total mem (16418600
kB)
mpt2sas :04:00.0: irq 79 for MSI/MSI-X
mpt2sas0-msix0: PCI-MSI-X enabled: IRQ 79
mpt2sas0: iomem(0xdf2b), mapped(0xc90011be),
size(65536)
mpt2sas0: ioport(0xfc00), size(256)
mpt2sas0: sending message unit reset !!
mpt2sas0: message unit reset: SUCCESS
mpt2sas0: Allocated physical memory: size(8056 kB)
mpt2sas0: Current Controller Queue Depth(3579), Max Controller Queue
Depth(3712)
mpt2sas0: Scatter Gather Elements per IO(128)
mpt2sas0: LSISAS2008: FWVersion(14.00.01.00), ChipRevision(0x03),
BiosVersion(07.27.00.00)
mpt2sas0: Dell 6Gbps SAS HBA: Vendor(0x1000), Device(0x0072),
SSVID(0x1028), SSDID(0x1F1C)
mpt2sas0: Protocol=(Initiator,Target), Capabilities=(TLR,EEDP,Snapshot
Buffer,Diag Trace Buffer,Task Set Full,NCQ)
mpt2sas0: sending port enable !!
mpt2sas0: host_add: handle(0x0001), sas_addr(0x590b11c027281600),
phys(8)
mpt2sas0: port enable: SUCCESS

Is TRIM working for anyone using LSI SAS2008 controllers?

I'm out of ideas as to what could be the cause of the failure. Is it
just that mpt2sas doesn't support TRIM contrary to the links below?

Thanks,
-Kurt

[1]
http://mycusthelp.info/LSI/_cs/AnswerDetail.aspx?sSessionID=6784104245IHLKQYLMXJTXVUMPQBJOGGFDORLPZBinc=8039caller=~%2fFindAnswers.aspx%3ftxtCriteria%3dtrim%26sSessionid%3d6784104245IHLKQYLMXJTXVUMPQBJOGGFDORLPZB

[2] http://comments.gmane.org/gmane.linux.scsi/69511

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

FW: [LSF/MM TOPIC] New Storage capabilities

2014-02-06 Thread Knight, Frederick

Several new features are becoming a reality in SCSI and ATA this year, and I 
would like to participate in the discussions on supporting these new features.

  a) SCSI conglomerate LUNs (using more bits in the LUN to manage groupings 
of logical units);
  b) Atomic commands;
  c) IO and LBA HINTS (for both SCSI and ATA/IDE);
a. For storage tiering;
b. For cache management;
  d) FC - 128Gig parallel and breakout mode

I attend T10 (SCSI), T11 (FC), T13 (ATA/IDE), IETF (iSCSI), and SNIA and can 
provide expertise in the areas listed above as well as the topics covered in 
those standards committees.

Fred Knight
--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH v2] scsi: Add 'retry_timeout' to avoid infinite command retry

2014-02-06 Thread Eiichi Tsukata

Currently, scsi error handling in scsi_io_completion() tries to
unconditionally requeue scsi command when device keeps some error state.
For example, UNIT_ATTENTION causes infinite retry with
action == ACTION_RETRY.
This is because retryable errors are thought to be temporary and the scsi
device will soon recover from those errors. Normally, such retry policy is
appropriate because the device will soon recover from temporary error state.

But there is no guarantee that device is able to recover from error state
immediately. Actually, we've experienced an infinite retry on some hardware.
Therefore hardware error can results in infinite command retry loop.

This patch adds 'retry_timeout' sysfs attribute which limits the retry time
of each scsi command. This attribute is located in scsi sysfs directory
for example /sys/bus/scsi/devices/X:X:X:X/ and value is in seconds.
Once scsi command retry time is longer than this timeout,
the command is treated as failure. 'retry_timeout' is set to '0' by default
which means no timeout set.

Usage:
 - To set retry timeout(set retry_timeout to 30 sec):
# echo 30  /sys/bus/scsi/devices/X:X:X:X/retry_timeout

Changes in v2:
 - check retry timeout in scsi_io_completion() instead of scsi_softirq_done()

Signed-off-by: Eiichi Tsukata eiichi.tsukata...@hitachi.com
Cc: James E.J. Bottomley jbottom...@parallels.com
Cc: linux-scsi@vger.kernel.org
Cc: linux-ker...@vger.kernel.org
---
 drivers/scsi/scsi_lib.c|   22 ++
 drivers/scsi/scsi_scan.c   |1 +
 drivers/scsi/scsi_sysfs.c  |   32 
 include/scsi/scsi.h|1 +
 include/scsi/scsi_device.h |1 +
 5 files changed, 57 insertions(+)

diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 7bd7f0d..813b287 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -741,6 +741,24 @@ static int __scsi_error_from_host_byte(struct scsi_cmnd 
*cmd, int result)
 }
 
 /*
+ * Check if scsi command excessed retry timeout
+ */
+static int scsi_retry_timed_out(struct scsi_cmnd *cmd)
+{
+   unsigned int retry_timeout;
+
+   retry_timeout = cmd-device-retry_timeout;
+   if (retry_timeout 
+   time_before(cmd-jiffies_at_alloc + retry_timeout, jiffies)) {
+   scmd_printk(KERN_ERR, cmd, retry timeout, waited %us\n,
+   retry_timeout/HZ);
+   return 1;
+   }
+
+   return 0;
+}
+
+/*
  * Function:scsi_io_completion()
  *
  * Purpose: Completion processing for block device I/O requests.
@@ -989,6 +1007,10 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned 
int good_bytes)
action = ACTION_FAIL;
}
 
+   if ((action == ACTION_RETRY || action == ACTION_DELAYED_RETRY) 
+   scsi_retry_timed_out(cmd))
+   action = ACTION_FAIL;
+
switch (action) {
case ACTION_FAIL:
/* Give up and fail the remainder of the request */
diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c
index 307a811..4ab044a 100644
--- a/drivers/scsi/scsi_scan.c
+++ b/drivers/scsi/scsi_scan.c
@@ -925,6 +925,7 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned 
char *inq_result,
sdev-no_dif = 1;
 
sdev-eh_timeout = SCSI_DEFAULT_EH_TIMEOUT;
+   sdev-retry_timeout = SCSI_DEFAULT_RETRY_TIMEOUT;
 
if (*bflags  BLIST_SKIP_VPD_PAGES)
sdev-skip_vpd_pages = 1;
diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
index 8ff62c2..eaa2118 100644
--- a/drivers/scsi/scsi_sysfs.c
+++ b/drivers/scsi/scsi_sysfs.c
@@ -627,6 +627,37 @@ sdev_store_eh_timeout(struct device *dev, struct 
device_attribute *attr,
 static DEVICE_ATTR(eh_timeout, S_IRUGO | S_IWUSR, sdev_show_eh_timeout, 
sdev_store_eh_timeout);
 
 static ssize_t
+sdev_show_retry_timeout(struct device *dev,
+   struct device_attribute *attr, char *buf)
+{
+   struct scsi_device *sdev;
+   sdev = to_scsi_device(dev);
+   return snprintf(buf, 20, %u\n, sdev-retry_timeout / HZ);
+}
+
+static ssize_t
+sdev_store_retry_timeout(struct device *dev, struct device_attribute *attr,
+const char *buf, size_t count)
+{
+   struct scsi_device *sdev;
+   unsigned int retry_timeout;
+   int err;
+
+   if (!capable(CAP_SYS_ADMIN))
+   return -EACCES;
+
+   sdev = to_scsi_device(dev);
+   err = kstrtouint(buf, 10, retry_timeout);
+   if (err)
+   return err;
+   sdev-retry_timeout = retry_timeout * HZ;
+
+   return count;
+}
+static DEVICE_ATTR(retry_timeout, S_IRUGO | S_IWUSR,
+  sdev_show_retry_timeout, sdev_store_retry_timeout);
+
+static ssize_t
 store_rescan_field (struct device *dev, struct device_attribute *attr,
const char *buf, size_t count)
 {
@@ -797,6 +828,7 @@ static struct attribute *scsi_sdev_attrs[] = {
dev_attr_state.attr,

Investitionshilfe.

2014-02-06 Thread Miss Laura Buisson




Investitionshilfe.

Ich bin Fräulein Laura Buisson, Einzel weiblich (nie verheiratet) mich
und meinen jüngeren Bruder, plane ich, zu Ihrem Land für
Investitionszwecke zu verlagern.

Ich habe einige Fonds und im Hotel-und Transportgeschäft investieren
wollen und wir $ 10.500.000,00 (zehn Millionen fünfhunderttausend
US-Dollar) müssen wir Sie fungieren als unsere ausländischen
Geschäftspartner zu meinem verstorbenen Vater.

Wir werden Ihre Prozentsatz auf Ihre Antwort zu diskutieren, bitte ich
möchte, dass Sie diese Informationen als eine dringende Nachricht zu
behandeln, und antworten Sie mir ohne Verzögerung, so dass ich meine
endgültige Entscheidung zu treffen.

Weitere Einzelheiten wird euch gegeben werden, wenn ich von Ihnen höre,
mit Ihren persönlichen Daten als auch.

Ich warte, von Ihnen bald zu hören.

Fräulein Laura Buisson.


--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 0/6] iscsi changes for scsi-misc

2014-02-06 Thread Mike Christie

On 12/20/2013 12:53 PM, micha...@cs.wisc.edu wrote:
 The following patches are some features and fixes for scsi-misc.
 
 James, if you were going to merge the libiscsi locking changes here
 http://www.spinics.net/lists/linux-scsi/msg69903.html
 do not bother. The qlogic patches that were just merged had a conflict.
 The patches in the following emails should apply cleanly to misc.
 

Ignore this patchset. It has a regression. I will resend when I am done
testing a fix.

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 06/32] target: Convert lu_gp_ref_cnt to kref

2014-02-06 Thread Andy Grover

On 02/06/2014 03:51 PM, Nicholas A. Bellinger wrote:
 The problem with this patch and all of the other patches that follow the
 same logic is the false assumption that it's safe to return from
 configfs_group_operations-drop_item() before all references to the
 underlying data structure containing the associated struct config_group
 have been dropped.
 
 In this particular case, target_core_alua_drop_lu_gp() -
 config_item_put() - target_core_alua_lu_gp_release() -
 core_alua_free_lu_gp() is still being called from -drop_item() via
 target_core_alua_lu_gp_ops-release(), so the same holds true here as
 well.

Yes exactly. That's why the configfs release() doesn't free the lu_gp,
it just lowers the lu_gp refcount. release() being called doesn't mean
the object is going away, it just means configfs is done with it.

If do_port_transition has incremented it in the meantime, the lu_gp
won't be freed from the release() (because the underlying object's
refcount will still be nonzero) but only when do_port_transition is
done. In the normal case where there isn't a ref from
do_port_transition, then it is safe to free the lu_gp from release -
put_alua_lu_gp - release_alua_lu_gp - kmem_cache_free.

-- Andy

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 04/16] scsi_dh_alua: Make stpg synchronous

2014-02-06 Thread Mike Christie

On 01/31/2014 03:29 AM, Hannes Reinecke wrote:
 We should be issuing STPG synchronously as we need to
 evaluate the return code on failure.
 
 Signed-off-by: Hannes Reinecke h...@suse.de

I think we need to also make dm-mpath.c use a non-ordered workqueue for
kmpath_handlerd. With this patch and the current ordered workqueue in
dm-mpath I think we will only be able to do one STPG at a time. I think
if we use a normal old non-ordered workqueue then we would be limited to
have max_active STPGs executing.
--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 04/16] scsi_dh_alua: Make stpg synchronous

2014-02-06 Thread Mike Christie

On 02/06/2014 07:24 PM, Mike Christie wrote:
 On 01/31/2014 03:29 AM, Hannes Reinecke wrote:
 We should be issuing STPG synchronously as we need to
 evaluate the return code on failure.

 Signed-off-by: Hannes Reinecke h...@suse.de
 
 I think we need to also make dm-mpath.c use a non-ordered workqueue for
 kmpath_handlerd. With this patch and the current ordered workqueue in
 dm-mpath I think we will only be able to do one STPG at a time. I think
 if we use a normal old non-ordered workqueue then we would be limited to
 have max_active STPGs executing.

I goofed and commented in the order I saw the patches :) I take this
comment back for this patch, because I see in 16/16 you added a new
workqueue to scsi_dh_alua to do rtpgs and stpgs.

For 16/16 though, do we want to make kmpath_aluad a non single threaded
workqueue? It looks like max_active for single threaded is only one work
at a time too.

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH v2] scsi: Add 'retry_timeout' to avoid infinite command retry

2014-02-06 Thread James Bottomley

On Fri, 2014-02-07 at 09:22 +0900, Eiichi Tsukata wrote:
 Currently, scsi error handling in scsi_io_completion() tries to
 unconditionally requeue scsi command when device keeps some error state.
 For example, UNIT_ATTENTION causes infinite retry with
 action == ACTION_RETRY.
 This is because retryable errors are thought to be temporary and the scsi
 device will soon recover from those errors. Normally, such retry policy is
 appropriate because the device will soon recover from temporary error state.



 But there is no guarantee that device is able to recover from error state
 immediately. Actually, we've experienced an infinite retry on some hardware.
 Therefore hardware error can results in infinite command retry loop.

Could you please add an analysis of the actual failure; which devices
and what conditions.

 This patch adds 'retry_timeout' sysfs attribute which limits the retry time
 of each scsi command. This attribute is located in scsi sysfs directory
 for example /sys/bus/scsi/devices/X:X:X:X/ and value is in seconds.
 Once scsi command retry time is longer than this timeout,
 the command is treated as failure. 'retry_timeout' is set to '0' by default
 which means no timeout set.

Don't do this ... you're mixing a feature (which you'd need to justify)
with an apparent bug fix.

Once you dump all the complexity, I think the patch boils down to a
simple check before the action switch in scsi_io_completion():

if (action !=  ACTION_FAIL 
time_before(cmd-jiffies_at_alloc + wait_for, jiffies)) {
action = ACTION_FAIL;
description = command timed out;
}


James


--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH v2] scsi: Add 'retry_timeout' to avoid infinite command retry

2014-02-06 Thread Libo Chen

On 2014/2/7 13:46, James Bottomley wrote:
 On Fri, 2014-02-07 at 09:22 +0900, Eiichi Tsukata wrote:
 Currently, scsi error handling in scsi_io_completion() tries to
 unconditionally requeue scsi command when device keeps some error state.
 For example, UNIT_ATTENTION causes infinite retry with
 action == ACTION_RETRY.
 This is because retryable errors are thought to be temporary and the scsi
 device will soon recover from those errors. Normally, such retry policy is
 appropriate because the device will soon recover from temporary error state.
 
 
 
 But there is no guarantee that device is able to recover from error state
 immediately. Actually, we've experienced an infinite retry on some hardware.
 Therefore hardware error can results in infinite command retry loop.
 
 Could you please add an analysis of the actual failure; which devices
 and what conditions.
 


same question, can you explain?

 This patch adds 'retry_timeout' sysfs attribute which limits the retry time
 of each scsi command. This attribute is located in scsi sysfs directory
 for example /sys/bus/scsi/devices/X:X:X:X/ and value is in seconds.
 Once scsi command retry time is longer than this timeout,
 the command is treated as failure. 'retry_timeout' is set to '0' by default
 which means no timeout set.
 
 Don't do this ... you're mixing a feature (which you'd need to justify)
 with an apparent bug fix.
 
 Once you dump all the complexity, I think the patch boils down to a
 simple check before the action switch in scsi_io_completion():
 
   if (action !=  ACTION_FAIL 
   time_before(cmd-jiffies_at_alloc + wait_for, jiffies)) {
   action = ACTION_FAIL;
   description = command timed out;
   }
 
 
 James
 
 
 --
 To unsubscribe from this list: send the line unsubscribe linux-kernel in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html
 Please read the FAQ at  http://www.tux.org/lkml/
 
 


--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 0/6]: iscsi changes for scsi-misc (v2)

2014-02-06 Thread michaelc

This patchset has Mellanox's libiscsi locking changes, and various
fixes.

V2 - Fix for MaxCmdSn handling in patch 3/6 where the part of the function
that also updates the MaxCmdSn also got cut by accident.

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 6/6] iscsi_tcp: check for valid session before accessing

2014-02-06 Thread michaelc

From: Mike Christie micha...@cs.wisc.edu

Check that the session is setup before accessing its
connection. This fixes a oops where userspace tries
to get the ip address before the session is bound to
a host.

Signed-off-by: Mike Christie micha...@cs.wisc.edu
---
 drivers/scsi/iscsi_tcp.c |3 +++
 1 files changed, 3 insertions(+), 0 deletions(-)

diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c
index 12b3512..bfb6d07 100644
--- a/drivers/scsi/iscsi_tcp.c
+++ b/drivers/scsi/iscsi_tcp.c
@@ -759,6 +759,9 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host 
*shost,
 
switch (param) {
case ISCSI_HOST_PARAM_IPADDRESS:
+   if (!session)
+   return -ENOTCONN;
+
spin_lock_bh(session-frwd_lock);
conn = session-leadconn;
if (!conn) {
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 4/6] be2iscsi: fix lun test in device reset callout

2014-02-06 Thread michaelc

From: Mike Christie micha...@cs.wisc.edu

We want to be checking the scsi_cmnd's lun against
the possible tasks in the driver. Current check tests
task against itself which was useless.

Signed-off-by: Mike Christie micha...@cs.wisc.edu
---
 drivers/scsi/be2iscsi/be_main.c |2 +-
 1 files changed, 1 insertions(+), 1 deletions(-)

diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c
index 8d82f2c..bc77a6f 100644
--- a/drivers/scsi/be2iscsi/be_main.c
+++ b/drivers/scsi/be2iscsi/be_main.c
@@ -325,7 +325,7 @@ static int beiscsi_eh_device_reset(struct scsi_cmnd *sc)
if (!abrt_task-sc || abrt_task-state == ISCSI_TASK_FREE)
continue;
 
-   if (abrt_task-sc-device-lun != abrt_task-sc-device-lun)
+   if (sc-device-lun != abrt_task-sc-device-lun)
continue;
 
/* Invalidate WRB Posted for this Task */
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 1/6] SCSI/libiscsi: Restructure iscsi_tcp r2t response logic

2014-02-06 Thread michaelc

From: Shlomo Pongratz shlo...@mellanox.com

Restructure the iscsi_tcp_r2t_rsp routine in order to avoid allocating
r2t from r2tpool.queue and returning it back in case the parameters
rhdr-data_length and or rhdr-data_offset prohibit the requing.

Since the values of these parameters are known prior to the allocation,
we can pre-check and thus avoid futile allocations.

Signed-off-by: Shlomo Pongratz shlo...@mellanox.com
Signed-off-by: Or Gerlitz ogerl...@mellanox.com
Signed-off-by: Mike Christie micha...@cs.wisc.edu
---
 drivers/scsi/libiscsi_tcp.c |   43 ++-
 1 files changed, 22 insertions(+), 21 deletions(-)

diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c
index 1d58d53..7f59073 100644
--- a/drivers/scsi/libiscsi_tcp.c
+++ b/drivers/scsi/libiscsi_tcp.c
@@ -529,6 +529,8 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, 
struct iscsi_task *task)
struct iscsi_r2t_rsp *rhdr = (struct iscsi_r2t_rsp *)tcp_conn-in.hdr;
struct iscsi_r2t_info *r2t;
int r2tsn = be32_to_cpu(rhdr-r2tsn);
+   u32 data_length;
+   u32 data_offset;
int rc;
 
if (tcp_conn-in.datalen) {
@@ -554,40 +556,39 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, 
struct iscsi_task *task)
return 0;
}
 
-   rc = kfifo_out(tcp_task-r2tpool.queue, (void*)r2t, sizeof(void*));
-   if (!rc) {
-   iscsi_conn_printk(KERN_ERR, conn, Could not allocate R2T. 
- Target has sent more R2Ts than it 
- negotiated for or driver has leaked.\n);
-   return ISCSI_ERR_PROTO;
-   }
-
-   r2t-exp_statsn = rhdr-statsn;
-   r2t-data_length = be32_to_cpu(rhdr-data_length);
-   if (r2t-data_length == 0) {
+   data_length = be32_to_cpu(rhdr-data_length);
+   if (data_length == 0) {
iscsi_conn_printk(KERN_ERR, conn,
  invalid R2T with zero data len\n);
-   kfifo_in(tcp_task-r2tpool.queue, (void*)r2t,
-   sizeof(void*));
return ISCSI_ERR_DATALEN;
}
 
-   if (r2t-data_length  session-max_burst)
+   if (data_length  session-max_burst)
ISCSI_DBG_TCP(conn, invalid R2T with data len %u and max 
  burst %u. Attempting to execute request.\n,
- r2t-data_length, session-max_burst);
+ data_length, session-max_burst);
 
-   r2t-data_offset = be32_to_cpu(rhdr-data_offset);
-   if (r2t-data_offset + r2t-data_length  scsi_out(task-sc)-length) {
+   data_offset = be32_to_cpu(rhdr-data_offset);
+   if (data_offset + data_length  scsi_out(task-sc)-length) {
iscsi_conn_printk(KERN_ERR, conn,
  invalid R2T with data len %u at offset %u 
- and total length %d\n, r2t-data_length,
- r2t-data_offset, scsi_out(task-sc)-length);
-   kfifo_in(tcp_task-r2tpool.queue, (void*)r2t,
-   sizeof(void*));
+ and total length %d\n, data_length,
+ data_offset, scsi_out(task-sc)-length);
return ISCSI_ERR_DATALEN;
}
 
+   rc = kfifo_out(tcp_task-r2tpool.queue, (void*)r2t, sizeof(void*));
+   if (!rc) {
+   iscsi_conn_printk(KERN_ERR, conn, Could not allocate R2T. 
+ Target has sent more R2Ts than it 
+ negotiated for or driver has leaked.\n);
+   return ISCSI_ERR_PROTO;
+   }
+
+   r2t-exp_statsn = rhdr-statsn;
+   r2t-data_length = data_length;
+   r2t-data_offset = data_offset;
+
r2t-ttt = rhdr-ttt; /* no flip */
r2t-datasn = 0;
r2t-sent = 0;
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 3/6] libiscsi: remove unneeded queue work when max_cmdsn is increased

2014-02-06 Thread michaelc

From: Mike Christie micha...@cs.wisc.edu

iscsi_queuecommand will only take in commands that can fit in the
current window. So, if a command is on the cmdqueue then it can
fit in the current window. If a command is on the mgmtqueue, then
we are setting the immediate bit so they will also fit in the
window. As a result, we never need to to do a iscsi_conn_queue_work
when the maxCmdSn is increased.

What should happen is that a command will complete the window will
be increased, then the scsi layer will send us more commands by
running the scsi_device queues.

Signed-off-by: Mike Christie micha...@cs.wisc.edu
---
 drivers/scsi/libiscsi.c |   10 +-
 1 files changed, 1 insertions(+), 9 deletions(-)

diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c
index 6afb6fc..9ca42a2 100644
--- a/drivers/scsi/libiscsi.c
+++ b/drivers/scsi/libiscsi.c
@@ -110,16 +110,8 @@ static void __iscsi_update_cmdsn(struct iscsi_session 
*session,
session-exp_cmdsn = exp_cmdsn;
 
if (max_cmdsn != session-max_cmdsn 
-   !iscsi_sna_lt(max_cmdsn, session-max_cmdsn)) {
+   !iscsi_sna_lt(max_cmdsn, session-max_cmdsn))
session-max_cmdsn = max_cmdsn;
-   /*
-* if the window closed with IO queued, then kick the
-* xmit thread
-*/
-   if (!list_empty(session-leadconn-cmdqueue) ||
-   !list_empty(session-leadconn-mgmtqueue))
-   iscsi_conn_queue_work(session-leadconn);
-   }
 }
 
 void iscsi_update_cmdsn(struct iscsi_session *session, struct iscsi_nopin *hdr)
-- 
1.7.1

--
To unsubscribe from this list: send the line unsubscribe linux-scsi in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 2/6] SCSI/libiscsi: Reduce locking contention in fast path (v2)

2014-02-06 Thread michaelc

From: Shlomo Pongratz shlo...@mellanox.com

Replace the session lock with two locks, a forward lock and
a backwards lock named frwd_lock and back_lock respectively.

The forward lock protects resources that change while sending a
request to the target, such as cmdsn, queued_cmdsn, and allocating
task from the commands' pool with kfifo_out.

The backward lock protects resources that change while processing
a response or in error path, such as cmdsn_exp, cmdsn_max, and
returning tasks to the commands' pool with kfifo_in.

Under a steady state fast-path situation, that is when one
or more processes/threads submit IO to an iscsi device and
a single kernel upcall (e.g softirq) is dealing with processing
of responses without errors, this patch eliminates the contention
between the queuecommand()/request response/scsi_done() flows
associated with iscsi sessions.

Between the forward and the backward locks exists a strict locking
hierarchy. The mutual exclusion zone protected by the forward lock can
enclose the mutual exclusion zone protected by the backward lock but not
vice versa.

For example, in iscsi_conn_teardown or in iscsi_xmit_data when there is
a failure and __iscsi_put_task is called, the backward lock is taken while
the forward lock is still taken. On the other hand, if in the RX path a nop
is to be sent, for example in iscsi_handle_reject or __iscsi_complete_pdu
than the forward lock is released and the backward lock is taken for the
duration of iscsi_send_nopout, later the backward lock is released and the
forward lock is retaken.

libiscsi_tcp uses two kernel fifos the r2t pool and the r2t queue.

The insertion and deletion from these queues didn't corespond to the
assumption taken by the new forward/backwards session locking paradigm.

That is, in iscsi_tcp_clenup_task which belongs to the RX (backwards)
path, r2t is taken out from r2t queue and inserted to the r2t pool.
In iscsi_tcp_get_curr_r2t which belong to the TX (forward) path, r2t
is also inserted to the r2t pool and another r2t is pulled from r2t
queue.

Only in iscsi_tcp_r2t_rsp which is called in the RX path but can requeue
to the TX path, r2t is taken from the r2t pool and inserted to the r2t
queue.

In order to cope with this situation, two spin locks were added,
pool2queue and queue2pool. The former protects extracting from the
r2t pool and inserting to the r2t queue, and the later protects the
extracing from the r2t queue and inserting to the r2t pool.

Signed-off-by: Shlomo Pongratz shlo...@mellanox.com
Signed-off-by: Or Gerlitz ogerl...@mellanox.com
[minor fix up to apply cleanly and compile fix]
Signed-off-by: Mike Christie micha...@cs.wisc.edu
---
 drivers/scsi/be2iscsi/be_main.c  |   26 +++---
 drivers/scsi/bnx2i/bnx2i_hwi.c   |   46 
 drivers/scsi/bnx2i/bnx2i_iscsi.c |8 +-
 drivers/scsi/iscsi_tcp.c |   22 ++--
 drivers/scsi/libiscsi.c  |  214 +
 drivers/scsi/libiscsi_tcp.c  |   28 +++--
 drivers/scsi/qla4xxx/ql4_isr.c   |4 +-
 include/scsi/libiscsi.h  |   17 ++-
 include/scsi/libiscsi_tcp.h  |2 +
 9 files changed, 206 insertions(+), 161 deletions(-)

diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c
index 1f37505..8d82f2c 100644
--- a/drivers/scsi/be2iscsi/be_main.c
+++ b/drivers/scsi/be2iscsi/be_main.c
@@ -232,20 +232,20 @@ static int beiscsi_eh_abort(struct scsi_cmnd *sc)
cls_session = starget_to_session(scsi_target(sc-device));
session = cls_session-dd_data;
 
-   spin_lock_bh(session-lock);
+   spin_lock_bh(session-frwd_lock);
if (!aborted_task || !aborted_task-sc) {
/* we raced */
-   spin_unlock_bh(session-lock);
+   spin_unlock_bh(session-frwd_lock);
return SUCCESS;
}
 
aborted_io_task = aborted_task-dd_data;
if (!aborted_io_task-scsi_cmnd) {
/* raced or invalid command */
-   spin_unlock_bh(session-lock);
+   spin_unlock_bh(session-frwd_lock);
return SUCCESS;
}
-   spin_unlock_bh(session-lock);
+   spin_unlock_bh(session-frwd_lock);
/* Invalidate WRB Posted for this Task */
AMAP_SET_BITS(struct amap_iscsi_wrb, invld,
  aborted_io_task-pwrb_handle-pwrb,
@@ -307,9 +307,9 @@ static int beiscsi_eh_device_reset(struct scsi_cmnd *sc)
/* invalidate iocbs */
cls_session = starget_to_session(scsi_target(sc-device));
session = cls_session-dd_data;
-   spin_lock_bh(session-lock);
+   spin_lock_bh(session-frwd_lock);
if (!session-leadconn || session-state != ISCSI_STATE_LOGGED_IN) {
-   spin_unlock_bh(session-lock);
+   spin_unlock_bh(session-frwd_lock);
return FAILED;
}
conn = session-leadconn;
@@ -338,7 +338,7 @@ static int beiscsi_eh_device_reset(struct scsi_cmnd *sc)
num_invalidate++;

61 matches

Mail list logo