Re: [PATCH 12/15] scsi: initial blk-mq support
On 2/5/2014 2:41 PM, Christoph Hellwig wrote: Add support for using the blk-mq code to submit requests to SCSI drivers. There is very little blk-mq specific code, but that's partially because important functionality like partial completions and request requeueing is still missing in blk-mq. I hope to keep most of the additions for these in the blk-mq core instead of the SCSI layer, though. Based on the earlier scsi-mq prototype by Nicholas Bellinger, although not a whole lot of actual code is left. Not-quite-signed-off-yet-by: Christoph Hellwig h...@lst.de --- drivers/scsi/scsi.c | 36 ++- drivers/scsi/scsi_lib.c | 244 -- drivers/scsi/scsi_priv.h |2 + drivers/scsi/scsi_scan.c |5 +- include/scsi/scsi_host.h |3 + 5 files changed, 278 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index adb8bfb..cf5c110 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -44,6 +44,7 @@ #include linux/string.h #include linux/slab.h #include linux/blkdev.h +#include linux/blk-mq.h #include linux/delay.h #include linux/init.h #include linux/completion.h @@ -688,6 +689,33 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd) return 0; } +static void scsi_softirq_done_remote(void *data) +{ + return scsi_softirq_done(data); +} + +static void scsi_mq_done(struct request *req) +{ + int cpu; + +#if 0 + if (!ctx-ipi_redirect) + return scsi_softirq_done(cmd); +#endif + + cpu = get_cpu(); + if (cpu != req-cpu cpu_online(req-cpu)) { + req-csd.func = scsi_softirq_done_remote; + req-csd.info = req; + req-csd.flags = 0; + __smp_call_function_single(req-cpu, req-csd, 0); + } else { + scsi_softirq_done(req); + } + + put_cpu(); +} + /** * scsi_done - Invoke completion on finished SCSI command. * @cmd: The SCSI Command for which a low-level device driver (LLDD) gives @@ -701,8 +729,14 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd) */ static void scsi_done(struct scsi_cmnd *cmd) { + struct request *req = cmd-request; + trace_scsi_dispatch_cmd_done(cmd); - blk_complete_request(cmd-request); + + if (req-mq_ctx) + scsi_mq_done(req); + else + blk_complete_request(req); } /** diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index e67950c..8dd8893 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -20,6 +20,7 @@ #include linux/delay.h #include linux/hardirq.h #include linux/scatterlist.h +#include linux/blk-mq.h #include scsi/scsi.h #include scsi/scsi_cmnd.h @@ -554,6 +555,15 @@ static bool scsi_end_request(struct scsi_cmnd *cmd, int error, int bytes, struct request *req = cmd-request; /* +* XXX: need to handle partial completions and retries here. +*/ + if (req-mq_ctx) { + blk_mq_end_io(req, error); + put_device(cmd-device-sdev_gendev); + return true; + } + + /* * If there are blocks left over at the end, set up the command * to queue the remainder of them. */ @@ -1014,12 +1024,15 @@ static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb, { int count; - /* -* If sg table allocation fails, requeue request later. -*/ - if (unlikely(scsi_alloc_sgtable(sdb, req-nr_phys_segments, - gfp_mask))) { - return BLKPREP_DEFER; + BUG_ON(req-nr_phys_segments SCSI_MAX_SG_SEGMENTS); + + if (!req-mq_ctx) { + /* +* If sg table allocation fails, requeue request later. +*/ + if (unlikely(scsi_alloc_sgtable(sdb, req-nr_phys_segments, + gfp_mask))) + return BLKPREP_DEFER; } req-buffer = NULL; @@ -1075,9 +1088,11 @@ int scsi_init_io(struct scsi_cmnd *cmd, gfp_t gfp_mask) BUG_ON(prot_sdb == NULL); ivecs = blk_rq_count_integrity_sg(rq-q, rq-bio); - if (scsi_alloc_sgtable(prot_sdb, ivecs, gfp_mask)) { - error = BLKPREP_DEFER; - goto err_exit; + if (!rq-mq_ctx) { + if (scsi_alloc_sgtable(prot_sdb, ivecs, gfp_mask)) { + error = BLKPREP_DEFER; + goto err_exit; + } } count = blk_rq_map_integrity_sg(rq-q, rq-bio, @@ -1505,7 +1520,7 @@ static void scsi_kill_request(struct request *req, struct request_queue *q) blk_complete_request(req); } -static void scsi_softirq_done(struct request *rq) +void scsi_softirq_done(struct request *rq) { struct
[PATCH][RFC] Add EVPD page 0x83 entries to sysfs
EVPD page 0x83 is used to uniquely identify the device. So instead of having each and every program issue a separate SG_IO call to retrieve this information it does make far more sense to display it in sysfs. Cc: Jeremy Linton jlin...@tributary.com Cc: Doug Gilbert dgilb...@interlog.com Cc: Kai Makisara kai.makis...@kolumbus.fi Signed-off-by: Hannes Reinecke h...@suse.de --- drivers/scsi/scsi_scan.c | 3 + drivers/scsi/scsi_sysfs.c | 166 - include/scsi/scsi_device.h | 3 + 3 files changed, 171 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 307a811..073fb84 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -929,6 +929,9 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result, if (*bflags BLIST_SKIP_VPD_PAGES) sdev-skip_vpd_pages = 1; + if (sdev-scsi_level = SCSI_3) + scsi_attach_vpd_ident(sdev); + transport_configure_device(sdev-sdev_gendev); if (sdev-host-hostt-slave_configure) { diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 9117d0b..ffe7cb5 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -725,8 +725,170 @@ show_queue_type_field(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR(queue_type, S_IRUGO, show_queue_type_field, NULL); +void scsi_attach_vpd_ident(struct scsi_device *sdev) +{ + int ret; + int vpd_len = 255; + unsigned char *buffer; +retry: + buffer = kmalloc(vpd_len, GFP_KERNEL); + if (buffer) { + ret = scsi_get_vpd_page(sdev, 0x83, buffer, vpd_len); + if (ret == 0) { + vpd_len = (buffer[2] 8) + buffer[3]; + if (vpd_len 255) { + kfree(buffer); + goto retry; + } + sdev-vpd_ident_len = vpd_len; + sdev-vpd_ident = buffer; + } + } +} + +#define SCSI_VPD_ASSOC_lun 0x0 +#define SCSI_VPD_ASSOC_port 0x10 +#define SCSI_VPD_ASSOC_target 0x20 + +#define SCSI_VPD_DESIG_vendor 0x0 +#define SCSI_VPD_DESIG_t10 0x1 +#define SCSI_VPD_DESIG_eui 0x2 +#define SCSI_VPD_DESIG_naa 0x3 +#define SCSI_VPD_DESIG_relport 0x4 +#define SCSI_VPD_DESIG_tpgrp 0x5 +#define SCSI_VPD_DESIG_lugrp 0x6 +#define SCSI_VPD_DESIG_md5 0x7 +#define SCSI_VPD_DESIG_scsi_name 0x8 + +int scsi_parse_vpd_ident(struct scsi_device *sdev, +int assoc, int desig, char *buf) +{ + unsigned char *d; + int len = 0; + + if (!sdev-vpd_ident) + return -EINVAL; + + d = sdev-vpd_ident + 4; + while (d sdev-vpd_ident + sdev-vpd_ident_len) { + if ((d[1] 0x30) == assoc + (d[1] 0xf) == desig) { + switch (d[1] 0xf) { + case SCSI_VPD_DESIG_eui: + case SCSI_VPD_DESIG_naa: + case SCSI_VPD_DESIG_md5: + switch (d[3]) { + case 8: + len += sprintf(buf + len, + %8phN\n, d + 4); + break; + case 12: + len += sprintf(buf + len, + %12phN\n, d + 4); + break; + case 16: + len += sprintf(buf + len, + %16phN\n, d + 4); + break; + } + break; + case SCSI_VPD_DESIG_relport: + case SCSI_VPD_DESIG_tpgrp: + case SCSI_VPD_DESIG_lugrp: + len += sprintf(buf + len, %d\n, + (int)(d[6] 8) + d[7]); + break; + case SCSI_VPD_DESIG_vendor: + case SCSI_VPD_DESIG_t10: + case SCSI_VPD_DESIG_scsi_name: + len += snprintf(buf + len, d[3] + 2, %s\n, + d + 4); + break; + } + } + d += d[3] + 4; + } + + return len ? len : -EINVAL; +} + +#define sdev_evpd_ident(assoc,desig) \ +static ssize_t \ +sdev_show_evpd_ident_##assoc##_##desig (struct device *dev,\ + struct device_attribute *attr, \ +
Re: [PATCH] st: Do not rewind for SG_IO
On 02/03/2014 10:58 PM, Jeremy Linton wrote: On 2/3/2014 2:51 PM, Kay Sievers wrote: This is not simple and not going to happen. Sibling devices in /sys cannot have a relationship in udev, there are only parent/child dependencies. Ok.. so if we can't ignore all the spare device nodes in a given /sys entry for a given device. Why open the device to scan it? I've often wondered why the serial number isn't part of the data in /sys along with the manufacture/model. The last tape drive I saw that failed to respond to inquiry page 0x80 was over a decade ago (probably manufactured in the early 90s). So enabling it just for tape is pretty safe. Matching Manufacturer/model/serial is going to be better than anything your going to get out of 0x83 anyway. That data is guaranteed to be there, but its also guaranteed to be unreliable (every device, and every port has a slightly different set of descriptors they choose to support). Plus, your not going to have issues accidentally rewinding a device, or resetting a tape density, or accidentally turning compression off if you don't open the device. And indeed, I have been wondering about this, too. And (again) it is something which has been on my To-Do list for a long time. Moving EVPD page 0x83 (and maybe 0x80, too) into sysfs will save quite a lot of headache we have currently; udev won't have to call 'sg_inq', information will be present even though the device itself might be temporarily unavailable yadda yadda. So I've decided to bite the bullet and sent out a patch, check for 'Add EVPD page 0x83 entries to sysfs'. Reviews are welcome. Cheers, Hannes -- Dr. Hannes Reinecke zSeries Storage h...@suse.de +49 911 74053 688 SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg GF: J. Hawn, J. Guild, F. Imendörffer, HRB 16746 (AG Nürnberg) -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][RFC] Add EVPD page 0x83 entries to sysfs
On 02/06/2014 02:04 PM, Hannes Reinecke wrote: EVPD page 0x83 is used to uniquely identify the device. So instead of having each and every program issue a separate SG_IO call to retrieve this information it does make far more sense to display it in sysfs. Cc: Jeremy Linton jlin...@tributary.com Cc: Doug Gilbert dgilb...@interlog.com Cc: Kai Makisara kai.makis...@kolumbus.fi Signed-off-by: Hannes Reinecke h...@suse.de --- drivers/scsi/scsi_scan.c | 3 + drivers/scsi/scsi_sysfs.c | 166 - include/scsi/scsi_device.h | 3 + 3 files changed, 171 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 307a811..073fb84 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -929,6 +929,9 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result, if (*bflags BLIST_SKIP_VPD_PAGES) sdev-skip_vpd_pages = 1; + if (sdev-scsi_level = SCSI_3) + scsi_attach_vpd_ident(sdev); + transport_configure_device(sdev-sdev_gendev); if (sdev-host-hostt-slave_configure) { diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 9117d0b..ffe7cb5 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -725,8 +725,170 @@ show_queue_type_field(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR(queue_type, S_IRUGO, show_queue_type_field, NULL); +void scsi_attach_vpd_ident(struct scsi_device *sdev) +{ + int ret; + int vpd_len = 255; + unsigned char *buffer; +retry: + buffer = kmalloc(vpd_len, GFP_KERNEL); + if (buffer) { + ret = scsi_get_vpd_page(sdev, 0x83, buffer, vpd_len); + if (ret == 0) { + vpd_len = (buffer[2] 8) + buffer[3]; + if (vpd_len 255) { + kfree(buffer); + goto retry; + } + sdev-vpd_ident_len = vpd_len; + sdev-vpd_ident = buffer; + } + } +} + Bummer. A missing kfree() if scsi_get_vpd_page() returns an error. But this is just an RFC for now, so I'll be fixing it up once it looks as if it would be moving forward... Cheers, Hannes -- Dr. Hannes Reinecke zSeries Storage h...@suse.de +49 911 74053 688 SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg GF: J. Hawn, J. Guild, F. Imendörffer, HRB 16746 (AG Nürnberg) -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] st: Do not rewind for SG_IO
Hannes == Hannes Reinecke h...@suse.de writes: Hannes Moving EVPD page 0x83 (and maybe 0x80, too) into sysfs will save Hannes quite a lot of headache we have currently; udev won't have to Hannes call 'sg_inq', information will be present even though the Hannes device itself might be temporarily unavailable yadda yadda. Hannes So I've decided to bite the bullet and sent out a patch, check Hannes for 'Add EVPD page 0x83 entries to sysfs'. I'm already doing something similar since I need it for xcopy. My patch provides both the original VPD 0x83 and 0x80 bits as well as a handle identical to /sbin/scsi_id. I'll take a look at your patch later today... -- Martin K. Petersen Oracle Linux Engineering -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] st: Do not rewind for SG_IO
On 02/06/2014 02:21 PM, Martin K. Petersen wrote: Hannes == Hannes Reinecke h...@suse.de writes: Hannes Moving EVPD page 0x83 (and maybe 0x80, too) into sysfs will save Hannes quite a lot of headache we have currently; udev won't have to Hannes call 'sg_inq', information will be present even though the Hannes device itself might be temporarily unavailable yadda yadda. Hannes So I've decided to bite the bullet and sent out a patch, check Hannes for 'Add EVPD page 0x83 entries to sysfs'. I'm already doing something similar since I need it for xcopy. Hehe. Beat you to it. My patch provides both the original VPD 0x83 and 0x80 bits as well as a handle identical to /sbin/scsi_id. Bah, don't do that. That should better be handled by udev rules. I've got a set of patches moving from scsi_id to sg_inq, which can be easily adapted to using sysfs directly. Once we figure out if that's the direction we want to go ... I'll take a look at your patch later today... Thanks. Cheers, Hannes -- Dr. Hannes Reinecke zSeries Storage h...@suse.de +49 911 74053 688 SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg GF: J. Hawn, J. Guild, F. Imendörffer, HRB 16746 (AG Nürnberg) -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] st: Do not rewind for SG_IO
Hannes == Hannes Reinecke h...@suse.de writes: My patch provides both the original VPD 0x83 and 0x80 bits as well as a handle identical to /sbin/scsi_id. Hannes Bah, don't do that. That should better be handled by udev Hannes rules. I've got a set of patches moving from scsi_id to sg_inq, Hannes which can be easily adapted to using sysfs directly. I just want to get out of the userland sending random SCSI commands business. That is a world of pain right now. I wanted to provide a handle that was guaranteed to be compatible with existing tooling. If you have worked around that in udev rules I guess that's OK with me. -- Martin K. Petersen Oracle Linux Engineering -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
RE: [PATCH 10/22] isci: Use pci_enable_msix_range()
On Tuesday, February 04, 2014 12:17 PM Alexander Gordeev agord...@redhat.com wrote: As result of deprecation of MSI-X/MSI enablement functions pci_enable_msix() and pci_enable_msi_block() all drivers using these two interfaces need to be updated to use the new pci_enable_msi_range() and pci_enable_msix_range() interfaces. Signed-off-by: Alexander Gordeev agord...@redhat.com Cc: Lukasz Dorau lukasz.do...@intel.com Cc: Maciej Patelczyk maciej.patelc...@intel.com Cc: Dave Jiang dave.ji...@intel.com Cc: intel-linux-...@intel.com Cc: linux-scsi@vger.kernel.org Cc: linux-...@vger.kernel.org --- drivers/scsi/isci/init.c |5 +++-- 1 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/scsi/isci/init.c b/drivers/scsi/isci/init.c index d25d0d8..b99f307 100644 --- a/drivers/scsi/isci/init.c +++ b/drivers/scsi/isci/init.c @@ -356,8 +356,9 @@ static int isci_setup_interrupts(struct pci_dev *pdev) for (i = 0; i num_msix; i++) pci_info-msix_entries[i].entry = i; - err = pci_enable_msix(pdev, pci_info-msix_entries, num_msix); - if (err) + err = pci_enable_msix_range(pdev, + pci_info-msix_entries, num_msix, num_msix); + if (err 0) goto intx; for (i = 0; i num_msix; i++) { -- 1.7.7.6 Looks fine. Acked-by: Lukasz Dorau lukasz.do...@intel.com Thanks, Lukasz -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] st: Do not rewind for SG_IO
On Thu, 2014-02-06 at 08:50 -0500, Martin K. Petersen wrote: Hannes == Hannes Reinecke h...@suse.de writes: My patch provides both the original VPD 0x83 and 0x80 bits as well as a handle identical to /sbin/scsi_id. Hannes Bah, don't do that. That should better be handled by udev Hannes rules. I've got a set of patches moving from scsi_id to sg_inq, Hannes which can be easily adapted to using sysfs directly. I just want to get out of the userland sending random SCSI commands business. That is a world of pain right now. Well, in the words of Bill Clinton I feel your pain. However, I need to know we won't pick up that same whole world of pain in the kernel. Remember it's not just SCSI devices, it's also ATA devices and potentially other block devices ... then there's blkid and all the weird things it does. Then there's transport identifiers for multi-path reservations and so on. Convince me this path won't have us shifting a whole bunch of weird from user space to the kernel without any reduction in the weird factor. Remember the point is not what can we do for all the nicely behaved SCSI-3 devices, it's what do we do for everything. James I wanted to provide a handle that was guaranteed to be compatible with existing tooling. If you have worked around that in udev rules I guess that's OK with me. -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] st: Do not rewind for SG_IO
On 02/06/2014 03:38 PM, James Bottomley wrote: On Thu, 2014-02-06 at 08:50 -0500, Martin K. Petersen wrote: Hannes == Hannes Reinecke h...@suse.de writes: My patch provides both the original VPD 0x83 and 0x80 bits as well as a handle identical to /sbin/scsi_id. Hannes Bah, don't do that. That should better be handled by udev Hannes rules. I've got a set of patches moving from scsi_id to sg_inq, Hannes which can be easily adapted to using sysfs directly. I just want to get out of the userland sending random SCSI commands business. That is a world of pain right now. Well, in the words of Bill Clinton I feel your pain. However, I need to know we won't pick up that same whole world of pain in the kernel. Remember it's not just SCSI devices, it's also ATA devices and potentially other block devices ... then there's blkid and all the weird things it does. Then there's transport identifiers for multi-path reservations and so on. blkid is actually not so bad, _if_ it would distinguish between 'metadata not found' and 'I/O error during metadata read'. I made a patch which hopefully should find it's way upstream. And blkid just issues plain READ commands, so _this_ behaviour won't change ... Convince me this path won't have us shifting a whole bunch of weird from user space to the kernel without any reduction in the weird factor. Remember the point is not what can we do for all the nicely behaved SCSI-3 devices, it's what do we do for everything. Well, first and foremost the patch should be limited to SCSI-3 (and later devices). So we'd be insulated from the most obvious crap. So that leaves only devices which claim to be SCSI-3 or something, but still keel over when asked for VPD pages. However, this type of devices will fail already, as 'sd.c' is already asking for all sorts of VPD pages. Which leaves only non-Disk devices, but those tend to have a better SCSI implementation as you can't make quick bucks with, say, as SCSI Enclosure device. But the prime motivator behind this patch is that we can be reasonably sure the device will answer at all. When retrieving the EVPD pages from userspace you always have the problem that the device might have gone away or being unresponsive by the time you get around sending the SG_IO call. So you always have these stuck user-space processes asking for information which _had_ been present at one point. In particular udev is prone for this; anyone who ever came across the message udevd[]: worker unexpectedly returned with status 0x0100 knows what I'm talking about ... And the more udev events for a device you get, the higher the likelyhood for this to happen is. Plus I could re-use this information for my scsi_dh_alua patchset, and for xcopy and friends we'll be needing it, too. Cheers, Hannes -- Dr. Hannes Reinecke zSeries Storage h...@suse.de +49 911 74053 688 SUSE LINUX Products GmbH, Maxfeldstr. 5, 90409 Nürnberg GF: J. Hawn, J. Guild, F. Imendörffer, HRB 16746 (AG Nürnberg) -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
I HAVE A PROPOSAL FOR YOU!
Hello, The Project is about the exportation of 100,000 barrels of Light Crude Oil daily out from Iraq to Turkey through my client's company in Iraq at the rate of $92.00 a barrel. This amount to $9,200,000 daily. I ask for your support as a foreigner to handle this business project with my client and you are not expected to invest in Iraq If yes, let me know and we will discuss this project proper. yung kim. CONTACT MY PRIVATE EMAIL:yung@qq.com -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 12/17] scsi: avoid taking host_lock in scsi_run_queue unless nessecary
On Wed, Feb 05, 2014 at 03:54:17PM -0800, James Bottomley wrote: -/* - * Function: scsi_run_queue() - * - * Purpose:Select a proper request queue to serve next - * - * Arguments: q - last request's queue - * - * Returns: Nothing - * - * Notes: The previous command was completely finished, start - * a new one if possible. - */ Instead of dumping the description, how about converting it to docbook and making it match the new function? I dropped it because there's nothing that isn't trivially obvious from the code. The point of comments should be to explain why things are done and not have a redundant writeup of what is done. Docbook comments make sense for exported functions but not so much for a static function with a handful of callers. -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 02/15] blk-mq: support at_head inserations for blk_execute_rq
On Wed, Feb 05, 2014 at 06:27:38PM -0800, Muthu Kumar wrote: Currently its not used by any drivers. Are we sure we don't need it public? If sure, please remove the EXPORT_SYMBOL() for it. Drivers shouldn't use it, it's a low-level interface. As mentioned in the intro this is not quite a coherent series yet, but I'll post the blk-mq patches in a slightly nicer form agains Jens' tree soon. -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 04/17] scsi: avoid useless free_list lock roundtrips
On Wed, Feb 05, 2014 at 03:44:04PM -0800, James Bottomley wrote: Why do this? cmd is still likely to be not NULL, which helps the compiler optimise. Because testing for non-NULL pointers gets that hint implicitly from gcc. -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 05/17] scsi: simplify command allocation and freeing a bit
On Wed, Feb 05, 2014 at 03:51:35PM -0800, James Bottomley wrote: On Wed, 2014-02-05 at 04:39 -0800, Christoph Hellwig wrote: Just have one level of alloc/free functions that take a host instead of two levels for the allocation and different calling conventions for the free. Signed-off-by: Christoph Hellwig h...@lst.de --- drivers/scsi/scsi.c | 77 +++ 1 file changed, 22 insertions(+), 55 deletions(-) diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index 4139486..5347f7d 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -160,79 +160,46 @@ static struct scsi_host_cmd_pool scsi_cmd_dma_pool = { static DEFINE_MUTEX(host_cmd_pool_mutex); -/** - * scsi_pool_alloc_command - internal function to get a fully allocated command - * @pool: slab pool to allocate the command from - * @gfp_mask: mask for the allocation - * - * Returns a fully allocated command (with the allied sense buffer) or - * NULL on failure - */ -static struct scsi_cmnd * -scsi_pool_alloc_command(struct scsi_host_cmd_pool *pool, gfp_t gfp_mask) -{ - struct scsi_cmnd *cmd; - - cmd = kmem_cache_zalloc(pool-cmd_slab, gfp_mask | pool-gfp_mask); - if (!cmd) - return NULL; - - cmd-sense_buffer = kmem_cache_alloc(pool-sense_slab, -gfp_mask | pool-gfp_mask); - if (!cmd-sense_buffer) { - kmem_cache_free(pool-cmd_slab, cmd); - return NULL; - } - - return cmd; -} - -/** - * scsi_pool_free_command - internal function to release a command - * @pool: slab pool to allocate the command from - * @cmd: command to release - * - * the command must previously have been allocated by - * scsi_pool_alloc_command. - */ static void -scsi_pool_free_command(struct scsi_host_cmd_pool *pool, -struct scsi_cmnd *cmd) +scsi_host_free_command(struct Scsi_Host *shost, struct scsi_cmnd *cmd) You lost the docbook function description here when you changed the name. It's a static function with two callers, and it's more obvious from the function than the comment what it does. I'm probably one of the people with the highest comment to code ratios in the kernel, but I'd rather explain in long comments when something is non-obvious than putting useless boilerplates in. If you insist on the comment I'll put it back, but it seems utterly useless. This docbook elimination looks spurious; why do it? Same as above. -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 00/17] SCSI data path micro-optimizations
On Wed, Feb 05, 2014 at 03:41:04PM -0800, James Bottomley wrote: I will say that you do make the series hard to review by including things that do code churn for no gain: like making all the error handling goto based instead of the current in-place if () clause based ... this doesn't do anything for optmisation, so why do it? (I'm not going to take sides on which is better, since we do both). You mean the command allocation bits? When I inline one function into a nother I might as well add coherent error handling. And for more than one level of unwinding gotos are always better because a humand actually can grasp it. There's also a lot of extraneous stuff in here. I'm assuming most of the performance stuff is get/put removal and locking efficiency, things like this This was already cut down. If it makes your life easier we can do the cmd allocation part separate of the rest, but there's really no churn left. 2,3,5 are allocation simplification. In general it doesn't look so bad, but it doesn't seem to be part of the series. It needs an ACK from the megaraid people since they're the only consumer of the interface you're trying to eliminate It's needed to not make the cmd_size addition not a complete mess. 6,7 is a new interface for command allocation and a virtio consumer. OK, as it goes, but also separable ... and preferably, if, as you say, it's important for mq, some more users, also doesn't need to be part of the series. Feel free to consider 1-7 and the rest separate series already, I'll resend them that way next time. 9,10 look like they aren't your patches, so they need an Author From: field for git (9 needs your signoff, if you're sending it). The are, and they are properly attributed in the git tree. Unfortunately quilt still hasn't learned to properly deal with From: headers. -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 12/15] scsi: initial blk-mq support
On Thu, Feb 06, 2014 at 10:38:17AM +0200, Sagi Grimberg wrote: Both you and Nic offer a single HW queue per sdev. I'm wandering if that should be the LLD's decision (if chooses to use multiple queues)? Trying to understand how LLDs will fit in a way they exploit multi-queue and actually maintain multiple queues. SRP/iSER for example maintain a single queue per connection (or session in iSCSI). Now with multi-queue all requests of that shost will eventually boil-down to posting on a single queue which might transition the bottleneck to the LLDs. I noticed virtio_scsi implementation is choosing a queue per command based on current processor id without any explicit mapping (unless I missed it). I guess my question is where do (or should) LLDs plug-in to this mq scheme? Just using blk-mq helps with lock contention and cacheline issues, while being conceptually simple, that's why it's the priority. See the proposal I sent before the patch series for more details. That being said if you have simple enough patches for real multiqueue support I'd be more than happy to carry them along. -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 01/15] block: rework flush sequencing for blk-mq
On Wed, Feb 05, 2014 at 06:08:37PM -0800, Muthu Kumar wrote: diff --git a/block/blk-core.c b/block/blk-core.c index c00e0bd..d3eb330 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -693,11 +693,20 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) if (!uninit_q) return NULL; + uninit_q-flush_rq = kzalloc(sizeof(struct request), GFP_KERNEL); Shouldn't this be kzalloc_node()? Probably. There's also various kinds of optimization potential like allocating one per hw_ctx or similar. But until we have a device that has high enough IOPS to matter and needs cache flushes I wouldn't worry about optimizing the flush path. -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] block/null_blk: Fix completion processing from LIFO to FIFO
The completion queue is implemented using lockless list. The llist_add is adds the events to the list head which is a push operation. The processing of the completion elements is done by disconnecting all the pushed elements and iterating over the disconnected list. The problem is that the processing is done in reverse order w.r.t order of the insertion i.e. LIFO processing. By reversing the disconnected list which is done in linear time the desired FIFO processing is achieved. Signed-off-by: Shlomo Pongratz shlo...@mellanox.com --- drivers/block/null_blk.c |2 ++ 1 files changed, 2 insertions(+), 0 deletions(-) diff --git a/drivers/block/null_blk.c b/drivers/block/null_blk.c index 3107282..cc801cd 100644 --- a/drivers/block/null_blk.c +++ b/drivers/block/null_blk.c @@ -195,6 +195,7 @@ static enum hrtimer_restart null_cmd_timer_expired(struct hrtimer *timer) cq = per_cpu(completion_queues, smp_processor_id()); while ((entry = llist_del_all(cq-list)) != NULL) { + entry = llist_reverse_order(entry); do { cmd = container_of(entry, struct nullb_cmd, ll_list); end_cmd(cmd); @@ -235,6 +236,7 @@ static void null_ipi_cmd_end_io(void *data) cq = per_cpu(completion_queues, smp_processor_id()); entry = llist_del_all(cq-list); + entry = llist_reverse_order(entry); while (entry) { next = entry-next; -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] block/null_blk: Fix completion processing from LIFO to FIFO
On Thu, Feb 06, 2014 at 06:33:17PM +0200, Shlomo Pongratz wrote: The completion queue is implemented using lockless list. The llist_add is adds the events to the list head which is a push operation. The processing of the completion elements is done by disconnecting all the pushed elements and iterating over the disconnected list. The problem is that the processing is done in reverse order w.r.t order of the insertion i.e. LIFO processing. By reversing the disconnected list which is done in linear time the desired FIFO processing is achieved. I think it should just switch to using __smp_call_function_single directly, mirroring commit 3d6efbf62c797a2924785f482e4ce8aa8820ec72 for the blk-mq core. I've actually a patch in the queue that allows generic request completion offloading similar to blk_complete_request() in the old request code, but it'll need a bit more polishing first. -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 02/17] megaraid: simplify internal command handling
Hi Neela, can you look over this from the megaraid perspective? On Wed, Feb 05, 2014 at 04:39:32AM -0800, Christoph Hellwig wrote: We don't use the passed in scsi command for anything, so just add a adapter-wide internal status to go along with the internal scb that is used unter int_mtx to pass back the return value and get rid of all the complexities and abuse of the scsi_cmnd structure. Signed-off-by: Christoph Hellwig h...@lst.de --- drivers/scsi/megaraid.c | 120 --- drivers/scsi/megaraid.h |3 +- 2 files changed, 32 insertions(+), 91 deletions(-) diff --git a/drivers/scsi/megaraid.c b/drivers/scsi/megaraid.c index 816db12..8bca30f 100644 --- a/drivers/scsi/megaraid.c +++ b/drivers/scsi/megaraid.c @@ -531,13 +531,6 @@ mega_build_cmd(adapter_t *adapter, Scsi_Cmnd *cmd, int *busy) int target = 0; int ldrv_num = 0; /* logical drive number */ - - /* - * filter the internal and ioctl commands - */ - if((cmd-cmnd[0] == MEGA_INTERNAL_CMD)) - return (scb_t *)cmd-host_scribble; - /* * We know what channels our logical drives are on - mega_find_card() */ @@ -1439,19 +1432,22 @@ mega_cmd_done(adapter_t *adapter, u8 completed[], int nstatus, int status) cmdid = completed[i]; - if( cmdid == CMDID_INT_CMDS ) { /* internal command */ + /* + * Only free SCBs for the commands coming down from the + * mid-layer, not for which were issued internally + * + * For internal command, restore the status returned by the + * firmware so that user can interpret it. + */ + if (cmdid == CMDID_INT_CMDS) { scb = adapter-int_scb; - cmd = scb-cmd; - mbox = (mbox_t *)scb-raw_mbox; - /* - * Internal command interface do not fire the extended - * passthru or 64-bit passthru - */ - pthru = scb-pthru; + list_del_init(scb-list); + scb-state = SCB_FREE; - } - else { + adapter-int_status = status; + complete(adapter-int_waitq); + } else { scb = adapter-scb_list[cmdid]; /* @@ -1640,25 +1636,7 @@ mega_cmd_done(adapter_t *adapter, u8 completed[], int nstatus, int status) cmd-result |= (DID_BAD_TARGET 16)|status; } - /* - * Only free SCBs for the commands coming down from the - * mid-layer, not for which were issued internally - * - * For internal command, restore the status returned by the - * firmware so that user can interpret it. - */ - if( cmdid == CMDID_INT_CMDS ) { /* internal command */ - cmd-result = status; - - /* - * Remove the internal command from the pending list - */ - list_del_init(scb-list); - scb-state = SCB_FREE; - } - else { - mega_free_scb(adapter, scb); - } + mega_free_scb(adapter, scb); /* Add Scsi_Command to end of completed queue */ list_add_tail(SCSI_LIST(cmd), adapter-completed_list); @@ -4133,23 +4111,15 @@ mega_internal_dev_inquiry(adapter_t *adapter, u8 ch, u8 tgt, * The last argument is the address of the passthru structure if the command * to be fired is a passthru command * - * lockscope specifies whether the caller has already acquired the lock. Of - * course, the caller must know which lock we are talking about. - * * Note: parameter 'pthru' is null for non-passthru commands. */ static int mega_internal_command(adapter_t *adapter, megacmd_t *mc, mega_passthru *pthru) { - Scsi_Cmnd *scmd; - struct scsi_device *sdev; + unsigned long flags; scb_t *scb; int rval; - scmd = scsi_allocate_command(GFP_KERNEL); - if (!scmd) - return -ENOMEM; - /* * The internal commands share one command id and hence are * serialized. This is so because we want to reserve maximum number of @@ -4160,73 +4130,45 @@ mega_internal_command(adapter_t *adapter, megacmd_t *mc, mega_passthru *pthru) scb = adapter-int_scb; memset(scb, 0, sizeof(scb_t)); - sdev = kzalloc(sizeof(struct scsi_device), GFP_KERNEL); - scmd-device = sdev; - - memset(adapter-int_cdb, 0, sizeof(adapter-int_cdb)); - scmd-cmnd = adapter-int_cdb; - scmd-device-host =
Re: [PATCH 13/17] scsi: push host_lock down into scsi_{host,target}_queue_ready
On Wed, 2014-02-05 at 04:39 -0800, Christoph Hellwig wrote: Prepare for not taking a host-wide lock in the dispatch path by pushing the lock down into the places that actually need it. Note that this patch is just a preparation step, as it will actually increase lock roundtrips and thus decrease performance on its own. I'm not really convinced by the rest of the series. I think patches 4,8,9,10,11,12 (lock elimination from fast path and get/put reduction) are where the improvements are at and they mostly look ready to apply modulo some author and signoff fixes. I'm dubious about replacing a locked set of checks and increments with atomics for the simple reason that atomics are pretty expensive on non-x86, so you've likely slowed the critical path down for them. Even on x86, atomics can be very expensive because of the global bus lock. I think about three of them in a row is where you might as well stick with the lock. Could you benchmark this lot and show what the actual improvement is just for this series, if any? I also think we should be getting more utility out of threading guarantees. So, if there's only one thread active per device we don't need any device counters to be atomic. Likewise, u32 read/write is an atomic operation, so we might be able to use sloppy counters for the target and host stuff (one per CPU that are incremented/decremented on that CPU ... this will only work using CPU locality ... completion on same CPU but that seems to be an element of a lot of stuff nowadays). I'm not saying this will all pan out, but I am saying I don't think just making all the counters atomic to reduce locking will buy us a huge amount, so I'd appreciate careful benchmarking to confirm or invalidate this hypothesis first. James -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 02/15] blk-mq: support at_head inserations for blk_execute_rq
On Thu, Feb 6, 2014 at 8:17 AM, Christoph Hellwig h...@infradead.org wrote: On Wed, Feb 05, 2014 at 06:27:38PM -0800, Muthu Kumar wrote: Currently its not used by any drivers. Are we sure we don't need it public? If sure, please remove the EXPORT_SYMBOL() for it. Drivers shouldn't use it, it's a low-level interface. As mentioned in the intro this is not quite a coherent series yet, but I'll post the blk-mq patches in a slightly nicer form agains Jens' tree soon. Alright then.. I will wait for that patch. BTW, is the scsi-mq work done on your git tree only or anyone else has their own git tree (Bart? NAB?)? -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 02/15] blk-mq: support at_head inserations for blk_execute_rq
On Thu, Feb 06, 2014 at 09:05:10AM -0800, Muthu Kumar wrote: Alright then.. I will wait for that patch. BTW, is the scsi-mq work done on your git tree only or anyone else has their own git tree For now I put out a git tree for those who don't like large patch series. Happy to take patches for it, but we'll have to see who has the energy to keep the tree in the long run. That being said I hope to feed patches into the upstream trees as quickly as possible and thus only keep a small stack of patches around. -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 13/17] scsi: push host_lock down into scsi_{host,target}_queue_ready
On 02/06/14 17:56, James Bottomley wrote: Could you benchmark this lot and show what the actual improvement is just for this series, if any? I see a performance improvement of 12% with the SRP protocol for the SCSI core optimizations alone (I am still busy measuring the impact of the blk-mq conversion but I can already see that it is really significant). Please note that the performance impact depends a lot on the workload (number of LUNs per SCSI host e.g.) so maybe the workload I chose is not doing justice to Christoph's work. And it's also important to mention that with the workload I ran I was saturating the target system CPU (a quad core Intel i5). In other words, results might be better with a more powerful target system. Bart. -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 13/17] scsi: push host_lock down into scsi_{host,target}_queue_ready
On Thu, 2014-02-06 at 18:10 +0100, Bart Van Assche wrote: On 02/06/14 17:56, James Bottomley wrote: Could you benchmark this lot and show what the actual improvement is just for this series, if any? I see a performance improvement of 12% with the SRP protocol for the SCSI core optimizations alone (I am still busy measuring the impact of the blk-mq conversion but I can already see that it is really significant). Please note that the performance impact depends a lot on the workload (number of LUNs per SCSI host e.g.) so maybe the workload I chose is not doing justice to Christoph's work. And it's also important to mention that with the workload I ran I was saturating the target system CPU (a quad core Intel i5). In other words, results might be better with a more powerful target system. On what? Just the patches I indicated or the whole series? My specific concern is that swapping a critical section for atomics may not buy us anything even on x86 and may slow down non-x86. That's the bit I'd like benchmarks to explore. James -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 5/6] scsi: remove a useless get/put_device pair in scsi_next_command
From: Bart Van Assche bvanass...@acm.org Eliminate a get_device() / put_device() pair from scsi_next_command(). Both are atomic operations hence removing these slightly improves performance. [hch: slight changes due to different context] Signed-off-by: Bart Van Assche bvanass...@acm.org Signed-off-by: Christoph Hellwig h...@lst.de --- drivers/scsi/scsi_lib.c |5 - 1 file changed, 5 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 7d35678..91ca414 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -526,14 +526,9 @@ void scsi_next_command(struct scsi_cmnd *cmd) struct scsi_device *sdev = cmd-device; struct request_queue *q = sdev-request_queue; - /* need to hold a reference on the device before we let go of the cmd */ - get_device(sdev-sdev_gendev); - scsi_put_command(cmd); - put_device(sdev-sdev_gendev); scsi_run_queue(q); - /* ok to remove device now */ put_device(sdev-sdev_gendev); } -- 1.7.10.4 -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/6] first batch of SCSI data path micro-optimizations
This series contains a few optimizations for the SCSI data I/O path. These are the patches acceptable to James as-is from the previous series. No separate benchmarking has been performed for these patches. This work was sponsored by the ION division of Fusion IO. -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/6] scsi: do not manipulate device reference counts in scsi_get/put_command
Many callers won't need this and we can optimize them away. In addition the handling in the __-prefixed variants was inconsistant to start with. Based on an earlier patch from Bart Van Assche. Signed-off-by: Christoph Hellwig h...@lst.de --- drivers/scsi/scsi.c | 36 drivers/scsi/scsi_error.c |6 ++ drivers/scsi/scsi_lib.c | 12 +++- drivers/scsi/scsi_tgt_lib.c |3 ++- include/scsi/scsi_cmnd.h|3 +-- 5 files changed, 32 insertions(+), 28 deletions(-) diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index fb86479..843b4f1 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -284,27 +284,19 @@ EXPORT_SYMBOL_GPL(__scsi_get_command); */ struct scsi_cmnd *scsi_get_command(struct scsi_device *dev, gfp_t gfp_mask) { - struct scsi_cmnd *cmd; + struct scsi_cmnd *cmd = __scsi_get_command(dev-host, gfp_mask); + unsigned long flags; - /* Bail if we can't get a reference to the device */ - if (!get_device(dev-sdev_gendev)) + if (unlikely(cmd == NULL)) return NULL; - cmd = __scsi_get_command(dev-host, gfp_mask); - - if (likely(cmd != NULL)) { - unsigned long flags; - - cmd-device = dev; - INIT_LIST_HEAD(cmd-list); - INIT_DELAYED_WORK(cmd-abort_work, scmd_eh_abort_handler); - spin_lock_irqsave(dev-list_lock, flags); - list_add_tail(cmd-list, dev-cmd_list); - spin_unlock_irqrestore(dev-list_lock, flags); - cmd-jiffies_at_alloc = jiffies; - } else - put_device(dev-sdev_gendev); - + cmd-device = dev; + INIT_LIST_HEAD(cmd-list); + INIT_DELAYED_WORK(cmd-abort_work, scmd_eh_abort_handler); + spin_lock_irqsave(dev-list_lock, flags); + list_add_tail(cmd-list, dev-cmd_list); + spin_unlock_irqrestore(dev-list_lock, flags); + cmd-jiffies_at_alloc = jiffies; return cmd; } EXPORT_SYMBOL(scsi_get_command); @@ -315,8 +307,7 @@ EXPORT_SYMBOL(scsi_get_command); * @cmd: Command to free * @dev: parent scsi device */ -void __scsi_put_command(struct Scsi_Host *shost, struct scsi_cmnd *cmd, - struct device *dev) +void __scsi_put_command(struct Scsi_Host *shost, struct scsi_cmnd *cmd) { unsigned long flags; @@ -331,8 +322,6 @@ void __scsi_put_command(struct Scsi_Host *shost, struct scsi_cmnd *cmd, if (likely(cmd != NULL)) scsi_pool_free_command(shost-cmd_pool, cmd); - - put_device(dev); } EXPORT_SYMBOL(__scsi_put_command); @@ -346,7 +335,6 @@ EXPORT_SYMBOL(__scsi_put_command); */ void scsi_put_command(struct scsi_cmnd *cmd) { - struct scsi_device *sdev = cmd-device; unsigned long flags; /* serious error if the command hasn't come from a device list */ @@ -357,7 +345,7 @@ void scsi_put_command(struct scsi_cmnd *cmd) cancel_delayed_work(cmd-abort_work); - __scsi_put_command(cmd-device-host, cmd, sdev-sdev_gendev); + __scsi_put_command(cmd-device-host, cmd); } EXPORT_SYMBOL(scsi_put_command); diff --git a/drivers/scsi/scsi_error.c b/drivers/scsi/scsi_error.c index 78b004d..771c16b 100644 --- a/drivers/scsi/scsi_error.c +++ b/drivers/scsi/scsi_error.c @@ -2288,6 +2288,11 @@ scsi_reset_provider(struct scsi_device *dev, int flag) if (scsi_autopm_get_host(shost) 0) return FAILED; + if (!get_device(dev-sdev_gendev)) { + rtn = FAILED; + goto out_put_autopm_host; + } + scmd = scsi_get_command(dev, GFP_KERNEL); blk_rq_init(NULL, req); scmd-request = req; @@ -2345,6 +2350,7 @@ scsi_reset_provider(struct scsi_device *dev, int flag) scsi_run_host_queues(shost); scsi_next_command(scmd); +out_put_autopm_host: scsi_autopm_put_host(shost); return rtn; } diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index ad516c0..500178c 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -95,6 +95,7 @@ static void scsi_unprep_request(struct request *req) req-special = NULL; scsi_put_command(cmd); + put_device(cmd-device-sdev_gendev); } /** @@ -529,6 +530,7 @@ void scsi_next_command(struct scsi_cmnd *cmd) get_device(sdev-sdev_gendev); scsi_put_command(cmd); + put_device(sdev-sdev_gendev); scsi_run_queue(q); /* ok to remove device now */ @@ -1116,6 +1118,7 @@ err_exit: scsi_release_buffers(cmd); cmd-request-special = NULL; scsi_put_command(cmd); + put_device(cmd-device-sdev_gendev); return error; } EXPORT_SYMBOL(scsi_init_io); @@ -1126,9 +1129,15 @@ static struct scsi_cmnd *scsi_get_cmd_from_req(struct scsi_device *sdev, struct scsi_cmnd *cmd; if (!req-special) { + /* Bail if we can't get a
[PATCH 1/6] scsi: avoid useless free_list lock roundtrips
Avoid hitting the host-wide free_list lock unless we need to put a command back onto the freelist. Signed-off-by: Christoph Hellwig h...@lst.de --- drivers/scsi/scsi.c | 11 ++- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index d8afec8..fb86479 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -320,13 +320,14 @@ void __scsi_put_command(struct Scsi_Host *shost, struct scsi_cmnd *cmd, { unsigned long flags; - /* changing locks here, don't need to restore the irq state */ - spin_lock_irqsave(shost-free_list_lock, flags); if (unlikely(list_empty(shost-free_list))) { - list_add(cmd-list, shost-free_list); - cmd = NULL; + spin_lock_irqsave(shost-free_list_lock, flags); + if (list_empty(shost-free_list)) { + list_add(cmd-list, shost-free_list); + cmd = NULL; + } + spin_unlock_irqrestore(shost-free_list_lock, flags); } - spin_unlock_irqrestore(shost-free_list_lock, flags); if (likely(cmd != NULL)) scsi_pool_free_command(shost-cmd_pool, cmd); -- 1.7.10.4 -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 4/6] scsi: remove a useless get/put_device pair in scsi_request_fn
From: Bart Van Assche bvanass...@acm.org SCSI devices may only be removed by calling scsi_remove_device(). That function must invoke blk_cleanup_queue() before the final put of sdev-sdev_gendev. Since blk_cleanup_queue() waits for the block queue to drain and then tears it down, scsi_request_fn cannot be active anymore after blk_cleanup_queue() has returned and hence the get_device()/put_device() pair in scsi_request_fn is unnecessary. Signed-off-by: Bart Van Assche bvanass...@acm.org Acked-by: Tejun Heo t...@kernel.org Reviewed-by: Mike Christie micha...@cs.wisc.edu --- drivers/scsi/scsi_lib.c | 14 +++--- 1 file changed, 3 insertions(+), 11 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 500178c..7d35678 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1558,16 +1558,14 @@ static void scsi_softirq_done(struct request *rq) * Lock status: IO request lock assumed to be held when called. */ static void scsi_request_fn(struct request_queue *q) + __releases(q-queue_lock) + __acquires(q-queue_lock) { struct scsi_device *sdev = q-queuedata; struct Scsi_Host *shost; struct scsi_cmnd *cmd; struct request *req; - if(!get_device(sdev-sdev_gendev)) - /* We must be tearing the block queue down already */ - return; - /* * To start with, we keep looping until the queue is empty, or until * the host is no longer able to accept any more requests. @@ -1656,7 +1654,7 @@ static void scsi_request_fn(struct request_queue *q) goto out_delay; } - goto out; + return; not_ready: spin_unlock_irq(shost-host_lock); @@ -1675,12 +1673,6 @@ static void scsi_request_fn(struct request_queue *q) out_delay: if (sdev-device_busy == 0) blk_delay_queue(q, SCSI_QUEUE_DELAY); -out: - /* must be careful here...if we trigger the -remove() function -* we cannot be holding the q lock */ - spin_unlock_irq(q-queue_lock); - put_device(sdev-sdev_gendev); - spin_lock_irq(q-queue_lock); } u64 scsi_calculate_bounce_limit(struct Scsi_Host *shost) -- 1.7.10.4 -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 6/6] scsi: remove a useless get/put_device pair in scsi_requeue_command
Avoid a spurious device get/put cycle by using scsi_put_command and folding scsi_unprep_request into scsi_requeue_command. Signed-off-by: Christoph Hellwig h...@lst.de --- drivers/scsi/scsi_lib.c | 35 +++ 1 file changed, 3 insertions(+), 32 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 91ca414..9350691 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -75,29 +75,6 @@ struct kmem_cache *scsi_sdb_cache; */ #define SCSI_QUEUE_DELAY 3 -/* - * Function: scsi_unprep_request() - * - * Purpose:Remove all preparation done for a request, including its - * associated scsi_cmnd, so that it can be requeued. - * - * Arguments: req - request to unprepare - * - * Lock status:Assumed that no locks are held upon entry. - * - * Returns:Nothing. - */ -static void scsi_unprep_request(struct request *req) -{ - struct scsi_cmnd *cmd = req-special; - - blk_unprep_request(req); - req-special = NULL; - - scsi_put_command(cmd); - put_device(cmd-device-sdev_gendev); -} - /** * __scsi_queue_insert - private queue insertion * @cmd: The SCSI command being requeued @@ -503,16 +480,10 @@ static void scsi_requeue_command(struct request_queue *q, struct scsi_cmnd *cmd) struct request *req = cmd-request; unsigned long flags; - /* -* We need to hold a reference on the device to avoid the queue being -* killed after the unlock and before scsi_run_queue is invoked which -* may happen because scsi_unprep_request() puts the command which -* releases its reference on the device. -*/ - get_device(sdev-sdev_gendev); - spin_lock_irqsave(q-queue_lock, flags); - scsi_unprep_request(req); + blk_unprep_request(req); + req-special = NULL; + scsi_put_command(cmd); blk_requeue_request(q, req); spin_unlock_irqrestore(q-queue_lock, flags); -- 1.7.10.4 -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/6] scsi: avoid taking host_lock in scsi_run_queue unless nessecary
If we don't have starved devices we don't need to take the host lock to iterate over them. Also split the function up to be more clear. Signed-off-by: Christoph Hellwig h...@lst.de --- drivers/scsi/scsi_lib.c | 43 --- 1 file changed, 24 insertions(+), 19 deletions(-) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 7bd7f0d..ad516c0 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -385,29 +385,12 @@ static inline int scsi_host_is_busy(struct Scsi_Host *shost) return 0; } -/* - * Function: scsi_run_queue() - * - * Purpose:Select a proper request queue to serve next - * - * Arguments: q - last request's queue - * - * Returns: Nothing - * - * Notes: The previous command was completely finished, start - * a new one if possible. - */ -static void scsi_run_queue(struct request_queue *q) +static void scsi_starved_list_run(struct Scsi_Host *shost) { - struct scsi_device *sdev = q-queuedata; - struct Scsi_Host *shost; LIST_HEAD(starved_list); + struct scsi_device *sdev; unsigned long flags; - shost = sdev-host; - if (scsi_target(sdev)-single_lun) - scsi_single_lun_run(sdev); - spin_lock_irqsave(shost-host_lock, flags); list_splice_init(shost-starved_list, starved_list); @@ -459,6 +442,28 @@ static void scsi_run_queue(struct request_queue *q) /* put any unprocessed entries back */ list_splice(starved_list, shost-starved_list); spin_unlock_irqrestore(shost-host_lock, flags); +} + +/* + * Function: scsi_run_queue() + * + * Purpose:Select a proper request queue to serve next + * + * Arguments: q - last request's queue + * + * Returns: Nothing + * + * Notes: The previous command was completely finished, start + * a new one if possible. + */ +static void scsi_run_queue(struct request_queue *q) +{ + struct scsi_device *sdev = q-queuedata; + + if (scsi_target(sdev)-single_lun) + scsi_single_lun_run(sdev); + if (!list_empty(sdev-host-starved_list)) + scsi_starved_list_run(sdev-host); blk_run_queue(q); } -- 1.7.10.4 -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][RFC] Add EVPD page 0x83 entries to sysfs
On 14-02-06 08:04 AM, Hannes Reinecke wrote: EVPD page 0x83 is used to uniquely identify the device. So instead of having each and every program issue a separate SG_IO call to retrieve this information it does make far more sense to display it in sysfs. Cc: Jeremy Linton jlin...@tributary.com Cc: Doug Gilbert dgilb...@interlog.com See below. Cc: Kai Makisara kai.makis...@kolumbus.fi Signed-off-by: Hannes Reinecke h...@suse.de --- drivers/scsi/scsi_scan.c | 3 + drivers/scsi/scsi_sysfs.c | 166 - include/scsi/scsi_device.h | 3 + 3 files changed, 171 insertions(+), 1 deletion(-) diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 307a811..073fb84 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -929,6 +929,9 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result, if (*bflags BLIST_SKIP_VPD_PAGES) sdev-skip_vpd_pages = 1; + if (sdev-scsi_level = SCSI_3) + scsi_attach_vpd_ident(sdev); + transport_configure_device(sdev-sdev_gendev); if (sdev-host-hostt-slave_configure) { diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 9117d0b..ffe7cb5 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -725,8 +725,170 @@ show_queue_type_field(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR(queue_type, S_IRUGO, show_queue_type_field, NULL); +void scsi_attach_vpd_ident(struct scsi_device *sdev) +{ + int ret; + int vpd_len = 255; + unsigned char *buffer; +retry: + buffer = kmalloc(vpd_len, GFP_KERNEL); + if (buffer) { + ret = scsi_get_vpd_page(sdev, 0x83, buffer, vpd_len); + if (ret == 0) { + vpd_len = (buffer[2] 8) + buffer[3]; + if (vpd_len 255) { + kfree(buffer); + goto retry; + } + sdev-vpd_ident_len = vpd_len; + sdev-vpd_ident = buffer; + } + } +} + +#define SCSI_VPD_ASSOC_lun 0x0 +#define SCSI_VPD_ASSOC_port 0x10 +#define SCSI_VPD_ASSOC_target 0x20 + +#define SCSI_VPD_DESIG_vendor 0x0 +#define SCSI_VPD_DESIG_t10 0x1 +#define SCSI_VPD_DESIG_eui 0x2 +#define SCSI_VPD_DESIG_naa 0x3 +#define SCSI_VPD_DESIG_relport 0x4 +#define SCSI_VPD_DESIG_tpgrp 0x5 +#define SCSI_VPD_DESIG_lugrp 0x6 +#define SCSI_VPD_DESIG_md5 0x7 +#define SCSI_VPD_DESIG_scsi_name 0x8 + +int scsi_parse_vpd_ident(struct scsi_device *sdev, +int assoc, int desig, char *buf) +{ + unsigned char *d; + int len = 0; + + if (!sdev-vpd_ident) + return -EINVAL; + + d = sdev-vpd_ident + 4; + while (d sdev-vpd_ident + sdev-vpd_ident_len) { + if ((d[1] 0x30) == assoc Wouldn't it be clearer if: if (((d[1] 0x30) 4) == assoc and the lun, port and target defines were changed to agree with T10? + (d[1] 0xf) == desig) { + switch (d[1] 0xf) { + case SCSI_VPD_DESIG_eui: + case SCSI_VPD_DESIG_naa: + case SCSI_VPD_DESIG_md5: + switch (d[3]) { + case 8: + len += sprintf(buf + len, + %8phN\n, d + 4); + break; + case 12: + len += sprintf(buf + len, + %12phN\n, d + 4); + break; + case 16: + len += sprintf(buf + len, + %16phN\n, d + 4); + break; + } + break; + case SCSI_VPD_DESIG_relport: + case SCSI_VPD_DESIG_tpgrp: + case SCSI_VPD_DESIG_lugrp: + len += sprintf(buf + len, %d\n, + (int)(d[6] 8) + d[7]); + break; + case SCSI_VPD_DESIG_vendor: + case SCSI_VPD_DESIG_t10: + case SCSI_VPD_DESIG_scsi_name: + len += snprintf(buf + len, d[3] + 2, %s\n, + d + 4); + break; + } + } + d += d[3] + 4; + } + + return len ? len : -EINVAL; +} + +#define sdev_evpd_ident(assoc,desig) \ +static ssize_t
Re: [PATCH] st: Do not rewind for SG_IO
On 14-02-06 10:13 AM, Hannes Reinecke wrote: On 02/06/2014 03:38 PM, James Bottomley wrote: On Thu, 2014-02-06 at 08:50 -0500, Martin K. Petersen wrote: Hannes == Hannes Reinecke h...@suse.de writes: My patch provides both the original VPD 0x83 and 0x80 bits as well as a handle identical to /sbin/scsi_id. Hannes Bah, don't do that. That should better be handled by udev Hannes rules. I've got a set of patches moving from scsi_id to sg_inq, Hannes which can be easily adapted to using sysfs directly. I just want to get out of the userland sending random SCSI commands business. That is a world of pain right now. Well, in the words of Bill Clinton I feel your pain. However, I need to know we won't pick up that same whole world of pain in the kernel. Remember it's not just SCSI devices, it's also ATA devices and potentially other block devices ... then there's blkid and all the weird things it does. Then there's transport identifiers for multi-path reservations and so on. blkid is actually not so bad, _if_ it would distinguish between 'metadata not found' and 'I/O error during metadata read'. I made a patch which hopefully should find it's way upstream. And blkid just issues plain READ commands, so _this_ behaviour won't change ... Convince me this path won't have us shifting a whole bunch of weird from user space to the kernel without any reduction in the weird factor. Remember the point is not what can we do for all the nicely behaved SCSI-3 devices, it's what do we do for everything. Well, first and foremost the patch should be limited to SCSI-3 (and later devices). So we'd be insulated from the most obvious crap. So that leaves only devices which claim to be SCSI-3 or something, but still keel over when asked for VPD pages. However, this type of devices will fail already, as 'sd.c' is already asking for all sorts of VPD pages. Which leaves only non-Disk devices, but those tend to have a better SCSI implementation as you can't make quick bucks with, say, as SCSI Enclosure device. But the prime motivator behind this patch is that we can be reasonably sure the device will answer at all. When retrieving the EVPD pages from userspace you always have the problem that the device might have gone away or being unresponsive by the time you get around sending the SG_IO call. So you always have these stuck user-space processes asking for information which _had_ been present at one point. In particular udev is prone for this; anyone who ever came across the message udevd[]: worker unexpectedly returned with status 0x0100 knows what I'm talking about ... Running a small array with an external power supply which is insufficient for the task of spinning up more than 1 or 2 disks is interesting to watch :-) Doug Gilbert And the more udev events for a device you get, the higher the likelyhood for this to happen is. Plus I could re-use this information for my scsi_dh_alua patchset, and for xcopy and friends we'll be needing it, too. Cheers, Hannes -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] block/null_blk: Fix completion processing from LIFO to FIFO
On Thu, Feb 06 2014, Christoph Hellwig wrote: On Thu, Feb 06, 2014 at 06:33:17PM +0200, Shlomo Pongratz wrote: The completion queue is implemented using lockless list. The llist_add is adds the events to the list head which is a push operation. The processing of the completion elements is done by disconnecting all the pushed elements and iterating over the disconnected list. The problem is that the processing is done in reverse order w.r.t order of the insertion i.e. LIFO processing. By reversing the disconnected list which is done in linear time the desired FIFO processing is achieved. I think it should just switch to using __smp_call_function_single directly, mirroring commit 3d6efbf62c797a2924785f482e4ce8aa8820ec72 for the blk-mq core. I've actually a patch in the queue that allows generic request completion offloading similar to blk_complete_request() in the old request code, but it'll need a bit more polishing first. The patch is trivial enough that I may as well just apply it. It's not a bug as such, but doing FIFO completions does make more sense. I was aware of this issued, fwiw, it just didn't really matter for my testing and I opted to avoid a list reversal to make it cheaper. -- Jens Axboe -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] scsi/isci/port_config: Fix a infinite loop.
On Tue, Jul 16, 2013 at 7:54 PM, Xinghai Yu yuxing...@cn.fujitsu.com wrote: It seems the phy_index++; have been placed in wrong place, without it the while circle up will do a infinite loop. Signed-off-by: Xinghai Yu yuxing...@cn.fujitsu.com --- drivers/scsi/isci/port_config.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/drivers/scsi/isci/port_config.c b/drivers/scsi/isci/port_config.c index cd962da..85c77f6 100644 --- a/drivers/scsi/isci/port_config.c +++ b/drivers/scsi/isci/port_config.c @@ -311,9 +311,9 @@ sci_mpc_agent_validate_phy_configuration(struct isci_host *ihost, ihost-phys[phy_index]); assigned_phy_mask |= (1 phy_index); + phy_index++; } - phy_index++; } return sci_port_configuration_agent_validate_ports(ihost, port_agent); Hmm, I'm not aware of anyone using mpc mode. Maybe we should just remove it? -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/4] isci: fix needless ata reset escalations
isci is needlessly tying libata's hands by returning SAM_STAT_CHECK_CONDITION to some ata errors. Instead, prefer SAS_PROTO_RESPONSE to let libata (via sas_ata_task_done()) disposition the device-to-host fis. For example isci is triggering an HSM Violation where AHCI is showing a simple media error for the same bus condition: isci: ata7.00: failed command: READ VERIFY SECTOR(S) ata7.00: cmd 40/00:01:00:00:00/00:00:00:00:00/e0 tag 0 res 01/04:00:00:00:00/00:00:00:00:00/e0 Emask 0x3 (HSM violation) ahci: ata6.00: failed command: READ VERIFY SECTOR(S) ata6.00: cmd 40/00:01:00:00:00/00:00:00:00:00/e0 tag 0 res 51/40:01:00:00:00/00:00:00:00:00/e0 Emask 0x9 (media error) Note that the isci response matches this from sas_ata_task_done(): /* We saw a SAS error. Send a vague error. */ [..] dev-sata_dev.fis[3] = 0x04; /* status err */ dev-sata_dev.fis[2] = ATA_ERR; The end effect is that isci is needlessly triggering hard resets when they are not necessary. Cc: sta...@vger.kernel.org Reported-by: Xun Ni xun...@intel.com Tested-by: Nelson Cheng nelson.ch...@intel.com Acked-by: Lukasz Dorau lukasz.do...@intel.com Signed-off-by: Dan Williams dan.j.willi...@intel.com --- drivers/scsi/isci/request.c |8 ++-- 1 files changed, 2 insertions(+), 6 deletions(-) diff --git a/drivers/scsi/isci/request.c b/drivers/scsi/isci/request.c index 99d2930b18c8..56e38096f0c4 100644 --- a/drivers/scsi/isci/request.c +++ b/drivers/scsi/isci/request.c @@ -2723,13 +2723,9 @@ static void isci_process_stp_response(struct sas_task *task, struct dev_to_host_ memcpy(resp-ending_fis, fis, sizeof(*fis)); ts-buf_valid_size = sizeof(*resp); - /* If the device fault bit is set in the status register, then -* set the sense data and return. -*/ - if (fis-status ATA_DF) + /* If an error is flagged let libata decode the fis */ + if (ac_err_mask(fis-status)) ts-stat = SAS_PROTO_RESPONSE; - else if (fis-status ATA_ERR) - ts-stat = SAM_STAT_CHECK_CONDITION; else ts-stat = SAM_STAT_GOOD; -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/4] isci, libsas fixes for 3.4-rc2
Hi James, Here are some collected fixes. All but patch 2 are tagged for -stable. Patch 1 and 4 have been on the list since before the 3.14 merge window, patch 2 and 3 are new. Please apply, thank you. [PATCH 1/4] isci: fix reset timeout handling [PATCH 2/4] scsi, libsas: introduce scmd_dbg() to quiet false positive timeout messages [PATCH 3/4] isci: fix needless ata reset escalations [PATCH 4/4] isci: correct erroneous for_each_isci_host macro drivers/scsi/isci/host.h|5 ++--- drivers/scsi/isci/port_config.c |7 --- drivers/scsi/isci/request.c |8 ++-- drivers/scsi/isci/task.c|2 +- drivers/scsi/libsas/sas_scsi_host.c |2 +- include/scsi/scsi_device.h | 12 6 files changed, 18 insertions(+), 18 deletions(-) -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/4] scsi, libsas: introduce scmd_dbg() to quiet false positive timeout messages
libsas sometimes short circuits timeouts to force commands into error recovery. It is misleading to log that the command timed-out in sas_scsi_timed_out() when in fact it was just queued for error handling. It's also redundant in the case of a true timeout as libata eh will detect and report timeouts via it's AC_ERR_TIMEOUT facility. Given that some environments consider timeout errors to be indicative of impending device failure demote the sas_scsi_timed_out() timeout message to be disabled by default. This parallels ata_scsi_timed_out(). Cc: Jack Wang jack_w...@usish.com Cc: Anand Kumar Santhanam anandkumar.santha...@pmcs.com Cc: Sangeetha Gnanasekaran sangeetha.gnanaseka...@pmcs.com Cc: Nikith Ganigarakoppal nikith.ganigarakop...@pmcs.com Reported-by: Xun Ni xun...@intel.com Tested-by: Nelson Cheng nelson.ch...@intel.com Acked-by: Lukasz Dorau lukasz.do...@intel.com Signed-off-by: Dan Williams dan.j.willi...@intel.com --- drivers/scsi/libsas/sas_scsi_host.c |2 +- include/scsi/scsi_device.h | 12 2 files changed, 13 insertions(+), 1 deletions(-) diff --git a/drivers/scsi/libsas/sas_scsi_host.c b/drivers/scsi/libsas/sas_scsi_host.c index da3aee17faa5..25d0f127424d 100644 --- a/drivers/scsi/libsas/sas_scsi_host.c +++ b/drivers/scsi/libsas/sas_scsi_host.c @@ -862,7 +862,7 @@ out: enum blk_eh_timer_return sas_scsi_timed_out(struct scsi_cmnd *cmd) { - scmd_printk(KERN_DEBUG, cmd, command %p timed out\n, cmd); + scmd_dbg(cmd, command %p timed out\n, cmd); return BLK_EH_NOT_HANDLED; } diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index d65fbec2533d..067ac9f1607c 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -235,12 +235,24 @@ struct scsi_dh_data { #define sdev_printk(prefix, sdev, fmt, a...) \ dev_printk(prefix, (sdev)-sdev_gendev, fmt, ##a) +#define sdev_dbg(sdev, fmt, a...) \ + dev_dbg((sdev)-sdev_gendev, fmt, ##a) + #define scmd_printk(prefix, scmd, fmt, a...) \ (scmd)-request-rq_disk ? \ sdev_printk(prefix, (scmd)-device, [%s] fmt,\ (scmd)-request-rq_disk-disk_name, ##a) : \ sdev_printk(prefix, (scmd)-device, fmt, ##a) +#define scmd_dbg(scmd, fmt, a...) \ +do { \ + if ((scmd)-request-rq_disk) \ + sdev_dbg((scmd)-device, [%s] fmt, \ +(scmd)-request-rq_disk-disk_name, ##a);\ + else \ + sdev_dbg((scmd)-device, fmt, ##a);\ + } while (0) + enum scsi_target_state { STARGET_CREATED = 1, STARGET_RUNNING, -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 4/4] isci: correct erroneous for_each_isci_host macro
From: Lukasz Dorau lukasz.do...@intel.com In the first place, the loop 'for' in the macro 'for_each_isci_host' (drivers/scsi/isci/host.h:314) is incorrect, because it accesses the 3rd element of 2 element array. After the 2nd iteration it executes the instruction: ihost = to_pci_info(pdev)-hosts[2] (while the size of the 'hosts' array equals 2) and reads an out of range element. In the second place, this loop is incorrectly optimized by GCC v4.8 (see http://marc.info/?l=linux-kernelm=138998871911336w=2). As a result, on platforms with two SCU controllers, the loop is executed more times than it can be (for i=0,1 and 2). It causes kernel panic during entering the S3 state and the following oops after 'rmmod isci': BUG: unable to handle kernel NULL pointer dereference at (null) IP: [8131360b] __list_add+0x1b/0xc0 Oops: [#1] SMP RIP: 0010:[8131360b] [8131360b] __list_add+0x1b/0xc0 Call Trace: [81661b84] __mutex_lock_slowpath+0x114/0x1b0 [81661c3f] mutex_lock+0x1f/0x30 [a03e97cb] sas_disable_events+0x1b/0x50 [libsas] [a03e9818] sas_unregister_ha+0x18/0x60 [libsas] [a040316e] isci_unregister+0x1e/0x40 [isci] [a0403efd] isci_pci_remove+0x5d/0x100 [isci] [813391cb] pci_device_remove+0x3b/0xb0 [813fbf7f] __device_release_driver+0x7f/0xf0 [813fc8f8] driver_detach+0xa8/0xb0 [813fbb8b] bus_remove_driver+0x9b/0x120 [813fcf2c] driver_unregister+0x2c/0x50 [813381f3] pci_unregister_driver+0x23/0x80 [a04152f8] isci_exit+0x10/0x1e [isci] [810d199b] SyS_delete_module+0x16b/0x2d0 [81012a21] ? do_notify_resume+0x61/0xa0 [8166ce29] system_call_fastpath+0x16/0x1b The loop has been corrected. This patch fixes kernel panic during entering the S3 state and the above oops. Signed-off-by: Lukasz Dorau lukasz.do...@intel.com Reviewed-by: Maciej Patelczyk maciej.patelc...@intel.com Tested-by: Lukasz Dorau lukasz.do...@intel.com Cc: Dave Jiang dave.ji...@intel.com Cc: sta...@vger.kernel.org Signed-off-by: Dan Williams dan.j.willi...@intel.com --- drivers/scsi/isci/host.h |5 ++--- 1 files changed, 2 insertions(+), 3 deletions(-) diff --git a/drivers/scsi/isci/host.h b/drivers/scsi/isci/host.h index 4911310a38f5..22a9bb1abae1 100644 --- a/drivers/scsi/isci/host.h +++ b/drivers/scsi/isci/host.h @@ -311,9 +311,8 @@ static inline struct Scsi_Host *to_shost(struct isci_host *ihost) } #define for_each_isci_host(id, ihost, pdev) \ - for (id = 0, ihost = to_pci_info(pdev)-hosts[id]; \ -id ARRAY_SIZE(to_pci_info(pdev)-hosts) ihost; \ -ihost = to_pci_info(pdev)-hosts[++id]) + for (id = 0; id SCI_MAX_CONTROLLERS \ +(ihost = to_pci_info(pdev)-hosts[id]); id++) static inline void wait_for_start(struct isci_host *ihost) { -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/4] isci: fix reset timeout handling
Remove an erroneous BUG_ON() in the case of a hard reset timeout. The reset timeout handler puts the port into the awaiting link-up state. The timeout causes the device to be disconnected and we need to be in the awaiting link-up state to re-connect the port. The BUG_ON() made the incorrect assumption that resets never timeout and we always complete the reset in the resetting state. Testing this patch also uncovered that libata continues to attempt to reset the port long after the driver has torn down the context. Once the driver has committed to abandoning the link it must indicate to libata that recovery ends by returning -ENODEV from -lldd_I_T_nexus_reset(). Cc: sta...@vger.kernel.org Cc: Maciej Patelczyk maciej.patelc...@intel.com Cc: Dave Jiang dave.ji...@intel.com Acked-by: Lukasz Dorau lukasz.do...@intel.com Reported-by: David Milburn dmilb...@redhat.com Reported-by: Xun Ni xun...@intel.com Tested-by: Xun Ni xun...@intel.com Signed-off-by: Dan Williams dan.j.willi...@intel.com --- drivers/scsi/isci/port_config.c |7 --- drivers/scsi/isci/task.c|2 +- 2 files changed, 1 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/isci/port_config.c b/drivers/scsi/isci/port_config.c index 85c77f6b802b..ac879745ef80 100644 --- a/drivers/scsi/isci/port_config.c +++ b/drivers/scsi/isci/port_config.c @@ -615,13 +615,6 @@ static void sci_apc_agent_link_up(struct isci_host *ihost, SCIC_SDS_APC_WAIT_LINK_UP_NOTIFICATION); } else { /* the phy is already the part of the port */ - u32 port_state = iport-sm.current_state_id; - - /* if the PORT'S state is resetting then the link up is from -* port hard reset in this case, we need to tell the port -* that link up is recieved -*/ - BUG_ON(port_state != SCI_PORT_RESETTING); port_agent-phy_ready_mask |= 1 phy_index; sci_port_link_up(iport, iphy); } diff --git a/drivers/scsi/isci/task.c b/drivers/scsi/isci/task.c index 0d30ca849e8f..5d6fda72d659 100644 --- a/drivers/scsi/isci/task.c +++ b/drivers/scsi/isci/task.c @@ -801,7 +801,7 @@ int isci_task_I_T_nexus_reset(struct domain_device *dev) /* XXX: need to cleanup any ireqs targeting this * domain_device */ - ret = TMF_RESP_FUNC_COMPLETE; + ret = -ENODEV; goto out; } -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 13/17] scsi: push host_lock down into scsi_{host,target}_queue_ready
On Thu, 2014-02-06 at 18:10 +0100, Bart Van Assche wrote: On 02/06/14 17:56, James Bottomley wrote: Could you benchmark this lot and show what the actual improvement is just for this series, if any? I see a performance improvement of 12% with the SRP protocol for the SCSI core optimizations alone (I am still busy measuring the impact of the blk-mq conversion but I can already see that it is really significant). Please note that the performance impact depends a lot on the workload (number of LUNs per SCSI host e.g.) so maybe the workload I chose is not doing justice to Christoph's work. And it's also important to mention that with the workload I ran I was saturating the target system CPU (a quad core Intel i5). In other words, results might be better with a more powerful target system. Starting with a baseline using scsi_debug that NOPs REQ_TYPE_FS commands to measure improvements would be a better baseline vs. scsi_request_fn() existing code that what your doing above. That way at least it's easier to measure specific scsi-core overhead down to the LLD's queuecommand without everything else in the way. --nab -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 12/15] scsi: initial blk-mq support
On Wed, 2014-02-05 at 04:41 -0800, Christoph Hellwig wrote: plain text document attachment (0012-scsi-initial-blk-mq-support.patch) Add support for using the blk-mq code to submit requests to SCSI drivers. There is very little blk-mq specific code, but that's partially because important functionality like partial completions and request requeueing is still missing in blk-mq. I hope to keep most of the additions for these in the blk-mq core instead of the SCSI layer, though. Based on the earlier scsi-mq prototype by Nicholas Bellinger, although not a whole lot of actual code is left. Not-quite-signed-off-yet-by: Christoph Hellwig h...@lst.de --- drivers/scsi/scsi.c | 36 ++- drivers/scsi/scsi_lib.c | 244 -- drivers/scsi/scsi_priv.h |2 + drivers/scsi/scsi_scan.c |5 +- include/scsi/scsi_host.h |3 + 5 files changed, 278 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index adb8bfb..cf5c110 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -44,6 +44,7 @@ #include linux/string.h #include linux/slab.h #include linux/blkdev.h +#include linux/blk-mq.h #include linux/delay.h #include linux/init.h #include linux/completion.h @@ -688,6 +689,33 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd) return 0; } +static void scsi_softirq_done_remote(void *data) +{ + return scsi_softirq_done(data); +} + +static void scsi_mq_done(struct request *req) +{ + int cpu; + +#if 0 + if (!ctx-ipi_redirect) + return scsi_softirq_done(cmd); +#endif + + cpu = get_cpu(); + if (cpu != req-cpu cpu_online(req-cpu)) { + req-csd.func = scsi_softirq_done_remote; + req-csd.info = req; + req-csd.flags = 0; + __smp_call_function_single(req-cpu, req-csd, 0); + } else { + scsi_softirq_done(req); + } + + put_cpu(); +} + /** * scsi_done - Invoke completion on finished SCSI command. * @cmd: The SCSI Command for which a low-level device driver (LLDD) gives @@ -701,8 +729,14 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd) */ static void scsi_done(struct scsi_cmnd *cmd) { + struct request *req = cmd-request; + trace_scsi_dispatch_cmd_done(cmd); - blk_complete_request(cmd-request); + + if (req-mq_ctx) + scsi_mq_done(req); + else + blk_complete_request(req); } Is there extra scsi_mq_done() part that does IPI here even necessary anymore..? I was under the assumption that blk_mq_end_io() is already taking care of this..? /** diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index e67950c..8dd8893 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -20,6 +20,7 @@ #include linux/delay.h #include linux/hardirq.h #include linux/scatterlist.h +#include linux/blk-mq.h #include scsi/scsi.h #include scsi/scsi_cmnd.h @@ -554,6 +555,15 @@ static bool scsi_end_request(struct scsi_cmnd *cmd, int error, int bytes, struct request *req = cmd-request; /* + * XXX: need to handle partial completions and retries here. + */ + if (req-mq_ctx) { + blk_mq_end_io(req, error); + put_device(cmd-device-sdev_gendev); + return true; + } + + /* * If there are blocks left over at the end, set up the command * to queue the remainder of them. */ @@ -1014,12 +1024,15 @@ static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb, { int count; - /* - * If sg table allocation fails, requeue request later. - */ - if (unlikely(scsi_alloc_sgtable(sdb, req-nr_phys_segments, - gfp_mask))) { - return BLKPREP_DEFER; + BUG_ON(req-nr_phys_segments SCSI_MAX_SG_SEGMENTS); + + if (!req-mq_ctx) { + /* + * If sg table allocation fails, requeue request later. + */ + if (unlikely(scsi_alloc_sgtable(sdb, req-nr_phys_segments, + gfp_mask))) + return BLKPREP_DEFER; } req-buffer = NULL; @@ -1075,9 +1088,11 @@ int scsi_init_io(struct scsi_cmnd *cmd, gfp_t gfp_mask) BUG_ON(prot_sdb == NULL); ivecs = blk_rq_count_integrity_sg(rq-q, rq-bio); - if (scsi_alloc_sgtable(prot_sdb, ivecs, gfp_mask)) { - error = BLKPREP_DEFER; - goto err_exit; + if (!rq-mq_ctx) { + if (scsi_alloc_sgtable(prot_sdb, ivecs, gfp_mask)) { + error = BLKPREP_DEFER; + goto err_exit; + } } count = blk_rq_map_integrity_sg(rq-q,
Re: [PATCH 06/32] target: Convert lu_gp_ref_cnt to kref
On Wed, 2014-02-05 at 14:02 -0800, Andy Grover wrote: Hi nab, I'm getting back to looking at this patchset, but wanted to just discuss and understand this one first because all the kref ones are similar. see below. On 12/16/2013 12:52 PM, Nicholas A. Bellinger wrote: On Fri, 2013-12-13 at 15:58 -0800, Andy Grover wrote: Use kref to handle reference counting Signed-off-by: Andy Grover agro...@redhat.com --- drivers/target/target_core_alua.c | 37 - include/target/target_core_base.h |2 +- 2 files changed, 21 insertions(+), 18 deletions(-) diff --git a/drivers/target/target_core_alua.c b/drivers/target/target_core_alua.c index 2ac2f11..8c01ade 100644 --- a/drivers/target/target_core_alua.c +++ b/drivers/target/target_core_alua.c @@ -54,6 +54,16 @@ static LIST_HEAD(lu_gps_list); struct t10_alua_lu_gp *default_lu_gp; +static void release_alua_lu_gp(struct kref *ref) +{ + struct t10_alua_lu_gp *lu_gp = container_of(ref, struct t10_alua_lu_gp, refcount); + + kmem_cache_free(t10_alua_lu_gp_cache, lu_gp); +} + +#define get_alua_lu_gp(x) kref_get(x-refcount) +#define put_alua_lu_gp(x) kref_put(x-refcount, release_alua_lu_gp) + /* * REPORT_TARGET_PORT_GROUPS * @@ -898,8 +908,7 @@ int core_alua_do_port_transition( local_lu_gp_mem = l_dev-dev_alua_lu_gp_mem; spin_lock(local_lu_gp_mem-lu_gp_mem_lock); lu_gp = local_lu_gp_mem-lu_gp; - atomic_inc(lu_gp-lu_gp_ref_cnt); - smp_mb__after_atomic_inc(); + get_alua_lu_gp(lu_gp); spin_unlock(local_lu_gp_mem-lu_gp_mem_lock); /* * For storage objects that are members of the 'default_lu_gp', @@ -913,8 +922,8 @@ int core_alua_do_port_transition( */ core_alua_do_transition_tg_pt(l_tg_pt_gp, l_port, l_nacl, md_buf, new_state, explicit); - atomic_dec(lu_gp-lu_gp_ref_cnt); - smp_mb__after_atomic_dec(); + + put_alua_lu_gp(lu_gp); kfree(md_buf); return 0; } @@ -985,8 +994,7 @@ int core_alua_do_port_transition( l_tg_pt_gp-tg_pt_gp_id, (explicit) ? explicit : implicit, core_alua_dump_state(new_state)); - atomic_dec(lu_gp-lu_gp_ref_cnt); - smp_mb__after_atomic_dec(); + put_alua_lu_gp(lu_gp); kfree(md_buf); return 0; } @@ -1107,7 +1115,8 @@ core_alua_allocate_lu_gp(const char *name, int def_group) INIT_LIST_HEAD(lu_gp-lu_gp_node); INIT_LIST_HEAD(lu_gp-lu_gp_mem_list); spin_lock_init(lu_gp-lu_gp_lock); - atomic_set(lu_gp-lu_gp_ref_cnt, 0); + + kref_init(lu_gp-refcount); if (def_group) { lu_gp-lu_gp_id = alua_lu_gps_counter++; @@ -1200,13 +1209,7 @@ void core_alua_free_lu_gp(struct t10_alua_lu_gp *lu_gp) list_del(lu_gp-lu_gp_node); alua_lu_gps_count--; spin_unlock(lu_gps_lock); - /* - * Allow struct t10_alua_lu_gp * referenced by core_alua_get_lu_gp_by_name() - * in target_core_configfs.c:target_core_store_alua_lu_gp() to be - * released with core_alua_put_lu_gp_from_name() - */ - while (atomic_read(lu_gp-lu_gp_ref_cnt)) - cpu_relax(); + /* * Release reference to struct t10_alua_lu_gp * from all associated * struct se_device. @@ -1241,7 +1244,7 @@ void core_alua_free_lu_gp(struct t10_alua_lu_gp *lu_gp) } spin_unlock(lu_gp-lu_gp_lock); - kmem_cache_free(t10_alua_lu_gp_cache, lu_gp); + put_alua_lu_gp(lu_gp); } The assumption that it's safe to 'Release reference to struct t10_alua_lu_gp * from all associated struct device' below the original cpu_relax(), while there are still other process contexts doing their respective put_alua_lu_gp() is totally wrong. The only other spot is core_alua_do_port_transition, afaics. I think if it races with free_lu_gp, lu_gp will either be the old lu_gp (which no longer will have anything on lu_gp_mem_list) or will be default_lu_gp. If it's the old lu_gp then it iterates over an empty list, and then the lu_gp gets finally freed by the put() at the bottom. Furthermore, allowing a configfs_group_ops-drop_item() to return while there are still active references from other process contexts means that the parent struct config_group is no longer referenced counted (eg: configfs child is removed), and introduces a whole host of potential bugs. So that said, NAK on this patch. I think some of the other patches used drop_item() and thus were bad, but in this one the existing code is already calling core_alua_free_lu_gp() from release(). Thoughts? The problem with this patch and all of the other patches that follow the same logic is the false assumption that it's safe to return from configfs_group_operations-drop_item() before all references to the underlying data structure containing the associated struct config_group have
LSI SAS2008 SATA TRIM not working
Various sources indicate that LSI's SAS2008 controllers support TRIM when running their IT firmware (LSI [1] and this list [2]). However, I have not been able to get it working with Dell PERC H200 or H310 cross flashed into LSI IT firmware. Currently I'm testing with Samsung 840 EVO SATA SSDs. I have tried various LSI IT firmware versions (P14, P16, P18) and various Linux distributions (Ubuntu 13.10, Ubuntu 12.04, Ubuntu 14 beta, RHEL 7 beta, Fedora 19). The SSDs report they support TRIM: # hdparm -I /dev/sdc | grep TRIM supported * Data Set Management TRIM supported (limit 8 blocks) Checking for support shows TRIM support is not making it through: # cat /sys/block/sdc/queue/discard_granularity 0 # fstrim /srv/node/disk2p1 fstrim: /srv/node/disk2p1: FITRIM ioctl failed: Operation not supported I am not using LVM or crypto. I've tried both ext4 and xfs. Here is an example fstab line: LABEL=disk2p1 /srv/node/disk2p1 xfs noatime,nodiratime,nobarrier,logbufs=8,discard 0 0 The dmesg lines for mpt2sas look as follows: mpt2sas version 15.100.00.00 loaded mpt2sas0: 64 BIT PCI BUS DMA ADDRESSING SUPPORTED, total mem (16418600 kB) mpt2sas :04:00.0: irq 79 for MSI/MSI-X mpt2sas0-msix0: PCI-MSI-X enabled: IRQ 79 mpt2sas0: iomem(0xdf2b), mapped(0xc90011be), size(65536) mpt2sas0: ioport(0xfc00), size(256) mpt2sas0: sending message unit reset !! mpt2sas0: message unit reset: SUCCESS mpt2sas0: Allocated physical memory: size(8056 kB) mpt2sas0: Current Controller Queue Depth(3579), Max Controller Queue Depth(3712) mpt2sas0: Scatter Gather Elements per IO(128) mpt2sas0: LSISAS2008: FWVersion(14.00.01.00), ChipRevision(0x03), BiosVersion(07.27.00.00) mpt2sas0: Dell 6Gbps SAS HBA: Vendor(0x1000), Device(0x0072), SSVID(0x1028), SSDID(0x1F1C) mpt2sas0: Protocol=(Initiator,Target), Capabilities=(TLR,EEDP,Snapshot Buffer,Diag Trace Buffer,Task Set Full,NCQ) mpt2sas0: sending port enable !! mpt2sas0: host_add: handle(0x0001), sas_addr(0x590b11c027281600), phys(8) mpt2sas0: port enable: SUCCESS Is TRIM working for anyone using LSI SAS2008 controllers? I'm out of ideas as to what could be the cause of the failure. Is it just that mpt2sas doesn't support TRIM contrary to the links below? Thanks, -Kurt [1] http://mycusthelp.info/LSI/_cs/AnswerDetail.aspx?sSessionID=6784104245IHLKQYLMXJTXVUMPQBJOGGFDORLPZBinc=8039caller=~%2fFindAnswers.aspx%3ftxtCriteria%3dtrim%26sSessionid%3d6784104245IHLKQYLMXJTXVUMPQBJOGGFDORLPZB [2] http://comments.gmane.org/gmane.linux.scsi/69511 -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
FW: [LSF/MM TOPIC] New Storage capabilities
Several new features are becoming a reality in SCSI and ATA this year, and I would like to participate in the discussions on supporting these new features. a) SCSI conglomerate LUNs (using more bits in the LUN to manage groupings of logical units); b) Atomic commands; c) IO and LBA HINTS (for both SCSI and ATA/IDE); a. For storage tiering; b. For cache management; d) FC - 128Gig parallel and breakout mode I attend T10 (SCSI), T11 (FC), T13 (ATA/IDE), IETF (iSCSI), and SNIA and can provide expertise in the areas listed above as well as the topics covered in those standards committees. Fred Knight -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2] scsi: Add 'retry_timeout' to avoid infinite command retry
Currently, scsi error handling in scsi_io_completion() tries to unconditionally requeue scsi command when device keeps some error state. For example, UNIT_ATTENTION causes infinite retry with action == ACTION_RETRY. This is because retryable errors are thought to be temporary and the scsi device will soon recover from those errors. Normally, such retry policy is appropriate because the device will soon recover from temporary error state. But there is no guarantee that device is able to recover from error state immediately. Actually, we've experienced an infinite retry on some hardware. Therefore hardware error can results in infinite command retry loop. This patch adds 'retry_timeout' sysfs attribute which limits the retry time of each scsi command. This attribute is located in scsi sysfs directory for example /sys/bus/scsi/devices/X:X:X:X/ and value is in seconds. Once scsi command retry time is longer than this timeout, the command is treated as failure. 'retry_timeout' is set to '0' by default which means no timeout set. Usage: - To set retry timeout(set retry_timeout to 30 sec): # echo 30 /sys/bus/scsi/devices/X:X:X:X/retry_timeout Changes in v2: - check retry timeout in scsi_io_completion() instead of scsi_softirq_done() Signed-off-by: Eiichi Tsukata eiichi.tsukata...@hitachi.com Cc: James E.J. Bottomley jbottom...@parallels.com Cc: linux-scsi@vger.kernel.org Cc: linux-ker...@vger.kernel.org --- drivers/scsi/scsi_lib.c| 22 ++ drivers/scsi/scsi_scan.c |1 + drivers/scsi/scsi_sysfs.c | 32 include/scsi/scsi.h|1 + include/scsi/scsi_device.h |1 + 5 files changed, 57 insertions(+) diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 7bd7f0d..813b287 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -741,6 +741,24 @@ static int __scsi_error_from_host_byte(struct scsi_cmnd *cmd, int result) } /* + * Check if scsi command excessed retry timeout + */ +static int scsi_retry_timed_out(struct scsi_cmnd *cmd) +{ + unsigned int retry_timeout; + + retry_timeout = cmd-device-retry_timeout; + if (retry_timeout + time_before(cmd-jiffies_at_alloc + retry_timeout, jiffies)) { + scmd_printk(KERN_ERR, cmd, retry timeout, waited %us\n, + retry_timeout/HZ); + return 1; + } + + return 0; +} + +/* * Function:scsi_io_completion() * * Purpose: Completion processing for block device I/O requests. @@ -989,6 +1007,10 @@ void scsi_io_completion(struct scsi_cmnd *cmd, unsigned int good_bytes) action = ACTION_FAIL; } + if ((action == ACTION_RETRY || action == ACTION_DELAYED_RETRY) + scsi_retry_timed_out(cmd)) + action = ACTION_FAIL; + switch (action) { case ACTION_FAIL: /* Give up and fail the remainder of the request */ diff --git a/drivers/scsi/scsi_scan.c b/drivers/scsi/scsi_scan.c index 307a811..4ab044a 100644 --- a/drivers/scsi/scsi_scan.c +++ b/drivers/scsi/scsi_scan.c @@ -925,6 +925,7 @@ static int scsi_add_lun(struct scsi_device *sdev, unsigned char *inq_result, sdev-no_dif = 1; sdev-eh_timeout = SCSI_DEFAULT_EH_TIMEOUT; + sdev-retry_timeout = SCSI_DEFAULT_RETRY_TIMEOUT; if (*bflags BLIST_SKIP_VPD_PAGES) sdev-skip_vpd_pages = 1; diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c index 8ff62c2..eaa2118 100644 --- a/drivers/scsi/scsi_sysfs.c +++ b/drivers/scsi/scsi_sysfs.c @@ -627,6 +627,37 @@ sdev_store_eh_timeout(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR(eh_timeout, S_IRUGO | S_IWUSR, sdev_show_eh_timeout, sdev_store_eh_timeout); static ssize_t +sdev_show_retry_timeout(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct scsi_device *sdev; + sdev = to_scsi_device(dev); + return snprintf(buf, 20, %u\n, sdev-retry_timeout / HZ); +} + +static ssize_t +sdev_store_retry_timeout(struct device *dev, struct device_attribute *attr, +const char *buf, size_t count) +{ + struct scsi_device *sdev; + unsigned int retry_timeout; + int err; + + if (!capable(CAP_SYS_ADMIN)) + return -EACCES; + + sdev = to_scsi_device(dev); + err = kstrtouint(buf, 10, retry_timeout); + if (err) + return err; + sdev-retry_timeout = retry_timeout * HZ; + + return count; +} +static DEVICE_ATTR(retry_timeout, S_IRUGO | S_IWUSR, + sdev_show_retry_timeout, sdev_store_retry_timeout); + +static ssize_t store_rescan_field (struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { @@ -797,6 +828,7 @@ static struct attribute *scsi_sdev_attrs[] = { dev_attr_state.attr,
Investitionshilfe.
Investitionshilfe. Ich bin Fräulein Laura Buisson, Einzel weiblich (nie verheiratet) mich und meinen jüngeren Bruder, plane ich, zu Ihrem Land für Investitionszwecke zu verlagern. Ich habe einige Fonds und im Hotel-und Transportgeschäft investieren wollen und wir $ 10.500.000,00 (zehn Millionen fünfhunderttausend US-Dollar) müssen wir Sie fungieren als unsere ausländischen Geschäftspartner zu meinem verstorbenen Vater. Wir werden Ihre Prozentsatz auf Ihre Antwort zu diskutieren, bitte ich möchte, dass Sie diese Informationen als eine dringende Nachricht zu behandeln, und antworten Sie mir ohne Verzögerung, so dass ich meine endgültige Entscheidung zu treffen. Weitere Einzelheiten wird euch gegeben werden, wenn ich von Ihnen höre, mit Ihren persönlichen Daten als auch. Ich warte, von Ihnen bald zu hören. Fräulein Laura Buisson. -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 0/6] iscsi changes for scsi-misc
On 12/20/2013 12:53 PM, micha...@cs.wisc.edu wrote: The following patches are some features and fixes for scsi-misc. James, if you were going to merge the libiscsi locking changes here http://www.spinics.net/lists/linux-scsi/msg69903.html do not bother. The qlogic patches that were just merged had a conflict. The patches in the following emails should apply cleanly to misc. Ignore this patchset. It has a regression. I will resend when I am done testing a fix. -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 06/32] target: Convert lu_gp_ref_cnt to kref
On 02/06/2014 03:51 PM, Nicholas A. Bellinger wrote: The problem with this patch and all of the other patches that follow the same logic is the false assumption that it's safe to return from configfs_group_operations-drop_item() before all references to the underlying data structure containing the associated struct config_group have been dropped. In this particular case, target_core_alua_drop_lu_gp() - config_item_put() - target_core_alua_lu_gp_release() - core_alua_free_lu_gp() is still being called from -drop_item() via target_core_alua_lu_gp_ops-release(), so the same holds true here as well. Yes exactly. That's why the configfs release() doesn't free the lu_gp, it just lowers the lu_gp refcount. release() being called doesn't mean the object is going away, it just means configfs is done with it. If do_port_transition has incremented it in the meantime, the lu_gp won't be freed from the release() (because the underlying object's refcount will still be nonzero) but only when do_port_transition is done. In the normal case where there isn't a ref from do_port_transition, then it is safe to free the lu_gp from release - put_alua_lu_gp - release_alua_lu_gp - kmem_cache_free. -- Andy -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 04/16] scsi_dh_alua: Make stpg synchronous
On 01/31/2014 03:29 AM, Hannes Reinecke wrote: We should be issuing STPG synchronously as we need to evaluate the return code on failure. Signed-off-by: Hannes Reinecke h...@suse.de I think we need to also make dm-mpath.c use a non-ordered workqueue for kmpath_handlerd. With this patch and the current ordered workqueue in dm-mpath I think we will only be able to do one STPG at a time. I think if we use a normal old non-ordered workqueue then we would be limited to have max_active STPGs executing. -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 04/16] scsi_dh_alua: Make stpg synchronous
On 02/06/2014 07:24 PM, Mike Christie wrote: On 01/31/2014 03:29 AM, Hannes Reinecke wrote: We should be issuing STPG synchronously as we need to evaluate the return code on failure. Signed-off-by: Hannes Reinecke h...@suse.de I think we need to also make dm-mpath.c use a non-ordered workqueue for kmpath_handlerd. With this patch and the current ordered workqueue in dm-mpath I think we will only be able to do one STPG at a time. I think if we use a normal old non-ordered workqueue then we would be limited to have max_active STPGs executing. I goofed and commented in the order I saw the patches :) I take this comment back for this patch, because I see in 16/16 you added a new workqueue to scsi_dh_alua to do rtpgs and stpgs. For 16/16 though, do we want to make kmpath_aluad a non single threaded workqueue? It looks like max_active for single threaded is only one work at a time too. -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2] scsi: Add 'retry_timeout' to avoid infinite command retry
On Fri, 2014-02-07 at 09:22 +0900, Eiichi Tsukata wrote: Currently, scsi error handling in scsi_io_completion() tries to unconditionally requeue scsi command when device keeps some error state. For example, UNIT_ATTENTION causes infinite retry with action == ACTION_RETRY. This is because retryable errors are thought to be temporary and the scsi device will soon recover from those errors. Normally, such retry policy is appropriate because the device will soon recover from temporary error state. But there is no guarantee that device is able to recover from error state immediately. Actually, we've experienced an infinite retry on some hardware. Therefore hardware error can results in infinite command retry loop. Could you please add an analysis of the actual failure; which devices and what conditions. This patch adds 'retry_timeout' sysfs attribute which limits the retry time of each scsi command. This attribute is located in scsi sysfs directory for example /sys/bus/scsi/devices/X:X:X:X/ and value is in seconds. Once scsi command retry time is longer than this timeout, the command is treated as failure. 'retry_timeout' is set to '0' by default which means no timeout set. Don't do this ... you're mixing a feature (which you'd need to justify) with an apparent bug fix. Once you dump all the complexity, I think the patch boils down to a simple check before the action switch in scsi_io_completion(): if (action != ACTION_FAIL time_before(cmd-jiffies_at_alloc + wait_for, jiffies)) { action = ACTION_FAIL; description = command timed out; } James -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v2] scsi: Add 'retry_timeout' to avoid infinite command retry
On 2014/2/7 13:46, James Bottomley wrote: On Fri, 2014-02-07 at 09:22 +0900, Eiichi Tsukata wrote: Currently, scsi error handling in scsi_io_completion() tries to unconditionally requeue scsi command when device keeps some error state. For example, UNIT_ATTENTION causes infinite retry with action == ACTION_RETRY. This is because retryable errors are thought to be temporary and the scsi device will soon recover from those errors. Normally, such retry policy is appropriate because the device will soon recover from temporary error state. But there is no guarantee that device is able to recover from error state immediately. Actually, we've experienced an infinite retry on some hardware. Therefore hardware error can results in infinite command retry loop. Could you please add an analysis of the actual failure; which devices and what conditions. same question, can you explain? This patch adds 'retry_timeout' sysfs attribute which limits the retry time of each scsi command. This attribute is located in scsi sysfs directory for example /sys/bus/scsi/devices/X:X:X:X/ and value is in seconds. Once scsi command retry time is longer than this timeout, the command is treated as failure. 'retry_timeout' is set to '0' by default which means no timeout set. Don't do this ... you're mixing a feature (which you'd need to justify) with an apparent bug fix. Once you dump all the complexity, I think the patch boils down to a simple check before the action switch in scsi_io_completion(): if (action != ACTION_FAIL time_before(cmd-jiffies_at_alloc + wait_for, jiffies)) { action = ACTION_FAIL; description = command timed out; } James -- To unsubscribe from this list: send the line unsubscribe linux-kernel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/6]: iscsi changes for scsi-misc (v2)
This patchset has Mellanox's libiscsi locking changes, and various fixes. V2 - Fix for MaxCmdSn handling in patch 3/6 where the part of the function that also updates the MaxCmdSn also got cut by accident. -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 6/6] iscsi_tcp: check for valid session before accessing
From: Mike Christie micha...@cs.wisc.edu Check that the session is setup before accessing its connection. This fixes a oops where userspace tries to get the ip address before the session is bound to a host. Signed-off-by: Mike Christie micha...@cs.wisc.edu --- drivers/scsi/iscsi_tcp.c |3 +++ 1 files changed, 3 insertions(+), 0 deletions(-) diff --git a/drivers/scsi/iscsi_tcp.c b/drivers/scsi/iscsi_tcp.c index 12b3512..bfb6d07 100644 --- a/drivers/scsi/iscsi_tcp.c +++ b/drivers/scsi/iscsi_tcp.c @@ -759,6 +759,9 @@ static int iscsi_sw_tcp_host_get_param(struct Scsi_Host *shost, switch (param) { case ISCSI_HOST_PARAM_IPADDRESS: + if (!session) + return -ENOTCONN; + spin_lock_bh(session-frwd_lock); conn = session-leadconn; if (!conn) { -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 4/6] be2iscsi: fix lun test in device reset callout
From: Mike Christie micha...@cs.wisc.edu We want to be checking the scsi_cmnd's lun against the possible tasks in the driver. Current check tests task against itself which was useless. Signed-off-by: Mike Christie micha...@cs.wisc.edu --- drivers/scsi/be2iscsi/be_main.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c index 8d82f2c..bc77a6f 100644 --- a/drivers/scsi/be2iscsi/be_main.c +++ b/drivers/scsi/be2iscsi/be_main.c @@ -325,7 +325,7 @@ static int beiscsi_eh_device_reset(struct scsi_cmnd *sc) if (!abrt_task-sc || abrt_task-state == ISCSI_TASK_FREE) continue; - if (abrt_task-sc-device-lun != abrt_task-sc-device-lun) + if (sc-device-lun != abrt_task-sc-device-lun) continue; /* Invalidate WRB Posted for this Task */ -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/6] SCSI/libiscsi: Restructure iscsi_tcp r2t response logic
From: Shlomo Pongratz shlo...@mellanox.com Restructure the iscsi_tcp_r2t_rsp routine in order to avoid allocating r2t from r2tpool.queue and returning it back in case the parameters rhdr-data_length and or rhdr-data_offset prohibit the requing. Since the values of these parameters are known prior to the allocation, we can pre-check and thus avoid futile allocations. Signed-off-by: Shlomo Pongratz shlo...@mellanox.com Signed-off-by: Or Gerlitz ogerl...@mellanox.com Signed-off-by: Mike Christie micha...@cs.wisc.edu --- drivers/scsi/libiscsi_tcp.c | 43 ++- 1 files changed, 22 insertions(+), 21 deletions(-) diff --git a/drivers/scsi/libiscsi_tcp.c b/drivers/scsi/libiscsi_tcp.c index 1d58d53..7f59073 100644 --- a/drivers/scsi/libiscsi_tcp.c +++ b/drivers/scsi/libiscsi_tcp.c @@ -529,6 +529,8 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task) struct iscsi_r2t_rsp *rhdr = (struct iscsi_r2t_rsp *)tcp_conn-in.hdr; struct iscsi_r2t_info *r2t; int r2tsn = be32_to_cpu(rhdr-r2tsn); + u32 data_length; + u32 data_offset; int rc; if (tcp_conn-in.datalen) { @@ -554,40 +556,39 @@ static int iscsi_tcp_r2t_rsp(struct iscsi_conn *conn, struct iscsi_task *task) return 0; } - rc = kfifo_out(tcp_task-r2tpool.queue, (void*)r2t, sizeof(void*)); - if (!rc) { - iscsi_conn_printk(KERN_ERR, conn, Could not allocate R2T. - Target has sent more R2Ts than it - negotiated for or driver has leaked.\n); - return ISCSI_ERR_PROTO; - } - - r2t-exp_statsn = rhdr-statsn; - r2t-data_length = be32_to_cpu(rhdr-data_length); - if (r2t-data_length == 0) { + data_length = be32_to_cpu(rhdr-data_length); + if (data_length == 0) { iscsi_conn_printk(KERN_ERR, conn, invalid R2T with zero data len\n); - kfifo_in(tcp_task-r2tpool.queue, (void*)r2t, - sizeof(void*)); return ISCSI_ERR_DATALEN; } - if (r2t-data_length session-max_burst) + if (data_length session-max_burst) ISCSI_DBG_TCP(conn, invalid R2T with data len %u and max burst %u. Attempting to execute request.\n, - r2t-data_length, session-max_burst); + data_length, session-max_burst); - r2t-data_offset = be32_to_cpu(rhdr-data_offset); - if (r2t-data_offset + r2t-data_length scsi_out(task-sc)-length) { + data_offset = be32_to_cpu(rhdr-data_offset); + if (data_offset + data_length scsi_out(task-sc)-length) { iscsi_conn_printk(KERN_ERR, conn, invalid R2T with data len %u at offset %u - and total length %d\n, r2t-data_length, - r2t-data_offset, scsi_out(task-sc)-length); - kfifo_in(tcp_task-r2tpool.queue, (void*)r2t, - sizeof(void*)); + and total length %d\n, data_length, + data_offset, scsi_out(task-sc)-length); return ISCSI_ERR_DATALEN; } + rc = kfifo_out(tcp_task-r2tpool.queue, (void*)r2t, sizeof(void*)); + if (!rc) { + iscsi_conn_printk(KERN_ERR, conn, Could not allocate R2T. + Target has sent more R2Ts than it + negotiated for or driver has leaked.\n); + return ISCSI_ERR_PROTO; + } + + r2t-exp_statsn = rhdr-statsn; + r2t-data_length = data_length; + r2t-data_offset = data_offset; + r2t-ttt = rhdr-ttt; /* no flip */ r2t-datasn = 0; r2t-sent = 0; -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/6] libiscsi: remove unneeded queue work when max_cmdsn is increased
From: Mike Christie micha...@cs.wisc.edu iscsi_queuecommand will only take in commands that can fit in the current window. So, if a command is on the cmdqueue then it can fit in the current window. If a command is on the mgmtqueue, then we are setting the immediate bit so they will also fit in the window. As a result, we never need to to do a iscsi_conn_queue_work when the maxCmdSn is increased. What should happen is that a command will complete the window will be increased, then the scsi layer will send us more commands by running the scsi_device queues. Signed-off-by: Mike Christie micha...@cs.wisc.edu --- drivers/scsi/libiscsi.c | 10 +- 1 files changed, 1 insertions(+), 9 deletions(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 6afb6fc..9ca42a2 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -110,16 +110,8 @@ static void __iscsi_update_cmdsn(struct iscsi_session *session, session-exp_cmdsn = exp_cmdsn; if (max_cmdsn != session-max_cmdsn - !iscsi_sna_lt(max_cmdsn, session-max_cmdsn)) { + !iscsi_sna_lt(max_cmdsn, session-max_cmdsn)) session-max_cmdsn = max_cmdsn; - /* -* if the window closed with IO queued, then kick the -* xmit thread -*/ - if (!list_empty(session-leadconn-cmdqueue) || - !list_empty(session-leadconn-mgmtqueue)) - iscsi_conn_queue_work(session-leadconn); - } } void iscsi_update_cmdsn(struct iscsi_session *session, struct iscsi_nopin *hdr) -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/6] SCSI/libiscsi: Reduce locking contention in fast path (v2)
From: Shlomo Pongratz shlo...@mellanox.com Replace the session lock with two locks, a forward lock and a backwards lock named frwd_lock and back_lock respectively. The forward lock protects resources that change while sending a request to the target, such as cmdsn, queued_cmdsn, and allocating task from the commands' pool with kfifo_out. The backward lock protects resources that change while processing a response or in error path, such as cmdsn_exp, cmdsn_max, and returning tasks to the commands' pool with kfifo_in. Under a steady state fast-path situation, that is when one or more processes/threads submit IO to an iscsi device and a single kernel upcall (e.g softirq) is dealing with processing of responses without errors, this patch eliminates the contention between the queuecommand()/request response/scsi_done() flows associated with iscsi sessions. Between the forward and the backward locks exists a strict locking hierarchy. The mutual exclusion zone protected by the forward lock can enclose the mutual exclusion zone protected by the backward lock but not vice versa. For example, in iscsi_conn_teardown or in iscsi_xmit_data when there is a failure and __iscsi_put_task is called, the backward lock is taken while the forward lock is still taken. On the other hand, if in the RX path a nop is to be sent, for example in iscsi_handle_reject or __iscsi_complete_pdu than the forward lock is released and the backward lock is taken for the duration of iscsi_send_nopout, later the backward lock is released and the forward lock is retaken. libiscsi_tcp uses two kernel fifos the r2t pool and the r2t queue. The insertion and deletion from these queues didn't corespond to the assumption taken by the new forward/backwards session locking paradigm. That is, in iscsi_tcp_clenup_task which belongs to the RX (backwards) path, r2t is taken out from r2t queue and inserted to the r2t pool. In iscsi_tcp_get_curr_r2t which belong to the TX (forward) path, r2t is also inserted to the r2t pool and another r2t is pulled from r2t queue. Only in iscsi_tcp_r2t_rsp which is called in the RX path but can requeue to the TX path, r2t is taken from the r2t pool and inserted to the r2t queue. In order to cope with this situation, two spin locks were added, pool2queue and queue2pool. The former protects extracting from the r2t pool and inserting to the r2t queue, and the later protects the extracing from the r2t queue and inserting to the r2t pool. Signed-off-by: Shlomo Pongratz shlo...@mellanox.com Signed-off-by: Or Gerlitz ogerl...@mellanox.com [minor fix up to apply cleanly and compile fix] Signed-off-by: Mike Christie micha...@cs.wisc.edu --- drivers/scsi/be2iscsi/be_main.c | 26 +++--- drivers/scsi/bnx2i/bnx2i_hwi.c | 46 drivers/scsi/bnx2i/bnx2i_iscsi.c |8 +- drivers/scsi/iscsi_tcp.c | 22 ++-- drivers/scsi/libiscsi.c | 214 + drivers/scsi/libiscsi_tcp.c | 28 +++-- drivers/scsi/qla4xxx/ql4_isr.c |4 +- include/scsi/libiscsi.h | 17 ++- include/scsi/libiscsi_tcp.h |2 + 9 files changed, 206 insertions(+), 161 deletions(-) diff --git a/drivers/scsi/be2iscsi/be_main.c b/drivers/scsi/be2iscsi/be_main.c index 1f37505..8d82f2c 100644 --- a/drivers/scsi/be2iscsi/be_main.c +++ b/drivers/scsi/be2iscsi/be_main.c @@ -232,20 +232,20 @@ static int beiscsi_eh_abort(struct scsi_cmnd *sc) cls_session = starget_to_session(scsi_target(sc-device)); session = cls_session-dd_data; - spin_lock_bh(session-lock); + spin_lock_bh(session-frwd_lock); if (!aborted_task || !aborted_task-sc) { /* we raced */ - spin_unlock_bh(session-lock); + spin_unlock_bh(session-frwd_lock); return SUCCESS; } aborted_io_task = aborted_task-dd_data; if (!aborted_io_task-scsi_cmnd) { /* raced or invalid command */ - spin_unlock_bh(session-lock); + spin_unlock_bh(session-frwd_lock); return SUCCESS; } - spin_unlock_bh(session-lock); + spin_unlock_bh(session-frwd_lock); /* Invalidate WRB Posted for this Task */ AMAP_SET_BITS(struct amap_iscsi_wrb, invld, aborted_io_task-pwrb_handle-pwrb, @@ -307,9 +307,9 @@ static int beiscsi_eh_device_reset(struct scsi_cmnd *sc) /* invalidate iocbs */ cls_session = starget_to_session(scsi_target(sc-device)); session = cls_session-dd_data; - spin_lock_bh(session-lock); + spin_lock_bh(session-frwd_lock); if (!session-leadconn || session-state != ISCSI_STATE_LOGGED_IN) { - spin_unlock_bh(session-lock); + spin_unlock_bh(session-frwd_lock); return FAILED; } conn = session-leadconn; @@ -338,7 +338,7 @@ static int beiscsi_eh_device_reset(struct scsi_cmnd *sc) num_invalidate++;