[PATCHv3 3/6] hpsa: Ignore errors for unsupported LV_DEVICE_ID VPD page

2017-08-14 Thread Hannes Reinecke
Legacy boards might not support the LV_DEVICE_ID VPD page, so
we shouldn't print out an error message here.

Signed-off-by: Hannes Reinecke 
Reviewed-by: Christoph Hellwig 
Acked-by: Don Brace 
---
 drivers/scsi/hpsa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 0b0b6dc..b34ec42 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -3827,7 +3827,7 @@ static int hpsa_update_device_info(struct ctlr_info *h,
memset(this_device->device_id, 0,
sizeof(this_device->device_id));
if (hpsa_get_device_id(h, scsi3addr, this_device->device_id, 8,
-   sizeof(this_device->device_id)))
+   sizeof(this_device->device_id) < 0))
dev_err(&h->pdev->dev,
"hpsa%d: %s: can't get device id for host 
%d:C0:T%d:L%d\t%s\t%.16s\n",
h->ctlr, __func__,
-- 
1.8.5.6



[PATCHv3 0/6] hpsa legacy support

2017-08-14 Thread Hannes Reinecke
Hi all,

here's now the third attempt to add support for legacy boards, ie
for boards previously supported by cciss only.
With this patchset the hpsa driver should work with all Smart Array
boards if the 'hpsa_allow_any' module option is set, rendering the
cciss driver obsolete.
Consequently I've added a patch to remove the cciss driver and make
'hpsa' an alias to 'cciss' and removed the 'hpsa_allow_any' module
option.

Hannes Reinecke (6):
  hpsa: add support for legacy boards
  hpsa: disable volume status check for legacy boards
  hpsa: Ignore errors for unsupported LV_DEVICE_ID VPD page
  hpsa: do not print errors for unsupported report luns format
  cciss: Drop obsolete driver
  hpsa: Remove 'hpsa_allow_any' module option

 Documentation/blockdev/cciss.txt |  194 --
 MAINTAINERS  |   10 -
 drivers/block/Kconfig|   27 -
 drivers/block/Makefile   |1 -
 drivers/block/cciss.c| 5415 --
 drivers/block/cciss.h|  433 ---
 drivers/block/cciss_cmd.h|  269 --
 drivers/block/cciss_scsi.c   | 1653 
 drivers/block/cciss_scsi.h   |   79 -
 drivers/scsi/hpsa.c  |  108 +-
 drivers/scsi/hpsa.h  |   81 +-
 11 files changed, 142 insertions(+), 8128 deletions(-)
 delete mode 100644 Documentation/blockdev/cciss.txt
 delete mode 100644 drivers/block/cciss.c
 delete mode 100644 drivers/block/cciss.h
 delete mode 100644 drivers/block/cciss_cmd.h
 delete mode 100644 drivers/block/cciss_scsi.c
 delete mode 100644 drivers/block/cciss_scsi.h

-- 
1.8.5.6



[PATCHv3 4/6] hpsa: do not print errors for unsupported report luns format

2017-08-14 Thread Hannes Reinecke
Legacy boards might not support the 'extended' report luns format,
but as this is to be expected we don't need to print out an error here.

Signed-off-by: Hannes Reinecke 
Acked-by: Don Brace 
---
 drivers/scsi/hpsa.c | 20 +++-
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index b34ec42..2da8f6f 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -3601,7 +3601,7 @@ static int hpsa_scsi_do_report_luns(struct ctlr_info *h, 
int logical,
memset(scsi3addr, 0, sizeof(scsi3addr));
if (fill_cmd(c, logical ? HPSA_REPORT_LOG : HPSA_REPORT_PHYS, h,
buf, bufsize, 0, scsi3addr, TYPE_CMD)) {
-   rc = -1;
+   rc = -EAGAIN;
goto out;
}
if (extended_response)
@@ -3614,16 +3614,19 @@ static int hpsa_scsi_do_report_luns(struct ctlr_info 
*h, int logical,
if (ei->CommandStatus != 0 &&
ei->CommandStatus != CMD_DATA_UNDERRUN) {
hpsa_scsi_interpret_error(h, c);
-   rc = -1;
+   rc = -EIO;
} else {
struct ReportLUNdata *rld = buf;
 
if (rld->extended_response_flag != extended_response) {
-   dev_err(&h->pdev->dev,
-   "report luns requested format %u, got %u\n",
-   extended_response,
-   rld->extended_response_flag);
-   rc = -1;
+   if (!h->legacy_board) {
+   dev_err(&h->pdev->dev,
+   "report luns requested format %u, got 
%u\n",
+   extended_response,
+   rld->extended_response_flag);
+   rc = -EINVAL;
+   } else
+   rc = -EOPNOTSUPP;
}
}
 out:
@@ -3639,7 +3642,7 @@ static inline int hpsa_scsi_do_report_phys_luns(struct 
ctlr_info *h,
 
rc = hpsa_scsi_do_report_luns(h, 0, buf, bufsize,
  HPSA_REPORT_PHYS_EXTENDED);
-   if (!rc || !hpsa_allow_any)
+   if (!rc || rc != -EOPNOTSUPP)
return rc;
 
/* REPORT PHYS EXTENDED is not supported */
@@ -6617,7 +6620,6 @@ static int fill_cmd(struct CommandList *c, u8 cmd, struct 
ctlr_info *h,
default:
dev_warn(&h->pdev->dev, "unknown command 0x%c\n", cmd);
BUG();
-   return -1;
}
} else if (cmd_type == TYPE_MSG) {
switch (cmd) {
-- 
1.8.5.6



[PATCHv3 6/6] hpsa: Remove 'hpsa_allow_any' module option

2017-08-14 Thread Hannes Reinecke
As the cciss driver has been removed there are no overlapping
PCI IDs anymore, and the 'hpsa_allow_any' flag can be removed.

Signed-off-by: Hannes Reinecke 
---
 drivers/scsi/hpsa.c | 26 +++---
 1 file changed, 7 insertions(+), 19 deletions(-)

diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index c633b35..2773dd7 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -83,10 +83,6 @@
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("cciss");
 
-static int hpsa_allow_any;
-module_param(hpsa_allow_any, int, S_IRUGO|S_IWUSR);
-MODULE_PARM_DESC(hpsa_allow_any,
-   "Allow hpsa driver to access unknown HP Smart Array hardware");
 static int hpsa_simple_mode;
 module_param(hpsa_simple_mode, int, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(hpsa_simple_mode,
@@ -7299,23 +7295,15 @@ static int hpsa_lookup_board_id(struct pci_dev *pdev, 
u32 *board_id,
if (products[i].access != &SA5A_access &&
products[i].access != &SA5B_access)
return i;
-   if (hpsa_allow_any) {
-   dev_warn(&pdev->dev,
-"legacy board ID: 0x%08x\n",
-*board_id);
-   if (legacy_board)
-   *legacy_board = true;
-   return i;
-   }
+   dev_warn(&pdev->dev,
+"legacy board ID: 0x%08x\n",
+*board_id);
+   if (legacy_board)
+   *legacy_board = true;
+   return i;
}
 
-   if ((subsystem_vendor_id != PCI_VENDOR_ID_HP &&
-   subsystem_vendor_id != PCI_VENDOR_ID_COMPAQ) ||
-   !hpsa_allow_any) {
-   dev_warn(&pdev->dev, "unrecognized board ID: "
-   "0x%08x, ignoring.\n", *board_id);
-   return -ENODEV;
-   }
+   dev_warn(&pdev->dev, "unrecognized board ID: 0x%08x\n", *board_id);
if (legacy_board)
*legacy_board = true;
return ARRAY_SIZE(products) - 1; /* generic unknown smart array */
-- 
1.8.5.6



[PATCHv3 1/6] hpsa: add support for legacy boards

2017-08-14 Thread Hannes Reinecke
Add support for legacy boards, ensuring to enable the driver for
those boards only when 'hpsa_allow_any' is set.
The attribute 'legacy_board' is set to '1' if the device is
a legacy board, and '0' otherwise.

Signed-off-by: Hannes Reinecke 
Acked-by: Don Brace 
---
 drivers/scsi/hpsa.c | 67 +++-
 drivers/scsi/hpsa.h | 81 -
 2 files changed, 121 insertions(+), 27 deletions(-)

diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 4f7cdb2..fbe7fbc 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -148,6 +148,8 @@
{PCI_VENDOR_ID_HP, 0x333f, 0x103c, 0x333f},
{PCI_VENDOR_ID_HP, PCI_ANY_ID,  PCI_ANY_ID, PCI_ANY_ID,
PCI_CLASS_STORAGE_RAID << 8, 0x << 8, 0},
+   {PCI_VENDOR_ID_COMPAQ, PCI_ANY_ID,  PCI_ANY_ID, PCI_ANY_ID,
+   PCI_CLASS_STORAGE_RAID << 8, 0x << 8, 0},
{0,}
 };
 
@@ -158,6 +160,26 @@
  *  access = Address of the struct of function pointers
  */
 static struct board_type products[] = {
+   {0x40700E11, "Smart Array 5300", &SA5A_access},
+   {0x40800E11, "Smart Array 5i", &SA5B_access},
+   {0x40820E11, "Smart Array 532", &SA5B_access},
+   {0x40830E11, "Smart Array 5312", &SA5B_access},
+   {0x409A0E11, "Smart Array 641", &SA5A_access},
+   {0x409B0E11, "Smart Array 642", &SA5A_access},
+   {0x409C0E11, "Smart Array 6400", &SA5A_access},
+   {0x409D0E11, "Smart Array 6400 EM", &SA5A_access},
+   {0x40910E11, "Smart Array 6i", &SA5A_access},
+   {0x3225103C, "Smart Array P600", &SA5A_access},
+   {0x3223103C, "Smart Array P800", &SA5A_access},
+   {0x3234103C, "Smart Array P400", &SA5A_access},
+   {0x3235103C, "Smart Array P400i", &SA5A_access},
+   {0x3211103C, "Smart Array E200i", &SA5A_access},
+   {0x3212103C, "Smart Array E200", &SA5A_access},
+   {0x3213103C, "Smart Array E200i", &SA5A_access},
+   {0x3214103C, "Smart Array E200i", &SA5A_access},
+   {0x3215103C, "Smart Array E200i", &SA5A_access},
+   {0x3237103C, "Smart Array E500", &SA5A_access},
+   {0x323D103C, "Smart Array P700m", &SA5A_access},
{0x3241103C, "Smart Array P212", &SA5_access},
{0x3243103C, "Smart Array P410", &SA5_access},
{0x3245103C, "Smart Array P410i", &SA5_access},
@@ -278,7 +300,8 @@ static int hpsa_find_cfg_addrs(struct pci_dev *pdev, void 
__iomem *vaddr,
   u64 *cfg_offset);
 static int hpsa_pci_find_memory_BAR(struct pci_dev *pdev,
unsigned long *memory_bar);
-static int hpsa_lookup_board_id(struct pci_dev *pdev, u32 *board_id);
+static int hpsa_lookup_board_id(struct pci_dev *pdev, u32 *board_id,
+   bool *legacy_board);
 static int wait_for_device_to_become_ready(struct ctlr_info *h,
   unsigned char lunaddr[],
   int reply_queue);
@@ -866,6 +889,16 @@ static ssize_t host_show_ctlr_num(struct device *dev,
return snprintf(buf, 20, "%d\n", h->ctlr);
 }
 
+static ssize_t host_show_legacy_board(struct device *dev,
+   struct device_attribute *attr, char *buf)
+{
+   struct ctlr_info *h;
+   struct Scsi_Host *shost = class_to_shost(dev);
+
+   h = shost_to_hba(shost);
+   return snprintf(buf, 20, "%d\n", h->legacy_board ? 1 : 0);
+}
+
 static DEVICE_ATTR(raid_level, S_IRUGO, raid_level_show, NULL);
 static DEVICE_ATTR(lunid, S_IRUGO, lunid_show, NULL);
 static DEVICE_ATTR(unique_id, S_IRUGO, unique_id_show, NULL);
@@ -891,6 +924,8 @@ static DEVICE_ATTR(lockup_detected, S_IRUGO,
host_show_lockup_detected, NULL);
 static DEVICE_ATTR(ctlr_num, S_IRUGO,
host_show_ctlr_num, NULL);
+static DEVICE_ATTR(legacy_board, S_IRUGO,
+   host_show_legacy_board, NULL);
 
 static struct device_attribute *hpsa_sdev_attrs[] = {
&dev_attr_raid_level,
@@ -912,6 +947,7 @@ static DEVICE_ATTR(ctlr_num, S_IRUGO,
&dev_attr_raid_offload_debug,
&dev_attr_lockup_detected,
&dev_attr_ctlr_num,
+   &dev_attr_legacy_board,
NULL,
 };
 
@@ -7232,7 +7268,8 @@ static int hpsa_interrupt_mode(struct ctlr_info *h)
return 0;
 }
 
-static int hpsa_lookup_board_id(struct pci_dev *pdev, u32 *board_id)
+static int hpsa_lookup_board_id(struct pci_dev *pdev, u32 *board_id,
+   bool *legacy_board)
 {
int i;
u32 subsystem_vendor_id, subsystem_device_id;
@@ -7242,9 +7279,22 @@ static int hpsa_lookup_board_id(struct pci_dev *pdev, 
u32 *board_id)
*board_id = ((subsystem_device_id << 16) & 0x) |
subsystem_vendor_id;
 
+   if (legacy_board)
+   *legacy_board = false;
for (i = 0; i < ARRAY_SIZE(products); i++)
-   if (*board_id == products[i].board_id)
-   return i;
+   if (

[PATCHv3 2/6] hpsa: disable volume status check for legacy boards

2017-08-14 Thread Hannes Reinecke
Legacy boards might not support volume status, so assume
the volume is online here.

Signed-off-by: Hannes Reinecke 
Acked-by: Don Brace 
---
 drivers/scsi/hpsa.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index fbe7fbc..0b0b6dc 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -3845,6 +3845,16 @@ static int hpsa_update_device_info(struct ctlr_info *h,
if (h->fw_support & MISC_FW_RAID_OFFLOAD_BASIC)
hpsa_get_ioaccel_status(h, scsi3addr, this_device);
volume_offline = hpsa_volume_offline(h, scsi3addr);
+   if (volume_offline == HPSA_VPD_LV_STATUS_UNSUPPORTED &&
+   h->legacy_board) {
+   /*
+* Legacy boards might not support volume status
+*/
+   dev_info(&h->pdev->dev,
+"C0:T%d:L%d Volume status not available, 
assuming online.\n",
+this_device->target, this_device->lun);
+   volume_offline = 0;
+   }
this_device->volume_offline = volume_offline;
if (volume_offline == HPSA_LV_FAILED) {
rc = HPSA_LV_FAILED;
-- 
1.8.5.6



Re: [PATCHv2 0/6] hpsa legacy support

2017-08-14 Thread Hannes Reinecke
On 08/15/2017 05:39 AM, Martin K. Petersen wrote:
> 
> Hannes,
> 
>> here's now the second attempt to add support for legacy boards, ie for
>> boards previously supported by cciss only.  With this patchset the
>> hpsa driver should work with all Smart Array boards if the
>> 'hpsa_allow_any' module option is set, rendering the cciss driver
>> obsolete.  Consequently I've added a patch to remove the cciss driver
>> and make 'hpsa' an alias to 'cciss'.
> 
> How does the module alias solve the problem that hpsa_allow_any=1 needs
> to be added for users currently running cciss?
> 
> I also fail to see what the point is of having an opt-in parameter when
> there is no alternative after cciss has been removed. It's a
> don't-be-broken switch.
> 
Hmm ... considering this (and the fact that we now have the
->legacy_board switch) we can indeed remove that option.
Will be redoing the patchset.

Cheers,

Hannes
-- 
Dr. Hannes ReineckeTeamlead Storage & Networking
h...@suse.de   +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)


Re: [PATCHv2 0/6] hpsa legacy support

2017-08-14 Thread Martin K. Petersen

Hannes,

> here's now the second attempt to add support for legacy boards, ie for
> boards previously supported by cciss only.  With this patchset the
> hpsa driver should work with all Smart Array boards if the
> 'hpsa_allow_any' module option is set, rendering the cciss driver
> obsolete.  Consequently I've added a patch to remove the cciss driver
> and make 'hpsa' an alias to 'cciss'.

How does the module alias solve the problem that hpsa_allow_any=1 needs
to be added for users currently running cciss?

I also fail to see what the point is of having an opt-in parameter when
there is no alternative after cciss has been removed. It's a
don't-be-broken switch.

-- 
Martin K. Petersen  Oracle Linux Engineering


Re: [PATCHv3 2/5] scsi: Export blacklist flags to sysfs

2017-08-14 Thread Martin K. Petersen

Hannes,

> + name = sdev_bflags_name(bflags);
> + if (name)
> + blen = snprintf(ptr, strlen(name) + 1,
> + "%s", name);
> + else
> + blen = snprintf(ptr, 67, "0x%X", bflags);

It seems this else statement facilitates papering over the fact that
scsi_sysfs.c and scsi_devinfo.h can get out of sync.

-- 
Martin K. Petersen  Oracle Linux Engineering


Re: [PATCH] sym53c8xx: Avoid undefined behaviour in drivers/scsi/sym53c8xx_2/sym_hipd.c:762

2017-08-14 Thread Martin K. Petersen

Helge,

> On parisc I see this UBSAN warning with a sym53c896:
>
>  UBSAN: Undefined behaviour in ./drivers/scsi/sym53c8xx_2/sym_hipd.c:762:24
>  index -1903078336 is out of range for type 'u32 [7]'
>
> Avoid this warning by switching to dev64_ul().

Applied to 4.14/scsi-queue. Thank you, Helge!

-- 
Martin K. Petersen  Oracle Linux Engineering


Re: [PATCHv2 0/2] scsi: pollable 'state' attribute

2017-08-14 Thread Martin K. Petersen

Hannes,

> here's a small patchset to make the 'state' device attribute pollable.
> It was supposed to be a small patch, but then hch suggested a rework.

Applied to 4.14/scsi-queue. Thank you!

-- 
Martin K. Petersen  Oracle Linux Engineering


Re: [PATCH] Revert "scsi: default to scsi-mq"

2017-08-14 Thread Martin K. Petersen

Christoph,

> Defaulting to scsi-mq in 4.13-rc has shown various regressions
> on setups that we didn't previously consider.  Fixes for them are
> in progress, but too invasive to make it in this cycle.  So for
> now revert the commit that defaults to blk-mq for SCSI.  For 4.14
> we'll plan to try again with these fixes.
>
> This reverts commit 5c279bd9e40624f4ab6e688671026d6005b066fa.

Applied to 4.13/scsi-fixes. Thanks!

-- 
Martin K. Petersen  Oracle Linux Engineering


答复: [iscsi] Deadlock occurred when network is in error

2017-08-14 Thread Tangchen (UVP)
Hi, Bart,

Thank you very much for the quick response. 

But I'm not using mq, and I run into these two problems in a non-mq system.
The patch you pointed out is fix for mq, so I don't think it can resolve this 
problem.

IIUC, mq is for SSD ?  I'm not using ssd, so mq is disabled.


On Mon, 2017-08-14 at 11:23 +, Tangchen (UVP) wrote:
> Problem 2:
> 
> ***
> [What it looks like]
> ***
> When remove a scsi device, and the network error happens, __blk_drain_queue() 
> could hang forever.
> 
> # cat /proc/19160/stack
> [] msleep+0x1d/0x30
> [] __blk_drain_queue+0xe4/0x160 [] 
> blk_cleanup_queue+0x106/0x2e0 [] 
> __scsi_remove_device+0x52/0xc0 [scsi_mod] [] 
> scsi_remove_device+0x2b/0x40 [scsi_mod] [] 
> sdev_store_delete_callback+0x10/0x20 [scsi_mod] [] 
> sysfs_schedule_callback_work+0x15/0x80
> [] process_one_work+0x169/0x340 [] 
> worker_thread+0x183/0x490 [] kthread+0x96/0xa0 
> [] kernel_thread_helper+0x4/0x10 
> [] 0x
> 
> The request queue of this device was stopped. So the following check will be 
> true forever:
> __blk_run_queue()
> {
> if (unlikely(blk_queue_stopped(q)))
> return;
> 
> __blk_run_queue_uncond(q);
> }
> 
> So __blk_run_queue_uncond() will never be called, and the process hang.
> 
> [ ... ]
>
> 
> [How to reproduce]
> 
> Unfortunately I cannot reproduce it in the latest kernel. 
> The script below will help to reproduce, but not very often.
> 
> # create network error
> tc qdisc add dev eth1 root netem loss 60%
> 
> # restart iscsid and rescan scsi bus again and again while [ 1 ] do 
> systemctl restart iscsid
> rescan-scsi-bus
> (http://manpages.ubuntu.com/manpages/trusty/man8/rescan-scsi-bus.8.html)
> done

This should have been fixed by commit 36e3cf273977 ("scsi: Avoid that SCSI 
queues get stuck"). The first mainline kernel that includes this commit is 
kernel v4.11.

> void __blk_run_queue(struct request_queue *q) {
> -   if (unlikely(blk_queue_stopped(q)))
> +   if (unlikely(blk_queue_stopped(q)) && 
> + unlikely(!blk_queue_dying(q)))
> return;
> 
> __blk_run_queue_uncond(q);

Are you aware that the single queue block layer is on its way out and will be 
removed sooner or later? Please focus your testing on scsi-mq. 

Regarding the above patch: it is wrong because it will cause lockups during 
path removal for other block drivers. Please drop this patch.

Bart.


[PATCH] sd: read unmap block limits even if lbpme=0

2017-08-14 Thread tom . ty89
From: Tom Yan 

Some devices may not be decent enough to report lbpme bit properly
even when they do support unmap and report relevant information
in the block limits and logical block provisioning VPDs properly
(Namely, ASMedia ASM1351, a UAS-SATA bridge). One of the reasons
is, lbpme=1 is not a requirement for "DeleteNotify" in Windows to
be activated:

[root@archlinux ~]# sg_readcap -l /dev/sda | grep lb
   Logical block provisioning: lbpme=0, lbprz=0
[root@archlinux ~]# sg_vpd -p lbpv /dev/sda | grep \(LB
  Unmap command supported (LBPU): 1
  Write same (16) with unmap bit supported (LBWS): 0
  Write same (10) with unmap bit supported (LBWS10): 0
  Logical block provisioning read zeros (LBPRZ): 0
[root@archlinux ~]# sg_vpd -p bl /dev/sda | grep -i unmap
  Maximum unmap LBA count: 4194240
  Maximum unmap block descriptor count: 1
  Optimal unmap granularity: 1
  Unmap granularity alignment valid: 0
  Unmap granularity alignment: 0

While there may be a point to retain the "strict policy" on this,
by not configuring discard for such device automatically, there
is little reason not to read the relevant data from the VPD, for
users are allowed to configure discard for a device via the
provisioning_mode sysfs file.

While discard_max_bytes can be changed manually, it is better
if the value would be limited by a discard_max_hw_bytes that is
set from the hardware limit that is reported in the VPD.

Before this commit:
[root@archlinux ~]# cat (...)/provisioning_mode
full
[root@archlinux ~]# grep . /sys/block/sda/queue/discard_*
/sys/block/sda/queue/discard_granularity:0
/sys/block/sda/queue/discard_max_bytes:0
/sys/block/sda/queue/discard_max_hw_bytes:0
/sys/block/sda/queue/discard_zeroes_data:0
[root@archlinux ~]# echo -n unmap > (...)/provisioning_mode
[root@archlinux ~]# grep . /sys/block/sda/queue/discard_*
/sys/block/sda/queue/discard_granularity:512
/sys/block/sda/queue/discard_max_bytes:4294966784
/sys/block/sda/queue/discard_max_hw_bytes:4294966784
/sys/block/sda/queue/discard_zeroes_data:0

After this commit:
[root@archlinux ~]# cat (...)/provisioning_mode
full
[root@archlinux ~]# grep . /sys/block/sda/queue/discard_*
/sys/block/sda/queue/discard_granularity:0
/sys/block/sda/queue/discard_max_bytes:0
/sys/block/sda/queue/discard_max_hw_bytes:0
/sys/block/sda/queue/discard_zeroes_data:0
[root@archlinux ~]# echo -n unmap > (...)/provisioning_mode
[root@archlinux ~]# grep . /sys/block/sda/queue/discard_*
/sys/block/sda/queue/discard_granularity:512
/sys/block/sda/queue/discard_max_bytes:2147450880
/sys/block/sda/queue/discard_max_hw_bytes:2147450880
/sys/block/sda/queue/discard_zeroes_data:0

Signed-off-by: Tom Yan 

diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index bea36adeee17..ea9e6fc76b63 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -2883,9 +2883,6 @@ static void sd_read_block_limits(struct scsi_disk *sdkp)
 
sdkp->max_ws_blocks = (u32)get_unaligned_be64(&buffer[36]);
 
-   if (!sdkp->lbpme)
-   goto out;
-
lba_count = get_unaligned_be32(&buffer[20]);
desc_count = get_unaligned_be32(&buffer[24]);
 
@@ -2898,6 +2895,9 @@ static void sd_read_block_limits(struct scsi_disk *sdkp)
sdkp->unmap_alignment =
get_unaligned_be32(&buffer[32]) & ~(1 << 31);
 
+   if (!sdkp->lbpme)
+   goto out;
+
if (!sdkp->lbpvpd) { /* LBP VPD page not provided */
 
if (sdkp->max_unmap_blocks)
-- 
2.14.1



Re: Spurious DISK_EVENT_MEDIA_CHANGE on USB DVD hotplug?

2017-08-14 Thread Tejun Heo
Hello, Joe.

On Thu, Aug 10, 2017 at 10:45:54AM -0400, Joe Lawrence wrote:
> In the case of my USB DVD -> laptop example, there is no media in my
> device, however I still see the DISK_EVENT_MEDIA_CHANGE event.  This is
> a bit confusing, and I was wondering if there was an explanation for
> the following:
> 
> drivers/scsi/sr.c :: sr_probe()
> 
> disk->events = DISK_EVENT_MEDIA_CHANGE | DISK_EVENT_EJECT_REQUEST;
> ...
> cd->media_present = 1;
> 
> DISK_EVENT_MEDIA_CHANGE events will pass through to userspace and
> for some reason cd->media_present defaults to 1?  More on that below...

I don't have any concrete ideas but I think the only thing it's trying
to do is to always generate at least one changed event no matter what.

...
> sr_check_events() compares the previous (in this case, default)
> media_present value with what the TUR returns.  If it has changed, then
> turn on the DISK_EVENT_MEDIA_CHANGE event bit.
> 
> In my laptop USB DVD case, !scsi_status_is_good and sshdr.asc == 0x3a,
> so last_present (1) and cd->media_present (0) mis-compare and the change
> event is set.  That does not seem intuitive to me, is this a bug?

It's not incorrect.  We can try to change the behavior to avoid double
notifications but given that this has been like this for a really long
time and that it isn't technically incorrect, I'm not sure changing it
is a good idea.  It might as well break other things.

> Bringing this back to the reported BMC case, which presumably does have
> "media" present in the virtual device... is it reasonable to expect a
> DISK_EVENT_MEDIA_CHANGE even for a new device that contains media?  (I
> haven't verified, but in this case GET_EVENT_STATUS_NOTIFICATION might
> be enough to set media present.)

Yeah, I think so.

> If there is documentation that explains DISK_EVENT_MEDIA_CHANGE conditions
> somewhere, feel free to point me there.

AFAIK, there isn't any.  The only thing it tries to do is generating
at least one event after media change.  Given that media is reported
present after the last notification, I think userspace should be able
to do the right thing (and must have been doing the right thing until
recently).

Thanks.

-- 
tejun


RE: [PATCHv2 6/6] cciss: Drop obsolete driver

2017-08-14 Thread Don Brace
> -Original Message-
> From: Hannes Reinecke [mailto:h...@suse.de]
> Sent: Monday, August 14, 2017 6:19 AM
> To: Martin K. Petersen 
> Cc: Christoph Hellwig ; James Bottomley
> ; Don Brace
> ; Meelis Roos ; linux-
> s...@vger.kernel.org; Hannes Reinecke ; Hannes Reinecke
> 
> Subject: [PATCHv2 6/6] cciss: Drop obsolete driver
> 
> EXTERNAL EMAIL
> 
> 
> The hpsa driver now has support for all boards the cciss driver
> used to support, so this patch removes the cciss driver and
> make hpsa an alias to cciss.
> 
> Signed-off-by: Hannes Reinecke 
> Cc: Don Brace 
> ---
>  Documentation/blockdev/cciss.txt |  194 --
>  MAINTAINERS  |   10 -
>  drivers/block/Kconfig|   27 -
>  drivers/block/Makefile   |1 -
>  drivers/block/cciss.c| 5415 
> --
>  drivers/block/cciss.h|  433 ---
>  drivers/block/cciss_cmd.h|  269 --
>  drivers/block/cciss_scsi.c   | 1653 
>  drivers/block/cciss_scsi.h   |   79 -
>  drivers/scsi/hpsa.c  |1 +
>  10 files changed, 1 insertion(+), 8081 deletions(-)
>  delete mode 100644 Documentation/blockdev/cciss.txt
>  delete mode 100644 drivers/block/cciss.c
>  delete mode 100644 drivers/block/cciss.h
>  delete mode 100644 drivers/block/cciss_cmd.h
>  delete mode 100644 drivers/block/cciss_scsi.c
>  delete mode 100644 drivers/block/cciss_scsi.h
> 
Acked-by: Don Brace 


Re: [RFC PATCH 1/6] bsg: fix kernel panic resulting from missing allocation of a reply-buffer

2017-08-14 Thread Benjamin Block
On Sun, Aug 13, 2017 at 04:39:40PM +0200, Christoph Hellwig wrote:
> On Fri, Aug 11, 2017 at 06:01:42PM +0200, Benjamin Block wrote:
> > When the BSG interface is used with bsg-lib, and the user sends a
> > Bidirectional command - so when he gives an input- and output-buffer
> > (most users of our interface will likely do that, if they wanna get the
> > transport-level response data) - bsg will allocate two requests from the
> > queue. The first request's bio is used to map the input and the second
> > request's bio for the output (see bsg_map_hdr() in the if-statement with
> > (op == REQ_OP_SCSI_OUT && hdr->din_xfer_len)).
> > 
> > When we now allocate the full space of bsg_job, sense, dd_data for each
> > request, these will be wasted on the (linked) second request. They will
> > go unused all the time, as only the first request's bsg_job, sense and
> > dd_data is used by the LLDs and BSG itself.
> > 
> > Right now, because we don't allocate this on each request, those spaces
> > are only allocated for the first request in bsg-lib.
> > 
> > Maybe we can ignore this, if it gets to complicated, I don't wanne
> > prolong this unnecessary.
> 
> We have the same 'issue' with bidirection scsi commands - it's a side
> effect of having two request structures for these commands, and the
> only real fix would be to have a single request structure, which would
> be nice especially if we can't do it without growing struct request.
> 

Alright, I was not aware of that. That is fair then. Thx.


Beste Grüße / Best regards,
  - Benjamin Block
-- 
Linux on z Systems Development / IBM Systems & Technology Group
  IBM Deutschland Research & Development GmbH 
Vorsitz. AufsR.: Martina Koederitz /Geschäftsführung: Dirk Wittkopp
Sitz der Gesellschaft: Böblingen / Registergericht: AmtsG Stuttgart, HRB 243294



Re: [RFC PATCH 1/6] bsg: fix kernel panic resulting from missing allocation of a reply-buffer

2017-08-14 Thread Benjamin Block
Hey Christoph,

I looked over the patch in detail, see below.

> From f5b03b82df0569c035022c1c2535696186907f1a Mon Sep 17 00:00:00 2001
> From: Christoph Hellwig 
> Date: Fri, 11 Aug 2017 11:03:29 +0200
> Subject: bsg-lib: allocate sense data for each request
>
> Since we split the scsi_request out of the request the driver is supposed
> to provide storage for the sense buffer.  The bsg-lib code failed to do so,
> though and will crash anytime it is used.
>
> This patch moves bsg-lib to allocate and setup the bsg_job ahead of time,
> and allocate the sense data, which is used as reply buffer in bsg.
>
> Reported-by: Steffen Maier 
> Signed-off-by: Benjamin Block 
> Fixes: 82ed4db499b8 ("block: split scsi_request out of struct request")
> Cc:  #4.11+
> ---
>  block/bsg-lib.c | 51 
> +++--
>  include/linux/blkdev.h  |  1 -
>  include/linux/bsg-lib.h |  2 ++
>  3 files changed, 30 insertions(+), 24 deletions(-)
>
> diff --git a/block/bsg-lib.c b/block/bsg-lib.c
> index c4513b23f57a..c07333c3b785 100644
> --- a/block/bsg-lib.c
> +++ b/block/bsg-lib.c
> @@ -37,13 +37,11 @@ static void bsg_destroy_job(struct kref *kref)
>   struct bsg_job *job = container_of(kref, struct bsg_job, kref);
>   struct request *rq = job->req;
>
> - blk_end_request_all(rq, BLK_STS_OK);
> -
>   put_device(job->dev);   /* release reference for the request */
>
>   kfree(job->request_payload.sg_list);
>   kfree(job->reply_payload.sg_list);
> - kfree(job);
> + blk_end_request_all(rq, BLK_STS_OK);

What is the reason for moving that last line? Just wondering whether
that might change the behavior somehow, although it doesn't look like it
from the code.

>  }
>
>  void bsg_job_put(struct bsg_job *job)
> @@ -100,7 +98,7 @@ EXPORT_SYMBOL_GPL(bsg_job_done);
>   */
>  static void bsg_softirq_done(struct request *rq)
>  {
> - struct bsg_job *job = rq->special;
> + struct bsg_job *job = blk_mq_rq_to_pdu(rq);
>
>   bsg_job_put(job);
>  }
> @@ -129,24 +127,11 @@ static int bsg_map_buffer(struct bsg_buffer *buf, 
> struct request *req)
>  static int bsg_create_job(struct device *dev, struct request *req)
>  {
>   struct request *rsp = req->next_rq;
> - struct request_queue *q = req->q;
> + struct bsg_job *job = blk_mq_rq_to_pdu(req);
>   struct scsi_request *rq = scsi_req(req);
> - struct bsg_job *job;
>   int ret;
>
> - BUG_ON(req->special);
> -
> - job = kzalloc(sizeof(struct bsg_job) + q->bsg_job_size, GFP_KERNEL);
> - if (!job)
> - return -ENOMEM;
> -
> - req->special = job;
> - job->req = req;
> - if (q->bsg_job_size)
> - job->dd_data = (void *)&job[1];
> - job->request = rq->cmd;
>   job->request_len = rq->cmd_len;
> - job->reply = rq->sense;
>   job->reply_len = SCSI_SENSE_BUFFERSIZE; /* Size of sense buffer
>* allocated */
>   if (req->bio) {
> @@ -187,7 +172,6 @@ static void bsg_request_fn(struct request_queue *q)
>  {
>   struct device *dev = q->queuedata;
>   struct request *req;
> - struct bsg_job *job;
>   int ret;
>
>   if (!get_device(dev))
> @@ -207,8 +191,7 @@ static void bsg_request_fn(struct request_queue *q)
>   continue;
>   }
>
> - job = req->special;
> - ret = q->bsg_job_fn(job);
> + ret = q->bsg_job_fn(blk_mq_rq_to_pdu(req));
>   spin_lock_irq(q->queue_lock);
>   if (ret)
>   break;
> @@ -219,6 +202,27 @@ static void bsg_request_fn(struct request_queue *q)
>   spin_lock_irq(q->queue_lock);
>  }
>
> +static int bsg_init_rq(struct request_queue *q, struct request *req, gfp_t 
> gfp)
> +{
> + struct bsg_job *job = blk_mq_rq_to_pdu(req);
> +
> + memset(job, 0, sizeof(*job));
> + job->req = req;
> + job->request = job->sreq.cmd;

That doesn't work with bsg.c if the request submitted by the user is
bigger than BLK_MAX_CDB. There is code in blk_fill_sgv4_hdr_rq() that
will reassign the req->cmd point in that case to something else.

This is maybe wrong in the same vein as my Patch 1 is. If not for the
legacy code in bsg.c, setting this here, will miss changes to that
pointer between request-allocation and job-submission.

So maybe just move this to bsg_create_job().

> + job->dd_data = job + 1;
> + job->reply = job->sreq.sense = kzalloc(job->reply_len, gfp);

job->reply_len will be 0 here, won't it? You probably have to pull the
"job->reply_len = SCSI_SENSE_BUFFERSIZE" here from bsg_create_job().

> + if (!job->reply)
> + return -ENOMEM;
> + return 0;
> +}
> +
> +static void bsg_exit_rq(struct request_queue *q, struct request *req)
> +{
> + struct bsg_job *job = blk_mq_rq_to_pdu(req);
> +
> + kfree(job->reply);

Don't we need to free the reply-buffer as well?

Not sure how you wanna proceed. I can also sketch somethi

Re: [SCSI] qla2xxx: Fix a memory leak in an error path of qla2x00_process_els()

2017-08-14 Thread Bart Van Assche
On Mon, 2017-08-14 at 10:38 -0500, Steve Magnani wrote:
> I've been porting patches from mainline into a copy of QLogic's target driver 
> tree
> and ran across an anomaly in this changeset of yours from 2013
> (8c0eb596baa51f2b43949c698c644727ef17805c).
> 
> 
> The commit log says:
> 
>  ... Make it easy for Coverity (and for humans) to recognize that there 
> is no
>  fcport leak in the error path by changing the
> 
>bsg_job->request->msgcode == FC_BSG_HST_ELS_NOLOGIN
> 
>  test into
> 
>bsg_job->request->msgcode != FC_BSG_RPT_ELS.
> 
> 
> But the change actually made was this:
> 
> @@ -399,7 +399,7 @@ done_unmap_sg:
>   goto done_free_fcport;
>   
>   done_free_fcport:
> - if (bsg_job->request->msgcode == FC_BSG_HST_ELS_NOLOGIN)
> + if (bsg_job->request->msgcode == FC_BSG_RPT_ELS)
>   kfree(fcport);
>   done:
>   return rval;
> 
> 
> Shouldn't the "== FC_BSG_RPT_ELS" be "!= FC_BSG_RPT_ELS"?

Hello Steve,

Without having had a close look at the patch, I think you are right. On
http://www.spinics.net/lists/linux-scsi/msg66513.html one can see that
the change I had posted originally is as follows:

 done_free_fcport:
-   if (bsg_job->request->msgcode == FC_BSG_HST_ELS_NOLOGIN)
+   if (bsg_job->request->msgcode != FC_BSG_RPT_ELS)
kfree(fcport);
 done:

I have not been able to find the reposted version of this patch so I can't
figure out anymore what went wrong ...

Bart.

Re: [PATCH] Revert "scsi: default to scsi-mq"

2017-08-14 Thread Dave Kleikamp
On 08/13/2017 12:44 PM, Christoph Hellwig wrote:
> Defaulting to scsi-mq in 4.13-rc has shown various regressions
> on setups that we didn't previously consider.  Fixes for them are
> in progress, but too invasive to make it in this cycle.  So for
> now revert the commit that defaults to blk-mq for SCSI.  For 4.14
> we'll plan to try again with these fixes.

I'm not sure if this is one of your known regressions, but I've had a
problem on my Lenovo T410 with the 4.13-rc kernels. When unplugging the
power cord, the hard disk gets suspended.

[ 1499.629415] thinkpad_acpi: EC reports that Thermal Table has changed
[ 1499.832116] sd 0:0:0:0: [sda] Synchronizing SCSI cache
[ 1499.885831] e1000e: eth0 NIC Link is Down
[ 1499.897531] sd 0:0:0:0: [sda] Stopping disk
[ 1499.923736] EXT4-fs (sda5): re-mounted. Opts: data=ordered,commit=600
[ 1504.240444] thinkpad_acpi: EC reports that Thermal Table has changed
[ 1515.502561] sd 0:0:0:0: timing out command, waited 15s

Adding "scsi_mod.use_blk_mq=0" to the command line fixes it.

Thanks,
Shaggy

> 
> This reverts commit 5c279bd9e40624f4ab6e688671026d6005b066fa.
> 
> Signed-off-by: Christoph Hellwig 
> ---
>  drivers/scsi/Kconfig | 11 +++
>  drivers/scsi/scsi.c  |  4 
>  2 files changed, 15 insertions(+)
> 
> diff --git a/drivers/scsi/Kconfig b/drivers/scsi/Kconfig
> index f4538d7a3016..d145e0d90227 100644
> --- a/drivers/scsi/Kconfig
> +++ b/drivers/scsi/Kconfig
> @@ -47,6 +47,17 @@ config SCSI_NETLINK
>   default n
>   depends on NET
>  
> +config SCSI_MQ_DEFAULT
> + bool "SCSI: use blk-mq I/O path by default"
> + depends on SCSI
> + ---help---
> +   This option enables the new blk-mq based I/O path for SCSI
> +   devices by default.  With the option the scsi_mod.use_blk_mq
> +   module/boot option defaults to Y, without it to N, but it can
> +   still be overridden either way.
> +
> +   If unsure say N.
> +
>  config SCSI_PROC_FS
>   bool "legacy /proc/scsi/ support"
>   depends on SCSI && PROC_FS
> diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c
> index 3d38c6d463b8..1bf274e3b2b6 100644
> --- a/drivers/scsi/scsi.c
> +++ b/drivers/scsi/scsi.c
> @@ -800,7 +800,11 @@ MODULE_LICENSE("GPL");
>  module_param(scsi_logging_level, int, S_IRUGO|S_IWUSR);
>  MODULE_PARM_DESC(scsi_logging_level, "a bit mask of logging levels");
>  
> +#ifdef CONFIG_SCSI_MQ_DEFAULT
>  bool scsi_use_blk_mq = true;
> +#else
> +bool scsi_use_blk_mq = false;
> +#endif
>  module_param_named(use_blk_mq, scsi_use_blk_mq, bool, S_IWUSR | S_IRUGO);
>  
>  static int __init init_scsi(void)
> 


RE: [SCSI] qla2xxx: Fix a memory leak in an error path of qla2x00_process_els()

2017-08-14 Thread Steve Magnani

Bart -

I've been porting patches from mainline into a copy of QLogic's target driver 
tree
and ran across an anomaly in this changeset of yours from 2013
(8c0eb596baa51f2b43949c698c644727ef17805c).


The commit log says:

... Make it easy for Coverity (and for humans) to recognize that there is no
fcport leak in the error path by changing the

  bsg_job->request->msgcode == FC_BSG_HST_ELS_NOLOGIN

test into

  bsg_job->request->msgcode != FC_BSG_RPT_ELS.


But the change actually made was this:

@@ -399,7 +399,7 @@ done_unmap_sg:
goto done_free_fcport;
 
 done_free_fcport:

-   if (bsg_job->request->msgcode == FC_BSG_HST_ELS_NOLOGIN)
+   if (bsg_job->request->msgcode == FC_BSG_RPT_ELS)
kfree(fcport);
 done:
return rval;


Shouldn't the "== FC_BSG_RPT_ELS" be "!= FC_BSG_RPT_ELS"?

Regards,

 Steven J. Magnani   "I claim this network for MARS!
 www.digidescorp.com  Earthling, return my space modulator!"

 #include 



Re: [PATCH] Revert "scsi: default to scsi-mq"

2017-08-14 Thread Bart Van Assche
On Sun, 2017-08-13 at 19:44 +0200, Christoph Hellwig wrote:
> Defaulting to scsi-mq in 4.13-rc has shown various regressions
> on setups that we didn't previously consider.  Fixes for them are
> in progress, but too invasive to make it in this cycle.  So for
> now revert the commit that defaults to blk-mq for SCSI.  For 4.14
> we'll plan to try again with these fixes.

Should a patch for v4.14 perhaps be queued now? Anyway:

Reviewed-by: Bart Van Assche 



Re: [iscsi] Deadlock occurred when network is in error

2017-08-14 Thread Bart Van Assche
On Mon, 2017-08-14 at 11:23 +, Tangchen (UVP) wrote:
> Problem 2:
> 
> ***
> [What it looks like]
> ***
> When remove a scsi device, and the network error happens, __blk_drain_queue() 
> could hang forever.
> 
> # cat /proc/19160/stack 
> [] msleep+0x1d/0x30
> [] __blk_drain_queue+0xe4/0x160
> [] blk_cleanup_queue+0x106/0x2e0
> [] __scsi_remove_device+0x52/0xc0 [scsi_mod]
> [] scsi_remove_device+0x2b/0x40 [scsi_mod]
> [] sdev_store_delete_callback+0x10/0x20 [scsi_mod]
> [] sysfs_schedule_callback_work+0x15/0x80
> [] process_one_work+0x169/0x340
> [] worker_thread+0x183/0x490
> [] kthread+0x96/0xa0
> [] kernel_thread_helper+0x4/0x10
> [] 0x
> 
> The request queue of this device was stopped. So the following check will be 
> true forever:
> __blk_run_queue()
> {
> if (unlikely(blk_queue_stopped(q)))
> return;
> 
> __blk_run_queue_uncond(q);
> }
> 
> So __blk_run_queue_uncond() will never be called, and the process hang.
> 
> [ ... ]
>
> 
> [How to reproduce]
> 
> Unfortunately I cannot reproduce it in the latest kernel. 
> The script below will help to reproduce, but not very often.
> 
> # create network error
> tc qdisc add dev eth1 root netem loss 60%
> 
> # restart iscsid and rescan scsi bus again and again
> while [ 1 ]
> do
> systemctl restart iscsid
> rescan-scsi-bus
> (http://manpages.ubuntu.com/manpages/trusty/man8/rescan-scsi-bus.8.html)
> done

This should have been fixed by commit 36e3cf273977 ("scsi: Avoid that SCSI
queues get stuck"). The first mainline kernel that includes this commit is
kernel v4.11.

> void __blk_run_queue(struct request_queue *q)
> {
> -   if (unlikely(blk_queue_stopped(q)))
> +   if (unlikely(blk_queue_stopped(q)) && unlikely(!blk_queue_dying(q)))
> return;
> 
> __blk_run_queue_uncond(q);

Are you aware that the single queue block layer is on its way out and will
be removed sooner or later? Please focus your testing on scsi-mq. 

Regarding the above patch: it is wrong because it will cause lockups during
path removal for other block drivers. Please drop this patch.

Bart.

Re: [PATCH v2 1/2] nvmet_fc: add defer_req callback for deferment of cmd buffer return

2017-08-14 Thread Bart Van Assche
On 08/04/17 17:29, James Smart wrote:
> + /* Cleanup defer'ed IOs in queue */
> + list_for_each_entry(deferfcp, &queue->avail_defer_list, req_list) {
> + list_del(&deferfcp->req_list);
> + kfree(deferfcp);
> + }

Hello James,

Coverity reports a user-after-free for the above code:

*** CID 1416424:  Memory - illegal accesses  (USE_AFTER_FREE)
/drivers/nvme/target/fc.c: 738 in nvmet_fc_delete_target_queue()
732 &tgtport->fc_target_port, 
fod->fcpreq);
733 }
734 }
735 }
736 
737 /* Cleanup defer'ed IOs in queue */
>>> CID 1416424:  Memory - illegal accesses  (USE_AFTER_FREE)
>>> Dereferencing freed pointer "deferfcp".
738 list_for_each_entry(deferfcp, &queue->avail_defer_list, 
req_list) {
739 list_del(&deferfcp->req_list);
740 kfree(deferfcp);
741 }
742 
743 for (;;) {


Re: [PATCH] scsi-sysfs: correct errno for host_reset

2017-08-14 Thread weiping zhang
On Fri, Aug 11, 2017 at 01:52:17AM +0800, weiping zhang wrote:
> if scsi_host_template->host_reset is NULL, when user
> "echo adapter > /sys/class/scsi_host/hostx/host_reset",
> -EINVAL will return even string compare successfully. It make user confuse.
> 
> change to:
> -EINVAL   if string not match "adapter" / "firmware";
> -EOPNOTSUPP   if string match but not implement ->host_reset.
> 
> Signed-off-by: weiping zhang 
> ---
>  drivers/scsi/scsi_sysfs.c | 2 ++
>  1 file changed, 2 insertions(+)
> 
> diff --git a/drivers/scsi/scsi_sysfs.c b/drivers/scsi/scsi_sysfs.c
> index dff8faf..3e664f4 100644
> --- a/drivers/scsi/scsi_sysfs.c
> +++ b/drivers/scsi/scsi_sysfs.c
> @@ -272,6 +272,8 @@ store_host_reset(struct device *dev, struct 
> device_attribute *attr,
>  
>   if (sht->host_reset)
>   ret = sht->host_reset(shost, type);
> + else
> + ret = -EOPNOTSUPP;
>  
>  exit_store_host_reset:
>   if (ret == 0)
> -- 
Hi Martin,

Could you please take a look at this patch at your convinience.

thanks


[PATCHv2 5/6] hpsa: handle unsupported devices more gracefully

2017-08-14 Thread Hannes Reinecke
From: Jeff Mahoney 

Add a warning message if an unsupported device is found and the
hpsa_allow_any parameter is not set.
Also make the hpsa_allow_any parameter writeable once the hpsa
driver is loaded.

Signed-off-by: Jeff Mahoney 
Signed-off-by: Hannes Reinecke 
Acked-by: Don Brace 
---
 drivers/scsi/hpsa.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index c90a83d..7168cec 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -85,7 +85,7 @@
 static int hpsa_allow_any;
 module_param(hpsa_allow_any, int, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(hpsa_allow_any,
-   "Allow hpsa driver to access unknown HP Smart Array hardware");
+   "Allow hpsa driver to access unsupported HP Smart Array 
hardware");
 static int hpsa_simple_mode;
 module_param(hpsa_simple_mode, int, S_IRUGO|S_IWUSR);
 MODULE_PARM_DESC(hpsa_simple_mode,
@@ -7314,6 +7314,7 @@ static int hpsa_lookup_board_id(struct pci_dev *pdev, u32 
*board_id,
!hpsa_allow_any) {
dev_warn(&pdev->dev, "unrecognized board ID: "
"0x%08x, ignoring.\n", *board_id);
+   dev_warn(&pdev->dev, "This device may be enabled by loading the 
hpsa module with the hpsa_allow_any=1 option or by writing \"%s\" to 
/sys/bus/pci/drivers/hpsa/bind while the module is loaded. Please note that the 
driver is untested with this device and will result in an unsupported 
environment.\n", dev_name(&pdev->dev));
return -ENODEV;
}
if (legacy_board)
-- 
1.8.5.6



[iscsi] Deadlock occurred when network is in error

2017-08-14 Thread Tangchen (UVP)
Hi,

I found two hangup problems between iscsid service and iscsi module. And I can 
reproduce one
of them in the latest kernel always. So I think the problems really exist. 

It really took me a long time to find out why due to my lack of knowledge of 
iscsi. But I cannot
find a good way to solve them both.

Please do help to take a look at them. Thx.

=
Problem 1:

***
[What it looks like]
***
First, we connect to 10 remote LUNs with iscsid service with at least two 
dirrerent sessions. 
When network error occurs, the session could be in error. If we do login and 
logout, iscsid
service could run into D state.

My colleague has posted an email to report this problem before. And he posted a 
long call trace.
But barely gain any feedback.
(https://lkml.org/lkml/2017/6/19/330)


**
[Why it happens]
**
In the latest kernel, asynchronous part of sd_probe() was executed
in scsi_sd_probe_domain, and sd_remove() would wait until all the
works in scsi_sd_probe_domain finished. When we use iscsi based
remote storage, and the network is broken, the following deadlock
could happen.

1. An iscsi session login is in progress, and calls sd_probe() to
   probe a remote lun. The synchronous part has finished, and the
   asynchronous part is scheduled in scsi_sd_probe_domain, and will
   submit io to execute scsi cmd to obtain device info. When the
   network is broken, the session will go into ISCSI_SESSION_FAILED
   state, and the io will retry until the session becomes
   ISCSI_SESSION_FREE. As a result, the work in scsi_sd_probe_domain
   hangs.

2. On the other hand, iscsi kernel module detects network ping
   timeout, and triggers ISCSI_KEVENT_CONN_ERROR event. iscsid in
   user space will handle this event by triggering
   ISCSI_UEVENT_DESTROY_SESSION event. Destroy session process is
   synchronous, and when it calls sd_remove() to remove the lun,
   it waits until all the works in scsi_sd_probe_domain finish. As
   a result, it hangs, and iscsid in user space goes into D state
   which is not killable, and not able to handle all the other
   events.



[How to reproduce]

With the script below, I can reproduce it in the latest kernel always.

# create network errors
tc qdisc add dev eth1 root netem loss 60%

while [1]
do
iscsiadm -m node -T xx -login
sleep 5
iscsiadm -m node -T xx -logout &
iscsiadm -m node -T yy -login &
done

xx and yy are two different target names.

Connect to about 10 remote LUNs, and run the script for about half an hour will 
reproduce the problem.


***
[How I avoid it for now]
***
To avoid this problem, I simply remove scsi_sd_probe_domain, and call 
sd_probe_async() synchronously in sd_probe().
So sd_remove() doesn't need to wait for the domain again.

@@ -2986,7 +2986,40 @@ static int sd_probe(struct device *dev)
get_device(&sdkp->dev); /* prevent release before async_schedule */
-   async_schedule_domain(sd_probe_async, sdkp, &scsi_sd_probe_domain);
+   sd_probe_async((void *)sdkp, 0);

I know this is not a good way, so would you please give some advice about it ?



=
Problem 2:

***
[What it looks like]
***
When remove a scsi device, and the network error happens, __blk_drain_queue() 
could hang forever.

# cat /proc/19160/stack 
[] msleep+0x1d/0x30
[] __blk_drain_queue+0xe4/0x160
[] blk_cleanup_queue+0x106/0x2e0
[] __scsi_remove_device+0x52/0xc0 [scsi_mod]
[] scsi_remove_device+0x2b/0x40 [scsi_mod]
[] sdev_store_delete_callback+0x10/0x20 [scsi_mod]
[] sysfs_schedule_callback_work+0x15/0x80
[] process_one_work+0x169/0x340
[] worker_thread+0x183/0x490
[] kthread+0x96/0xa0
[] kernel_thread_helper+0x4/0x10
[] 0x

The request queue of this device was stopped. So the following check will be 
true forever:
__blk_run_queue()
{
if (unlikely(blk_queue_stopped(q)))
return;

__blk_run_queue_uncond(q);
}

So __blk_run_queue_uncond() will never be called, and the process hang.


**
[Why it happens]
**
When the network error happens, iscsi kernel module detected the ping timeout 
and 
tried to recover the session. Here, the queue was stopped, or you can also say 
session was blocked.

iscsi_start_session_recovery(session, conn, flag);
|-> iscsi_block_session(session->cls_session);
   |-> blk_stop_queue(q)

The session should be unblocked if the session is recovered or the recovery 
times out.
But it was not unblocked properly because scsi_remove_device() deleted the the 
device 
first, and then called __blk_drain_queue(). 

__scsi_remove_device()
|-> device_del(dev)
|-> blk_cleanup_queue()
  |-> scsi_request_fn()
|-> __blk_drain_queue()

At this time, the device was not on the children list of the parent device. So 
when 
__iscsi_unblock_session() tried to unblock the parent device and its 

[PATCHv2 0/6] hpsa legacy support

2017-08-14 Thread Hannes Reinecke
Hi all,

here's now the second attempt to add support for legacy boards, ie
for boards previously supported by cciss only.
With this patchset the hpsa driver should work with all Smart Array
boards if the 'hpsa_allow_any' module option is set, rendering the
cciss driver obsolete.
Consequently I've added a patch to remove the cciss driver and make
'hpsa' an alias to 'cciss'.

Hannes Reinecke (5):
  hpsa: add support for legacy boards
  hpsa: disable volume status check for legacy boards
  hpsa: Ignore errors for unsupported LV_DEVICE_ID VPD page
  hpsa: do not print errors for unsupported report luns format
  cciss: Drop obsolete driver

Jeff Mahoney (1):
  hpsa: handle unsupported devices more gracefully

 Documentation/blockdev/cciss.txt |  194 --
 MAINTAINERS  |   10 -
 drivers/block/Kconfig|   27 -
 drivers/block/Makefile   |1 -
 drivers/block/cciss.c| 5415 --
 drivers/block/cciss.h|  433 ---
 drivers/block/cciss_cmd.h|  269 --
 drivers/block/cciss_scsi.c   | 1653 
 drivers/block/cciss_scsi.h   |   79 -
 drivers/scsi/hpsa.c  |  104 +-
 drivers/scsi/hpsa.h  |   81 +-
 11 files changed, 147 insertions(+), 8119 deletions(-)
 delete mode 100644 Documentation/blockdev/cciss.txt
 delete mode 100644 drivers/block/cciss.c
 delete mode 100644 drivers/block/cciss.h
 delete mode 100644 drivers/block/cciss_cmd.h
 delete mode 100644 drivers/block/cciss_scsi.c
 delete mode 100644 drivers/block/cciss_scsi.h

-- 
1.8.5.6



[PATCHv2 1/6] hpsa: add support for legacy boards

2017-08-14 Thread Hannes Reinecke
Add support for legacy boards, ensuring to enable the driver for
those boards only when 'hpsa_allow_any' is set.
The attribute 'legacy_board' is set to '1' if the device is
a legacy board, and '0' otherwise.

Signed-off-by: Hannes Reinecke 
Cc: Don Brace 
---
 drivers/scsi/hpsa.c | 67 +++-
 drivers/scsi/hpsa.h | 81 -
 2 files changed, 121 insertions(+), 27 deletions(-)

diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 4f7cdb2..fbe7fbc 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -148,6 +148,8 @@
{PCI_VENDOR_ID_HP, 0x333f, 0x103c, 0x333f},
{PCI_VENDOR_ID_HP, PCI_ANY_ID,  PCI_ANY_ID, PCI_ANY_ID,
PCI_CLASS_STORAGE_RAID << 8, 0x << 8, 0},
+   {PCI_VENDOR_ID_COMPAQ, PCI_ANY_ID,  PCI_ANY_ID, PCI_ANY_ID,
+   PCI_CLASS_STORAGE_RAID << 8, 0x << 8, 0},
{0,}
 };
 
@@ -158,6 +160,26 @@
  *  access = Address of the struct of function pointers
  */
 static struct board_type products[] = {
+   {0x40700E11, "Smart Array 5300", &SA5A_access},
+   {0x40800E11, "Smart Array 5i", &SA5B_access},
+   {0x40820E11, "Smart Array 532", &SA5B_access},
+   {0x40830E11, "Smart Array 5312", &SA5B_access},
+   {0x409A0E11, "Smart Array 641", &SA5A_access},
+   {0x409B0E11, "Smart Array 642", &SA5A_access},
+   {0x409C0E11, "Smart Array 6400", &SA5A_access},
+   {0x409D0E11, "Smart Array 6400 EM", &SA5A_access},
+   {0x40910E11, "Smart Array 6i", &SA5A_access},
+   {0x3225103C, "Smart Array P600", &SA5A_access},
+   {0x3223103C, "Smart Array P800", &SA5A_access},
+   {0x3234103C, "Smart Array P400", &SA5A_access},
+   {0x3235103C, "Smart Array P400i", &SA5A_access},
+   {0x3211103C, "Smart Array E200i", &SA5A_access},
+   {0x3212103C, "Smart Array E200", &SA5A_access},
+   {0x3213103C, "Smart Array E200i", &SA5A_access},
+   {0x3214103C, "Smart Array E200i", &SA5A_access},
+   {0x3215103C, "Smart Array E200i", &SA5A_access},
+   {0x3237103C, "Smart Array E500", &SA5A_access},
+   {0x323D103C, "Smart Array P700m", &SA5A_access},
{0x3241103C, "Smart Array P212", &SA5_access},
{0x3243103C, "Smart Array P410", &SA5_access},
{0x3245103C, "Smart Array P410i", &SA5_access},
@@ -278,7 +300,8 @@ static int hpsa_find_cfg_addrs(struct pci_dev *pdev, void 
__iomem *vaddr,
   u64 *cfg_offset);
 static int hpsa_pci_find_memory_BAR(struct pci_dev *pdev,
unsigned long *memory_bar);
-static int hpsa_lookup_board_id(struct pci_dev *pdev, u32 *board_id);
+static int hpsa_lookup_board_id(struct pci_dev *pdev, u32 *board_id,
+   bool *legacy_board);
 static int wait_for_device_to_become_ready(struct ctlr_info *h,
   unsigned char lunaddr[],
   int reply_queue);
@@ -866,6 +889,16 @@ static ssize_t host_show_ctlr_num(struct device *dev,
return snprintf(buf, 20, "%d\n", h->ctlr);
 }
 
+static ssize_t host_show_legacy_board(struct device *dev,
+   struct device_attribute *attr, char *buf)
+{
+   struct ctlr_info *h;
+   struct Scsi_Host *shost = class_to_shost(dev);
+
+   h = shost_to_hba(shost);
+   return snprintf(buf, 20, "%d\n", h->legacy_board ? 1 : 0);
+}
+
 static DEVICE_ATTR(raid_level, S_IRUGO, raid_level_show, NULL);
 static DEVICE_ATTR(lunid, S_IRUGO, lunid_show, NULL);
 static DEVICE_ATTR(unique_id, S_IRUGO, unique_id_show, NULL);
@@ -891,6 +924,8 @@ static DEVICE_ATTR(lockup_detected, S_IRUGO,
host_show_lockup_detected, NULL);
 static DEVICE_ATTR(ctlr_num, S_IRUGO,
host_show_ctlr_num, NULL);
+static DEVICE_ATTR(legacy_board, S_IRUGO,
+   host_show_legacy_board, NULL);
 
 static struct device_attribute *hpsa_sdev_attrs[] = {
&dev_attr_raid_level,
@@ -912,6 +947,7 @@ static DEVICE_ATTR(ctlr_num, S_IRUGO,
&dev_attr_raid_offload_debug,
&dev_attr_lockup_detected,
&dev_attr_ctlr_num,
+   &dev_attr_legacy_board,
NULL,
 };
 
@@ -7232,7 +7268,8 @@ static int hpsa_interrupt_mode(struct ctlr_info *h)
return 0;
 }
 
-static int hpsa_lookup_board_id(struct pci_dev *pdev, u32 *board_id)
+static int hpsa_lookup_board_id(struct pci_dev *pdev, u32 *board_id,
+   bool *legacy_board)
 {
int i;
u32 subsystem_vendor_id, subsystem_device_id;
@@ -7242,9 +7279,22 @@ static int hpsa_lookup_board_id(struct pci_dev *pdev, 
u32 *board_id)
*board_id = ((subsystem_device_id << 16) & 0x) |
subsystem_vendor_id;
 
+   if (legacy_board)
+   *legacy_board = false;
for (i = 0; i < ARRAY_SIZE(products); i++)
-   if (*board_id == products[i].board_id)
-   return i;
+   if (*board

[PATCHv2 3/6] hpsa: Ignore errors for unsupported LV_DEVICE_ID VPD page

2017-08-14 Thread Hannes Reinecke
Legacy boards might not support the LV_DEVICE_ID VPD page, so
we shouldn't print out an error message here.

Signed-off-by: Hannes Reinecke 
Reviewed-by: Christoph Hellwig 
Acked-by: Don Brace 
---
 drivers/scsi/hpsa.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index 0b0b6dc..b34ec42 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -3827,7 +3827,7 @@ static int hpsa_update_device_info(struct ctlr_info *h,
memset(this_device->device_id, 0,
sizeof(this_device->device_id));
if (hpsa_get_device_id(h, scsi3addr, this_device->device_id, 8,
-   sizeof(this_device->device_id)))
+   sizeof(this_device->device_id) < 0))
dev_err(&h->pdev->dev,
"hpsa%d: %s: can't get device id for host 
%d:C0:T%d:L%d\t%s\t%.16s\n",
h->ctlr, __func__,
-- 
1.8.5.6



[PATCHv2 4/6] hpsa: do not print errors for unsupported report luns format

2017-08-14 Thread Hannes Reinecke
Legacy boards might not support the 'extended' report luns format,
but as this is to be expected we don't need to print out an error here.

Signed-off-by: Hannes Reinecke 
Acked-by: Don Brace 
---
 drivers/scsi/hpsa.c | 21 -
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index b34ec42..c90a83d 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -3601,7 +3601,7 @@ static int hpsa_scsi_do_report_luns(struct ctlr_info *h, 
int logical,
memset(scsi3addr, 0, sizeof(scsi3addr));
if (fill_cmd(c, logical ? HPSA_REPORT_LOG : HPSA_REPORT_PHYS, h,
buf, bufsize, 0, scsi3addr, TYPE_CMD)) {
-   rc = -1;
+   rc = -EAGAIN;
goto out;
}
if (extended_response)
@@ -3614,16 +3614,17 @@ static int hpsa_scsi_do_report_luns(struct ctlr_info 
*h, int logical,
if (ei->CommandStatus != 0 &&
ei->CommandStatus != CMD_DATA_UNDERRUN) {
hpsa_scsi_interpret_error(h, c);
-   rc = -1;
+   rc = -EIO;
} else {
struct ReportLUNdata *rld = buf;
 
if (rld->extended_response_flag != extended_response) {
-   dev_err(&h->pdev->dev,
-   "report luns requested format %u, got %u\n",
-   extended_response,
-   rld->extended_response_flag);
-   rc = -1;
+   if (!h->legacy_board)
+   dev_err(&h->pdev->dev,
+   "report luns requested format %u, got 
%u\n",
+   extended_response,
+   rld->extended_response_flag);
+   rc = -EOPNOTSUPP;
}
}
 out:
@@ -3639,7 +3640,10 @@ static inline int hpsa_scsi_do_report_phys_luns(struct 
ctlr_info *h,
 
rc = hpsa_scsi_do_report_luns(h, 0, buf, bufsize,
  HPSA_REPORT_PHYS_EXTENDED);
-   if (!rc || !hpsa_allow_any)
+   if (rc == -EOPNOTSUPP) {
+   if (!h->legacy_board)
+   return rc;
+   } else if (rc < 0)
return rc;
 
/* REPORT PHYS EXTENDED is not supported */
@@ -6617,7 +6621,6 @@ static int fill_cmd(struct CommandList *c, u8 cmd, struct 
ctlr_info *h,
default:
dev_warn(&h->pdev->dev, "unknown command 0x%c\n", cmd);
BUG();
-   return -1;
}
} else if (cmd_type == TYPE_MSG) {
switch (cmd) {
-- 
1.8.5.6



[PATCHv2 2/6] hpsa: disable volume status check for legacy boards

2017-08-14 Thread Hannes Reinecke
Legacy boards might not support volume status, so assume
the volume is online here.

Signed-off-by: Hannes Reinecke 
Acked-by: Don Brace 
---
 drivers/scsi/hpsa.c | 10 ++
 1 file changed, 10 insertions(+)

diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
index fbe7fbc..0b0b6dc 100644
--- a/drivers/scsi/hpsa.c
+++ b/drivers/scsi/hpsa.c
@@ -3845,6 +3845,16 @@ static int hpsa_update_device_info(struct ctlr_info *h,
if (h->fw_support & MISC_FW_RAID_OFFLOAD_BASIC)
hpsa_get_ioaccel_status(h, scsi3addr, this_device);
volume_offline = hpsa_volume_offline(h, scsi3addr);
+   if (volume_offline == HPSA_VPD_LV_STATUS_UNSUPPORTED &&
+   h->legacy_board) {
+   /*
+* Legacy boards might not support volume status
+*/
+   dev_info(&h->pdev->dev,
+"C0:T%d:L%d Volume status not available, 
assuming online.\n",
+this_device->target, this_device->lun);
+   volume_offline = 0;
+   }
this_device->volume_offline = volume_offline;
if (volume_offline == HPSA_LV_FAILED) {
rc = HPSA_LV_FAILED;
-- 
1.8.5.6



Re: [PATCH 3/6] hpsa: disable volume status check for older controller

2017-08-14 Thread Hannes Reinecke
On 08/11/2017 03:23 PM, Tomas Henzl wrote:
> On 8.8.2017 10:35, Hannes Reinecke wrote:
>> Older Controller might not support volume status, so assume
>> the volume is online here.
>>
>> Signed-off-by: Hannes Reinecke 
>> ---
>>  drivers/scsi/hpsa.c | 11 +++
>>  1 file changed, 11 insertions(+)
>>
>> diff --git a/drivers/scsi/hpsa.c b/drivers/scsi/hpsa.c
>> index 7ca6078..4ebf5d4 100644
>> --- a/drivers/scsi/hpsa.c
>> +++ b/drivers/scsi/hpsa.c
>> @@ -3832,6 +3832,17 @@ static int hpsa_update_device_info(struct ctlr_info 
>> *h,
>>  if (h->fw_support & MISC_FW_RAID_OFFLOAD_BASIC)
>>  hpsa_get_ioaccel_status(h, scsi3addr, this_device);
>>  volume_offline = hpsa_volume_offline(h, scsi3addr);
>> +if (volume_offline == HPSA_VPD_LV_STATUS_UNSUPPORTED &&
>> +!h->supported) {
>> +/*
>> + * Older / unsupported controllers might not support
>> + * volume status
>> + */
>> +dev_info(&h->pdev->dev,
>> + "C0:T%d:L%d Volume status not available, 
>> assuming online.\n",
>> + this_device->target, this_device->lun);
>> +volume_offline = 0;
> 
> Hi,
> could we have here
> volume_offline = HPSA_LV_OK;
> instead ?

Hmm; rather not (for now).
At several places we're just checking for (!h->volume_offline); before
changing that toe HPSA_LV_OK we'd need to change all of them to
(h->volume_offline == HPSA_LV_OK) to avoid any issues here.

Cheers,

Hannes
-- 
Dr. Hannes ReineckeTeamlead Storage & Networking
h...@suse.de   +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)


Re: [PATCH 1/6] hpsa: consolidate status variables

2017-08-14 Thread Hannes Reinecke
On 08/09/2017 03:40 PM, Christoph Hellwig wrote:
> These seem to be modified from sysfs files, so you'll run into
> read-modify-write atomicy issues with bitfields.
> 
> That being said while plain ints are somewhat safe the right thing
> would be to use proper atomic bitops.
> 
True, but rewriting that into atomics would change the flow of the
driver by quite a bit (as quite some spinlocks can be removed),
so I'd rather _not_ attempt this now.

So I'll be dropping this patch.

Cheers,

Hannes
-- 
Dr. Hannes ReineckeTeamlead Storage & Networking
h...@suse.de   +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)