[PATCH] klist: Make it safe to use klists in atomic context
In the scsi_transport_srp implementation it cannot be avoided to iterate over a klist from atomic context when using the legacy block layer instead of blk-mq. Hence this patch that makes it safe to use klists in atomic context. This patch avoids that lockdep reports the following: WARNING: SOFTIRQ-safe -> SOFTIRQ-unsafe lock order detected Possible interrupt unsafe locking scenario: CPU0CPU1 lock(&(&k->k_lock)->rlock); local_irq_disable(); lock(&(&q->__queue_lock)->rlock); lock(&(&k->k_lock)->rlock); lock(&(&q->__queue_lock)->rlock); stack backtrace: Workqueue: kblockd blk_timeout_work Call Trace: dump_stack+0xa4/0xf5 check_usage+0x6e6/0x700 __lock_acquire+0x185d/0x1b50 lock_acquire+0xd2/0x260 _raw_spin_lock+0x32/0x50 klist_next+0x47/0x190 device_for_each_child+0x8e/0x100 srp_timed_out+0xaf/0x1d0 [scsi_transport_srp] scsi_times_out+0xd4/0x410 [scsi_mod] blk_rq_timed_out+0x36/0x70 blk_timeout_work+0x1b5/0x220 process_one_work+0x4fe/0xad0 worker_thread+0x63/0x5a0 kthread+0x1c1/0x1e0 ret_from_fork+0x24/0x30 See also commit c9ddf73476ff ("scsi: scsi_transport_srp: Fix shost to rport translation"). Signed-off-by: Bart Van Assche Cc: Martin K. Petersen Cc: James Bottomley --- lib/klist.c | 10 ++ 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/lib/klist.c b/lib/klist.c index 0507fa5d84c5..f6b547812fe3 100644 --- a/lib/klist.c +++ b/lib/klist.c @@ -336,8 +336,9 @@ struct klist_node *klist_prev(struct klist_iter *i) void (*put)(struct klist_node *) = i->i_klist->put; struct klist_node *last = i->i_cur; struct klist_node *prev; + unsigned long flags; - spin_lock(&i->i_klist->k_lock); + spin_lock_irqsave(&i->i_klist->k_lock, flags); if (last) { prev = to_klist_node(last->n_node.prev); @@ -356,7 +357,7 @@ struct klist_node *klist_prev(struct klist_iter *i) prev = to_klist_node(prev->n_node.prev); } - spin_unlock(&i->i_klist->k_lock); + spin_unlock_irqrestore(&i->i_klist->k_lock, flags); if (put && last) put(last); @@ -377,8 +378,9 @@ struct klist_node *klist_next(struct klist_iter *i) void (*put)(struct klist_node *) = i->i_klist->put; struct klist_node *last = i->i_cur; struct klist_node *next; + unsigned long flags; - spin_lock(&i->i_klist->k_lock); + spin_lock_irqsave(&i->i_klist->k_lock, flags); if (last) { next = to_klist_node(last->n_node.next); @@ -397,7 +399,7 @@ struct klist_node *klist_next(struct klist_iter *i) next = to_klist_node(next->n_node.next); } - spin_unlock(&i->i_klist->k_lock); + spin_unlock_irqrestore(&i->i_klist->k_lock, flags); if (put && last) put(last); -- 2.17.1
[PATCH 4/6] tcmu: misc nl code cleanup
Some misc cleanup of the nl rework patches. 1. Fix space instead of tabs use and extra newline. 2. Drop initializing variables to 0 when not needed 3. Just pass the skb_buff and msg_header pointers to tcmu_netlink_event_send. Signed-off-by: Mike Christie --- drivers/target/target_core_user.c | 21 + 1 file changed, 9 insertions(+), 12 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 9835ea3..2a2e9e4e 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -1628,11 +1628,9 @@ static int tcmu_netlink_event_init(struct tcmu_dev *udev, static int tcmu_netlink_event_send(struct tcmu_dev *udev, enum tcmu_genl_cmd cmd, - struct sk_buff **buf, void **hdr) + struct sk_buff *skb, void *msg_header) { - int ret = 0; - struct sk_buff *skb = *buf; - void *msg_header = *hdr; + int ret; genlmsg_end(skb, msg_header); @@ -1644,7 +1642,7 @@ static int tcmu_netlink_event_send(struct tcmu_dev *udev, ret = genlmsg_multicast_allns(&tcmu_genl_family, skb, 0, TCMU_MCGRP_CONFIG, GFP_KERNEL); - /* We don't care if no one is listening */ + /* We don't care if no one is listening */ if (ret == -ESRCH) ret = 0; if (!ret) @@ -1662,9 +1660,8 @@ static int tcmu_send_dev_add_event(struct tcmu_dev *udev) &msg_header); if (ret < 0) return ret; - return tcmu_netlink_event_send(udev, TCMU_CMD_ADDED_DEVICE, &skb, - &msg_header); - + return tcmu_netlink_event_send(udev, TCMU_CMD_ADDED_DEVICE, skb, + msg_header); } static int tcmu_send_dev_remove_event(struct tcmu_dev *udev) @@ -1678,7 +1675,7 @@ static int tcmu_send_dev_remove_event(struct tcmu_dev *udev) if (ret < 0) return ret; return tcmu_netlink_event_send(udev, TCMU_CMD_REMOVED_DEVICE, - &skb, &msg_header); + skb, msg_header); } static int tcmu_update_uio_info(struct tcmu_dev *udev) @@ -2197,7 +2194,7 @@ static int tcmu_send_dev_config_event(struct tcmu_dev *udev, return ret; } return tcmu_netlink_event_send(udev, TCMU_CMD_RECONFIG_DEVICE, - &skb, &msg_header); + skb, msg_header); } @@ -2259,7 +2256,7 @@ static int tcmu_send_dev_size_event(struct tcmu_dev *udev, u64 size) return ret; } return tcmu_netlink_event_send(udev, TCMU_CMD_RECONFIG_DEVICE, - &skb, &msg_header); + skb, msg_header); } static ssize_t tcmu_dev_size_store(struct config_item *item, const char *page, @@ -2341,7 +2338,7 @@ static int tcmu_send_emulate_write_cache(struct tcmu_dev *udev, u8 val) return ret; } return tcmu_netlink_event_send(udev, TCMU_CMD_RECONFIG_DEVICE, - &skb, &msg_header); + skb, msg_header); } static ssize_t tcmu_emulate_write_cache_store(struct config_item *item, -- 2.7.2
[PATCH 5/6] tcmu: add module wide block/reset_netlink support
This patch based on Xiubo's patches adds 2 tcmu attr to block and resetup the netlink interface. It's used during userspace daemon reinitialization after the daemon has crashed while there is outstanding nl requests. The daemon can block the nl interface, kill outstanding requests in the kernel and then reopen the netlink socket and unblock it to allow new requests. Signed-off-by: Mike Christie --- drivers/target/target_core_user.c | 100 -- 1 file changed, 97 insertions(+), 3 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 2a2e9e4e..9555981 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -94,6 +94,7 @@ #define TCMU_GLOBAL_MAX_BLOCKS_DEF (512 * 1024) static u8 tcmu_kern_cmd_reply_supported; +static u8 tcmu_netlink_blocked; static struct device *tcmu_root_device; @@ -255,6 +256,92 @@ MODULE_PARM_DESC(global_max_data_area_mb, "Max MBs allowed to be allocated to all the tcmu device's " "data areas."); +static int tcmu_get_block_netlink(char *buffer, + const struct kernel_param *kp) +{ + return sprintf(buffer, "%s\n", tcmu_netlink_blocked ? + "blocked" : "unblocked"); +} + +static int tcmu_set_block_netlink(const char *str, + const struct kernel_param *kp) +{ + int ret; + u8 val; + + ret = kstrtou8(str, 0, &val); + if (ret < 0) + return ret; + + if (val > 1) { + pr_err("Invalid block netlink value %u\n", val); + return -EINVAL; + } + + tcmu_netlink_blocked = val; + return 0; +} + +static const struct kernel_param_ops tcmu_block_netlink_op = { + .set = tcmu_set_block_netlink, + .get = tcmu_get_block_netlink, +}; + +module_param_cb(block_netlink, &tcmu_block_netlink_op, NULL, S_IWUSR | S_IRUGO); +MODULE_PARM_DESC(block_netlink, "Block new netlink commands."); + +static int tcmu_fail_netlink_cmd(struct tcmu_nl_cmd *nl_cmd) +{ + struct tcmu_dev *udev = nl_cmd->udev; + + if (!tcmu_netlink_blocked) { + pr_err("Could not reset device's netlink interface. Netlink is not blocked.\n"); + return -EBUSY; + } + + if (nl_cmd->cmd != TCMU_CMD_UNSPEC) { + pr_debug("Aborting nl cmd %d on %s\n", nl_cmd->cmd, udev->name); + nl_cmd->status = -EINTR; + list_del(&nl_cmd->nl_list); + complete(&nl_cmd->complete); + } + return 0; +} + +static int tcmu_set_reset_netlink(const char *str, + const struct kernel_param *kp) +{ + struct tcmu_nl_cmd *nl_cmd, *tmp_cmd; + int ret; + u8 val; + + ret = kstrtou8(str, 0, &val); + if (ret < 0) + return ret; + + if (val != 1) { + pr_err("Invalid reset netlink value %u\n", val); + return -EINVAL; + } + + mutex_lock(&tcmu_nl_cmd_mutex); + list_for_each_entry_safe(nl_cmd, tmp_cmd, &tcmu_nl_cmd_list, nl_list) { + ret = tcmu_fail_netlink_cmd(nl_cmd); + if (ret) + break; + } + mutex_unlock(&tcmu_nl_cmd_mutex); + + return ret; +} + +static const struct kernel_param_ops tcmu_reset_netlink_op = { + .set = tcmu_set_reset_netlink, +}; + +module_param_cb(reset_netlink, &tcmu_reset_netlink_op, NULL, S_IWUSR); +MODULE_PARM_DESC(reset_netlink, "Reset netlink commands."); + /* multicast group */ enum tcmu_multicast_groups { TCMU_MCGRP_CONFIG, @@ -303,8 +390,9 @@ static int tcmu_genl_cmd_done(struct genl_info *info, int completed_cmd) } list_del(&nl_cmd->nl_list); - pr_debug("%s genl cmd done got id %d curr %d done %d rc %d\n", -udev->name, dev_id, nl_cmd->cmd, completed_cmd, rc); + pr_debug("%s genl cmd done got id %d curr %d done %d rc %d stat %d\n", +udev->name, dev_id, nl_cmd->cmd, completed_cmd, rc, +nl_cmd->status); if (nl_cmd->cmd != completed_cmd) { pr_err("Mismatched commands on %s (Expecting reply for %d. Current %d).\n", @@ -1547,6 +1635,13 @@ static int tcmu_init_genl_cmd_reply(struct tcmu_dev *udev, int cmd) mutex_lock(&tcmu_nl_cmd_mutex); + if (tcmu_netlink_blocked) { + mutex_unlock(&tcmu_nl_cmd_mutex); + pr_warn("Failing nl cmd %d on %s. Interface is blocked.\n", cmd, + udev->name); + return -EAGAIN; + } + if (nl_cmd->cmd != TCMU_CMD_UNSPEC) { mutex_unlock(&tcmu_nl_cmd_mutex); pr_warn("netlink cmd %d already executing on %s\n", @@ -1583,7 +1678,6 @@ static int tcmu_wait_genl_cmd_reply(struct tcmu_dev *udev) mutex_lock(&tcmu_nl_cmd_mutex); nl_cmd->cmd = TCMU_CMD
tcmu: fix hung netlink requests and nl related cleanup V2
The following patches fix the issues where the userspace daemon has crashed and left netlink requests dangling. The daemon can now block the interface, kill outstanding requests, reopen the netlink socket and then unblock and execute new requests. The patches were made over Martin's for-next branch. v2: - Return BUSY error immediately when there is a running command. - Fix nl skb leak when device is blocked. - Fix check for valid block_netlink input. - Break out nl code cleanup into separate patch.
[PATCH 3/6] tcmu: simplify nl interface
Just return EBUSY if a nl request comes in while processing one. The upper layers do not support sending multiple create/remove requests at the same time (you cannot have a create and remove at the same time or do multiple creates or removes at the same time) and doing a reconfig while a create/remove is still executing does not make sense. Signed-off-by: Mike Christie --- drivers/target/target_core_user.c | 26 -- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 73a5768..9835ea3 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -165,8 +165,6 @@ struct tcmu_dev { struct list_head timedout_entry; struct tcmu_nl_cmd curr_nl_cmd; - /* wake up threads waiting on curr_nl_cmd */ - wait_queue_head_t nl_cmd_wq; char dev_config[TCMU_CONFIG_LEN]; @@ -1264,8 +1262,6 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name) timer_setup(&udev->qfull_timer, tcmu_qfull_timedout, 0); timer_setup(&udev->cmd_timer, tcmu_cmd_timedout, 0); - init_waitqueue_head(&udev->nl_cmd_wq); - INIT_RADIX_TREE(&udev->data_blocks, GFP_KERNEL); return &udev->se_dev; @@ -1539,24 +1535,23 @@ static int tcmu_release(struct uio_info *info, struct inode *inode) return 0; } -static void tcmu_init_genl_cmd_reply(struct tcmu_dev *udev, int cmd) +static int tcmu_init_genl_cmd_reply(struct tcmu_dev *udev, int cmd) { struct tcmu_nl_cmd *nl_cmd = &udev->curr_nl_cmd; if (!tcmu_kern_cmd_reply_supported) - return; + return 0; if (udev->nl_reply_supported <= 0) - return; + return 0; -relock: mutex_lock(&tcmu_nl_cmd_mutex); if (nl_cmd->cmd != TCMU_CMD_UNSPEC) { mutex_unlock(&tcmu_nl_cmd_mutex); - pr_debug("sleeping for open nl cmd\n"); - wait_event(udev->nl_cmd_wq, (nl_cmd->cmd == TCMU_CMD_UNSPEC)); - goto relock; + pr_warn("netlink cmd %d already executing on %s\n", +nl_cmd->cmd, udev->name); + return -EBUSY; } memset(nl_cmd, 0, sizeof(*nl_cmd)); @@ -1568,6 +1563,7 @@ static void tcmu_init_genl_cmd_reply(struct tcmu_dev *udev, int cmd) list_add_tail(&nl_cmd->nl_list, &tcmu_nl_cmd_list); mutex_unlock(&tcmu_nl_cmd_mutex); + return 0; } static int tcmu_wait_genl_cmd_reply(struct tcmu_dev *udev) @@ -1590,8 +1586,6 @@ static int tcmu_wait_genl_cmd_reply(struct tcmu_dev *udev) nl_cmd->status = 0; mutex_unlock(&tcmu_nl_cmd_mutex); - wake_up_all(&udev->nl_cmd_wq); - return ret; } @@ -1642,7 +1636,11 @@ static int tcmu_netlink_event_send(struct tcmu_dev *udev, genlmsg_end(skb, msg_header); - tcmu_init_genl_cmd_reply(udev, cmd); + ret = tcmu_init_genl_cmd_reply(udev, cmd); + if (ret) { + nlmsg_free(skb); + return ret; + } ret = genlmsg_multicast_allns(&tcmu_genl_family, skb, 0, TCMU_MCGRP_CONFIG, GFP_KERNEL); -- 2.7.2
[PATCH 6/6] target: remove target_find_device
target_find_device is no longer used, so remove it. Signed-off-by: Mike Christie --- drivers/target/target_core_device.c | 24 include/target/target_core_backend.h | 2 -- 2 files changed, 26 deletions(-) diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index e27db4d..a9ad6ec 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -879,30 +879,6 @@ sector_t target_to_linux_sector(struct se_device *dev, sector_t lb) } EXPORT_SYMBOL(target_to_linux_sector); -/** - * target_find_device - find a se_device by its dev_index - * @id: dev_index - * @do_depend: true if caller needs target_depend_item to be done - * - * If do_depend is true, the caller must do a target_undepend_item - * when finished using the device. - * - * If do_depend is false, the caller must be called in a configfs - * callback or during removal. - */ -struct se_device *target_find_device(int id, bool do_depend) -{ - struct se_device *dev; - - mutex_lock(&device_mutex); - dev = idr_find(&devices_idr, id); - if (dev && do_depend && target_depend_item(&dev->dev_group.cg_item)) - dev = NULL; - mutex_unlock(&device_mutex); - return dev; -} -EXPORT_SYMBOL(target_find_device); - struct devices_idr_iter { int (*fn)(struct se_device *dev, void *data); void *data; diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index 34a15d5..c3ac472 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -106,8 +106,6 @@ booltarget_lun_is_rdonly(struct se_cmd *); sense_reason_t passthrough_parse_cdb(struct se_cmd *cmd, sense_reason_t (*exec_cmd)(struct se_cmd *cmd)); -struct se_device *target_find_device(int id, bool do_depend); - bool target_sense_desc_format(struct se_device *dev); sector_t target_to_linux_sector(struct se_device *dev, sector_t lb); bool target_configure_unmap_from_queue(struct se_dev_attrib *attrib, -- 2.7.2
[PATCH 2/6] tcmu: track nl commands
The next patch is going to fix the hung nl command issue so this adds a list of outstanding nl commands that we can later abort when the daemon is restarted. Signed-off-by: Mike Christie --- drivers/target/target_core_user.c | 68 ++- 1 file changed, 39 insertions(+), 29 deletions(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index 898a561..73a5768 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -103,9 +103,16 @@ struct tcmu_hba { #define TCMU_CONFIG_LEN 256 +static DEFINE_MUTEX(tcmu_nl_cmd_mutex); +static LIST_HEAD(tcmu_nl_cmd_list); + +struct tcmu_dev; + struct tcmu_nl_cmd { /* wake up thread waiting for reply */ struct completion complete; + struct list_head nl_list; + struct tcmu_dev *udev; int cmd; int status; }; @@ -157,7 +164,6 @@ struct tcmu_dev { struct list_head timedout_entry; - spinlock_t nl_cmd_lock; struct tcmu_nl_cmd curr_nl_cmd; /* wake up threads waiting on curr_nl_cmd */ wait_queue_head_t nl_cmd_wq; @@ -270,11 +276,9 @@ static struct nla_policy tcmu_attr_policy[TCMU_ATTR_MAX+1] = { static int tcmu_genl_cmd_done(struct genl_info *info, int completed_cmd) { - struct se_device *dev; - struct tcmu_dev *udev; + struct tcmu_dev *udev = NULL; struct tcmu_nl_cmd *nl_cmd; int dev_id, rc, ret = 0; - bool is_removed = (completed_cmd == TCMU_CMD_REMOVED_DEVICE); if (!info->attrs[TCMU_ATTR_CMD_STATUS] || !info->attrs[TCMU_ATTR_DEVICE_ID]) { @@ -285,33 +289,36 @@ static int tcmu_genl_cmd_done(struct genl_info *info, int completed_cmd) dev_id = nla_get_u32(info->attrs[TCMU_ATTR_DEVICE_ID]); rc = nla_get_s32(info->attrs[TCMU_ATTR_CMD_STATUS]); - dev = target_find_device(dev_id, !is_removed); - if (!dev) { - printk(KERN_ERR "tcmu nl cmd %u/%u completion could not find device with dev id %u.\n", - completed_cmd, rc, dev_id); - return -ENODEV; + mutex_lock(&tcmu_nl_cmd_mutex); + list_for_each_entry(nl_cmd, &tcmu_nl_cmd_list, nl_list) { + if (nl_cmd->udev->se_dev.dev_index == dev_id) { + udev = nl_cmd->udev; + break; + } } - udev = TCMU_DEV(dev); - spin_lock(&udev->nl_cmd_lock); - nl_cmd = &udev->curr_nl_cmd; + if (!udev) { + pr_err(KERN_ERR "tcmu nl cmd %u/%d completion could not find device with dev id %u.\n", + completed_cmd, rc, dev_id); + ret = -ENODEV; + goto unlock; + } + list_del(&nl_cmd->nl_list); - pr_debug("genl cmd done got id %d curr %d done %d rc %d\n", dev_id, -nl_cmd->cmd, completed_cmd, rc); + pr_debug("%s genl cmd done got id %d curr %d done %d rc %d\n", +udev->name, dev_id, nl_cmd->cmd, completed_cmd, rc); if (nl_cmd->cmd != completed_cmd) { - printk(KERN_ERR "Mismatched commands (Expecting reply for %d. Current %d).\n", - completed_cmd, nl_cmd->cmd); + pr_err("Mismatched commands on %s (Expecting reply for %d. Current %d).\n", + udev->name, completed_cmd, nl_cmd->cmd); ret = -EINVAL; - } else { - nl_cmd->status = rc; + goto unlock; } - spin_unlock(&udev->nl_cmd_lock); - if (!is_removed) - target_undepend_item(&dev->dev_group.cg_item); - if (!ret) - complete(&nl_cmd->complete); + nl_cmd->status = rc; + complete(&nl_cmd->complete); +unlock: + mutex_unlock(&tcmu_nl_cmd_mutex); return ret; } @@ -1258,7 +1265,6 @@ static struct se_device *tcmu_alloc_device(struct se_hba *hba, const char *name) timer_setup(&udev->cmd_timer, tcmu_cmd_timedout, 0); init_waitqueue_head(&udev->nl_cmd_wq); - spin_lock_init(&udev->nl_cmd_lock); INIT_RADIX_TREE(&udev->data_blocks, GFP_KERNEL); @@ -1544,10 +1550,10 @@ static void tcmu_init_genl_cmd_reply(struct tcmu_dev *udev, int cmd) return; relock: - spin_lock(&udev->nl_cmd_lock); + mutex_lock(&tcmu_nl_cmd_mutex); if (nl_cmd->cmd != TCMU_CMD_UNSPEC) { - spin_unlock(&udev->nl_cmd_lock); + mutex_unlock(&tcmu_nl_cmd_mutex); pr_debug("sleeping for open nl cmd\n"); wait_event(udev->nl_cmd_wq, (nl_cmd->cmd == TCMU_CMD_UNSPEC)); goto relock; @@ -1555,9 +1561,13 @@ static void tcmu_init_genl_cmd_reply(struct tcmu_dev *udev, int cmd) memset(nl_cmd, 0, sizeof(*nl_cmd)); nl_cmd->cmd = cmd; + nl_cmd->udev = udev; init_completion(&nl_cmd->complete); + INIT_LIST_HEAD(&nl_cmd->nl_list)
[PATCH 1/6] tcmu: delete unused __wait
When this code change this was never cleaned up. Signed-off-by: Mike Christie --- drivers/target/target_core_user.c | 1 - 1 file changed, 1 deletion(-) diff --git a/drivers/target/target_core_user.c b/drivers/target/target_core_user.c index e4a76f9..898a561 100644 --- a/drivers/target/target_core_user.c +++ b/drivers/target/target_core_user.c @@ -1564,7 +1564,6 @@ static int tcmu_wait_genl_cmd_reply(struct tcmu_dev *udev) { struct tcmu_nl_cmd *nl_cmd = &udev->curr_nl_cmd; int ret; - DEFINE_WAIT(__wait); if (!tcmu_kern_cmd_reply_supported) return 0; -- 2.7.2
Re: [PATCH v1] mpt3sas: Fix calltrace observed while running IO & host reset
On 06/22/18 09:38, Sreekanth Reddy wrote: In driver's .resume() callback function, driver is doing IOC reset operation. And as per your suggestion we tried using scsi_internal_device_block_nowait() to block the all the devices attached to the HBA before going for IOC reset operation. During system resume time as drives will be in quiesce state and calling scsi_internal_device_block_nowait() return with error status -22. So you suggested us to try using lock_system_sleep() API before calling scsi_internal_device_block_nowait(), so that we don't get -22 return status when we call scsi_internal_device_block_nowait() API during resume time. We tried the same and we see system get hang during hibernation. Please correct me if I am wrong or if I have wrongly understood your suggestions. I feel that the fix which have posted is the better fix then using scsi_internal_device_block_nowait()/scsi_internal_device_unblock_nowait() APIs. Hello Sreekanth, It seems like there is a misunderstanding: what I proposed was to use lock_system_sleep() to delay hibernation until unlock_system_sleep() has been called. I had not realized that the mpt3sas driver can call scsi_internal_device_block_nowait() during system resume. There is another issue with the scsi_internal_device_block_nowait() calls by the mpt3sas driver: callers like _scsih_sas_broadcast_primitive_event() seem to assume that all scsih_qcmd() calls have finished as soon as scsi_internal_device_block_nowait() has returned. However, that assumption is not correct, especially when using scsi-mq. If the patch "mpt3sas: Fix calltrace observed while running IO & host reset" would be allowed upstream, will the issues mentioned above ever be addressed? Bart.
Re: qla2xxx and smatch warnings about uninitialized variables
Hi Bart/Dan, > On Jun 22, 2018, at 2:08 PM, Bart Van Assche wrote: > > External Email > > On 06/22/18 14:06, Dan Carpenter wrote: >> On Fri, Jun 22, 2018 at 08:43:21AM -0700, Bart Van Assche wrote: >>> Hello Himanshu, >>> >>> The latest smatch version reports a large number of warnings about >>> uninitialized variables for the qla2xxx driver. I think these warnings >>> indicate real bugs. Can you have a look at these warnings? >> >> Or we could silence a lot of them by adding "qla8044_rd_reg_indirect 2" >> to the smatch_data/kernel.ignore_uninitialized_param file. > > Hello Dan, > > Since the warnings seem to indicate real bugs to me, I'm not sure it's a > good idea to suppress these warnings. > > Thanks, > > Bart. Thanks for report. I’ll look at the errors and send patch to fix these. Thanks, - Himanshu
Re: qla2xxx and smatch warnings about uninitialized variables
On 06/22/18 14:06, Dan Carpenter wrote: On Fri, Jun 22, 2018 at 08:43:21AM -0700, Bart Van Assche wrote: Hello Himanshu, The latest smatch version reports a large number of warnings about uninitialized variables for the qla2xxx driver. I think these warnings indicate real bugs. Can you have a look at these warnings? Or we could silence a lot of them by adding "qla8044_rd_reg_indirect 2" to the smatch_data/kernel.ignore_uninitialized_param file. Hello Dan, Since the warnings seem to indicate real bugs to me, I'm not sure it's a good idea to suppress these warnings. Thanks, Bart.
Re: qla2xxx and smatch warnings about uninitialized variables
On Fri, Jun 22, 2018 at 08:43:21AM -0700, Bart Van Assche wrote: > Hello Himanshu, > > The latest smatch version reports a large number of warnings about > uninitialized variables for the qla2xxx driver. I think these warnings > indicate real bugs. Can you have a look at these warnings? > Or we could silence a lot of them by adding "qla8044_rd_reg_indirect 2" to the smatch_data/kernel.ignore_uninitialized_param file. regards, dan carpenter
[PATCH] scsi: aacraid: Fix PD performance regression over incorrect qd being set
The driver fails to set the correct queue depth for native devices, due to failing to set the device type prior to calling aac_set_safw_target_qd(). This results in slave configure setting the queue depth to 1. This causes around 30% performance degradation. Fixed by setting the dev type before trying to set queue depth. Reported-by: Steve Best Fixes: 0bcb45fb20c2a ("scsi: aacraid: Add helper function to set queue depth") cc: sta...@vger.kernel.org Signed-off-by: Raghava Aditya Renukunta Reviewed-by: David Carroll --- drivers/scsi/aacraid/aachba.c | 15 +++ 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/drivers/scsi/aacraid/aachba.c b/drivers/scsi/aacraid/aachba.c index a9831bd37a73..a57f3a7d4748 100644 --- a/drivers/scsi/aacraid/aachba.c +++ b/drivers/scsi/aacraid/aachba.c @@ -1974,7 +1974,6 @@ static void aac_set_safw_attr_all_targets(struct aac_dev *dev) u32 lun_count, nexus; u32 i, bus, target; u8 expose_flag, attribs; - u8 devtype; lun_count = aac_get_safw_phys_lun_count(dev); @@ -1992,23 +1991,23 @@ static void aac_set_safw_attr_all_targets(struct aac_dev *dev) continue; if (expose_flag != 0) { - devtype = AAC_DEVTYPE_RAID_MEMBER; - goto update_devtype; + dev->hba_map[bus][target].devtype = + AAC_DEVTYPE_RAID_MEMBER; + continue; } if (nexus != 0 && (attribs & 8)) { - devtype = AAC_DEVTYPE_NATIVE_RAW; + dev->hba_map[bus][target].devtype = + AAC_DEVTYPE_NATIVE_RAW; dev->hba_map[bus][target].rmw_nexus = nexus; } else - devtype = AAC_DEVTYPE_ARC_RAW; + dev->hba_map[bus][target].devtype = + AAC_DEVTYPE_ARC_RAW; dev->hba_map[bus][target].scan_counter = dev->scan_counter; aac_set_safw_target_qd(dev, bus, target); - -update_devtype: - dev->hba_map[bus][target].devtype = devtype; } }
Re: [PATCH v1] mpt3sas: Fix calltrace observed while running IO & host reset
On Fri, Jun 22, 2018 at 8:22 PM, Bart Van Assche wrote: > On 06/21/18 22:35, Sreekanth Reddy wrote: >> >> No, lock_system_sleep() is not inserted in the interrupt context. we >> have inserted it in .resume() call back function just before issuing >> the IOC reset. > > > That's the wrong place to insert a lock_system_sleep() call. Please have a > look at drivers/scsi/scsi_transport_spi.c for an example of how to use that > function. > I am totally confused. In driver's .resume() callback function, driver is doing IOC reset operation. And as per your suggestion we tried using scsi_internal_device_block_nowait() to block the all the devices attached to the HBA before going for IOC reset operation. During system resume time as drives will be in quiesce state and calling scsi_internal_device_block_nowait() return with error status -22. So you suggested us to try using lock_system_sleep() API before calling scsi_internal_device_block_nowait(), so that we don't get -22 return status when we call scsi_internal_device_block_nowait() API during resume time. We tried the same and we see system get hang during hibernation. Please correct me if I am wrong or if I have wrongly understood your suggestions. I feel that the fix which have posted is the better fix then using scsi_internal_device_block_nowait()/scsi_internal_device_unblock_nowait() APIs. Thanks, Sreekanth > Thanks, > > Bart.
[Bug 200003] Hardware Error with 40-wire PATA cable
https://bugzilla.kernel.org/show_bug.cgi?id=23 Brent (bzipiti...@gmail.com) changed: What|Removed |Added Summary|40-wire PATA cable causes |Hardware Error with 40-wire |Hardware Error |PATA cable -- You are receiving this mail because: You are the assignee for the bug.
[Bug 200003] 40-wire PATA cable causes Hardware Error
https://bugzilla.kernel.org/show_bug.cgi?id=23 Brent (bzipiti...@gmail.com) changed: What|Removed |Added Summary|Memorex DVD burner Hardware |40-wire PATA cable causes |Error |Hardware Error --- Comment #10 from Brent (bzipiti...@gmail.com) --- I changed out the 40-wire PATA cable connected to the Memorex DVD burner for an 80-wire cable, and presto, now newer kernels can read optical media on that computer! Succeeded with 2.6.26, 3.14 (Puppy Linux 6.3.2), and 4.9.87 (AntiX Linux 17.1). The hard drive had a separate 80-wire cable all along. -- You are receiving this mail because: You are the assignee for the bug.
qla2xxx and smatch warnings about uninitialized variables
Hello Himanshu, The latest smatch version reports a large number of warnings about uninitialized variables for the qla2xxx driver. I think these warnings indicate real bugs. Can you have a look at these warnings? Thanks, Bart. $ make M=drivers/scsi/qla2xxx C=2 CHECK="smatch -p=kernel" CHECK drivers/scsi/qla2xxx/qla_os.c CC [M] drivers/scsi/qla2xxx/qla_os.o CHECK drivers/scsi/qla2xxx/qla_init.c CC [M] drivers/scsi/qla2xxx/qla_init.o CHECK drivers/scsi/qla2xxx/qla_mbx.c CC [M] drivers/scsi/qla2xxx/qla_mbx.o CHECK drivers/scsi/qla2xxx/qla_iocb.c CC [M] drivers/scsi/qla2xxx/qla_iocb.o CHECK drivers/scsi/qla2xxx/qla_isr.c CC [M] drivers/scsi/qla2xxx/qla_isr.o CHECK drivers/scsi/qla2xxx/qla_gs.c CC [M] drivers/scsi/qla2xxx/qla_gs.o CHECK drivers/scsi/qla2xxx/qla_dbg.c CC [M] drivers/scsi/qla2xxx/qla_dbg.o CHECK drivers/scsi/qla2xxx/qla_sup.c CC [M] drivers/scsi/qla2xxx/qla_sup.o CHECK drivers/scsi/qla2xxx/qla_attr.c CC [M] drivers/scsi/qla2xxx/qla_attr.o CHECK drivers/scsi/qla2xxx/qla_mid.c CC [M] drivers/scsi/qla2xxx/qla_mid.o CHECK drivers/scsi/qla2xxx/qla_dfs.c CC [M] drivers/scsi/qla2xxx/qla_dfs.o CHECK drivers/scsi/qla2xxx/qla_bsg.c CC [M] drivers/scsi/qla2xxx/qla_bsg.o CHECK drivers/scsi/qla2xxx/qla_nx.c drivers/scsi/qla2xxx/qla_nx.c:1016: qla82xx_flash_wait_write_finish() error: uninitialized symbol 'val'. drivers/scsi/qla2xxx/qla_nx.c:1213: qla82xx_pinit_from_rom() error: uninitialized symbol 'n'. CC [M] drivers/scsi/qla2xxx/qla_nx.o CHECK drivers/scsi/qla2xxx/qla_mr.c drivers/scsi/qla2xxx/qla_mr.c:2268: qlafx00_ioctl_iosb_entry() error: uninitialized symbol 'res'. CC [M] drivers/scsi/qla2xxx/qla_mr.o drivers/scsi/qla2xxx/qla_mr.c: In function ‘qlafx00_fx_disc’: drivers/scsi/qla2xxx/qla_mr.c:1882:4: warning: ‘strncpy’ output may be truncated copying 64 bytes from a string of length 64 [-Wstringop-truncation] strncpy(phost_info->nodename, ^ p_sysid->nodename, NODENAME_LENGTH); ~~~ drivers/scsi/qla2xxx/qla_mr.c:1886:4: warning: ‘strncpy’ output may be truncated copying 64 bytes from a string of length 64 [-Wstringop-truncation] strncpy(phost_info->release, ^~~~ p_sysid->release, RELEASE_LENGTH); ~ drivers/scsi/qla2xxx/qla_mr.c:1888:4: warning: ‘strncpy’ output may be truncated copying 64 bytes from a string of length 64 [-Wstringop-truncation] strncpy(phost_info->version, ^~~~ p_sysid->version, VERSION_LENGTH); ~ drivers/scsi/qla2xxx/qla_mr.c:1890:4: warning: ‘strncpy’ output may be truncated copying 64 bytes from a string of length 64 [-Wstringop-truncation] strncpy(phost_info->machine, ^~~~ p_sysid->machine, MACHINE_LENGTH); ~ drivers/scsi/qla2xxx/qla_mr.c:1892:4: warning: ‘strncpy’ output may be truncated copying 64 bytes from a string of length 64 [-Wstringop-truncation] strncpy(phost_info->domainname, ^~~ p_sysid->domainname, DOMNAME_LENGTH); CHECK drivers/scsi/qla2xxx/qla_nx2.c drivers/scsi/qla2xxx/qla_nx2.c:135: qla8044_read_write_crb_reg() error: uninitialized symbol 'value'. drivers/scsi/qla2xxx/qla_nx2.c:149: qla8044_poll_wait_for_ready() error: uninitialized symbol 'temp'. drivers/scsi/qla2xxx/qla_nx2.c:678: qla8044_poll_reg() error: uninitialized symbol 'value'. drivers/scsi/qla2xxx/qla_nx2.c:697: qla8044_poll_reg() error: uninitialized symbol 'value'. drivers/scsi/qla2xxx/qla_nx2.c:921: qla8044_poll_read_list() error: uninitialized symbol 'value'. drivers/scsi/qla2xxx/qla_nx2.c:1193: qla8044_ms_mem_write_128b() error: uninitialized symbol 'agt_ctrl'. drivers/scsi/qla2xxx/qla_nx2.c:2235: qla8044_minidump_process_control() error: uninitialized symbol 'read_value'. drivers/scsi/qla2xxx/qla_nx2.c:2261: qla8044_minidump_process_control() error: uninitialized symbol 'read_value'. drivers/scsi/qla2xxx/qla_nx2.c:2286: qla8044_minidump_process_control() error: uninitialized symbol 'read_value'. drivers/scsi/qla2xxx/qla_nx2.c:2344: qla8044_minidump_process_rdcrb() error: uninitialized symbol 'r_value'. drivers/scsi/qla2xxx/qla_nx2.c:2413: qla8044_minidump_process_rdmem() error: uninitialized symbol 'r_data'. drivers/scsi/qla2xxx/qla_nx2.c:2507: qla8044_minidump_process_l2tag() error: uninitialized symbol 'c_value_r'. drivers/scsi/qla2xxx/qla_nx2.c:2519: qla8044_minidump_process_l2tag() error: uninitialized symbol 'r_value'. drivers/scsi/qla2xxx/qla_nx2.c:2554: qla8044_minidump_process_l1cache() error: uninitialized symbol 'r_value'. drivers/scsi/qla2xxx/qla_nx2.c:2615: qla8044_minidump_process_rdmux() error: uninitialized symbol 'r_value'. drivers/scsi/qla
Re: [PATCH v1] mpt3sas: Fix calltrace observed while running IO & host reset
On 06/21/18 22:35, Sreekanth Reddy wrote: No, lock_system_sleep() is not inserted in the interrupt context. we have inserted it in .resume() call back function just before issuing the IOC reset. That's the wrong place to insert a lock_system_sleep() call. Please have a look at drivers/scsi/scsi_transport_spi.c for an example of how to use that function. Thanks, Bart.
[bug report] qedi: Add support for populating ethernet TLVs.
Hello Manish Rangankar, The patch 534bbdf8832a: "qedi: Add support for populating ethernet TLVs." from May 22, 2018, leads to the following static checker warning: drivers/scsi/qedi/qedi_main.c:891 qedi_get_boot_tgt_info() error: snprintf() is printing too much 256 vs 255 drivers/scsi/qedi/qedi_main.c 883 static void qedi_get_boot_tgt_info(struct nvm_iscsi_block *block, 884 struct qedi_boot_target *tgt, u8 index) 885 { 886 u32 ipv6_en; 887 888 ipv6_en = !!(block->generic.ctrl_flags & 889 NVM_ISCSI_CFG_GEN_IPV6_ENABLED); 890 891 snprintf(tgt->iscsi_name, NVM_ISCSI_CFG_ISCSI_NAME_MAX_LEN, "%s\n", ^^^ The buffer is one char smaller than NVM_ISCSI_CFG_ISCSI_NAME_MAX_LEN. 892 block->target[index].target_name.byte); 893 894 tgt->ipv6_en = ipv6_en; 895 896 if (ipv6_en) 897 snprintf(tgt->ip_addr, IPV6_LEN, "%pI6\n", 898 block->target[index].ipv6_addr.byte); 899 else 900 snprintf(tgt->ip_addr, IPV4_LEN, "%pI4\n", 901 block->target[index].ipv4_addr.byte); 902 } regards, dan carpenter