On 04/04/2018 09:38 PM, Xiubo Li wrote:
> On 2018/4/5 8:47, Mike Christie wrote:
>> On 04/02/2018 06:42 AM, [email protected] wrote:
>>> From: Xiubo Li <[email protected]>
>>>
>>> This patch adds 1 tcmu attr to reset and complete all the blocked
>>> netlink waiting threads. It's used when the userspace daemon like
>>> tcmu-runner has crashed or been forced to shut down just before the
>>> netlink requests are replied to by the kernel; the netlink requesting
>>> threads will then get stuck forever. Previously we had to reboot the
>>> machine to recover from it; with this patch a reboot is no longer required.
>>>
>>> To be safe, the netlink reset operation should be done before the
>>> userspace daemon can receive and handle any netlink requests.
>>>
>>> Signed-off-by: Xiubo Li <[email protected]>
>>> ---
>>> drivers/target/target_core_user.c | 99
>>> ++++++++++++++++++++++++++++++++++++---
>>> 1 file changed, 93 insertions(+), 6 deletions(-)
>>>
>>> diff --git a/drivers/target/target_core_user.c
>>> b/drivers/target/target_core_user.c
>>> index 4ad89ea..dc8879d 100644
>>> --- a/drivers/target/target_core_user.c
>>> +++ b/drivers/target/target_core_user.c
>>> @@ -103,9 +103,13 @@ struct tcmu_hba {
>>> #define TCMU_CONFIG_LEN 256
>>> +static spinlock_t nl_complete_lock;
>>> +static struct idr complete_wait_udevs = IDR_INIT;
>>> +
>>> struct tcmu_nl_cmd {
>>> /* wake up thread waiting for reply */
>>> - struct completion complete;
>>> + bool complete;
>>> +
>>> int cmd;
>>> int status;
>>> };
>>> @@ -159,12 +163,17 @@ struct tcmu_dev {
>>> spinlock_t nl_cmd_lock;
>>> struct tcmu_nl_cmd curr_nl_cmd;
>>> - /* wake up threads waiting on curr_nl_cmd */
>>> + /* wake up threads waiting on nl_cmd_wq */
>>> wait_queue_head_t nl_cmd_wq;
>>> + /* complete thread waiting complete_wq */
>>> + wait_queue_head_t complete_wq;
>>> +
>>> char dev_config[TCMU_CONFIG_LEN];
>>> int nl_reply_supported;
>>> +
>>> + uint32_t dev_id;
>>> };
>>> #define TCMU_DEV(_se_dev) container_of(_se_dev, struct tcmu_dev,
>>> se_dev)
>>> @@ -251,6 +260,56 @@ static int tcmu_get_global_max_data_area(char
>>> *buffer,
>>> "Max MBs allowed to be allocated to all the tcmu device's "
>>> "data areas.");
>>> +static void tcmu_complete_wake_up(struct tcmu_dev *udev)
>>> +{
>>> + struct tcmu_nl_cmd *nl_cmd = &udev->curr_nl_cmd;
>>> +
>>> + spin_lock(&nl_complete_lock);
>>> + nl_cmd->complete = true;
>>> + wake_up(&udev->complete_wq);
>>> + spin_unlock(&nl_complete_lock);
>>> +}
>>> +
>>> +static void tcmu_complete_wake_up_all(void)
>>> +{
>>> + struct tcmu_nl_cmd *nl_cmd;
>>> + struct tcmu_dev *udev;
>>> + int i;
>>> +
>>> + spin_lock(&nl_complete_lock);
>>> + idr_for_each_entry(&complete_wait_udevs, udev, i) {
>>> + nl_cmd = &udev->curr_nl_cmd;
>>> + nl_cmd->complete = true;
>>> + wake_up(&udev->complete_wq);
>>> + }
>>> + spin_unlock(&nl_complete_lock);
>>> +}
>>> +
>>> +static int tcmu_complete_wait(struct tcmu_dev *udev)
>>> +{
>>> + struct tcmu_nl_cmd *nl_cmd = &udev->curr_nl_cmd;
>>> + uint32_t dev_id;
>>> +
>>> + spin_lock(&nl_complete_lock);
>>> + dev_id = idr_alloc(&complete_wait_udevs, udev, 1, USHRT_MAX,
>>> GFP_NOWAIT);
>>> + if (dev_id < 0) {
>>> + pr_err("tcmu: Could not allocate dev id.\n");
>>> + return dev_id;
>>> + }
>>> + udev->dev_id = dev_id;
>> dev_id is never used.
> It will be used when the device is being removed.
Ah yeah, you are right. Bad comment on my part. I was thinking/writing
wrt if we used a list or helper around devices_idr.
>
>> I think if you just wanted to loop over all the devices you could just
>> use a list.
>>
>> Or,
>>
>> Just add a helper around target_core_device.c:devices_idr that just
>> gives you the tcmu devices.
>>
>>
>>
>>> + spin_unlock(&nl_complete_lock);
>>> +
>>> + pr_debug("sleeping for nl reply\n");
>>> + wait_event(udev->complete_wq, nl_cmd->complete);
>> I don't think you will need the complete field then or this function.
>>
>>
>>> +
>>> + spin_lock(&nl_complete_lock);
>>> + nl_cmd->complete = false;
>>> + idr_remove(&complete_wait_udevs, dev_id);
>>> + spin_unlock(&nl_complete_lock);
>>> +
>>> + return 0;
>>> +}
>>> +
>>> /* multicast group */
>>> enum tcmu_multicast_groups {
>>> TCMU_MCGRP_CONFIG,
>>> @@ -311,7 +370,7 @@ static int tcmu_genl_cmd_done(struct genl_info
>>> *info, int completed_cmd)
>>> if (!is_removed)
>>> target_undepend_item(&dev->dev_group.cg_item);
>>> if (!ret)
>>> - complete(&nl_cmd->complete);
>>> + tcmu_complete_wake_up(udev);
>>> return ret;
>>> }
>>> @@ -1258,6 +1317,7 @@ static struct se_device
>>> *tcmu_alloc_device(struct se_hba *hba, const char *name)
>>> timer_setup(&udev->cmd_timer, tcmu_cmd_timedout, 0);
>>> init_waitqueue_head(&udev->nl_cmd_wq);
>>> + init_waitqueue_head(&udev->complete_wq);
>>> spin_lock_init(&udev->nl_cmd_lock);
>>> INIT_RADIX_TREE(&udev->data_blocks, GFP_KERNEL);
>>> @@ -1462,7 +1522,11 @@ static void tcmu_dev_call_rcu(struct rcu_head *p)
>>> kfree(udev->uio_info.name);
>>> kfree(udev->name);
>>> +
>>> + spin_lock(&nl_complete_lock);
>>> + idr_remove(&complete_wait_udevs, udev->dev_id);
>>> kfree(udev);
>>> + spin_unlock(&nl_complete_lock);
>>> }
>>> static int tcmu_check_and_free_pending_cmd(struct tcmu_cmd *cmd)
>>> @@ -1555,7 +1619,6 @@ static void tcmu_init_genl_cmd_reply(struct
>>> tcmu_dev *udev, int cmd)
>>> memset(nl_cmd, 0, sizeof(*nl_cmd));
>>> nl_cmd->cmd = cmd;
>>> - init_completion(&nl_cmd->complete);
>>> spin_unlock(&udev->nl_cmd_lock);
>>> }
>>> @@ -1572,8 +1635,9 @@ static int tcmu_wait_genl_cmd_reply(struct
>>> tcmu_dev *udev)
>>> if (udev->nl_reply_supported <= 0)
>>> return 0;
>>> - pr_debug("sleeping for nl reply\n");
>>> - wait_for_completion(&nl_cmd->complete);
>>> + ret = tcmu_complete_wait(udev);
>>> + if (ret)
>>> + return ret;
>>> spin_lock(&udev->nl_cmd_lock);
>>> nl_cmd->cmd = TCMU_CMD_UNSPEC;
>>> @@ -2323,6 +2387,26 @@ static ssize_t tcmu_block_dev_store(struct
>>> config_item *item, const char *page,
>>> }
>>> CONFIGFS_ATTR(tcmu_, block_dev);
>>> +static ssize_t tcmu_reset_netlink_store(struct config_item *item,
>>> const char *page,
>>> + size_t count)
>>> +{
>>> + u8 val;
>>> + int ret;
>>> +
>>> + ret = kstrtou8(page, 0, &val);
>>> + if (ret < 0)
>>> + return ret;
>>> +
>>> + if (val != 1) {
>>> + pr_err("Invalid block value %d\n", val);
>>> + return -EINVAL;
>>> + }
>>> +
>>> + tcmu_complete_wake_up_all();
>>> + return count;
>>> +}
>>> +CONFIGFS_ATTR_WO(tcmu_, reset_netlink);
>>
>> If it's on the device it should only reset the device its on, so if 2
>> daemons/apps are managing different devices it doesn't mess up the other.
>>
>> Or we could just assume that there is only 1 daemon type and just do a
>> global attr at the module level. Probably just the per device is best in
>> case we end up with people running gluster + qemu+tcmu and ceph +
>> tcmu-runner.
>>
>> If you do the per device then you can just take the insides of
>> tcmu_genl_cmd_done and make it into a helper so that you can do the
>> refcount/target_undepend_item properly and it would do the wake up. In
>> the reset configfs function then grab the nl_cmd_lock, and set the
>> curr_nl_cmd status to some or pass it into the helper, and then call
>> your helper which does the common stuff.
> I thought there should only be 1 daemon existing in user space, and at
I think the most common will be the 1 daemon type setup (it would be
rare to run qemu-tcmu and tcmu-runner at the same time) but we could
have multiple daemon instances of the same type due to containers in the
future.
> the same time; to simplify it, resetting only one device would also
> reset all the others — otherwise we would need to reset all the devices
> one by one.
>
I think we want it per device to match the other code like ring reset
and nl reply supported for example. We then get support for everything
very cheaply.
> If so, I will just do the per device resetting, then the patch will be
> very simple.
>
>>
>>> +
>>> static ssize_t tcmu_reset_ring_store(struct config_item *item,
>>> const char *page,
>>> size_t count)
>>> {
>>> @@ -2363,6 +2447,7 @@ static ssize_t tcmu_reset_ring_store(struct
>>> config_item *item, const char *page,
>>> static struct configfs_attribute *tcmu_action_attrs[] = {
>>> &tcmu_attr_block_dev,
>>> &tcmu_attr_reset_ring,
>>> + &tcmu_attr_reset_netlink,
>>> NULL,
>>> };
>>> @@ -2519,6 +2604,8 @@ static int __init tcmu_module_init(void)
>>> }
>>> tcmu_ops.tb_dev_attrib_attrs = tcmu_attrs;
>>> + spin_lock_init(&nl_complete_lock);
>>> +
>>> ret = transport_backend_register(&tcmu_ops);
>>> if (ret)
>>> goto out_attrs;
>>>
>> --
>> To unsubscribe from this list: send the line "unsubscribe
>> target-devel" in
>> the body of a message to [email protected]
>> More majordomo info at http://vger.kernel.org/majordomo-info.html
>
>