On 09/22/2015 09:31 PM, Ewan Milne wrote:
> On Thu, 2015-08-27 at 14:41 +0200, Hannes Reinecke wrote:
>> Use scsi_vpd_lun_id() to assign a unique device identification
>> to the alua port group structure.
>>
>> Signed-off-by: Hannes Reinecke <[email protected]>
>> ---
>> drivers/scsi/device_handler/scsi_dh_alua.c | 70
>> +++++++++++++++++++++++++++---
>> 1 file changed, 65 insertions(+), 5 deletions(-)
>>
>> diff --git a/drivers/scsi/device_handler/scsi_dh_alua.c
>> b/drivers/scsi/device_handler/scsi_dh_alua.c
>> index dbe9ff2..c2b2100b 100644
>> --- a/drivers/scsi/device_handler/scsi_dh_alua.c
>> +++ b/drivers/scsi/device_handler/scsi_dh_alua.c
>> @@ -70,6 +70,8 @@ static DEFINE_SPINLOCK(port_group_lock);
>> struct alua_port_group {
>> struct kref kref;
>> struct list_head node;
>> + unsigned char device_id_str[256];
>> + int device_id_size;
>
> I prefer _len instead of _size; _size should refer to the size of the buffer,
> not the current length of the data in it.
>
>> int group_id;
>> int tpgs;
>> int state;
>> @@ -229,7 +231,9 @@ static int alua_check_vpd(struct scsi_device *sdev,
>> struct alua_dh_data *h)
>> {
>> unsigned char *d;
>> int group_id = -1;
>> - struct alua_port_group *pg = NULL;
>> + char device_id_str[256];
>> + int device_id_size;
>> + struct alua_port_group *tmp_pg, *pg = NULL;
>>
>> if (!sdev->vpd_pg83)
>> return SCSI_DH_DEV_UNSUPP;
>> @@ -266,9 +270,39 @@ static int alua_check_vpd(struct scsi_device *sdev,
>> struct alua_dh_data *h)
>> h->tpgs = TPGS_MODE_NONE;
>> return SCSI_DH_DEV_UNSUPP;
>> }
>> + device_id_size = scsi_vpd_lun_id(sdev, device_id_str, 256);
>
> should be sizeof(device_id_str) not hardcoded 256
>
Okay.
>> + if (device_id_size <= 0) {
>> + /*
>> + * Internal error: TPGS supported by no
>
> "Internal error: TPGS supported by no" should be "but no"
>
Fixed now.
>> + * device identifcation found.
>> + * Disable ALUA support.
>> + */
>> + sdev_printk(KERN_INFO, sdev,
>> + "%s: No device descriptors found\n",
>> + ALUA_DH_NAME);
>> + h->tpgs = TPGS_MODE_NONE;
>> + return SCSI_DH_DEV_UNSUPP;
>> + }
>> sdev_printk(KERN_INFO, sdev,
>> - "%s: port group %02x rel port %02x\n",
>> - ALUA_DH_NAME, group_id, h->rel_port);
>> + "%s: device %s port group %02x "
>> + "rel port %02x\n", ALUA_DH_NAME,
>> + device_id_str, group_id, h->rel_port);
>> + spin_lock(&port_group_lock);
>> + list_for_each_entry(tmp_pg, &port_group_list, node) {
>> + if (tmp_pg->group_id != group_id)
>> + continue;
>> + if (tmp_pg->device_id_size != device_id_size)
>> + continue;
>> + if (strncmp(tmp_pg->device_id_str, device_id_str,
>> + device_id_size))
>> + continue;
>> + h->pg = tmp_pg;
>> + kref_get(&tmp_pg->kref);
>> + break;
>> + }
>> + spin_unlock(&port_group_lock);
>> + if (h->pg)
>> + return SCSI_DH_OK;
>
> The lookup checks whether h->pg == NULL but the function never
> explicitly sets it to NULL before iterating.
>
For my next iteration I've reworked this so that h->pg is explicitly
set.
>>
>> pg = kzalloc(sizeof(struct alua_port_group), GFP_KERNEL);
>> if (!pg) {
>> @@ -278,13 +312,39 @@ static int alua_check_vpd(struct scsi_device *sdev,
>> struct alua_dh_data *h)
>> /* Temporary failure, bypass */
>> return SCSI_DH_DEV_TEMP_BUSY;
>> }
>> + if (device_id_size)
>> + strncpy(pg->device_id_str, device_id_str, 256);
>
> should be sizeof(device_id_str) not hardcoded 256
>
Okay.
>> + else
>> + pg->device_id_str[0] = '\0';
>> +
>> + pg->device_id_size = device_id_size;
>> pg->group_id = group_id;
>> pg->tpgs = h->tpgs;
>> pg->state = TPGS_STATE_OPTIMIZED;
>> kref_init(&pg->kref);
>> spin_lock(&port_group_lock);
>> - list_add(&pg->node, &port_group_list);
>> - h->pg = pg;
>> + /*
>> + * Re-check list again to catch
>> + * concurrent updates
>> + */
>> + list_for_each_entry(tmp_pg, &port_group_list, node) {
>> + if (tmp_pg->group_id != pg->group_id)
>> + continue;
>> + if (tmp_pg->device_id_size != pg->device_id_size)
>> + continue;
>> + if (strncmp(tmp_pg->device_id_str, pg->device_id_str,
>> + device_id_size))
>> + continue;
>> + h->pg = tmp_pg;
>> + kref_get(&tmp_pg->kref);
>> + kfree(pg);
>
> With the added check for an existing alua_port_group object, and the kfree()
> of the alua_port_group that had been allocated if an existing one is found,
> the code does not do a destroy_workqueue() on pg->work_q.
>
With the current rework I've removed the per-pg workqueues, so that
issue doesn't occur anymore.
>> + pg = NULL;
>> + break;
>> + }
>> + if (pg) {
>> + list_add(&pg->node, &port_group_list);
>> + h->pg = pg;
>> + }
>> spin_unlock(&port_group_lock);
>>
>> return SCSI_DH_OK;
>
> An explanation in the comments about the ALUA topology and what the device_id
> vs. the group_id represents might be helpful. It occurred to me that if
> someone doesn't understand that you can have the same device_id behind
> different port groups they won't understand this code.
>
> HOST ----> STORAGE CTRL PG 1 ----> LUN X
> ----> ----> LUN X
> ----> STORAGE CTRL PG 2 ----> LUN X
> ----> ----> LUN X
>
Hmm. Someone willing to understand this code should be reasonably
familiar with SPC, so I doubt that'll be an issue.
Cheers,
Hannes
--
Dr. Hannes Reinecke zSeries & Storage
[email protected] +49 911 74053 688
SUSE LINUX GmbH, Maxfeldstr. 5, 90409 Nürnberg
GF: F. Imendörffer, J. Smithard, J. Guild, D. Upmanyu, G. Norton
HRB 21284 (AG Nürnberg)
--
To unsubscribe from this list: send the line "unsubscribe linux-scsi" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html