Re: [PATCH 07/14] target/sbc: Add P_TYPE + PROT_EN bits to READ_CAPACITY_16
On 1/8/2014 10:36 PM, Nicholas A. Bellinger wrote: From: Nicholas Bellinger n...@linux-iscsi.org This patch updates sbc_emulate_readcapacity_16() to set P_TYPE and PROT_EN bits when DIF emulation is enabled by the backend device. Cc: Martin K. Petersen martin.peter...@oracle.com Cc: Christoph Hellwig h...@lst.de Cc: Hannes Reinecke h...@suse.de Cc: Sagi Grimberg sa...@mellanox.com Cc: Or Gerlitz ogerl...@mellanox.com Signed-off-by: Nicholas Bellinger n...@linux-iscsi.org --- drivers/target/target_core_sbc.c |5 + 1 file changed, 5 insertions(+) diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index 366b9bb..22599e8 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -106,6 +106,11 @@ sbc_emulate_readcapacity_16(struct se_cmd *cmd) buf[9] = (dev-dev_attrib.block_size 16) 0xff; buf[10] = (dev-dev_attrib.block_size 8) 0xff; buf[11] = dev-dev_attrib.block_size 0xff; + /* +* Set P_TYPE and PROT_EN bits for DIF support +*/ + if (dev-dev_attrib.pi_prot_type) + buf[12] = (dev-dev_attrib.pi_prot_type - 1) 1 | 0x1; if (dev-transport-get_lbppbe) buf[13] = dev-transport-get_lbppbe(dev) 0x0f; Hey Nic, What about FORMAT_UNIT emulation? The backstore protection configuration is done at the target side via configfs/targetcli, if you publish DIF support in INQUERY_EVPD/READ_CAPACITY you need to accept protection information format? Did I miss that one? or is it still under WIP? Sagi. -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 13/14] target/rd: Add DIF protection into rd_execute_rw
On 1/8/2014 10:36 PM, Nicholas A. Bellinger wrote: From: Nicholas Bellinger n...@linux-iscsi.org This patch adds support for DIF protection into rd_execute_rw() code for WRITE/READ I/O using sbc_dif_verify_[write,read]() logic. It also adds rd_get_prot_table() for locating protection SGLs assoicated with the ramdisk backend device. Cc: Martin K. Petersen martin.peter...@oracle.com Cc: Christoph Hellwig h...@lst.de Cc: Hannes Reinecke h...@suse.de Cc: Sagi Grimberg sa...@mellanox.com Cc: Or Gerlitz ogerl...@mellanox.com Signed-off-by: Nicholas Bellinger n...@linux-iscsi.org --- drivers/target/target_core_rd.c | 67 +++ 1 file changed, 67 insertions(+) diff --git a/drivers/target/target_core_rd.c b/drivers/target/target_core_rd.c index dd99844..3fd51eb 100644 --- a/drivers/target/target_core_rd.c +++ b/drivers/target/target_core_rd.c @@ -363,6 +363,26 @@ static struct rd_dev_sg_table *rd_get_sg_table(struct rd_dev *rd_dev, u32 page) return NULL; } +static struct rd_dev_sg_table *rd_get_prot_table(struct rd_dev *rd_dev, u32 page) +{ + struct rd_dev_sg_table *sg_table; + u32 i, sg_per_table = (RD_MAX_ALLOCATION_SIZE / + sizeof(struct scatterlist)); + + i = page / sg_per_table; + if (i rd_dev-sg_prot_count) { + sg_table = rd_dev-sg_prot_array[i]; + if ((sg_table-page_start_offset = page) +(sg_table-page_end_offset = page)) + return sg_table; + } + + pr_err(Unable to locate struct prot rd_dev_sg_table for page: %u\n, + page); + + return NULL; +} + static sense_reason_t rd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, enum dma_data_direction data_direction) @@ -377,6 +397,7 @@ rd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, u32 rd_page; u32 src_len; u64 tmp; + sense_reason_t rc; if (dev-rd_flags RDF_NULLIO) { target_complete_cmd(cmd, SAM_STAT_GOOD); @@ -399,6 +420,29 @@ rd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, data_direction == DMA_FROM_DEVICE ? 
Read : Write, cmd-t_task_lba, rd_size, rd_page, rd_offset); + if ((cmd-se_cmd_flags SCF_PROT) se_dev-dev_attrib.pi_prot_type + data_direction == DMA_TO_DEVICE) { + sector_t sector = cmd-data_length / se_dev-dev_attrib.block_size; + struct rd_dev_sg_table *prot_table; + struct scatterlist *prot_sg; + u32 prot_offset, prot_page; + + tmp = cmd-t_task_lba * se_dev-prot_length; + prot_offset = do_div(tmp, PAGE_SIZE); + prot_page = tmp; + + prot_table = rd_get_prot_table(dev, prot_page); + if (!prot_table) + return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; + + prot_sg = prot_table-sg_table[prot_page - prot_table-page_start_offset]; + + rc = sbc_dif_verify_write(cmd, cmd-t_task_lba, sector, 0, + prot_sg, prot_offset); + if (rc) + return rc; + } + src_len = PAGE_SIZE - rd_offset; sg_miter_start(m, sgl, sgl_nents, data_direction == DMA_FROM_DEVICE ? @@ -460,6 +504,29 @@ rd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, } sg_miter_stop(m); + if ((cmd-se_cmd_flags SCF_PROT) se_dev-dev_attrib.pi_prot_type + data_direction == DMA_FROM_DEVICE) { + sector_t sector = cmd-data_length / se_dev-dev_attrib.block_size; + struct rd_dev_sg_table *prot_table; + struct scatterlist *prot_sg; + u32 prot_offset, prot_page; + + tmp = cmd-t_task_lba * se_dev-prot_length; + prot_offset = do_div(tmp, PAGE_SIZE); + prot_page = tmp; + + prot_table = rd_get_prot_table(dev, prot_page); + if (!prot_table) + return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; + + prot_sg = prot_table-sg_table[prot_page - prot_table-page_start_offset]; + + rc = sbc_dif_verify_read(cmd, cmd-t_task_lba, sector, 0, +prot_sg, prot_offset); + if (rc) + return rc; + } + target_complete_cmd(cmd, SAM_STAT_GOOD); return 0; } I wonder how we can skip sbc_dif_verify_ if the transport already offloaded DIF verify. 
I think that the transport should signal the core layer that it is able to offload DIF (ADD/STRIP/PASS/VERIFY), in which case the core should turn off the backstore DIF verify emulation to sustain performance. I assume that if backstore DIF verify
Re: [PATCH 02/14] target: Add DIF CHECK_CONDITION ASC/ASCQ exception cases
On 1/8/2014 10:36 PM, Nicholas A. Bellinger wrote: From: Nicholas Bellinger n...@linux-iscsi.org This patch adds support for DIF related CHECK_CONDITION ASC/ASCQ exception cases into transport_send_check_condition_and_sense(). This includes: LOGICAL BLOCK GUARD CHECK FAILED LOGICAL BLOCK APPLICATION TAG CHECK FAILED LOGICAL BLOCK REFERENCE TAG CHECK FAILED that used by DIF TYPE1 and TYPE3 failure cases. Cc: Martin K. Petersen martin.peter...@oracle.com Cc: Christoph Hellwig h...@lst.de Cc: Hannes Reinecke h...@suse.de Cc: Sagi Grimberg sa...@mellanox.com Cc: Or Gerlitz ogerl...@mellanox.com Signed-off-by: Nicholas Bellinger n...@linux-iscsi.org --- drivers/target/target_core_transport.c | 30 ++ include/target/target_core_base.h |3 +++ 2 files changed, 33 insertions(+) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 91953da..707ee17 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2648,6 +2648,36 @@ transport_send_check_condition_and_sense(struct se_cmd *cmd, buffer[SPC_ASC_KEY_OFFSET] = 0x1d; buffer[SPC_ASCQ_KEY_OFFSET] = 0x00; break; + case TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED: + /* CURRENT ERROR */ + buffer[0] = 0x70; + buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10; + /* ILLEGAL REQUEST */ + buffer[SPC_SENSE_KEY_OFFSET] = ILLEGAL_REQUEST; + /* LOGICAL BLOCK GUARD CHECK FAILED */ + buffer[SPC_ASC_KEY_OFFSET] = 0x10; + buffer[SPC_ASCQ_KEY_OFFSET] = 0x01; You have Enums for ASCQs (TARGET_GUARD_CHECK_FAILED, TARGET_APPTAG_CHECK_FAILED, TARGET_REFTAG_CHECK_FAILED). either use them or loose them. 
+ break; + case TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED: + /* CURRENT ERROR */ + buffer[0] = 0x70; + buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10; + /* ILLEGAL REQUEST */ + buffer[SPC_SENSE_KEY_OFFSET] = ILLEGAL_REQUEST; + /* LOGICAL BLOCK APPLICATION TAG CHECK FAILED */ + buffer[SPC_ASC_KEY_OFFSET] = 0x10; + buffer[SPC_ASCQ_KEY_OFFSET] = 0x02; + break; + case TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED: + /* CURRENT ERROR */ + buffer[0] = 0x70; + buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10; + /* ILLEGAL REQUEST */ + buffer[SPC_SENSE_KEY_OFFSET] = ILLEGAL_REQUEST; + /* LOGICAL BLOCK REFERENCE TAG CHECK FAILED */ + buffer[SPC_ASC_KEY_OFFSET] = 0x10; + buffer[SPC_ASCQ_KEY_OFFSET] = 0x03; + break; case TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE: default: /* CURRENT ERROR */ diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 15f402c..9a6e091 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -206,6 +206,9 @@ enum tcm_sense_reason_table { TCM_OUT_OF_RESOURCES= R(0x12), TCM_PARAMETER_LIST_LENGTH_ERROR = R(0x13), TCM_MISCOMPARE_VERIFY = R(0x14), + TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED= R(0x15), + TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED = R(0x16), + TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED = R(0x17), #undef R }; -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 01/14] target: Add DIF related base definitions
On 1/8/2014 10:36 PM, Nicholas A. Bellinger wrote: From: Nicholas Bellinger n...@linux-iscsi.org This patch adds DIF related definitions to target_core_base.h that includes enums for target_prot_op + target_prot_type + target_prot_version + target_guard_type + target_pi_error. Also included is struct se_dif_v1_tuple, along with changes to struct se_cmd, struct se_dev_attrib, and struct se_device. Also, add new se_subsystem_api-[init,free]_prot() callers used by target core code to setup backend specific protection information after the device has been configured. Enums taken from Sagi Grimberg's original patch. Cc: Martin K. Petersen martin.peter...@oracle.com Cc: Christoph Hellwig h...@lst.de Cc: Hannes Reinecke h...@suse.de Cc: Sagi Grimberg sa...@mellanox.com Cc: Or Gerlitz ogerl...@mellanox.com Signed-off-by: Nicholas Bellinger n...@linux-iscsi.org target: more defs Signed-off-by: Nicholas Bellinger n...@linux-iscsi.org --- include/target/target_core_backend.h |2 ++ include/target/target_core_base.h| 59 ++ 2 files changed, 61 insertions(+) diff --git a/include/target/target_core_backend.h b/include/target/target_core_backend.h index 39e0114..930f30d 100644 --- a/include/target/target_core_backend.h +++ b/include/target/target_core_backend.h @@ -41,6 +41,8 @@ struct se_subsystem_api { unsigned int (*get_io_opt)(struct se_device *); unsigned char *(*get_sense_buffer)(struct se_cmd *); bool (*get_write_cache)(struct se_device *); + int (*init_prot)(struct se_device *); + void (*free_prot)(struct se_device *); }; struct sbc_ops { diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 45412a6..15f402c 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -166,6 +166,7 @@ enum se_cmd_flags_table { SCF_COMPARE_AND_WRITE = 0x0008, SCF_COMPARE_AND_WRITE_POST = 0x0010, SCF_CMD_XCOPY_PASSTHROUGH = 0x0020, + SCF_PROT= 0x0040, }; /* struct se_dev_entry-lun_flags and struct se_lun-lun_access */ @@ -414,6 
+415,45 @@ struct se_tmr_req { struct list_headtmr_list; }; +enum target_prot_op { + TARGET_PROT_NORMAL, + TARGET_PROT_READ_INSERT, + TARGET_PROT_WRITE_INSERT, + TARGET_PROT_READ_STRIP, + TARGET_PROT_WRITE_STRIP, + TARGET_PROT_READ_PASS, + TARGET_PROT_WRITE_PASS, +}; + +enum target_prot_type { + TARGET_DIF_TYPE0_PROT, + TARGET_DIF_TYPE1_PROT, + TARGET_DIF_TYPE2_PROT, + TARGET_DIF_TYPE3_PROT, +}; + +enum target_prot_version { + TARGET_DIF_V1 = 1, + TARGET_DIF_V2 = 2, +}; + +enum target_guard_type { + TARGET_DIX_GUARD_CRC = 1, + TARGET_DIX_GUARD_IP = 2, +}; + +enum target_pi_error { + TARGET_GUARD_CHECK_FAILED = 0x1, + TARGET_APPTAG_CHECK_FAILED = 0x2, + TARGET_REFTAG_CHECK_FAILED = 0x3, +}; + +struct se_dif_v1_tuple { + __be16 guard_tag; + __be16 app_tag; + __be32 ref_tag; +}; + struct se_cmd { /* SAM response code being sent to initiator */ u8 scsi_status; @@ -498,6 +538,20 @@ struct se_cmd { /* Used for lun-lun_ref counting */ boollun_ref_active; + + /* DIF related members */ + enum target_prot_op prot_op; + enum target_prot_type prot_type; + enum target_guard_type bg_type; + u16 bg_seed; + u16 reftag_seed; + u32 apptag_seed; + u32 prot_length; + struct scatterlist *t_prot_sg; + unsigned intt_prot_nents; + boolprot_interleaved; + enum target_pi_errorpi_err; + u32 block_num; }; Some of these guys are unreferenced... I figured these should provide necessary info both for the transport and the backstores. Regarding prot_interleaved, I don't remember if we agreed to allow backstores to store the protection interleaved with the data, which seems to make some sense in pSCSI. Anyway, I added this flag simply because some transports support it (iSER) and it might be useful to avoid de-interleaving + re-interleaving the buffers. In my toy example I modified fileio to support both storing data+protection interleaved and storing protection in a seperate file.protection (without verify - left it for the transport) and let the user choose via configfs (protection_handover). 
I think we should hear more opinions here. struct se_ua { @@ -609,6 +663,9 @@ struct se_dev_attrib { int emulate_tpws; int emulate_caw; int emulate_3pc; + enum target_prot_type pi_prot_type; + enum
Re: [PATCH 09/14] target/configfs: Expose protection device attributes
On 1/8/2014 10:36 PM, Nicholas A. Bellinger wrote: From: Nicholas Bellinger n...@linux-iscsi.org This patch adds support for exposing DIF protection device attributes via configfs. This includes: pi_prot_type: Protection Type (0, 1, 3 currently support) pi_prot_version: Protection Version (DIF v1 currently supported) pi_guard_type: Guard Type (1=DIF CRC, 2=IP CRC) Within se_dev_set_pi_prot_type() it also adds the se_subsystem_api device callbacks to setup per device protection information. Suggestion, expose pi_prot_format and call transport-init_prot() there. It is more explicit and this routine should be called upon getting FORMAT_UNIT as well. Cc: Martin K. Petersen martin.peter...@oracle.com Cc: Christoph Hellwig h...@lst.de Cc: Hannes Reinecke h...@suse.de Cc: Sagi Grimberg sa...@mellanox.com Cc: Or Gerlitz ogerl...@mellanox.com Signed-off-by: Nicholas Bellinger n...@linux-iscsi.org --- drivers/target/target_core_configfs.c | 12 ++ drivers/target/target_core_device.c | 65 + drivers/target/target_core_internal.h |2 + 3 files changed, 79 insertions(+) diff --git a/drivers/target/target_core_configfs.c b/drivers/target/target_core_configfs.c index 272755d..0f1101c 100644 --- a/drivers/target/target_core_configfs.c +++ b/drivers/target/target_core_configfs.c @@ -643,6 +643,15 @@ SE_DEV_ATTR(emulate_caw, S_IRUGO | S_IWUSR); DEF_DEV_ATTRIB(emulate_3pc); SE_DEV_ATTR(emulate_3pc, S_IRUGO | S_IWUSR); +DEF_DEV_ATTRIB(pi_prot_type); +SE_DEV_ATTR(pi_prot_type, S_IRUGO | S_IWUSR); + +DEF_DEV_ATTRIB_RO(pi_prot_version); +SE_DEV_ATTR_RO(pi_prot_version); + +DEF_DEV_ATTRIB(pi_guard_type); +SE_DEV_ATTR(pi_guard_type, S_IRUGO | S_IWUSR); + DEF_DEV_ATTRIB(enforce_pr_isids); SE_DEV_ATTR(enforce_pr_isids, S_IRUGO | S_IWUSR); @@ -702,6 +711,9 @@ static struct configfs_attribute *target_core_dev_attrib_attrs[] = { target_core_dev_attrib_emulate_tpws.attr, target_core_dev_attrib_emulate_caw.attr, target_core_dev_attrib_emulate_3pc.attr, + target_core_dev_attrib_pi_prot_type.attr, + 
target_core_dev_attrib_pi_prot_version.attr, + target_core_dev_attrib_pi_guard_type.attr, target_core_dev_attrib_enforce_pr_isids.attr, target_core_dev_attrib_is_nonrot.attr, target_core_dev_attrib_emulate_rest_reord.attr, diff --git a/drivers/target/target_core_device.c b/drivers/target/target_core_device.c index 207b340..2b59beb 100644 --- a/drivers/target/target_core_device.c +++ b/drivers/target/target_core_device.c @@ -918,6 +918,67 @@ int se_dev_set_emulate_3pc(struct se_device *dev, int flag) return 0; } +int se_dev_set_pi_prot_type(struct se_device *dev, int flag) +{ + int rc, old_prot = dev-dev_attrib.pi_prot_type; + + if (flag != 0 flag != 1 flag != 2 flag != 3) { + pr_err(Illegal value %d for pi_prot_type\n, flag); + return -EINVAL; + } + if (flag == 2) { + pr_err(DIF TYPE2 protection currently not supported\n); + return -ENOSYS; + } + if (!dev-transport-init_prot || !dev-transport-free_prot) { + pr_err(DIF protection not supported by backend: %s\n, + dev-transport-name); + return -ENOSYS; + } + if (!(dev-dev_flags DF_CONFIGURED)) { + pr_err(DIF protection requires device to be configured\n); + return -ENODEV; + } + if (dev-export_count) { + pr_err(dev[%p]: Unable to change SE Device PROT type while + export_count is %d\n, dev, dev-export_count); + return -EINVAL; + } + + dev-dev_attrib.pi_prot_type = flag; + + if (flag !old_prot) { + rc = dev-transport-init_prot(dev); + if (rc) { + dev-dev_attrib.pi_prot_type = old_prot; + return rc; + } + } else if (!flag old_prot) { + dev-transport-free_prot(dev); + } + pr_debug(dev[%p]: SE Device Protection Type: %d\n, dev, flag); + + return 0; +} + +int se_dev_set_pi_guard_type(struct se_device *dev, int flag) +{ + if (flag != 1 flag != 2) { + pr_err(Illegal value %d for pi_guard_type\n, flag); + return -EINVAL; + } + if (dev-export_count) { + pr_err(dev[%p]: Unable to change SE Device GUARD type while + export_count is %d\n, dev, dev-export_count); + return -EINVAL; + } + + dev-dev_attrib.pi_guard_type = flag; + 
pr_debug(dev[%p]: SE Device Guard Type: %d\n, dev, flag); + + return 0; +} + int se_dev_set_enforce_pr_isids(struct se_device *dev, int flag) { if ((flag != 0) (flag != 1)) { @@ -1415,6 +1476,7 @@ struct se_device *target_alloc_device(struct se_hba *hba, const char *name) dev-dev_link_magic
Re: [PATCH 03/14] target/sbc: Add sbc_check_prot + update sbc_parse_cdb for DIF
On 1/8/2014 10:36 PM, Nicholas A. Bellinger wrote: From: Nicholas Bellinger n...@linux-iscsi.org This patch adds sbc_check_prot() for performing various DIF related CDB sanity checks, along with setting SCF_PROT once sanity checks have passed. Also, add calls in sbc_parse_cdb() for READ_[10,12,16] + WRITE_[10,12,16] to perform DIF sanity checking. Cc: Martin K. Petersen martin.peter...@oracle.com Cc: Christoph Hellwig h...@lst.de Cc: Hannes Reinecke h...@suse.de Cc: Sagi Grimberg sa...@mellanox.com Cc: Or Gerlitz ogerl...@mellanox.com Signed-off-by: Nicholas Bellinger n...@linux-iscsi.org --- drivers/target/target_core_sbc.c | 39 ++ 1 file changed, 39 insertions(+) diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index 52ae54e..600ffcb 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -563,6 +563,27 @@ sbc_compare_and_write(struct se_cmd *cmd) return TCM_NO_SENSE; } +bool +sbc_check_prot(struct se_device *dev, struct se_cmd *cmd, unsigned char *cdb) +{ + if (!dev-dev_attrib.pi_prot_type) + return true; + + if (dev-dev_attrib.pi_prot_type == TARGET_DIF_TYPE2_PROT + (cdb[1] 0xe0)) + return false; + + if (!(cdb[1] 0xe0)) { + pr_warn(Target: Unprotected READ/WRITE to DIF device\n); + return true; + } + if (!cmd-t_prot_sg || !cmd-t_prot_nents) + return true; + + cmd-se_cmd_flags |= SCF_PROT; Isn't this the place to fill the se_cmd DIF execution parameters? prot_op, prot_type, guard_type, initial_reftag, apptag etc... Next, all parties interested in DIF execution should look in se_cmd (backstore, transport). 
+ return true; +} + sense_reason_t sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) { @@ -581,6 +602,9 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) cmd-execute_cmd = sbc_execute_rw; break; case READ_10: + if (!sbc_check_prot(dev, cmd, cdb)) + return TCM_UNSUPPORTED_SCSI_OPCODE; + sectors = transport_get_sectors_10(cdb); cmd-t_task_lba = transport_lba_32(cdb); cmd-se_cmd_flags |= SCF_SCSI_DATA_CDB; @@ -588,6 +612,9 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) cmd-execute_cmd = sbc_execute_rw; break; case READ_12: + if (!sbc_check_prot(dev, cmd, cdb)) + return TCM_UNSUPPORTED_SCSI_OPCODE; + sectors = transport_get_sectors_12(cdb); cmd-t_task_lba = transport_lba_32(cdb); cmd-se_cmd_flags |= SCF_SCSI_DATA_CDB; @@ -595,6 +622,9 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) cmd-execute_cmd = sbc_execute_rw; break; case READ_16: + if (!sbc_check_prot(dev, cmd, cdb)) + return TCM_UNSUPPORTED_SCSI_OPCODE; + sectors = transport_get_sectors_16(cdb); cmd-t_task_lba = transport_lba_64(cdb); cmd-se_cmd_flags |= SCF_SCSI_DATA_CDB; @@ -610,6 +640,9 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) break; case WRITE_10: case WRITE_VERIFY: + if (!sbc_check_prot(dev, cmd, cdb)) + return TCM_UNSUPPORTED_SCSI_OPCODE; + sectors = transport_get_sectors_10(cdb); cmd-t_task_lba = transport_lba_32(cdb); if (cdb[1] 0x8) @@ -619,6 +652,9 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) cmd-execute_cmd = sbc_execute_rw; break; case WRITE_12: + if (!sbc_check_prot(dev, cmd, cdb)) + return TCM_UNSUPPORTED_SCSI_OPCODE; + sectors = transport_get_sectors_12(cdb); cmd-t_task_lba = transport_lba_32(cdb); if (cdb[1] 0x8) @@ -628,6 +664,9 @@ sbc_parse_cdb(struct se_cmd *cmd, struct sbc_ops *ops) cmd-execute_cmd = sbc_execute_rw; break; case WRITE_16: + if (!sbc_check_prot(dev, cmd, cdb)) + return TCM_UNSUPPORTED_SCSI_OPCODE; + sectors = transport_get_sectors_16(cdb); cmd-t_task_lba = transport_lba_64(cdb); if (cdb[1] 0x8) -- To unsubscribe 
from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 02/11] IB/isert: separate connection protection domains and dma MRs
It is more correct to seperate connections protection domains and dma_mr handles. protection information support requires to do so. Signed-off-by: Sagi Grimberg sa...@mellanox.com --- drivers/infiniband/ulp/isert/ib_isert.c | 46 --- drivers/infiniband/ulp/isert/ib_isert.h |2 - 2 files changed, 24 insertions(+), 24 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 6be57c3..3dd2427 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -248,13 +248,6 @@ isert_create_device_ib_res(struct isert_device *device) } cq_desc = device-cq_desc; - device-dev_pd = ib_alloc_pd(ib_dev); - if (IS_ERR(device-dev_pd)) { - ret = PTR_ERR(device-dev_pd); - pr_err(ib_alloc_pd failed for dev_pd: %d\n, ret); - goto out_cq_desc; - } - for (i = 0; i device-cqs_used; i++) { cq_desc[i].device = device; cq_desc[i].cq_index = i; @@ -282,13 +275,6 @@ isert_create_device_ib_res(struct isert_device *device) goto out_cq; } - device-dev_mr = ib_get_dma_mr(device-dev_pd, IB_ACCESS_LOCAL_WRITE); - if (IS_ERR(device-dev_mr)) { - ret = PTR_ERR(device-dev_mr); - pr_err(ib_get_dma_mr failed for dev_mr: %d\n, ret); - goto out_cq; - } - return 0; out_cq: @@ -304,9 +290,6 @@ out_cq: ib_destroy_cq(device-dev_tx_cq[j]); } } - ib_dealloc_pd(device-dev_pd); - -out_cq_desc: kfree(device-cq_desc); return ret; @@ -329,8 +312,6 @@ isert_free_device_ib_res(struct isert_device *device) device-dev_tx_cq[i] = NULL; } - ib_dereg_mr(device-dev_mr); - ib_dealloc_pd(device-dev_pd); kfree(device-cq_desc); } @@ -437,7 +418,7 @@ isert_conn_create_frwr_pool(struct isert_conn *isert_conn) goto err; } - fr_desc-data_mr = ib_alloc_fast_reg_mr(device-dev_pd, + fr_desc-data_mr = ib_alloc_fast_reg_mr(isert_conn-conn_pd, ISCSI_ISER_SG_TABLESIZE); if (IS_ERR(fr_desc-data_mr)) { pr_err(Failed to allocate frmr err=%ld\n, @@ -546,8 +527,22 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) } 
isert_conn-conn_device = device; - isert_conn-conn_pd = device-dev_pd; - isert_conn-conn_mr = device-dev_mr; + isert_conn-conn_pd = ib_alloc_pd(isert_conn-conn_device-ib_device); + if (IS_ERR(isert_conn-conn_pd)) { + ret = PTR_ERR(isert_conn-conn_pd); + pr_err(ib_alloc_pd failed for conn %p: ret=%d\n, + isert_conn, ret); + goto out_pd; + } + + isert_conn-conn_mr = ib_get_dma_mr(isert_conn-conn_pd, + IB_ACCESS_LOCAL_WRITE); + if (IS_ERR(isert_conn-conn_mr)) { + ret = PTR_ERR(isert_conn-conn_mr); + pr_err(ib_get_dma_mr failed for conn %p: ret=%d\n, + isert_conn, ret); + goto out_mr; + } if (device-use_frwr) { ret = isert_conn_create_frwr_pool(isert_conn); @@ -573,6 +568,10 @@ out_conn_dev: if (device-use_frwr) isert_conn_free_frwr_pool(isert_conn); out_frwr: + ib_dereg_mr(isert_conn-conn_mr); +out_mr: + ib_dealloc_pd(isert_conn-conn_pd); +out_pd: isert_device_try_release(device); out_rsp_dma_map: ib_dma_unmap_single(ib_dev, isert_conn-login_rsp_dma, @@ -611,6 +610,9 @@ isert_connect_release(struct isert_conn *isert_conn) isert_free_rx_descriptors(isert_conn); rdma_destroy_id(isert_conn-conn_cm_id); + ib_dereg_mr(isert_conn-conn_mr); + ib_dealloc_pd(isert_conn-conn_pd); + if (isert_conn-login_buf) { ib_dma_unmap_single(ib_dev, isert_conn-login_rsp_dma, ISER_RX_LOGIN_SIZE, DMA_TO_DEVICE); diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index 691f90f..dec74d4 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -144,8 +144,6 @@ struct isert_device { int refcount; int cq_active_qps[ISERT_MAX_CQ]; struct ib_device*ib_device; - struct ib_pd*dev_pd; - struct ib_mr*dev_mr; struct ib_cq*dev_rx_cq[ISERT_MAX_CQ]; struct ib_cq*dev_tx_cq[ISERT_MAX_CQ]; struct isert_cq_desc*cq_desc; -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message
[PATCH 03/11] IB/isert: Avoid frwr notation, use fastreg
Use fast registration lingo. fast registration will also incorporate signature/DIF registration. Signed-off-by: Sagi Grimberg sa...@mellanox.com --- drivers/infiniband/ulp/isert/ib_isert.c | 84 --- drivers/infiniband/ulp/isert/ib_isert.h |8 ++-- 2 files changed, 47 insertions(+), 45 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 3dd2427..295d2be 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -47,10 +47,10 @@ static int isert_map_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, struct isert_rdma_wr *wr); static void -isert_unreg_rdma_frwr(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn); +isert_unreg_rdma(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn); static int -isert_reg_rdma_frwr(struct iscsi_conn *conn, struct iscsi_cmd *cmd, - struct isert_rdma_wr *wr); +isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, + struct isert_rdma_wr *wr); static void isert_qp_event_callback(struct ib_event *e, void *context) @@ -225,11 +225,11 @@ isert_create_device_ib_res(struct isert_device *device) /* asign function handlers */ if (dev_attr-device_cap_flags IB_DEVICE_MEM_MGT_EXTENSIONS) { - device-use_frwr = 1; - device-reg_rdma_mem = isert_reg_rdma_frwr; - device-unreg_rdma_mem = isert_unreg_rdma_frwr; + device-use_fastreg = 1; + device-reg_rdma_mem = isert_reg_rdma; + device-unreg_rdma_mem = isert_unreg_rdma; } else { - device-use_frwr = 0; + device-use_fastreg = 0; device-reg_rdma_mem = isert_map_rdma; device-unreg_rdma_mem = isert_unmap_cmd; } @@ -237,9 +237,10 @@ isert_create_device_ib_res(struct isert_device *device) device-cqs_used = min_t(int, num_online_cpus(), device-ib_device-num_comp_vectors); device-cqs_used = min(ISERT_MAX_CQ, device-cqs_used); - pr_debug(Using %d CQs, device %s supports %d vectors support FRWR %d\n, + pr_debug(Using %d CQs, device %s supports %d vectors support +Fast registration %d\n, 
device-cqs_used, device-ib_device-name, -device-ib_device-num_comp_vectors, device-use_frwr); +device-ib_device-num_comp_vectors, device-use_fastreg); device-cq_desc = kzalloc(sizeof(struct isert_cq_desc) * device-cqs_used, GFP_KERNEL); if (!device-cq_desc) { @@ -367,18 +368,18 @@ isert_device_find_by_ib_dev(struct rdma_cm_id *cma_id) } static void -isert_conn_free_frwr_pool(struct isert_conn *isert_conn) +isert_conn_free_fastreg_pool(struct isert_conn *isert_conn) { struct fast_reg_descriptor *fr_desc, *tmp; int i = 0; - if (list_empty(isert_conn-conn_frwr_pool)) + if (list_empty(isert_conn-conn_fr_pool)) return; - pr_debug(Freeing conn %p frwr pool, isert_conn); + pr_debug(Freeing conn %p fastreg pool, isert_conn); list_for_each_entry_safe(fr_desc, tmp, -isert_conn-conn_frwr_pool, list) { +isert_conn-conn_fr_pool, list) { list_del(fr_desc-list); ib_free_fast_reg_page_list(fr_desc-data_frpl); ib_dereg_mr(fr_desc-data_mr); @@ -386,20 +387,20 @@ isert_conn_free_frwr_pool(struct isert_conn *isert_conn) ++i; } - if (i isert_conn-conn_frwr_pool_size) + if (i isert_conn-conn_fr_pool_size) pr_warn(Pool still has %d regions registered\n, - isert_conn-conn_frwr_pool_size - i); + isert_conn-conn_fr_pool_size - i); } static int -isert_conn_create_frwr_pool(struct isert_conn *isert_conn) +isert_conn_create_fastreg_pool(struct isert_conn *isert_conn) { struct fast_reg_descriptor *fr_desc; struct isert_device *device = isert_conn-conn_device; int i, ret; - INIT_LIST_HEAD(isert_conn-conn_frwr_pool); - isert_conn-conn_frwr_pool_size = 0; + INIT_LIST_HEAD(isert_conn-conn_fr_pool); + isert_conn-conn_fr_pool_size = 0; for (i = 0; i ISCSI_DEF_XMIT_CMDS_MAX; i++) { fr_desc = kzalloc(sizeof(*fr_desc), GFP_KERNEL); if (!fr_desc) { @@ -431,17 +432,17 @@ isert_conn_create_frwr_pool(struct isert_conn *isert_conn) fr_desc, fr_desc-data_frpl-page_list); fr_desc-valid = true; - list_add_tail(fr_desc-list, isert_conn-conn_frwr_pool); - isert_conn-conn_frwr_pool_size++; + 
list_add_tail(fr_desc-list, isert_conn-conn_fr_pool); + isert_conn-conn_fr_pool_size
[PATCH 01/11] Target/core: Fixes for isert compilation
replace prot_interleaved with prot_handover in se_cmd. Signed-off-by: Sagi Grimberg sa...@mellanox.com --- include/target/target_core_base.h | 22 ++ 1 files changed, 14 insertions(+), 8 deletions(-) diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index 13daea5..2ae304d 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -439,14 +439,20 @@ struct se_tmr_req { struct list_headtmr_list; }; +#define TARGET_DIF_SIZE 8 enum target_prot_op { - TARGET_PROT_NORMAL, - TARGET_PROT_READ_INSERT, - TARGET_PROT_WRITE_INSERT, - TARGET_PROT_READ_STRIP, - TARGET_PROT_WRITE_STRIP, - TARGET_PROT_READ_PASS, - TARGET_PROT_WRITE_PASS, + TARGET_PROT_NORMAL = 0, + TARGET_PROT_DIN_INSERT, + TARGET_PROT_DOUT_INSERT, + TARGET_PROT_DIN_STRIP, + TARGET_PROT_DOUT_STRIP, + TARGET_PROT_DIN_PASS, + TARGET_PROT_DOUT_PASS +}; + +enum target_prot_ho { + PROT_SEPERATED, + PROT_INTERLEAVED, }; enum target_prot_type { @@ -573,7 +579,7 @@ struct se_cmd { u32 prot_length; struct scatterlist *t_prot_sg; unsigned intt_prot_nents; - boolprot_interleaved; + enum target_prot_ho prot_handover; enum target_pi_errorpi_err; u32 block_num; }; -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 00/11] iSER target initial support for T10-DIF offload
Hey Nic, MKP, SCSI and RDMA folks, This patchset adds basic support for T10-DIF protection information offload in iSER target on top of Nic's recent work and RDMA signature verbs API. This code was tested with my own implementation of the target core T10-PI support which was designed mainly to activate the transport DIF offload. In order to actually get Linux SCSI target to work with iSER T10-DIF offload a couple of patches need to be added to Nic's work which is ongoing. Apart from doing the actual iser implementation for T10-DIF offload, this series would help to see the full picture by: * Showing how the T10-DIF offload verbs are used * Showing how fabric transport offload plugs into the target core The T10-DIF signature offload verbs and mlx5 driver implementation patches are available from the for-next branch of git://beany.openfabrics.org/~ogerlitz/linux-2.6.git as the below commits: 2b4316b IB/mlx5: Publish support in signature feature ef3130d IB/mlx5: Collect signature error completion c1b37b1 IB/mlx5: Support IB_WR_REG_SIG_MR f5d8496 IB/mlx5: Keep mlx5 MRs in a radix tree under device 72a72ee IB/mlx5: remove MTT access mode from umr flags helper function ccb0a907 IB/mlx5: Break wqe handling to begin & finish routines cda0569 IB/mlx5: Initialize mlx5_ib_qp signature related 33b4079 IB/mlx5, mlx5_core: Support for create_mr and destroy_mr 8b343e6 IB/core: Introduce Signature Verbs API c1b0358 IB/core: Introduce protected memory regions Sagi Grimberg (11): Target/core: Fixes for isert compilation IB/isert: separate connection protection domains and dma MRs IB/isert: Avoid frwr notation, use fastreg IB/isert: Move fastreg descriptor creation to a function Target/iscsi: Add T10-PI indication for iscsi_portal_group IB/isert: Initialize T10-PI resources IB/isert: pass scatterlist instead of cmd to fast_reg_mr routine IB/isert: pass mr and frpl to isert_fast_reg_mr routine IB/isert: Accept RDMA_WRITE completions IB/isert: Support T10-PI protected transactions 
Target/configfs: Expose iSCSI network portal group T10-PI support drivers/infiniband/ulp/isert/ib_isert.c | 708 +++-- drivers/infiniband/ulp/isert/ib_isert.h | 29 +- drivers/target/iscsi/iscsi_target_configfs.c |6 + drivers/target/iscsi/iscsi_target_core.h |5 +- drivers/target/iscsi/iscsi_target_tpg.c | 21 + drivers/target/iscsi/iscsi_target_tpg.h |1 + include/target/target_core_base.h| 22 +- 7 files changed, 603 insertions(+), 189 deletions(-) -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 04/11] IB/isert: Move fastreg descriptor creation to a function
This routine may be called both by fast registration descriptors for data and for integrity buffers. This patch does not change any functionality. Signed-off-by: Sagi Grimberg sa...@mellanox.com --- drivers/infiniband/ulp/isert/ib_isert.c | 52 +++ 1 files changed, 32 insertions(+), 20 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 295d2be..9ef9193 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -393,6 +393,33 @@ isert_conn_free_fastreg_pool(struct isert_conn *isert_conn) } static int +isert_create_fr_desc(struct ib_device *ib_device, struct ib_pd *pd, +struct fast_reg_descriptor *fr_desc) +{ + fr_desc-data_frpl = ib_alloc_fast_reg_page_list(ib_device, + ISCSI_ISER_SG_TABLESIZE); + if (IS_ERR(fr_desc-data_frpl)) { + pr_err(Failed to allocate data frpl err=%ld\n, + PTR_ERR(fr_desc-data_frpl)); + return PTR_ERR(fr_desc-data_frpl); + } + + fr_desc-data_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE); + if (IS_ERR(fr_desc-data_mr)) { + pr_err(Failed to allocate data frmr err=%ld\n, + PTR_ERR(fr_desc-data_mr)); + ib_free_fast_reg_page_list(fr_desc-data_frpl); + return PTR_ERR(fr_desc-data_mr); + } + pr_debug(Create fr_desc %p page_list %p\n, +fr_desc, fr_desc-data_frpl-page_list); + + fr_desc-valid = true; + + return 0; +} + +static int isert_conn_create_fastreg_pool(struct isert_conn *isert_conn) { struct fast_reg_descriptor *fr_desc; @@ -409,29 +436,14 @@ isert_conn_create_fastreg_pool(struct isert_conn *isert_conn) goto err; } - fr_desc-data_frpl = - ib_alloc_fast_reg_page_list(device-ib_device, - ISCSI_ISER_SG_TABLESIZE); - if (IS_ERR(fr_desc-data_frpl)) { - pr_err(Failed to allocate fr_pg_list err=%ld\n, - PTR_ERR(fr_desc-data_frpl)); - ret = PTR_ERR(fr_desc-data_frpl); - goto err; - } - - fr_desc-data_mr = ib_alloc_fast_reg_mr(isert_conn-conn_pd, - ISCSI_ISER_SG_TABLESIZE); - if (IS_ERR(fr_desc-data_mr)) { - pr_err(Failed to allocate frmr 
err=%ld\n, - PTR_ERR(fr_desc-data_mr)); - ret = PTR_ERR(fr_desc-data_mr); - ib_free_fast_reg_page_list(fr_desc-data_frpl); + ret = isert_create_fr_desc(device-ib_device, + isert_conn-conn_pd, fr_desc); + if (ret) { + pr_err(Failed to create fastreg descriptor err=%d\n, + ret); goto err; } - pr_debug(Create fr_desc %p page_list %p\n, -fr_desc, fr_desc-data_frpl-page_list); - fr_desc-valid = true; list_add_tail(fr_desc-list, isert_conn-conn_fr_pool); isert_conn-conn_fr_pool_size++; } -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 07/11] IB/isert: pass scatterlist instead of cmd to fast_reg_mr routine
This routine may help for protection registration as well. This patch does not change any functionality. Signed-off-by: Sagi Grimberg sa...@mellanox.com --- drivers/infiniband/ulp/isert/ib_isert.c | 28 1 files changed, 12 insertions(+), 16 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 98f23f4..3495e73 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -2247,26 +2247,22 @@ isert_map_fr_pagelist(struct ib_device *ib_dev, static int isert_fast_reg_mr(struct fast_reg_descriptor *fr_desc, - struct isert_cmd *isert_cmd, struct isert_conn *isert_conn, - struct ib_sge *ib_sge, u32 offset, unsigned int data_len) + struct isert_conn *isert_conn, struct scatterlist *sg_start, + struct ib_sge *ib_sge, u32 sg_nents, u32 offset, + unsigned int data_len) { - struct iscsi_cmd *cmd = isert_cmd-iscsi_cmd; struct ib_device *ib_dev = isert_conn-conn_cm_id-device; - struct scatterlist *sg_start; - u32 sg_off, page_off; struct ib_send_wr fr_wr, inv_wr; struct ib_send_wr *bad_wr, *wr = NULL; + int ret, pagelist_len; + u32 page_off; u8 key; - int ret, sg_nents, pagelist_len; - sg_off = offset / PAGE_SIZE; - sg_start = cmd-se_cmd.t_data_sg[sg_off]; - sg_nents = min_t(unsigned int, cmd-se_cmd.t_data_nents - sg_off, -ISCSI_ISER_SG_TABLESIZE); + sg_nents = min_t(unsigned int, sg_nents, ISCSI_ISER_SG_TABLESIZE); page_off = offset % PAGE_SIZE; - pr_debug(Cmd: %p use fr_desc %p sg_nents %d sg_off %d offset %u\n, -isert_cmd, fr_desc, sg_nents, sg_off, offset); + pr_debug(Use fr_desc %p sg_nents %d offset %u\n, +fr_desc, sg_nents, offset); pagelist_len = isert_map_fr_pagelist(ib_dev, sg_start, sg_nents, fr_desc-data_frpl-page_list[0]); @@ -2335,9 +2331,9 @@ isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, if (wr-iser_ib_op == ISER_IB_RDMA_WRITE) { data_left = se_cmd-data_length; } else { - sg_off = cmd-write_data_done / PAGE_SIZE; - data_left = se_cmd-data_length - 
cmd-write_data_done; offset = cmd-write_data_done; + sg_off = offset / PAGE_SIZE; + data_left = se_cmd-data_length - cmd-write_data_done; isert_cmd-tx_desc.isert_cmd = isert_cmd; } @@ -2401,8 +2397,8 @@ isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, spin_unlock_irqrestore(isert_conn-conn_lock, flags); wr-fr_desc = fr_desc; - ret = isert_fast_reg_mr(fr_desc, isert_cmd, isert_conn, - ib_sge, offset, data_len); + ret = isert_fast_reg_mr(fr_desc, isert_conn, sg_start, + ib_sge, sg_nents, offset, data_len); if (ret) { list_add_tail(fr_desc-list, isert_conn-conn_fr_pool); goto unmap_sg; -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 05/11] Target/iscsi: Add T10-PI indication for iscsi_portal_group
In case an iscsi portal group will be defined as t10_pi enabled, all connections on top of it will support protected transactions. T10-PI support may require extra reource allocation and maintenance by the transport layer, so we don't want to apply them on non-t10_pi network portals. This is a hook for the iscsi target layer to signal the transport at connection establishment that this connection will carry protected transactions. Signed-off-by: Sagi Grimberg sa...@mellanox.com --- drivers/target/iscsi/iscsi_target_core.h |5 - drivers/target/iscsi/iscsi_target_tpg.c |2 ++ 2 files changed, 6 insertions(+), 1 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_core.h b/drivers/target/iscsi/iscsi_target_core.h index 48f7b3b..886d74d 100644 --- a/drivers/target/iscsi/iscsi_target_core.h +++ b/drivers/target/iscsi/iscsi_target_core.h @@ -58,7 +58,8 @@ #define TA_DEMO_MODE_DISCOVERY 1 #define TA_DEFAULT_ERL 0 #define TA_CACHE_CORE_NPS 0 - +/* T10 protection information disabled by default */ +#define TA_DEFAULT_T10_PI 0 #define ISCSI_IOV_DATA_BUFFER 5 @@ -765,6 +766,7 @@ struct iscsi_tpg_attrib { u32 prod_mode_write_protect; u32 demo_mode_discovery; u32 default_erl; + u8 t10_pi; struct iscsi_portal_group *tpg; }; @@ -787,6 +789,7 @@ struct iscsi_np { void*np_context; struct iscsit_transport *np_transport; struct list_headnp_list; + struct iscsi_tpg_np *tpg_np; } cacheline_aligned; struct iscsi_tpg_np { diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index 3976183..80ae14c 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -225,6 +225,7 @@ static void iscsit_set_default_tpg_attribs(struct iscsi_portal_group *tpg) a-prod_mode_write_protect = TA_PROD_MODE_WRITE_PROTECT; a-demo_mode_discovery = TA_DEMO_MODE_DISCOVERY; a-default_erl = TA_DEFAULT_ERL; + a-t10_pi = TA_DEFAULT_T10_PI; } int iscsit_tpg_add_portal_group(struct iscsi_tiqn *tiqn, struct iscsi_portal_group *tpg) 
@@ -500,6 +501,7 @@ struct iscsi_tpg_np *iscsit_tpg_add_network_portal( init_completion(tpg_np-tpg_np_comp); kref_init(tpg_np-tpg_np_kref); tpg_np-tpg_np = np; + np-tpg_np = tpg_np; tpg_np-tpg = tpg; spin_lock(tpg-tpg_np_lock); -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 06/11] IB/isert: Initialize T10-PI resources
Upon connection establishment check if network portal is T10-PI enabled and allocate T10-PI resources if necessary, allocate signature enabled memory regions and mark connection queue-pair as signature enabled. Signed-off-by: Sagi Grimberg sa...@mellanox.com --- drivers/infiniband/ulp/isert/ib_isert.c | 104 +++ drivers/infiniband/ulp/isert/ib_isert.h | 19 +- 2 files changed, 106 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 9ef9193..98f23f4 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -87,7 +87,8 @@ isert_query_device(struct ib_device *ib_dev, struct ib_device_attr *devattr) } static int -isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id) +isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id, + u8 protection) { struct isert_device *device = isert_conn-conn_device; struct ib_qp_init_attr attr; @@ -119,6 +120,8 @@ isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id) attr.cap.max_recv_sge = 1; attr.sq_sig_type = IB_SIGNAL_REQ_WR; attr.qp_type = IB_QPT_RC; + if (protection) + attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN; pr_debug(isert_conn_setup_qp cma_id-device: %p\n, cma_id-device); @@ -234,13 +237,18 @@ isert_create_device_ib_res(struct isert_device *device) device-unreg_rdma_mem = isert_unmap_cmd; } + /* Check signature cap */ + device-pi_capable = dev_attr-device_cap_flags +IB_DEVICE_SIGNATURE_HANDOVER ? 
true : false; + device-cqs_used = min_t(int, num_online_cpus(), device-ib_device-num_comp_vectors); device-cqs_used = min(ISERT_MAX_CQ, device-cqs_used); pr_debug(Using %d CQs, device %s supports %d vectors support -Fast registration %d\n, +Fast registration %d pi_capable %d\n, device-cqs_used, device-ib_device-name, -device-ib_device-num_comp_vectors, device-use_fastreg); +device-ib_device-num_comp_vectors, device-use_fastreg, +device-pi_capable); device-cq_desc = kzalloc(sizeof(struct isert_cq_desc) * device-cqs_used, GFP_KERNEL); if (!device-cq_desc) { @@ -383,6 +391,12 @@ isert_conn_free_fastreg_pool(struct isert_conn *isert_conn) list_del(fr_desc-list); ib_free_fast_reg_page_list(fr_desc-data_frpl); ib_dereg_mr(fr_desc-data_mr); + if (fr_desc-pi_ctx) { + ib_free_fast_reg_page_list(fr_desc-pi_ctx-prot_frpl); + ib_dereg_mr(fr_desc-pi_ctx-prot_mr); + ib_destroy_mr(fr_desc-pi_ctx-sig_mr); + kfree(fr_desc-pi_ctx); + } kfree(fr_desc); ++i; } @@ -394,8 +408,10 @@ isert_conn_free_fastreg_pool(struct isert_conn *isert_conn) static int isert_create_fr_desc(struct ib_device *ib_device, struct ib_pd *pd, -struct fast_reg_descriptor *fr_desc) +struct fast_reg_descriptor *fr_desc, u8 protection) { + int ret; + fr_desc-data_frpl = ib_alloc_fast_reg_page_list(ib_device, ISCSI_ISER_SG_TABLESIZE); if (IS_ERR(fr_desc-data_frpl)) { @@ -408,19 +424,73 @@ isert_create_fr_desc(struct ib_device *ib_device, struct ib_pd *pd, if (IS_ERR(fr_desc-data_mr)) { pr_err(Failed to allocate data frmr err=%ld\n, PTR_ERR(fr_desc-data_mr)); - ib_free_fast_reg_page_list(fr_desc-data_frpl); - return PTR_ERR(fr_desc-data_mr); + ret = PTR_ERR(fr_desc-data_mr); + goto err_data_frpl; } pr_debug(Create fr_desc %p page_list %p\n, fr_desc, fr_desc-data_frpl-page_list); + fr_desc-data_key_valid = true; - fr_desc-valid = true; + if (protection) { + struct ib_mr_init_attr mr_init_attr = {0}; + struct pi_context *pi_ctx; + + fr_desc-pi_ctx = kzalloc(sizeof(*fr_desc-pi_ctx), GFP_KERNEL); + if (!fr_desc-pi_ctx) 
{ + pr_err(Failed to allocate pi context\n); + ret = -ENOMEM; + goto err_data_mr; + } + pi_ctx = fr_desc-pi_ctx; + + pi_ctx-prot_frpl = ib_alloc_fast_reg_page_list(ib_device, + ISCSI_ISER_SG_TABLESIZE); + if (IS_ERR(pi_ctx-prot_frpl)) { + pr_err(Failed to allocate prot frpl err=%ld\n
[PATCH 10/11] IB/isert: Support T10-PI protected transactions
In case the Target core passed transport T10 protection operation: 1. Register data buffer (data memory region) 2. Register protection buffer if exsists (prot memory region) 3. Register signature region (signature memory region) - use work request IB_WR_REG_SIG_MR 4. Execute RDMA 5. Upon RDMA completion check the signature status - if succeeded send good SCSI response - if failed send SCSI bad response with appropriate sense buffer Signed-off-by: Sagi Grimberg sa...@mellanox.com --- drivers/infiniband/ulp/isert/ib_isert.c | 376 ++- 1 files changed, 321 insertions(+), 55 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 9aa933e..8a888f0 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -1499,6 +1499,7 @@ isert_unreg_rdma(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn) if (wr-fr_desc) { pr_debug(unreg_fastreg_cmd: %p free fr_desc %p\n, isert_cmd, wr-fr_desc); + wr-fr_desc-protected = false; spin_lock_bh(isert_conn-conn_lock); list_add_tail(wr-fr_desc-list, isert_conn-conn_fr_pool); spin_unlock_bh(isert_conn-conn_lock); @@ -1604,13 +1605,65 @@ isert_completion_put(struct iser_tx_desc *tx_desc, struct isert_cmd *isert_cmd, } static void +isert_pi_err_sense_buffer(u8 *buf, u8 key, u8 asc, u8 ascq) +{ + buf[0] = 0x70; + buf[SPC_SENSE_KEY_OFFSET] = key; + buf[SPC_ASC_KEY_OFFSET] = asc; + buf[SPC_ASCQ_KEY_OFFSET] = ascq; +} + +static void isert_completion_rdma_write(struct iser_tx_desc *tx_desc, struct isert_cmd *isert_cmd) { + struct isert_rdma_wr *wr = isert_cmd-rdma_wr; struct iscsi_cmd *cmd = isert_cmd-iscsi_cmd; + struct se_cmd *se_cmd = cmd-se_cmd; struct isert_conn *isert_conn = isert_cmd-conn; struct isert_device *device = isert_conn-conn_device; + struct ib_mr_status mr_status; + int ret; + if (wr-fr_desc wr-fr_desc-protected) { + ret = ib_check_mr_status(wr-fr_desc-pi_ctx-sig_mr, +IB_MR_CHECK_SIG_STATUS, mr_status); + if (ret) { + 
pr_err(ib_check_mr_status failed, ret %d\n, ret); + goto fail_mr_status; + } + if (mr_status.fail_status IB_MR_CHECK_SIG_STATUS) { + u32 block_size = se_cmd-se_dev-dev_attrib.block_size; + + pr_err(PI error found type %d at offset %llx + expected %x vs actual %x\n, + mr_status.sig_err.err_type, + mr_status.sig_err.sig_err_offset, + mr_status.sig_err.expected, + mr_status.sig_err.actual); + switch (mr_status.sig_err.err_type) { + case IB_SIG_BAD_GUARD: + se_cmd-pi_err = TARGET_GUARD_CHECK_FAILED; + break; + case IB_SIG_BAD_REFTAG: + se_cmd-pi_err = TARGET_REFTAG_CHECK_FAILED; + break; + case IB_SIG_BAD_APPTAG: + se_cmd-pi_err = TARGET_APPTAG_CHECK_FAILED; + break; + } + se_cmd-block_num = + mr_status.sig_err.sig_err_offset / block_size; + isert_pi_err_sense_buffer(se_cmd-sense_buffer, + ILLEGAL_REQUEST, 0x10, + (u8)se_cmd-pi_err); + se_cmd-scsi_status = SAM_STAT_CHECK_CONDITION; + se_cmd-scsi_sense_length = TRANSPORT_SENSE_BUFFER; + se_cmd-se_cmd_flags |= SCF_EMULATED_TASK_SENSE; + } + } + +fail_mr_status: device-unreg_rdma_mem(isert_cmd, isert_conn); isert_put_response(isert_conn-conn, cmd); } @@ -1624,7 +1677,43 @@ isert_completion_rdma_read(struct iser_tx_desc *tx_desc, struct se_cmd *se_cmd = cmd-se_cmd; struct isert_conn *isert_conn = isert_cmd-conn; struct isert_device *device = isert_conn-conn_device; + struct ib_mr_status mr_status; + int ret; + if (wr-fr_desc wr-fr_desc-protected) { + ret = ib_check_mr_status(wr-fr_desc-pi_ctx-sig_mr, +IB_MR_CHECK_SIG_STATUS, mr_status); + if (ret) { + pr_err(ib_check_mr_status failed, ret %d\n, ret); + goto fail_mr_status; + } + if (mr_status.fail_status IB_MR_CHECK_SIG_STATUS
[PATCH 09/11] IB/isert: Accept RDMA_WRITE completions
In case of protected transactions, we will need to check the protection status of the transaction before sending SCSI response. So be ready for RDMA_WRITE completions. currently we don't ask for these completions, but for T10-PI we will. Signed-off-by: Sagi Grimberg sa...@mellanox.com --- drivers/infiniband/ulp/isert/ib_isert.c | 20 +--- 1 files changed, 17 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 98aab21..9aa933e 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -51,6 +51,8 @@ isert_unreg_rdma(struct isert_cmd *isert_cmd, struct isert_conn *isert_conn); static int isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, struct isert_rdma_wr *wr); +static int +isert_put_response(struct iscsi_conn *conn, struct iscsi_cmd *cmd); static void isert_qp_event_callback(struct ib_event *e, void *context) @@ -1602,6 +1604,18 @@ isert_completion_put(struct iser_tx_desc *tx_desc, struct isert_cmd *isert_cmd, } static void +isert_completion_rdma_write(struct iser_tx_desc *tx_desc, + struct isert_cmd *isert_cmd) +{ + struct iscsi_cmd *cmd = isert_cmd-iscsi_cmd; + struct isert_conn *isert_conn = isert_cmd-conn; + struct isert_device *device = isert_conn-conn_device; + + device-unreg_rdma_mem(isert_cmd, isert_conn); + isert_put_response(isert_conn-conn, cmd); +} + +static void isert_completion_rdma_read(struct iser_tx_desc *tx_desc, struct isert_cmd *isert_cmd) { @@ -1721,9 +1735,9 @@ __isert_send_completion(struct iser_tx_desc *tx_desc, isert_conn, ib_dev); break; case ISER_IB_RDMA_WRITE: - pr_err(isert_send_completion: Got ISER_IB_RDMA_WRITE\n); - dump_stack(); - break; + pr_debug(isert_send_completion: Got ISER_IB_RDMA_WRITE\n); + atomic_dec(isert_conn-post_send_buf_count); + isert_completion_rdma_write(tx_desc, isert_cmd); case ISER_IB_RDMA_READ: pr_debug(isert_send_completion: Got ISER_IB_RDMA_READ:\n); -- 1.7.1 -- To unsubscribe 
from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 11/11] Target/configfs: Expose iSCSI network portal group T10-PI support
User may enable T10-PI support per network portal group. any connection established on top of it, will be required to serve protected transactions. Signed-off-by: Sagi Grimberg sa...@mellanox.com --- drivers/target/iscsi/iscsi_target_configfs.c |6 ++ drivers/target/iscsi/iscsi_target_tpg.c | 19 +++ drivers/target/iscsi/iscsi_target_tpg.h |1 + 3 files changed, 26 insertions(+), 0 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index e3318ed..8f3f585 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -1051,6 +1051,11 @@ TPG_ATTR(demo_mode_discovery, S_IRUGO | S_IWUSR); */ DEF_TPG_ATTRIB(default_erl); TPG_ATTR(default_erl, S_IRUGO | S_IWUSR); +/* + * Define iscsi_tpg_attrib_s_t10_pi + */ +DEF_TPG_ATTRIB(t10_pi); +TPG_ATTR(t10_pi, S_IRUGO | S_IWUSR); static struct configfs_attribute *lio_target_tpg_attrib_attrs[] = { iscsi_tpg_attrib_authentication.attr, @@ -1063,6 +1068,7 @@ static struct configfs_attribute *lio_target_tpg_attrib_attrs[] = { iscsi_tpg_attrib_prod_mode_write_protect.attr, iscsi_tpg_attrib_demo_mode_discovery.attr, iscsi_tpg_attrib_default_erl.attr, + iscsi_tpg_attrib_t10_pi.attr, NULL, }; diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index 80ae14c..d95a5f2 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -860,3 +860,22 @@ int iscsit_ta_default_erl( return 0; } + +int iscsit_ta_t10_pi( + struct iscsi_portal_group *tpg, + u32 flag) +{ + struct iscsi_tpg_attrib *a = tpg-tpg_attrib; + + if ((flag != 0) (flag != 1)) { + pr_err(Illegal value %d\n, flag); + return -EINVAL; + } + + a-t10_pi = flag; + pr_debug(iSCSI_TPG[%hu] - T10 Protection information bit: +%s\n, tpg-tpgt, (a-t10_pi) ? 
+ ON : OFF); + + return 0; +} diff --git a/drivers/target/iscsi/iscsi_target_tpg.h b/drivers/target/iscsi/iscsi_target_tpg.h index 213c0fc..0a182f2 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.h +++ b/drivers/target/iscsi/iscsi_target_tpg.h @@ -39,5 +39,6 @@ extern int iscsit_ta_demo_mode_write_protect(struct iscsi_portal_group *, u32); extern int iscsit_ta_prod_mode_write_protect(struct iscsi_portal_group *, u32); extern int iscsit_ta_demo_mode_discovery(struct iscsi_portal_group *, u32); extern int iscsit_ta_default_erl(struct iscsi_portal_group *, u32); +extern int iscsit_ta_t10_pi(struct iscsi_portal_group *, u32); #endif /* ISCSI_TARGET_TPG_H */ -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 08/11] IB/isert: pass mr and frpl to isert_fast_reg_mr routine
This commit generalizes isert_fast_reg_mr to receive mr and frpl instead of fr_desc to do registration. In T10-PI we also register protection memory region so we want to use this routine. This commit does not change any functionality. Signed-off-by: Sagi Grimberg sa...@mellanox.com --- drivers/infiniband/ulp/isert/ib_isert.c | 62 +++ 1 files changed, 30 insertions(+), 32 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 3495e73..98aab21 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -2246,10 +2246,10 @@ isert_map_fr_pagelist(struct ib_device *ib_dev, } static int -isert_fast_reg_mr(struct fast_reg_descriptor *fr_desc, - struct isert_conn *isert_conn, struct scatterlist *sg_start, - struct ib_sge *ib_sge, u32 sg_nents, u32 offset, - unsigned int data_len) +isert_fast_reg_mr(struct isert_conn *isert_conn, struct ib_mr *mr, + struct ib_fast_reg_page_list *frpl, bool *key_valid, + struct scatterlist *sg_start, u32 sg_nents, u32 offset, + unsigned int data_len, struct ib_sge *ib_sge) { struct ib_device *ib_dev = isert_conn-conn_cm_id-device; struct ib_send_wr fr_wr, inv_wr; @@ -2260,33 +2260,31 @@ isert_fast_reg_mr(struct fast_reg_descriptor *fr_desc, sg_nents = min_t(unsigned int, sg_nents, ISCSI_ISER_SG_TABLESIZE); page_off = offset % PAGE_SIZE; - - pr_debug(Use fr_desc %p sg_nents %d offset %u\n, -fr_desc, sg_nents, offset); + pr_debug(Use mr %p frpl %p sg_nents %d offset %u\n, +mr, frpl, sg_nents, offset); pagelist_len = isert_map_fr_pagelist(ib_dev, sg_start, sg_nents, -fr_desc-data_frpl-page_list[0]); +frpl-page_list[0]); - if (!fr_desc-data_key_valid) { + if (!*key_valid) { memset(inv_wr, 0, sizeof(inv_wr)); inv_wr.opcode = IB_WR_LOCAL_INV; - inv_wr.ex.invalidate_rkey = fr_desc-data_mr-rkey; + inv_wr.ex.invalidate_rkey = mr-rkey; wr = inv_wr; /* Bump the key */ - key = (u8)(fr_desc-data_mr-rkey 0x00FF); - ib_update_fast_reg_key(fr_desc-data_mr, 
++key); + key = (u8)(mr-rkey 0x00FF); + ib_update_fast_reg_key(mr, ++key); } /* Prepare FASTREG WR */ memset(fr_wr, 0, sizeof(fr_wr)); fr_wr.opcode = IB_WR_FAST_REG_MR; - fr_wr.wr.fast_reg.iova_start = - fr_desc-data_frpl-page_list[0] + page_off; - fr_wr.wr.fast_reg.page_list = fr_desc-data_frpl; + fr_wr.wr.fast_reg.iova_start = frpl-page_list[0] + page_off; + fr_wr.wr.fast_reg.page_list = frpl; fr_wr.wr.fast_reg.page_list_len = pagelist_len; fr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; fr_wr.wr.fast_reg.length = data_len; - fr_wr.wr.fast_reg.rkey = fr_desc-data_mr-rkey; + fr_wr.wr.fast_reg.rkey = mr-rkey; fr_wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE; if (!wr) @@ -2299,14 +2297,14 @@ isert_fast_reg_mr(struct fast_reg_descriptor *fr_desc, pr_err(fast registration failed, ret:%d\n, ret); return ret; } - fr_desc-data_key_valid = false; - ib_sge-lkey = fr_desc-data_mr-lkey; - ib_sge-addr = fr_desc-data_frpl-page_list[0] + page_off; + *key_valid = false; + ib_sge-lkey = mr-lkey; + ib_sge-addr = frpl-page_list[0] + page_off; ib_sge-length = data_len; - pr_debug(RDMA ib_sge: addr: 0x%16llx length: %u lkey: %08x\n, -ib_sge-addr, ib_sge-length, ib_sge-lkey); + pr_debug(fastreg ib_sge: addr: 0x%16llx length: %u lkey: %08x\n, +ib_sge-addr + page_off, ib_sge-length, ib_sge-lkey); return ret; } @@ -2320,7 +2318,7 @@ isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, struct isert_conn *isert_conn = (struct isert_conn *)conn-context; struct ib_device *ib_dev = isert_conn-conn_cm_id-device; struct ib_send_wr *send_wr; - struct ib_sge *ib_sge; + struct ib_sge data_sge; struct scatterlist *sg_start; struct fast_reg_descriptor *fr_desc; u32 sg_off = 0, sg_nents; @@ -2352,10 +2350,7 @@ isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, pr_debug(Mapped cmd: %p count: %u sg: %p sg_nents: %u rdma_len %d\n, isert_cmd, count, sg_start, sg_nents, data_left); - memset(wr-s_ib_sge, 0, sizeof(*ib_sge)); - ib_sge = wr-s_ib_sge; - wr-ib_sge = ib_sge; - + wr-ib_sge 
= wr-s_ib_sge; wr-send_wr_num = 1; memset(wr-s_send_wr, 0, sizeof(*send_wr)); wr-send_wr = wr-s_send_wr
Re: [PATCH 07/14] target/sbc: Add P_TYPE + PROT_EN bits to READ_CAPACITY_16
On 1/10/2014 10:46 PM, Martin K. Petersen wrote: Andy == Andy Grover agro...@redhat.com writes: Andy Yes, don't you need FORMAT UNIT because protection information is Andy going to mean the pi-enabled lun will need to report less blocks? Modern disk drives won't shrink when you reformat them with PI. This is a result of an IDEMA agreement about LBA counts. And if you create a 10GB PI LUN on an array you'll get 10GB for data. Andy The ramdisk backstore changes in this series allocate extra space Andy for PI info, but my understanding was that especially for Andy emulation with block and fileio backstores, everything needs to go Andy in the same amount of space. For both file and block I'd recommend we store the PI in a separate block device or file unless the backing device is PI-capable. Andy Furthermore, if we want PI info stored along with the blocks, then Andy block and fileio backstore formats are no longer going to be 1:1 Andy -- requiring offset calculations, non-aligned read-modify-write, Andy and all that unpleasantness to be handled? I only think interleaved makes sense if you're passing the PI through instead of emulating. I agree, I implemented interleaved mode just as a proof of concept that our HW can perform offload in that manner. I assume we can stick with non-interleaved, although it can be added as a user option. Sagi. -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 13/14] target/rd: Add DIF protection into rd_execute_rw
SNIP I wonder how we can skip sbc_dif_verify_ if the transport already offloaded DIF verify. I think that the transport should signal the core layer that it is able to offload DIF (ADD/STRIP/PASS/VERIFY), in which case the core should turn off the backstore DIF verify emulation to sustain performance. So IBLOCK + PSCSI backends will need to be a non-interleaved protection PASS for fast path operation, and backend protection emulation is reserved for RAMDISK and perhaps a special FILEIO full emulation mode. But can't we avoid that if transport already verified? This will kill performance. -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 03/14] target/sbc: Add sbc_check_prot + update sbc_parse_cdb for DIF
On 1/10/2014 9:04 AM, Nicholas A. Bellinger wrote: On Thu, 2014-01-09 at 16:58 +0200, Sagi Grimberg wrote: On 1/8/2014 10:36 PM, Nicholas A. Bellinger wrote: From: Nicholas Bellinger n...@linux-iscsi.org This patch adds sbc_check_prot() for performing various DIF related CDB sanity checks, along with setting SCF_PROT once sanity checks have passed. Also, add calls in sbc_parse_cdb() for READ_[10,12,16] + WRITE_[10,12,16] to perform DIF sanity checking. Cc: Martin K. Petersen martin.peter...@oracle.com Cc: Christoph Hellwig h...@lst.de Cc: Hannes Reinecke h...@suse.de Cc: Sagi Grimberg sa...@mellanox.com Cc: Or Gerlitz ogerl...@mellanox.com Signed-off-by: Nicholas Bellinger n...@linux-iscsi.org --- drivers/target/target_core_sbc.c | 39 ++ 1 file changed, 39 insertions(+) diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index 52ae54e..600ffcb 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -563,6 +563,27 @@ sbc_compare_and_write(struct se_cmd *cmd) return TCM_NO_SENSE; } +bool +sbc_check_prot(struct se_device *dev, struct se_cmd *cmd, unsigned char *cdb) +{ + if (!dev-dev_attrib.pi_prot_type) + return true; + + if (dev-dev_attrib.pi_prot_type == TARGET_DIF_TYPE2_PROT + (cdb[1] 0xe0)) + return false; + + if (!(cdb[1] 0xe0)) { + pr_warn(Target: Unprotected READ/WRITE to DIF device\n); + return true; + } + if (!cmd-t_prot_sg || !cmd-t_prot_nents) + return true; + + cmd-se_cmd_flags |= SCF_PROT; Isn't this the place to fill the se_cmd DIF execution parameters? prot_op, prot_type, guard_type, initial_reftag, apptag etc... Next, all parties interested in DIF execution should look in se_cmd (backstore, transport). Yes, working on this for -v2 as well. :) OK, so just to be clear, both the transport and the backstore will be look in se_cmd protection attributes when executing IO correct? 
-- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 07/14] target/sbc: Add P_TYPE + PROT_EN bits to READ_CAPACITY_16
On 1/10/2014 10:39 PM, Martin K. Petersen wrote: Sagi == Sagi Grimberg sa...@mellanox.com writes: Sagi What about FORMAT_UNIT emulation? The backstore protection Sagi configuration is done at the target side via configfs/targetcli, I don't know of any non-disk devices that actually implement FORMAT UNIT. Usually such configuration is done using the array management interface. Hmm, So this takes me to a corner I still don't understand, if a LUN is pre-formatted as T10-protected, what happens to unwritten blocks read? I mean, SCSI login executes some reads from several LBAs which will probably fail as blocks are unwritten. What is the usage model? perform Initiator login and then format the LUN on the target node? This is why I thought FORMAT_UNIT should be implemented. I understand this corner will disappear in DIF v2 (following DIX1.1 draft) with ESCAPE flags. -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 09/14] target/configfs: Expose protection device attributes
On 1/10/2014 9:00 AM, Nicholas A. Bellinger wrote: On Thu, 2014-01-09 at 13:01 +0200, Sagi Grimberg wrote: On 1/8/2014 10:36 PM, Nicholas A. Bellinger wrote: From: Nicholas Bellinger n...@linux-iscsi.org This patch adds support for exposing DIF protection device attributes via configfs. This includes: pi_prot_type: Protection Type (0, 1, 3 currently support) pi_prot_version: Protection Version (DIF v1 currently supported) pi_guard_type: Guard Type (1=DIF CRC, 2=IP CRC) Within se_dev_set_pi_prot_type() it also adds the se_subsystem_api device callbacks to setup per device protection information. Suggestion, expose pi_prot_format and call transport-init_prot() there. It is more explicit and this routine should be called upon getting FORMAT_UNIT as well. nod, working on this next for FILEIO following your original example code. I would suggest to keep user interface the same for all backstores. Let me send you my latest WIP example for FILEIO that follows the pi_prot_format notation and you can arrange it as you wish. Thanks Sagi! --nab -- To unsubscribe from this list: send the line unsubscribe target-devel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 07/14] target/sbc: Add P_TYPE + PROT_EN bits to READ_CAPACITY_16
On 1/10/2014 10:39 PM, Martin K. Petersen wrote: Sagi == Sagi Grimberg sa...@mellanox.com writes: Sagi What about FORMAT_UNIT emulation? The backstore protection Sagi configuration is done at the target side via configfs/targetcli, I don't know of any non-disk devices that actually implement FORMAT UNIT. Usually such configuration is done using the array management interface. Hmm, So this takes me to a corner I still don't understand, if a LUN is pre-formatted as T10-protected, what happens to unwritten blocks read? I mean, SCSI login executes some reads from several LBAs which will probably fail as blocks are unwritten. What is the usage model? perform Initiator login and then format the LUN on the target node? This is why I thought FORMAT_UNIT should be implemented. I understand this corner will disappear in DIF v2 (following DIX1.1 draft) with ESCAPE flags. -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 09/14] target/configfs: Expose protection device attributes
SNIP nab This patch adds support for exposing DIF protection device nab attributes via configfs. This includes: nab pi_prot_type: Protection Type (0, 1, 3 currently support) nab pi_prot_version: Protection Version (DIF v1 currently supported) What's DIF v2? nab pi_guard_type: Guard Type (1=DIF CRC, 2=IP CRC) The IP checksum is only supported by DIX between OS and initiator, not by the target. I guess we could signal to the initiator via a vendor-private VPD that IP checksum is supported directly. But now that we have hardware-accelerated T10 CRC I don't think it's a big deal. shouldn't it stick around if it is not deprecated yet, the transport is required to support ip-csum-CRC conversion anyhow. (scsi_debug supports IP checksum because it's both initiator and target). -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 06/11] IB/isert: Initialize T10-PI resources
On 1/11/2014 11:09 PM, Or Gerlitz wrote: On Thu, Jan 9, 2014 at 6:40 PM, Sagi Grimberg sa...@mellanox.com wrote: @@ -557,8 +629,14 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) goto out_mr; } + if (pi_support !device-pi_capable) { + pr_err(Protection information requested but not supported\n); + ret = -EINVAL; + goto out_mr; + } + if (device-use_fastreg) { - ret = isert_conn_create_fastreg_pool(isert_conn); + ret = isert_conn_create_fastreg_pool(isert_conn, pi_support); just a nit, the pi_support bit can be looked up from the isert_conn struct, isn't it? if (ret) { pr_err(Conn: %p failed to create fastreg pool\n, isert_conn); @@ -566,7 +644,7 @@ isert_connect_request(struct rdma_cm_id *cma_id, struct rdma_cm_event *event) } } - ret = isert_conn_setup_qp(isert_conn, cma_id); + ret = isert_conn_setup_qp(isert_conn, cma_id, pi_support); if (ret) goto out_conn_dev; @@ -2193,7 +2271,7 @@ isert_fast_reg_mr(struct fast_reg_descriptor *fr_desc, pagelist_len = isert_map_fr_pagelist(ib_dev, sg_start, sg_nents, fr_desc-data_frpl-page_list[0]); - if (!fr_desc-valid) { + if (!fr_desc-data_key_valid) { memset(inv_wr, 0, sizeof(inv_wr)); inv_wr.opcode = IB_WR_LOCAL_INV; inv_wr.ex.invalidate_rkey = fr_desc-data_mr-rkey; @@ -2225,7 +2303,7 @@ isert_fast_reg_mr(struct fast_reg_descriptor *fr_desc, pr_err(fast registration failed, ret:%d\n, ret); return ret; } - fr_desc-valid = false; + fr_desc-data_key_valid = false; ib_sge-lkey = fr_desc-data_mr-lkey; ib_sge-addr = fr_desc-data_frpl-page_list[0] + page_off; diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index 708a069..fab8b50 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -48,11 +48,21 @@ struct iser_tx_desc { struct ib_send_wr send_wr; } __packed; +struct pi_context { + struct ib_mr *prot_mr; + boolprot_key_valid; + struct ib_fast_reg_page_list *prot_frpl; + struct ib_mr *sig_mr; + 
boolsig_key_valid; +}; + struct fast_reg_descriptor { - struct list_headlist; - struct ib_mr*data_mr; - struct ib_fast_reg_page_list*data_frpl; - boolvalid; + struct list_headlist; + struct ib_mr *data_mr; + booldata_key_valid; + struct ib_fast_reg_page_list *data_frpl; + boolprotected; no need for many bools in one structure... each one needs a bit, correct? so embed them in one variable I figured it will be more explicit this way. protected boolean indicates if we should check the data-integrity status, and the other 3 indicates if the relevant MR is valid (no need to execute local invalidation). Do you think I should compact it somehow? usually xxx_valid booleans will align together although not always. + struct pi_context *pi_ctx; }; struct isert_rdma_wr { @@ -140,6 +150,7 @@ struct isert_cq_desc { struct isert_device { int use_fastreg; + boolpi_capable; this one (and its such) is/are derived from the ib device capabilities, so I would suggest to keep a copy of the caps instead of derived bools Yes, I'll keep the device capabilities instead. int cqs_used; int refcount; int cq_active_qps[ISERT_MAX_CQ]; -- To unsubscribe from this list: send the line unsubscribe target-devel in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 07/14] target/sbc: Add P_TYPE + PROT_EN bits to READ_CAPACITY_16
On 1/12/2014 2:33 PM, Martin K. Petersen wrote: Sagi == Sagi Grimberg sa...@dev.mellanox.co.il writes: I don't know of any non-disk devices that actually implement FORMAT UNIT. Usually such configuration is done using the array management interface. Sagi So this takes me to a corner I still don't understand, if a LUN is Sagi pre-formatted as T10-protected, what happens to unwritten blocks Sagi read? I mean, SCSI login executes some reads from several LBAs Sagi which will probably fail as blocks are unwritten. Per SBC, PI must be initialized to 0xffffffffffffffff. Since an app tag value of 0xffff is an escape, this will prevent both target and initiator from performing PI-verification when that block is read. OK, so this is an implicit escape (which will become explicit in DIX1.1?). So I will open that in DIF RDMA verbs. If a block is subsequently written and no PI is sent from the host (WRPROTECT=0), the target must generate valid PI for each block. -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 07/14] target/sbc: Add P_TYPE + PROT_EN bits to READ_CAPACITY_16
Back to the original question, I don't think Sagi was asking if it was valid to do a legacy/unprotected READ, it was what to expect with a protected READ on unwritten blocks: So this takes me to a corner I still don't understand, if a LUN is pre-formatted as T10-protected, what happens to unwritten blocks read? So the precise answer is: the PI will be all 0xff bytes, unless logical provisioning is enabled, LBPRZ=0 and the device's compliance predates sbc3r34.pdf (November 2012). Doug Gilbert Thanks Doug, This answer translates directly into a fix in my T10-PI offload API. Sagi. -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 02/14] target: Add DIF CHECK_CONDITION ASC/ASCQ exception cases
On 1/10/2014 8:53 AM, Nicholas A. Bellinger wrote: On Thu, 2014-01-09 at 12:43 +0200, Sagi Grimberg wrote: On 1/8/2014 10:36 PM, Nicholas A. Bellinger wrote: From: Nicholas Bellinger n...@linux-iscsi.org This patch adds support for DIF related CHECK_CONDITION ASC/ASCQ exception cases into transport_send_check_condition_and_sense(). This includes: LOGICAL BLOCK GUARD CHECK FAILED LOGICAL BLOCK APPLICATION TAG CHECK FAILED LOGICAL BLOCK REFERENCE TAG CHECK FAILED that used by DIF TYPE1 and TYPE3 failure cases. Cc: Martin K. Petersen martin.peter...@oracle.com Cc: Christoph Hellwig h...@lst.de Cc: Hannes Reinecke h...@suse.de Cc: Sagi Grimberg sa...@mellanox.com Cc: Or Gerlitz ogerl...@mellanox.com Signed-off-by: Nicholas Bellinger n...@linux-iscsi.org --- drivers/target/target_core_transport.c | 30 ++ include/target/target_core_base.h |3 +++ 2 files changed, 33 insertions(+) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 91953da..707ee17 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2648,6 +2648,36 @@ transport_send_check_condition_and_sense(struct se_cmd *cmd, buffer[SPC_ASC_KEY_OFFSET] = 0x1d; buffer[SPC_ASCQ_KEY_OFFSET] = 0x00; break; + case TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED: + /* CURRENT ERROR */ + buffer[0] = 0x70; + buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10; + /* ILLEGAL REQUEST */ + buffer[SPC_SENSE_KEY_OFFSET] = ILLEGAL_REQUEST; + /* LOGICAL BLOCK GUARD CHECK FAILED */ + buffer[SPC_ASC_KEY_OFFSET] = 0x10; + buffer[SPC_ASCQ_KEY_OFFSET] = 0x01; Hey Nic, In my iSER patches I constructed the same sense buffer (call isert_pi_err_sense_buffer) and called isert_put_rsponse. So I should call this routine instead correct? -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 02/14] target: Add DIF CHECK_CONDITION ASC/ASCQ exception cases
On 1/14/2014 10:53 AM, Nicholas A. Bellinger wrote: On Tue, 2014-01-14 at 09:44 +0200, Sagi Grimberg wrote: On 1/10/2014 8:53 AM, Nicholas A. Bellinger wrote: On Thu, 2014-01-09 at 12:43 +0200, Sagi Grimberg wrote: On 1/8/2014 10:36 PM, Nicholas A. Bellinger wrote: From: Nicholas Bellinger n...@linux-iscsi.org This patch adds support for DIF related CHECK_CONDITION ASC/ASCQ exception cases into transport_send_check_condition_and_sense(). This includes: LOGICAL BLOCK GUARD CHECK FAILED LOGICAL BLOCK APPLICATION TAG CHECK FAILED LOGICAL BLOCK REFERENCE TAG CHECK FAILED that used by DIF TYPE1 and TYPE3 failure cases. Cc: Martin K. Petersen martin.peter...@oracle.com Cc: Christoph Hellwig h...@lst.de Cc: Hannes Reinecke h...@suse.de Cc: Sagi Grimberg sa...@mellanox.com Cc: Or Gerlitz ogerl...@mellanox.com Signed-off-by: Nicholas Bellinger n...@linux-iscsi.org --- drivers/target/target_core_transport.c | 30 ++ include/target/target_core_base.h |3 +++ 2 files changed, 33 insertions(+) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 91953da..707ee17 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2648,6 +2648,36 @@ transport_send_check_condition_and_sense(struct se_cmd *cmd, buffer[SPC_ASC_KEY_OFFSET] = 0x1d; buffer[SPC_ASCQ_KEY_OFFSET] = 0x00; break; + case TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED: + /* CURRENT ERROR */ + buffer[0] = 0x70; + buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10; + /* ILLEGAL REQUEST */ + buffer[SPC_SENSE_KEY_OFFSET] = ILLEGAL_REQUEST; + /* LOGICAL BLOCK GUARD CHECK FAILED */ + buffer[SPC_ASC_KEY_OFFSET] = 0x10; + buffer[SPC_ASCQ_KEY_OFFSET] = 0x01; Hey Nic, In my iSER patches I constructed the same sense buffer (call isert_pi_err_sense_buffer) and called isert_put_rsponse. So I should call this routine instead correct? 
Yes, it should be OK to use this for generating CHECK_CONDITION from fabric protection failures in isert_completion_rdma_write() code after device-unreg_rdma_mem() has been called. --nab Will do that. -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 00/14] target: Initial support for DIF Type1+Type3 emulation
On 1/8/2014 10:36 PM, Nicholas A. Bellinger wrote: From: Nicholas Bellinger n...@linux-iscsi.org Hi MKP SCSI folks, This series contains initial support for target mode DIF Type1+Type3 emulation within target core, RAMDISK_MCP device backend, and tcm_loop fabric driver. DIF emulation is enabled via a new 'pi_prot_type' device attribute within configfs, which is set after initial device configuration and before target fabric LUN export occurs. The DIF read/write verify emulation has been made generic enough so it can be used by other backend drivers (eg: FILEIO), as well as DIF v2 in the near future. Also note that the majority of the logic has been groked from existing scsi_debug.c code. The current plan is to enable basic support for emulated backends with tcm_loop for v3.14 code, and then move onto IBLOCK backend support (that requires BLOCK layer changes) Hey Nic, Can you please elaborate on what BLOCK layer changes are required? I didn't spot any misses from Looking at Documentation/block/data-integrity.txt. Am I missing something? Sagi. -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 00/14] target: Initial support for DIF Type1+Type3 emulation
On 1/16/2014 3:42 AM, Martin K. Petersen wrote: nab == Nicholas A Bellinger n...@linux-iscsi.org writes: nab The issue is that existing fs/bio-integrity.c code always assumes nab client/initiator mode, in that it will attempt to nab bio_integrity_generate() protection information in the submit_bio nab WRITE path, and bio_integrity_verify() of protection information in nab the bio_endio READ completion path. Only if the submit_bio() caller hasn't attached protection information already. If you submit a bio with a bip already attached the block layer will not generate/verify. Yes, that was my understanding as well. Thanks Martin, Sagi. -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[LSF/MM TOPIC][ATTEND] T10-PI RDMA offload
Hey SCSI folks, I'd like to propose the following topic for upcoming LSF-MM: T10-PI standard is becoming more and more appealing for storage and cloud solutions. Since error-detection coding comes with its cost of CPU computation overhead, state-of-the-art ASICs offer the ability to offload T10-PI operations (DIF/DIX), examples are SAS and FC controllers. Recently, the support for T10-PI offload over RDMA transactions was introduced in the Mellanox Connect-IB HCA. The first building block, RDMA verbs API supporting T10-PI offload was submitted over Linux-rdma (see http://marc.info/?l=linux-rdma&m=138719320307936&w=2). Moreover, we have seen first seeds of T10-PI support in Linux SCSI target entering v3.14 (see http://lwn.net/Articles/579708/) and RDMA offload implementation in iSER transport (see http://www.spinics.net/lists/linux-scsi/msg71128.html). There is still some ground to fill to get protection information support to a full solution over all backend devices. We would like to use LSF-MM platform to push forward T10-PI support end-to-end which requires Linux SCSI Target core level support along with transport level support in iSER and SRP (and also FCoE in the future) and over to the Initiator side transports. Discussion topics: - Introduce T10-PI offload RDMA verbs and how they are used in storage applications. - Discuss effects of DIX1.1 (currently a draft) in Target implementation (core level - transport level - HW level). - Discuss T10-PI Type 4 (16-byte DIF) status and possible implications on Target and Initiator implementation down to HW level. - Discuss Current Limitations that T10-PI RDMA offload poses on iSCSI protocol (ImmediateData, UnsolDataOut) and if/how they can be solved. - Whatever else comes to mind... Thanks, Sagi. -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v1 7/7] Target/configfs: Expose iSCSI network portal group T10-PI support
User may enable T10-PI support per network portal group. any connection established on top of it, will be required to serve protected transactions. Signed-off-by: Sagi Grimberg sa...@mellanox.com --- drivers/target/iscsi/iscsi_target_configfs.c |6 ++ drivers/target/iscsi/iscsi_target_tpg.c | 19 +++ drivers/target/iscsi/iscsi_target_tpg.h |1 + 3 files changed, 26 insertions(+), 0 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_configfs.c b/drivers/target/iscsi/iscsi_target_configfs.c index e3318ed..8f3f585 100644 --- a/drivers/target/iscsi/iscsi_target_configfs.c +++ b/drivers/target/iscsi/iscsi_target_configfs.c @@ -1051,6 +1051,11 @@ TPG_ATTR(demo_mode_discovery, S_IRUGO | S_IWUSR); */ DEF_TPG_ATTRIB(default_erl); TPG_ATTR(default_erl, S_IRUGO | S_IWUSR); +/* + * Define iscsi_tpg_attrib_s_t10_pi + */ +DEF_TPG_ATTRIB(t10_pi); +TPG_ATTR(t10_pi, S_IRUGO | S_IWUSR); static struct configfs_attribute *lio_target_tpg_attrib_attrs[] = { iscsi_tpg_attrib_authentication.attr, @@ -1063,6 +1068,7 @@ static struct configfs_attribute *lio_target_tpg_attrib_attrs[] = { iscsi_tpg_attrib_prod_mode_write_protect.attr, iscsi_tpg_attrib_demo_mode_discovery.attr, iscsi_tpg_attrib_default_erl.attr, + iscsi_tpg_attrib_t10_pi.attr, NULL, }; diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index 80ae14c..d95a5f2 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -860,3 +860,22 @@ int iscsit_ta_default_erl( return 0; } + +int iscsit_ta_t10_pi( + struct iscsi_portal_group *tpg, + u32 flag) +{ + struct iscsi_tpg_attrib *a = tpg-tpg_attrib; + + if ((flag != 0) (flag != 1)) { + pr_err(Illegal value %d\n, flag); + return -EINVAL; + } + + a-t10_pi = flag; + pr_debug(iSCSI_TPG[%hu] - T10 Protection information bit: +%s\n, tpg-tpgt, (a-t10_pi) ? 
+ ON : OFF); + + return 0; +} diff --git a/drivers/target/iscsi/iscsi_target_tpg.h b/drivers/target/iscsi/iscsi_target_tpg.h index 213c0fc..0a182f2 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.h +++ b/drivers/target/iscsi/iscsi_target_tpg.h @@ -39,5 +39,6 @@ extern int iscsit_ta_demo_mode_write_protect(struct iscsi_portal_group *, u32); extern int iscsit_ta_prod_mode_write_protect(struct iscsi_portal_group *, u32); extern int iscsit_ta_demo_mode_discovery(struct iscsi_portal_group *, u32); extern int iscsit_ta_default_erl(struct iscsi_portal_group *, u32); +extern int iscsit_ta_t10_pi(struct iscsi_portal_group *, u32); #endif /* ISCSI_TARGET_TPG_H */ -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v1 5/7] IB/isert: Accept RDMA_WRITE completions
In case of protected transactions, we will need to check the protection status of the transaction before sending SCSI response. So be ready for RDMA_WRITE completions. currently we don't ask for these completions, but for T10-PI we will. Signed-off-by: Sagi Grimberg sa...@mellanox.com --- drivers/infiniband/ulp/isert/ib_isert.c | 17 +++-- 1 files changed, 15 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index e5159ee..e4352c8 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -1602,6 +1602,18 @@ isert_completion_put(struct iser_tx_desc *tx_desc, struct isert_cmd *isert_cmd, } static void +isert_completion_rdma_write(struct iser_tx_desc *tx_desc, + struct isert_cmd *isert_cmd) +{ + struct iscsi_cmd *cmd = isert_cmd-iscsi_cmd; + struct isert_conn *isert_conn = isert_cmd-conn; + struct isert_device *device = isert_conn-conn_device; + + device-unreg_rdma_mem(isert_cmd, isert_conn); + isert_put_response(isert_conn-conn, cmd); +} + +static void isert_completion_rdma_read(struct iser_tx_desc *tx_desc, struct isert_cmd *isert_cmd) { @@ -1721,8 +1733,9 @@ __isert_send_completion(struct iser_tx_desc *tx_desc, isert_conn, ib_dev); break; case ISER_IB_RDMA_WRITE: - pr_err(isert_send_completion: Got ISER_IB_RDMA_WRITE\n); - dump_stack(); + pr_debug(isert_send_completion: Got ISER_IB_RDMA_WRITE\n); + atomic_dec(isert_conn-post_send_buf_count); + isert_completion_rdma_write(tx_desc, isert_cmd); break; case ISER_IB_RDMA_READ: pr_debug(isert_send_completion: Got ISER_IB_RDMA_READ:\n); -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v1 3/7] IB/isert: Initialize T10-PI resources
Upon connection establishment check if network portal is T10-PI enabled and allocate T10-PI resources if necessary, allocate signature enabled memory regions and mark connection queue-pair as signature enabled. Signed-off-by: Sagi Grimberg sa...@mellanox.com --- drivers/infiniband/ulp/isert/ib_isert.c | 104 +++ drivers/infiniband/ulp/isert/ib_isert.h | 19 +- 2 files changed, 106 insertions(+), 17 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 421182b..3495e73 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -87,7 +87,8 @@ isert_query_device(struct ib_device *ib_dev, struct ib_device_attr *devattr) } static int -isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id) +isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id, + u8 protection) { struct isert_device *device = isert_conn-conn_device; struct ib_qp_init_attr attr; @@ -119,6 +120,8 @@ isert_conn_setup_qp(struct isert_conn *isert_conn, struct rdma_cm_id *cma_id) attr.cap.max_recv_sge = 1; attr.sq_sig_type = IB_SIGNAL_REQ_WR; attr.qp_type = IB_QPT_RC; + if (protection) + attr.create_flags |= IB_QP_CREATE_SIGNATURE_EN; pr_debug(isert_conn_setup_qp cma_id-device: %p\n, cma_id-device); @@ -234,13 +237,18 @@ isert_create_device_ib_res(struct isert_device *device) device-unreg_rdma_mem = isert_unmap_cmd; } + /* Check signature cap */ + device-pi_capable = dev_attr-device_cap_flags +IB_DEVICE_SIGNATURE_HANDOVER ? 
true : false; + device-cqs_used = min_t(int, num_online_cpus(), device-ib_device-num_comp_vectors); device-cqs_used = min(ISERT_MAX_CQ, device-cqs_used); pr_debug(Using %d CQs, device %s supports %d vectors support -Fast registration %d\n, +Fast registration %d pi_capable %d\n, device-cqs_used, device-ib_device-name, -device-ib_device-num_comp_vectors, device-use_fastreg); +device-ib_device-num_comp_vectors, device-use_fastreg, +device-pi_capable); device-cq_desc = kzalloc(sizeof(struct isert_cq_desc) * device-cqs_used, GFP_KERNEL); if (!device-cq_desc) { @@ -383,6 +391,12 @@ isert_conn_free_fastreg_pool(struct isert_conn *isert_conn) list_del(fr_desc-list); ib_free_fast_reg_page_list(fr_desc-data_frpl); ib_dereg_mr(fr_desc-data_mr); + if (fr_desc-pi_ctx) { + ib_free_fast_reg_page_list(fr_desc-pi_ctx-prot_frpl); + ib_dereg_mr(fr_desc-pi_ctx-prot_mr); + ib_destroy_mr(fr_desc-pi_ctx-sig_mr); + kfree(fr_desc-pi_ctx); + } kfree(fr_desc); ++i; } @@ -394,8 +408,10 @@ isert_conn_free_fastreg_pool(struct isert_conn *isert_conn) static int isert_create_fr_desc(struct ib_device *ib_device, struct ib_pd *pd, -struct fast_reg_descriptor *fr_desc) +struct fast_reg_descriptor *fr_desc, u8 protection) { + int ret; + fr_desc-data_frpl = ib_alloc_fast_reg_page_list(ib_device, ISCSI_ISER_SG_TABLESIZE); if (IS_ERR(fr_desc-data_frpl)) { @@ -408,19 +424,73 @@ isert_create_fr_desc(struct ib_device *ib_device, struct ib_pd *pd, if (IS_ERR(fr_desc-data_mr)) { pr_err(Failed to allocate data frmr err=%ld\n, PTR_ERR(fr_desc-data_mr)); - ib_free_fast_reg_page_list(fr_desc-data_frpl); - return PTR_ERR(fr_desc-data_mr); + ret = PTR_ERR(fr_desc-data_mr); + goto err_data_frpl; } pr_debug(Create fr_desc %p page_list %p\n, fr_desc, fr_desc-data_frpl-page_list); + fr_desc-data_key_valid = true; - fr_desc-valid = true; + if (protection) { + struct ib_mr_init_attr mr_init_attr = {0}; + struct pi_context *pi_ctx; + + fr_desc-pi_ctx = kzalloc(sizeof(*fr_desc-pi_ctx), GFP_KERNEL); + if (!fr_desc-pi_ctx) 
{ + pr_err(Failed to allocate pi context\n); + ret = -ENOMEM; + goto err_data_mr; + } + pi_ctx = fr_desc-pi_ctx; + + pi_ctx-prot_frpl = ib_alloc_fast_reg_page_list(ib_device, + ISCSI_ISER_SG_TABLESIZE); + if (IS_ERR(pi_ctx-prot_frpl)) { + pr_err(Failed to allocate prot frpl err=%ld\n
[PATCH v1 2/7] Target/iscsi: Add T10-PI indication for iscsi_portal_group
In case an iscsi portal group will be defined as t10_pi enabled, all connections on top of it will support protected transactions. T10-PI support may require extra resource allocation and maintenance by the transport layer, so we don't want to apply them on non-t10_pi network portals. This is a hook for the iscsi target layer to signal the transport at connection establishment that this connection will carry protected transactions. Signed-off-by: Sagi Grimberg sa...@mellanox.com --- drivers/target/iscsi/iscsi_target_core.h |5 - drivers/target/iscsi/iscsi_target_tpg.c |2 ++ 2 files changed, 6 insertions(+), 1 deletions(-) diff --git a/drivers/target/iscsi/iscsi_target_core.h b/drivers/target/iscsi/iscsi_target_core.h index 48f7b3b..886d74d 100644 --- a/drivers/target/iscsi/iscsi_target_core.h +++ b/drivers/target/iscsi/iscsi_target_core.h @@ -58,7 +58,8 @@ #define TA_DEMO_MODE_DISCOVERY 1 #define TA_DEFAULT_ERL 0 #define TA_CACHE_CORE_NPS 0 - +/* T10 protection information disabled by default */ +#define TA_DEFAULT_T10_PI 0 #define ISCSI_IOV_DATA_BUFFER 5 @@ -765,6 +766,7 @@ struct iscsi_tpg_attrib { u32 prod_mode_write_protect; u32 demo_mode_discovery; u32 default_erl; + u8 t10_pi; struct iscsi_portal_group *tpg; }; @@ -787,6 +789,7 @@ struct iscsi_np { void*np_context; struct iscsit_transport *np_transport; struct list_headnp_list; + struct iscsi_tpg_np *tpg_np; } cacheline_aligned; struct iscsi_tpg_np { diff --git a/drivers/target/iscsi/iscsi_target_tpg.c b/drivers/target/iscsi/iscsi_target_tpg.c index 3976183..80ae14c 100644 --- a/drivers/target/iscsi/iscsi_target_tpg.c +++ b/drivers/target/iscsi/iscsi_target_tpg.c @@ -225,6 +225,7 @@ static void iscsit_set_default_tpg_attribs(struct iscsi_portal_group *tpg) a-prod_mode_write_protect = TA_PROD_MODE_WRITE_PROTECT; a-demo_mode_discovery = TA_DEMO_MODE_DISCOVERY; a-default_erl = TA_DEFAULT_ERL; + a-t10_pi = TA_DEFAULT_T10_PI; } int iscsit_tpg_add_portal_group(struct iscsi_tiqn *tiqn, struct iscsi_portal_group *tpg) 
@@ -500,6 +501,7 @@ struct iscsi_tpg_np *iscsit_tpg_add_network_portal( init_completion(tpg_np-tpg_np_comp); kref_init(tpg_np-tpg_np_kref); tpg_np-tpg_np = np; + np-tpg_np = tpg_np; tpg_np-tpg = tpg; spin_lock(tpg-tpg_np_lock); -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v1 4/7] IB/isert: pass mr and frpl to isert_fast_reg_mr routine
This commit generalizes isert_fast_reg_mr to receive mr and frpl instead of fr_desc to do registration. In T10-PI we also register protection memory region so we want to use this routine. This commit does not change any functionality. Signed-off-by: Sagi Grimberg sa...@mellanox.com --- drivers/infiniband/ulp/isert/ib_isert.c | 62 +++ 1 files changed, 30 insertions(+), 32 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index 3495e73..e5159ee 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -2246,10 +2246,10 @@ isert_map_fr_pagelist(struct ib_device *ib_dev, } static int -isert_fast_reg_mr(struct fast_reg_descriptor *fr_desc, - struct isert_conn *isert_conn, struct scatterlist *sg_start, - struct ib_sge *ib_sge, u32 sg_nents, u32 offset, - unsigned int data_len) +isert_fast_reg_mr(struct isert_conn *isert_conn, struct ib_mr *mr, + struct ib_fast_reg_page_list *frpl, bool *key_valid, + struct scatterlist *sg_start, u32 sg_nents, u32 offset, + unsigned int data_len, struct ib_sge *ib_sge) { struct ib_device *ib_dev = isert_conn-conn_cm_id-device; struct ib_send_wr fr_wr, inv_wr; @@ -2260,33 +2260,31 @@ isert_fast_reg_mr(struct fast_reg_descriptor *fr_desc, sg_nents = min_t(unsigned int, sg_nents, ISCSI_ISER_SG_TABLESIZE); page_off = offset % PAGE_SIZE; - - pr_debug(Use fr_desc %p sg_nents %d offset %u\n, -fr_desc, sg_nents, offset); + pr_debug(Use mr %p frpl %p sg_nents %d offset %u\n, +mr, frpl, sg_nents, offset); pagelist_len = isert_map_fr_pagelist(ib_dev, sg_start, sg_nents, -fr_desc-data_frpl-page_list[0]); +frpl-page_list[0]); - if (!fr_desc-data_key_valid) { + if (!*key_valid) { memset(inv_wr, 0, sizeof(inv_wr)); inv_wr.opcode = IB_WR_LOCAL_INV; - inv_wr.ex.invalidate_rkey = fr_desc-data_mr-rkey; + inv_wr.ex.invalidate_rkey = mr-rkey; wr = inv_wr; /* Bump the key */ - key = (u8)(fr_desc-data_mr-rkey 0x00FF); - ib_update_fast_reg_key(fr_desc-data_mr, 
++key); + key = (u8)(mr-rkey 0x00FF); + ib_update_fast_reg_key(mr, ++key); } /* Prepare FASTREG WR */ memset(fr_wr, 0, sizeof(fr_wr)); fr_wr.opcode = IB_WR_FAST_REG_MR; - fr_wr.wr.fast_reg.iova_start = - fr_desc-data_frpl-page_list[0] + page_off; - fr_wr.wr.fast_reg.page_list = fr_desc-data_frpl; + fr_wr.wr.fast_reg.iova_start = frpl-page_list[0] + page_off; + fr_wr.wr.fast_reg.page_list = frpl; fr_wr.wr.fast_reg.page_list_len = pagelist_len; fr_wr.wr.fast_reg.page_shift = PAGE_SHIFT; fr_wr.wr.fast_reg.length = data_len; - fr_wr.wr.fast_reg.rkey = fr_desc-data_mr-rkey; + fr_wr.wr.fast_reg.rkey = mr-rkey; fr_wr.wr.fast_reg.access_flags = IB_ACCESS_LOCAL_WRITE; if (!wr) @@ -2299,14 +2297,14 @@ isert_fast_reg_mr(struct fast_reg_descriptor *fr_desc, pr_err(fast registration failed, ret:%d\n, ret); return ret; } - fr_desc-data_key_valid = false; - ib_sge-lkey = fr_desc-data_mr-lkey; - ib_sge-addr = fr_desc-data_frpl-page_list[0] + page_off; + *key_valid = false; + ib_sge-lkey = mr-lkey; + ib_sge-addr = frpl-page_list[0] + page_off; ib_sge-length = data_len; - pr_debug(RDMA ib_sge: addr: 0x%16llx length: %u lkey: %08x\n, -ib_sge-addr, ib_sge-length, ib_sge-lkey); + pr_debug(fastreg ib_sge: addr: 0x%16llx length: %u lkey: %08x\n, +ib_sge-addr + page_off, ib_sge-length, ib_sge-lkey); return ret; } @@ -2320,7 +2318,7 @@ isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, struct isert_conn *isert_conn = (struct isert_conn *)conn-context; struct ib_device *ib_dev = isert_conn-conn_cm_id-device; struct ib_send_wr *send_wr; - struct ib_sge *ib_sge; + struct ib_sge data_sge; struct scatterlist *sg_start; struct fast_reg_descriptor *fr_desc; u32 sg_off = 0, sg_nents; @@ -2352,10 +2350,7 @@ isert_reg_rdma(struct iscsi_conn *conn, struct iscsi_cmd *cmd, pr_debug(Mapped cmd: %p count: %u sg: %p sg_nents: %u rdma_len %d\n, isert_cmd, count, sg_start, sg_nents, data_left); - memset(wr-s_ib_sge, 0, sizeof(*ib_sge)); - ib_sge = wr-s_ib_sge; - wr-ib_sge = ib_sge; - + wr-ib_sge 
= wr-s_ib_sge; wr-send_wr_num = 1; memset(wr-s_send_wr, 0, sizeof(*send_wr)); wr-send_wr = wr-s_send_wr
[PATCH v1 0/7] iSER target initial support for T10-DIF offload
This patchset adds basic support for T10-DIF protection information offload in iSER target on top of Nic's recent work and RDMA signature verbs API. This code was tested with my own implementation of the target core T10-PI support which was designed mainly to activate the transport DIF offload. In order to actually get Linux SCSI target to work with iSER T10-DIF offload a couple of patches need to be added to Nic's work which is ongoing. Apart from doing the actual iser implementation for T10-DIF offload, this series would help to see the full picture by: * Showing how the T10-DIF offload verbs are used * Showing how fabric transport offload plugs into the target core The T10-DIF signature offload verbs and mlx5 driver implementation patches are available from the for-next branch of git://beany.openfabrics.org/~ogerlitz/linux-2.6.git as the below commits: 2b4316b IB/mlx5: Publish support in signature feature ef3130d IB/mlx5: Collect signature error completion c1b37b1 IB/mlx5: Support IB_WR_REG_SIG_MR f5d8496 IB/mlx5: Keep mlx5 MRs in a radix tree under device 72a72ee IB/mlx5: remove MTT access mode from umr flags helper function ccb0a907 IB/mlx5: Break wqe handling to begin finish routines cda0569 IB/mlx5: Initialize mlx5_ib_qp signature related 33b4079 IB/mlx5, mlx5_core: Support for create_mr and destroy_mr 8b343e6 IB/core: Introduce Signature Verbs API c1b0358 IB/core: Introduce protected memory regions Here is a running example of target T10-PI support in an offload manner using Mellanox Connect-IB HW. The working mode is head-less where the iSER target is working against a legacy iSER initiator. SCSI writes are inserted with protection block guards and SCSI reads are verified and stripped from protection block guards. *Note: fileio support is not a part of this patchset and will be provided later as part of Nic's next T10-PI patches. *Note: configuration commands are presented on top of targetcli 1. 
Creating file backstore / backstores/fileio create file_or_dev=/root/file name=dev size=1G Not using buffered mode. Created fileio dev. 2. Set protection parameters / backstores/fileio/dev set attribute prot_type=1 guard_type=crc app_tag=0xfefe pi_handover=seperated Parameter guard_type is now 'crc'. Parameter app_tag is now '0xfefe'. Parameter prot_type is now '1'. Parameter pi_handover is now 'seperated'. 3. Format (with T10-PI) file backstore and enable protection information / backstores/fileio/dev set attribute pi_format=1 pi_enable=1 Parameter pi_enable is now '1'. Parameter pi_format is now '1'. Now file file.protection is created and formatted: $ hexdump -C -n 48 file.protection ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff || * 0030 4. Set iscsi tpgt T10-PI support (signaling the transport to initialize T10-PI) / iscsi/iqn.2003-01.org.linux-iscsi.dev-r-vrt-077-024.x8664:sn.5263e2a37630/tpgt1/ set attribute t10_pi=1 Parameter t10_pi is now '1'. 5. Legacy initiator successfully writes 4 512B blocks $ dd if=/root/tmp_file of=/dev/sdb bs=512 count=4 oflag=direct 4+0 records in 4+0 records out 2048 bytes (2.0 kB) copied, 0.170229 s, 12.0 kB/s Now data blocks are stored in file and protection block guards are stored in file.protection $ hexdump -C -n 48 file.protection 00 e3 fe fe 00 00 00 00 0e c6 fe fe 00 00 00 01 || 0010 2c d8 fe fe 00 00 00 02 1d 5d fe fe 00 00 00 03 |,]..| 0020 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff || 0030 6. Legacy initiator successfully reads 10 512B blocks $ dd if=/dev/sdb of=/dev/null bs=512 count=10 iflag=direct 10+0 records in 10+0 records out 5120 bytes (5.1 kB) copied, 0.00397309 s, 1.3 MB/s 7. Corrupting block 0 guard tag in file.protection (0x00e3 -> 0xff1c) $ hexdump -C -n 48 file.protection ff 1c fe fe 00 00 00 00 0e c6 fe fe 00 00 00 01 || 0010 2c d8 fe fe 00 00 00 02 1d 5d fe fe 00 00 00 03 |,]..| 0020 ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff || 0030 8. 
Legacy initiator fails to read 10 512B blocks dd if=/dev/sdb of=/dev/null bs=512 count=10 iflag=direct dd: reading `/dev/sdb': Input/output error 0+0 records in 0+0 records out 0 bytes (0 B) copied, 0.0035183 s, 0.0 kB/s SNIP from target log: kernel: mlx5_0:mlx5_poll_one:512:(pid21565): CQN: 0x19 Got SIGERR on key: 0x4bde err_type 0 err_offset 207 expected e3 actual ff1c kernel: PI error found type 0 at offset 0x207 expected 0xe3 vs actual 0xff1c where error type 0 is IB_SIG_BAD_GUARD. Changes from v0: - Dropped applied preparation patches - Restored break statement for RDMA_WRITE completions - Used core routine for sending CHECK_CONDITION for PI errors - checkpatch errors Sagi Grimberg (7
[LSF/MM TOPIC][ATTEND] T10-PI RDMA offload
Hey SCSI (and LSF) folks, I'd like to propose the following topic for upcoming LSF-MM: T10-PI standard is becoming more and more appealing for storage and cloud solutions. Since error-detection coding comes with its cost of CPU computation overhead, state-of-the-art ASICs offer the ability to offload T10-PI operations (DIF/DIX), examples are SAS & FC controllers. Recently, the support for T10-PI offload over RDMA transactions was introduced in the Mellanox Connect-IB HCA. The first building block, RDMA verbs API supporting T10-PI offload was submitted over Linux-rdma (see http://marc.info/?l=linux-rdma&m=138719320307936&w=2). Moreover, we have seen first seeds of T10-PI support in Linux SCSI target entering v3.14 (see http://lwn.net/Articles/579708/) and RDMA offload implementation in iSER transport (see http://www.spinics.net/lists/linux-scsi/msg71128.html). There is still some ground to fill to get protection information support to a full solution over all backend devices. We would like to use LSF-MM platform to push forward T10-PI support end-to-end which requires Linux SCSI Target core level support along with transport level support in iSER and SRP (and also FCoE in the future) and over to the Initiator side transports. Discussion topics: - Introduce T10-PI offload RDMA verbs and how they are used in storage applications. - Discuss effects of DIX1.1 (currently a draft) in Target implementation (core level - transport level - HW level). - Discuss T10-PI Type 4 (16-byte DIF) status and possible implications on Target & Initiator implementation down to HW level. - Discuss Current Limitations that T10-PI RDMA offload poses on iSCSI protocol (ImmediateData, UnsolDataOut) and if/how they can be solved. - Whatever else comes to mind... Thanks, Sagi. -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [LSF/MM TOPIC] [ATTEND] scsi-mq
On 1/16/2014 1:05 AM, Nicholas A. Bellinger wrote: Hi all, I'd like to discuss the current state of scsi-mq prototype code. And now that blk-mq is upstream for v3.13, exploring the remaining TODO items towards an initial scsi-mq merge sometime before 2015 is upon us. The benefits of scsi-mq remain unchanged: - Utilizes blk-mq's native per-cpu primitive + NUMA local friendly queuing of pre-allocated struct request descriptor memory - Eliminates all fast-path memory allocations in SCSI-core + optionally the underlying SCSI LLDs - Avoids fast-path Scsi_Host->host_lock + request_queue->queue_lock accesses in submission + completion paths These benefits have been discussed in greater detail in [1], and the latest alpha quality code is available at [2] below. The current TODO items include: - A plan for per device SCSI error handling - Proper scsi_device->sdev_gendev reference counting - Queuing fairness across multiple scsi-mq devices per host - Support for > 1 nr_hw_queues + conversion of qla2xxx + lpfc LLDs that support native hardware multiqueue Thank you, --nab References: [1]: [ATTEND] scsi-mq prototype discussion http://marc.info/?l=linux-scsi&m=137358831329753&w=2 [2]: scsi-mq WIP updated to v3.13-rc3 http://marc.info/?l=linux-scsi&m=138782535731722&w=2 +1 I would be happy to join this discussion, I think it is also important to think about the interaction with iSCSI and LLDs. Sagi. -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] IB/mlx5: Fix smatch warnings
Possible double free on in-mailbox. Signed-off-by: Sagi Grimberg sa...@mellanox.com --- drivers/infiniband/hw/mlx5/mr.c |2 +- 1 files changed, 1 insertions(+), 1 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index bc27f6b..f023711 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1050,13 +1050,13 @@ struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd, in-seg.flags = MLX5_PERM_UMR_EN | access_mode; err = mlx5_core_create_mkey(dev-mdev, mr-mmr, in, sizeof(*in), NULL, NULL, NULL); - kfree(in); if (err) goto err_destroy_psv; mr-ibmr.lkey = mr-mmr.key; mr-ibmr.rkey = mr-mmr.key; mr-umem = NULL; + kfree(in); return mr-ibmr; -- 1.7.8.2 -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] IB/mlx5: Fix signature rule constants according to FW specifications
Use DIF CRC INC with apptag escape (0x8) and update IP-CSUM entries. Signed-off-by: Sagi Grimberg sa...@mellanox.com --- drivers/infiniband/hw/mlx5/qp.c |6 +++--- 1 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 7981620..58c4735 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1952,9 +1952,9 @@ static int format_selector(struct ib_sig_attrs *attr, { #define FORMAT_DIF_NONE0 -#define FORMAT_DIF_CRC_INC 4 -#define FORMAT_DIF_CSUM_INC12 -#define FORMAT_DIF_CRC_NO_INC 13 +#define FORMAT_DIF_CRC_INC 8 +#define FORMAT_DIF_CRC_NO_INC 12 +#define FORMAT_DIF_CSUM_INC13 #define FORMAT_DIF_CSUM_NO_INC 14 switch (domain-sig.dif.type) { -- 1.7.8.2 -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] IB/mlx5: Fix signature rule constants according to FW specifications
Use DIF CRC INC with apptag escape (0x8) and update IP-CSUM entries. Signed-off-by: Sagi Grimberg sa...@mellanox.com --- drivers/infiniband/hw/mlx5/qp.c |6 +++--- 1 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 7981620..58c4735 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1952,9 +1952,9 @@ static int format_selector(struct ib_sig_attrs *attr, { #define FORMAT_DIF_NONE0 -#define FORMAT_DIF_CRC_INC 4 -#define FORMAT_DIF_CSUM_INC12 -#define FORMAT_DIF_CRC_NO_INC 13 +#define FORMAT_DIF_CRC_INC 8 +#define FORMAT_DIF_CRC_NO_INC 12 +#define FORMAT_DIF_CSUM_INC13 #define FORMAT_DIF_CSUM_NO_INC 14 switch (domain-sig.dif.type) { -- 1.7.8.2 -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH-v2 03/17] target/sbc: Add DIF setup in sbc_check_prot + sbc_parse_cdb
On 1/19/2014 4:44 AM, Nicholas A. Bellinger wrote: From: Nicholas Bellinger n...@linux-iscsi.org This patch adds sbc_check_prot() for performing various DIF related CDB sanity checks, along with setting cmd-prot_type once sanity checks have passed. Also, add calls in sbc_parse_cdb() for READ_[10,12,16] + WRITE_[10,12,16] to perform DIF sanity checking. v2 changes: - Make sbc_check_prot defined as static (Fengguang + Wei) - Remove unprotected READ/WRITE warning (mkp) - Populate cmd-prot_type + friends (Sagi) - Drop SCF_PROT usage Cc: Martin K. Petersen martin.peter...@oracle.com Cc: Christoph Hellwig h...@lst.de Cc: Hannes Reinecke h...@suse.de Cc: Sagi Grimberg sa...@mellanox.com Cc: Or Gerlitz ogerl...@mellanox.com Signed-off-by: Nicholas Bellinger n...@linux-iscsi.org --- drivers/target/target_core_sbc.c | 62 ++ 1 file changed, 62 insertions(+) diff --git a/drivers/target/target_core_sbc.c b/drivers/target/target_core_sbc.c index 6863dbe..91a92f3 100644 --- a/drivers/target/target_core_sbc.c +++ b/drivers/target/target_core_sbc.c @@ -563,6 +563,44 @@ sbc_compare_and_write(struct se_cmd *cmd) return TCM_NO_SENSE; } +static bool +sbc_check_prot(struct se_device *dev, struct se_cmd *cmd, unsigned char *cdb, + u32 sectors) +{ + if (!cmd-t_prot_sg || !cmd-t_prot_nents) + return true; + + switch (dev-dev_attrib.pi_prot_type) { + case TARGET_DIF_TYPE3_PROT: + if (!(cdb[1] 0xe0)) + return true; + + cmd-reftag_seed = 0x; + break; + case TARGET_DIF_TYPE2_PROT: + if (cdb[1] 0xe0) + return false; + + cmd-reftag_seed = cmd-t_task_lba; + break; + case TARGET_DIF_TYPE1_PROT: + if (!(cdb[1] 0xe0)) + return true; + + cmd-reftag_seed = cmd-t_task_lba; + break; + case TARGET_DIF_TYPE0_PROT: + default: + return true; + } + + cmd-prot_type = dev-dev_attrib.pi_prot_type; + cmd-prot_length = dev-prot_length * sectors; + cmd-prot_handover = PROT_SEPERATED; I know that we are not planning to support interleaved mode at the moment, But I think that the protection handover type is the 
backstore preference and should be taken from se_dev. But it is not that important for now... Sagi. -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH-v2 10/17] target: Add protection SGLs to target_submit_cmd_map_sgls
On 1/19/2014 4:44 AM, Nicholas A. Bellinger wrote: From: Nicholas Bellinger n...@linux-iscsi.org This patch adds support to target_submit_cmd_map_sgls() for accepting 'sgl_prot' + 'sgl_prot_count' parameters for DIF protection information. Note the passed parameters are stored at se_cmd-t_prot_sg and se_cmd-t_prot_nents respectively. Also, update tcm_loop and vhost-scsi fabrics usage of target_submit_cmd_map_sgls() to take into account the new parameters. I didn't see that you added protection allocation to transports that does not use target_submit_cmd_map_sgls() - which happens to be iSCSI/iSER/SRP :( Don't you think that prot SG allocation should be added also to target_alloc_sgl()? by then se_cmd should contain the protection attributes and this routine can know if it needs to allocate prot_sg as well. This is how I used it... Cc: Martin K. Petersen martin.peter...@oracle.com Cc: Christoph Hellwig h...@lst.de Cc: Hannes Reinecke h...@suse.de Cc: Sagi Grimberg sa...@mellanox.com Cc: Or Gerlitz ogerl...@mellanox.com Signed-off-by: Nicholas Bellinger n...@linux-iscsi.org --- drivers/target/loopback/tcm_loop.c |2 +- drivers/target/target_core_transport.c | 16 ++-- drivers/vhost/scsi.c |2 +- include/target/target_core_fabric.h|3 ++- 4 files changed, 18 insertions(+), 5 deletions(-) diff --git a/drivers/target/loopback/tcm_loop.c b/drivers/target/loopback/tcm_loop.c index 763ee45..112b795 100644 --- a/drivers/target/loopback/tcm_loop.c +++ b/drivers/target/loopback/tcm_loop.c @@ -217,7 +217,7 @@ static void tcm_loop_submission_work(struct work_struct *work) scsi_bufflen(sc), tcm_loop_sam_attr(sc), sc-sc_data_direction, 0, scsi_sglist(sc), scsi_sg_count(sc), - sgl_bidi, sgl_bidi_count); + sgl_bidi, sgl_bidi_count, NULL, 0); if (rc 0) { set_host_byte(sc, DID_NO_CONNECT); goto out_done; diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index fa4fc04..aebe0bb 100644 --- a/drivers/target/target_core_transport.c +++ 
b/drivers/target/target_core_transport.c @@ -1310,6 +1310,8 @@ transport_generic_map_mem_to_cmd(struct se_cmd *cmd, struct scatterlist *sgl, * @sgl_count: scatterlist count for unidirectional mapping * @sgl_bidi: struct scatterlist memory for bidirectional READ mapping * @sgl_bidi_count: scatterlist count for bidirectional READ mapping + * @sgl_prot: struct scatterlist memory protection information + * @sgl_prot_count: scatterlist count for protection information * * Returns non zero to signal active I/O shutdown failure. All other * setup exceptions will be returned as a SCSI CHECK_CONDITION response, @@ -1322,7 +1324,8 @@ int target_submit_cmd_map_sgls(struct se_cmd *se_cmd, struct se_session *se_sess unsigned char *cdb, unsigned char *sense, u32 unpacked_lun, u32 data_length, int task_attr, int data_dir, int flags, struct scatterlist *sgl, u32 sgl_count, - struct scatterlist *sgl_bidi, u32 sgl_bidi_count) + struct scatterlist *sgl_bidi, u32 sgl_bidi_count, + struct scatterlist *sgl_prot, u32 sgl_prot_count) { struct se_portal_group *se_tpg; sense_reason_t rc; @@ -1364,6 +1367,14 @@ int target_submit_cmd_map_sgls(struct se_cmd *se_cmd, struct se_session *se_sess target_put_sess_cmd(se_sess, se_cmd); return 0; } + /* +* Save pointers for SGLs containing protection information, +* if present. +*/ + if (sgl_prot_count) { + se_cmd-t_prot_sg = sgl_prot; + se_cmd-t_prot_nents = sgl_prot_count; + } rc = target_setup_cmd_from_cdb(se_cmd, cdb); if (rc != 0) { @@ -1406,6 +1417,7 @@ int target_submit_cmd_map_sgls(struct se_cmd *se_cmd, struct se_session *se_sess return 0; } } + /* * Check if we need to delay processing because of ALUA * Active/NonOptimized primary access state.. 
@@ -1445,7 +1457,7 @@ int target_submit_cmd(struct se_cmd *se_cmd, struct se_session *se_sess, { return target_submit_cmd_map_sgls(se_cmd, se_sess, cdb, sense, unpacked_lun, data_length, task_attr, data_dir, - flags, NULL, 0, NULL, 0); + flags, NULL, 0, NULL, 0, NULL, 0); } EXPORT_SYMBOL(target_submit_cmd); diff --git a/drivers/vhost/scsi.c b/drivers/vhost/scsi.c index f175629..84488a8 100644 --- a/drivers/vhost/scsi.c +++ b/drivers/vhost/scsi.c @@ -889,7 +889,7 @@ static void tcm_vhost_submission_work(struct work_struct *work) cmd-tvc_lun, cmd-tvc_exp_data_len, cmd-tvc_task_attr, cmd-tvc_data_direction
Re: [PATCH-v2 11/17] target/iblock: Add blk_integrity + BIP passthrough support
On 1/19/2014 4:44 AM, Nicholas A. Bellinger wrote: From: Nicholas Bellinger n...@linux-iscsi.org This patch adds blk_integrity passthrough support for block_device backends using IBLOCK. Nice! This includes iblock_alloc_bip() + setup of bio_integrity_payload information that attaches to the leading struct bio once bio_list is populated during fast-path iblock_execute_rw() I/O dispatch. It also updates setup in iblock_configure_device() to detect modes of protection + se dev-dev_attrib.pi_prot_type accordingly, along with creating required bio_set integrity mempools. Cc: Martin K. Petersen martin.peter...@oracle.com Cc: Christoph Hellwig h...@lst.de Cc: Hannes Reinecke h...@suse.de Cc: Sagi Grimberg sa...@mellanox.com Cc: Or Gerlitz ogerl...@mellanox.com Signed-off-by: Nicholas Bellinger n...@linux-iscsi.org --- drivers/target/Kconfig |1 + drivers/target/target_core_iblock.c | 91 ++- 2 files changed, 90 insertions(+), 2 deletions(-) diff --git a/drivers/target/Kconfig b/drivers/target/Kconfig index 50aad2e..dc2d84a 100644 --- a/drivers/target/Kconfig +++ b/drivers/target/Kconfig @@ -14,6 +14,7 @@ if TARGET_CORE config TCM_IBLOCK tristate TCM/IBLOCK Subsystem Plugin for Linux/BLOCK + select BLK_DEV_INTEGRITY help Say Y here to enable the TCM/IBLOCK subsystem plugin for non-buffered access to Linux/Block devices using BIO diff --git a/drivers/target/target_core_iblock.c b/drivers/target/target_core_iblock.c index 15d9121..293d9b0 100644 --- a/drivers/target/target_core_iblock.c +++ b/drivers/target/target_core_iblock.c @@ -91,6 +91,7 @@ static int iblock_configure_device(struct se_device *dev) struct iblock_dev *ib_dev = IBLOCK_DEV(dev); struct request_queue *q; struct block_device *bd = NULL; + struct blk_integrity *bi; fmode_t mode; int ret = -ENOMEM; @@ -155,8 +156,40 @@ static int iblock_configure_device(struct se_device *dev) if (blk_queue_nonrot(q)) dev-dev_attrib.is_nonrot = 1; + bi = bdev_get_integrity(bd); + if (bi) { + struct bio_set *bs = 
ib_dev-ibd_bio_set; + + if (!strcmp(bi-name, T10-DIF-TYPE3-IP) || + !strcmp(bi-name, T10-DIF-TYPE1-IP)) { + pr_err(IBLOCK export of blk_integrity: %s not + supported\n, bi-name); + ret = -ENOSYS; + goto out_blkdev_put; + } Please remind me why we ignore IP-CSUM guard type again? MKP, will this be irrelevant for the initiator as well? if so, I don't see a reason to expose this in RDMA verbs. + + if (!strcmp(bi-name, T10-DIF-TYPE3-CRC)) { + dev-dev_attrib.pi_prot_type = TARGET_DIF_TYPE3_PROT; + } else if (!strcmp(bi-name, T10-DIF-TYPE1-CRC)) { + dev-dev_attrib.pi_prot_type = TARGET_DIF_TYPE1_PROT; + } + + if (dev-dev_attrib.pi_prot_type) { + if (bioset_integrity_create(bs, IBLOCK_BIO_POOL_SIZE) 0) { + pr_err(Unable to allocate bioset for PI\n); + ret = -ENOMEM; + goto out_blkdev_put; + } + pr_debug(IBLOCK setup BIP bs-bio_integrity_pool: %p\n, +bs-bio_integrity_pool); + } + dev-dev_attrib.hw_pi_prot_type = dev-dev_attrib.pi_prot_type; + } + return 0; +out_blkdev_put: + blkdev_put(ib_dev-ibd_bd, FMODE_WRITE|FMODE_READ|FMODE_EXCL); out_free_bioset: bioset_free(ib_dev-ibd_bio_set); ib_dev-ibd_bio_set = NULL; @@ -170,8 +203,10 @@ static void iblock_free_device(struct se_device *dev) if (ib_dev-ibd_bd != NULL) blkdev_put(ib_dev-ibd_bd, FMODE_WRITE|FMODE_READ|FMODE_EXCL); - if (ib_dev-ibd_bio_set != NULL) + if (ib_dev-ibd_bio_set != NULL) { + bioset_integrity_free(ib_dev-ibd_bio_set); bioset_free(ib_dev-ibd_bio_set); + } kfree(ib_dev); } @@ -586,13 +621,58 @@ static ssize_t iblock_show_configfs_dev_params(struct se_device *dev, char *b) return bl; } +static int +iblock_alloc_bip(struct se_cmd *cmd, struct bio *bio) +{ + struct se_device *dev = cmd-se_dev; + struct blk_integrity *bi; + struct bio_integrity_payload *bip; + struct iblock_dev *ib_dev = IBLOCK_DEV(dev); + struct scatterlist *sg; + int i, rc; + + bi = bdev_get_integrity(ib_dev-ibd_bd); + if (!bi) { + pr_err(Unable to locate bio_integrity\n); + return -ENODEV; + } + + bip = bio_integrity_alloc(bio, GFP_NOIO, 
cmd-t_prot_nents); + if (!bip) { + pr_err(Unable to allocate bio_integrity_payload\n
Re: [PATCH-v2 12/17] target/file: Add DIF protection init/format support
On 1/19/2014 4:44 AM, Nicholas A. Bellinger wrote: From: Nicholas Bellinger n...@linux-iscsi.org This patch adds support for DIF protection init/format support into the FILEIO backend. It involves using a seperate $FILE.protection for storing PI that is opened via fd_init_prot() using the common pi_prot_type attribute. The actual formatting of the protection is done via fd_format_prot() using the common pi_prot_format attribute, that will populate the initial PI data based upon the currently configured pi_prot_type. Based on original FILEIO code from Sagi. Nice! see comments below... v1 changes: - Fix sparse warnings in fd_init_format_buf (Fengguang) Cc: Martin K. Petersen martin.peter...@oracle.com Cc: Christoph Hellwig h...@lst.de Cc: Hannes Reinecke h...@suse.de Cc: Sagi Grimberg sa...@mellanox.com Cc: Or Gerlitz ogerl...@mellanox.com Signed-off-by: Nicholas Bellinger n...@linux-iscsi.org --- drivers/target/target_core_file.c | 137 + drivers/target/target_core_file.h |4 ++ 2 files changed, 141 insertions(+) diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index 0e34cda..119d519 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -700,6 +700,140 @@ static sector_t fd_get_blocks(struct se_device *dev) dev-dev_attrib.block_size); } +static int fd_init_prot(struct se_device *dev) +{ + struct fd_dev *fd_dev = FD_DEV(dev); + struct file *prot_file, *file = fd_dev-fd_file; + struct inode *inode; + int ret, flags = O_RDWR | O_CREAT | O_LARGEFILE | O_DSYNC; + char buf[FD_MAX_DEV_PROT_NAME]; + + if (!file) { + pr_err(Unable to locate fd_dev-fd_file\n); + return -ENODEV; + } + + inode = file-f_mapping-host; + if (S_ISBLK(inode-i_mode)) { + pr_err(FILEIO Protection emulation only supported on + !S_ISBLK\n); + return -ENOSYS; + } + + if (fd_dev-fbd_flags FDBD_HAS_BUFFERED_IO_WCE) + flags = ~O_DSYNC; + + snprintf(buf, FD_MAX_DEV_PROT_NAME, %s.protection, +fd_dev-fd_dev_name); + + prot_file = 
filp_open(buf, flags, 0600); + if (IS_ERR(prot_file)) { + pr_err(filp_open(%s) failed\n, buf); + ret = PTR_ERR(prot_file); + return ret; + } + fd_dev-fd_prot_file = prot_file; + + return 0; +} + +static void fd_init_format_buf(struct se_device *dev, unsigned char *buf, + u32 unit_size, u32 *ref_tag, u16 app_tag, + bool inc_reftag) +{ + unsigned char *p = buf; + int i; + + for (i = 0; i unit_size; i += dev-prot_length) { + *((u16 *)p[0]) = 0x; + *((__be16 *)p[2]) = cpu_to_be16(app_tag); + *((__be32 *)p[4]) = cpu_to_be32(*ref_tag); + + if (inc_reftag) + (*ref_tag)++; + + p += dev-prot_length; + } +} + +static int fd_format_prot(struct se_device *dev) +{ + struct fd_dev *fd_dev = FD_DEV(dev); + struct file *prot_fd = fd_dev-fd_prot_file; + sector_t prot_length, prot; + unsigned char *buf; + loff_t pos = 0; + u32 ref_tag = 0; + int unit_size = FDBD_FORMAT_UNIT_SIZE * dev-dev_attrib.block_size; + int rc, ret = 0, size, len; + bool inc_reftag = false; + + if (!dev-dev_attrib.pi_prot_type) { + pr_err(Unable to format_prot while pi_prot_type == 0\n); + return -ENODEV; + } + if (!prot_fd) { + pr_err(Unable to locate fd_dev-fd_prot_file\n); + return -ENODEV; + } + + switch (dev-dev_attrib.pi_prot_type) { redundant - see below. + case TARGET_DIF_TYPE3_PROT: + ref_tag = 0x; + break; + case TARGET_DIF_TYPE2_PROT: + case TARGET_DIF_TYPE1_PROT: + inc_reftag = true; + break; + default: + break; + } + + buf = vzalloc(unit_size); + if (!buf) { + pr_err(Unable to allocate FILEIO prot buf\n); + return -ENOMEM; + } + + prot_length = (dev-transport-get_blocks(dev) + 1) * dev-prot_length; + size = prot_length; + + pr_debug(Using FILEIO prot_length: %llu\n, +(unsigned long long)prot_length); + + for (prot = 0; prot prot_length; prot += unit_size) { + + fd_init_format_buf(dev, buf, unit_size, ref_tag, 0x, + inc_reftag); I didn't send you my latest patches (my fault...).T10-PI format should only place escape values throughout the protection file (fill it with 0xff). 
so I guess in this case fd_init_format_buf() boils down to memset(buf, 0xff, unit_size) once before the loop and just
Re: [PATCH-v2 13/17] target/file: Add DIF protection support to fd_execute_rw
On 1/19/2014 4:44 AM, Nicholas A. Bellinger wrote: From: Nicholas Bellinger n...@linux-iscsi.org This patch adds support for DIF protection into fd_execute_rw() code for WRITE/READ I/O using sbc_dif_verify_[write,read]() logic. It adds fd_do_prot_rw() for handling interface with FILEIO PI, and uses a locally allocated fd_prot-prot_buf + fd_prot-prot_sg for interacting with SBC DIF verify emulation code. Cc: Martin K. Petersen martin.peter...@oracle.com Cc: Christoph Hellwig h...@lst.de Cc: Hannes Reinecke h...@suse.de Cc: Sagi Grimberg sa...@mellanox.com Cc: Or Gerlitz ogerl...@mellanox.com Signed-off-by: Nicholas Bellinger n...@linux-iscsi.org --- drivers/target/target_core_file.c | 119 - drivers/target/target_core_file.h |5 ++ 2 files changed, 123 insertions(+), 1 deletion(-) diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index 119d519..aaba7c5 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -257,6 +257,72 @@ static void fd_free_device(struct se_device *dev) kfree(fd_dev); } +static int fd_do_prot_rw(struct se_cmd *cmd, struct fd_prot *fd_prot, +int is_write) +{ + struct se_device *se_dev = cmd-se_dev; + struct fd_dev *dev = FD_DEV(se_dev); + struct file *prot_fd = dev-fd_prot_file; + struct scatterlist *sg; + loff_t pos = (cmd-t_task_lba * se_dev-prot_length); + unsigned char *buf; + u32 prot_size, len, size; + int rc, ret = 1, i; + + prot_size = (cmd-data_length / se_dev-dev_attrib.block_size) * +se_dev-prot_length; + + if (!is_write) { + fd_prot-prot_buf = vzalloc(prot_size); + if (!fd_prot-prot_buf) { + pr_err(Unable to allocate fd_prot-prot_buf\n); + return -ENOMEM; + } + buf = fd_prot-prot_buf; + + fd_prot-prot_sg_nents = cmd-t_prot_nents; + fd_prot-prot_sg = kzalloc(sizeof(struct scatterlist) * + fd_prot-prot_sg_nents, GFP_KERNEL); + if (!fd_prot-prot_sg) { + pr_err(Unable to allocate fd_prot-prot_sg\n); + vfree(fd_prot-prot_buf); + return -ENOMEM; + } + size = prot_size; + + 
for_each_sg(fd_prot-prot_sg, sg, fd_prot-prot_sg_nents, i) { + + len = min_t(u32, PAGE_SIZE, size); + sg_set_buf(sg, buf, len); + size -= len; + buf += len; + } + } + + if (is_write) { + rc = kernel_write(prot_fd, fd_prot-prot_buf, prot_size, pos); + if (rc 0 || prot_size != rc) { + pr_err(kernel_write() for fd_do_prot_rw failed: + %d\n, rc); + ret = -EINVAL; + } + } else { + rc = kernel_read(prot_fd, pos, fd_prot-prot_buf, prot_size); + if (rc 0) { + pr_err(kernel_read() for fd_do_prot_rw failed: + %d\n, rc); + ret = -EINVAL; + } + } + + if (is_write || ret 0) { + kfree(fd_prot-prot_sg); + vfree(fd_prot-prot_buf); + } + + return ret; +} + static int fd_do_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, int is_write) { @@ -551,6 +617,8 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, enum dma_data_direction data_direction) { struct se_device *dev = cmd-se_dev; + struct fd_prot fd_prot; + sense_reason_t rc; int ret = 0; /* @@ -558,8 +626,48 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents, * physical memory addresses to struct iovec virtual memory. */ if (data_direction == DMA_FROM_DEVICE) { Maybe its better to export this one to a separate function? fd_execute_prot_rw()? just a nit... + memset(fd_prot, 0, sizeof(struct fd_prot)); + + if (cmd-prot_type) { + ret = fd_do_prot_rw(cmd, fd_prot, false); + if (ret 0) + return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE; + } + ret = fd_do_rw(cmd, sgl, sgl_nents, 0); + + if (ret 0 cmd-prot_type) { + u32 sectors = cmd-data_length / dev-dev_attrib.block_size; + + rc = sbc_dif_verify_read(cmd, cmd-t_task_lba, sectors, +0, fd_prot.prot_sg, 0); + if (rc) { + kfree(fd_prot.prot_sg
Re: [PATCH] IB/mlx5: Fix signature rule constants according to FW specifications
On 1/19/2014 12:37 PM, Sagi Grimberg wrote: Use DIF CRC INC with apptag escape (0x8) and update IP-CSUM entries. Signed-off-by: Sagi Grimberg sa...@mellanox.com --- drivers/infiniband/hw/mlx5/qp.c |6 +++--- 1 files changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c index 7981620..58c4735 100644 --- a/drivers/infiniband/hw/mlx5/qp.c +++ b/drivers/infiniband/hw/mlx5/qp.c @@ -1952,9 +1952,9 @@ static int format_selector(struct ib_sig_attrs *attr, { #define FORMAT_DIF_NONE 0 -#define FORMAT_DIF_CRC_INC 4 -#define FORMAT_DIF_CSUM_INC12 -#define FORMAT_DIF_CRC_NO_INC 13 +#define FORMAT_DIF_CRC_INC 8 +#define FORMAT_DIF_CRC_NO_INC 12 +#define FORMAT_DIF_CSUM_INC13 #define FORMAT_DIF_CSUM_NO_INC14 switch (domain-sig.dif.type) { Just a clarification, This patch along with Fix smatch warnings are incremental patches to v4 of pending Signature verbs patchset. Sagi. -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [LSF/MM ATTEND] interest in blk-mq, scsi-mq, dm-cache, dm-thinp, dm-*
On 1/16/2014 6:34 PM, Sagi Grimberg wrote: On 1/10/2014 8:27 PM, Mike Snitzer wrote: I would like to attend to participate in discussions related to topics listed in the subject. As a maintainer of DM I'd be interested to learn/discuss areas that should become a development focus in the months following LSF. +1 for scsi-mq. Sparing a few words on why I voted for this topic: as part of my involvement in fast RDMA-based initiators (iSER, SRP), I can say that the SCSI layer has become a real performance bottleneck. Implementations bypassing SCSI have started to pop up lately. scsi-mq can and should boost performance and it is important that we work also on LLDs to provide a total solution. Sagi. -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: linux rdma 3.14 merge plans
On 1/22/2014 2:43 AM, Roland Dreier wrote: On Tue, Jan 21, 2014 at 2:00 PM, Or Gerlitz or.gerl...@gmail.com wrote: Roland, ping! the signature patches were posted three months ago. We deserve a response from the maintainer that goes beyond I need to think on that. Responsiveness was stated by Linus to be the #1 requirement from kernel maintainers. Hi Roland, I'll try to respond here. removing LKML and adding Linux-scsi. Or, I'm not sure what response you're after from me. Linus has also said that maintainers should say no a lot more (http://lwn.net/Articles/571995/) so maybe you want me to say, No, I won't merge this patch set, since it adds a bunch of complexity to support a feature no one really cares about. 1. I disagree about no-one cares about DIF/DIX. We are witnessing growing interests in this especially for RDMA. 2. We put a lot of efforts to avoid complexity here and plug-in as simple as possible. Application that will choose to use DIF will implement only 3 steps: a. allocate signature enabled MR. b. register signature enabled MR with DIF attributes (via post_send) and then do RDMA. c. check MR status after transaction is completed (_lightweight_ verb that can be called from interrupt context). Is that it? (And yes I am skeptical about this stuff — I work at an enterprise storage company and even here it's hard to find anyone who cares about DIF/DIX, especially offload features that stop it from being end-to-end) 1. RDMA verbs are _NOT_ stopping DIF from being end-to-end. OS (or SCSI in our specific case) passes LLD 2 scatterlists: data {block1, block2, block3,...}, and protection {DIF1, DIF2, DIF3}. LLD is required to verify the data integrity (block guards) and to interleave over the wire {block1, DIF1, block2, DIF2}. You must support that in HW, you rather iSER/SRP will use giant copy's to interleave by itself? or in case OS asked LLD to INSERT DIF iSER/SRP will compute CRC for each data-block? 
RDMA storage ULPs are transports - they should have no business with data processing. 2. HW DIF offload also gives you protection across the PCI. The data-validation is done (hopefully offloaded) also when data+protection are written to the back-end device. End-to-end is preserved. 3. SAS and FC have T10-PI offload. This is just adding RDMA into the game. With this set of verbs iSER, SRP, FCoE Initiators and targets will be able to support T10-PI. I'm sure you're not expecting me to say, Sure, I'll merge it without understanding the problem it's solving Problem: T10-PI offload support for RDMA based initiators. Supporting end-to-end data integrity while sustaining high RDMA performance. or how it's doing that, How it's doing that: - We introduce a new type of memory region that possesses protection attributes suited for data integrity offload. - We introduce a new fast registration method that can bind all the relevant info for verify/generate of protection information: * describe if/how to interleave data with protection. * describe what method of data integrity is used (DIF type X, CRC, XOR...) and the seeds that HW should start calculation from. * describe how to verify the data. - We introduce a new lightweight check of the data-integrity status to check if there were any integrity errors and get information on them. Note: We made the MR allocation routine generic enough to lay a framework to unite all MR allocation methods (get_dma_mr, alloc_fast_reg_mr, reg_phys, reg_user_mr, fmrs, and probably more in the future...). We defined ib_create_mr that can actually get mr_init_attr which can be easily extended as opposed to the specific calls that exist today. So I would say this even reduces complexity. Hope this helps, Sagi. -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH-v2 10/17] target: Add protection SGLs to target_submit_cmd_map_sgls
On 1/22/2014 12:17 AM, Nicholas A. Bellinger wrote: On Sun, 2014-01-19 at 14:12 +0200, Sagi Grimberg wrote: On 1/19/2014 4:44 AM, Nicholas A. Bellinger wrote: From: Nicholas Bellinger n...@linux-iscsi.org This patch adds support to target_submit_cmd_map_sgls() for accepting 'sgl_prot' + 'sgl_prot_count' parameters for DIF protection information. Note the passed parameters are stored at se_cmd->t_prot_sg and se_cmd->t_prot_nents respectively. Also, update tcm_loop and vhost-scsi fabrics usage of target_submit_cmd_map_sgls() to take into account the new parameters. I didn't see that you added protection allocation to transports that do not use target_submit_cmd_map_sgls() - which happens to be iSCSI/iSER/SRP :( Don't you think that prot SG allocation should be added also to target_alloc_sgl()? By then se_cmd should contain the protection attributes and this routine can know if it needs to allocate prot_sg as well. This is how I used it... Yes, this specific bit was left out for the moment as no code in the patch for v3.14 actually uses it. I'm planning to add it to for-next - v3.15 code as soon as the merge window closes. --nab Yes, that makes sense to me. Sagi. -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH-v2 11/17] target/iblock: Add blk_integrity + BIP passthrough support
On 1/22/2014 3:52 AM, Martin K. Petersen wrote: Sagi == Sagi Grimberg sa...@dev.mellanox.co.il writes: Sagi Please remind me why we ignore IP-CSUM guard type again? MKP, Sagi will this be irrelevant for the initiator as well? if so, I don't Sagi see a reason to expose this in RDMA verbs. I don't see much use for IP checksum for the target. You are required by SBC to use T10 CRC on the wire so there is no point in converting to IP checksum in the backend. My impending patches will allow you to pass through PI with T10 CRC to a device with an IP checksum block integrity profile (i.e. the choice of checksum is a per-bio bip flag instead of an HBA-enforced global). OK, so IP checksum support still makes sense. Thanks! Sagi. -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH-v2 12/17] target/file: Add DIF protection init/format support
On 1/22/2014 12:28 AM, Nicholas A. Bellinger wrote: On Sun, 2014-01-19 at 14:31 +0200, Sagi Grimberg wrote: On 1/19/2014 4:44 AM, Nicholas A. Bellinger wrote: From: Nicholas Bellinger n...@linux-iscsi.org This patch adds support for DIF protection init/format support into the FILEIO backend. It involves using a seperate $FILE.protection for storing PI that is opened via fd_init_prot() using the common pi_prot_type attribute. The actual formatting of the protection is done via fd_format_prot() using the common pi_prot_format attribute, that will populate the initial PI data based upon the currently configured pi_prot_type. Based on original FILEIO code from Sagi. Nice! see comments below... v1 changes: - Fix sparse warnings in fd_init_format_buf (Fengguang) Cc: Martin K. Petersen martin.peter...@oracle.com Cc: Christoph Hellwig h...@lst.de Cc: Hannes Reinecke h...@suse.de Cc: Sagi Grimberg sa...@mellanox.com Cc: Or Gerlitz ogerl...@mellanox.com Signed-off-by: Nicholas Bellinger n...@linux-iscsi.org --- drivers/target/target_core_file.c | 137 + drivers/target/target_core_file.h |4 ++ 2 files changed, 141 insertions(+) diff --git a/drivers/target/target_core_file.c b/drivers/target/target_core_file.c index 0e34cda..119d519 100644 --- a/drivers/target/target_core_file.c +++ b/drivers/target/target_core_file.c @@ -700,6 +700,140 @@ static sector_t fd_get_blocks(struct se_device *dev) dev-dev_attrib.block_size); } +static int fd_init_prot(struct se_device *dev) +{ + struct fd_dev *fd_dev = FD_DEV(dev); + struct file *prot_file, *file = fd_dev-fd_file; + struct inode *inode; + int ret, flags = O_RDWR | O_CREAT | O_LARGEFILE | O_DSYNC; + char buf[FD_MAX_DEV_PROT_NAME]; + + if (!file) { + pr_err(Unable to locate fd_dev-fd_file\n); + return -ENODEV; + } + + inode = file-f_mapping-host; + if (S_ISBLK(inode-i_mode)) { + pr_err(FILEIO Protection emulation only supported on + !S_ISBLK\n); + return -ENOSYS; + } + + if (fd_dev-fbd_flags FDBD_HAS_BUFFERED_IO_WCE) + flags = 
~O_DSYNC; + + snprintf(buf, FD_MAX_DEV_PROT_NAME, %s.protection, +fd_dev-fd_dev_name); + + prot_file = filp_open(buf, flags, 0600); + if (IS_ERR(prot_file)) { + pr_err(filp_open(%s) failed\n, buf); + ret = PTR_ERR(prot_file); + return ret; + } + fd_dev-fd_prot_file = prot_file; + + return 0; +} + +static void fd_init_format_buf(struct se_device *dev, unsigned char *buf, + u32 unit_size, u32 *ref_tag, u16 app_tag, + bool inc_reftag) +{ + unsigned char *p = buf; + int i; + + for (i = 0; i unit_size; i += dev-prot_length) { + *((u16 *)p[0]) = 0x; + *((__be16 *)p[2]) = cpu_to_be16(app_tag); + *((__be32 *)p[4]) = cpu_to_be32(*ref_tag); + + if (inc_reftag) + (*ref_tag)++; + + p += dev-prot_length; + } +} + +static int fd_format_prot(struct se_device *dev) +{ + struct fd_dev *fd_dev = FD_DEV(dev); + struct file *prot_fd = fd_dev-fd_prot_file; + sector_t prot_length, prot; + unsigned char *buf; + loff_t pos = 0; + u32 ref_tag = 0; + int unit_size = FDBD_FORMAT_UNIT_SIZE * dev-dev_attrib.block_size; + int rc, ret = 0, size, len; + bool inc_reftag = false; + + if (!dev-dev_attrib.pi_prot_type) { + pr_err(Unable to format_prot while pi_prot_type == 0\n); + return -ENODEV; + } + if (!prot_fd) { + pr_err(Unable to locate fd_dev-fd_prot_file\n); + return -ENODEV; + } + + switch (dev-dev_attrib.pi_prot_type) { redundant - see below. 
+ case TARGET_DIF_TYPE3_PROT: + ref_tag = 0x; + break; + case TARGET_DIF_TYPE2_PROT: + case TARGET_DIF_TYPE1_PROT: + inc_reftag = true; + break; + default: + break; + } + + buf = vzalloc(unit_size); + if (!buf) { + pr_err(Unable to allocate FILEIO prot buf\n); + return -ENOMEM; + } + + prot_length = (dev-transport-get_blocks(dev) + 1) * dev-prot_length; + size = prot_length; + + pr_debug(Using FILEIO prot_length: %llu\n, +(unsigned long long)prot_length); + + for (prot = 0; prot prot_length; prot += unit_size) { + + fd_init_format_buf(dev, buf, unit_size, ref_tag, 0x, + inc_reftag); I didn't send you my latest patches (my fault...).T10-PI format should only place escape values throughout the protection file (fill it with 0xff). so I
Re: [PATCH-v2 02/17] target: Add DIF CHECK_CONDITION ASC/ASCQ exception cases
On 1/19/2014 4:44 AM, Nicholas A. Bellinger wrote: From: Nicholas Bellinger n...@linux-iscsi.org This patch adds support for DIF related CHECK_CONDITION ASC/ASCQ exception cases into transport_send_check_condition_and_sense(). This includes: LOGICAL BLOCK GUARD CHECK FAILED LOGICAL BLOCK APPLICATION TAG CHECK FAILED LOGICAL BLOCK REFERENCE TAG CHECK FAILED that used by DIF TYPE1 and TYPE3 failure cases. Cc: Martin K. Petersen martin.peter...@oracle.com Cc: Christoph Hellwig h...@lst.de Cc: Hannes Reinecke h...@suse.de Cc: Sagi Grimberg sa...@mellanox.com Cc: Or Gerlitz ogerl...@mellanox.com Signed-off-by: Nicholas Bellinger n...@linux-iscsi.org --- drivers/target/target_core_transport.c | 30 ++ include/target/target_core_base.h |3 +++ 2 files changed, 33 insertions(+) diff --git a/drivers/target/target_core_transport.c b/drivers/target/target_core_transport.c index 18c828d..fa4fc04 100644 --- a/drivers/target/target_core_transport.c +++ b/drivers/target/target_core_transport.c @@ -2674,6 +2674,36 @@ transport_send_check_condition_and_sense(struct se_cmd *cmd, buffer[SPC_ASC_KEY_OFFSET] = 0x1d; buffer[SPC_ASCQ_KEY_OFFSET] = 0x00; break; + case TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED: + /* CURRENT ERROR */ + buffer[0] = 0x70; + buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10; + /* ILLEGAL REQUEST */ + buffer[SPC_SENSE_KEY_OFFSET] = ILLEGAL_REQUEST; + /* LOGICAL BLOCK GUARD CHECK FAILED */ + buffer[SPC_ASC_KEY_OFFSET] = 0x10; + buffer[SPC_ASCQ_KEY_OFFSET] = 0x01; + break; + case TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED: + /* CURRENT ERROR */ + buffer[0] = 0x70; + buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10; + /* ILLEGAL REQUEST */ + buffer[SPC_SENSE_KEY_OFFSET] = ILLEGAL_REQUEST; + /* LOGICAL BLOCK APPLICATION TAG CHECK FAILED */ + buffer[SPC_ASC_KEY_OFFSET] = 0x10; + buffer[SPC_ASCQ_KEY_OFFSET] = 0x02; + break; + case TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED: + /* CURRENT ERROR */ + buffer[0] = 0x70; + buffer[SPC_ADD_SENSE_LEN_OFFSET] = 10; + /* ILLEGAL REQUEST */ + 
buffer[SPC_SENSE_KEY_OFFSET] = ILLEGAL_REQUEST; + /* LOGICAL BLOCK REFERENCE TAG CHECK FAILED */ + buffer[SPC_ASC_KEY_OFFSET] = 0x10; + buffer[SPC_ASCQ_KEY_OFFSET] = 0x03; + break; Hey Nic, I think we missed the failed LBA here. AFAICT According to SPC-4, a DIF error should be accompanied by Information sense-data descriptor with the (first) failed sector in the information field. This means that this routine should be ready to accept a u32 bad_sector or something. I'm not sure how much of a must it really is. Let me prepare a patch... Sagi. case TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE: default: /* CURRENT ERROR */ diff --git a/include/target/target_core_base.h b/include/target/target_core_base.h index d98048b..0336d70 100644 --- a/include/target/target_core_base.h +++ b/include/target/target_core_base.h @@ -205,6 +205,9 @@ enum tcm_sense_reason_table { TCM_OUT_OF_RESOURCES= R(0x12), TCM_PARAMETER_LIST_LENGTH_ERROR = R(0x13), TCM_MISCOMPARE_VERIFY = R(0x14), + TCM_LOGICAL_BLOCK_GUARD_CHECK_FAILED= R(0x15), + TCM_LOGICAL_BLOCK_APP_TAG_CHECK_FAILED = R(0x16), + TCM_LOGICAL_BLOCK_REF_TAG_CHECK_FAILED = R(0x17), #undef R }; -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH-v2 03/17] target/sbc: Add DIF setup in sbc_check_prot + sbc_parse_cdb
On 1/22/2014 12:48 AM, Nicholas A. Bellinger wrote: + cmd->prot_handover = PROT_SEPERATED; I know that we are not planning to support interleaved mode at the moment, but I think that the protection handover type is the backstore preference and should be taken from se_dev. But it is not that important for now... Yeah, I figured since the RDMA pieces needed the handover type defined in some form, it made sense to include PROT_SEPERATED hardcoded here, but stopped short of adding se_dev->prot_handler for the first round merge. --nab Actually they don't, I just added them in iSER code to demonstrate the HW ability. If we are not planning to support that (although as MKP mentioned it might be useful in some cases), you can remove that for now and we can add it in the future - iSER can ignore it for now (I'll refactor the patches). Sagi. -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 12/15] scsi: initial blk-mq support
On 2/5/2014 2:41 PM, Christoph Hellwig wrote: Add support for using the blk-mq code to submit requests to SCSI drivers. There is very little blk-mq specific code, but that's partially because important functionality like partial completions and request requeueing is still missing in blk-mq. I hope to keep most of the additions for these in the blk-mq core instead of the SCSI layer, though. Based on the earlier scsi-mq prototype by Nicholas Bellinger, although not a whole lot of actual code is left. Not-quite-signed-off-yet-by: Christoph Hellwig h...@lst.de --- drivers/scsi/scsi.c | 36 ++- drivers/scsi/scsi_lib.c | 244 -- drivers/scsi/scsi_priv.h |2 + drivers/scsi/scsi_scan.c |5 +- include/scsi/scsi_host.h |3 + 5 files changed, 278 insertions(+), 12 deletions(-) diff --git a/drivers/scsi/scsi.c b/drivers/scsi/scsi.c index adb8bfb..cf5c110 100644 --- a/drivers/scsi/scsi.c +++ b/drivers/scsi/scsi.c @@ -44,6 +44,7 @@ #include linux/string.h #include linux/slab.h #include linux/blkdev.h +#include linux/blk-mq.h #include linux/delay.h #include linux/init.h #include linux/completion.h @@ -688,6 +689,33 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd) return 0; } +static void scsi_softirq_done_remote(void *data) +{ + return scsi_softirq_done(data); +} + +static void scsi_mq_done(struct request *req) +{ + int cpu; + +#if 0 + if (!ctx-ipi_redirect) + return scsi_softirq_done(cmd); +#endif + + cpu = get_cpu(); + if (cpu != req-cpu cpu_online(req-cpu)) { + req-csd.func = scsi_softirq_done_remote; + req-csd.info = req; + req-csd.flags = 0; + __smp_call_function_single(req-cpu, req-csd, 0); + } else { + scsi_softirq_done(req); + } + + put_cpu(); +} + /** * scsi_done - Invoke completion on finished SCSI command. 
* @cmd: The SCSI Command for which a low-level device driver (LLDD) gives @@ -701,8 +729,14 @@ int scsi_dispatch_cmd(struct scsi_cmnd *cmd) */ static void scsi_done(struct scsi_cmnd *cmd) { + struct request *req = cmd-request; + trace_scsi_dispatch_cmd_done(cmd); - blk_complete_request(cmd-request); + + if (req-mq_ctx) + scsi_mq_done(req); + else + blk_complete_request(req); } /** diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index e67950c..8dd8893 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -20,6 +20,7 @@ #include linux/delay.h #include linux/hardirq.h #include linux/scatterlist.h +#include linux/blk-mq.h #include scsi/scsi.h #include scsi/scsi_cmnd.h @@ -554,6 +555,15 @@ static bool scsi_end_request(struct scsi_cmnd *cmd, int error, int bytes, struct request *req = cmd-request; /* +* XXX: need to handle partial completions and retries here. +*/ + if (req-mq_ctx) { + blk_mq_end_io(req, error); + put_device(cmd-device-sdev_gendev); + return true; + } + + /* * If there are blocks left over at the end, set up the command * to queue the remainder of them. */ @@ -1014,12 +1024,15 @@ static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb, { int count; - /* -* If sg table allocation fails, requeue request later. -*/ - if (unlikely(scsi_alloc_sgtable(sdb, req-nr_phys_segments, - gfp_mask))) { - return BLKPREP_DEFER; + BUG_ON(req-nr_phys_segments SCSI_MAX_SG_SEGMENTS); + + if (!req-mq_ctx) { + /* +* If sg table allocation fails, requeue request later. 
+*/ + if (unlikely(scsi_alloc_sgtable(sdb, req-nr_phys_segments, + gfp_mask))) + return BLKPREP_DEFER; } req-buffer = NULL; @@ -1075,9 +1088,11 @@ int scsi_init_io(struct scsi_cmnd *cmd, gfp_t gfp_mask) BUG_ON(prot_sdb == NULL); ivecs = blk_rq_count_integrity_sg(rq-q, rq-bio); - if (scsi_alloc_sgtable(prot_sdb, ivecs, gfp_mask)) { - error = BLKPREP_DEFER; - goto err_exit; + if (!rq-mq_ctx) { + if (scsi_alloc_sgtable(prot_sdb, ivecs, gfp_mask)) { + error = BLKPREP_DEFER; + goto err_exit; + } } count = blk_rq_map_integrity_sg(rq-q, rq-bio, @@ -1505,7 +1520,7 @@ static void scsi_kill_request(struct request *req, struct request_queue *q) blk_complete_request(req); } -static void scsi_softirq_done(struct request *rq) +void scsi_softirq_done(struct request *rq) { struct
bio-integrity BUG_ON
Hey, I stumbled on a nasty crash with 3.14-rc1 when playing with scsi_debug DIF emulation. It's late (IL time..), so I thought I'd post this one up and ask if anyone else noticed it before digging into this one. it comes from the condition: BUG_ON(total bio-bi_integrity-bip_iter.bi_size); trace: scsi_debug_init: dif_storep 131072 bytes @ c90016ad8000 scsi_debug: host protection DIF1 DIX1 scsi7 : scsi_debug, version 1.82 [20100324], dev_size_mb=8, opts=0x0 scsi 7:0:0:0: Direct-Access Linuxscsi_debug 0004 PQ: 0 ANSI: 5 sd 7:0:0:0: Attached scsi generic sg2 type 0 sd 7:0:0:0: [sdc] Enabling DIF Type 1 protection sd 7:0:0:0: [sdc] 16384 512-byte logical blocks: (8.38 MB/8.00 MiB) sd 7:0:0:0: [sdc] Write Protect is off sd 7:0:0:0: [sdc] Write cache: enabled, read cache: enabled, supports DPO and FUA sdc: unknown partition table sd 7:0:0:0: [sdc] Enabling DIX T10-DIF-TYPE1-CRC protection sd 7:0:0:0: [sdc] DIF application tag size 2 [ cut here ] kernel BUG at fs/bio-integrity.c:479! invalid opcode: [#1] SMP Modules linked in: scsi_debug(O) netconsole nfsv3 nfs_acl rpcsec_gss_krb5 auth_rpcgss nfsv4 nfs fscache lockd autofs4 sunrpc target_core_iblock target_core_file target_core_pscsi target_core_mod configfs 8021q garp stp llc cpufreq_ondemand rdma_ucm(O) ib_ucm(O) rdma_cm(O) iw_cm(O) ib_ipoib(O) ib_cm(O) ib_uverbs(O) ib_umad(O) mlx5_ib(O) mlx5_core mlx4_en mlx4_ib(O) ib_sa(O) ib_mad(O) ib_core(O) ib_addr(O) ipv6 mlx4_core dm_mirror dm_region_hash dm_log uinput iTCO_wdt iTCO_vendor_support sg(O) microcode pcspkr serio_raw sb_edac edac_core i2c_i801 lpc_ich mfd_core shpchp ioatdma igb dca i2c_algo_bit i2c_core ptp pps_core ipmi_si ipmi_msghandler dm_mod acpi_cpufreq wmi ext3 jbd mbcache sd_mod crc_t10dif crct10dif_common ahci libahci isci libsas scsi_transport_sas [last unloaded: ip_tables] CPU: 18 PID: 4029 Comm: kworker/18:1H Tainted: G O 3.14.0-rc1+ #1 Hardware name: Supermicro SYS-1027R-WRF/X9DRW, BIOS 3.0a 08/08/2013 Workqueue: kintegrityd bio_integrity_verify_fn task: 
880867954f90 ti: 880868ed2000 task.ti: 880868ed2000 RIP: 0010:[811c8a92] [811c8a92] bio_integrity_verify_fn+0x172/0x180 RSP: 0018:880868ed3d78 EFLAGS: 00010202 RAX: 0008 RBX: 3f80 RCX: 88086f2f9940 RDX: 0040 RSI: 88086f2fbc80 RDI: 880868ed3d98 RBP: 880868ed3df8 R08: 88086bd04f40 R09: R10: R11: R12: 88086f2f99c8 R13: 880454012720 R14: 0040 R15: FS: () GS:88087fcc() knlGS: CS: 0010 DS: ES: CR0: 80050033 CR2: 003e59410060 CR3: 00086e5c1000 CR4: 000407e0 Stack: 0001 88086f2f9940 88086bd04f40 88086f2fbcb8 88086bd04f40 880862cea000 3f80 02001000 88045525d40c 81068092 880868ed3e38 88086e8d3500 Call Trace: [81068092] ? worker_set_flags+0x92/0xd0 [8106b3c2] process_one_work+0x182/0x3b0 [8106c9b0] worker_thread+0x120/0x3a0 [8106c890] ? manage_workers+0x160/0x160 [8107242e] kthread+0xce/0xf0 [81072360] ? kthread_freezable_should_stop+0x70/0x70 [81577d2c] ret_from_fork+0x7c/0xb0 [81072360] ? kthread_freezable_should_stop+0x70/0x70 Sagi. -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 06/11] IB/iser: Introduce pi_enable, pi_guard module parameters
Use modparams to activate protection information support. pi_enable bool: Based on this parameter iSER will know if it should support T10-PI. We don't want to do this by default as it requires to allocate and initiatlize extra resources. In case pi_enable=N, iSER won't publish to SCSI midlayer any DIF capabilities. pi_guard int: Based on this parameter iSER will publish DIX guard type support to SCSI midlayer. 0 means CRC is allowed to be passed in DIX buffers, 1 (or non-zero) means IP-CSUM is allowed to be passed in DIX buffers. Note that over the wire, only CRC is allowed. In the next phase, it is worth considering passing these parameters from iscsid via nlmsg. This will allow these parameters to be connection based rather than global. Signed-off-by: Sagi Grimberg sa...@mellanox.com --- drivers/infiniband/ulp/iser/iscsi_iser.c |8 drivers/infiniband/ulp/iser/iscsi_iser.h |3 +++ drivers/infiniband/ulp/iser/iser_verbs.c |3 +++ 3 files changed, 14 insertions(+), 0 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index dd03cfe..cfa952e 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -82,6 +82,8 @@ static unsigned int iscsi_max_lun = 512; module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO); int iser_debug_level = 0; +bool iser_pi_enable = false; +int iser_pi_guard = 0; MODULE_DESCRIPTION(iSER (iSCSI Extensions for RDMA) Datamover); MODULE_LICENSE(Dual BSD/GPL); @@ -91,6 +93,12 @@ MODULE_VERSION(DRV_VER); module_param_named(debug_level, iser_debug_level, int, 0644); MODULE_PARM_DESC(debug_level, Enable debug tracing if 0 (default:disabled)); +module_param_named(pi_enable, iser_pi_enable, bool, 0644); +MODULE_PARM_DESC(pi_enable, Enable T10-PI offload support (default:disabled)); + +module_param_named(pi_guard, iser_pi_guard, int, 0644); +MODULE_PARM_DESC(pi_guard, T10-PI guard_type, 0:CRC|1:IP_CSUM (default:CRC)); + struct iser_global ig; void diff 
--git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 5f7dbfd..76b2124 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -317,6 +317,7 @@ struct iser_conn { unsigned int rx_desc_head; struct iser_rx_desc *rx_descs; struct ib_recv_wrrx_wr[ISER_MIN_POSTED_RX]; + bool pi_support; /* Connection memory registration pool */ union { @@ -371,6 +372,8 @@ struct iser_global { extern struct iser_global ig; extern int iser_debug_level; +extern bool iser_pi_enable; +extern int iser_pi_guard; /* allocate connection resources needed for rdma functionality */ int iser_conn_set_full_featured_mode(struct iscsi_conn *conn); diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index fd32d63..94f967c 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -757,6 +757,9 @@ int iser_connect(struct iser_conn *ib_conn, ib_conn-state = ISER_CONN_PENDING; + /* connection T10-PI support */ + ib_conn-pi_support = iser_pi_enable; + iser_conn_get(ib_conn); /* ref ib conn's cma id */ ib_conn-cma_id = rdma_create_id(iser_cma_handler, (void *)ib_conn, -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 02/11] IB/iser: Push the decision of which memory key to use into fast_reg_mr routine
This is a preparation step for T10-PI offload support. We prefer to push the desicion of which mkey to use (global or fastreg) to iser_fast_reg_mr. We choose to do this since it in T10-PI we may need to register for protection buffers and in this case we wish to simplify iser_fast_reg_mr instead of repeating the logic of which key to use. This patch does not change any functionality. Signed-off-by: Sagi Grimberg sa...@mellanox.com --- drivers/infiniband/ulp/iser/iser_memory.c | 101 + 1 files changed, 59 insertions(+), 42 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 4e34729..0e029fe 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -444,16 +444,40 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task, return 0; } -static int iser_fast_reg_mr(struct fast_reg_descriptor *desc, - struct iser_conn *ib_conn, +static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, struct iser_regd_buf *regd_buf, - u32 offset, unsigned int data_size, - unsigned int page_list_len) + struct iser_data_buf *mem, + struct ib_sge *sge) { + struct fast_reg_descriptor *desc = regd_buf-reg.mem_h; + struct iser_conn *ib_conn = iser_task-iser_conn-ib_conn; + struct iser_device *device = ib_conn-device; + struct ib_device *ibdev = device-ib_device; struct ib_send_wr fastreg_wr, inv_wr; struct ib_send_wr *bad_wr, *wr = NULL; u8 key; - int ret; + int ret, offset, size, plen; + + /* if there a single dma entry, dma mr suffices */ + if (mem-dma_nents == 1) { + struct scatterlist *sg = (struct scatterlist *)mem-buf; + + sge-lkey = device-mr-lkey; + sge-addr = ib_sg_dma_address(ibdev, sg[0]); + sge-length = ib_sg_dma_len(ibdev, sg[0]); + + iser_dbg(Single DMA entry: lkey=0x%x, addr=0x%llx, length=0x%x\n, +sge-lkey, sge-addr, sge-length); + return 0; + } + + plen = iser_sg_to_page_vec(mem, device-ib_device, + desc-data_frpl-page_list, + offset, size); + if (plen * 
SIZE_4K size) { + iser_err(fast reg page_list too short to hold this SG\n); + return -EINVAL; + } if (!desc-valid) { memset(inv_wr, 0, sizeof(inv_wr)); @@ -472,9 +496,9 @@ static int iser_fast_reg_mr(struct fast_reg_descriptor *desc, fastreg_wr.opcode = IB_WR_FAST_REG_MR; fastreg_wr.wr.fast_reg.iova_start = desc-data_frpl-page_list[0] + offset; fastreg_wr.wr.fast_reg.page_list = desc-data_frpl; - fastreg_wr.wr.fast_reg.page_list_len = page_list_len; + fastreg_wr.wr.fast_reg.page_list_len = plen; fastreg_wr.wr.fast_reg.page_shift = SHIFT_4K; - fastreg_wr.wr.fast_reg.length = data_size; + fastreg_wr.wr.fast_reg.length = size; fastreg_wr.wr.fast_reg.rkey = desc-data_mr-rkey; fastreg_wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | @@ -492,12 +516,9 @@ static int iser_fast_reg_mr(struct fast_reg_descriptor *desc, } desc-valid = false; - regd_buf-reg.mem_h = desc; - regd_buf-reg.lkey = desc-data_mr-lkey; - regd_buf-reg.rkey = desc-data_mr-rkey; - regd_buf-reg.va = desc-data_frpl-page_list[0] + offset; - regd_buf-reg.len = data_size; - regd_buf-reg.is_mr = 1; + sge-lkey = desc-data_mr-lkey; + sge-addr = desc-data_frpl-page_list[0] + offset; + sge-length = size; return ret; } @@ -516,11 +537,10 @@ int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task, struct ib_device *ibdev = device-ib_device; struct iser_data_buf *mem = iser_task-data[cmd_dir]; struct iser_regd_buf *regd_buf = iser_task-rdma_regd[cmd_dir]; - struct fast_reg_descriptor *desc; - unsigned int data_size, page_list_len; + struct fast_reg_descriptor *desc = NULL; + struct ib_sge data_sge; int err, aligned_len; unsigned long flags; - u32 offset; aligned_len = iser_data_buf_aligned_len(mem, ibdev); if (aligned_len != mem-dma_nents) { @@ -533,41 +553,38 @@ int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task, mem = iser_task-data_copy[cmd_dir]; } - /* if there a single dma entry, dma mr suffices */ - if (mem-dma_nents == 1) { - struct scatterlist *sg = (struct 
scatterlist *)mem-buf; - - regd_buf-reg.lkey = device-mr-lkey; - regd_buf-reg.rkey
[PATCH 04/11] IB/iser: Keep IB device attributes under iser_device
For T10-PI offload support, we will need to know the device signature offload capability upon every connection establishment. This patch does not change any functionality. Signed-off-by: Sagi Grimberg sa...@mellanox.com --- drivers/infiniband/ulp/iser/iscsi_iser.h |1 + drivers/infiniband/ulp/iser/iser_verbs.c | 18 ++ 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 265ab99..5ffa92f 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -260,6 +260,7 @@ struct iscsi_iser_task; struct iser_device { struct ib_device *ib_device; struct ib_pd *pd; + struct ib_device_attrdev_attr; struct ib_cq *rx_cq[ISER_MAX_CQ]; struct ib_cq *tx_cq[ISER_MAX_CQ]; struct ib_mr *mr; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 0cc76de..3280260 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -71,17 +71,14 @@ static void iser_event_handler(struct ib_event_handler *handler, */ static int iser_create_device_ib_res(struct iser_device *device) { - int i, j; struct iser_cq_desc *cq_desc; - struct ib_device_attr *dev_attr; + struct ib_device_attr *dev_attr = device-dev_attr; + int ret, i, j; - dev_attr = kmalloc(sizeof(*dev_attr), GFP_KERNEL); - if (!dev_attr) - return -ENOMEM; - - if (ib_query_device(device-ib_device, dev_attr)) { + ret = ib_query_device(device-ib_device, dev_attr); + if (ret) { pr_warn(Query device failed for %s\n, device-ib_device-name); - goto dev_attr_err; + return ret; } /* Assign function handles - based on FMR support */ @@ -101,7 +98,7 @@ static int iser_create_device_ib_res(struct iser_device *device) device-iser_unreg_rdma_mem = iser_unreg_mem_fastreg; } else { iser_err(IB device does not support FMRs nor FastRegs, can't register memory\n); - goto dev_attr_err; + return -1; } device-cqs_used = 
min(ISER_MAX_CQ, device-ib_device-num_comp_vectors); @@ -158,7 +155,6 @@ static int iser_create_device_ib_res(struct iser_device *device) if (ib_register_event_handler(device-event_handler)) goto handler_err; - kfree(dev_attr); return 0; handler_err: @@ -178,8 +174,6 @@ pd_err: kfree(device-cq_desc); cq_desc_err: iser_err(failed to allocate an IB resource\n); -dev_attr_err: - kfree(dev_attr); return -1; } -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 01/11] IB/iser: Avoid FRWR notation, use fastreg instead
FRWR stands for fast registration work request. We want to avoid calling the fastreg pool with that name, instead we name it fastreg which stands for fast registration. This pool will include more elements in the future, so it is a good idea to generalize the name. Signed-off-by: Sagi Grimberg sa...@mellanox.com --- drivers/infiniband/ulp/iser/iscsi_iser.h | 18 --- drivers/infiniband/ulp/iser/iser_memory.c | 24 +- drivers/infiniband/ulp/iser/iser_verbs.c | 78 ++-- 3 files changed, 61 insertions(+), 59 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index e1a01c6..265ab99 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -312,6 +312,8 @@ struct iser_conn { unsigned int rx_desc_head; struct iser_rx_desc *rx_descs; struct ib_recv_wrrx_wr[ISER_MIN_POSTED_RX]; + + /* Connection memory registration pool */ union { struct { struct ib_fmr_pool *pool; /* pool of IB FMRs */ @@ -321,8 +323,8 @@ struct iser_conn { struct { struct list_headpool; int pool_size; - } frwr; - } fastreg; + } fastreg; + }; }; struct iscsi_iser_conn { @@ -408,8 +410,8 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *task, int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task, enum iser_data_dir cmd_dir); -int iser_reg_rdma_mem_frwr(struct iscsi_iser_task *task, - enum iser_data_dir cmd_dir); +int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *task, + enum iser_data_dir cmd_dir); int iser_connect(struct iser_conn *ib_conn, struct sockaddr_in *src_addr, @@ -422,8 +424,8 @@ int iser_reg_page_vec(struct iser_conn *ib_conn, void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir); -void iser_unreg_mem_frwr(struct iscsi_iser_task *iser_task, -enum iser_data_dir cmd_dir); +void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task, + enum iser_data_dir cmd_dir); int iser_post_recvl(struct iser_conn *ib_conn); int iser_post_recvm(struct 
iser_conn *ib_conn, int count); @@ -440,6 +442,6 @@ int iser_initialize_task_headers(struct iscsi_task *task, int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session); int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max); void iser_free_fmr_pool(struct iser_conn *ib_conn); -int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max); -void iser_free_frwr_pool(struct iser_conn *ib_conn); +int iser_create_fastreg_pool(struct iser_conn *ib_conn, unsigned cmds_max); +void iser_free_fastreg_pool(struct iser_conn *ib_conn); #endif diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index f770179..4e34729 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -422,8 +422,8 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task, (unsigned long)regd_buf-reg.va, (unsigned long)regd_buf-reg.len); } else { /* use FMR for multiple dma entries */ - iser_page_vec_build(mem, ib_conn-fastreg.fmr.page_vec, ibdev); - err = iser_reg_page_vec(ib_conn, ib_conn-fastreg.fmr.page_vec, + iser_page_vec_build(mem, ib_conn-fmr.page_vec, ibdev); + err = iser_reg_page_vec(ib_conn, ib_conn-fmr.page_vec, regd_buf-reg); if (err err != -EAGAIN) { iser_data_buf_dump(mem, ibdev); @@ -431,12 +431,12 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task, mem-dma_nents, ntoh24(iser_task-desc.iscsi_header.dlength)); iser_err(page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n, -ib_conn-fastreg.fmr.page_vec-data_size, -ib_conn-fastreg.fmr.page_vec-length, -ib_conn-fastreg.fmr.page_vec-offset); - for (i = 0; i ib_conn-fastreg.fmr.page_vec-length; i++) +ib_conn-fmr.page_vec-data_size, +ib_conn-fmr.page_vec-length, +ib_conn-fmr.page_vec-offset); + for (i = 0; i ib_conn-fmr.page_vec-length; i++) iser_err(page_vec[%d] = 0x%llx\n, i
[PATCH 05/11] IB/iser: Replace fastreg descriptor valid bool with indicators container
In T10-PI support we will have memory keys for protection buffers and signature transactions. We prefer to compact indicators rather than keeping multiple bools. This commit does not change any functionality. Signed-off-by: Sagi Grimberg sa...@mellanox.com --- drivers/infiniband/ulp/iser/iscsi_iser.h |8 ++-- drivers/infiniband/ulp/iser/iser_memory.c |4 ++-- drivers/infiniband/ulp/iser/iser_verbs.c |2 +- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 5ffa92f..5f7dbfd 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -280,13 +280,17 @@ struct iser_device { enum iser_data_dir cmd_dir); }; +enum iser_reg_indicator { + ISER_DATA_KEY_VALID = 1 0, +}; + struct fast_reg_descriptor { struct list_head list; /* For fast registration - FRWR */ struct ib_mr *data_mr; struct ib_fast_reg_page_list *data_frpl; - /* Valid for fast registration flag */ - bool valid; + /* registration indicators container */ + u8reg_indicators; }; struct iser_conn { diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 0e029fe..3edab18 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -479,7 +479,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, return -EINVAL; } - if (!desc-valid) { + if (!(desc-reg_indicators ISER_DATA_KEY_VALID)) { memset(inv_wr, 0, sizeof(inv_wr)); inv_wr.wr_id = ISER_FRWR_LI_WRID; inv_wr.opcode = IB_WR_LOCAL_INV; @@ -514,7 +514,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, iser_err(fast registration failed, ret:%d\n, ret); return ret; } - desc-valid = false; + desc-reg_indicators = ~ISER_DATA_KEY_VALID; sge-lkey = desc-data_mr-lkey; sge-addr = desc-data_frpl-page_list[0] + offset; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c 
index 3280260..fd32d63 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -296,7 +296,7 @@ iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd, } iser_info(Create fr_desc %p page_list %p\n, desc, desc-data_frpl-page_list); - desc-valid = true; + desc-reg_indicators |= ISER_DATA_KEY_VALID; return 0; -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 08/11] IB/iser: Support T10-PI operations
Add logic to initialize protection information entities. Upon each iSCSI task, we keep the scsi_cmnd in order to query the scsi protection operations and reference to protection buffers. Modify iser_fast_reg_mr to receive an indication of whether it is registering the data or protection buffers. In addition, introduce iser_reg_sig_mr which performs fast registration work-request for a signature enabled memory region (IB_WR_REG_SIG_MR). In this routine we set all the protection-relevant attributes for the device to offload protection data-transfer and verification. Signed-off-by: Sagi Grimberg sa...@mellanox.com --- drivers/infiniband/ulp/iser/iscsi_iser.c |2 + drivers/infiniband/ulp/iser/iscsi_iser.h |8 + drivers/infiniband/ulp/iser/iser_initiator.c | 41 - drivers/infiniband/ulp/iser/iser_memory.c| 255 +++--- 4 files changed, 280 insertions(+), 26 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index cfa952e..a64b878 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -184,6 +184,8 @@ iscsi_iser_task_init(struct iscsi_task *task) iser_task-command_sent = 0; iser_task_rdma_init(iser_task); + iser_task-sc = task-sc; + return 0; } diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index a4626d9..23a70b9 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -46,6 +46,8 @@ #include linux/printk.h #include scsi/libiscsi.h #include scsi/scsi_transport_iscsi.h +#include scsi/scsi_cmnd.h +#include scsi/scsi_device.h #include linux/interrupt.h #include linux/wait.h @@ -289,6 +291,10 @@ struct iser_device { enum iser_data_dir cmd_dir); }; +#define ISER_CHECK_GUARD 0xc0 +#define ISER_CHECK_REFTAG 0x0f +#define ISER_CHECK_APPTAG 0x30 + enum iser_reg_indicator { ISER_DATA_KEY_VALID = 1 0, ISER_PROT_KEY_VALID = 1 1, @@ -361,11 +367,13 @@ struct iscsi_iser_task { struct iser_tx_desc desc; 
struct iscsi_iser_conn *iser_conn; enum iser_task_statusstatus; + struct scsi_cmnd *sc; int command_sent; /* set if command sent */ int dir[ISER_DIRS_NUM]; /* set if dir use*/ struct iser_regd_buf rdma_regd[ISER_DIRS_NUM];/* regd rdma buf */ struct iser_data_buf data[ISER_DIRS_NUM]; /* orig. data des*/ struct iser_data_buf data_copy[ISER_DIRS_NUM];/* contig. copy */ + struct iser_data_buf prot[ISER_DIRS_NUM]; /* prot desc */ }; struct iser_page_vec { diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 8352b0c..fd4c7af 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -62,6 +62,17 @@ static int iser_prepare_read_cmd(struct iscsi_task *task, if (err) return err; + if (scsi_prot_sg_count(iser_task-sc)) { + struct iser_data_buf *pbuf_in = iser_task-prot[ISER_DIR_IN]; + + err = iser_dma_map_task_data(iser_task, +pbuf_in, +ISER_DIR_IN, +DMA_FROM_DEVICE); + if (err) + return err; + } + if (edtl iser_task-data[ISER_DIR_IN].data_len) { iser_err(Total data length: %ld, less than EDTL: %d, in READ cmd BHS itt: %d, conn: 0x%p\n, @@ -113,6 +124,17 @@ iser_prepare_write_cmd(struct iscsi_task *task, if (err) return err; + if (scsi_prot_sg_count(iser_task-sc)) { + struct iser_data_buf *pbuf_out = iser_task-prot[ISER_DIR_OUT]; + + err = iser_dma_map_task_data(iser_task, +pbuf_out, +ISER_DIR_OUT, +DMA_TO_DEVICE); + if (err) + return err; + } + if (edtl iser_task-data[ISER_DIR_OUT].data_len) { iser_err(Total data length: %ld, less than EDTL: %d, in WRITE cmd BHS itt: %d, conn: 0x%p\n, @@ -376,7 +398,7 @@ int iser_send_command(struct iscsi_conn *conn, struct iscsi_iser_task *iser_task = task-dd_data; unsigned long edtl; int err; - struct iser_data_buf *data_buf; + struct iser_data_buf *data_buf, *prot_buf; struct iscsi_scsi_req *hdr = (struct iscsi_scsi_req *)task-hdr; struct scsi_cmnd *sc = task-sc; struct
[PATCH 07/11] IB/iser: Initialize T10-PI resources
During connection establishment we also initialize T10-PI resources (QP, PI contexts) in order to support SCSI's protection operations. Signed-off-by: Sagi Grimberg sa...@mellanox.com --- drivers/infiniband/ulp/iser/iscsi_iser.h | 19 ++ drivers/infiniband/ulp/iser/iser_initiator.c |8 +++ drivers/infiniband/ulp/iser/iser_verbs.c | 81 +++-- 3 files changed, 101 insertions(+), 7 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 76b2124..a4626d9 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -134,6 +134,15 @@ ISER_MAX_TX_MISC_PDUS+ \ ISER_MAX_RX_MISC_PDUS) +/* Max registration work requests per command */ +#define ISER_MAX_REG_WR_PER_CMD5 + +/* For Signature we don't support DATAOUTs so no need to make room for them */ +#define ISER_QP_SIG_MAX_REQ_DTOS (ISER_DEF_XMIT_CMDS_MAX * \ + (1 + ISER_MAX_REG_WR_PER_CMD) + \ + ISER_MAX_TX_MISC_PDUS + \ + ISER_MAX_RX_MISC_PDUS) + #define ISER_VER 0x10 #define ISER_WSV 0x08 #define ISER_RSV 0x04 @@ -282,6 +291,15 @@ struct iser_device { enum iser_reg_indicator { ISER_DATA_KEY_VALID = 1 0, + ISER_PROT_KEY_VALID = 1 1, + ISER_SIG_KEY_VALID = 1 2, + ISER_FR_PROTECTED = 1 3, +}; + +struct iser_pi_context { + struct ib_mr *prot_mr; + struct ib_fast_reg_page_list *prot_frpl; + struct ib_mr *sig_mr; }; struct fast_reg_descriptor { @@ -289,6 +307,7 @@ struct fast_reg_descriptor { /* For fast registration - FRWR */ struct ib_mr *data_mr; struct ib_fast_reg_page_list *data_frpl; + struct iser_pi_context *pi_ctx; /* registration indicators container */ u8reg_indicators; }; diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 334f34b..8352b0c 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -255,6 +255,14 @@ int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *s 
ib_conn-qp_max_recv_dtos_mask = session-cmds_max - 1; /* cmds_max is 2^N */ ib_conn-min_posted_rx = ib_conn-qp_max_recv_dtos 2; + /* Check T10-PI support request against device capability */ + if (ib_conn-pi_support + !(device-dev_attr.device_cap_flags IB_DEVICE_SIGNATURE_HANDOVER)) { + iser_err(T10-PI requested but not supported on device %s\n, +device-ib_device-name); + return -EINVAL; + } + if (device-iser_alloc_rdma_reg_res(ib_conn, session-scsi_cmds_max)) goto create_rdma_reg_res_failed; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 94f967c..b00e1ec 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -215,6 +215,11 @@ int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max) struct ib_fmr_pool_param params; int ret = -ENOMEM; + if (ib_conn-pi_support) { + iser_err(T10-PI is not supported for FMRs\n); + return -EINVAL; + } + ib_conn-fmr.page_vec = kmalloc(sizeof(struct iser_page_vec) + (sizeof(u64)*(ISCSI_ISER_SG_TABLESIZE + 1)), GFP_KERNEL); @@ -275,7 +280,7 @@ void iser_free_fmr_pool(struct iser_conn *ib_conn) static int iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd, -struct fast_reg_descriptor *desc) +bool pi_enable, struct fast_reg_descriptor *desc) { int ret; @@ -294,12 +299,64 @@ iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd, iser_err(Failed to allocate ib_fast_reg_mr err=%d\n, ret); goto fast_reg_mr_failure; } + desc-reg_indicators |= ISER_DATA_KEY_VALID; + + if (pi_enable) { + struct ib_mr_init_attr mr_init_attr = {0}; + struct iser_pi_context *pi_ctx = NULL; + + desc-pi_ctx = kzalloc(sizeof(*desc-pi_ctx), GFP_KERNEL); + if (!desc-pi_ctx) { + iser_err(Failed to allocate pi context\n); + ret = -ENOMEM; + goto pi_ctx_alloc_failure; + } + pi_ctx = desc-pi_ctx
[PATCH 03/11] IB/iser: Move fast_reg_descriptor initialization to a function
The fastreg descriptor will include a protection information context. In order to keep the descriptor initialization logic in one place, we introduce the iser_create_fastreg_desc function. This patch does not change any functionality. Signed-off-by: Sagi Grimberg sa...@mellanox.com --- drivers/infiniband/ulp/iser/iser_verbs.c | 58 - 1 files changed, 40 insertions(+), 18 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 4809693..0cc76de 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -279,6 +279,39 @@ void iser_free_fmr_pool(struct iser_conn *ib_conn) ib_conn-fmr.page_vec = NULL; } +static int +iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd, +struct fast_reg_descriptor *desc) +{ + int ret; + + desc-data_frpl = ib_alloc_fast_reg_page_list(ib_device, + ISCSI_ISER_SG_TABLESIZE + 1); + if (IS_ERR(desc-data_frpl)) { + ret = PTR_ERR(desc-data_frpl); + iser_err(Failed to allocate ib_fast_reg_page_list err=%d\n, +ret); + return PTR_ERR(desc-data_frpl); + } + + desc-data_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE + 1); + if (IS_ERR(desc-data_mr)) { + ret = PTR_ERR(desc-data_mr); + iser_err(Failed to allocate ib_fast_reg_mr err=%d\n, ret); + goto fast_reg_mr_failure; + } + iser_info(Create fr_desc %p page_list %p\n, + desc, desc-data_frpl-page_list); + desc-valid = true; + + return 0; + +fast_reg_mr_failure: + ib_free_fast_reg_page_list(desc-data_frpl); + + return ret; +} + /** * iser_create_fastreg_pool - Creates pool of fast_reg descriptors * for fast registration work requests. 
@@ -300,32 +333,21 @@ int iser_create_fastreg_pool(struct iser_conn *ib_conn, unsigned cmds_max) goto err; } - desc-data_frpl = ib_alloc_fast_reg_page_list(device-ib_device, - ISCSI_ISER_SG_TABLESIZE + 1); - if (IS_ERR(desc-data_frpl)) { - ret = PTR_ERR(desc-data_frpl); - iser_err(Failed to allocate ib_fast_reg_page_list err=%d\n, ret); - goto fast_reg_page_failure; + ret = iser_create_fastreg_desc(device-ib_device, + device-pd, desc); + if (ret) { + iser_err(Failed to create fastreg descriptor err=%d\n, +ret); + kfree(desc); + goto err; } - desc-data_mr = ib_alloc_fast_reg_mr(device-pd, -ISCSI_ISER_SG_TABLESIZE + 1); - if (IS_ERR(desc-data_mr)) { - ret = PTR_ERR(desc-data_mr); - iser_err(Failed to allocate ib_fast_reg_mr err=%d\n, ret); - goto fast_reg_mr_failure; - } - desc-valid = true; list_add_tail(desc-list, ib_conn-fastreg.pool); ib_conn-fastreg.pool_size++; } return 0; -fast_reg_mr_failure: - ib_free_fast_reg_page_list(desc-data_frpl); -fast_reg_page_failure: - kfree(desc); err: iser_free_fastreg_pool(ib_conn); return ret; -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 09/11] SCSI/libiscsi: Add check_protection callback for transports
iSCSI needs to be at least aware that a task involves protection information. In case it does, after the transaction completed libiscsi will ask the transport to check the protection status of the transaction. Unlike transport errors, DIF errors should not prevent successful completion of the transaction from the transport point of view, but should be escalated to scsi mid-layer when constructing the scsi result and sense data. check_protection routine will return the ascq corresponding to the DIF error that occurred (or 0 if no error happened). return ascq: - 0x1: GUARD_CHECK_FAILED - 0x2: APPTAG_CHECK_FAILED - 0x3: REFTAG_CHECK_FAILED Signed-off-by: Sagi Grimberg sa...@mellanox.com --- drivers/scsi/libiscsi.c | 32 include/scsi/libiscsi.h |4 include/scsi/scsi_transport_iscsi.h |1 + 3 files changed, 37 insertions(+), 0 deletions(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 4046241..a58a6bb 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -395,6 +395,10 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task) if (rc) return rc; } + + if (scsi_get_prot_op(sc) != SCSI_PROT_NORMAL) + task-protected = true; + if (sc-sc_data_direction == DMA_TO_DEVICE) { unsigned out_len = scsi_out(sc)-length; struct iscsi_r2t_info *r2t = task-unsol_r2t; @@ -823,6 +827,33 @@ static void iscsi_scsi_cmd_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr, sc-result = (DID_OK 16) | rhdr-cmd_status; + if (task-protected) { + sector_t sector; + u8 ascq; + + /** +* Transports that didn't implement check_protection +* callback but still published T10-PI support to scsi-mid +* deserve this BUG_ON. 
+**/ +BUG_ON(!session-tt-check_protection); + + ascq = session-tt-check_protection(task, sector); + if (ascq) { + sc-result = DRIVER_SENSE 24 | DID_ABORT 16 | +SAM_STAT_CHECK_CONDITION; + scsi_build_sense_buffer(1, sc-sense_buffer, + ILLEGAL_REQUEST, 0x10, ascq); + sc-sense_buffer[7] = 0xc; /* Additional sense length */ + sc-sense_buffer[8] = 0; /* Information desc type */ + sc-sense_buffer[9] = 0xa; /* Additional desc length */ + sc-sense_buffer[10] = 0x80; /* Validity bit */ + + put_unaligned_be64(sector, sc-sense_buffer[12]); + goto out; + } + } + if (rhdr-response != ISCSI_STATUS_CMD_COMPLETED) { sc-result = DID_ERROR 16; goto out; @@ -1567,6 +1598,7 @@ static inline struct iscsi_task *iscsi_alloc_task(struct iscsi_conn *conn, task-have_checked_conn = false; task-last_timeout = jiffies; task-last_xfer = jiffies; + task-protected = false; INIT_LIST_HEAD(task-running); return task; } diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index 309f513..1457c26 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -133,6 +133,10 @@ struct iscsi_task { unsigned long last_xfer; unsigned long last_timeout; boolhave_checked_conn; + + /* T10 protection information */ + boolprotected; + /* state set/tested under session-lock */ int state; atomic_trefcount; diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index 88640a4..2555ee5 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -167,6 +167,7 @@ struct iscsi_transport { struct iscsi_bus_flash_conn *fnode_conn); int (*logout_flashnode_sid) (struct iscsi_cls_session *cls_sess); int (*get_host_stats) (struct Scsi_Host *shost, char *buf, int len); + u8 (*check_protection)(struct iscsi_task *task, sector_t *sector); }; /* -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v1 02/13] IB/iser: Push the decision of which memory key to use into fast_reg_mr routine
This is a preparation step for T10-PI offload support. We prefer to push the decision of which mkey to use (global or fastreg) to iser_fast_reg_mr. We choose to do this since in T10-PI we may need to register for protection buffers and in this case we wish to simplify iser_fast_reg_mr instead of repeating the logic of which key to use. This patch does not change any functionality. Signed-off-by: Sagi Grimberg sa...@mellanox.com Signed-off-by: Alex Tabachnik al...@mellanox.com --- drivers/infiniband/ulp/iser/iser_memory.c | 101 + 1 files changed, 59 insertions(+), 42 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 6e9b7bc..d25587e 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -444,16 +444,40 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task, return 0; } -static int iser_fast_reg_mr(struct fast_reg_descriptor *desc, - struct iser_conn *ib_conn, +static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, struct iser_regd_buf *regd_buf, - u32 offset, unsigned int data_size, - unsigned int page_list_len) + struct iser_data_buf *mem, + struct ib_sge *sge) { + struct fast_reg_descriptor *desc = regd_buf-reg.mem_h; + struct iser_conn *ib_conn = iser_task-iser_conn-ib_conn; + struct iser_device *device = ib_conn-device; + struct ib_device *ibdev = device-ib_device; struct ib_send_wr fastreg_wr, inv_wr; struct ib_send_wr *bad_wr, *wr = NULL; u8 key; - int ret; + int ret, offset, size, plen; + + /* if there a single dma entry, dma mr suffices */ + if (mem-dma_nents == 1) { + struct scatterlist *sg = (struct scatterlist *)mem-buf; + + sge-lkey = device-mr-lkey; + sge-addr = ib_sg_dma_address(ibdev, sg[0]); + sge-length = ib_sg_dma_len(ibdev, sg[0]); + + iser_dbg(Single DMA entry: lkey=0x%x, addr=0x%llx, length=0x%x\n, +sge-lkey, sge-addr, sge-length); + return 0; + } + + plen = iser_sg_to_page_vec(mem, device-ib_device, + 
desc-data_frpl-page_list, + offset, size); + if (plen * SIZE_4K size) { + iser_err(fast reg page_list too short to hold this SG\n); + return -EINVAL; + } if (!desc-valid) { memset(inv_wr, 0, sizeof(inv_wr)); @@ -472,9 +496,9 @@ static int iser_fast_reg_mr(struct fast_reg_descriptor *desc, fastreg_wr.opcode = IB_WR_FAST_REG_MR; fastreg_wr.wr.fast_reg.iova_start = desc-data_frpl-page_list[0] + offset; fastreg_wr.wr.fast_reg.page_list = desc-data_frpl; - fastreg_wr.wr.fast_reg.page_list_len = page_list_len; + fastreg_wr.wr.fast_reg.page_list_len = plen; fastreg_wr.wr.fast_reg.page_shift = SHIFT_4K; - fastreg_wr.wr.fast_reg.length = data_size; + fastreg_wr.wr.fast_reg.length = size; fastreg_wr.wr.fast_reg.rkey = desc-data_mr-rkey; fastreg_wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | @@ -492,12 +516,9 @@ static int iser_fast_reg_mr(struct fast_reg_descriptor *desc, } desc-valid = false; - regd_buf-reg.mem_h = desc; - regd_buf-reg.lkey = desc-data_mr-lkey; - regd_buf-reg.rkey = desc-data_mr-rkey; - regd_buf-reg.va = desc-data_frpl-page_list[0] + offset; - regd_buf-reg.len = data_size; - regd_buf-reg.is_mr = 1; + sge-lkey = desc-data_mr-lkey; + sge-addr = desc-data_frpl-page_list[0] + offset; + sge-length = size; return ret; } @@ -516,11 +537,10 @@ int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task, struct ib_device *ibdev = device-ib_device; struct iser_data_buf *mem = iser_task-data[cmd_dir]; struct iser_regd_buf *regd_buf = iser_task-rdma_regd[cmd_dir]; - struct fast_reg_descriptor *desc; - unsigned int data_size, page_list_len; + struct fast_reg_descriptor *desc = NULL; + struct ib_sge data_sge; int err, aligned_len; unsigned long flags; - u32 offset; aligned_len = iser_data_buf_aligned_len(mem, ibdev); if (aligned_len != mem-dma_nents) { @@ -533,41 +553,38 @@ int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task, mem = iser_task-data_copy[cmd_dir]; } - /* if there a single dma entry, dma mr suffices */ - if 
(mem-dma_nents == 1) { - struct scatterlist *sg = (struct scatterlist *)mem-buf; - - regd_buf-reg.lkey = device
[PATCH v1 05/13] IB/iser: Replace fastreg descriptor valid bool with indicators container
In T10-PI support we will have memory keys for protection buffers and signature transactions. We prefer to compact indicators rather than keeping multiple bools. This commit does not change any functionality. Signed-off-by: Sagi Grimberg sa...@mellanox.com Signed-off-by: Alex Tabachnik al...@mellanox.com --- drivers/infiniband/ulp/iser/iscsi_iser.h |8 ++-- drivers/infiniband/ulp/iser/iser_memory.c |4 ++-- drivers/infiniband/ulp/iser/iser_verbs.c |2 +- 3 files changed, 9 insertions(+), 5 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index b4290f5..5660714 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -280,13 +280,17 @@ struct iser_device { enum iser_data_dir cmd_dir); }; +enum iser_reg_indicator { + ISER_DATA_KEY_VALID = 1 0, +}; + struct fast_reg_descriptor { struct list_head list; /* For fast registration - FRWR */ struct ib_mr *data_mr; struct ib_fast_reg_page_list *data_frpl; - /* Valid for fast registration flag */ - bool valid; + /* registration indicators container */ + u8reg_indicators; }; struct iser_conn { diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index d25587e..a7a0d3e 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -479,7 +479,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, return -EINVAL; } - if (!desc-valid) { + if (!(desc-reg_indicators ISER_DATA_KEY_VALID)) { memset(inv_wr, 0, sizeof(inv_wr)); inv_wr.wr_id = ISER_FASTREG_LI_WRID; inv_wr.opcode = IB_WR_LOCAL_INV; @@ -514,7 +514,7 @@ static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, iser_err(fast registration failed, ret:%d\n, ret); return ret; } - desc-valid = false; + desc-reg_indicators = ~ISER_DATA_KEY_VALID; sge-lkey = desc-data_mr-lkey; sge-addr = desc-data_frpl-page_list[0] + offset; diff --git 
a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 95fcfca..6a5f424 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -296,7 +296,7 @@ iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd, } iser_info(Create fr_desc %p page_list %p\n, desc, desc-data_frpl-page_list); - desc-valid = true; + desc-reg_indicators |= ISER_DATA_KEY_VALID; return 0; -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v1 04/13] IB/iser: Keep IB device attributes under iser_device
For T10-PI offload support, we will need to know the device signature offload capability upon every connection establishment. This patch does not change any functionality. Signed-off-by: Sagi Grimberg sa...@mellanox.com Signed-off-by: Alex Tabachnik al...@mellanox.com --- drivers/infiniband/ulp/iser/iscsi_iser.h |1 + drivers/infiniband/ulp/iser/iser_verbs.c | 18 ++ 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index ca161df..b4290f5 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -260,6 +260,7 @@ struct iscsi_iser_task; struct iser_device { struct ib_device *ib_device; struct ib_pd *pd; + struct ib_device_attrdev_attr; struct ib_cq *rx_cq[ISER_MAX_CQ]; struct ib_cq *tx_cq[ISER_MAX_CQ]; struct ib_mr *mr; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 9569e40..95fcfca 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -71,17 +71,14 @@ static void iser_event_handler(struct ib_event_handler *handler, */ static int iser_create_device_ib_res(struct iser_device *device) { - int i, j; struct iser_cq_desc *cq_desc; - struct ib_device_attr *dev_attr; + struct ib_device_attr *dev_attr = device-dev_attr; + int ret, i, j; - dev_attr = kmalloc(sizeof(*dev_attr), GFP_KERNEL); - if (!dev_attr) - return -ENOMEM; - - if (ib_query_device(device-ib_device, dev_attr)) { + ret = ib_query_device(device-ib_device, dev_attr); + if (ret) { pr_warn(Query device failed for %s\n, device-ib_device-name); - goto dev_attr_err; + return ret; } /* Assign function handles - based on FMR support */ @@ -101,7 +98,7 @@ static int iser_create_device_ib_res(struct iser_device *device) device-iser_unreg_rdma_mem = iser_unreg_mem_fastreg; } else { iser_err(IB device does not support FMRs nor FastRegs, can't register memory\n); - goto 
dev_attr_err; + return -1; } device-cqs_used = min(ISER_MAX_CQ, device-ib_device-num_comp_vectors); @@ -158,7 +155,6 @@ static int iser_create_device_ib_res(struct iser_device *device) if (ib_register_event_handler(device-event_handler)) goto handler_err; - kfree(dev_attr); return 0; handler_err: @@ -178,8 +174,6 @@ pd_err: kfree(device-cq_desc); cq_desc_err: iser_err(failed to allocate an IB resource\n); -dev_attr_err: - kfree(dev_attr); return -1; } -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v1 03/13] IB/iser: Move fast_reg_descriptor initialization to a function
The fastreg descriptor will include a protection information context. In order to keep the descriptor initialization logic in one place, we introduce the iser_create_fastreg_desc function. This patch does not change any functionality. Signed-off-by: Sagi Grimberg sa...@mellanox.com Signed-off-by: Alex Tabachnik al...@mellanox.com --- drivers/infiniband/ulp/iser/iser_verbs.c | 58 - 1 files changed, 40 insertions(+), 18 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index dc5a0b4..9569e40 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -279,6 +279,39 @@ void iser_free_fmr_pool(struct iser_conn *ib_conn) ib_conn-fmr.page_vec = NULL; } +static int +iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd, +struct fast_reg_descriptor *desc) +{ + int ret; + + desc-data_frpl = ib_alloc_fast_reg_page_list(ib_device, + ISCSI_ISER_SG_TABLESIZE + 1); + if (IS_ERR(desc-data_frpl)) { + ret = PTR_ERR(desc-data_frpl); + iser_err(Failed to allocate ib_fast_reg_page_list err=%d\n, +ret); + return PTR_ERR(desc-data_frpl); + } + + desc-data_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE + 1); + if (IS_ERR(desc-data_mr)) { + ret = PTR_ERR(desc-data_mr); + iser_err(Failed to allocate ib_fast_reg_mr err=%d\n, ret); + goto fast_reg_mr_failure; + } + iser_info(Create fr_desc %p page_list %p\n, + desc, desc-data_frpl-page_list); + desc-valid = true; + + return 0; + +fast_reg_mr_failure: + ib_free_fast_reg_page_list(desc-data_frpl); + + return ret; +} + /** * iser_create_fastreg_pool - Creates pool of fast_reg descriptors * for fast registration work requests. 
@@ -300,32 +333,21 @@ int iser_create_fastreg_pool(struct iser_conn *ib_conn, unsigned cmds_max) goto err; } - desc-data_frpl = ib_alloc_fast_reg_page_list(device-ib_device, - ISCSI_ISER_SG_TABLESIZE + 1); - if (IS_ERR(desc-data_frpl)) { - ret = PTR_ERR(desc-data_frpl); - iser_err(Failed to allocate ib_fast_reg_page_list err=%d\n, ret); - goto fast_reg_page_failure; + ret = iser_create_fastreg_desc(device-ib_device, + device-pd, desc); + if (ret) { + iser_err(Failed to create fastreg descriptor err=%d\n, +ret); + kfree(desc); + goto err; } - desc-data_mr = ib_alloc_fast_reg_mr(device-pd, -ISCSI_ISER_SG_TABLESIZE + 1); - if (IS_ERR(desc-data_mr)) { - ret = PTR_ERR(desc-data_mr); - iser_err(Failed to allocate ib_fast_reg_mr err=%d\n, ret); - goto fast_reg_mr_failure; - } - desc-valid = true; list_add_tail(desc-list, ib_conn-fastreg.pool); ib_conn-fastreg.pool_size++; } return 0; -fast_reg_mr_failure: - ib_free_fast_reg_page_list(desc-data_frpl); -fast_reg_page_failure: - kfree(desc); err: iser_free_fastreg_pool(ib_conn); return ret; -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v1 07/13] IB/iser: Generalize fall_to_bounce_buf routine
Unaligned SG-lists may also happen for protection information. Generalize bounce buffer routine to handle any iser_data_buf which may be data and/or protection. This patch does not change any functionality. Signed-off-by: Sagi Grimberg sa...@mellanox.com --- drivers/infiniband/ulp/iser/iser_memory.c | 53 - 1 files changed, 29 insertions(+), 24 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index a933508..2c3f4b1 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -45,13 +45,19 @@ * iser_start_rdma_unaligned_sg */ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, + struct iser_data_buf *data, + struct iser_data_buf *data_copy, enum iser_data_dir cmd_dir) { - int dma_nents; - struct ib_device *dev; + struct ib_device *dev = iser_task-iser_conn-ib_conn-device-ib_device; + struct scatterlist *sgl = (struct scatterlist *)data-buf; + struct scatterlist *sg; char *mem = NULL; - struct iser_data_buf *data = iser_task-data[cmd_dir]; - unsigned long cmd_data_len = data-data_len; + unsigned long cmd_data_len = 0; + int dma_nents, i; + + for_each_sg(sgl, sg, data-size, i) + cmd_data_len += ib_sg_dma_len(dev, sg); if (cmd_data_len ISER_KMALLOC_THRESHOLD) mem = (void *)__get_free_pages(GFP_ATOMIC, @@ -61,17 +67,16 @@ static int iser_start_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, if (mem == NULL) { iser_err(Failed to allocate mem size %d %d for copying sglist\n, -data-size,(int)cmd_data_len); +data-size, (int)cmd_data_len); return -ENOMEM; } if (cmd_dir == ISER_DIR_OUT) { /* copy the unaligned sg the buffer which is used for RDMA */ - struct scatterlist *sgl = (struct scatterlist *)data-buf; - struct scatterlist *sg; int i; char *p, *from; + sgl = (struct scatterlist *)data-buf; p = mem; for_each_sg(sgl, sg, data-size, i) { from = kmap_atomic(sg_page(sg)); @@ -83,22 +88,19 @@ static int iser_start_rdma_unaligned_sg(struct 
iscsi_iser_task *iser_task, } } - sg_init_one(iser_task-data_copy[cmd_dir].sg_single, mem, cmd_data_len); - iser_task-data_copy[cmd_dir].buf = - iser_task-data_copy[cmd_dir].sg_single; - iser_task-data_copy[cmd_dir].size = 1; + sg_init_one(data_copy-sg_single, mem, cmd_data_len); + data_copy-buf = data_copy-sg_single; + data_copy-size = 1; + data_copy-copy_buf = mem; - iser_task-data_copy[cmd_dir].copy_buf = mem; - - dev = iser_task-iser_conn-ib_conn-device-ib_device; - dma_nents = ib_dma_map_sg(dev, - iser_task-data_copy[cmd_dir].sg_single, - 1, + dma_nents = ib_dma_map_sg(dev, data_copy-sg_single, 1, (cmd_dir == ISER_DIR_OUT) ? DMA_TO_DEVICE : DMA_FROM_DEVICE); BUG_ON(dma_nents == 0); - iser_task-data_copy[cmd_dir].dma_nents = dma_nents; + data_copy-dma_nents = dma_nents; + data_copy-data_len = cmd_data_len; + return 0; } @@ -341,11 +343,12 @@ void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task, static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task, struct ib_device *ibdev, + struct iser_data_buf *mem, + struct iser_data_buf *mem_copy, enum iser_data_dir cmd_dir, int aligned_len) { struct iscsi_conn*iscsi_conn = iser_task-iser_conn-iscsi_conn; - struct iser_data_buf *mem = iser_task-data[cmd_dir]; iscsi_conn-fmr_unalign_cnt++; iser_warn(rdma alignment violation (%d/%d aligned) or FMR not supported\n, @@ -355,12 +358,12 @@ static int fall_to_bounce_buf(struct iscsi_iser_task *iser_task, iser_data_buf_dump(mem, ibdev); /* unmap the command data before accessing it */ - iser_dma_unmap_task_data(iser_task, iser_task-data[cmd_dir]); + iser_dma_unmap_task_data(iser_task, mem); /* allocate copy buf, if we are writing, copy the */ /* unaligned scatterlist, dma map the copy*/ - if (iser_start_rdma_unaligned_sg(iser_task, cmd_dir) != 0) - return -ENOMEM; + if (iser_start_rdma_unaligned_sg(iser_task, mem, mem_copy, cmd_dir) != 0) + return -ENOMEM
[PATCH v1 11/13] SCSI/libiscsi: Add check_protection callback for transports
iSCSI needs to be at least aware that a task involves protection information. In case it does, after the transaction has completed libiscsi will ask the transport to check the protection status of the transaction. Unlike transport errors, DIF errors should not prevent successful completion of the transaction from the transport point of view, but should be escalated to scsi mid-layer when constructing the scsi result and sense data. check_protection routine will return the ascq corresponding to the DIF error that occurred (or 0 if no error happened). return ascq: - 0x1: GUARD_CHECK_FAILED - 0x2: APPTAG_CHECK_FAILED - 0x3: REFTAG_CHECK_FAILED Signed-off-by: Sagi Grimberg sa...@mellanox.com Signed-off-by: Alex Tabachnik al...@mellanox.com --- drivers/scsi/libiscsi.c | 32 include/scsi/libiscsi.h |4 include/scsi/scsi_transport_iscsi.h |1 + 3 files changed, 37 insertions(+), 0 deletions(-) diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 4046241..a58a6bb 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -395,6 +395,10 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task) if (rc) return rc; } + + if (scsi_get_prot_op(sc) != SCSI_PROT_NORMAL) + task-protected = true; + if (sc-sc_data_direction == DMA_TO_DEVICE) { unsigned out_len = scsi_out(sc)-length; struct iscsi_r2t_info *r2t = task-unsol_r2t; @@ -823,6 +827,33 @@ static void iscsi_scsi_cmd_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr, sc-result = (DID_OK 16) | rhdr-cmd_status; + if (task-protected) { + sector_t sector; + u8 ascq; + + /** +* Transports that didn't implement check_protection +* callback but still published T10-PI support to scsi-mid +* deserve this BUG_ON. 
+**/ +BUG_ON(!session-tt-check_protection); + + ascq = session-tt-check_protection(task, sector); + if (ascq) { + sc-result = DRIVER_SENSE 24 | DID_ABORT 16 | +SAM_STAT_CHECK_CONDITION; + scsi_build_sense_buffer(1, sc-sense_buffer, + ILLEGAL_REQUEST, 0x10, ascq); + sc-sense_buffer[7] = 0xc; /* Additional sense length */ + sc-sense_buffer[8] = 0; /* Information desc type */ + sc-sense_buffer[9] = 0xa; /* Additional desc length */ + sc-sense_buffer[10] = 0x80; /* Validity bit */ + + put_unaligned_be64(sector, sc-sense_buffer[12]); + goto out; + } + } + if (rhdr-response != ISCSI_STATUS_CMD_COMPLETED) { sc-result = DID_ERROR 16; goto out; @@ -1567,6 +1598,7 @@ static inline struct iscsi_task *iscsi_alloc_task(struct iscsi_conn *conn, task-have_checked_conn = false; task-last_timeout = jiffies; task-last_xfer = jiffies; + task-protected = false; INIT_LIST_HEAD(task-running); return task; } diff --git a/include/scsi/libiscsi.h b/include/scsi/libiscsi.h index 309f513..1457c26 100644 --- a/include/scsi/libiscsi.h +++ b/include/scsi/libiscsi.h @@ -133,6 +133,10 @@ struct iscsi_task { unsigned long last_xfer; unsigned long last_timeout; boolhave_checked_conn; + + /* T10 protection information */ + boolprotected; + /* state set/tested under session-lock */ int state; atomic_trefcount; diff --git a/include/scsi/scsi_transport_iscsi.h b/include/scsi/scsi_transport_iscsi.h index 88640a4..2555ee5 100644 --- a/include/scsi/scsi_transport_iscsi.h +++ b/include/scsi/scsi_transport_iscsi.h @@ -167,6 +167,7 @@ struct iscsi_transport { struct iscsi_bus_flash_conn *fnode_conn); int (*logout_flashnode_sid) (struct iscsi_cls_session *cls_sess); int (*get_host_stats) (struct Scsi_Host *shost, char *buf, int len); + u8 (*check_protection)(struct iscsi_task *task, sector_t *sector); }; /* -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v1 09/13] IB/iser: Initialize T10-PI resources
From: Alex Tabachnik al...@mellanox.com During connection establishment we also initialize T10-PI resources (QP, PI contexts) in order to support SCSI's protection operations. Signed-off-by: Alex Tabachnik al...@mellanox.com Signed-off-by: Sagi Grimberg sa...@mellanox.com --- drivers/infiniband/ulp/iser/iscsi_iser.h | 21 - drivers/infiniband/ulp/iser/iser_verbs.c | 77 +++--- 2 files changed, 90 insertions(+), 8 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 011003f..99fc8b8 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -134,6 +134,15 @@ ISER_MAX_TX_MISC_PDUS+ \ ISER_MAX_RX_MISC_PDUS) +/* Max registration work requests per command */ +#define ISER_MAX_REG_WR_PER_CMD5 + +/* For Signature we don't support DATAOUTs so no need to make room for them */ +#define ISER_QP_SIG_MAX_REQ_DTOS (ISER_DEF_XMIT_CMDS_MAX * \ + (1 + ISER_MAX_REG_WR_PER_CMD) + \ + ISER_MAX_TX_MISC_PDUS + \ + ISER_MAX_RX_MISC_PDUS) + #define ISER_VER 0x10 #define ISER_WSV 0x08 #define ISER_RSV 0x04 @@ -281,7 +290,16 @@ struct iser_device { }; enum iser_reg_indicator { - ISER_DATA_KEY_VALID = 1 0, + ISER_DATA_KEY_VALID = 1 0, + ISER_PROT_KEY_VALID = 1 1, + ISER_SIG_KEY_VALID = 1 2, + ISER_FASTREG_PROTECTED = 1 3, +}; + +struct iser_pi_context { + struct ib_mr *prot_mr; + struct ib_fast_reg_page_list *prot_frpl; + struct ib_mr *sig_mr; }; struct fast_reg_descriptor { @@ -289,6 +307,7 @@ struct fast_reg_descriptor { /* For fast registration - FRWR */ struct ib_mr *data_mr; struct ib_fast_reg_page_list *data_frpl; + struct iser_pi_context *pi_ctx; /* registration indicators container */ u8reg_indicators; }; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 4c27f55..0404c71 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -275,7 +275,7 @@ void iser_free_fmr_pool(struct 
iser_conn *ib_conn) static int iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd, -struct fast_reg_descriptor *desc) +bool pi_enable, struct fast_reg_descriptor *desc) { int ret; @@ -294,12 +294,64 @@ iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd, iser_err(Failed to allocate ib_fast_reg_mr err=%d\n, ret); goto fast_reg_mr_failure; } + desc-reg_indicators |= ISER_DATA_KEY_VALID; + + if (pi_enable) { + struct ib_mr_init_attr mr_init_attr = {0}; + struct iser_pi_context *pi_ctx = NULL; + + desc-pi_ctx = kzalloc(sizeof(*desc-pi_ctx), GFP_KERNEL); + if (!desc-pi_ctx) { + iser_err(Failed to allocate pi context\n); + ret = -ENOMEM; + goto pi_ctx_alloc_failure; + } + pi_ctx = desc-pi_ctx; + + pi_ctx-prot_frpl = ib_alloc_fast_reg_page_list(ib_device, + ISCSI_ISER_SG_TABLESIZE); + if (IS_ERR(pi_ctx-prot_frpl)) { + ret = PTR_ERR(pi_ctx-prot_frpl); + iser_err(Failed to allocate prot frpl ret=%d\n, +ret); + goto prot_frpl_failure; + } + + pi_ctx-prot_mr = ib_alloc_fast_reg_mr(pd, + ISCSI_ISER_SG_TABLESIZE + 1); + if (IS_ERR(pi_ctx-prot_mr)) { + ret = PTR_ERR(pi_ctx-prot_mr); + iser_err(Failed to allocate prot frmr ret=%d\n, +ret); + goto prot_mr_failure; + } + desc-reg_indicators |= ISER_PROT_KEY_VALID; + + mr_init_attr.max_reg_descriptors = 2; + mr_init_attr.flags |= IB_MR_SIGNATURE_EN; + pi_ctx-sig_mr = ib_create_mr(pd, mr_init_attr); + if (IS_ERR(pi_ctx-sig_mr)) { + ret = PTR_ERR(pi_ctx-sig_mr); + iser_err(Failed to allocate signature enabled mr err=%d\n, +ret); + goto sig_mr_failure; + } + desc-reg_indicators
[PATCH v1 08/13] IB/iser: Introduce pi_enable, pi_guard module parameters
From: Alex Tabachnik al...@mellanox.com Use modparams to activate protection information support. pi_enable bool: Based on this parameter iSER will know if it should support T10-PI. We don't want to do this by default as it requires allocating and initializing extra resources. In case pi_enable=N, iSER won't publish to SCSI midlayer any DIF capabilities. pi_guard int: Based on this parameter iSER will publish DIX guard type support to SCSI midlayer. 0 means CRC is allowed to be passed in DIX buffers, 1 (or non-zero) means IP-CSUM is allowed to be passed in DIX buffers. Note that over the wire, only CRC is allowed. In the next phase, it is worth considering passing these parameters from iscsid via nlmsg. This will allow these parameters to be connection based rather than global. Signed-off-by: Alex Tabachnik al...@mellanox.com Signed-off-by: Sagi Grimberg sa...@mellanox.com --- drivers/infiniband/ulp/iser/iscsi_iser.c |8 drivers/infiniband/ulp/iser/iscsi_iser.h |3 +++ drivers/infiniband/ulp/iser/iser_verbs.c | 13 + 3 files changed, 24 insertions(+), 0 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index dd03cfe..cfa952e 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -82,6 +82,8 @@ static unsigned int iscsi_max_lun = 512; module_param_named(max_lun, iscsi_max_lun, uint, S_IRUGO); int iser_debug_level = 0; +bool iser_pi_enable = false; +int iser_pi_guard = 0; MODULE_DESCRIPTION(iSER (iSCSI Extensions for RDMA) Datamover); MODULE_LICENSE(Dual BSD/GPL); @@ -91,6 +93,12 @@ MODULE_VERSION(DRV_VER); module_param_named(debug_level, iser_debug_level, int, 0644); MODULE_PARM_DESC(debug_level, Enable debug tracing if 0 (default:disabled)); +module_param_named(pi_enable, iser_pi_enable, bool, 0644); +MODULE_PARM_DESC(pi_enable, Enable T10-PI offload support (default:disabled)); + +module_param_named(pi_guard, iser_pi_guard, int, 0644); 
+MODULE_PARM_DESC(pi_guard, T10-PI guard_type, 0:CRC|1:IP_CSUM (default:CRC)); + struct iser_global ig; void diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 623defa..011003f 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -317,6 +317,7 @@ struct iser_conn { unsigned int rx_desc_head; struct iser_rx_desc *rx_descs; struct ib_recv_wrrx_wr[ISER_MIN_POSTED_RX]; + bool pi_support; /* Connection memory registration pool */ union { @@ -371,6 +372,8 @@ struct iser_global { extern struct iser_global ig; extern int iser_debug_level; +extern bool iser_pi_enable; +extern int iser_pi_guard; /* allocate connection resources needed for rdma functionality */ int iser_conn_set_full_featured_mode(struct iscsi_conn *conn); diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 6a5f424..4c27f55 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -607,6 +607,19 @@ static int iser_addr_handler(struct rdma_cm_id *cma_id) ib_conn = (struct iser_conn *)cma_id-context; ib_conn-device = device; + /* connection T10-PI support */ + if (iser_pi_enable) { + if (!(device-dev_attr.device_cap_flags + IB_DEVICE_SIGNATURE_HANDOVER)) { + iser_warn(T10-PI requested but not supported on %s, + continue without T10-PI\n, + ib_conn-device-ib_device-name); + ib_conn-pi_support = false; + } else { + ib_conn-pi_support = true; + } + } + ret = rdma_resolve_route(cma_id, 1000); if (ret) { iser_err(resolve route failed: %d\n, ret); -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v1 01/13] IB/iser: Avoid FRWR notation, use fastreg instead
FRWR stands for fast registration work request. We want to avoid calling the fastreg pool with that name, instead we name it fastreg which stands for fast registration. This pool will include more elements in the future, so it is a good idea to generalize the name. Signed-off-by: Sagi Grimberg sa...@mellanox.com Signed-off-by: Alex Tabachnik al...@mellanox.com --- drivers/infiniband/ulp/iser/iscsi_iser.h | 20 --- drivers/infiniband/ulp/iser/iser_memory.c | 28 +- drivers/infiniband/ulp/iser/iser_verbs.c | 84 ++-- 3 files changed, 67 insertions(+), 65 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index e1a01c6..ca161df 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -138,7 +138,7 @@ #define ISER_WSV 0x08 #define ISER_RSV 0x04 -#define ISER_FRWR_LI_WRID 0xULL +#define ISER_FASTREG_LI_WRID 0xULL struct iser_hdr { u8 flags; @@ -312,6 +312,8 @@ struct iser_conn { unsigned int rx_desc_head; struct iser_rx_desc *rx_descs; struct ib_recv_wrrx_wr[ISER_MIN_POSTED_RX]; + + /* Connection memory registration pool */ union { struct { struct ib_fmr_pool *pool; /* pool of IB FMRs */ @@ -321,8 +323,8 @@ struct iser_conn { struct { struct list_headpool; int pool_size; - } frwr; - } fastreg; + } fastreg; + }; }; struct iscsi_iser_conn { @@ -408,8 +410,8 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *task, int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task, enum iser_data_dir cmd_dir); -int iser_reg_rdma_mem_frwr(struct iscsi_iser_task *task, - enum iser_data_dir cmd_dir); +int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *task, + enum iser_data_dir cmd_dir); int iser_connect(struct iser_conn *ib_conn, struct sockaddr_in *src_addr, @@ -422,8 +424,8 @@ int iser_reg_page_vec(struct iser_conn *ib_conn, void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir); -void iser_unreg_mem_frwr(struct iscsi_iser_task 
*iser_task, -enum iser_data_dir cmd_dir); +void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task, + enum iser_data_dir cmd_dir); int iser_post_recvl(struct iser_conn *ib_conn); int iser_post_recvm(struct iser_conn *ib_conn, int count); @@ -440,6 +442,6 @@ int iser_initialize_task_headers(struct iscsi_task *task, int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session); int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max); void iser_free_fmr_pool(struct iser_conn *ib_conn); -int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max); -void iser_free_frwr_pool(struct iser_conn *ib_conn); +int iser_create_fastreg_pool(struct iser_conn *ib_conn, unsigned cmds_max); +void iser_free_fastreg_pool(struct iser_conn *ib_conn); #endif diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index f770179..6e9b7bc 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -422,8 +422,8 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task, (unsigned long)regd_buf-reg.va, (unsigned long)regd_buf-reg.len); } else { /* use FMR for multiple dma entries */ - iser_page_vec_build(mem, ib_conn-fastreg.fmr.page_vec, ibdev); - err = iser_reg_page_vec(ib_conn, ib_conn-fastreg.fmr.page_vec, + iser_page_vec_build(mem, ib_conn-fmr.page_vec, ibdev); + err = iser_reg_page_vec(ib_conn, ib_conn-fmr.page_vec, regd_buf-reg); if (err err != -EAGAIN) { iser_data_buf_dump(mem, ibdev); @@ -431,12 +431,12 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task, mem-dma_nents, ntoh24(iser_task-desc.iscsi_header.dlength)); iser_err(page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n, -ib_conn-fastreg.fmr.page_vec-data_size, -ib_conn-fastreg.fmr.page_vec-length, -ib_conn-fastreg.fmr.page_vec-offset); - for (i = 0; i ib_conn-fastreg.fmr.page_vec-length; i
[PATCH v1 12/13] IB/iser: Implement check_protection
Once the iSCSI transaction is completed we must implement check_protection in order to notify on DIF errors that may have occurred. The routine boils down to calling ib_check_mr_status to get the signature status of the transaction. Signed-off-by: Sagi Grimberg sa...@mellanox.com Signed-off-by: Alex Tabachnik al...@mellanox.com --- drivers/infiniband/ulp/iser/iscsi_iser.c | 13 drivers/infiniband/ulp/iser/iscsi_iser.h |2 + drivers/infiniband/ulp/iser/iser_verbs.c | 45 ++ 3 files changed, 60 insertions(+), 0 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index a64b878..f13d7e9 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -306,6 +306,18 @@ static void iscsi_iser_cleanup_task(struct iscsi_task *task) } } +static u8 iscsi_iser_check_protection(struct iscsi_task *task, sector_t *sector) +{ + struct iscsi_iser_task *iser_task = task-dd_data; + + if (iser_task-dir[ISER_DIR_IN]) + return iser_check_task_pi_status(iser_task, ISER_DIR_IN, +sector); + else + return iser_check_task_pi_status(iser_task, ISER_DIR_OUT, +sector); +} + static struct iscsi_cls_conn * iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx) { @@ -742,6 +754,7 @@ static struct iscsi_transport iscsi_iser_transport = { .xmit_task = iscsi_iser_task_xmit, .cleanup_task = iscsi_iser_cleanup_task, .alloc_pdu = iscsi_iser_pdu_alloc, + .check_protection = iscsi_iser_check_protection, /* recovery */ .session_recovery_timedout = iscsi_session_recovery_timedout, diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index fce5409..95f291f 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -483,4 +483,6 @@ int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max); void iser_free_fmr_pool(struct iser_conn *ib_conn); int iser_create_fastreg_pool(struct iser_conn 
*ib_conn, unsigned cmds_max); void iser_free_fastreg_pool(struct iser_conn *ib_conn); +u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, +enum iser_data_dir cmd_dir, sector_t *sector); #endif diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 0404c71..617d490 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -1153,3 +1153,48 @@ static void iser_cq_callback(struct ib_cq *cq, void *cq_context) tasklet_schedule(device-cq_tasklet[cq_index]); } + +u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, +enum iser_data_dir cmd_dir, sector_t *sector) +{ + struct iser_mem_reg *reg = iser_task-rdma_regd[cmd_dir].reg; + struct fast_reg_descriptor *desc = reg-mem_h; + unsigned long sector_size = iser_task-sc-device-sector_size; + struct ib_mr_status mr_status; + int ret; + + if (desc desc-reg_indicators ISER_FASTREG_PROTECTED) { + desc-reg_indicators = ~ISER_FASTREG_PROTECTED; + ret = ib_check_mr_status(desc-pi_ctx-sig_mr, +IB_MR_CHECK_SIG_STATUS, mr_status); + if (ret) { + pr_err(ib_check_mr_status failed, ret %d\n, ret); + goto err; + } + + if (mr_status.fail_status IB_MR_CHECK_SIG_STATUS) { + *sector = mr_status.sig_err.sig_err_offset; + + do_div(*sector, sector_size + 8); + pr_err(PI error found type %d at sector %lx + expected %x vs actual %x\n, + mr_status.sig_err.err_type, *sector, + mr_status.sig_err.expected, + mr_status.sig_err.actual); + + switch (mr_status.sig_err.err_type) { + case IB_SIG_BAD_GUARD: + return 0x1; + case IB_SIG_BAD_REFTAG: + return 0x3; + case IB_SIG_BAD_APPTAG: + return 0x2; + } + } + } + + return 0; +err: + /* Not alot we can do here, return ambiguous guard error */ + return 0x1; +} -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v1 00/13] T10-PI support for iSER initiator
Hey Roland, Nic and Co This patchset adds T10 protection information offload support over RDMA signature verbs API. This set, along with the iSER target set, allows end-to-end protection information passthrough and validation. The patchset was tested against Linux SCSI target with iSER DIF support applied. The enablement of iSER DIF support is currently controlled with module parameters (similar to lpfc for example) which make them global. In the next phase we can consider passing these parameters from iscsid nl messages, which would make them per-connection. The approach I took with respect to escalating protection information errors was minimal iSCSI intervention in protection information affairs. I added a hook to libiscsi asking the transport to check the protection information status, and construct the proper sense data in case of errors (the alternative of letting the transport construct sense data seemed much less appealing). Note that this patchset comes on top of a pending patch for iSER to suppress fastreg completions (http://marc.info/?l=linux-rdma&m=139047309831997&w=2). v0 patches are available in target-pending git repo (branch rdma-dif) and passed 0-DAY testing. Roland, I would like to hear your feedback on this. The set is ordered by the following: - Preparation patches (non/minor functionality changes). - Add protection information execution support. - Add protection information status check facilities. - Publish T10-DIF support to SCSI midlayer according to IB device capabilities. Changes from v0: - Fix protection information dma registration for unaligned scatterlists which may happen when the block layer merges bios. - Don't fail connections on devices without DIF support - warn and continue without DIF. 
- reword FR - FastReg Alex Tabachnik (2): IB/iser: Introduce pi_enable, pi_guard module parameters IB/iser: Initialize T10-PI resources Sagi Grimberg (11): IB/iser: Avoid FRWR notation, use fastreg instead IB/iser: Push the desicion what memory key to use into fast_reg_mr routine IB/iser: Move fast_reg_descriptor initialization to a function IB/iser: Keep IB device attributes under iser_device IB/iser: Replace fastreg descriptor valid bool with indicators container IB/iser: Generalize iser_unmap_task_data and finalize_rdma_unaligned_sg IB/iser: Generalize fall_to_bounce_buf routine IB/iser: Support T10-PI operations SCSI/libiscsi: Add check_protection callback for transports IB/iser: Implement check_protection IB/iser: Publish T10-PI support to SCSI midlayer drivers/infiniband/ulp/iser/iscsi_iser.c | 46 +++- drivers/infiniband/ulp/iser/iscsi_iser.h | 71 - drivers/infiniband/ulp/iser/iser_initiator.c | 98 +- drivers/infiniband/ulp/iser/iser_memory.c| 445 +++--- drivers/infiniband/ulp/iser/iser_verbs.c | 285 - drivers/scsi/libiscsi.c | 32 ++ include/scsi/libiscsi.h |4 + include/scsi/scsi_transport_iscsi.h |1 + 8 files changed, 769 insertions(+), 213 deletions(-) -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v1 06/13] IB/iser: Generalize iser_unmap_task_data and finalize_rdma_unaligned_sg
These routines operate on data buffers and may also work with protection information buffers. So we generalize them to handle an iser_data_buf which can be the command data or command protection information. This patch does not change any functionality. Signed-off-by: Sagi Grimberg sa...@mellanox.com --- drivers/infiniband/ulp/iser/iscsi_iser.h |9 -- drivers/infiniband/ulp/iser/iser_initiator.c | 37 +--- drivers/infiniband/ulp/iser/iser_memory.c| 40 ++--- 3 files changed, 48 insertions(+), 38 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 5660714..623defa 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -410,8 +410,10 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *task); void iser_free_rx_descriptors(struct iser_conn *ib_conn); -void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *task, -enum iser_data_dir cmd_dir); +void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, +struct iser_data_buf *mem, +struct iser_data_buf *mem_copy, +enum iser_data_dir cmd_dir); int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task, enum iser_data_dir cmd_dir); @@ -441,7 +443,8 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, enum iser_data_dir iser_dir, enum dma_data_direction dma_dir); -void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task); +void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task, + struct iser_data_buf *data); int iser_initialize_task_headers(struct iscsi_task *task, struct iser_tx_desc *tx_desc); int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session); diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 334f34b..58e14c7 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -644,27 +644,42 @@ void iser_task_rdma_init(struct 
iscsi_iser_task *iser_task) void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task) { struct iser_device *device = iser_task-iser_conn-ib_conn-device; - int is_rdma_aligned = 1; + int is_rdma_data_aligned = 1; /* if we were reading, copy back to unaligned sglist, * anyway dma_unmap and free the copy */ if (iser_task-data_copy[ISER_DIR_IN].copy_buf != NULL) { - is_rdma_aligned = 0; - iser_finalize_rdma_unaligned_sg(iser_task, ISER_DIR_IN); + is_rdma_data_aligned = 0; + iser_finalize_rdma_unaligned_sg(iser_task, + iser_task-data[ISER_DIR_IN], + iser_task-data_copy[ISER_DIR_IN], + ISER_DIR_IN); } + if (iser_task-data_copy[ISER_DIR_OUT].copy_buf != NULL) { - is_rdma_aligned = 0; - iser_finalize_rdma_unaligned_sg(iser_task, ISER_DIR_OUT); + is_rdma_data_aligned = 0; + iser_finalize_rdma_unaligned_sg(iser_task, + iser_task-data[ISER_DIR_OUT], + iser_task-data_copy[ISER_DIR_OUT], + ISER_DIR_OUT); } - if (iser_task-dir[ISER_DIR_IN]) + if (iser_task-dir[ISER_DIR_IN]) { device-iser_unreg_rdma_mem(iser_task, ISER_DIR_IN); + if (is_rdma_data_aligned) + iser_dma_unmap_task_data(iser_task, +iser_task-data[ISER_DIR_IN]); - if (iser_task-dir[ISER_DIR_OUT]) - device-iser_unreg_rdma_mem(iser_task, ISER_DIR_OUT); + } - /* if the data was unaligned, it was already unmapped and then copied */ - if (is_rdma_aligned) - iser_dma_unmap_task_data(iser_task); + if (iser_task-dir[ISER_DIR_OUT]) { + device-iser_unreg_rdma_mem(iser_task, ISER_DIR_OUT); + if (is_rdma_data_aligned) + iser_dma_unmap_task_data(iser_task, + iser_task-data[ISER_DIR_OUT]); + if (prot_count is_rdma_prot_aligned) + iser_dma_unmap_task_data(iser_task, + iser_task-prot[ISER_DIR_OUT]); + } } diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index
[PATCH v1 10/13] IB/iser: Support T10-PI operations
Add logic to initialize protection information entities. Upon each iSCSI task, we keep the scsi_cmnd in order to query the scsi protection operations and reference to protection buffers. Modify iser_fast_reg_mr to receive an indication whether it is registering the data or protection buffers. In addition, introduce iser_reg_sig_mr which performs fast registration work-request for a signature enabled memory region (IB_WR_REG_SIG_MR). In this routine we set all the protection-relevant attributes for the device to offload protection data-transfer and verification. Signed-off-by: Sagi Grimberg sa...@mellanox.com Signed-off-by: Alex Tabachnik al...@mellanox.com --- drivers/infiniband/ulp/iser/iscsi_iser.c |2 + drivers/infiniband/ulp/iser/iscsi_iser.h |9 + drivers/infiniband/ulp/iser/iser_initiator.c | 63 ++- drivers/infiniband/ulp/iser/iser_memory.c| 257 +++--- 4 files changed, 304 insertions(+), 27 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index cfa952e..a64b878 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -184,6 +184,8 @@ iscsi_iser_task_init(struct iscsi_task *task) iser_task-command_sent = 0; iser_task_rdma_init(iser_task); + iser_task-sc = task-sc; + return 0; } diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 99fc8b8..fce5409 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -46,6 +46,8 @@ #include linux/printk.h #include scsi/libiscsi.h #include scsi/scsi_transport_iscsi.h +#include scsi/scsi_cmnd.h +#include scsi/scsi_device.h #include linux/interrupt.h #include linux/wait.h @@ -289,6 +291,10 @@ struct iser_device { enum iser_data_dir cmd_dir); }; +#define ISER_CHECK_GUARD 0xc0 +#define ISER_CHECK_REFTAG 0x0f +#define ISER_CHECK_APPTAG 0x30 + enum iser_reg_indicator { ISER_DATA_KEY_VALID = 1 0, ISER_PROT_KEY_VALID = 1 1, @@ -361,11 +367,14 @@ 
struct iscsi_iser_task { struct iser_tx_desc desc; struct iscsi_iser_conn *iser_conn; enum iser_task_statusstatus; + struct scsi_cmnd *sc; int command_sent; /* set if command sent */ int dir[ISER_DIRS_NUM]; /* set if dir use*/ struct iser_regd_buf rdma_regd[ISER_DIRS_NUM];/* regd rdma buf */ struct iser_data_buf data[ISER_DIRS_NUM]; /* orig. data des*/ struct iser_data_buf data_copy[ISER_DIRS_NUM];/* contig. copy */ + struct iser_data_buf prot[ISER_DIRS_NUM]; /* prot desc */ + struct iser_data_buf prot_copy[ISER_DIRS_NUM];/* prot copy */ }; struct iser_page_vec { diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 58e14c7..7fd95fe 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -62,6 +62,17 @@ static int iser_prepare_read_cmd(struct iscsi_task *task, if (err) return err; + if (scsi_prot_sg_count(iser_task-sc)) { + struct iser_data_buf *pbuf_in = iser_task-prot[ISER_DIR_IN]; + + err = iser_dma_map_task_data(iser_task, +pbuf_in, +ISER_DIR_IN, +DMA_FROM_DEVICE); + if (err) + return err; + } + if (edtl iser_task-data[ISER_DIR_IN].data_len) { iser_err(Total data length: %ld, less than EDTL: %d, in READ cmd BHS itt: %d, conn: 0x%p\n, @@ -113,6 +124,17 @@ iser_prepare_write_cmd(struct iscsi_task *task, if (err) return err; + if (scsi_prot_sg_count(iser_task-sc)) { + struct iser_data_buf *pbuf_out = iser_task-prot[ISER_DIR_OUT]; + + err = iser_dma_map_task_data(iser_task, +pbuf_out, +ISER_DIR_OUT, +DMA_TO_DEVICE); + if (err) + return err; + } + if (edtl iser_task-data[ISER_DIR_OUT].data_len) { iser_err(Total data length: %ld, less than EDTL: %d, in WRITE cmd BHS itt: %d, conn: 0x%p\n, @@ -368,7 +390,7 @@ int iser_send_command(struct iscsi_conn *conn, struct iscsi_iser_task *iser_task = task-dd_data; unsigned long edtl; int err; - struct iser_data_buf *data_buf; + struct iser_data_buf *data_buf
[PATCH v1 13/13] IB/iser: Publish T10-PI support to SCSI midlayer
After allocating a scsi_host we set protection types and guard type supported. Signed-off-by: Sagi Grimberg sa...@mellanox.com Signed-off-by: Alex Tabachnik al...@mellanox.com --- drivers/infiniband/ulp/iser/iscsi_iser.c | 23 ++- 1 files changed, 22 insertions(+), 1 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index f13d7e9..a0ec2d0 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -435,6 +435,17 @@ static void iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session) iscsi_host_free(shost); } +static inline unsigned int +iser_dif_prot_caps(int prot_caps) +{ + return ((prot_caps IB_PROT_T10DIF_TYPE_1) ? SHOST_DIF_TYPE1_PROTECTION | + SHOST_DIX_TYPE1_PROTECTION : 0) | + ((prot_caps IB_PROT_T10DIF_TYPE_2) ? SHOST_DIF_TYPE2_PROTECTION | + SHOST_DIX_TYPE2_PROTECTION : 0) | + ((prot_caps IB_PROT_T10DIF_TYPE_3) ? SHOST_DIF_TYPE3_PROTECTION | + SHOST_DIX_TYPE3_PROTECTION : 0); +} + static struct iscsi_cls_session * iscsi_iser_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max, uint16_t qdepth, @@ -459,8 +470,18 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, * older userspace tools (before 2.0-870) did not pass us * the leading conn's ep so this will be NULL; */ - if (ep) + if (ep) { ib_conn = ep-dd_data; + if (ib_conn-pi_support) { + u32 sig_caps = ib_conn-device-dev_attr.sig_prot_cap; + + scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps)); + if (iser_pi_guard) + scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP); + else + scsi_host_set_guard(shost, SHOST_DIX_GUARD_CRC); + } + } if (iscsi_host_add(shost, ep ? ib_conn-device-ib_device-dma_device : NULL)) -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v1 11/13] SCSI/libiscsi: Add check_protection callback for transports
On 3/3/2014 6:41 AM, Mike Christie wrote: On 02/27/2014 05:13 AM, Sagi Grimberg wrote: diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 4046241..a58a6bb 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -395,6 +395,10 @@ static int iscsi_prep_scsi_cmd_pdu(struct iscsi_task *task) if (rc) return rc; } + + if (scsi_get_prot_op(sc) != SCSI_PROT_NORMAL) + task-protected = true; + if (sc-sc_data_direction == DMA_TO_DEVICE) { unsigned out_len = scsi_out(sc)-length; struct iscsi_r2t_info *r2t = task-unsol_r2t; @@ -823,6 +827,33 @@ static void iscsi_scsi_cmd_rsp(struct iscsi_conn *conn, struct iscsi_hdr *hdr, sc-result = (DID_OK 16) | rhdr-cmd_status; + if (task-protected) { + sector_t sector; + u8 ascq; + + /** +* Transports that didn't implement check_protection +* callback but still published T10-PI support to scsi-mid +* deserve this BUG_ON. +**/ +BUG_ON(!session-tt-check_protection); Extra space before BUG_ON. I'll add. + + ascq = session-tt-check_protection(task, sector); + if (ascq) { + sc-result = DRIVER_SENSE 24 | DID_ABORT 16 | +SAM_STAT_CHECK_CONDITION; I am not sure what the reason for the DID_ABORT is here. I do not think we want that, because we just want scsi-ml to evaluate the sense error part of the failure. It works ok today, but the DID_ABORT error can possibly be evaluated before the sense so you might miss passing that info to upper layers. That makes sense to me. I can remove DID_ABORT. 
+ scsi_build_sense_buffer(1, sc-sense_buffer, + ILLEGAL_REQUEST, 0x10, ascq); + sc-sense_buffer[7] = 0xc; /* Additional sense length */ + sc-sense_buffer[8] = 0; /* Information desc type */ + sc-sense_buffer[9] = 0xa; /* Additional desc length */ + sc-sense_buffer[10] = 0x80; /* Validity bit */ + + put_unaligned_be64(sector, sc-sense_buffer[12]); + goto out; + } + } + -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v1 10/13] IB/iser: Support T10-PI operations
On 3/3/2014 6:44 AM, Mike Christie wrote: On 02/27/2014 05:13 AM, Sagi Grimberg wrote: diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 58e14c7..7fd95fe 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -62,6 +62,17 @@ static int iser_prepare_read_cmd(struct iscsi_task *task, if (err) return err; + if (scsi_prot_sg_count(iser_task-sc)) { + struct iser_data_buf *pbuf_in = iser_task-prot[ISER_DIR_IN]; + + err = iser_dma_map_task_data(iser_task, +pbuf_in, +ISER_DIR_IN, +DMA_FROM_DEVICE); + if (err) + return err; + } + if (edtl iser_task-data[ISER_DIR_IN].data_len) { iser_err(Total data length: %ld, less than EDTL: %d, in READ cmd BHS itt: %d, conn: 0x%p\n, @@ -113,6 +124,17 @@ iser_prepare_write_cmd(struct iscsi_task *task, if (err) return err; + if (scsi_prot_sg_count(iser_task-sc)) { + struct iser_data_buf *pbuf_out = iser_task-prot[ISER_DIR_OUT]; + + err = iser_dma_map_task_data(iser_task, +pbuf_out, +ISER_DIR_OUT, +DMA_TO_DEVICE); + if (err) + return err; + } + The xmit_task callout does not handle failures like EINVAL. If the above map calls fail then you would get infinite retries. You would currently want to do the mapping in the init_task callout instead. The same applies to the data dma_mapping that already exists in xmit_task today. I can move it to init_task, but to me, dma_mapping is not really an init operation but part of the xmit stage. If it makes it easier on the driver implementation then it is ok to modify the xmit_task callers so that they handle multiple error codes for drivers like iser that have the xmit_task callout called from iscsi_queuecommand. This sounds somewhat better to me. 
-- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 4/6] iser-target: Fix post_send_buf_count for RDMA READ/WRITE
On 3/4/2014 9:49 AM, Or Gerlitz wrote: On 04/03/2014 02:01, Nicholas A. Bellinger wrote: This is necessary because even though IB_SEND_SIGNALED is not set for RDMA WRITEs + READs, during a QP failure event the work requests will be returned with exception status from the TX completion queue. Impossible... for rdma reads we must ask for a completion, since we should write the data to the back-end. I assume it's just a mistaken mention of rdma-read here, right? No. In the case of multiple RDMA READs (for the non-fastreg case) isert asks for a completion only on the last one. This is fine. Sagi. -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v1 10/13] IB/iser: Support T10-PI operations
On 3/4/2014 11:38 AM, Or Gerlitz wrote: On 03/03/2014 06:44, Mike Christie wrote: The xmit_task callout does not handle failures like EINVAL. If the above map calls fail then you would get infinite retries. You would currently want to do the mapping in the init_task callout instead. If it makes it easier on the driver implementation then it is ok to modify the xmit_task callers so that they handle multiple error codes for drivers like iser that have the xmit_task callout called from iscsi_queuecommand. Mike, After looking at the code with Sagi, it seems to us that the correct way to go here would be to enhance, in iscsi_queuecommand, the processing of the result returned by session->tt->xmit_task(task) to behave in a similar manner to how the return value of iscsi_prep_scsi_cmd_pdu() is treated. E.g., for errors such as ENOMEM and EAGAIN take the reject flow, which would cause the SCSI midlayer to retry the command, and for other return values go to the fault flow, which will cause the ML to abort the command. Or. Yes, we were thinking about the following: --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -1707,10 +1707,17 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc) goto prepd_fault; } } - if (session-tt-xmit_task(task)) { - session-cmdsn--; - reason = FAILURE_SESSION_NOT_READY; - goto prepd_reject; + + reason = session-tt-xmit_task(task); + if (reason) { + if (reason == -ENOMEM || reason == -EAGAIN) { + session-cmdsn--; + reason = FAILURE_SESSION_NOT_READY; + goto prepd_reject; + } else { + sc-result = DID_ABORT 16; + goto prepd_fault; + } } } else { list_add_tail(task-running, conn-cmdqueue); -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v1 10/13] IB/iser: Support T10-PI operations
On 3/4/2014 1:25 PM, Or Gerlitz wrote: On 04/03/2014 11:59, Sagi Grimberg wrote: On 3/4/2014 11:38 AM, Or Gerlitz wrote: On 03/03/2014 06:44, Mike Christie wrote: The xmit_task callout does not handle failures like EINVAL. If the above map calls fail then you would get infinite retries. You would currently want to do the mapping in the init_task callout instead. If it makes it easier on the driver implementation then it is ok to modify the xmit_task callers so that they handle multiple error codes for drivers like iser that have the xmit_task callout called from iscsi_queuecommand. Mike, After looking at the code with Sagi, it seems to us that the correct way to go here would be to enhance, in iscsi_queuecommand, the processing of the result returned by session->tt->xmit_task(task) to behave in a similar manner to how the return value of iscsi_prep_scsi_cmd_pdu() is treated. E.g., for errors such as ENOMEM and EAGAIN take the reject flow, which would cause the SCSI midlayer to retry the command, and for other return values go to the fault flow, which will cause the ML to abort the command. Or. Yes, we were thinking about the following: --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -1707,10 +1707,17 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc) goto prepd_fault; } } - if (session-tt-xmit_task(task)) { - session-cmdsn--; - reason = FAILURE_SESSION_NOT_READY; - goto prepd_reject; + + reason = session-tt-xmit_task(task); + if (reason) { + if (reason == -ENOMEM || reason == -EAGAIN) { + session-cmdsn--; I am pretty sure this has to be done anyway, no matter why the xmit_task callback failed. Even if we abort? This just follows the same logic as the iscsi_prep_scsi_cmd_pdu error flow. 
+ reason = FAILURE_SESSION_NOT_READY; + goto prepd_reject; + } else { + sc-result = DID_ABORT 16; + goto prepd_fault; + } } } else { list_add_tail(task-running, conn-cmdqueue); -- To unsubscribe from this list: send the line unsubscribe linux-rdma in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 5/6] iser-target: Ignore completions for FRWRs in isert_cq_tx_work
On 3/4/2014 2:01 AM, Nicholas A. Bellinger wrote: From: Nicholas Bellinger n...@linux-iscsi.org This patch changes IB_WR_FAST_REG_MR + IB_WR_LOCAL_INV related work requests to include a ISER_FRWR_LI_WRID value in order to signal isert_cq_tx_work() that these requests should be ignored. This is necessary because even though IB_SEND_SIGNALED is not set for either work request, during a QP failure event the work requests will be returned with exception status from the TX completion queue. Cc: Sagi Grimberg sa...@mellanox.com Cc: Or Gerlitz ogerl...@mellanox.com Signed-off-by: Nicholas Bellinger n...@linux-iscsi.org --- drivers/infiniband/ulp/isert/ib_isert.c |8 ++-- drivers/infiniband/ulp/isert/ib_isert.h |1 + 2 files changed, 7 insertions(+), 2 deletions(-) diff --git a/drivers/infiniband/ulp/isert/ib_isert.c b/drivers/infiniband/ulp/isert/ib_isert.c index c9d488f..003b5d0 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.c +++ b/drivers/infiniband/ulp/isert/ib_isert.c @@ -1738,8 +1738,10 @@ isert_cq_tx_work(struct work_struct *work) pr_debug(TX wc.status: 0x%08x\n, wc.status); pr_debug(TX wc.vendor_err: 0x%08x\n, wc.vendor_err); - atomic_dec(isert_conn-post_send_buf_count); - isert_cq_tx_comp_err(tx_desc, isert_conn); + if (wc.wr_id != ISER_FRWR_LI_WRID) { Better to use ISER_FASTREG_LI_WRID - I changed it in the initiator. 
+ atomic_dec(isert_conn-post_send_buf_count); + isert_cq_tx_comp_err(tx_desc, isert_conn); + } } } @@ -2202,6 +2204,7 @@ isert_fast_reg_mr(struct fast_reg_descriptor *fr_desc, if (!fr_desc-valid) { memset(inv_wr, 0, sizeof(inv_wr)); + inv_wr.wr_id = ISER_FRWR_LI_WRID; inv_wr.opcode = IB_WR_LOCAL_INV; inv_wr.ex.invalidate_rkey = fr_desc-data_mr-rkey; wr = inv_wr; @@ -2212,6 +2215,7 @@ isert_fast_reg_mr(struct fast_reg_descriptor *fr_desc, /* Prepare FASTREG WR */ memset(fr_wr, 0, sizeof(fr_wr)); + fr_wr.wr_id = ISER_FRWR_LI_WRID; fr_wr.opcode = IB_WR_FAST_REG_MR; fr_wr.wr.fast_reg.iova_start = fr_desc-data_frpl-page_list[0] + page_off; diff --git a/drivers/infiniband/ulp/isert/ib_isert.h b/drivers/infiniband/ulp/isert/ib_isert.h index 41e799f..599b4e2 100644 --- a/drivers/infiniband/ulp/isert/ib_isert.h +++ b/drivers/infiniband/ulp/isert/ib_isert.h @@ -6,6 +6,7 @@ #define ISERT_RDMA_LISTEN_BACKLOG 10 #define ISCSI_ISER_SG_TABLESIZE 256 +#define ISER_FRWR_LI_WRID 0xULL enum isert_desc_type { ISCSI_TX_CONTROL, -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 0/6] iser-target: Fix active I/O shutdown related issues
On 3/4/2014 2:00 AM, Nicholas A. Bellinger wrote: From: Nicholas Bellinger n...@linux-iscsi.org Hi Or & Sagi, This series addresses a number of active I/O shutdown related issues in iser-target code that have come up recently during stress testing. Note there is still a separate iser-target network portal shutdown bug being tracked down, but this series addresses all existing issues related to active I/O session shutdown. The patch breakdown looks like: Patch #1 fixes a long-standing bug where TPGs in shutdown incorrectly could be referenced by new login attempts. Patch #2 converts list_del -> list_del_init for iscsi_cmd->i_conn_node so that list_empty works correctly. Patch #3 addresses isert_conn->state related bugs resulting in hung shutdown, and splits isert_free_conn() into separate code that is called earlier during shutdown to ensure that all outstanding I/O has completed. Patch #4 fixes incorrect accounting of ->post_send_buf_count during active I/O shutdown with outstanding RDMA WRITE + RDMA READ work requests. Patch #5 addresses a bug related to active I/O shutdown with outstanding FRMR work requests. Note this patch is specific to v3.12+ code. Patch #6 addresses bugs related to active I/O shutdown with outstanding completion interrupt coalescing batches. Note this patch is specific to v3.13+ code. Please review. Hey Nic, So besides a minor comment, you have my Ack on this set. More on the cleanup flow: isert_cma_handler does not handle RDMA_CM_EVENT_TIMEWAIT_EXIT. To be more specific, according to the IB spec, when initiating a disconnect (rdma_disconnect/ib_send_cm_dreq), one should not destroy a used qp until getting the TIMEWAIT_EXIT CM event. We are working on this in the iSER initiator. Not waiting for it might lead to stale-connection CM rejects on future connections (SRP also does not do that). Sagi. 
--nab Nicholas Bellinger (6): iscsi-target: Fix iscsit_get_tpg_from_np tpg_state bug iscsi/iser-target: Use list_del_init for -i_conn_node iscsi/iser-target: Fix isert_conn-state hung shutdown issues iser-target: Fix post_send_buf_count for RDMA READ/WRITE iser-target: Ignore completions for FRWRs in isert_cq_tx_work iser-target: Fix command leak for tx_desc-comp_llnode_batch drivers/infiniband/ulp/isert/ib_isert.c | 180 ++ drivers/infiniband/ulp/isert/ib_isert.h |7 +- drivers/target/iscsi/iscsi_target.c | 10 +- drivers/target/iscsi/iscsi_target_erl2.c | 16 +-- drivers/target/iscsi/iscsi_target_tpg.c |2 +- include/target/iscsi/iscsi_transport.h |1 + 6 files changed, 129 insertions(+), 87 deletions(-) -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v1 10/13] IB/iser: Support T10-PI operations
On 3/4/2014 6:16 PM, Or Gerlitz wrote: On 04/03/2014 16:44, Sagi Grimberg wrote: @@ -1707,10 +1707,17 @@ int iscsi_queuecommand(struct Scsi_Host *host, struct scsi_cmnd *sc) goto prepd_fault; } } - if (session-tt-xmit_task(task)) { - session-cmdsn--; - reason = FAILURE_SESSION_NOT_READY; - goto prepd_reject; + + reason = session-tt-xmit_task(task); + if (reason) { + if (reason == -ENOMEM || reason == -EAGAIN) { + session-cmdsn--; I am pretty sure this has to be done anyway, no matter why the xmit_task callback failed Even if we abort? this just follows the same logic as iscsi_prep_scsi_cmd_pdu error flow. yes, take a 2nd look on iscsi_prep_scsi_cmd_pdu and you'll see that all the possible error cases take place **before** session-cmdsn is incremented Ahhh, yes... So dec the cmdsn regardless. Mike, are you on board with this? Sagi. -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 0/6] iser-target: Fix active I/O shutdown related issues
On 3/5/2014 2:06 AM, Nicholas A. Bellinger wrote: On Tue, 2014-03-04 at 17:17 +0200, Sagi Grimberg wrote: On 3/4/2014 2:00 AM, Nicholas A. Bellinger wrote: From: Nicholas Bellinger n...@linux-iscsi.org Hi Or & Sagi, This series addresses a number of active I/O shutdown related issues in iser-target code that have come up recently during stress testing. Note there is still a separate iser-target network portal shutdown bug being tracked down, but this series addresses all existing issues related to active I/O session shutdown. The patch breakdown looks like: Patch #1 fixes a long-standing bug where TPGs in shutdown incorrectly could be referenced by new login attempts. Patch #2 converts list_del -> list_del_init for iscsi_cmd->i_conn_node so that list_empty works correctly. Patch #3 addresses isert_conn->state related bugs resulting in hung shutdown, and splits isert_free_conn() into separate code that is called earlier during shutdown to ensure that all outstanding I/O has completed. Patch #4 fixes incorrect accounting of ->post_send_buf_count during active I/O shutdown with outstanding RDMA WRITE + RDMA READ work requests. Patch #5 addresses a bug related to active I/O shutdown with outstanding FRMR work requests. Note this patch is specific to v3.12+ code. Patch #6 addresses bugs related to active I/O shutdown with outstanding completion interrupt coalescing batches. Note this patch is specific to v3.13+ code. Please review. Hey Nic, So besides a minor comment, you have my Ack on this set. Thanks! More on the cleanup flow: isert_cma_handler does not handle RDMA_CM_EVENT_TIMEWAIT_EXIT. To be more specific, according to the IB spec, when initiating a disconnect (rdma_disconnect/ib_send_cm_dreq), one should not destroy a used qp until getting the TIMEWAIT_EXIT CM event. We are working on this in the iSER initiator. Not waiting for it might lead to stale-connection CM rejects on future connections (SRP also does not do that). nod, I noticed that as well during recent debugging. 
However, AFAICT the RDMA_CM_EVENT_TIMEWAIT_EXIT doesn't (always) occur on the target side after a RDMA_CM_EVENT_DISCONNECTED, and thus far I've not been able to ascertain what's different about the shutdown sequence that would make this happen, or not happen.. Any ideas..? That's probably because the cm_id is destroyed before you get the event. There is a specific timeout computation for getting this event (see the IB spec). If you attempt to disconnect while the link is down (the initiator won't receive it and send you a disconnect back), you should be able to see this event. As I understand it, in order to comply with the spec, the QP (and the cm_id afterwards) should be destroyed only when getting this event and not before. Sagi. --nab -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 02/13] IB/iser: Push the decision what memory key to use into fast_reg_mr routine
This is a preparation step for T10-PI offload support. We prefer to push the decision of which mkey to use (global or fastreg) to iser_fast_reg_mr. We choose to do this since in T10-PI we may need to register for protection buffers and in this case we wish to simplify iser_fast_reg_mr instead of repeating the logic of which key to use. This patch does not change any functionality. Signed-off-by: Sagi Grimberg sa...@mellanox.com Signed-off-by: Alex Tabachnik al...@mellanox.com --- drivers/infiniband/ulp/iser/iser_memory.c | 101 + 1 files changed, 59 insertions(+), 42 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index 6e9b7bc..d25587e 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -444,16 +444,40 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task, return 0; } -static int iser_fast_reg_mr(struct fast_reg_descriptor *desc, - struct iser_conn *ib_conn, +static int iser_fast_reg_mr(struct iscsi_iser_task *iser_task, struct iser_regd_buf *regd_buf, - u32 offset, unsigned int data_size, - unsigned int page_list_len) + struct iser_data_buf *mem, + struct ib_sge *sge) { + struct fast_reg_descriptor *desc = regd_buf-reg.mem_h; + struct iser_conn *ib_conn = iser_task-iser_conn-ib_conn; + struct iser_device *device = ib_conn-device; + struct ib_device *ibdev = device-ib_device; struct ib_send_wr fastreg_wr, inv_wr; struct ib_send_wr *bad_wr, *wr = NULL; u8 key; - int ret; + int ret, offset, size, plen; + + /* if there a single dma entry, dma mr suffices */ + if (mem-dma_nents == 1) { + struct scatterlist *sg = (struct scatterlist *)mem-buf; + + sge-lkey = device-mr-lkey; + sge-addr = ib_sg_dma_address(ibdev, sg[0]); + sge-length = ib_sg_dma_len(ibdev, sg[0]); + + iser_dbg(Single DMA entry: lkey=0x%x, addr=0x%llx, length=0x%x\n, +sge-lkey, sge-addr, sge-length); + return 0; + } + + plen = iser_sg_to_page_vec(mem, device-ib_device, 
desc-data_frpl-page_list, + offset, size); + if (plen * SIZE_4K size) { + iser_err(fast reg page_list too short to hold this SG\n); + return -EINVAL; + } if (!desc-valid) { memset(inv_wr, 0, sizeof(inv_wr)); @@ -472,9 +496,9 @@ static int iser_fast_reg_mr(struct fast_reg_descriptor *desc, fastreg_wr.opcode = IB_WR_FAST_REG_MR; fastreg_wr.wr.fast_reg.iova_start = desc-data_frpl-page_list[0] + offset; fastreg_wr.wr.fast_reg.page_list = desc-data_frpl; - fastreg_wr.wr.fast_reg.page_list_len = page_list_len; + fastreg_wr.wr.fast_reg.page_list_len = plen; fastreg_wr.wr.fast_reg.page_shift = SHIFT_4K; - fastreg_wr.wr.fast_reg.length = data_size; + fastreg_wr.wr.fast_reg.length = size; fastreg_wr.wr.fast_reg.rkey = desc-data_mr-rkey; fastreg_wr.wr.fast_reg.access_flags = (IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_WRITE | @@ -492,12 +516,9 @@ static int iser_fast_reg_mr(struct fast_reg_descriptor *desc, } desc-valid = false; - regd_buf-reg.mem_h = desc; - regd_buf-reg.lkey = desc-data_mr-lkey; - regd_buf-reg.rkey = desc-data_mr-rkey; - regd_buf-reg.va = desc-data_frpl-page_list[0] + offset; - regd_buf-reg.len = data_size; - regd_buf-reg.is_mr = 1; + sge-lkey = desc-data_mr-lkey; + sge-addr = desc-data_frpl-page_list[0] + offset; + sge-length = size; return ret; } @@ -516,11 +537,10 @@ int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task, struct ib_device *ibdev = device-ib_device; struct iser_data_buf *mem = iser_task-data[cmd_dir]; struct iser_regd_buf *regd_buf = iser_task-rdma_regd[cmd_dir]; - struct fast_reg_descriptor *desc; - unsigned int data_size, page_list_len; + struct fast_reg_descriptor *desc = NULL; + struct ib_sge data_sge; int err, aligned_len; unsigned long flags; - u32 offset; aligned_len = iser_data_buf_aligned_len(mem, ibdev); if (aligned_len != mem-dma_nents) { @@ -533,41 +553,38 @@ int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *iser_task, mem = iser_task-data_copy[cmd_dir]; } - /* if there a single dma entry, dma mr suffices */ - if 
(mem-dma_nents == 1) { - struct scatterlist *sg = (struct scatterlist *)mem-buf; - - regd_buf-reg.lkey = device
[PATCH v2 03/13] IB/iser: Move fast_reg_descriptor initialization to a function
fastreg descriptor will include protection information context. In order to place the logic in one place we introduce iser_create_fr_desc function. This patch does not change any functionality. Signed-off-by: Sagi Grimberg sa...@mellanox.com Signed-off-by: Alex Tabachnik al...@mellanox.com --- drivers/infiniband/ulp/iser/iser_verbs.c | 58 - 1 files changed, 40 insertions(+), 18 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index dc5a0b4..9569e40 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -279,6 +279,39 @@ void iser_free_fmr_pool(struct iser_conn *ib_conn) ib_conn-fmr.page_vec = NULL; } +static int +iser_create_fastreg_desc(struct ib_device *ib_device, struct ib_pd *pd, +struct fast_reg_descriptor *desc) +{ + int ret; + + desc-data_frpl = ib_alloc_fast_reg_page_list(ib_device, + ISCSI_ISER_SG_TABLESIZE + 1); + if (IS_ERR(desc-data_frpl)) { + ret = PTR_ERR(desc-data_frpl); + iser_err(Failed to allocate ib_fast_reg_page_list err=%d\n, +ret); + return PTR_ERR(desc-data_frpl); + } + + desc-data_mr = ib_alloc_fast_reg_mr(pd, ISCSI_ISER_SG_TABLESIZE + 1); + if (IS_ERR(desc-data_mr)) { + ret = PTR_ERR(desc-data_mr); + iser_err(Failed to allocate ib_fast_reg_mr err=%d\n, ret); + goto fast_reg_mr_failure; + } + iser_info(Create fr_desc %p page_list %p\n, + desc, desc-data_frpl-page_list); + desc-valid = true; + + return 0; + +fast_reg_mr_failure: + ib_free_fast_reg_page_list(desc-data_frpl); + + return ret; +} + /** * iser_create_fastreg_pool - Creates pool of fast_reg descriptors * for fast registration work requests. 
@@ -300,32 +333,21 @@ int iser_create_fastreg_pool(struct iser_conn *ib_conn, unsigned cmds_max) goto err; } - desc-data_frpl = ib_alloc_fast_reg_page_list(device-ib_device, - ISCSI_ISER_SG_TABLESIZE + 1); - if (IS_ERR(desc-data_frpl)) { - ret = PTR_ERR(desc-data_frpl); - iser_err(Failed to allocate ib_fast_reg_page_list err=%d\n, ret); - goto fast_reg_page_failure; + ret = iser_create_fastreg_desc(device-ib_device, + device-pd, desc); + if (ret) { + iser_err(Failed to create fastreg descriptor err=%d\n, +ret); + kfree(desc); + goto err; } - desc-data_mr = ib_alloc_fast_reg_mr(device-pd, -ISCSI_ISER_SG_TABLESIZE + 1); - if (IS_ERR(desc-data_mr)) { - ret = PTR_ERR(desc-data_mr); - iser_err(Failed to allocate ib_fast_reg_mr err=%d\n, ret); - goto fast_reg_mr_failure; - } - desc-valid = true; list_add_tail(desc-list, ib_conn-fastreg.pool); ib_conn-fastreg.pool_size++; } return 0; -fast_reg_mr_failure: - ib_free_fast_reg_page_list(desc-data_frpl); -fast_reg_page_failure: - kfree(desc); err: iser_free_fastreg_pool(ib_conn); return ret; -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 10/13] IB/iser: Support T10-PI operations
Add logic to initialize protection information entities. For each iSCSI task, we keep the scsi_cmnd in order to query the scsi protection operations and to reference the protection buffers. Modify iser_fast_reg_mr to receive an indication of whether it is registering the data or the protection buffers. In addition, introduce iser_reg_sig_mr, which performs a fast registration work-request for a signature enabled memory region (IB_WR_REG_SIG_MR). In this routine we set all the protection-related attributes needed for the device to offload protection data-transfer and verification. Signed-off-by: Sagi Grimberg sa...@mellanox.com Signed-off-by: Alex Tabachnik al...@mellanox.com --- drivers/infiniband/ulp/iser/iscsi_iser.c |2 + drivers/infiniband/ulp/iser/iscsi_iser.h |9 + drivers/infiniband/ulp/iser/iser_initiator.c | 63 ++- drivers/infiniband/ulp/iser/iser_memory.c| 257 +++--- 4 files changed, 304 insertions(+), 27 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index cfa952e..a64b878 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -184,6 +184,8 @@ iscsi_iser_task_init(struct iscsi_task *task) iser_task-command_sent = 0; iser_task_rdma_init(iser_task); + iser_task-sc = task-sc; + return 0; } diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 99fc8b8..fce5409 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -46,6 +46,8 @@ #include linux/printk.h #include scsi/libiscsi.h #include scsi/scsi_transport_iscsi.h +#include scsi/scsi_cmnd.h +#include scsi/scsi_device.h #include linux/interrupt.h #include linux/wait.h @@ -289,6 +291,10 @@ struct iser_device { enum iser_data_dir cmd_dir); }; +#define ISER_CHECK_GUARD 0xc0 +#define ISER_CHECK_REFTAG 0x0f +#define ISER_CHECK_APPTAG 0x30 + enum iser_reg_indicator { ISER_DATA_KEY_VALID = 1 0, ISER_PROT_KEY_VALID = 1 1, @@ -361,11 +367,14 @@ 
struct iscsi_iser_task { struct iser_tx_desc desc; struct iscsi_iser_conn *iser_conn; enum iser_task_statusstatus; + struct scsi_cmnd *sc; int command_sent; /* set if command sent */ int dir[ISER_DIRS_NUM]; /* set if dir use*/ struct iser_regd_buf rdma_regd[ISER_DIRS_NUM];/* regd rdma buf */ struct iser_data_buf data[ISER_DIRS_NUM]; /* orig. data des*/ struct iser_data_buf data_copy[ISER_DIRS_NUM];/* contig. copy */ + struct iser_data_buf prot[ISER_DIRS_NUM]; /* prot desc */ + struct iser_data_buf prot_copy[ISER_DIRS_NUM];/* prot copy */ }; struct iser_page_vec { diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 58e14c7..7fd95fe 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -62,6 +62,17 @@ static int iser_prepare_read_cmd(struct iscsi_task *task, if (err) return err; + if (scsi_prot_sg_count(iser_task-sc)) { + struct iser_data_buf *pbuf_in = iser_task-prot[ISER_DIR_IN]; + + err = iser_dma_map_task_data(iser_task, +pbuf_in, +ISER_DIR_IN, +DMA_FROM_DEVICE); + if (err) + return err; + } + if (edtl iser_task-data[ISER_DIR_IN].data_len) { iser_err(Total data length: %ld, less than EDTL: %d, in READ cmd BHS itt: %d, conn: 0x%p\n, @@ -113,6 +124,17 @@ iser_prepare_write_cmd(struct iscsi_task *task, if (err) return err; + if (scsi_prot_sg_count(iser_task-sc)) { + struct iser_data_buf *pbuf_out = iser_task-prot[ISER_DIR_OUT]; + + err = iser_dma_map_task_data(iser_task, +pbuf_out, +ISER_DIR_OUT, +DMA_TO_DEVICE); + if (err) + return err; + } + if (edtl iser_task-data[ISER_DIR_OUT].data_len) { iser_err(Total data length: %ld, less than EDTL: %d, in WRITE cmd BHS itt: %d, conn: 0x%p\n, @@ -368,7 +390,7 @@ int iser_send_command(struct iscsi_conn *conn, struct iscsi_iser_task *iser_task = task-dd_data; unsigned long edtl; int err; - struct iser_data_buf *data_buf; + struct iser_data_buf *data_buf
[PATCH v2 12/13] IB/iser: Implement check_protection
Once the iSCSI transaction is completed we must implement check_protection in order to notify on DIF errors that may have occurred. The routine boils down to calling ib_check_mr_status to get the signature status of the transaction. Signed-off-by: Sagi Grimberg sa...@mellanox.com Signed-off-by: Alex Tabachnik al...@mellanox.com Signed-off-by: Sagi Grimberg sa...@mellanox.com --- drivers/infiniband/ulp/iser/iscsi_iser.c | 13 drivers/infiniband/ulp/iser/iscsi_iser.h |2 + drivers/infiniband/ulp/iser/iser_verbs.c | 47 ++ 3 files changed, 62 insertions(+), 0 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index a64b878..f13d7e9 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -306,6 +306,18 @@ static void iscsi_iser_cleanup_task(struct iscsi_task *task) } } +static u8 iscsi_iser_check_protection(struct iscsi_task *task, sector_t *sector) +{ + struct iscsi_iser_task *iser_task = task-dd_data; + + if (iser_task-dir[ISER_DIR_IN]) + return iser_check_task_pi_status(iser_task, ISER_DIR_IN, +sector); + else + return iser_check_task_pi_status(iser_task, ISER_DIR_OUT, +sector); +} + static struct iscsi_cls_conn * iscsi_iser_conn_create(struct iscsi_cls_session *cls_session, uint32_t conn_idx) { @@ -742,6 +754,7 @@ static struct iscsi_transport iscsi_iser_transport = { .xmit_task = iscsi_iser_task_xmit, .cleanup_task = iscsi_iser_cleanup_task, .alloc_pdu = iscsi_iser_pdu_alloc, + .check_protection = iscsi_iser_check_protection, /* recovery */ .session_recovery_timedout = iscsi_session_recovery_timedout, diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index fce5409..95f291f 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -483,4 +483,6 @@ int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max); void iser_free_fmr_pool(struct iser_conn *ib_conn); 
int iser_create_fastreg_pool(struct iser_conn *ib_conn, unsigned cmds_max); void iser_free_fastreg_pool(struct iser_conn *ib_conn); +u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, +enum iser_data_dir cmd_dir, sector_t *sector); #endif diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 0404c71..abbb6ec 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -1153,3 +1153,50 @@ static void iser_cq_callback(struct ib_cq *cq, void *cq_context) tasklet_schedule(device-cq_tasklet[cq_index]); } + +u8 iser_check_task_pi_status(struct iscsi_iser_task *iser_task, +enum iser_data_dir cmd_dir, sector_t *sector) +{ + struct iser_mem_reg *reg = iser_task-rdma_regd[cmd_dir].reg; + struct fast_reg_descriptor *desc = reg-mem_h; + unsigned long sector_size = iser_task-sc-device-sector_size; + struct ib_mr_status mr_status; + int ret; + + if (desc desc-reg_indicators ISER_FASTREG_PROTECTED) { + desc-reg_indicators = ~ISER_FASTREG_PROTECTED; + ret = ib_check_mr_status(desc-pi_ctx-sig_mr, +IB_MR_CHECK_SIG_STATUS, mr_status); + if (ret) { + pr_err(ib_check_mr_status failed, ret %d\n, ret); + goto err; + } + + if (mr_status.fail_status IB_MR_CHECK_SIG_STATUS) { + sector_t sector_off = mr_status.sig_err.sig_err_offset; + + do_div(sector_off, sector_size + 8); + *sector = scsi_get_lba(iser_task-sc) + sector_off; + + pr_err(PI error found type %d at sector %lx + expected %x vs actual %x\n, + mr_status.sig_err.err_type, *sector, + mr_status.sig_err.expected, + mr_status.sig_err.actual); + + switch (mr_status.sig_err.err_type) { + case IB_SIG_BAD_GUARD: + return 0x1; + case IB_SIG_BAD_REFTAG: + return 0x3; + case IB_SIG_BAD_APPTAG: + return 0x2; + } + } + } + + return 0; +err: + /* Not alot we can do here, return ambiguous guard error */ + return 0x1; +} -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message
[PATCH v2 00/13] T10-PI support for iSER initiator
Hey Roland, Nic, Mike and Co, This patchset adds T10 protection information offload support over the RDMA signature verbs API. This set, along with the iSER target set, allows end-to-end protection information passthrough and validation. The patchset was tested against the Linux SCSI target with iSER DIF support applied. iSER T10-PI support enablement is currently controlled with module parameters (similar to lpfc for example), which makes them global. In the next phase we can consider passing these parameters from iscsid nl messages, which would make them per-connection. The approach I took with respect to escalating protection information errors was minimal iSCSI intervention in protection information affairs. I added a hook to libiscsi asking the transport to check the protection information status, and construct the proper sense data in case of errors (the alternative of letting the transport construct sense data seemed much less appealing). Note that this patchset comes on top of a pending patch for iSER to suppress fastreg completions (http://marc.info/?l=linux-rdma&m=139047309831997&w=2). v0 patches are available in the target-pending git repo (branch rdma-dif) and passed 0-DAY testing. Roland, I would like to hear your feedback on this. The set is ordered as follows: - Preparation patches (non/minor functionality changes). - Add protection information execution support. - Add protection information status check facilities. - Publish T10-DIF support to SCSI mid-layer according to IB device capabilities. Changes from v1: - Removed extra space (BUG_ON). - Dropped DID_ABORT from sc result for data integrity errors. - Fixed failed sector report. Changes from v0: - Fix protection information dma registration for unaligned scatterlists which may happen when the block layer merges bios. - Don't fail connections on devices without DIF support - warn and continue without DIF. 
- reword FR - FastReg Alex Tabachnik (2): IB/iser: Introduce pi_enable, pi_guard module parameters IB/iser: Initialize T10-PI resources Sagi Grimberg (11): IB/iser: Avoid FRWR notation, use fastreg instead IB/iser: Push the decision what memory key to use into fast_reg_mr routine IB/iser: Move fast_reg_descriptor initialization to a function IB/iser: Keep IB device attributes under iser_device IB/iser: Replace fastreg descriptor valid bool with indicators container IB/iser: Generalize iser_unmap_task_data and finalize_rdma_unaligned_sg IB/iser: Generalize fall_to_bounce_buf routine IB/iser: Support T10-PI operations SCSI/libiscsi: Add check_protection callback for transports IB/iser: Implement check_protection IB/iser: Publish T10-PI support to SCSI midlayer drivers/infiniband/ulp/iser/iscsi_iser.c | 46 +++- drivers/infiniband/ulp/iser/iscsi_iser.h | 71 - drivers/infiniband/ulp/iser/iser_initiator.c | 98 +- drivers/infiniband/ulp/iser/iser_memory.c| 445 +++--- drivers/infiniband/ulp/iser/iser_verbs.c | 287 - drivers/scsi/libiscsi.c | 32 ++ include/scsi/libiscsi.h |4 + include/scsi/scsi_transport_iscsi.h |1 + 8 files changed, 771 insertions(+), 213 deletions(-) -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 01/13] IB/iser: Avoid FRWR notation, use fastreg instead
FRWR stands for fast registration work request. We want to avoid calling the fastreg pool with that name, instead we name it fastreg which stands for fast registration. This pool will include more elements in the future, so it is a good idea to generalize the name. Signed-off-by: Sagi Grimberg sa...@mellanox.com Signed-off-by: Alex Tabachnik al...@mellanox.com --- drivers/infiniband/ulp/iser/iscsi_iser.h | 20 --- drivers/infiniband/ulp/iser/iser_memory.c | 28 +- drivers/infiniband/ulp/iser/iser_verbs.c | 84 ++-- 3 files changed, 67 insertions(+), 65 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index e1a01c6..ca161df 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -138,7 +138,7 @@ #define ISER_WSV 0x08 #define ISER_RSV 0x04 -#define ISER_FRWR_LI_WRID 0xULL +#define ISER_FASTREG_LI_WRID 0xULL struct iser_hdr { u8 flags; @@ -312,6 +312,8 @@ struct iser_conn { unsigned int rx_desc_head; struct iser_rx_desc *rx_descs; struct ib_recv_wrrx_wr[ISER_MIN_POSTED_RX]; + + /* Connection memory registration pool */ union { struct { struct ib_fmr_pool *pool; /* pool of IB FMRs */ @@ -321,8 +323,8 @@ struct iser_conn { struct { struct list_headpool; int pool_size; - } frwr; - } fastreg; + } fastreg; + }; }; struct iscsi_iser_conn { @@ -408,8 +410,8 @@ void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *task, int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task, enum iser_data_dir cmd_dir); -int iser_reg_rdma_mem_frwr(struct iscsi_iser_task *task, - enum iser_data_dir cmd_dir); +int iser_reg_rdma_mem_fastreg(struct iscsi_iser_task *task, + enum iser_data_dir cmd_dir); int iser_connect(struct iser_conn *ib_conn, struct sockaddr_in *src_addr, @@ -422,8 +424,8 @@ int iser_reg_page_vec(struct iser_conn *ib_conn, void iser_unreg_mem_fmr(struct iscsi_iser_task *iser_task, enum iser_data_dir cmd_dir); -void iser_unreg_mem_frwr(struct iscsi_iser_task 
*iser_task, -enum iser_data_dir cmd_dir); +void iser_unreg_mem_fastreg(struct iscsi_iser_task *iser_task, + enum iser_data_dir cmd_dir); int iser_post_recvl(struct iser_conn *ib_conn); int iser_post_recvm(struct iser_conn *ib_conn, int count); @@ -440,6 +442,6 @@ int iser_initialize_task_headers(struct iscsi_task *task, int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session); int iser_create_fmr_pool(struct iser_conn *ib_conn, unsigned cmds_max); void iser_free_fmr_pool(struct iser_conn *ib_conn); -int iser_create_frwr_pool(struct iser_conn *ib_conn, unsigned cmds_max); -void iser_free_frwr_pool(struct iser_conn *ib_conn); +int iser_create_fastreg_pool(struct iser_conn *ib_conn, unsigned cmds_max); +void iser_free_fastreg_pool(struct iser_conn *ib_conn); #endif diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index f770179..6e9b7bc 100644 --- a/drivers/infiniband/ulp/iser/iser_memory.c +++ b/drivers/infiniband/ulp/iser/iser_memory.c @@ -422,8 +422,8 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task, (unsigned long)regd_buf-reg.va, (unsigned long)regd_buf-reg.len); } else { /* use FMR for multiple dma entries */ - iser_page_vec_build(mem, ib_conn-fastreg.fmr.page_vec, ibdev); - err = iser_reg_page_vec(ib_conn, ib_conn-fastreg.fmr.page_vec, + iser_page_vec_build(mem, ib_conn-fmr.page_vec, ibdev); + err = iser_reg_page_vec(ib_conn, ib_conn-fmr.page_vec, regd_buf-reg); if (err err != -EAGAIN) { iser_data_buf_dump(mem, ibdev); @@ -431,12 +431,12 @@ int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *iser_task, mem-dma_nents, ntoh24(iser_task-desc.iscsi_header.dlength)); iser_err(page_vec: data_size = 0x%x, length = %d, offset = 0x%x\n, -ib_conn-fastreg.fmr.page_vec-data_size, -ib_conn-fastreg.fmr.page_vec-length, -ib_conn-fastreg.fmr.page_vec-offset); - for (i = 0; i ib_conn-fastreg.fmr.page_vec-length; i
[PATCH v2 13/13] IB/iser: Publish T10-PI support to SCSI midlayer
After allocating a scsi_host we set protection types and guard type supported. Signed-off-by: Sagi Grimberg sa...@mellanox.com Signed-off-by: Alex Tabachnik al...@mellanox.com --- drivers/infiniband/ulp/iser/iscsi_iser.c | 23 ++- 1 files changed, 22 insertions(+), 1 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.c b/drivers/infiniband/ulp/iser/iscsi_iser.c index f13d7e9..a0ec2d0 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.c +++ b/drivers/infiniband/ulp/iser/iscsi_iser.c @@ -435,6 +435,17 @@ static void iscsi_iser_session_destroy(struct iscsi_cls_session *cls_session) iscsi_host_free(shost); } +static inline unsigned int +iser_dif_prot_caps(int prot_caps) +{ + return ((prot_caps IB_PROT_T10DIF_TYPE_1) ? SHOST_DIF_TYPE1_PROTECTION | + SHOST_DIX_TYPE1_PROTECTION : 0) | + ((prot_caps IB_PROT_T10DIF_TYPE_2) ? SHOST_DIF_TYPE2_PROTECTION | + SHOST_DIX_TYPE2_PROTECTION : 0) | + ((prot_caps IB_PROT_T10DIF_TYPE_3) ? SHOST_DIF_TYPE3_PROTECTION | + SHOST_DIX_TYPE3_PROTECTION : 0); +} + static struct iscsi_cls_session * iscsi_iser_session_create(struct iscsi_endpoint *ep, uint16_t cmds_max, uint16_t qdepth, @@ -459,8 +470,18 @@ iscsi_iser_session_create(struct iscsi_endpoint *ep, * older userspace tools (before 2.0-870) did not pass us * the leading conn's ep so this will be NULL; */ - if (ep) + if (ep) { ib_conn = ep-dd_data; + if (ib_conn-pi_support) { + u32 sig_caps = ib_conn-device-dev_attr.sig_prot_cap; + + scsi_host_set_prot(shost, iser_dif_prot_caps(sig_caps)); + if (iser_pi_guard) + scsi_host_set_guard(shost, SHOST_DIX_GUARD_IP); + else + scsi_host_set_guard(shost, SHOST_DIX_GUARD_CRC); + } + } if (iscsi_host_add(shost, ep ? ib_conn-device-ib_device-dma_device : NULL)) -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 04/13] IB/iser: Keep IB device attributes under iser_device
For T10-PI offload support, we will need to know the device signature offload capability upon every connection establishment. This patch does not change any functionality. Signed-off-by: Sagi Grimberg sa...@mellanox.com Signed-off-by: Alex Tabachnik al...@mellanox.com --- drivers/infiniband/ulp/iser/iscsi_iser.h |1 + drivers/infiniband/ulp/iser/iser_verbs.c | 18 ++ 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index ca161df..b4290f5 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -260,6 +260,7 @@ struct iscsi_iser_task; struct iser_device { struct ib_device *ib_device; struct ib_pd *pd; + struct ib_device_attrdev_attr; struct ib_cq *rx_cq[ISER_MAX_CQ]; struct ib_cq *tx_cq[ISER_MAX_CQ]; struct ib_mr *mr; diff --git a/drivers/infiniband/ulp/iser/iser_verbs.c b/drivers/infiniband/ulp/iser/iser_verbs.c index 9569e40..95fcfca 100644 --- a/drivers/infiniband/ulp/iser/iser_verbs.c +++ b/drivers/infiniband/ulp/iser/iser_verbs.c @@ -71,17 +71,14 @@ static void iser_event_handler(struct ib_event_handler *handler, */ static int iser_create_device_ib_res(struct iser_device *device) { - int i, j; struct iser_cq_desc *cq_desc; - struct ib_device_attr *dev_attr; + struct ib_device_attr *dev_attr = device-dev_attr; + int ret, i, j; - dev_attr = kmalloc(sizeof(*dev_attr), GFP_KERNEL); - if (!dev_attr) - return -ENOMEM; - - if (ib_query_device(device-ib_device, dev_attr)) { + ret = ib_query_device(device-ib_device, dev_attr); + if (ret) { pr_warn(Query device failed for %s\n, device-ib_device-name); - goto dev_attr_err; + return ret; } /* Assign function handles - based on FMR support */ @@ -101,7 +98,7 @@ static int iser_create_device_ib_res(struct iser_device *device) device-iser_unreg_rdma_mem = iser_unreg_mem_fastreg; } else { iser_err(IB device does not support FMRs nor FastRegs, can't register memory\n); - goto 
dev_attr_err; + return -1; } device-cqs_used = min(ISER_MAX_CQ, device-ib_device-num_comp_vectors); @@ -158,7 +155,6 @@ static int iser_create_device_ib_res(struct iser_device *device) if (ib_register_event_handler(device-event_handler)) goto handler_err; - kfree(dev_attr); return 0; handler_err: @@ -178,8 +174,6 @@ pd_err: kfree(device-cq_desc); cq_desc_err: iser_err(failed to allocate an IB resource\n); -dev_attr_err: - kfree(dev_attr); return -1; } -- 1.7.1 -- To unsubscribe from this list: send the line unsubscribe linux-scsi in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2 06/13] IB/iser: Generalize iser_unmap_task_data and finalize_rdma_unaligned_sg
These routines operate on data buffers and may also work with protection information buffers. So we generalize them to handle an iser_data_buf which can be the command data or command protection information. This patch does not change any functionality. Signed-off-by: Sagi Grimberg sa...@mellanox.com --- drivers/infiniband/ulp/iser/iscsi_iser.h |9 -- drivers/infiniband/ulp/iser/iser_initiator.c | 37 +--- drivers/infiniband/ulp/iser/iser_memory.c| 40 ++--- 3 files changed, 48 insertions(+), 38 deletions(-) diff --git a/drivers/infiniband/ulp/iser/iscsi_iser.h b/drivers/infiniband/ulp/iser/iscsi_iser.h index 5660714..623defa 100644 --- a/drivers/infiniband/ulp/iser/iscsi_iser.h +++ b/drivers/infiniband/ulp/iser/iscsi_iser.h @@ -410,8 +410,10 @@ void iser_task_rdma_finalize(struct iscsi_iser_task *task); void iser_free_rx_descriptors(struct iser_conn *ib_conn); -void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *task, -enum iser_data_dir cmd_dir); +void iser_finalize_rdma_unaligned_sg(struct iscsi_iser_task *iser_task, +struct iser_data_buf *mem, +struct iser_data_buf *mem_copy, +enum iser_data_dir cmd_dir); int iser_reg_rdma_mem_fmr(struct iscsi_iser_task *task, enum iser_data_dir cmd_dir); @@ -441,7 +443,8 @@ int iser_dma_map_task_data(struct iscsi_iser_task *iser_task, enum iser_data_dir iser_dir, enum dma_data_direction dma_dir); -void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task); +void iser_dma_unmap_task_data(struct iscsi_iser_task *iser_task, + struct iser_data_buf *data); int iser_initialize_task_headers(struct iscsi_task *task, struct iser_tx_desc *tx_desc); int iser_alloc_rx_descriptors(struct iser_conn *ib_conn, struct iscsi_session *session); diff --git a/drivers/infiniband/ulp/iser/iser_initiator.c b/drivers/infiniband/ulp/iser/iser_initiator.c index 334f34b..58e14c7 100644 --- a/drivers/infiniband/ulp/iser/iser_initiator.c +++ b/drivers/infiniband/ulp/iser/iser_initiator.c @@ -644,27 +644,42 @@ void iser_task_rdma_init(struct 
iscsi_iser_task *iser_task) void iser_task_rdma_finalize(struct iscsi_iser_task *iser_task) { struct iser_device *device = iser_task-iser_conn-ib_conn-device; - int is_rdma_aligned = 1; + int is_rdma_data_aligned = 1; /* if we were reading, copy back to unaligned sglist, * anyway dma_unmap and free the copy */ if (iser_task-data_copy[ISER_DIR_IN].copy_buf != NULL) { - is_rdma_aligned = 0; - iser_finalize_rdma_unaligned_sg(iser_task, ISER_DIR_IN); + is_rdma_data_aligned = 0; + iser_finalize_rdma_unaligned_sg(iser_task, + iser_task-data[ISER_DIR_IN], + iser_task-data_copy[ISER_DIR_IN], + ISER_DIR_IN); } + if (iser_task-data_copy[ISER_DIR_OUT].copy_buf != NULL) { - is_rdma_aligned = 0; - iser_finalize_rdma_unaligned_sg(iser_task, ISER_DIR_OUT); + is_rdma_data_aligned = 0; + iser_finalize_rdma_unaligned_sg(iser_task, + iser_task-data[ISER_DIR_OUT], + iser_task-data_copy[ISER_DIR_OUT], + ISER_DIR_OUT); } - if (iser_task-dir[ISER_DIR_IN]) + if (iser_task-dir[ISER_DIR_IN]) { device-iser_unreg_rdma_mem(iser_task, ISER_DIR_IN); + if (is_rdma_data_aligned) + iser_dma_unmap_task_data(iser_task, +iser_task-data[ISER_DIR_IN]); - if (iser_task-dir[ISER_DIR_OUT]) - device-iser_unreg_rdma_mem(iser_task, ISER_DIR_OUT); + } - /* if the data was unaligned, it was already unmapped and then copied */ - if (is_rdma_aligned) - iser_dma_unmap_task_data(iser_task); + if (iser_task-dir[ISER_DIR_OUT]) { + device-iser_unreg_rdma_mem(iser_task, ISER_DIR_OUT); + if (is_rdma_data_aligned) + iser_dma_unmap_task_data(iser_task, + iser_task-data[ISER_DIR_OUT]); + if (prot_count is_rdma_prot_aligned) + iser_dma_unmap_task_data(iser_task, + iser_task-prot[ISER_DIR_OUT]); + } } diff --git a/drivers/infiniband/ulp/iser/iser_memory.c b/drivers/infiniband/ulp/iser/iser_memory.c index