Hi Martin, 

Can you review this patch and pull it into scsi since Nicholas has
been out for awhile?

I have tested this patch and really like the performance enhancements
as a result of it.

Thanks,

Bryant


On 4/19/18 1:29 PM, Andrei Vagin wrote:

> Hello Nicholas,
>
> What do you think about this patch?
>
> Thanks,
> Andrei
>
> On Wed, Mar 21, 2018 at 11:55:02PM -0700, Andrei Vagin wrote:
>> There are two advantages:
>> * Direct I/O allows to avoid the write-back cache, so it reduces
>>   affects to other processes in the system.
>> * Async I/O allows to handle a few commands concurrently.
>>
>> DIO + AIO shows a better perfomance for random write operations:
>>
>> Mode: O_DSYNC Async: 1
>> $ ./fio --bs=4K --direct=1 --rw=randwrite --ioengine=libaio --iodepth=64 
>> --name=/dev/sda --runtime=20 --numjobs=2
>>   WRITE: bw=45.9MiB/s (48.1MB/s), 21.9MiB/s-23.0MiB/s (22.0MB/s-25.2MB/s), 
>> io=919MiB (963MB), run=20002-20020msec
>>
>> Mode: O_DSYNC Async: 0
>> $ ./fio --bs=4K --direct=1 --rw=randwrite --ioengine=libaio --iodepth=64 
>> --name=/dev/sdb --runtime=20 --numjobs=2
>>   WRITE: bw=1607KiB/s (1645kB/s), 802KiB/s-805KiB/s (821kB/s-824kB/s), 
>> io=31.8MiB (33.4MB), run=20280-20295msec
>>
>> Known issue:
>>
>> DIF (PI) emulation doesn't work when a target uses async I/O, because
>> DIF metadata is saved in a separate file, and it is another non-trivial
>> task how to synchronize writing in two files, so that a following read
>> operation always returns a consisten metadata for a specified block.
>>
>> v2: fix comments from Christoph Hellwig
>>
>> Cc: "Nicholas A. Bellinger" <n...@linux-iscsi.org>
>> Cc: Christoph Hellwig <h...@infradead.org>
>> Cc: Bryant G. Ly <bryan...@linux.vnet.ibm.com>
>> Tested-by: Bryant G. Ly <bryan...@linux.vnet.ibm.com>
>> Signed-off-by: Andrei Vagin <ava...@openvz.org>
>> ---
>>  drivers/target/target_core_file.c | 137 
>> ++++++++++++++++++++++++++++++++++----
>>  drivers/target/target_core_file.h |   1 +
>>  2 files changed, 124 insertions(+), 14 deletions(-)
>>
>> diff --git a/drivers/target/target_core_file.c 
>> b/drivers/target/target_core_file.c
>> index 9b2c0c773022..16751ae55d7b 100644
>> --- a/drivers/target/target_core_file.c
>> +++ b/drivers/target/target_core_file.c
>> @@ -250,6 +250,84 @@ static void fd_destroy_device(struct se_device *dev)
>>      }
>>  }
>>  
>> +struct target_core_file_cmd {
>> +    unsigned long   len;
>> +    struct se_cmd   *cmd;
>> +    struct kiocb    iocb;
>> +};
>> +
>> +static void cmd_rw_aio_complete(struct kiocb *iocb, long ret, long ret2)
>> +{
>> +    struct target_core_file_cmd *cmd;
>> +
>> +    cmd = container_of(iocb, struct target_core_file_cmd, iocb);
>> +
>> +    if (ret != cmd->len)
>> +            target_complete_cmd(cmd->cmd, SAM_STAT_CHECK_CONDITION);
>> +    else
>> +            target_complete_cmd(cmd->cmd, SAM_STAT_GOOD);
>> +
>> +    kfree(cmd);
>> +}
>> +
>> +static sense_reason_t
>> +fd_execute_rw_aio(struct se_cmd *cmd, struct scatterlist *sgl, u32 
>> sgl_nents,
>> +          enum dma_data_direction data_direction)
>> +{
>> +    int is_write = !(data_direction == DMA_FROM_DEVICE);
>> +    struct se_device *dev = cmd->se_dev;
>> +    struct fd_dev *fd_dev = FD_DEV(dev);
>> +    struct file *file = fd_dev->fd_file;
>> +    struct target_core_file_cmd *aio_cmd;
>> +    struct iov_iter iter = {};
>> +    struct scatterlist *sg;
>> +    struct bio_vec *bvec;
>> +    ssize_t len = 0;
>> +    int ret = 0, i;
>> +
>> +    aio_cmd = kmalloc(sizeof(struct target_core_file_cmd), GFP_KERNEL);
>> +    if (!aio_cmd)
>> +            return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
>> +
>> +    bvec = kcalloc(sgl_nents, sizeof(struct bio_vec), GFP_KERNEL);
>> +    if (!bvec) {
>> +            kfree(aio_cmd);
>> +            return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
>> +    }
>> +
>> +    for_each_sg(sgl, sg, sgl_nents, i) {
>> +            bvec[i].bv_page = sg_page(sg);
>> +            bvec[i].bv_len = sg->length;
>> +            bvec[i].bv_offset = sg->offset;
>> +
>> +            len += sg->length;
>> +    }
>> +
>> +    iov_iter_bvec(&iter, ITER_BVEC | is_write, bvec, sgl_nents, len);
>> +
>> +    aio_cmd->cmd = cmd;
>> +    aio_cmd->len = len;
>> +    aio_cmd->iocb.ki_pos = cmd->t_task_lba * dev->dev_attrib.block_size;
>> +    aio_cmd->iocb.ki_filp = file;
>> +    aio_cmd->iocb.ki_complete = cmd_rw_aio_complete;
>> +    aio_cmd->iocb.ki_flags = IOCB_DIRECT;
>> +
>> +    if (is_write && (cmd->se_cmd_flags & SCF_FUA))
>> +            aio_cmd->iocb.ki_flags |= IOCB_DSYNC;
>> +
>> +    if (is_write)
>> +            ret = call_write_iter(file, &aio_cmd->iocb, &iter);
>> +    else
>> +            ret = call_read_iter(file, &aio_cmd->iocb, &iter);
>> +
>> +    kfree(bvec);
>> +
>> +    if (ret != -EIOCBQUEUED)
>> +            cmd_rw_aio_complete(&aio_cmd->iocb, ret, 0);
>> +
>> +    return 0;
>> +}
>> +
>>  static int fd_do_rw(struct se_cmd *cmd, struct file *fd,
>>                  u32 block_size, struct scatterlist *sgl,
>>                  u32 sgl_nents, u32 data_length, int is_write)
>> @@ -527,7 +605,7 @@ fd_execute_unmap(struct se_cmd *cmd, sector_t lba, 
>> sector_t nolb)
>>  }
>>  
>>  static sense_reason_t
>> -fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
>> +fd_execute_rw_buffered(struct se_cmd *cmd, struct scatterlist *sgl, u32 
>> sgl_nents,
>>            enum dma_data_direction data_direction)
>>  {
>>      struct se_device *dev = cmd->se_dev;
>> @@ -537,16 +615,6 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist 
>> *sgl, u32 sgl_nents,
>>      sense_reason_t rc;
>>      int ret = 0;
>>      /*
>> -     * We are currently limited by the number of iovecs (2048) per
>> -     * single vfs_[writev,readv] call.
>> -     */
>> -    if (cmd->data_length > FD_MAX_BYTES) {
>> -            pr_err("FILEIO: Not able to process I/O of %u bytes due to"
>> -                   "FD_MAX_BYTES: %u iovec count limitation\n",
>> -                    cmd->data_length, FD_MAX_BYTES);
>> -            return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
>> -    }
>> -    /*
>>       * Call vectorized fileio functions to map struct scatterlist
>>       * physical memory addresses to struct iovec virtual memory.
>>       */
>> @@ -620,14 +688,39 @@ fd_execute_rw(struct se_cmd *cmd, struct scatterlist 
>> *sgl, u32 sgl_nents,
>>      return 0;
>>  }
>>  
>> +static sense_reason_t
>> +fd_execute_rw(struct se_cmd *cmd, struct scatterlist *sgl, u32 sgl_nents,
>> +          enum dma_data_direction data_direction)
>> +{
>> +    struct se_device *dev = cmd->se_dev;
>> +    struct fd_dev *fd_dev = FD_DEV(dev);
>> +
>> +    /*
>> +     * We are currently limited by the number of iovecs (2048) per
>> +     * single vfs_[writev,readv] call.
>> +     */
>> +    if (cmd->data_length > FD_MAX_BYTES) {
>> +            pr_err("FILEIO: Not able to process I/O of %u bytes due to"
>> +                   "FD_MAX_BYTES: %u iovec count limitation\n",
>> +                    cmd->data_length, FD_MAX_BYTES);
>> +            return TCM_LOGICAL_UNIT_COMMUNICATION_FAILURE;
>> +    }
>> +
>> +    if (fd_dev->fbd_flags & FDBD_HAS_ASYNC_IO)
>> +            return fd_execute_rw_aio(cmd, sgl, sgl_nents, data_direction);
>> +    return fd_execute_rw_buffered(cmd, sgl, sgl_nents, data_direction);
>> +}
>> +
>>  enum {
>> -    Opt_fd_dev_name, Opt_fd_dev_size, Opt_fd_buffered_io, Opt_err
>> +    Opt_fd_dev_name, Opt_fd_dev_size, Opt_fd_buffered_io,
>> +    Opt_fd_async_io, Opt_err
>>  };
>>  
>>  static match_table_t tokens = {
>>      {Opt_fd_dev_name, "fd_dev_name=%s"},
>>      {Opt_fd_dev_size, "fd_dev_size=%s"},
>>      {Opt_fd_buffered_io, "fd_buffered_io=%d"},
>> +    {Opt_fd_async_io, "fd_async_io=%d"},
>>      {Opt_err, NULL}
>>  };
>>  
>> @@ -693,6 +786,21 @@ static ssize_t fd_set_configfs_dev_params(struct 
>> se_device *dev,
>>  
>>                      fd_dev->fbd_flags |= FDBD_HAS_BUFFERED_IO_WCE;
>>                      break;
>> +            case Opt_fd_async_io:
>> +                    ret = match_int(args, &arg);
>> +                    if (ret)
>> +                            goto out;
>> +                    if (arg != 1) {
>> +                            pr_err("bogus fd_async_io=%d value\n", arg);
>> +                            ret = -EINVAL;
>> +                            goto out;
>> +                    }
>> +
>> +                    pr_debug("FILEIO: Using async I/O"
>> +                            " operations for struct fd_dev\n");
>> +
>> +                    fd_dev->fbd_flags |= FDBD_HAS_ASYNC_IO;
>> +                    break;
>>              default:
>>                      break;
>>              }
>> @@ -709,10 +817,11 @@ static ssize_t fd_show_configfs_dev_params(struct 
>> se_device *dev, char *b)
>>      ssize_t bl = 0;
>>  
>>      bl = sprintf(b + bl, "TCM FILEIO ID: %u", fd_dev->fd_dev_id);
>> -    bl += sprintf(b + bl, "        File: %s  Size: %llu  Mode: %s\n",
>> +    bl += sprintf(b + bl, "        File: %s  Size: %llu  Mode: %s Async: 
>> %d\n",
>>              fd_dev->fd_dev_name, fd_dev->fd_dev_size,
>>              (fd_dev->fbd_flags & FDBD_HAS_BUFFERED_IO_WCE) ?
>> -            "Buffered-WCE" : "O_DSYNC");
>> +            "Buffered-WCE" : "O_DSYNC",
>> +            !!(fd_dev->fbd_flags & FDBD_HAS_ASYNC_IO));
>>      return bl;
>>  }
>>  
>> diff --git a/drivers/target/target_core_file.h 
>> b/drivers/target/target_core_file.h
>> index 53be5ffd3261..929b1ecd544e 100644
>> --- a/drivers/target/target_core_file.h
>> +++ b/drivers/target/target_core_file.h
>> @@ -22,6 +22,7 @@
>>  #define FBDF_HAS_PATH               0x01
>>  #define FBDF_HAS_SIZE               0x02
>>  #define FDBD_HAS_BUFFERED_IO_WCE 0x04
>> +#define FDBD_HAS_ASYNC_IO    0x08
>>  #define FDBD_FORMAT_UNIT_SIZE       2048
>>  
>>  struct fd_dev {
>> -- 
>> 2.13.6
>>

Reply via email to