Dave Jiang <[email protected]> writes:

> The daxctl io option allows I/O to be performed between a block device or
> file and a device dax device, in either direction. It also provides a way
> to zero a device dax device.
>
> i.e. daxctl io --input=/home/myfile --output=/dev/dax1.0

Does that really belong in daxctl?

-Jeff

> Signed-off-by: Dave Jiang <[email protected]>
> ---
>  Documentation/Makefile.am   |    3 
>  Documentation/daxctl-io.txt |   71 +++++
>  daxctl/Makefile.am          |    5 
>  daxctl/daxctl.c             |    2 
>  daxctl/io.c                 |  567 +++++++++++++++++++++++++++++++++++++++++++
>  5 files changed, 646 insertions(+), 2 deletions(-)
>  create mode 100644 Documentation/daxctl-io.txt
>  create mode 100644 daxctl/io.c
>
> diff --git a/Documentation/Makefile.am b/Documentation/Makefile.am
> index c7e0758..8efdbc2 100644
> --- a/Documentation/Makefile.am
> +++ b/Documentation/Makefile.am
> @@ -26,7 +26,8 @@ man1_MANS = \
>       ndctl-destroy-namespace.1 \
>       ndctl-check-namespace.1 \
>       ndctl-list.1 \
> -     daxctl-list.1
> +     daxctl-list.1 \
> +     daxctl-io.1
>  
>  CLEANFILES = $(man1_MANS)
>  
> diff --git a/Documentation/daxctl-io.txt b/Documentation/daxctl-io.txt
> new file mode 100644
> index 0000000..c3ddd15
> --- /dev/null
> +++ b/Documentation/daxctl-io.txt
> @@ -0,0 +1,71 @@
> +daxctl-io(1)
> +===========
> +
> +NAME
> +----
> +daxctl-io - Perform I/O on Device-DAX devices or zero a Device-DAX device.
> +
> +SYNOPSIS
> +--------
> +[verse]
> +'daxctl io' [<options>]
> +
> +A Device-DAX device must be involved as either the input or the output.
> +Read from a Device-DAX device and write to a file, a block device,
> +another Device-DAX device, or stdout (if no output is provided). Write
> +to a Device-DAX device from a file, a block device, another Device-DAX
> +device, or stdin (if no input is provided).
> +
> +If no length is specified, it defaults to the input file/device length. If
> +the input is a special char file, the length is the output file/device length.
> +
> +If no input is given, stdin is used. If no output is given, stdout is used.
> +
> +When writing to a Device-DAX device, an attempt is made to clear any
> +badblocks within the range being written.
> +
> +EXAMPLE
> +-------
> +[verse]
> +# daxctl io --zero --output=/dev/dax1.0
> +
> +# daxctl io --input=/dev/dax1.0 --output=/home/myfile --len=2097152 --seek=4096
> +
> +# cat /dev/zero | daxctl io --output=/dev/dax1.0
> +
> +# daxctl io --input=/dev/zero --output=/dev/dax1.0 --skip=4096
> +
> +OPTIONS
> +-------
> +-i::
> +--input=::
> +     Input device or file to read from.
> +
> +-o::
> +--output=::
> +     Output device or file to write to.
> +
> +-z::
> +--zero::
> +     Zero 'len' bytes of the output device, or the entire device if no
> +     length is provided. The output device must be a Device-DAX device.
> +
> +-l::
> +--len::
> +     The length in bytes to perform the I/O.
> +
> +-s::
> +--seek::
> +     The number of bytes to skip over on the output before performing a
> +     write.
> +
> +-k::
> +--skip::
> +     The number of bytes to skip over on the input before performing a read.
> +
> +COPYRIGHT
> +---------
> +Copyright (c) 2017, Intel Corporation. License GPLv2: GNU GPL
> +version 2 <http://gnu.org/licenses/gpl.html>.  This is free software:
> +you are free to change and redistribute it.  There is NO WARRANTY, to
> +the extent permitted by law.
> diff --git a/daxctl/Makefile.am b/daxctl/Makefile.am
> index fe467d0..1ba1f07 100644
> --- a/daxctl/Makefile.am
> +++ b/daxctl/Makefile.am
> @@ -5,10 +5,13 @@ bin_PROGRAMS = daxctl
>  daxctl_SOURCES =\
>               daxctl.c \
>               list.c \
> +             io.c \
>               ../util/json.c
>  
>  daxctl_LDADD =\
>       lib/libdaxctl.la \
> +     ../ndctl/lib/libndctl.la \
>       ../libutil.a \
>       $(UUID_LIBS) \
> -     $(JSON_LIBS)
> +     $(JSON_LIBS) \
> +     -lpmem
> diff --git a/daxctl/daxctl.c b/daxctl/daxctl.c
> index 91a4600..db2e495 100644
> --- a/daxctl/daxctl.c
> +++ b/daxctl/daxctl.c
> @@ -67,11 +67,13 @@ static int cmd_help(int argc, const char **argv, void *ctx)
>  }
>  
>  int cmd_list(int argc, const char **argv, void *ctx);
> +int cmd_io(int argc, const char **argv, void *ctx);
>  
>  static struct cmd_struct commands[] = {
>       { "version", cmd_version },
>       { "list", cmd_list },
>       { "help", cmd_help },
> +     { "io", cmd_io },
>  };
>  
>  int main(int argc, const char **argv)
> diff --git a/daxctl/io.c b/daxctl/io.c
> new file mode 100644
> index 0000000..92e2878
> --- /dev/null
> +++ b/daxctl/io.c
> @@ -0,0 +1,567 @@
> +/*
> + * Copyright(c) 2015-2017 Intel Corporation. All rights reserved.
> + *
> + * This program is free software; you can redistribute it and/or modify it
> + * under the terms of version 2 of the GNU General Public License as
> + * published by the Free Software Foundation.
> + *
> + * This program is distributed in the hope that it will be useful, but
> + * WITHOUT ANY WARRANTY; without even the implied warranty of
> + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
> + * General Public License for more details.
> + */
> +#include <stdio.h>
> +#include <errno.h>
> +#include <stdlib.h>
> +#include <sys/types.h>
> +#include <sys/stat.h>
> +#include <sys/sysmacros.h>
> +#include <sys/param.h>
> +#include <sys/mman.h>
> +#include <fcntl.h>
> +#include <unistd.h>
> +#include <limits.h>
> +#include <libgen.h>
> +#include <libpmem.h>
> +#include <util/json.h>
> +#include <util/filter.h>
> +#include <json-c/json.h>
> +#include <daxctl/libdaxctl.h>
> +#include <ccan/short_types/short_types.h>
> +#include <util/parse-options.h>
> +#include <ccan/array_size/array_size.h>
> +#include <ndctl/ndctl.h>
> +
> +enum io_direction {
> +     IO_READ = 0,
> +     IO_WRITE,
> +};
> +
> +struct io_dev {
> +     int fd;
> +     int major;
> +     int minor;
> +     void *mmap;
> +     const char *parm_path;
> +     char *real_path;
> +     uint64_t offset;
> +     enum io_direction direction;
> +     bool is_dax;
> +     bool is_char;
> +     bool is_new;
> +     bool need_trunc;
> +     struct ndctl_ctx *ndctx;
> +     struct ndctl_region *region;
> +     struct ndctl_dax *dax;
> +     uint64_t size;
> +};
> +
> +static struct {
> +     struct io_dev dev[2];
> +     bool zero;
> +     uint64_t len;
> +     struct ndctl_cmd *ars_cap;
> +     struct ndctl_cmd *clear_err;
> +} io = {
> +     .dev[0].fd = -1,
> +     .dev[1].fd = -1,
> +};
> +
> +#define fail(fmt, ...) \
> +do { \
> +     fprintf(stderr, "daxctl-%s:%s:%d: " fmt, \
> +                     VERSION, __func__, __LINE__, ##__VA_ARGS__); \
> +} while (0)
> +
> +static bool is_stdinout(struct io_dev *io_dev)
> +{
> +     return (io_dev->fd == STDIN_FILENO ||
> +                     io_dev->fd == STDOUT_FILENO) ? true : false;
> +}
> +
> +static int setup_device(struct io_dev *io_dev, struct ndctl_ctx *ctx,
> +             size_t size)
> +{
> +     int flags, rc;
> +
> +     if (is_stdinout(io_dev))
> +             return 0;
> +
> +     if (io_dev->is_new)
> +             flags = O_CREAT|O_WRONLY|O_TRUNC;
> +     else if (io_dev->need_trunc)
> +             flags = O_RDWR | O_TRUNC;
> +     else
> +             flags = O_RDWR;
> +
> +     io_dev->fd = open(io_dev->parm_path, flags, S_IRUSR|S_IWUSR);
> +     if (io_dev->fd == -1) {
> +             rc = -errno;
> +             perror("open");
> +             return rc;
> +     }
> +
> +     if (!io_dev->is_dax)
> +             return 0;
> +
> +     flags = (io_dev->direction == IO_READ) ? PROT_READ : PROT_WRITE;
> +     io_dev->mmap = mmap(NULL, size, flags, MAP_SHARED, io_dev->fd, 0);
> +     if (io_dev->mmap == MAP_FAILED) {
> +             rc = -errno;
> +             perror("mmap");
> +             return rc;
> +     }
> +
> +     return 0;
> +}
> +
> +static int match_device(struct io_dev *io_dev, struct daxctl_region *dregion)
> +{
> +     struct daxctl_dev *dev;
> +
> +     daxctl_dev_foreach(dregion, dev) {
> +             if (io_dev->major == daxctl_dev_get_major(dev) &&
> +                     io_dev->minor == daxctl_dev_get_minor(dev)) {
> +                     io_dev->is_dax = true;
> +                     io_dev->size = daxctl_dev_get_size(dev);
> +                     return 1;
> +             }
> +     }
> +
> +     return 0;
> +}
> +
> +static int find_dax_device(struct io_dev *io_dev, struct ndctl_ctx *ndctx,
> +             enum io_direction dir)
> +{
> +     struct ndctl_bus *bus;
> +     struct ndctl_region *region;
> +     struct ndctl_dax *dax;
> +     struct daxctl_region *dregion;
> +     struct stat st;
> +     int rc;
> +     char cdev_path[256];
> +     char link_path[256];
> +     char *dev_name;
> +
> +     if (is_stdinout(io_dev)) {
> +             io_dev->size = ULONG_MAX;
> +             return 0;
> +     }
> +
> +     rc = stat(io_dev->parm_path, &st);
> +     if (rc == -1) {
> +             rc = -errno;
> +             if (rc == -ENOENT && dir == IO_WRITE) {
> +                     io_dev->is_new = true;
> +                     io_dev->size = ULONG_MAX;
> +                     return 0;
> +             }
> +             perror("stat");
> +             return rc;
> +     }
> +
> +     if (S_ISREG(st.st_mode)) {
> +             if (dir == IO_WRITE) {
> +                     io_dev->need_trunc = true;
> +                     io_dev->size = ULONG_MAX;
> +             } else
> +                     io_dev->size = st.st_size;
> +             return 0;
> +     } else if (S_ISBLK(st.st_mode)) {
> +             io_dev->size = st.st_size;
> +             return 0;
> +     } else if (S_ISCHR(st.st_mode)) {
> +             io_dev->size = ULONG_MAX;
> +             io_dev->is_char = true;
> +             io_dev->major = major(st.st_rdev);
> +             io_dev->minor = minor(st.st_rdev);
> +     } else
> +             return -ENODEV;
> +
> +     rc = snprintf(cdev_path, 255, "/sys/dev/char/%u:%u", io_dev->major,
> +                     io_dev->minor);
> +     if (rc < 0) {
> +             fail("snprintf\n");
> +             return -ENXIO;
> +     }
> +
> +     rc = readlink(cdev_path, link_path, 255);
> +     if (rc == -1) {
> +             rc = errno;
> +             perror("readlink");
> +             return rc;
> +     }
> +     link_path[rc] = '\0';
> +     dev_name = basename(link_path);
> +
> +     ndctl_bus_foreach(ndctx, bus)
> +             ndctl_region_foreach(bus, region)
> +                     ndctl_dax_foreach(region, dax) {
> +                             if (strncmp(dev_name,
> +                                             ndctl_dax_get_devname(dax),
> +                                             256))
> +                                     continue;
> +
> +                             dregion = ndctl_dax_get_daxctl_region(dax);
> +                             if(match_device(io_dev, dregion)) {
> +                                     io_dev->region = region;
> +                                     io_dev->dax = dax;
> +                                     return 1;
> +                             }
> +                     }
> +     return 0;
> +}
> +
> +static int send_clear_error(struct ndctl_bus *bus, uint64_t start, uint64_t 
> size)
> +{
> +     uint64_t cleared;
> +     int rc;
> +
> +     io.clear_err = ndctl_bus_cmd_new_clear_error(start, size, io.ars_cap);
> +     if (!io.clear_err) {
> +             fail("bus: %s failed to create cmd\n",
> +                             ndctl_bus_get_provider(bus));
> +             return -ENXIO;
> +     }
> +
> +     rc = ndctl_cmd_submit(io.clear_err);
> +     if (rc) {
> +             fail("bus: %s failed to submit cmd: %d\n",
> +                             ndctl_bus_get_provider(bus), rc);
> +                             ndctl_cmd_unref(io.clear_err);
> +             return rc;
> +     }
> +
> +     cleared = ndctl_cmd_clear_error_get_cleared(io.clear_err);
> +     if (cleared != size) {
> +             fail("bus: %s expected to clear: %ld actual: %ld\n",
> +                             ndctl_bus_get_provider(bus),
> +                             size, cleared);
> +             return -ENXIO;
> +     }
> +
> +     return 0;
> +}
> +
> +static int get_ars_cap(struct ndctl_bus *bus, uint64_t start, uint64_t size)
> +{
> +     int rc;
> +
> +     io.ars_cap = ndctl_bus_cmd_new_ars_cap(bus, start, size);
> +     if (!io.ars_cap) {
> +             fail("bus: %s failed to create cmd\n",
> +                             ndctl_bus_get_provider(bus));
> +             return -ENOTTY;
> +     }
> +
> +     rc = ndctl_cmd_submit(io.ars_cap);
> +     if (rc) {
> +             fail("bus: %s failed to submit cmd: %d\n",
> +                             ndctl_bus_get_provider(bus), rc);
> +             ndctl_cmd_unref(io.ars_cap);
> +             return rc;
> +     }
> +
> +     if (ndctl_cmd_ars_cap_get_size(io.ars_cap) <
> +                     sizeof(struct nd_cmd_ars_status)) {
> +             fail("bus: %s expected size >= %zd got: %d\n",
> +                             ndctl_bus_get_provider(bus),
> +                             sizeof(struct nd_cmd_ars_status),
> +                             ndctl_cmd_ars_cap_get_size(io.ars_cap));
> +             ndctl_cmd_unref(io.ars_cap);
> +             return -ENXIO;
> +     }
> +
> +     return 0;
> +}
> +
> +int clear_errors(struct ndctl_bus *bus, uint64_t start, uint64_t len)
> +{
> +     int rc;
> +
> +     rc = get_ars_cap(bus, start, len);
> +     if (rc) {
> +             fail("get_ars_cap failed\n");
> +             return rc;
> +     }
> +
> +     rc = send_clear_error(bus, start, len);
> +     if (rc) {
> +             fail("send_clear_error failed\n");
> +             return rc;
> +     }
> +
> +     return 0;
> +}
> +
> +static int clear_badblocks(struct io_dev *dev, uint64_t len)
> +{
> +     unsigned long long dax_begin, dax_size, dax_end;
> +     unsigned long long region_begin, offset;
> +     unsigned long long size, io_begin, io_end, io_len;
> +     struct badblock *bb;
> +     int rc;
> +
> +     dax_begin = ndctl_dax_get_resource(dev->dax);
> +     if (dax_begin == ULLONG_MAX)
> +             return -ERANGE;
> +
> +     dax_size = ndctl_dax_get_size(dev->dax);
> +     if (dax_size == ULLONG_MAX)
> +             return -ERANGE;
> +
> +     dax_end = dax_begin + dax_size - 1;
> +
> +     region_begin = ndctl_region_get_resource(dev->region);
> +     if (region_begin == ULLONG_MAX)
> +             return -ERANGE;
> +
> +     ndctl_region_badblock_foreach(dev->region, bb) {
> +             unsigned long long bb_begin, bb_end, begin, end;
> +
> +             bb_begin = region_begin + (bb->offset << 9);
> +             bb_end = bb_begin + (bb->len << 9) - 1;
> +
> +             if (bb_end <= dax_begin || bb_begin >= dax_end)
> +                     continue;
> +
> +             if (bb_begin < dax_begin)
> +                     begin = dax_begin;
> +             else
> +                     begin = bb_begin;
> +
> +             if (bb_end > dax_end)
> +                     end = dax_end;
> +             else
> +                     end = bb_end;
> +
> +             offset = begin - dax_begin;
> +             size = end - begin + 1;
> +
> +             /*
> +              * If end of I/O is before badblock or the offset of the
> +              * I/O is greater than the actual size of badblock range
> +              */
> +             if (dev->offset + len - 1 < offset || dev->offset > size)
> +                     continue;
> +
> +             io_begin = (dev->offset < offset) ? offset : dev->offset;
> +             if ((dev->offset + len) < (offset + size))
> +                     io_end = offset + len;
> +             else
> +                     io_end = offset + size;
> +
> +             io_len = io_end - io_begin;
> +             io_begin += dax_begin;
> +             rc = clear_errors(ndctl_region_get_bus(dev->region),
> +                             io_begin, io_len);
> +             if (rc < 0)
> +                     return rc;
> +     }
> +
> +     return 0;
> +}
> +
> +static ssize_t __do_io(struct io_dev *dst_dev, struct io_dev *src_dev,
> +             uint64_t len, bool zero)
> +{
> +     void *src, *dst;
> +     ssize_t rc, count = 0;
> +
> +     if (zero && dst_dev->is_dax) {
> +             dst = (uint8_t *)dst_dev->mmap + dst_dev->offset;
> +             memset(dst, 0, len);
> +             pmem_persist(dst, len);
> +             rc = len;
> +     } else if (dst_dev->is_dax && src_dev->is_dax) {
> +             src = (uint8_t *)src_dev->mmap + src_dev->offset;
> +             dst = (uint8_t *)dst_dev->mmap + dst_dev->offset;
> +             pmem_memcpy_persist(dst, src, len);
> +             rc = len;
> +     } else if (src_dev->is_dax) {
> +             src = (uint8_t *)src_dev->mmap + src_dev->offset;
> +             if (dst_dev->offset) {
> +                     rc = lseek(dst_dev->fd, dst_dev->offset, SEEK_SET);
> +                     if (rc < 0) {
> +                             rc = -errno;
> +                             perror("lseek");
> +                             return rc;
> +                     }
> +             }
> +             do {
> +                     rc = write(dst_dev->fd, (uint8_t *)src + count,
> +                                     len - count);
> +                     if (rc == -1) {
> +                             rc = -errno;
> +                             perror("write");
> +                             return rc;
> +                     }
> +                     count += rc;
> +             } while (count != (ssize_t)len);
> +             rc = count;
> +             if (rc != (ssize_t)len)
> +                     printf("Requested size %lu larger than source.\n", len);
> +     } else if (dst_dev->is_dax) {
> +             dst = (uint8_t *)dst_dev->mmap + dst_dev->offset;
> +             if (src_dev->offset) {
> +                     rc = lseek(src_dev->fd, src_dev->offset, SEEK_SET);
> +                     if (rc < 0) {
> +                             rc = -errno;
> +                             perror("lseek");
> +                             return rc;
> +                     }
> +             }
> +             do {
> +                     rc = read(src_dev->fd, (uint8_t *)dst + count,
> +                                     len - count);
> +                     if (rc == -1) {
> +                             rc = -errno;
> +                             perror("pread");
> +                             return rc;
> +                     }
> +                     /* end of file */
> +                     if (rc == 0)
> +                             break;
> +                     count += rc;
> +             } while (count != (ssize_t)len);
> +             pmem_persist(dst, count);
> +             rc = count;
> +             if (rc != (ssize_t)len)
> +                     printf("Requested size %lu larger than destination.\n", 
> len);
> +     } else
> +             return -EINVAL;
> +
> +     return rc;
> +}
> +
> +static int do_io(struct ndctl_ctx *ctx)
> +{
> +     int rc, i, dax_devs = 0;
> +
> +     /* if we are zeroing the device, we just need output */
> +     i = io.zero ? 1 : 0;
> +     for (; i < 2; i++) {
> +             if (!io.dev[i].parm_path)
> +                     continue;
> +             rc = find_dax_device(&io.dev[i], ctx, i);
> +             if (rc < 0)
> +                     return rc;
> +
> +             if (rc == 1)
> +                     dax_devs++;
> +     }
> +
> +     if (dax_devs == 0) {
> +             fail("No DAX devices for input or output, fail\n");
> +             return -ENODEV;
> +     }
> +
> +     if (io.len == 0) {
> +             if (is_stdinout(&io.dev[0]))
> +                     io.len = io.dev[1].size;
> +             else
> +                     io.len = io.dev[0].size;
> +     }
> +
> +     io.dev[1].direction = IO_WRITE;
> +     i = io.zero ? 1 : 0;
> +     for (; i < 2; i++) {
> +             if (!io.dev[i].parm_path)
> +                     continue;
> +             rc = setup_device(&io.dev[i], ctx, io.len);
> +             if (rc < 0)
> +                     return rc;
> +     }
> +
> +     if (io.dev[1].is_dax) {
> +             rc = clear_badblocks(&io.dev[1], io.len);
> +             if (rc < 0) {
> +                     fail("Failed to clear badblocks on %s\n",
> +                                     io.dev[1].parm_path);
> +                     return rc;
> +             }
> +     }
> +
> +     rc = __do_io(&io.dev[1], &io.dev[0], io.len, io.zero);
> +     if (rc < 0) {
> +             fail("Failed to perform I/O\n");
> +             return rc;
> +     }
> +
> +     printf("Data copied %u bytes to device %s\n",
> +                     rc, io.dev[1].parm_path);
> +
> +     return 0;
> +}
> +
> +static void cleanup(struct ndctl_ctx *ctx)
> +{
> +     int i;
> +
> +     for (i = 0; i < 2; i++) {
> +             if (is_stdinout(&io.dev[i]))
> +                     continue;
> +             close(io.dev[i].fd);
> +     }
> +}
> +
> +int cmd_io(int argc, const char **argv, void *ctx)
> +{
> +     const struct option options[] = {
> +             OPT_STRING('i', "input", &io.dev[0].parm_path, "in device",
> +                             "input device/file"),
> +             OPT_STRING('o', "output", &io.dev[1].parm_path, "out device",
> +                             "output device/file"),
> +             OPT_BOOLEAN('z', "zero", &io.zero, "zeroing the device"),
> +             OPT_U64('l', "len", &io.len, "total length to perform the I/O"),
> +             OPT_U64('s', "seek", &io.dev[1].offset, "seek offset for 
> output"),
> +             OPT_U64('k', "skip", &io.dev[0].offset, "skip offset for 
> input"),
> +     };
> +     const char * const u[] = {
> +             "daxctl io [<options>]",
> +             NULL
> +     };
> +     int i, rc;
> +     struct ndctl_ctx *ndctx;
> +
> +     argc = parse_options(argc, argv, options, u, 0);
> +     for (i = 0; i < argc; i++) {
> +             fail("Unknown parameter \"%s\"\n", argv[i]);
> +             return -EINVAL;
> +     }
> +
> +     if (argc) {
> +             usage_with_options(u, options);
> +             return 0;
> +     }
> +
> +     if (!io.dev[0].parm_path && !io.dev[1].parm_path) {
> +             usage_with_options(u, options);
> +             return 0;
> +     }
> +
> +     if (!io.dev[0].parm_path) {
> +             io.dev[0].fd = STDIN_FILENO;
> +             io.dev[0].offset = 0;
> +     }
> +
> +     if (!io.dev[1].parm_path) {
> +             io.dev[1].fd = STDOUT_FILENO;
> +             io.dev[1].offset = 0;
> +     }
> +
> +     rc = ndctl_new(&ndctx);
> +     if (rc)
> +             return -ENOMEM;
> +
> +     rc = do_io(ndctx);
> +     if (rc < 0)
> +             goto out;
> +
> +     rc = 0;
> +out:
> +     cleanup(ndctx);
> +     ndctl_unref(ndctx);
> +     return rc;
> +}
>
> _______________________________________________
> Linux-nvdimm mailing list
> [email protected]
> https://lists.01.org/mailman/listinfo/linux-nvdimm
_______________________________________________
Linux-nvdimm mailing list
[email protected]
https://lists.01.org/mailman/listinfo/linux-nvdimm

Reply via email to