Dave Jiang <[email protected]> writes: > The daxctl io command allows I/O to be performed between a file or block > device and a Device-DAX device, in either direction. It also provides a way > to zero a Device-DAX device. > > e.g. daxctl io --input=/home/myfile --output=/dev/dax1.0
Does that really belong in daxctl? -Jeff > Signed-off-by: Dave Jiang <[email protected]> > --- > Documentation/Makefile.am | 3 > Documentation/daxctl-io.txt | 71 +++++ > daxctl/Makefile.am | 5 > daxctl/daxctl.c | 2 > daxctl/io.c | 567 > +++++++++++++++++++++++++++++++++++++++++++ > 5 files changed, 646 insertions(+), 2 deletions(-) > create mode 100644 Documentation/daxctl-io.txt > create mode 100644 daxctl/io.c > > diff --git a/Documentation/Makefile.am b/Documentation/Makefile.am > index c7e0758..8efdbc2 100644 > --- a/Documentation/Makefile.am > +++ b/Documentation/Makefile.am > @@ -26,7 +26,8 @@ man1_MANS = \ > ndctl-destroy-namespace.1 \ > ndctl-check-namespace.1 \ > ndctl-list.1 \ > - daxctl-list.1 > + daxctl-list.1 \ > + daxctl-io.1 > > CLEANFILES = $(man1_MANS) > > diff --git a/Documentation/daxctl-io.txt b/Documentation/daxctl-io.txt > new file mode 100644 > index 0000000..c3ddd15 > --- /dev/null > +++ b/Documentation/daxctl-io.txt > @@ -0,0 +1,71 @@ > +daxctl-io(1) > +=========== > + > +NAME > +---- > +daxctl-io - Perform I/O on Device-DAX devices or zero a Device-DAX device. > + > +SYNOPSIS > +-------- > +[verse] > +'daxctl io' [<options>] > + > +There must be a Device-DAX device involved whether as the input or the output > +device. Read from a Device-DAX device and write to a file, a block device, > +another Device-DAX device, or stdout (if no output is provided). Write > +to a Device-DAX device from a file, a block device, or stdin, or another > +Device-DAX device. > + > +No length specified will default to input file/device length. If input is > +a special char file then length will be the output file/device length. > + > +No input will default to stdin. No output will default to stdout. > + > +For a Device-DAX device, attempts to clear badblocks within range of writes > +will be performed. 
> + > +EXAMPLE > +------- > +[verse] > +# daxctl io --zero --output=/dev/dax1.0 > + > +# daxctl io --input=/dev/dax1.0 --output=/home/myfile --len=2097152 > --seek=4096 > + > +# cat /dev/zero | daxctl io --output=/dev/dax1.0 > + > +# daxctl io --input=/dev/zero --output=/dev/dax1.0 --skip=4096 > + > +OPTIONS > +------- > +-i:: > +--input=:: > + Input device or file to read from. > + > +-o:: > +--output=:: > + Output device or file to write to. > + > +-z:: > +--zero:: > + Zero 'len' bytes of the output device, or the entire device if no > + length was provided. The output device must be a Device-DAX device. > + > +-l:: > +--len:: > + The length, in bytes, of the I/O to perform. > + > +-s:: > +--seek:: > + The number of bytes to skip over on the output before performing a > + write. > + > +-k:: > +--skip:: > + The number of bytes to skip over on the input before performing a read. > + > +COPYRIGHT > +--------- > +Copyright (c) 2017, Intel Corporation. License GPLv2: GNU GPL > +version 2 <http://gnu.org/licenses/gpl.html>. This is free software: > +you are free to change and redistribute it. There is NO WARRANTY, to > +the extent permitted by law. 
> diff --git a/daxctl/Makefile.am b/daxctl/Makefile.am > index fe467d0..1ba1f07 100644 > --- a/daxctl/Makefile.am > +++ b/daxctl/Makefile.am > @@ -5,10 +5,13 @@ bin_PROGRAMS = daxctl > daxctl_SOURCES =\ > daxctl.c \ > list.c \ > + io.c \ > ../util/json.c > > daxctl_LDADD =\ > lib/libdaxctl.la \ > + ../ndctl/lib/libndctl.la \ > ../libutil.a \ > $(UUID_LIBS) \ > - $(JSON_LIBS) > + $(JSON_LIBS) \ > + -lpmem > diff --git a/daxctl/daxctl.c b/daxctl/daxctl.c > index 91a4600..db2e495 100644 > --- a/daxctl/daxctl.c > +++ b/daxctl/daxctl.c > @@ -67,11 +67,13 @@ static int cmd_help(int argc, const char **argv, void > *ctx) > } > > int cmd_list(int argc, const char **argv, void *ctx); > +int cmd_io(int argc, const char **argv, void *ctx); > > static struct cmd_struct commands[] = { > { "version", cmd_version }, > { "list", cmd_list }, > { "help", cmd_help }, > + { "io", cmd_io }, > }; > > int main(int argc, const char **argv) > diff --git a/daxctl/io.c b/daxctl/io.c > new file mode 100644 > index 0000000..92e2878 > --- /dev/null > +++ b/daxctl/io.c > @@ -0,0 +1,567 @@ > +/* > + * Copyright(c) 2015-2017 Intel Corporation. All rights reserved. > + * > + * This program is free software; you can redistribute it and/or modify it > + * under the terms of version 2 of the GNU General Public License as > + * published by the Free Software Foundation. > + * > + * This program is distributed in the hope that it will be useful, but > + * WITHOUT ANY WARRANTY; without even the implied warranty of > + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU > + * General Public License for more details. 
> + */ > +#include <stdio.h> > +#include <errno.h> > +#include <stdlib.h> > +#include <sys/types.h> > +#include <sys/stat.h> > +#include <sys/sysmacros.h> > +#include <sys/param.h> > +#include <sys/mman.h> > +#include <fcntl.h> > +#include <unistd.h> > +#include <limits.h> > +#include <libgen.h> > +#include <libpmem.h> > +#include <util/json.h> > +#include <util/filter.h> > +#include <json-c/json.h> > +#include <daxctl/libdaxctl.h> > +#include <ccan/short_types/short_types.h> > +#include <util/parse-options.h> > +#include <ccan/array_size/array_size.h> > +#include <ndctl/ndctl.h> > + > +enum io_direction { > + IO_READ = 0, > + IO_WRITE, > +}; > + > +struct io_dev { > + int fd; > + int major; > + int minor; > + void *mmap; > + const char *parm_path; > + char *real_path; > + uint64_t offset; > + enum io_direction direction; > + bool is_dax; > + bool is_char; > + bool is_new; > + bool need_trunc; > + struct ndctl_ctx *ndctx; > + struct ndctl_region *region; > + struct ndctl_dax *dax; > + uint64_t size; > +}; > + > +static struct { > + struct io_dev dev[2]; > + bool zero; > + uint64_t len; > + struct ndctl_cmd *ars_cap; > + struct ndctl_cmd *clear_err; > +} io = { > + .dev[0].fd = -1, > + .dev[1].fd = -1, > +}; > + > +#define fail(fmt, ...) \ > +do { \ > + fprintf(stderr, "daxctl-%s:%s:%d: " fmt, \ > + VERSION, __func__, __LINE__, ##__VA_ARGS__); \ > +} while (0) > + > +static bool is_stdinout(struct io_dev *io_dev) > +{ > + return (io_dev->fd == STDIN_FILENO || > + io_dev->fd == STDOUT_FILENO) ? 
true : false; > +} > + > +static int setup_device(struct io_dev *io_dev, struct ndctl_ctx *ctx, > + size_t size) > +{ > + int flags, rc; > + > + if (is_stdinout(io_dev)) > + return 0; > + > + if (io_dev->is_new) > + flags = O_CREAT|O_WRONLY|O_TRUNC; > + else if (io_dev->need_trunc) > + flags = O_RDWR | O_TRUNC; > + else > + flags = O_RDWR; > + > + io_dev->fd = open(io_dev->parm_path, flags, S_IRUSR|S_IWUSR); > + if (io_dev->fd == -1) { > + rc = -errno; > + perror("open"); > + return rc; > + } > + > + if (!io_dev->is_dax) > + return 0; > + > + flags = (io_dev->direction == IO_READ) ? PROT_READ : PROT_WRITE; > + io_dev->mmap = mmap(NULL, size, flags, MAP_SHARED, io_dev->fd, 0); > + if (io_dev->mmap == MAP_FAILED) { > + rc = -errno; > + perror("mmap"); > + return rc; > + } > + > + return 0; > +} > + > +static int match_device(struct io_dev *io_dev, struct daxctl_region *dregion) > +{ > + struct daxctl_dev *dev; > + > + daxctl_dev_foreach(dregion, dev) { > + if (io_dev->major == daxctl_dev_get_major(dev) && > + io_dev->minor == daxctl_dev_get_minor(dev)) { > + io_dev->is_dax = true; > + io_dev->size = daxctl_dev_get_size(dev); > + return 1; > + } > + } > + > + return 0; > +} > + > +static int find_dax_device(struct io_dev *io_dev, struct ndctl_ctx *ndctx, > + enum io_direction dir) > +{ > + struct ndctl_bus *bus; > + struct ndctl_region *region; > + struct ndctl_dax *dax; > + struct daxctl_region *dregion; > + struct stat st; > + int rc; > + char cdev_path[256]; > + char link_path[256]; > + char *dev_name; > + > + if (is_stdinout(io_dev)) { > + io_dev->size = ULONG_MAX; > + return 0; > + } > + > + rc = stat(io_dev->parm_path, &st); > + if (rc == -1) { > + rc = -errno; > + if (rc == -ENOENT && dir == IO_WRITE) { > + io_dev->is_new = true; > + io_dev->size = ULONG_MAX; > + return 0; > + } > + perror("stat"); > + return rc; > + } > + > + if (S_ISREG(st.st_mode)) { > + if (dir == IO_WRITE) { > + io_dev->need_trunc = true; > + io_dev->size = ULONG_MAX; > + } else > + 
io_dev->size = st.st_size; > + return 0; > + } else if (S_ISBLK(st.st_mode)) { > + io_dev->size = st.st_size; > + return 0; > + } else if (S_ISCHR(st.st_mode)) { > + io_dev->size = ULONG_MAX; > + io_dev->is_char = true; > + io_dev->major = major(st.st_rdev); > + io_dev->minor = minor(st.st_rdev); > + } else > + return -ENODEV; > + > + rc = snprintf(cdev_path, 255, "/sys/dev/char/%u:%u", io_dev->major, > + io_dev->minor); > + if (rc < 0) { > + fail("snprintf\n"); > + return -ENXIO; > + } > + > + rc = readlink(cdev_path, link_path, 255); > + if (rc == -1) { > + rc = errno; > + perror("readlink"); > + return rc; > + } > + link_path[rc] = '\0'; > + dev_name = basename(link_path); > + > + ndctl_bus_foreach(ndctx, bus) > + ndctl_region_foreach(bus, region) > + ndctl_dax_foreach(region, dax) { > + if (strncmp(dev_name, > + ndctl_dax_get_devname(dax), > + 256)) > + continue; > + > + dregion = ndctl_dax_get_daxctl_region(dax); > + if(match_device(io_dev, dregion)) { > + io_dev->region = region; > + io_dev->dax = dax; > + return 1; > + } > + } > + return 0; > +} > + > +static int send_clear_error(struct ndctl_bus *bus, uint64_t start, uint64_t > size) > +{ > + uint64_t cleared; > + int rc; > + > + io.clear_err = ndctl_bus_cmd_new_clear_error(start, size, io.ars_cap); > + if (!io.clear_err) { > + fail("bus: %s failed to create cmd\n", > + ndctl_bus_get_provider(bus)); > + return -ENXIO; > + } > + > + rc = ndctl_cmd_submit(io.clear_err); > + if (rc) { > + fail("bus: %s failed to submit cmd: %d\n", > + ndctl_bus_get_provider(bus), rc); > + ndctl_cmd_unref(io.clear_err); > + return rc; > + } > + > + cleared = ndctl_cmd_clear_error_get_cleared(io.clear_err); > + if (cleared != size) { > + fail("bus: %s expected to clear: %ld actual: %ld\n", > + ndctl_bus_get_provider(bus), > + size, cleared); > + return -ENXIO; > + } > + > + return 0; > +} > + > +static int get_ars_cap(struct ndctl_bus *bus, uint64_t start, uint64_t size) > +{ > + int rc; > + > + io.ars_cap = 
ndctl_bus_cmd_new_ars_cap(bus, start, size); > + if (!io.ars_cap) { > + fail("bus: %s failed to create cmd\n", > + ndctl_bus_get_provider(bus)); > + return -ENOTTY; > + } > + > + rc = ndctl_cmd_submit(io.ars_cap); > + if (rc) { > + fail("bus: %s failed to submit cmd: %d\n", > + ndctl_bus_get_provider(bus), rc); > + ndctl_cmd_unref(io.ars_cap); > + return rc; > + } > + > + if (ndctl_cmd_ars_cap_get_size(io.ars_cap) < > + sizeof(struct nd_cmd_ars_status)) { > + fail("bus: %s expected size >= %zd got: %d\n", > + ndctl_bus_get_provider(bus), > + sizeof(struct nd_cmd_ars_status), > + ndctl_cmd_ars_cap_get_size(io.ars_cap)); > + ndctl_cmd_unref(io.ars_cap); > + return -ENXIO; > + } > + > + return 0; > +} > + > +int clear_errors(struct ndctl_bus *bus, uint64_t start, uint64_t len) > +{ > + int rc; > + > + rc = get_ars_cap(bus, start, len); > + if (rc) { > + fail("get_ars_cap failed\n"); > + return rc; > + } > + > + rc = send_clear_error(bus, start, len); > + if (rc) { > + fail("send_clear_error failed\n"); > + return rc; > + } > + > + return 0; > +} > + > +static int clear_badblocks(struct io_dev *dev, uint64_t len) > +{ > + unsigned long long dax_begin, dax_size, dax_end; > + unsigned long long region_begin, offset; > + unsigned long long size, io_begin, io_end, io_len; > + struct badblock *bb; > + int rc; > + > + dax_begin = ndctl_dax_get_resource(dev->dax); > + if (dax_begin == ULLONG_MAX) > + return -ERANGE; > + > + dax_size = ndctl_dax_get_size(dev->dax); > + if (dax_size == ULLONG_MAX) > + return -ERANGE; > + > + dax_end = dax_begin + dax_size - 1; > + > + region_begin = ndctl_region_get_resource(dev->region); > + if (region_begin == ULLONG_MAX) > + return -ERANGE; > + > + ndctl_region_badblock_foreach(dev->region, bb) { > + unsigned long long bb_begin, bb_end, begin, end; > + > + bb_begin = region_begin + (bb->offset << 9); > + bb_end = bb_begin + (bb->len << 9) - 1; > + > + if (bb_end <= dax_begin || bb_begin >= dax_end) > + continue; > + > + if (bb_begin < 
dax_begin) > + begin = dax_begin; > + else > + begin = bb_begin; > + > + if (bb_end > dax_end) > + end = dax_end; > + else > + end = bb_end; > + > + offset = begin - dax_begin; > + size = end - begin + 1; > + > + /* > + * If end of I/O is before badblock or the offset of the > + * I/O is greater than the actual size of badblock range > + */ > + if (dev->offset + len - 1 < offset || dev->offset > size) > + continue; > + > + io_begin = (dev->offset < offset) ? offset : dev->offset; > + if ((dev->offset + len) < (offset + size)) > + io_end = offset + len; > + else > + io_end = offset + size; > + > + io_len = io_end - io_begin; > + io_begin += dax_begin; > + rc = clear_errors(ndctl_region_get_bus(dev->region), > + io_begin, io_len); > + if (rc < 0) > + return rc; > + } > + > + return 0; > +} > + > +static ssize_t __do_io(struct io_dev *dst_dev, struct io_dev *src_dev, > + uint64_t len, bool zero) > +{ > + void *src, *dst; > + ssize_t rc, count = 0; > + > + if (zero && dst_dev->is_dax) { > + dst = (uint8_t *)dst_dev->mmap + dst_dev->offset; > + memset(dst, 0, len); > + pmem_persist(dst, len); > + rc = len; > + } else if (dst_dev->is_dax && src_dev->is_dax) { > + src = (uint8_t *)src_dev->mmap + src_dev->offset; > + dst = (uint8_t *)dst_dev->mmap + dst_dev->offset; > + pmem_memcpy_persist(dst, src, len); > + rc = len; > + } else if (src_dev->is_dax) { > + src = (uint8_t *)src_dev->mmap + src_dev->offset; > + if (dst_dev->offset) { > + rc = lseek(dst_dev->fd, dst_dev->offset, SEEK_SET); > + if (rc < 0) { > + rc = -errno; > + perror("lseek"); > + return rc; > + } > + } > + do { > + rc = write(dst_dev->fd, (uint8_t *)src + count, > + len - count); > + if (rc == -1) { > + rc = -errno; > + perror("write"); > + return rc; > + } > + count += rc; > + } while (count != (ssize_t)len); > + rc = count; > + if (rc != (ssize_t)len) > + printf("Requested size %lu larger than source.\n", len); > + } else if (dst_dev->is_dax) { > + dst = (uint8_t *)dst_dev->mmap + dst_dev->offset; > + if 
(src_dev->offset) { > + rc = lseek(src_dev->fd, src_dev->offset, SEEK_SET); > + if (rc < 0) { > + rc = -errno; > + perror("lseek"); > + return rc; > + } > + } > + do { > + rc = read(src_dev->fd, (uint8_t *)dst + count, > + len - count); > + if (rc == -1) { > + rc = -errno; > + perror("pread"); > + return rc; > + } > + /* end of file */ > + if (rc == 0) > + break; > + count += rc; > + } while (count != (ssize_t)len); > + pmem_persist(dst, count); > + rc = count; > + if (rc != (ssize_t)len) > + printf("Requested size %lu larger than destination.\n", > len); > + } else > + return -EINVAL; > + > + return rc; > +} > + > +static int do_io(struct ndctl_ctx *ctx) > +{ > + int rc, i, dax_devs = 0; > + > + /* if we are zeroing the device, we just need output */ > + i = io.zero ? 1 : 0; > + for (; i < 2; i++) { > + if (!io.dev[i].parm_path) > + continue; > + rc = find_dax_device(&io.dev[i], ctx, i); > + if (rc < 0) > + return rc; > + > + if (rc == 1) > + dax_devs++; > + } > + > + if (dax_devs == 0) { > + fail("No DAX devices for input or output, fail\n"); > + return -ENODEV; > + } > + > + if (io.len == 0) { > + if (is_stdinout(&io.dev[0])) > + io.len = io.dev[1].size; > + else > + io.len = io.dev[0].size; > + } > + > + io.dev[1].direction = IO_WRITE; > + i = io.zero ? 
1 : 0; > + for (; i < 2; i++) { > + if (!io.dev[i].parm_path) > + continue; > + rc = setup_device(&io.dev[i], ctx, io.len); > + if (rc < 0) > + return rc; > + } > + > + if (io.dev[1].is_dax) { > + rc = clear_badblocks(&io.dev[1], io.len); > + if (rc < 0) { > + fail("Failed to clear badblocks on %s\n", > + io.dev[1].parm_path); > + return rc; > + } > + } > + > + rc = __do_io(&io.dev[1], &io.dev[0], io.len, io.zero); > + if (rc < 0) { > + fail("Failed to perform I/O\n"); > + return rc; > + } > + > + printf("Data copied %u bytes to device %s\n", > + rc, io.dev[1].parm_path); > + > + return 0; > +} > + > +static void cleanup(struct ndctl_ctx *ctx) > +{ > + int i; > + > + for (i = 0; i < 2; i++) { > + if (is_stdinout(&io.dev[i])) > + continue; > + close(io.dev[i].fd); > + } > +} > + > +int cmd_io(int argc, const char **argv, void *ctx) > +{ > + const struct option options[] = { > + OPT_STRING('i', "input", &io.dev[0].parm_path, "in device", > + "input device/file"), > + OPT_STRING('o', "output", &io.dev[1].parm_path, "out device", > + "output device/file"), > + OPT_BOOLEAN('z', "zero", &io.zero, "zeroing the device"), > + OPT_U64('l', "len", &io.len, "total length to perform the I/O"), > + OPT_U64('s', "seek", &io.dev[1].offset, "seek offset for > output"), > + OPT_U64('k', "skip", &io.dev[0].offset, "skip offset for > input"), > + }; > + const char * const u[] = { > + "daxctl io [<options>]", > + NULL > + }; > + int i, rc; > + struct ndctl_ctx *ndctx; > + > + argc = parse_options(argc, argv, options, u, 0); > + for (i = 0; i < argc; i++) { > + fail("Unknown parameter \"%s\"\n", argv[i]); > + return -EINVAL; > + } > + > + if (argc) { > + usage_with_options(u, options); > + return 0; > + } > + > + if (!io.dev[0].parm_path && !io.dev[1].parm_path) { > + usage_with_options(u, options); > + return 0; > + } > + > + if (!io.dev[0].parm_path) { > + io.dev[0].fd = STDIN_FILENO; > + io.dev[0].offset = 0; > + } > + > + if (!io.dev[1].parm_path) { > + io.dev[1].fd = STDOUT_FILENO; 
> + io.dev[1].offset = 0; > + } > + > + rc = ndctl_new(&ndctx); > + if (rc) > + return -ENOMEM; > + > + rc = do_io(ndctx); > + if (rc < 0) > + goto out; > + > + rc = 0; > +out: > + cleanup(ndctx); > + ndctl_unref(ndctx); > + return rc; > +} > > _______________________________________________ > Linux-nvdimm mailing list > [email protected] > https://lists.01.org/mailman/listinfo/linux-nvdimm _______________________________________________ Linux-nvdimm mailing list [email protected] https://lists.01.org/mailman/listinfo/linux-nvdimm
