From: Chengyu Zhu <[email protected]> Wire up the ublk userspace block device backend into mount.erofs, providing an alternative to nbd for block device exposure.
Signed-off-by: Chengyu Zhu <[email protected]>
Signed-off-by: Gao Xiang <[email protected]>
---
 mount/main.c | 297 ++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 275 insertions(+), 22 deletions(-)

diff --git a/mount/main.c b/mount/main.c
index 90fbdc68f88d..7713ba41058c 100644
--- a/mount/main.c
+++ b/mount/main.c
@@ -23,6 +23,7 @@
 #include "../lib/liberofs_fanotify.h"
 #endif
 #include "../lib/liberofs_s3.h"
+#include "../lib/liberofs_ublk.h"
 
 #ifdef HAVE_LINUX_LOOP_H
 #include <linux/loop.h>
@@ -51,6 +52,7 @@ struct loop_info {
 
 #define EROFSMOUNT_RUNDIR	"/var/run/erofs"
 #define EROFSMOUNT_NBD_REC_FMT	EROFSMOUNT_RUNDIR "/mountnbd_nbd%d"
+#define EROFSMOUNT_UBLK_REC_FMT	EROFSMOUNT_RUNDIR "/mountublk_ublkb%d"
 
 #ifdef EROFS_FANOTIFY_ENABLED
 #define EROFSMOUNT_FANOTIFY_HELP	", fanotify"
@@ -58,12 +60,19 @@ struct loop_info {
 #define EROFSMOUNT_FANOTIFY_HELP	""
 #endif
 
+#ifdef HAVE_LIBURING
+#define EROFSMOUNT_UBLK_HELP	", ublk"
+#else
+#define EROFSMOUNT_UBLK_HELP	""
+#endif
+
 enum erofs_backend_drv {
 	EROFSAUTO,
 	EROFSLOCAL,
 	EROFSFUSE,
 	EROFSNBD,
 	EROFSFANOTIFY,
+	EROFSUBLK,
 };
 
 enum erofsmount_mode {
@@ -117,10 +126,10 @@ static void usage(int argc, char **argv)
 		" -d <0-9> set output verbosity; 0=quiet, 9=verbose (default=%i)\n"
 		" -o options comma-separated list of mount options\n"
 		" -t type[.subtype] filesystem type (and optional subtype)\n"
-		" subtypes: fuse, local, nbd" EROFSMOUNT_FANOTIFY_HELP "\n"
+		" subtypes: fuse, local, nbd" EROFSMOUNT_FANOTIFY_HELP EROFSMOUNT_UBLK_HELP "\n"
 		" -u unmount the filesystem\n"
 		" --disconnect abort an existing NBD device forcibly\n"
-		" --reattach reattach to an existing NBD device\n"
+		" --reattach reattach to an existing NBD or ublk device\n"
 #ifdef OCIEROFS_ENABLED
 		"\n"
 		"OCI-specific options (EXPERIMENTAL, with -o):\n"
@@ -465,6 +474,13 @@ static int erofsmount_parse_options(int argc, char **argv)
 #else
 					erofs_err("fanotify backend support is not built-in");
 					return -EINVAL;
+#endif
+				} else if (!strcmp(dot + 1, "ublk")) {
+#ifdef HAVE_LIBURING
+					mountcfg.backend = EROFSUBLK;
+#else
+					erofs_err("ublk backend support is not built-in");
+					return -EINVAL;
 #endif
 				} else {
 					erofs_err("invalid filesystem subtype `%s`", dot + 1);
@@ -1399,11 +1415,38 @@ out_fork:
 	return num;
 }
 
+/*
+ * ublk request callback: serve one read from the backing vfile in @ctx.
+ *
+ * Sector numbers are turned into byte values with "<< 9".  Requests other
+ * than EROFS_UBLK_OP_READ fail with -EOPNOTSUPP.  On success the number of
+ * bytes read is stored in req->result and 0 is returned; a negative
+ * erofs_io_pread() result is returned as-is.
+ */
+static int erofsmount_ublk_handler(void *ctx, struct erofs_ublk_request *req)
+{
+	struct erofs_vfile *vf = ctx;
+	ssize_t ret;
+
+	if (req->op != EROFS_UBLK_OP_READ)
+		return -EOPNOTSUPP;
+
+	ret = erofs_io_pread(vf, req->buf, req->nr_sectors << 9,
+			     req->start_sector << 9);
+	if (ret < 0)
+		return (int)ret;
+
+	req->result = ret;
+	return 0;
+}
+
 static int erofsmount_reattach(const char *target)
 {
 	struct erofsmount_nbd_ctx ctx = { .vd = &ctx._vd };
+	int ublk_dev_id, nbdnum, err;
+	char ublk_recp[64];
 	char *identifier;
-	int nbdnum, err;
 	struct stat st;
+	pid_t pid;
 	FILE *f;
 
@@ -1411,7 +1454,55 @@ static int erofsmount_reattach(const char *target)
 	if (err < 0)
 		return -errno;
 
-	if (!S_ISBLK(st.st_mode) || major(st.st_rdev) != EROFS_NBD_MAJOR)
+	if (!S_ISBLK(st.st_mode))
+		return -ENOTBLK;
+
+	if (sscanf(target, "/dev/ublkb%d", &ublk_dev_id) == 1) {
+		if (!erofs_ublk_is_recoverable(ublk_dev_id)) {
+			erofs_err("ublk device %d is not recoverable",
+				  ublk_dev_id);
+			return -ENODEV;
+		}
+		snprintf(ublk_recp, sizeof(ublk_recp),
+			 EROFSMOUNT_UBLK_REC_FMT, ublk_dev_id);
+		f = fopen(ublk_recp, "r");
+		if (!f) {
+			err = -errno;
+			erofs_err("cannot open recovery file %s: %s",
+				  ublk_recp, strerror(-err));
+			return err;
+		}
+		err = erofsmount_recovery_open_source(&ctx, f);
+		if (err)
+			return err;
+		pid = fork();
+		if (pid < 0) {
+			err = -errno;
+			erofs_io_close(ctx.vd);
+			return err;
+		}
+		if (pid == 0) {
+			if (erofs_ublk_init() < 0)
+				exit(EXIT_FAILURE);
+			err = erofs_ublk_recover_dev(ublk_dev_id,
+						     erofsmount_ublk_handler,
+						     ctx.vd);
+			if (err) {
+				erofs_err("erofs_ublk_recover_dev: %s",
+					  strerror(-err));
+				exit(EXIT_FAILURE);
+			}
+			erofs_ublk_start(ublk_dev_id, -1);
+			unlink(ublk_recp);
+			erofs_ublk_destroy(ublk_dev_id);
+			erofs_io_close(ctx.vd);
+			exit(EXIT_SUCCESS);
+		}
+		erofs_io_close(ctx.vd);
+		return 0;
+	}
+
+	if (major(st.st_rdev) != EROFS_NBD_MAJOR)
 		return -ENOTBLK;
 
 	nbdnum = erofs_nbd_get_index_from_minor(minor(st.st_rdev));
@@ -2034,6 +2125,144 @@ out:
 }
 #endif
 
+/*
+ * Mount @source at @mountpoint through a ublk block device.
+ *
+ * A forked child opens the source, creates the ublk device and runs
+ * erofs_ublk_start(); once that returns it removes the recovery file and
+ * destroys the device.  The parent reads the device id from a pipe, then
+ * waits for one more byte on it (presumably sent by erofs_ublk_start(),
+ * which is given the write end) before mounting /dev/ublkb<id>.  If
+ * mount(2) fails the child is sent SIGTERM and reaped.
+ *
+ * Returns 0 on success; otherwise erofs_ublk_init()'s error, -errno or -EIO.
+ */
+static int erofsmount_ublk(struct erofsmount_source *source,
+			   const char *mountpoint, const char *fstype,
+			   int flags, const char *options)
+{
+	int pipefd[2];
+	char dev_path[64];
+	pid_t pid;
+	int dev_id, err;
+	char ready;
+
+	err = erofs_ublk_init();
+	if (err) {
+		erofs_err("ublk not supported");
+		return err;
+	}
+
+	if (pipe(pipefd) < 0)
+		return -errno;
+
+	pid = fork();
+	if (pid < 0) {
+		err = -errno;
+		close(pipefd[0]);
+		close(pipefd[1]);
+		return err;
+	}
+
+	if (pid == 0) {
+		struct erofsmount_nbd_ctx ctx = { .vd = &ctx._vd };
+		struct erofs_ublk_dev_info info = {};
+		char ublk_recp[64], *recp;
+		struct stat st;
+
+		close(pipefd[0]);
+
+		err = erofsmount_open_source(&ctx, source);
+		if (err)
+			exit(EXIT_FAILURE);
+
+		info.nr_hw_queues = 1;
+		info.queue_depth = 64;
+		info.max_io_buf_bytes = 65536;
+		info.dev_id = -1;
+		info.blkbits = 12;
+		info.flags = EROFS_UBLK_F_USER_RECOVERY;
+
+		if (source->type == EROFSMOUNT_SOURCE_LOCAL &&
+		    erofs_io_fstat(ctx.vd, &st) == 0)
+			info.dev_size = st.st_size;
+		else
+			info.dev_size = INT64_MAX;
+
+		dev_id = erofs_ublk_create_dev(&info,
+				erofsmount_ublk_handler, ctx.vd);
+		if (dev_id < 0) {
+			erofs_err("erofs_ublk_create_dev failed: %s",
+				  strerror(-dev_id));
+			exit(EXIT_FAILURE);
+		}
+
+		snprintf(ublk_recp, sizeof(ublk_recp),
+			 EROFSMOUNT_UBLK_REC_FMT, dev_id);
+		recp = erofsmount_write_recovery_info(source);
+		if (IS_ERR(recp)) {
+			erofs_err("write_recovery_info: %s",
+				  strerror(-(int)PTR_ERR(recp)));
+		} else {
+			if (rename(recp, ublk_recp))
+				erofs_err("rename recovery: %s",
+					  strerror(errno));
+			free(recp);
+		}
+
+		if (write(pipefd[1], &dev_id,
+			  sizeof(dev_id)) != sizeof(dev_id))
+			exit(EXIT_FAILURE);
+
+		err = erofs_ublk_start(dev_id, pipefd[1]);
+		if (err)
+			erofs_err("erofs_ublk_start: %s",
+				  strerror(-err));
+
+		unlink(ublk_recp);
+		erofs_ublk_destroy(dev_id);
+		erofs_io_close(ctx.vd);
+		exit(EXIT_SUCCESS);
+	}
+
+	close(pipefd[1]);
+	if (read(pipefd[0], &dev_id, sizeof(dev_id)) !=
+	    sizeof(dev_id)) {
+		waitpid(pid, NULL, 0);
+		close(pipefd[0]);
+		return -EIO;
+	}
+
+	snprintf(dev_path, sizeof(dev_path),
+		 "/dev/ublkb%d", dev_id);
+
+	if (read(pipefd[0], &ready, 1) != 1) {
+		waitpid(pid, NULL, 0);
+		close(pipefd[0]);
+		return -EIO;
+	}
+	close(pipefd[0]);
+
+	err = mount(dev_path, mountpoint, fstype, flags, options);
+	if (err < 0) {
+		err = -errno;
+		kill(pid, SIGTERM);
+		waitpid(pid, NULL, 0);
+		return err;
+	}
+	return 0;
+}
+
+/* Return N (>= 0) for a path starting with "/dev/ublkb<N>", else -1. */
+static int ublk_dev_id_from_path(const char *path)
+{
+	int dev_id;
+
+	if (sscanf(path, "/dev/ublkb%d", &dev_id) == 1 && dev_id >= 0)
+		return dev_id;
+	return -1;
+}
+
 int erofsmount_umount(char *target)
 {
 	char *device = NULL, *mountpoint = NULL;
@@ -2071,7 +2300,7 @@ int erofsmount_umount(char *target)
 
 	for (s = NULL; (getline(&s, &n, mounts)) > 0;) {
 		bool hit = false;
-		char *f1, *f2, *end;
+		char *f1, *f2 = NULL, *end;
 
 		f1 = s;
 		end = strchr(f1, ' ');
@@ -2088,31 +2317,48 @@ int erofsmount_umount(char *target)
 				hit = true;
 		}
 		if (hit) {
-			if (isblk) {
-				err = -EBUSY;
-				free(s);
-				fclose(mounts);
-				goto err_out;
-			}
 			free(device);
 			device = strdup(f1);
-			if (!mountpoint)
-				mountpoint = strdup(f2);
+			free(mountpoint);
+			mountpoint = f2 ? strdup(f2) : NULL;
 		}
 	}
 	free(s);
 	fclose(mounts);
+
+	if (isblk && !device) {
+		if (S_ISBLK(st.st_mode) && major(st.st_rdev) == EROFS_NBD_MAJOR) {
+			nbdnum = erofs_nbd_get_index_from_minor(minor(st.st_rdev));
+			err = erofs_nbd_nl_disconnect(nbdnum);
+			if (err != -EOPNOTSUPP)
+				goto err_out;
+		}
+		err = ublk_dev_id_from_path(target);
+		if (err >= 0) {
+			err = erofs_ublk_del_dev_by_id(err);
+			goto err_out;
+		}
+		err = -ENOENT;
+		goto err_out;
+	}
+
 	if (!isblk && !device) {
 		err = -ENOENT;
 		goto err_out;
 	}
 
-	if (isblk && !mountpoint &&
-	    S_ISBLK(st.st_mode) && major(st.st_rdev) == EROFS_NBD_MAJOR) {
-		nbdnum = erofs_nbd_get_index_from_minor(minor(st.st_rdev));
-		err = erofs_nbd_nl_disconnect(nbdnum);
-		if (err != -EOPNOTSUPP)
-			return err;
+	err = ublk_dev_id_from_path(device);
+	if (err >= 0) {
+		if (mountpoint) {
+			int ret = umount(mountpoint);
+
+			if (ret) {
+				err = -errno;
+				goto err_out;
+			}
+		}
+		err = erofs_ublk_del_dev_by_id(err);
+		goto err_out;
 	}
 
 	/* Avoid TOCTOU issue with NBD_CFLAG_DISCONNECT_ON_CLOSE */
@@ -2224,13 +2470,20 @@ int main(int argc, char *argv[])
 		goto exit;
 	}
 
-	if (mountcfg.backend == EROFSNBD) {
+	if (mountcfg.backend == EROFSNBD || mountcfg.backend == EROFSUBLK) {
 		if (mountsrc.type == EROFSMOUNT_SOURCE_OCI)
 			mountsrc.ocicfg.image_ref = mountcfg.device;
 		else
 			mountsrc.device_path = mountcfg.device;
-		err = erofsmount_nbd(&mountsrc, mountcfg.target,
-				     mountcfg.fstype, mountcfg.flags, mountcfg.options);
+
+		if (mountcfg.backend == EROFSNBD)
+			err = erofsmount_nbd(&mountsrc, mountcfg.target,
+					     mountcfg.fstype, mountcfg.flags,
+					     mountcfg.options);
+		else
+			err = erofsmount_ublk(&mountsrc, mountcfg.target,
+					      mountcfg.fstype, mountcfg.flags,
+					      mountcfg.options);
 		goto exit;
 	}
 
-- 
2.43.5
