On Thu, Aug 13, 2020 at 7:36 PM Kevin Wolf <[email protected]> wrote:
> Instead of implementing qemu-nbd --offset in the NBD code, just put a
> raw block node with the requested offset on top of the user image and
> rely on that doing the job.
>
> This does not only simplify the nbd_export_new() interface and bring it
> closer to the set of options that the nbd-server-add QMP command offers,
> but in fact it also eliminates a potential source for bugs in the NBD
> code which previously had to add the offset manually in all relevant
> places.
>
Just to make sure I understand this correctly -
qemu-nbd can work with:
$ qemu-nbd 'json:{"driver": "file", "filename": "test.raw"}'
And:
$ qemu-nbd 'json:{"driver": "raw", "file": {"driver": "file",
"filename": "test.raw"}}'
I assumed that we always create the raw node?
oVirt always uses the second form to make it easier to support offset,
size, and backing.
https://github.com/oVirt/ovirt-imageio/blob/2021164d064227d7c5e03c8da087adc66e3a577e/daemon/ovirt_imageio/_internal/qemu_nbd.py#L104
This also seems to be the way libvirt builds the nodes using -blockdev.
Do we have a way to visualize the internal node graph used by
qemu-nbd/qemu-img?
Nir
Signed-off-by: Kevin Wolf <[email protected]>
> ---
> include/block/nbd.h | 4 ++--
> blockdev-nbd.c | 9 +--------
> nbd/server.c | 34 +++++++++++++++++-----------------
> qemu-nbd.c | 27 ++++++++++++---------------
> 4 files changed, 32 insertions(+), 42 deletions(-)
>
> diff --git a/include/block/nbd.h b/include/block/nbd.h
> index c8c5cb6b61..3846d2bac8 100644
> --- a/include/block/nbd.h
> +++ b/include/block/nbd.h
> @@ -329,8 +329,8 @@ typedef struct NBDExport NBDExport;
> typedef struct NBDClient NBDClient;
>
> BlockExport *nbd_export_create(BlockExportOptions *exp_args, Error
> **errp);
> -NBDExport *nbd_export_new(BlockDriverState *bs, uint64_t dev_offset,
> - uint64_t size, const char *name, const char
> *desc,
> +NBDExport *nbd_export_new(BlockDriverState *bs,
> + const char *name, const char *desc,
> const char *bitmap, bool readonly, bool shared,
> void (*close)(NBDExport *), bool writethrough,
> BlockBackend *on_eject_blk, Error **errp);
> diff --git a/blockdev-nbd.c b/blockdev-nbd.c
> index a1dc11bdd7..16cda3b052 100644
> --- a/blockdev-nbd.c
> +++ b/blockdev-nbd.c
> @@ -154,7 +154,6 @@ BlockExport *nbd_export_create(BlockExportOptions
> *exp_args, Error **errp)
> BlockDriverState *bs = NULL;
> BlockBackend *on_eject_blk;
> NBDExport *exp = NULL;
> - int64_t len;
> AioContext *aio_context;
>
> assert(exp_args->type == BLOCK_EXPORT_TYPE_NBD);
> @@ -192,12 +191,6 @@ BlockExport *nbd_export_create(BlockExportOptions
> *exp_args, Error **errp)
>
> aio_context = bdrv_get_aio_context(bs);
> aio_context_acquire(aio_context);
> - len = bdrv_getlength(bs);
> - if (len < 0) {
> - error_setg_errno(errp, -len,
> - "Failed to determine the NBD export's length");
> - goto out;
> - }
>
> if (!arg->has_writable) {
> arg->writable = false;
> @@ -206,7 +199,7 @@ BlockExport *nbd_export_create(BlockExportOptions
> *exp_args, Error **errp)
> arg->writable = false;
> }
>
> - exp = nbd_export_new(bs, 0, len, arg->name, arg->description,
> arg->bitmap,
> + exp = nbd_export_new(bs, arg->name, arg->description, arg->bitmap,
> !arg->writable, !arg->writable,
> NULL, false, on_eject_blk, errp);
> if (!exp) {
> diff --git a/nbd/server.c b/nbd/server.c
> index 774325dbe5..92360d1f08 100644
> --- a/nbd/server.c
> +++ b/nbd/server.c
> @@ -89,7 +89,6 @@ struct NBDExport {
> BlockBackend *blk;
> char *name;
> char *description;
> - uint64_t dev_offset;
> uint64_t size;
> uint16_t nbdflags;
> QTAILQ_HEAD(, NBDClient) clients;
> @@ -1507,8 +1506,8 @@ static void nbd_eject_notifier(Notifier *n, void
> *data)
> aio_context_release(aio_context);
> }
>
> -NBDExport *nbd_export_new(BlockDriverState *bs, uint64_t dev_offset,
> - uint64_t size, const char *name, const char
> *desc,
> +NBDExport *nbd_export_new(BlockDriverState *bs,
> + const char *name, const char *desc,
> const char *bitmap, bool readonly, bool shared,
> void (*close)(NBDExport *), bool writethrough,
> BlockBackend *on_eject_blk, Error **errp)
> @@ -1516,9 +1515,17 @@ NBDExport *nbd_export_new(BlockDriverState *bs,
> uint64_t dev_offset,
> AioContext *ctx;
> BlockBackend *blk;
> NBDExport *exp;
> + int64_t size;
> uint64_t perm;
> int ret;
>
> + size = bdrv_getlength(bs);
> + if (size < 0) {
> + error_setg_errno(errp, -size,
> + "Failed to determine the NBD export's length");
> + return NULL;
> + }
> +
> exp = g_new0(NBDExport, 1);
> exp->common = (BlockExport) {
> .drv = &blk_exp_nbd,
> @@ -1553,8 +1560,6 @@ NBDExport *nbd_export_new(BlockDriverState *bs,
> uint64_t dev_offset,
> exp->refcount = 1;
> QTAILQ_INIT(&exp->clients);
> exp->blk = blk;
> - assert(dev_offset <= INT64_MAX);
> - exp->dev_offset = dev_offset;
> exp->name = g_strdup(name);
> assert(!desc || strlen(desc) <= NBD_MAX_STRING_SIZE);
> exp->description = g_strdup(desc);
> @@ -1569,7 +1574,7 @@ NBDExport *nbd_export_new(BlockDriverState *bs,
> uint64_t dev_offset,
> exp->nbdflags |= (NBD_FLAG_SEND_TRIM | NBD_FLAG_SEND_WRITE_ZEROES
> |
> NBD_FLAG_SEND_FAST_ZERO);
> }
> - assert(size <= INT64_MAX - dev_offset);
> + assert(size <= INT64_MAX);
> exp->size = QEMU_ALIGN_DOWN(size, BDRV_SECTOR_SIZE);
>
> if (bitmap) {
> @@ -1928,8 +1933,7 @@ static int coroutine_fn
> nbd_co_send_sparse_read(NBDClient *client,
> stl_be_p(&chunk.length, pnum);
> ret = nbd_co_send_iov(client, iov, 1, errp);
> } else {
> - ret = blk_pread(exp->blk, offset + progress + exp->dev_offset,
> - data + progress, pnum);
> + ret = blk_pread(exp->blk, offset + progress, data + progress,
> pnum);
> if (ret < 0) {
> error_setg_errno(errp, -ret, "reading from file failed");
> break;
> @@ -2303,8 +2307,7 @@ static coroutine_fn int nbd_do_cmd_read(NBDClient
> *client, NBDRequest *request,
> data, request->len, errp);
> }
>
> - ret = blk_pread(exp->blk, request->from + exp->dev_offset, data,
> - request->len);
> + ret = blk_pread(exp->blk, request->from, data, request->len);
> if (ret < 0) {
> return nbd_send_generic_reply(client, request->handle, ret,
> "reading from file failed", errp);
> @@ -2339,7 +2342,7 @@ static coroutine_fn int nbd_do_cmd_cache(NBDClient
> *client, NBDRequest *request,
>
> assert(request->type == NBD_CMD_CACHE);
>
> - ret = blk_co_preadv(exp->blk, request->from + exp->dev_offset,
> request->len,
> + ret = blk_co_preadv(exp->blk, request->from, request->len,
> NULL, BDRV_REQ_COPY_ON_READ | BDRV_REQ_PREFETCH);
>
> return nbd_send_generic_reply(client, request->handle, ret,
> @@ -2370,8 +2373,7 @@ static coroutine_fn int nbd_handle_request(NBDClient
> *client,
> if (request->flags & NBD_CMD_FLAG_FUA) {
> flags |= BDRV_REQ_FUA;
> }
> - ret = blk_pwrite(exp->blk, request->from + exp->dev_offset,
> - data, request->len, flags);
> + ret = blk_pwrite(exp->blk, request->from, data, request->len,
> flags);
> return nbd_send_generic_reply(client, request->handle, ret,
> "writing to file failed", errp);
>
> @@ -2386,8 +2388,7 @@ static coroutine_fn int nbd_handle_request(NBDClient
> *client,
> if (request->flags & NBD_CMD_FLAG_FAST_ZERO) {
> flags |= BDRV_REQ_NO_FALLBACK;
> }
> - ret = blk_pwrite_zeroes(exp->blk, request->from + exp->dev_offset,
> - request->len, flags);
> + ret = blk_pwrite_zeroes(exp->blk, request->from, request->len,
> flags);
> return nbd_send_generic_reply(client, request->handle, ret,
> "writing to file failed", errp);
>
> @@ -2401,8 +2402,7 @@ static coroutine_fn int nbd_handle_request(NBDClient
> *client,
> "flush failed", errp);
>
> case NBD_CMD_TRIM:
> - ret = blk_co_pdiscard(exp->blk, request->from + exp->dev_offset,
> - request->len);
> + ret = blk_co_pdiscard(exp->blk, request->from, request->len);
> if (ret == 0 && request->flags & NBD_CMD_FLAG_FUA) {
> ret = blk_co_flush(exp->blk);
> }
> diff --git a/qemu-nbd.c b/qemu-nbd.c
> index d2657b8db5..818c3f5d46 100644
> --- a/qemu-nbd.c
> +++ b/qemu-nbd.c
> @@ -523,7 +523,6 @@ int main(int argc, char **argv)
> const char *port = NULL;
> char *sockpath = NULL;
> char *device = NULL;
> - int64_t fd_size;
> QemuOpts *sn_opts = NULL;
> const char *sn_id_or_name = NULL;
> const char *sopt = "hVb:o:p:rsnc:dvk:e:f:tl:x:T:D:B:L";
> @@ -1028,6 +1027,17 @@ int main(int argc, char **argv)
> }
> bs = blk_bs(blk);
>
> + if (dev_offset) {
> + QDict *raw_opts = qdict_new();
> + qdict_put_str(raw_opts, "driver", "raw");
> + qdict_put_str(raw_opts, "file", bs->node_name);
> + qdict_put_int(raw_opts, "offset", dev_offset);
> + bs = bdrv_open(NULL, NULL, raw_opts, flags, &error_fatal);
> + blk_remove_bs(blk);
> + blk_insert_bs(blk, bs, &error_fatal);
> + bdrv_unref(bs);
> + }
> +
> blk_set_enable_write_cache(blk, !writethrough);
>
> if (sn_opts) {
> @@ -1045,21 +1055,8 @@ int main(int argc, char **argv)
> }
>
> bs->detect_zeroes = detect_zeroes;
> - fd_size = blk_getlength(blk);
> - if (fd_size < 0) {
> - error_report("Failed to determine the image length: %s",
> - strerror(-fd_size));
> - exit(EXIT_FAILURE);
> - }
> -
> - if (dev_offset >= fd_size) {
> - error_report("Offset (%" PRIu64 ") has to be smaller than the
> image "
> - "size (%" PRId64 ")", dev_offset, fd_size);
> - exit(EXIT_FAILURE);
> - }
> - fd_size -= dev_offset;
>
> - export = nbd_export_new(bs, dev_offset, fd_size, export_name,
> + export = nbd_export_new(bs, export_name,
> export_description, bitmap, readonly, shared
> > 1,
> nbd_export_closed, writethrough, NULL,
> &error_fatal);
> --
> 2.25.4
>
>
>