On Sun, Aug 18, 2019 at 12:21 AM Nir Soffer <nir...@gmail.com> wrote:
> Implement alignment probing similar to file-posix, by reading from the > first 4k of the image. > > Before this change, provisioning a VM on storage with sector size of > 4096 bytes would fail when the installer try to create filesystems. Here > is an example command that reproduces this issue: > > $ qemu-system-x86_64 -accel kvm -m 2048 -smp 2 \ > -drive > file=gluster://gluster1/gv0/fedora29.raw,format=raw,cache=none \ > -cdrom Fedora-Server-dvd-x86_64-29-1.2.iso > > The installer fails in few seconds when trying to create filesystem on > /dev/mapper/fedora-root. In error report we can see that it failed with > EINVAL (I could not extract the error from guest). > > Copying disk fails with EINVAL: > > $ qemu-img convert -p -f raw -O raw -t none -T none \ > gluster://gluster1/gv0/fedora29.raw \ > gluster://gluster1/gv0/fedora29-clone.raw > qemu-img: error while writing sector 4190208: Invalid argument > > This is a fix to same issue fixed in commit a6b257a08e3d (file-posix: > Handle undetectable alignment) for gluster:// images. > > This fix has the same limit, that the first block of the image should be > allocated, otherwise we cannot detect the alignment and fallback to a > safe value (4096) even when using storage with sector size of 512 bytes. > > Signed-off-by: Nir Soffer <nsof...@redhat.com> > --- > block/gluster.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++ > 1 file changed, 47 insertions(+) > > diff --git a/block/gluster.c b/block/gluster.c > index f64dc5b01e..d936240b72 100644 > --- a/block/gluster.c > +++ b/block/gluster.c > @@ -52,6 +52,9 @@ > > #define GERR_INDEX_HINT "hint: check in 'server' array index '%d'\n" > > +/* The value is known only on the server side. */ > +#define MAX_ALIGN 4096 > + > typedef struct GlusterAIOCB { > int64_t size; > int ret; > @@ -902,8 +905,52 @@ out: > return ret; > } > > +/* > + * Check if read is allowed with given memory buffer and length. > + * > + * This function is used to check O_DIRECT request alignment. > + */ > +static bool gluster_is_io_aligned(struct glfs_fd *fd, void *buf, size_t > len) > +{ > + ssize_t ret = glfs_pread(fd, buf, len, 0, 0, NULL); > + return ret >= 0 || errno != EINVAL; > +} > + > +static void gluster_probe_alignment(BlockDriverState *bs, struct glfs_fd > *fd, > + Error **errp) > +{ > + char *buf; > + size_t alignments[] = {1, 512, 1024, 2048, 4096}; > + size_t align; > + int i; > + > + buf = qemu_memalign(MAX_ALIGN, MAX_ALIGN); > + > + for (i = 0; i < ARRAY_SIZE(alignments); i++) { > + align = alignments[i]; > + if (gluster_is_io_aligned(fd, buf, align)) { > + /* Fallback to safe value. */ > + bs->bl.request_alignment = (align != 1) ? align : MAX_ALIGN; > + break; > + } > + } > + > + qemu_vfree(buf); > + > + if (!bs->bl.request_alignment) { > + error_setg(errp, "Could not find working O_DIRECT alignment"); > + error_append_hint(errp, "Try cache.direct=off\n"); > + } > +} > + > static void qemu_gluster_refresh_limits(BlockDriverState *bs, Error > **errp) > { > + BDRVGlusterState *s = bs->opaque; > + > + gluster_probe_alignment(bs, s->fd, errp); > + > + bs->bl.min_mem_alignment = bs->bl.request_alignment; > + bs->bl.opt_mem_alignment = MAX(bs->bl.request_alignment, MAX_ALIGN); > bs->bl.max_transfer = GLUSTER_MAX_TRANSFER; > } > > -- > 2.20.1 > > To debug this I added this temporary patch: diff --git a/block/gluster.c b/block/gluster.c index d2d187490b..790ef4251b 100644 --- a/block/gluster.c +++ b/block/gluster.c @@ -912,6 +912,7 @@ out: static bool gluster_is_io_aligned(struct glfs_fd *fd, void *buf, size_t len) { ssize_t ret = glfs_pread(fd, buf, len, 0, 0, NULL); + printf("gluster_is_io_aligned len=%ld ret=%ld errno=%d\n", len, ret, errno); return ret >= 0 || errno != EINVAL; } @@ -940,6 +941,9 @@ static void gluster_probe_alignment(BlockDriverState *bs, struct glfs_fd *fd, error_setg(errp, "Could not find working O_DIRECT alignment"); error_append_hint(errp, "Try cache.direct=off\n"); } + + printf("Probed aligment for %s request_alignment=%d\n", + bs->filename, bs->bl.request_alignment); } static void qemu_gluster_refresh_limits(BlockDriverState *bs, Error **errp) Here is example run with volume with sector size of 512 bytes: $ sudo mount -t glusterfs gluster1:/gv1 /tmp/gv1 $ dd if=/dev/zero bs=1M count=100 | tr "\0" "x" > /tmp/gv1/src.raw $ truncate -s 100m /tmp/gv1/dst.raw $ dd if=/dev/zero bs=1 count=1 of=/tmp/gv1/dst.raw conv=notrunc $ ./qemu-img convert -n -f raw -O raw -t none -T none gluster://gluster1/gv1/src.raw gluster://gluster1/gv1/dst.raw gluster_is_io_aligned len=1 ret=-1 errno=22 gluster_is_io_aligned len=512 ret=512 errno=0 Probed aligment for gluster://gluster1/gv1/src.raw request_alignment=512 gluster_is_io_aligned len=1 ret=-1 errno=22 gluster_is_io_aligned len=512 ret=512 errno=0 Probed aligment for gluster://gluster1/gv1/dst.raw request_alignment=512 And with volume with sector size of 4096 bytes: $ sudo mount -t glusterfs gluster1:/gv0 /tmp/gv0 $ dd if=/dev/zero bs=1M count=100 | tr "\0" "x" > /tmp/gv0/src.raw $ truncate -s 100m /tmp/gv0/dst.raw $ dd if=/dev/zero bs=1 count=1 of=/tmp/gv0/dst.raw conv=notrunc $ ./qemu-img convert -n -f raw -O raw -t none -T none gluster://gluster1/gv0/src.raw gluster://gluster1/gv0/dst.raw gluster_is_io_aligned len=1 ret=-1 errno=22 gluster_is_io_aligned len=512 ret=-1 errno=22 gluster_is_io_aligned len=1024 ret=-1 errno=22 gluster_is_io_aligned len=2048 ret=-1 errno=22 gluster_is_io_aligned len=4096 ret=4096 errno=0 Probed aligment for gluster://gluster1/gv0/src.raw request_alignment=4096 gluster_is_io_aligned len=1 ret=-1 errno=22 gluster_is_io_aligned len=512 ret=-1 errno=22 gluster_is_io_aligned len=1024 ret=-1 errno=22 gluster_is_io_aligned len=2048 ret=-1 errno=22 gluster_is_io_aligned len=4096 ret=4096 errno=0 Probed aligment for gluster://gluster1/gv0/dst.raw request_alignment=4096