When we turned the result of .can_zero into a tri-state for filters back in v1.16, the intention was that the backend would emulate by calling into .pwrite instead of .zero. The nozero filter already had an implementation of that algorithm, but it needs to live in backend.c to be used by all filters, rather than repeatedly recoded in each filter that wants .zero support by .pwrite emulation.
Discovered because the luks filter asked for .pwrite emulation in .can_zero without providing a .zero override, resulting in: $ qemu-img create -f luks --object secret,data=LETMEPASS,id=sec0 -o key-secret=sec0 encrypted.img 100M Formatting 'encrypted.img', fmt=luks size=104857600 key-secret=sec0 $ rm -f data.img $ truncate -s 100M data.img $ nbdkit file encrypted.img --filter=luks passphrase=LETMEPASS --run 'nbdcopy data.img $nbd' nbdkit: backend.c:718: backend_zero: Assertion `c->can_zero > NBDKIT_ZERO_NONE' failed. write at offset 0 failed: Transport endpoint is not connected nbdkit: nbdkit command was killed by signal 6 As a result of moving it into the backend, the nozero filter is simplified, but the corresponding test has to change. Since --filter=log does not alter .can_zero, the emulation actually occurs earlier in the stack (prior to calling into any filter, regardless of whether log is placed before or after nozero, rather than at the point of the nozero filter), so there is now no discernable difference between sock3 and sock4 (we can eliminate the duplicate), and the old sock5a no longer shows a Zero request. [Historically, when the nozero filter was first written, we were relying on qemu to send packets, and the use of the log filter was essential to ensure that we were getting the desired NBD_CMD_WRITE_ZEROES across various versions of qemu; but now that the test uses nbdkit, we know that h.zero() is doing exactly that, so the reduced length of the testsuite is no longer risky.] Reported-by: Richard W.M. Jones <[email protected]> Fixes: fd361554 ("filters: Cache and change semantics of can_zero", v1.15.1) --- docs/nbdkit-filter.pod | 4 +-- server/backend.c | 37 ++++++++++++++++++- filters/nozero/nozero.c | 39 +++------------------ tests/test-nozero.sh | 78 +++++++++++++++++------------------------ 4 files changed, 75 insertions(+), 83 deletions(-) diff --git a/docs/nbdkit-filter.pod b/docs/nbdkit-filter.pod index 4ce5bda0..c4ec1004 100644 --- a/docs/nbdkit-filter.pod +++ b/docs/nbdkit-filter.pod @@ -898,8 +898,8 @@ This intercepts the plugin C<.zero> method and can be used to modify zero requests. This function will not be called if C<.can_zero> returned -C<NBDKIT_ZERO_NONE>; in turn, the filter should not call -C<next-E<gt>zero> if C<next-E<gt>can_zero> returned +C<NBDKIT_ZERO_NONE> or C<NBDKIT_ZERO_EMULATE>; in turn, the filter +should not call C<next-E<gt>zero> if C<next-E<gt>can_zero> returned C<NBDKIT_ZERO_NONE>. On input, the parameter C<flags> may include C<NBDKIT_FLAG_MAY_TRIM> diff --git a/server/backend.c b/server/backend.c index 7d46b99d..30ec4c24 100644 --- a/server/backend.c +++ b/server/backend.c @@ -728,7 +728,42 @@ backend_zero (struct context *c, b->name, count, offset, !!(flags & NBDKIT_FLAG_MAY_TRIM), fua, fast); - r = b->zero (c, count, offset, flags, err); + if (c->can_zero == NBDKIT_ZERO_NATIVE) + r = b->zero (c, count, offset, flags, err); + else { /* NBDKIT_ZERO_EMULATE */ + int writeflags = 0; + bool need_flush = false; + + if (fast) { + *err = ENOTSUP; + return -1; + } + + if (fua) { + if (c->can_fua == NBDKIT_FUA_EMULATE) + need_flush = true; + else + writeflags = NBDKIT_FLAG_FUA; + } + + while (count) { + /* Always contains zeroes, but we can't use const or else gcc 9 + * will use .rodata instead of .bss and inflate the binary size. + */ + static /* const */ char buffer[MAX_REQUEST_SIZE]; + uint32_t limit = MIN (count, MAX_REQUEST_SIZE); + + if (limit == count && need_flush) + writeflags = NBDKIT_FLAG_FUA; + + if (backend_pwrite (c, buffer, limit, offset, writeflags, err) == -1) + return -1; + offset += limit; + count -= limit; + } + r = 0; + } + if (r == -1) { assert (*err); if (!fast) diff --git a/filters/nozero/nozero.c b/filters/nozero/nozero.c index 7c5d67c4..5c04975b 100644 --- a/filters/nozero/nozero.c +++ b/filters/nozero/nozero.c @@ -1,5 +1,5 @@ /* nbdkit - * Copyright (C) 2018-2020 Red Hat Inc. + * Copyright (C) 2018-2022 Red Hat Inc. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions are @@ -49,8 +49,6 @@ #undef IGNORE #endif -#define MAX_WRITE (64 * 1024 * 1024) - static enum ZeroMode { NONE, EMULATE, @@ -159,14 +157,10 @@ nozero_zero (nbdkit_next *next, void *handle, uint32_t count, uint64_t offs, uint32_t flags, int *err) { - int writeflags = 0; - bool need_flush = false; - - assert (zeromode != NONE); + assert (zeromode != NONE && zeromode != EMULATE); if (flags & NBDKIT_FLAG_FAST_ZERO) { assert (fastzeromode != NOFAST); - if (fastzeromode == SLOW || - (fastzeromode == DEFAULT && zeromode == EMULATE)) { + if (fastzeromode == SLOW) { *err = ENOTSUP; return -1; } @@ -177,32 +171,7 @@ nozero_zero (nbdkit_next *next, if (zeromode == NOTRIM) flags &= ~NBDKIT_FLAG_MAY_TRIM; - if (zeromode != EMULATE) - return next->zero (next, count, offs, flags, err); - - if (flags & NBDKIT_FLAG_FUA) { - if (next->can_fua (next) == NBDKIT_FUA_EMULATE) - need_flush = true; - else - writeflags = NBDKIT_FLAG_FUA; - } - - while (count) { - /* Always contains zeroes, but we can't use const or else gcc 9 - * will use .rodata instead of .bss and inflate the binary size. - */ - static /* const */ char buffer[MAX_WRITE]; - uint32_t size = MIN (count, MAX_WRITE); - - if (size == count && need_flush) - writeflags = NBDKIT_FLAG_FUA; - - if (next->pwrite (next, buffer, size, offs, writeflags, err) == -1) - return -1; - offs += size; - count -= size; - } - return 0; + return next->zero (next, count, offs, flags, err); } static struct nbdkit_filter filter = { diff --git a/tests/test-nozero.sh b/tests/test-nozero.sh index 61911a42..5940ad33 100755 --- a/tests/test-nozero.sh +++ b/tests/test-nozero.sh @@ -1,6 +1,6 @@ #!/usr/bin/env bash # nbdkit -# Copyright (C) 2018-2020 Red Hat Inc. +# Copyright (C) 2018-2022 Red Hat Inc. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions are @@ -36,17 +36,15 @@ set -x sock2=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX) sock3=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX) -sock4=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX) -sock5a=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX) -sock5b=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX) -sock6=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX) +sock4a=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX) +sock4b=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX) +sock5=$(mktemp -u /tmp/nbdkit-test-sock.XXXXXX) files="nozero1.img nozero1.log nozero2.img nozero2.log $sock2 nozero2.pid nozero3.img nozero3.log $sock3 nozero3.pid - nozero4.img nozero4.log $sock4 nozero4.pid - nozero5.img nozero5a.log nozero5b.log $sock5a $sock5b - nozero5a.pid nozero5b.pid - nozero6.img nozero6.log $sock6 nozero6.pid" + nozero4.img nozero4a.log nozero4b.log $sock4a $sock4b + nozero4a.pid nozero4b.pid + nozero5.img nozero5.log $sock5 nozero5.pid" rm -f $files fail=0 @@ -60,14 +58,12 @@ cleanup () cat nozero2.log || : echo "Log 3 file contents:" cat nozero3.log || : - echo "Log 4 file contents:" - cat nozero4.log || : - echo "Log 5a file contents:" - cat nozero5a.log || : - echo "Log 5b file contents:" - cat nozero5b.log || : - echo "Log 6 file contents:" - cat nozero6.log || : + echo "Log 4a file contents:" + cat nozero4a.log || : + echo "Log 4b file contents:" + cat nozero4b.log || : + echo "Log 5 file contents:" + cat nozero5.log || : rm -f $files } cleanup_fn cleanup @@ -79,10 +75,9 @@ cp nozero1.img nozero2.img cp nozero1.img nozero3.img cp nozero1.img nozero4.img cp nozero1.img nozero5.img -cp nozero1.img nozero6.img # Debug number of blocks and block size in the images. -for f in {1..6}; do +for f in {1..5}; do stat -c "%n: %b allocated blocks of size %B bytes, total size %s" \ nozero$f.img sizes[$f]=$(stat -c %b nozero$f.img) @@ -99,24 +94,21 @@ fi # Run several parallel nbdkit; to compare the logs and see what changes. # 1: unfiltered (above), check that nbdsh sends ZERO request and plugin trims # 2: log before filter with zeromode=none (default), to ensure no ZERO request -# 3: log before filter with zeromode=emulate, to ensure ZERO from client -# 4: log after filter with zeromode=emulate, to ensure no ZERO to plugin -# 5a/b: both sides of nbd plugin: even though server side does not advertise +# 3: log after filter with zeromode=emulate, to ensure no ZERO to plugin +# 4a/b: both sides of nbd plugin: even though server side does not advertise # ZERO, the client side still exposes it, and just skips calling nbd's .zero -# 6: log after filter with zeromode=notrim, to ensure plugin does not trim +# 5: log after filter with zeromode=notrim, to ensure plugin does not trim start_nbdkit -P nozero2.pid -U $sock2 --filter=log --filter=nozero \ file logfile=nozero2.log nozero2.img -start_nbdkit -P nozero3.pid -U $sock3 --filter=log --filter=nozero \ +start_nbdkit -P nozero3.pid -U $sock3 --filter=nozero --filter=log \ file logfile=nozero3.log nozero3.img zeromode=emulate -start_nbdkit -P nozero4.pid -U $sock4 --filter=nozero --filter=log \ - file logfile=nozero4.log nozero4.img zeromode=emulate -# Start 5b before 5a so that cleanup visits the client before the server -start_nbdkit -P nozero5b.pid -U $sock5b --filter=log \ - nbd logfile=nozero5b.log socket=$sock5a -start_nbdkit -P nozero5a.pid -U $sock5a --filter=log --filter=nozero \ - file logfile=nozero5a.log nozero5.img -start_nbdkit -P nozero6.pid -U $sock6 --filter=nozero --filter=log \ - file logfile=nozero6.log nozero6.img zeromode=notrim +# Start 4b before 4a so that cleanup visits the client before the server +start_nbdkit -P nozero4b.pid -U $sock4b --filter=log \ + nbd logfile=nozero4b.log socket=$sock4a +start_nbdkit -P nozero4a.pid -U $sock4a --filter=log --filter=nozero \ + file logfile=nozero4a.log nozero4.img +start_nbdkit -P nozero5.pid -U $sock5 --filter=nozero --filter=log \ + file logfile=nozero5.log nozero5.img zeromode=notrim # Perform the zero write. nbdsh -u "nbd+unix://?socket=$sock2" -c ' @@ -124,38 +116,34 @@ assert not h.can_zero() h.pwrite (bytearray(1024*1024), 0) ' nbdsh -u "nbd+unix://?socket=$sock3" -c 'h.zero(1024*1024, 0)' -nbdsh -u "nbd+unix://?socket=$sock4" -c 'h.zero(1024*1024, 0)' -nbdsh -u "nbd+unix://?socket=$sock5b" -c 'h.zero(1024*1024, 0)' -nbdsh -u "nbd+unix://?socket=$sock6" -c 'h.zero(1024*1024, 0)' +nbdsh -u "nbd+unix://?socket=$sock4b" -c 'h.zero(1024*1024, 0)' +nbdsh -u "nbd+unix://?socket=$sock5" -c 'h.zero(1024*1024, 0)' # Check for expected ZERO vs. WRITE results -grep 'connection=1 Zero' nozero1.log +grep 'connection=1 Zero' nozero1.log || fail=1 if grep 'connection=1 Zero' nozero2.log; then echo "filter should have prevented zero" fail=1 fi -grep 'connection=1 Zero' nozero3.log -if grep 'connection=1 Zero' nozero4.log; then +if grep 'connection=1 Zero' nozero3.log; then echo "filter should have converted zero into write" fail=1 fi -grep 'connection=1 Zero' nozero5b.log -if grep 'connection=1 Zero' nozero5a.log; then +if grep 'connection=1 Zero' nozero4a.log; then echo "nbdkit should have converted zero into write before nbd plugin" fail=1 fi -grep 'connection=1 Zero' nozero6.log +grep 'connection=1 Zero' nozero5.log # Sanity check on contents - all 5 files should read identically cmp nozero1.img nozero2.img cmp nozero2.img nozero3.img cmp nozero3.img nozero4.img cmp nozero4.img nozero5.img -cmp nozero5.img nozero6.img -# Sanity check on sparseness: images 2-6 should not be sparse (although the +# Sanity check on sparseness: images 2-5 should not be sparse (although the # filesystem may have reserved additional space due to our writes) -for i in {2..6}; do +for i in {2..5}; do if test "$(stat -c %b nozero$i.img)" -lt "${sizes[$i]}"; then echo "nozero$i.img was trimmed by mistake" fail=1 -- 2.36.1 _______________________________________________ Libguestfs mailing list [email protected] https://listman.redhat.com/mailman/listinfo/libguestfs
