Am 18.02.2026 um 14:26 hat Hanna Czenczek geschrieben:
> Manually read requests from the /dev/fuse FD and process them, without
> using libfuse.  This allows us to safely add parallel request processing
> in coroutines later, without having to worry about libfuse internals.
> (Technically, we already have exactly that problem with
> read_from_fuse_export()/read_from_fuse_fd() nesting.)
> 
> We will continue to use libfuse for mounting the filesystem; fusermount3
> is effectively a helper program of libfuse, so it should know best how
> to interact with it.  (Doing it manually without libfuse, while doable,
> is a bit of a pain, and it is not clear to me how stable the "protocol"
> actually is.)
> 
> Take this opportunity of quite a major rewrite to update the Copyright
> line with corrected information that has surfaced in the meantime.
> 
> Here are some benchmarks from before this patch (4k, iodepth=16, libaio;
> except 'sync', which are iodepth=1 and pvsync2):
> 
> file:
>   read:
>     seq aio:    99.8k ±1.5k IOPS
>     rand aio:   50.5k ±1.0k
>     seq sync:   36.1k ±1.1k
>     rand sync:  10.0k ±0.1k
>   write:
>     seq aio:    72.0k ±9.3k
>     rand aio:   70.6k ±2.5k
>     seq sync:   30.6k ±0.8k
>     rand sync:  30.1k ±1.0k
> null:
>   read:
>     seq aio:   157.9k ±4.7k
>     rand aio:  158.7k ±4.8k
>     seq sync:   80.2k ±2.8k
>     rand sync:  77.5k ±3.8k
>   write:
>     seq aio:   154.3k ±3.6k
>     rand aio:  154.3k ±4.2k
>     seq sync:   76.1k ±5.2k
>     rand sync:  72.9k ±4.0k
> 
> And with this patch applied:
> 
> file:
>   read:
>     seq aio:   106.8k ±1.9k (+7%)
>     rand aio:   48.3k ±8.8k (-4%)
>     seq sync:   35.5k ±1.4k (-2%)
>     rand sync:  10.0k ±0.2k (±0%)
>   write:
>     seq aio:    76.3k ±6.6k (+6%)
>     rand aio:   76.4k ±1.5k (+8%)
>     seq sync:   31.6k ±0.6k (+3%)
>     rand sync:  30.9k ±0.8k (+3%)
> null:
>   read:
>     seq aio:   161.7k ±6.0k (+2%)
>     rand aio:  165.6k ±7.1k (+4%)
>     seq sync:   80.5k ±3.0k (±0%)
>     rand sync:  78.5k ±3.1k (+1%)
>   write:
>     seq aio:   185.1k ±3.3k (+20%)
>     rand aio:  186.7k ±4.8k (+21%)
>     seq sync:   82.5k ±4.2k (+8%)
>     rand sync:  78.7k ±3.2k (+8%)
> 
> So not much difference, aside from write AIO to a null-co export getting
> a bit better.
> 
> Signed-off-by: Hanna Czenczek <[email protected]>
> ---
>  block/export/fuse.c | 944 +++++++++++++++++++++++++++++++++-----------
>  1 file changed, 720 insertions(+), 224 deletions(-)
> 
> diff --git a/block/export/fuse.c b/block/export/fuse.c
> index af0a8de17b..c481fb72a2 100644
> --- a/block/export/fuse.c
> +++ b/block/export/fuse.c
> @@ -1,7 +1,7 @@
>  /*
>   * Present a block device as a raw image through FUSE
>   *
> - * Copyright (c) 2020 Max Reitz <[email protected]>
> + * Copyright (c) 2020, 2025 Hanna Czenczek <[email protected]>
>   *
>   * This program is free software; you can redistribute it and/or modify
>   * it under the terms of the GNU General Public License as published by
> @@ -27,12 +27,15 @@
>  #include "block/qapi.h"
>  #include "qapi/error.h"
>  #include "qapi/qapi-commands-block.h"
> +#include "qemu/error-report.h"
>  #include "qemu/main-loop.h"
>  #include "system/block-backend.h"
>  
>  #include <fuse.h>
>  #include <fuse_lowlevel.h>
>  
> +#include "standard-headers/linux/fuse.h"
> +
>  #if defined(CONFIG_FALLOCATE_ZERO_RANGE)
>  #include <linux/falloc.h>
>  #endif
> @@ -42,17 +45,102 @@
>  #endif
>  
>  /* Prevent overly long bounce buffer allocations */
> -#define FUSE_MAX_BOUNCE_BYTES (MIN(BDRV_REQUEST_MAX_BYTES, 64 * 1024 * 1024))
> +#define FUSE_MAX_READ_BYTES (MIN(BDRV_REQUEST_MAX_BYTES, 64 * 1024 * 1024))
> +/* Small enough to fit in the request buffer */
> +#define FUSE_MAX_WRITE_BYTES (64 * 1024)

Is the comment stale now that you moved to two separate buffers?

>  /**
> - * Handle client reads from the exported image.
> + * Handle client reads from the exported image.  Allocates *bufptr and reads
> + * data from the block device into that buffer.
> + * Returns the buffer (read) size on success, and -errno on error.
> + * After use, *bufptr must be freed via qemu_vfree().
>   */
> -static void fuse_read(fuse_req_t req, fuse_ino_t inode,
> -                      size_t size, off_t offset, struct fuse_file_info *fi)
> +static ssize_t fuse_read(FuseExport *exp, void **bufptr,
> +                         uint64_t offset, uint32_t size)
>  {
> -    FuseExport *exp = fuse_req_userdata(req);
>      int64_t blk_len;
>      void *buf;
>      int ret;
>  
>      /* Limited by max_read, should not happen */
> -    if (size > FUSE_MAX_BOUNCE_BYTES) {
> -        fuse_reply_err(req, EINVAL);
> -        return;
> +    if (size > FUSE_MAX_READ_BYTES) {
> +        return -EINVAL;
>      }
>  
>      /**
> @@ -653,18 +954,12 @@ static void fuse_read(fuse_req_t req, fuse_ino_t inode,
>       */
>      blk_len = blk_getlength(exp->common.blk);
>      if (blk_len < 0) {
> -        fuse_reply_err(req, -blk_len);
> -        return;
> +        return blk_len;
>      }
>  
>      if (offset >= blk_len) {
> -        /*
> -         * Technically libfuse does not allow returning a zero error code for
> -         * read requests, but in practice this is a 0-length read (and a future
> -         * commit will change this code anyway)
> -         */
> -        fuse_reply_err(req, 0);
> -        return;
> +        *bufptr = NULL;
> +        return 0;

It feels a bit inconsistent to set *bufptr = NULL here, but not in the
error paths. Both cases depend on it being NULL afterwards, but the
caller already makes sure that it is NULL when it calls fuse_read().

>      }
>  
>      if (offset + size > blk_len) {

Overall, this feels much nicer than v3!

Kevin


Reply via email to