--- Begin Message ---
Package: e2fsprogs
Version: 1.47.2~rc1-2~bpo12+2
Severity: normal
Dear Maintainer,
when removing (rm) a very large file (or possibly just a file whose block
numbers extend past 2^32-1) through fuse2fs, it fails with
FUSE2FS-remove_inode: put ino=12 links=1
fuse: bad error value: 75
unique: 16, error: -34 (Numerical result out of range), outsize: 16
rm: cannot remove 'mount/filler': Numerical result out of range
rm in debugfs does work. Attached patch replaces the deallocate block
logic with that from debugfs.
This possibly points to a bug in the underlying ext2fs_punch routine that
fuse2fs is using, but I haven't tried to investigate further. That
function is also called from punch_helper, so I suspect that using
fallocate to punch holes might have the same problem.
Four files are attached:
rm-fix.patch - the actual fix for this bug
test.sh - a short test script to trigger this bug
inusefile.patch - adds support for the -o inusefile= flag that the test
uses. If you don't apply this patch then you'll need to add a long
sleep after each fusermount -u call instead (about 50 seconds is required
on my system)
lseek.patch - irrelevant to this fix, but the other two patches were built
on top of it; without it they will (probably) still apply, but with line
offsets. This adds support for SEEK_HOLE and SEEK_DATA. I intend to send it
upstream eventually, but I've got no time to write any tests right now - it
is working perfectly for my use case, though.
N.B. The test takes around 10-15 minutes to run (most of that time is spent
in the rm) and requires about 1.5GB of disk space. It will not clean up
properly if it fails part way through.
Tim.
-- System Information:
Debian Release: 12.11
APT prefers stable-security
APT policy: (500, 'stable-security'), (500, 'stable')
Architecture: amd64 (x86_64)
Kernel: Linux 6.1.0-35-amd64 (SMP w/4 CPU threads; PREEMPT)
Kernel taint flags: TAINT_WARN
Locale: LANG=en_GB.UTF-8, LC_CTYPE=en_GB.UTF-8 (charmap=UTF-8), LANGUAGE not set
Shell: /bin/sh linked to /usr/bin/dash
Init: sysvinit (via /sbin/init)
Versions of packages e2fsprogs depends on:
ii libblkid1 2.38.1-5+deb12u3
ii libc6 2.36-9+deb12u10
ii libcom-err2 1.47.0-2
ii libext2fs2 1.47.2~rc1-2~bpo12+2
ii libss2 1.47.0-2
ii libuuid1 2.38.1-5+deb12u3
ii logsave 1.47.0-2
Versions of packages e2fsprogs recommends:
pn e2fsprogs-l10n <none>
Versions of packages e2fsprogs suggests:
pn e2fsck-static <none>
ii fuse2fs 1.47.2~rc1-2~bpo12+2
pn gpart <none>
pn parted <none>
-- no debconf information
diff -urN e2fsprogs-1.47.2~rc1.orig/misc/fuse2fs.c
e2fsprogs-1.47.2~rc1/misc/fuse2fs.c
--- e2fsprogs-1.47.2~rc1.orig/misc/fuse2fs.c 2024-11-29 08:02:27.000000000
+0000
+++ e2fsprogs-1.47.2~rc1/misc/fuse2fs.c 2024-11-29 08:02:27.000000000 +0000
@@ -1237,6 +1237,25 @@
return update_mtime(fs, dir, NULL);
}
+/* Block-iterator callback: release each cluster once, skipping blocks whose
+ * cluster equals the one recorded at *private by the previous call. */
+static int release_blocks_proc(ext2_filsys fs, blk64_t *blocknr,
+			       e2_blkcnt_t blockcnt EXT2FS_ATTR((unused)),
+			       blk64_t ref_block EXT2FS_ATTR((unused)),
+			       int ref_offset EXT2FS_ATTR((unused)),
+			       void *private)
+{
+	blk64_t *prev_cluster = private;
+	const blk64_t blk = *blocknr;
+	const blk64_t this_cluster = EXT2FS_B2C(fs, blk);
+
+	if (this_cluster != *prev_cluster) {
+		*prev_cluster = this_cluster;
+		ext2fs_block_alloc_stats2(fs, blk, -1);
+	}
+	return 0;
+}
+
static int remove_inode(struct fuse2fs *ff, ext2_ino_t ino)
{
ext2_filsys fs = ff->fs;
@@ -1278,8 +1297,11 @@
goto write_out;
if (ext2fs_inode_has_valid_blocks2(fs, (struct ext2_inode *)&inode)) {
- err = ext2fs_punch(fs, ino, (struct ext2_inode *)&inode, NULL,
- 0, ~0ULL);
+ blk64_t last_cluster = 0;
+ ext2fs_block_iterate3(fs, ino, BLOCK_FLAG_READ_ONLY,
+ NULL, release_blocks_proc, &last_cluster);
+// err = ext2fs_punch(fs, ino, (struct ext2_inode *)&inode, NULL,
+// 0, ~0ULL);
if (err) {
ret = translate_error(fs, ino, err);
goto write_out;
#!/bin/bash
# Reproducer for the fuse2fs "rm" failure on a file that fills 2^32-1 1k blocks.
# Creates a 5T sparse ext4 image inside a 3G container image (both mounted with
# fuse2fs), fallocates one huge file in it, remounts, and removes that file.
# Relies on the fuse2fs "-o inusefile=" option to detect when an unmounted
# fuse2fs process has really finished. Does not clean up if a step fails.
set -e
ROOT=mount
DEV=container_mount/pv1
# Feature list used for both file systems.
FEATURES=none,has_journal,ext_attr,dir_index,filetype,extent,64bit,flex_bg,sparse_super,large_file,huge_file,dir_nlink,extra_isize,metadata_csum
rm -f container
rm -fr container_mount
rm -fr mount
mkdir -p container_mount
# create a container fs that can hold a 5T sparse file
truncate -s 3G container
/sbin/mke2fs -t ext4 -O "$FEATURES" -b 4096 container
fuse2fs -o fakeroot -o inusefile=container.inuse container container_mount
mkdir -p "$ROOT"
echo "truncate $(date)"
time truncate -s 5T "${DEV}"
echo "mke2fs $(date)"
time /sbin/mkfs.ext4 -N 1000000 -O "$FEATURES" -b 1024 "$DEV"
echo "fuse2fs ${DEV} ${ROOT} $(date)"
time fuse2fs -o fakeroot -o inusefile="$DEV.inuse" "$DEV" "$ROOT"
echo "make filler $(date) - this is slow"
time fallocate -l 4294967295K "${ROOT}/filler"
echo "fusermount -u $ROOT $(date)"
time fusermount -u "$ROOT"
# fusermount returns before fuse2fs has finished writing; wait for it to exit
while [[ -f "${DEV}.inuse" ]]; do
sleep 10
echo "Waiting for fuse to complete"
done
echo "fuse2fs ${DEV} ${ROOT} $(date)"
time fuse2fs -o fakeroot -o inusefile="$DEV.inuse" "$DEV" "$ROOT"
echo "rm filler $(date) - this is slow"
time rm "${ROOT}/filler"
echo "fusermount -u $ROOT $(date)"
time fusermount -u "$ROOT"
while [[ -f "${DEV}.inuse" ]]; do
sleep 10
echo "Waiting for fuse to complete"
done
fusermount -u container_mount
while [[ -f container.inuse ]]; do
echo "Waiting for container fuse"
sleep 10
done
rm container
rmdir "$ROOT"
rmdir container_mount
exit 0
diff -urN e2fsprogs-1.47.2~rc1.orig/misc/fuse2fs.c
e2fsprogs-1.47.2~rc1/misc/fuse2fs.c
--- e2fsprogs-1.47.2~rc1.orig/misc/fuse2fs.c 2024-11-29 08:02:27.000000000
+0000
+++ e2fsprogs-1.47.2~rc1/misc/fuse2fs.c 2024-11-29 08:02:27.000000000 +0000
@@ -348,6 +348,7 @@
unsigned long offset;
FILE *err_fp;
unsigned int next_generation;
+ char* inusefile;
};
#define FUSE2FS_CHECK_MAGIC(fs, ptr, num) do {if ((ptr)->magic != (num)) \
@@ -3873,6 +3874,7 @@
FUSE2FS_OPT("no_default_opts", no_default_opts, 1),
FUSE2FS_OPT("norecovery", norecovery, 1),
FUSE2FS_OPT("offset=%lu", offset, 0),
+ FUSE2FS_OPT("inusefile=%s", inusefile, 0),
FUSE_OPT_KEY("-V", FUSE2FS_VERSION),
FUSE_OPT_KEY("--version", FUSE2FS_VERSION),
@@ -3914,6 +3916,7 @@
" -o offset=<bytes> similar to mount -o offset=<bytes>, mount
the partition starting at <bytes>\n"
" -o norecovery don't replay the journal (implies ro)\n"
" -o fuse2fs_debug enable fuse2fs debugging\n"
+ " -o inusefile=<file> file to show that fuse is still using the
file system image\n"
"\n",
outargs->argv[0]);
if (key == FUSE2FS_HELPFULL) {
@@ -3987,6 +3990,24 @@
fctx.alloc_all_blocks = 1;
}
+ if(fctx.inusefile) {
+ FILE* inusefile=fopen(fctx.inusefile, "w");
+ if(!inusefile) {
+ fprintf(stderr, "Requested inusefile=%s but couldn't
open the file for writing\n", fctx.inusefile);
+ exit(1);
+ }
+ fclose(inusefile);
+ char* resolved = realpath(fctx.inusefile, NULL);
+ if (!resolved) {
+ perror("realpath");
+ fprintf(stderr, "Could not resolve realpath for
inusefile=%s\n", fctx.inusefile);
+ unlink(fctx.inusefile);
+ exit(1);
+ }
+ free(fctx.inusefile);
+ fctx.inusefile = resolved;
+ }
+
/* Start up the fs (while we still can use stdout) */
ret = 2;
if (!fctx.ro)
@@ -4107,6 +4128,11 @@
com_err(argv[0], err, "while closing fs");
global_fs = NULL;
}
+ if(fctx.inusefile) {
+ err = unlink(fctx.inusefile);
+ if (err)
+ com_err(argv[0], "unlink: %s while unlinking '%s'",
strerror(errno), fctx.inusefile);
+ }
return ret;
}
diff -urN e2fsprogs-1.47.2~rc1.orig/misc/fuse2fs.c
e2fsprogs-1.47.2~rc1/misc/fuse2fs.c
--- e2fsprogs-1.47.2~rc1.orig/misc/fuse2fs.c 2024-11-29 08:02:27.000000000
+0000
+++ e2fsprogs-1.47.2~rc1/misc/fuse2fs.c 2024-11-29 08:02:27.000000000 +0000
@@ -2040,6 +2040,147 @@
return ret;
}
+struct block_context { /* scan state op_lseek hands to dumponeblock */
+ e2_blkcnt_t next_block; /* logical block right after the last one seen */
+ off_t blksize; /* block size in bytes (set from fs->blocksize) */
+ off_t offset; /* file offset the seek starts from */
+ off_t pos; /* byte position the scan has reached so far */
+ off_t next_hole; /* hole result so far; op_lseek starts it at i_size */
+ off_t next_data; /* data result so far; op_lseek starts it at i_size */
+};
+
+/* Block-walk callback for op_lseek: steps over the hole (if any) before
+ * logical block blockcnt, then over the block itself, recording in the
+ * struct block_context at privdata the first hole and the first data at or
+ * after ->offset.  Always returns 0. */
+static int
+dumponeblock(ext2_filsys fs, blk64_t *blocknr, e2_blkcnt_t blockcnt,
+	     blk64_t ref_block, int ref_offset, void * privdata)
+{
+	struct block_context *p = (struct block_context *)privdata;
+	e2_blkcnt_t holesize = blockcnt - p->next_block;
+
+	// Stepping over a hole; holesize == 0 means the blocks are contiguous
+	if (p->pos <= p->offset && p->pos + holesize * p->blksize > p->offset) {
+		// offset is in this hole
+		p->next_hole = p->offset;
+	} else if (holesize > 0 && p->pos > p->offset && p->pos < p->next_hole) {
+		// First hole after offset
+		p->next_hole = p->pos;
+	}
+	p->pos += p->blksize * holesize;
+
+	// A data block
+	p->next_block = blockcnt + 1;
+	if (p->pos <= p->offset && p->pos + p->blksize > p->offset) {
+		// offset is in this data block
+		p->next_data = p->offset;
+	} else if (p->pos > p->offset && p->pos < p->next_data) {
+		// first data block after offset
+		p->next_data = p->pos;
+	}
+	p->pos += p->blksize;
+	return 0;
+}
+
+
+/*
+ * FUSE lseek handler: find the next data (SEEK_DATA) or hole (any other
+ * whence) at or after offset in the open file fp.
+ * Returns the resulting offset; -ENXIO when offset is not inside the file or
+ * SEEK_DATA finds no data after it; otherwise translate_error()'s result for
+ * a libext2fs failure.  With no hole found, the hole result is i_size.
+ * NOTE(review): takes no lock; confirm whether the other fuse2fs operations
+ * serialise on one and whether this handler must too.
+ */
+static off_t op_lseek(const char* path, off_t offset, int whence, struct fuse_file_info *fp)
+{
+	struct fuse_context *ctxt = fuse_get_context();
+	struct fuse2fs *ff = (struct fuse2fs *)ctxt->private_data;
+	struct fuse2fs_file_handle *fh =
+		(struct fuse2fs_file_handle *)(uintptr_t)fp->fh;
+	ext2_filsys fs;
+	struct ext2_inode_large inode;
+	__u64 i_size;
+	errcode_t err;
+
+	FUSE2FS_CHECK_CONTEXT(ff);
+	fs = ff->fs;
+	FUSE2FS_CHECK_MAGIC(fs, fh, FUSE2FS_FILE_MAGIC);
+
+	memset(&inode, 0, sizeof(inode));
+	err = ext2fs_read_inode_full(fs, fh->ino, (struct ext2_inode *)&inode,
+				     sizeof(inode));
+	if (err)
+		return translate_error(fs, fh->ino, err);
+	i_size = EXT2_I_SIZE(&inode);
+
+	/* Reject negative offsets explicitly rather than via unsigned wrap. */
+	if (offset < 0 || (__u64)offset >= i_size)
+		return -ENXIO;
+
+	struct block_context bc = {
+		.next_block = 0,
+		.blksize = fs->blocksize,
+		.offset = offset,
+		.pos = 0,
+		.next_hole = i_size,
+		.next_data = i_size,
+	};
+
+	if (inode.i_mode & S_IFREG && inode.i_flags & EXT4_EXTENTS_FL) {
+		ext2_extent_handle_t handle = NULL;
+		struct ext2fs_extent extent;
+		int op = EXT2_EXTENT_ROOT;
+
+		err = ext2fs_extent_open(fs, fh->ino, &handle);
+		if (err)
+			return translate_error(fs, fh->ino, err);
+		while (1) {
+			err = ext2fs_extent_get(handle, op, &extent);
+			if (err == EXT2_ET_EXTENT_NO_NEXT)
+				break;
+			if (err) {
+				err = translate_error(fs, fh->ino, err);
+				ext2fs_extent_free(handle);
+				return err;
+			}
+			op = EXT2_EXTENT_NEXT;
+
+			/* Feed only the first visit of each leaf extent. */
+			if (extent.e_flags & EXT2_EXTENT_FLAGS_SECOND_VISIT)
+				continue;
+			if (!(extent.e_flags & EXT2_EXTENT_FLAGS_LEAF))
+				continue;
+
+			blk64_t start = extent.e_pblk;
+			e2_blkcnt_t blockcnt = extent.e_lblk;
+			for(blk64_t blocknr = start; blocknr < start + extent.e_len; ++blocknr, ++blockcnt) {
+				// TODO We can be much more efficient here
+				dumponeblock(fs, &blocknr, blockcnt, 0, 0, &bc);
+			}
+		}
+		ext2fs_extent_free(handle);
+	} else if (inode.i_mode & S_IFREG && inode.i_flags & EXT4_INLINE_DATA_FL) {
+		/* Inline data is treated as one data run with no holes. */
+		return whence == SEEK_DATA ? offset : i_size;
+	} else {
+		err = ext2fs_block_iterate3(fs, fh->ino, BLOCK_FLAG_DATA_ONLY, NULL, dumponeblock, &bc);
+		if (err)
+			return translate_error(fs, fh->ino, err);
+	}
+
+	/* deal with holes at the end of the inode */
+	if (i_size > bc.pos && bc.next_hole == i_size)
+		bc.next_hole = bc.pos > bc.offset ? bc.pos : bc.offset;
+
+	if (whence != SEEK_DATA)
+		return bc.next_hole;
+	if (bc.next_data == i_size)
+		return -ENXIO;
+	return bc.next_data;
+}
+
static int op_truncate(const char *path, off_t len
#if FUSE_VERSION >= FUSE_MAKE_VERSION(3, 0)
, struct fuse_file_info *fi EXT2FS_ATTR((unused))
@@ -3695,6 +3836,7 @@
.fallocate = op_fallocate,
# endif
#endif
+ .lseek = op_lseek,
};
static int get_random_bytes(void *p, size_t sz)
--- End Message ---