It turns out that lseek(3, 0, SEEK_HOLE) returns the end-of-file offset for a sparse file being copied from a mounted Linux squashfs filesystem. This breaks the heuristic that cp's --sparse=auto mode uses to detect a sparse source file.
I have a fix for you to consider.

To reproduce:

First, create a squashfs drive containing a file output_file.bin.

    mkdir squashfs-root
    cd squashfs-root

Then run the following script mkhole.sh:

--------------------------------------------------------------
#!/bin/bash

OUTPUT="output_file.bin"

# Remove file if it exists
rm -f "$OUTPUT"

# Write 4KB of 'A'
dd if=<(yes A | tr -d '\n' | head -c 4096) of="$OUTPUT" bs=4096 count=1

# Create a 4k*100 hole followed by 4KB of zeros
dd if=/dev/zero of="$OUTPUT" bs=4096 count=1 seek=101

# Write another 4KB of 'A' after the hole (overwriting the 4k of zeros)
dd if=<(yes A | tr -d '\n' | head -c 4096) of="$OUTPUT" bs=4096 count=1 seek=101
--------------------------------------------------------------

Now create the mysquashfs.img file to mount:

    cd ..
    mksquashfs squashfs-root mysquashfs.img
    sudo mount -o loop mysquashfs.img /mnt

Check that /mnt/output_file.bin is sparse:

    ls -lh /mnt/output_file.bin
    du -sh /mnt/output_file.bin

(the second value should be smaller than the first).

Now use a newly built cp command from coreutils to copy this file to a local filesystem.

    mkdir ~/tmp
    cd ~/tmp
    ~/src/coreutils/src/cp --reflink=never /mnt/output_file.bin nonsparse

Even though --sparse=auto is in effect and the file is sparse, it is not detected as such. This can be confirmed by running:

    strace ~/src/coreutils/src/cp --reflink=never /mnt/output_file.bin nonsparse

and you will see:

    lseek(3, 0, SEEK_DATA) = 0
    fadvise64(3, 0, 0, POSIX_FADV_SEQUENTIAL) = 0
    lseek(3, 0, SEEK_HOLE) = 417792

The following diff fixes this for me, and still passes "make check".

diff --git a/src/copy.c b/src/copy.c
index 77f0c561e..91136cd7c 100644
--- a/src/copy.c
+++ b/src/copy.c
@@ -592,7 +592,7 @@ lseek_copy (int src_fd, int dest_fd, char **abuf, size_t buf_size,
               else if (sparse_mode != SPARSE_NEVER)
                 {
                   if (! create_hole (dest_fd, dst_name,
-                                     sparse_mode == SPARSE_ALWAYS,
+                                     sparse_mode != SPARSE_NEVER,
                                      ext_hole_size))
                     return false;
                 }
@@ -621,7 +621,7 @@ lseek_copy (int src_fd, int dest_fd, char **abuf, size_t buf_size,
           if ( ! sparse_copy (src_fd, dest_fd, abuf, buf_size,
                               true, allow_reflink, src_name, dst_name,
                               ext_len,
-                              sparse_mode == SPARSE_ALWAYS ? hole_size : nullptr,
+                              sparse_mode != SPARSE_NEVER ? hole_size : nullptr,
                               &n_read))
             return false;
 
@@ -1576,7 +1576,7 @@ copy_reg (char const *src_name, char const *dst_name,
                  :
 #endif
                  sparse_copy (source_desc, dest_desc, &buf, buf_size,
-                              x->sparse_mode == SPARSE_ALWAYS,
+                              x->sparse_mode != SPARSE_NEVER,
                               x->reflink_mode != REFLINK_NEVER,
                               src_name, dst_name, UINTMAX_MAX,
                               make_holes ? &hole_size : nullptr, &n_read)))

Thanks!

Jeremy Allison,
CIQ Inc.