Scan for holes in chunk-based inodes when either --chunksize=# or
-Ededupe (without compression) is specified, so that sparse files
can be generated.  Such sparse files have been supported since Linux 5.15.

Signed-off-by: Gao Xiang <[email protected]>
---
 lib/blobchunk.c | 71 ++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 55 insertions(+), 16 deletions(-)

diff --git a/lib/blobchunk.c b/lib/blobchunk.c
index 77b0c17..55ea061 100644
--- a/lib/blobchunk.c
+++ b/lib/blobchunk.c
@@ -18,7 +18,7 @@ void erofs_sha256(const unsigned char *in, unsigned long in_size,
 struct erofs_blobchunk {
        struct hashmap_entry ent;
        char            sha256[32];
-       unsigned int    chunksize;
+       erofs_off_t     chunksize;
        erofs_blk_t     blkaddr;
 };
 
@@ -27,9 +27,12 @@ static FILE *blobfile;
 static erofs_blk_t remapped_base;
 static bool multidev;
 static struct erofs_buffer_head *bh_devt;
+struct erofs_blobchunk erofs_holechunk = {
+       .blkaddr = EROFS_NULL_ADDR,
+};
 
 static struct erofs_blobchunk *erofs_blob_getchunk(int fd,
-               unsigned int chunksize)
+               erofs_off_t chunksize)
 {
        static u8 zeroed[EROFS_BLKSIZ];
        u8 *chunkdata, sha256[32];
@@ -129,7 +132,11 @@ int erofs_blob_write_chunk_indexes(struct erofs_inode *inode,
 
                chunk = *(void **)(inode->chunkindexes + src);
 
-               idx.blkaddr = base_blkaddr + chunk->blkaddr;
+               if (chunk->blkaddr != EROFS_NULL_ADDR)
+                       idx.blkaddr = base_blkaddr + chunk->blkaddr;
+               else
+                       idx.blkaddr = EROFS_NULL_ADDR;
+
                if (extent_start != EROFS_NULL_ADDR &&
                    idx.blkaddr == extent_end + 1) {
                        extent_end = idx.blkaddr;
@@ -163,14 +170,28 @@ int erofs_blob_write_chunk_indexes(struct erofs_inode *inode,
 
 int erofs_blob_write_chunked_file(struct erofs_inode *inode)
 {
-       unsigned int chunksize = 1 << cfg.c_chunkbits;
-       unsigned int count = DIV_ROUND_UP(inode->i_size, chunksize);
+       unsigned int chunkbits = cfg.c_chunkbits;
+       unsigned int count, unit;
        struct erofs_inode_chunk_index *idx;
-       erofs_off_t pos, len;
-       unsigned int unit;
+       erofs_off_t pos, len, chunksize;
        int fd, ret;
 
-       inode->u.chunkformat |= inode->u.chunkbits - LOG_BLOCK_SIZE;
+       fd = open(inode->i_srcpath, O_RDONLY | O_BINARY);
+       if (fd < 0)
+               return -errno;
+#ifdef SEEK_DATA
+       /* if the file is fully sparsed, use one big chunk instead */
+       if (lseek(fd, 0, SEEK_DATA) < 0 && errno == ENXIO) {
+               chunkbits = ilog2(inode->i_size - 1) + 1;
+               if (chunkbits < LOG_BLOCK_SIZE)
+                       chunkbits = LOG_BLOCK_SIZE;
+       }
+#endif
+       if (chunkbits - LOG_BLOCK_SIZE > EROFS_CHUNK_FORMAT_BLKBITS_MASK)
+               chunkbits = EROFS_CHUNK_FORMAT_BLKBITS_MASK + LOG_BLOCK_SIZE;
+       chunksize = 1ULL << chunkbits;
+       count = DIV_ROUND_UP(inode->i_size, chunksize);
+       inode->u.chunkformat |= chunkbits - LOG_BLOCK_SIZE;
        if (multidev)
                inode->u.chunkformat |= EROFS_CHUNK_FORMAT_INDEXES;
 
@@ -181,24 +202,41 @@ int erofs_blob_write_chunked_file(struct erofs_inode *inode)
 
        inode->extent_isize = count * unit;
        idx = malloc(count * max(sizeof(*idx), sizeof(void *)));
-       if (!idx)
+       if (!idx) {
+               close(fd);
                return -ENOMEM;
-       inode->chunkindexes = idx;
-
-       fd = open(inode->i_srcpath, O_RDONLY | O_BINARY);
-       if (fd < 0) {
-               ret = -errno;
-               goto err;
        }
+       inode->chunkindexes = idx;
 
        for (pos = 0; pos < inode->i_size; pos += len) {
                struct erofs_blobchunk *chunk;
+#ifdef SEEK_DATA
+               off64_t offset = lseek(fd, pos, SEEK_DATA);
+
+               if (offset < 0) {
+                       if (errno != ENXIO)
+                               offset = pos;
+                       else
+                               offset = ((pos >> chunkbits) + 1) << chunkbits;
+               } else {
+                       offset &= ~(chunksize - 1);
+               }
+
+               if (offset > pos) {
+                       len = 0;
+                       do {
+                               *(void **)idx++ = &erofs_holechunk;
+                               pos += chunksize;
+                       } while (pos < offset);
+                       DBG_BUGON(pos != offset);
+                       continue;
+               }
+#endif
 
                len = min_t(u64, inode->i_size - pos, chunksize);
                chunk = erofs_blob_getchunk(fd, len);
                if (IS_ERR(chunk)) {
                        ret = PTR_ERR(chunk);
-                       close(fd);
                        goto err;
                }
                *(void **)idx++ = chunk;
@@ -207,6 +245,7 @@ int erofs_blob_write_chunked_file(struct erofs_inode *inode)
        close(fd);
        return 0;
 err:
+       close(fd);
        free(inode->chunkindexes);
        inode->chunkindexes = NULL;
        return ret;
-- 
2.24.4

Reply via email to