Let's add a new mode "tarerofs" for mkfs.erofs. It mainly has two use cases: - Convert a tarball (or later tarballs with a merged view) into a full erofs image;
- Generate an EROFS image as a manifest to refer to existing tarballs. The second use case is mainly prepared for OCI direct mount without OCI blob unpacking. Cc: Xin Yin <[email protected]> Cc: Jia Zhu <[email protected]> Signed-off-by: Gao Xiang <[email protected]> --- include/erofs/blobchunk.h | 2 +- include/erofs/inode.h | 10 + include/erofs/internal.h | 7 +- lib/Makefile.am | 2 +- lib/blobchunk.c | 35 ++- lib/inode.c | 147 ++++++--- lib/tar.c | 616 ++++++++++++++++++++++++++++++++++++++ mkfs/main.c | 121 +++++--- 8 files changed, 847 insertions(+), 93 deletions(-) create mode 100644 lib/tar.c diff --git a/include/erofs/blobchunk.h b/include/erofs/blobchunk.h index 49cb7bf..d7deb33 100644 --- a/include/erofs/blobchunk.h +++ b/include/erofs/blobchunk.h @@ -15,7 +15,7 @@ extern "C" #include "erofs/internal.h" int erofs_blob_write_chunk_indexes(struct erofs_inode *inode, erofs_off_t off); -int erofs_blob_write_chunked_file(struct erofs_inode *inode); +int erofs_blob_write_chunked_file(struct erofs_inode *inode, int fd); int erofs_blob_remap(void); void erofs_blob_exit(void); int erofs_blob_init(const char *blobfile_path); diff --git a/include/erofs/inode.h b/include/erofs/inode.h index 058a235..b9b1f9c 100644 --- a/include/erofs/inode.h +++ b/include/erofs/inode.h @@ -15,11 +15,21 @@ extern "C" #include "erofs/internal.h" +static inline struct erofs_inode *erofs_igrab(struct erofs_inode *inode) +{ + ++inode->i_count; + return inode; +} + unsigned char erofs_mode_to_ftype(umode_t mode); unsigned char erofs_ftype_to_dtype(unsigned int filetype); void erofs_inode_manager_init(void); unsigned int erofs_iput(struct erofs_inode *inode); erofs_nid_t erofs_lookupnid(struct erofs_inode *inode); +struct erofs_dentry *erofs_d_alloc(struct erofs_inode *parent, + const char *name); +int tarerofs_dump_tree(struct erofs_inode *dir); +struct erofs_inode *erofs_new_inode(void); struct erofs_inode *erofs_mkfs_build_tree_from_path(const char *path); struct erofs_inode 
*erofs_mkfs_build_special_from_fd(int fd, const char *name); diff --git a/include/erofs/internal.h b/include/erofs/internal.h index 1f1e730..c184bc4 100644 --- a/include/erofs/internal.h +++ b/include/erofs/internal.h @@ -22,6 +22,7 @@ typedef unsigned short umode_t; #include "erofs_fs.h" #include <fcntl.h> #include <sys/types.h> /* for off_t definition */ +#include <stdio.h> #ifndef PATH_MAX #define PATH_MAX 4096 /* # chars in a path name including nul */ @@ -174,12 +175,16 @@ struct erofs_inode { } u; char *i_srcpath; - + union { + char *i_link; + FILE *i_tmpfile; + }; unsigned char datalayout; unsigned char inode_isize; /* inline tail-end packing size */ unsigned short idata_size; bool compressed_idata; + bool with_tmpfile; unsigned int xattr_isize; unsigned int extent_isize; diff --git a/lib/Makefile.am b/lib/Makefile.am index faa7311..a377370 100644 --- a/lib/Makefile.am +++ b/lib/Makefile.am @@ -29,7 +29,7 @@ noinst_HEADERS += compressor.h liberofs_la_SOURCES = config.c io.c cache.c super.c inode.c xattr.c exclude.c \ namei.c data.c compress.c compressor.c zmap.c decompress.c \ compress_hints.c hashmap.c sha256.c blobchunk.c dir.c \ - fragments.c rb_tree.c dedupe.c + fragments.c rb_tree.c dedupe.c tar.c liberofs_la_CFLAGS = -Wall -I$(top_srcdir)/include if ENABLE_LZ4 diff --git a/lib/blobchunk.c b/lib/blobchunk.c index 8142cc3..9665d63 100644 --- a/lib/blobchunk.c +++ b/lib/blobchunk.c @@ -128,12 +128,20 @@ int erofs_blob_write_chunk_indexes(struct erofs_inode *inode, src += sizeof(void *), dst += unit) { struct erofs_blobchunk *chunk; - chunk = *(void **)(inode->chunkindexes + src); + if (blobfile) { + chunk = *(void **)(inode->chunkindexes + src); - if (chunk->blkaddr != EROFS_NULL_ADDR) - idx.blkaddr = base_blkaddr + chunk->blkaddr; - else - idx.blkaddr = EROFS_NULL_ADDR; + if (chunk->blkaddr != EROFS_NULL_ADDR) + idx.blkaddr = base_blkaddr + chunk->blkaddr; + else + idx.blkaddr = EROFS_NULL_ADDR; + } else if (unit == EROFS_BLOCK_MAP_ENTRY_SIZE) { + 
idx.blkaddr = le32_to_cpu(*(__le32 *)(inode->chunkindexes + dst)); + } else { + struct erofs_inode_chunk_index *i2 = inode->chunkindexes + dst; + + idx.blkaddr = le32_to_cpu(i2->blkaddr); + } if (extent_start != EROFS_NULL_ADDR && idx.blkaddr == extent_end + 1) { @@ -149,6 +157,10 @@ int erofs_blob_write_chunk_indexes(struct erofs_inode *inode, extent_start = idx.blkaddr; extent_end = idx.blkaddr; } + + if (!blobfile) + continue; + if (unit == EROFS_BLOCK_MAP_ENTRY_SIZE) memcpy(inode->chunkindexes + dst, &idx.blkaddr, unit); else @@ -166,17 +178,14 @@ int erofs_blob_write_chunk_indexes(struct erofs_inode *inode, return dev_write(inode->chunkindexes, off, inode->extent_isize); } -int erofs_blob_write_chunked_file(struct erofs_inode *inode) +int erofs_blob_write_chunked_file(struct erofs_inode *inode, int fd) { unsigned int chunkbits = cfg.c_chunkbits; unsigned int count, unit; struct erofs_inode_chunk_index *idx; erofs_off_t pos, len, chunksize; - int fd, ret; + int ret; - fd = open(inode->i_srcpath, O_RDONLY | O_BINARY); - if (fd < 0) - return -errno; #ifdef SEEK_DATA /* if the file is fully sparsed, use one big chunk instead */ if (lseek(fd, 0, SEEK_DATA) < 0 && errno == ENXIO) { @@ -200,10 +209,8 @@ int erofs_blob_write_chunked_file(struct erofs_inode *inode) inode->extent_isize = count * unit; idx = malloc(count * max(sizeof(*idx), sizeof(void *))); - if (!idx) { - close(fd); + if (!idx) return -ENOMEM; - } inode->chunkindexes = idx; for (pos = 0; pos < inode->i_size; pos += len) { @@ -240,10 +247,8 @@ int erofs_blob_write_chunked_file(struct erofs_inode *inode) *(void **)idx++ = chunk; } inode->datalayout = EROFS_INODE_CHUNK_BASED; - close(fd); return 0; err: - close(fd); free(inode->chunkindexes); inode->chunkindexes = NULL; return ret; diff --git a/lib/inode.c b/lib/inode.c index 9db84a8..09793cd 100644 --- a/lib/inode.c +++ b/lib/inode.c @@ -75,12 +75,6 @@ void erofs_inode_manager_init(void) init_list_head(&inode_hashtable[i]); } -static struct erofs_inode 
*erofs_igrab(struct erofs_inode *inode) -{ - ++inode->i_count; - return inode; -} - /* get the inode from the (source) inode # */ struct erofs_inode *erofs_iget(dev_t dev, ino_t ino) { @@ -121,6 +115,10 @@ unsigned int erofs_iput(struct erofs_inode *inode) list_del(&inode->i_hash); if (inode->i_srcpath) free(inode->i_srcpath); + if (inode->with_tmpfile) + fclose(inode->i_tmpfile); + else if (inode->i_link) + free(inode->i_link); free(inode); return 0; } @@ -180,10 +178,32 @@ static int comp_subdir(const void *a, const void *b) return strcmp(da->name, db->name); } +static void erofs_setup_dir_layout(struct erofs_inode *dir) +{ + struct erofs_dentry *d; + unsigned int d_size = 0; + + /* let's calculate dir size */ + list_for_each_entry(d, &dir->i_subdirs, d_child) { + int len = strlen(d->name) + sizeof(struct erofs_dirent); + + if ((d_size & (erofs_blksiz() - 1)) + len > erofs_blksiz()) + d_size = round_up(d_size, erofs_blksiz()); + d_size += len; + } + dir->i_size = d_size; + + /* no compression for all dirs */ + dir->datalayout = EROFS_INODE_FLAT_INLINE; + + /* it will be used in erofs_prepare_inode_buffer */ + dir->idata_size = d_size % erofs_blksiz(); +} + int erofs_prepare_dir_file(struct erofs_inode *dir, unsigned int nr_subdirs) { struct erofs_dentry *d, *n, **sorted_d; - unsigned int d_size, i; + unsigned int i; /* dot is pointed to the current dir inode */ d = erofs_d_alloc(dir, "."); @@ -215,22 +235,7 @@ int erofs_prepare_dir_file(struct erofs_inode *dir, unsigned int nr_subdirs) list_add_tail(&sorted_d[i]->d_child, &dir->i_subdirs); free(sorted_d); - /* let's calculate dir size */ - d_size = 0; - list_for_each_entry(d, &dir->i_subdirs, d_child) { - int len = strlen(d->name) + sizeof(struct erofs_dirent); - - if ((d_size & (erofs_blksiz() - 1)) + len > erofs_blksiz()) - d_size = round_up(d_size, erofs_blksiz()); - d_size += len; - } - dir->i_size = d_size; - - /* no compression for all dirs */ - dir->datalayout = EROFS_INODE_FLAT_INLINE; - - /* it will be 
used in erofs_prepare_inode_buffer */ - dir->idata_size = d_size % erofs_blksiz(); + erofs_setup_dir_layout(dir); return 0; } @@ -347,7 +352,7 @@ static int erofs_write_dir_file(struct erofs_inode *dir) return 0; } -static int erofs_write_file_from_buffer(struct erofs_inode *inode, char *buf) +int erofs_write_file_from_buffer(struct erofs_inode *inode, char *buf) { const unsigned int nblocks = erofs_blknr(inode->i_size); int ret; @@ -424,9 +429,9 @@ static int write_uncompressed_file_from_fd(struct erofs_inode *inode, int fd) return 0; } -static int erofs_write_file(struct erofs_inode *inode) +int erofs_write_file(struct erofs_inode *inode, int fd) { - int ret, fd; + int ret; if (!inode->i_size) { inode->datalayout = EROFS_INODE_FLAT_PLAIN; @@ -439,28 +444,17 @@ static int erofs_write_file(struct erofs_inode *inode) inode->u.chunkformat = 0; if (cfg.c_force_chunkformat == FORCE_INODE_CHUNK_INDEXES) inode->u.chunkformat = EROFS_CHUNK_FORMAT_INDEXES; - return erofs_blob_write_chunked_file(inode); + return erofs_blob_write_chunked_file(inode, fd); } if (cfg.c_compr_alg[0] && erofs_file_is_compressible(inode)) { - fd = open(inode->i_srcpath, O_RDONLY | O_BINARY); - if (fd < 0) - return -errno; ret = erofs_write_compressed_file(inode, fd); - close(fd); - if (!ret || ret != -ENOSPC) return ret; } /* fallback to all data uncompressed */ - fd = open(inode->i_srcpath, O_RDONLY | O_BINARY); - if (fd < 0) - return -errno; - - ret = write_uncompressed_file_from_fd(inode, fd); - close(fd); - return ret; + return write_uncompressed_file_from_fd(inode, fd); } static bool erofs_bh_flush_write_inode(struct erofs_buffer_head *bh) @@ -950,7 +944,7 @@ static int erofs_fill_inode(struct erofs_inode *inode, struct stat *st, return 0; } -static struct erofs_inode *erofs_new_inode(void) +struct erofs_inode *erofs_new_inode(void) { struct erofs_inode *inode; @@ -1048,7 +1042,12 @@ static int erofs_mkfs_build_tree(struct erofs_inode *dir, struct list_head *dirs ret = 
erofs_write_file_from_buffer(dir, symlink);
 			free(symlink);
 		} else {
-			ret = erofs_write_file(dir);
+			int fd = open(dir->i_srcpath, O_RDONLY | O_BINARY);
+			if (fd < 0)
+				return -errno;
+
+			ret = erofs_write_file(dir, fd);
+			close(fd);
 		}
 		if (ret)
 			return ret;
@@ -1271,3 +1270,90 @@ struct erofs_inode *erofs_mkfs_build_special_from_fd(int fd, const char *name)
 	erofs_write_tail_end(inode);
 	return inode;
 }
+
+/*
+ * Write the tree built by tarerofs_parse_tar() out to the image, starting
+ * from @dir and recursing into subdirectories.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int tarerofs_dump_tree(struct erofs_inode *dir)
+{
+	struct erofs_dentry *d;
+	int ret;
+
+	if (erofs_should_use_inode_extended(dir)) {
+		if (cfg.c_force_inodeversion == FORCE_INODE_COMPACT) {
+			erofs_err("file %s cannot be in compact form",
+				  dir->i_srcpath);
+			return -EINVAL;
+		}
+		dir->inode_isize = sizeof(struct erofs_inode_extended);
+	} else {
+		dir->inode_isize = sizeof(struct erofs_inode_compact);
+	}
+
+	if (!S_ISDIR(dir->i_mode)) {
+		/*
+		 * Hard links make an inode reachable from several dentries,
+		 * yet it must be written once only.
+		 * NOTE(review): assumes ->bh stays NULL until
+		 * erofs_prepare_inode_buffer() has run - please confirm.
+		 */
+		if (dir->bh)
+			return 0;
+
+		if (S_ISLNK(dir->i_mode)) {
+			ret = erofs_write_file_from_buffer(dir, dir->i_link);
+			/* erofs_iput() would free a stale ->i_link again */
+			free(dir->i_link);
+			dir->i_link = NULL;
+		} else if (dir->i_tmpfile) {
+			ret = erofs_write_file(dir, fileno(dir->i_tmpfile));
+			/* likewise, erofs_iput() would fclose() it again */
+			fclose(dir->i_tmpfile);
+			dir->i_tmpfile = NULL;
+			dir->with_tmpfile = false;
+		} else {
+			ret = 0;
+		}
+		if (ret)
+			return ret;
+		ret = erofs_prepare_inode_buffer(dir);
+		if (ret)
+			return ret;
+		erofs_write_tail_end(dir);
+		return 0;
+	}
+
+	erofs_setup_dir_layout(dir);
+
+	ret = erofs_prepare_inode_buffer(dir);
+	if (ret)
+		return ret;
+	/* erofs_write_inode_bhops is put back once the dirents are written */
+	dir->bh->op = &erofs_skip_write_bhops;
+
+	if (IS_ROOT(dir))
+		erofs_fixup_meta_blkaddr(dir);
+
+	list_for_each_entry(d, &dir->i_subdirs, d_child) {
+		struct erofs_inode *inode;
+
+		if (is_dot_dotdot(d->name))
+			continue;
+
+		/* grab only what is visited and drop the reference afterwards */
+		inode = erofs_igrab(d->inode);
+		ret = tarerofs_dump_tree(inode);
+		erofs_iput(inode);
+		if (ret)
+			return ret;
+	}
+	ret = erofs_write_dir_file(dir);
+	if (ret)
+		return ret;
+	erofs_write_tail_end(dir);
+	dir->bh->op = &erofs_write_inode_bhops;
+	return 0;
+}
diff --git a/lib/tar.c b/lib/tar.c
new file mode 100644
index 0000000..1b3cde3
--- /dev/null
+++ b/lib/tar.c
@@ -0,0 
+1,815 @@
+// SPDX-License-Identifier: GPL-2.0+ OR Apache-2.0
+#include <errno.h>
+#include <limits.h>
+#include <stdio.h>
+#include <unistd.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/stat.h>
+#ifdef HAVE_LINUX_AUFS_TYPE_H
+#include <linux/aufs_type.h>
+#else
+#define AUFS_WH_PFX	".wh."
+#define AUFS_DIROPQ_NAME	AUFS_WH_PFX ".opq"
+#define AUFS_WH_DIROPQ	AUFS_WH_PFX AUFS_DIROPQ_NAME
+#endif
+#include "erofs/print.h"
+#include "erofs/cache.h"
+#include "erofs/inode.h"
+#include "erofs/list.h"
+#include "erofs/tar.h"
+#include "erofs/io.h"
+
+/* device number given to the character devices that stand for whiteouts */
+#define EROFS_WHITEOUT_DEV	0
+
+/* scratch buffer shared by the skip and copy loops below */
+static char erofs_libbuf[16384];
+
+/* layout of a ustar header block */
+struct tar_header {
+	char name[100];		/*   0-99 */
+	char mode[8];		/* 100-107 */
+	char uid[8];		/* 108-115 */
+	char gid[8];		/* 116-123 */
+	char size[12];		/* 124-135 */
+	char mtime[12];		/* 136-147 */
+	char chksum[8];		/* 148-155 */
+	char typeflag;		/* 156-156 */
+	char linkname[100];	/* 157-256 */
+	char magic[6];		/* 257-262 */
+	char version[2];	/* 263-264 */
+	char uname[32];		/* 265-296 */
+	char gname[32];		/* 297-328 */
+	char devmajor[8];	/* 329-336 */
+	char devminor[8];	/* 337-344 */
+	char prefix[155];	/* 345-499 */
+	char padding[12];	/* 500-511 (pads the header to exactly 512 bytes) */
+};
+
+/*
+ * Read up to @bytes bytes from @fd into @buf, carrying on after short reads
+ * and EINTR.
+ *
+ * Returns the number of bytes read (less than @bytes only if EOF was hit)
+ * or a negative errno.
+ */
+s64 erofs_read_from_fd(int fd, void *buf, u64 bytes)
+{
+	s64 i = 0;
+
+	while (bytes) {
+		int len = bytes > INT_MAX ? INT_MAX : bytes;
+		int ret;
+
+		ret = read(fd, (char *)buf + i, len);
+		if (!ret)	/* EOF */
+			break;
+		if (ret < 0) {
+			int err = errno;
+
+			/* nothing was transferred: retry without accounting */
+			if (err == EINTR)
+				continue;
+			erofs_err("failed to read : %s\n", strerror(err));
+			return -err;
+		}
+		bytes -= ret;
+		i += ret;
+	}
+	return i;
+}
+
+/*
+ * Skip @sz bytes of input: seek when @fd is seekable, otherwise read the
+ * data and throw it away.
+ *
+ * Returns 0 for success, >0 means this much is left after input skipped.
+ */
+u64 erofs_lskip(int fd, u64 sz)
+{
+	s64 cur = lseek(fd, 0, SEEK_CUR);
+
+	if (cur >= 0) {
+		s64 end = lseek(fd, 0, SEEK_END) - cur;
+
+		/* fewer than @sz bytes (possibly none) are left to skip */
+		if (end >= 0 && (u64)end < sz)
+			return sz - end;
+
+		end = cur + sz;
+		if (end == lseek(fd, end, SEEK_SET))
+			return 0;
+	}
+
+	while (sz) {
+		/* clamp in 64 bits: casting a huge @sz to int first is lossy */
+		int try = min_t(u64, sz, sizeof(erofs_libbuf));
+		int or;
+
+		or = read(fd, erofs_libbuf, try);
+		if (or < 0 && errno == EINTR)
+			continue;
+		if (or <= 0)
+			break;
+		sz -= or;
+	}
+	return sz;
+}
+
+/*
+ * Convert the octal number held in the @len-byte header field @ptr, which
+ * need not be NUL-terminated.
+ *
+ * errno is cleared on success and set to EINVAL if the field holds no
+ * digits or has trailing garbage; callers test errno, not the result.
+ */
+long long tarerofs_otoi(const char *ptr, int len)
+{
+	char inp[32];
+	char *endp = inp;
+	long long val;
+
+	if (len < 0 || len >= (int)sizeof(inp)) {
+		errno = EINVAL;
+		return 0;
+	}
+	memcpy(inp, ptr, len);
+	inp[len] = '\0';
+
+	errno = 0;
+	/* parse the terminated copy so that @endp can be compared to @inp */
+	val = strtoll(inp, &endp, 8);
+	if (endp == inp || (*endp && *endp != ' '))
+		errno = EINVAL;	/* errno values are positive */
+	return val;
+}
+
+/*
+ * Look up @path, component by component, starting from directory @pwd.
+ *
+ * Every component but the last must already exist as a directory.  If the
+ * last one doesn't exist yet, a dentry of type EROFS_FT_UNKNOWN is allocated
+ * for it whose ->inode points to the parent directory for the time being.
+ * With @aufs set, AUFS_WH_PFX is stripped from the last component and
+ * *@whout is set to report the whiteout.
+ *
+ * @path is modified temporarily during the walk.
+ *
+ * Returns the dentry found or allocated, NULL if @path doesn't name an
+ * entry to work on (e.g. "." or an opaque marker), or an ERR_PTR().
+ */
+static struct erofs_dentry *tarerofs_get_dentry(struct erofs_inode *pwd, char *path,
+					        bool aufs, bool *whout)
+{
+	struct erofs_dentry *d = NULL;
+	unsigned int len = strlen(path);
+	char *s = path;
+
+	*whout = false;
+
+	while (s < path + len) {
+		char *slash = memchr(s, '/', path + len - s);
+
+		if (slash) {
+			/* skip empty components, i.e. a leading '/' or "//" */
+			if (s == slash) {
+				++s;
+				continue;
+			}
+			*slash = '\0';
+		}
+
+		/* the component is NUL-terminated by now: no need for memcmp() */
+		if (!strcmp(s, ".")) {
+			/* stay in the current directory */
+		} else if (!strcmp(s, "..")) {
+			pwd = pwd->i_parent;
+		} else {
+			struct erofs_inode *inode = NULL;
+
+			if (aufs && !slash) {
+				if (!strcmp(s, AUFS_WH_DIROPQ)) {
+					/*
+					 * Opaque markers aren't acted upon: have the
+					 * caller skip them instead of handing it the
+					 * dentry of the directory they live in.
+					 */
+					d = NULL;
+					break;
+				}
+				if (!strncmp(s, AUFS_WH_PFX, sizeof(AUFS_WH_PFX) - 1)) {
+					s += sizeof(AUFS_WH_PFX) - 1;
+					*whout = true;
+				}
+			}
+
+			list_for_each_entry(d, &pwd->i_subdirs, d_child) {
+				if (!strcmp(d->name, s)) {
+					if (d->type == EROFS_FT_DIR && !slash)
+						return ERR_PTR(-EIO);
+					if (d->type != EROFS_FT_DIR && slash)
+						return ERR_PTR(-EIO);
+					inode = d->inode;
+					break;
+				}
+			}
+
+			if (inode) {
+				pwd = inode;
+			} else if (!slash) {
+				d = erofs_d_alloc(pwd, s);
+				if (IS_ERR(d))
+					return d;
+				d->type = EROFS_FT_UNKNOWN;
+				d->inode = pwd;
+			} else {
+				return ERR_PTR(-EIO);
+			}
+		}
+		if (slash) {
+			*slash = '/';
+			s = slash + 1;
+		} else {
+			break;
+		}
+	}
+	return d;
+}
+
+/*
+ * Give directory @inode its "." and ".." entries; ->i_parent must have been
+ * set up already.  Returns 0 or a negative errno.
+ */
+int tarerofs_init_empty_dir(struct erofs_inode *inode)
+{
+	struct erofs_dentry *d;
+
+	/* dot points to the current dir inode */
+	d = erofs_d_alloc(inode, ".");
+	if (IS_ERR(d))
+		return PTR_ERR(d);
+	d->inode = erofs_igrab(inode);
+	d->type = EROFS_FT_DIR;
+
+	/* dotdot points to the parent dir */
+	d = erofs_d_alloc(inode, "..");
+	if (IS_ERR(d))
+		return PTR_ERR(d);
+	d->inode = erofs_igrab(inode->i_parent);
+	d->type = EROFS_FT_DIR;
+	inode->i_nlink = 2;
+	return 0;
+}
+
+/*
+ * Read a pax extended header of @size bytes from @fd and record its "path"
+ * and "linkpath" values (if any) in @eh, replacing what was there.  All
+ * other records are ignored.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int tarerofs_parse_pax_header(int fd, struct erofs_pax_header *eh, u32 size)
+{
+	char *buf, *p;
+	s64 nread;
+	int ret;
+
+	/* one more byte: sscanf() below needs a NUL-terminated buffer */
+	buf = malloc((size_t)size + 1);
+	if (!buf)
+		return -ENOMEM;
+	p = buf;
+
+	nread = erofs_read_from_fd(fd, buf, size);
+	if (nread != size) {
+		/* a short read must not pass for success */
+		ret = nread < 0 ? nread : -EIO;
+		goto out;
+	}
+	buf[size] = '\0';
+
+	while (p < buf + size) {
+		char *kv, *value, **slot;
+		int len, n;
+
+		/* extended records are of the format: "LEN NAME=VALUE\n" */
+		ret = sscanf(p, "%d %n", &len, &n);
+		if (ret < 1 || len <= n || len > buf + size - p) {
+			ret = -EIO;
+			goto out;
+		}
+		kv = p + n;
+		p += len;
+
+		if (p[-1] != '\n') {
+			ret = -EIO;
+			goto out;
+		}
+		p[-1] = '\0';
+
+		erofs_dbg("pax record: %s", kv);
+
+		value = memchr(kv, '=', p - kv);
+		if (!value) {
+			ret = -EIO;
+			goto out;
+		}
+		value++;
+
+		if (!strncmp(kv, "path=", sizeof("path=") - 1))
+			slot = &eh->path;
+		else if (!strncmp(kv, "linkpath=", sizeof("linkpath=") - 1))
+			slot = &eh->link;
+		else
+			continue;
+
+		value = strdup(value);
+		if (!value) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		free(*slot);
+		*slot = value;
+	}
+	ret = 0;
+out:
+	free(buf);
+	return ret;
+}
+
+/*
+ * Set up chunk indexes for @inode which refer to its data in place in the
+ * tarball (device 1), starting at block @blkaddr.
+ *
+ * Returns 0 on success or -ENOMEM.
+ */
+int tarerofs_write_chunk_indexes(struct erofs_inode *inode, erofs_blk_t blkaddr)
+{
+	unsigned int chunkbits = sbi.blkszbits;
+	unsigned int count, unit;
+	erofs_off_t chunksize, len, pos;
+	struct erofs_inode_chunk_index *idx;
+
+	/* no data to map; the math below isn't defined for empty files */
+	if (!inode->i_size) {
+		inode->datalayout = EROFS_INODE_FLAT_PLAIN;
+		return 0;
+	}
+
+	/* smallest power of two covering the file; ilog2(0) is undefined */
+	if (inode->i_size > 1)
+		chunkbits = ilog2(inode->i_size - 1) + 1;
+	if (chunkbits < sbi.blkszbits)
+		chunkbits = sbi.blkszbits;
+	inode->u.chunkformat |= chunkbits - sbi.blkszbits;
+	inode->u.chunkformat |= EROFS_CHUNK_FORMAT_INDEXES;
+	chunksize = 1ULL << chunkbits;
+	count = DIV_ROUND_UP(inode->i_size, chunksize);
+
+	unit = sizeof(struct erofs_inode_chunk_index);
+	inode->extent_isize = count * unit;
+	idx = calloc(count, max(sizeof(*idx), sizeof(void *)));
+	if (!idx)
+		return -ENOMEM;
+	inode->chunkindexes = idx;
+
+	for (pos = 0; pos < inode->i_size; pos += len) {
+		len = min_t(erofs_off_t, inode->i_size - pos, chunksize);
+
+		/* read back with le32_to_cpu() when the indexes are written */
+		*idx++ = (struct erofs_inode_chunk_index){
+			.device_id = cpu_to_le16(1),
+			.blkaddr = cpu_to_le32(blkaddr),
+		};
+		blkaddr += erofs_blknr(len);
+	}
+	inode->datalayout = EROFS_INODE_CHUNK_BASED;
+	return 0;
+}
+
+/*
+ * Parse the next member of the tar stream @tar into the tree under @root.
+ *
+ * Extended headers (pax 'g'/'x', GNU 'K'/'L') are consumed along with the
+ * member that follows them.  File data is spooled to a temporary file or,
+ * in index mode, left in the tarball and described by chunk indexes.
+ *
+ * Returns 0 if a member was consumed (call again for the next one), 1 at
+ * the end of the archive, or a negative errno.
+ */
+int tarerofs_parse_tar(struct erofs_inode *root, struct erofs_tarfile *tar)
+{
+	char path[PATH_MAX];
+	struct erofs_pax_header eh = tar->global;
+	bool e = false, fresh = false, whout;
+	struct stat st;
+	erofs_off_t tar_offset, data_offset, left;
+
+	struct tar_header th;
+	struct erofs_dentry *d;
+	struct erofs_inode *inode;
+	unsigned int j, csum, cksum;
+	int ckksum, ret, rem;
+	char *name;
+
+	if (eh.path)
+		eh.path = strdup(eh.path);
+	if (eh.link)
+		eh.link = strdup(eh.link);
+
+restart:
+	rem = tar->offset & 511;
+	if (rem) {
+		if (erofs_lskip(tar->fd, 512 - rem)) {
+			ret = -EIO;
+			goto out;
+		}
+		tar->offset += 512 - rem;
+	}
+
+	tar_offset = tar->offset;
+	ret = erofs_read_from_fd(tar->fd, &th, sizeof(th));
+	if (ret != sizeof(th)) {
+		/* EOF or a truncated header; 0 would make the caller go on */
+		if (ret >= 0)
+			ret = -EIO;
+		goto out;
+	}
+	tar->offset += sizeof(th);
+	if (*th.name == '\0') {
+		if (e) {	/* second empty block: end of tar */
+			ret = 1;
+			goto out;
+		}
+		e = true;	/* first empty block: check the next block */
+		goto restart;
+	}
+
+	if (strncmp(th.magic, "ustar", 5)) {
+		erofs_err("invalid tar magic @ %llu", tar_offset);
+		ret = -EIO;
+		goto out;
+	}
+
+	/* chksum field itself treated as ' ' */
+	csum = tarerofs_otoi(th.chksum, sizeof(th.chksum));
+	if (errno) {
+		erofs_err("invalid chksum @ %llu", tar_offset);
+		ret = -EBADMSG;
+		goto out;
+	}
+	cksum = 0;
+	for (j = 0; j < 8; ++j)
+		cksum += (unsigned int)' ';
+	ckksum = cksum;
+	/* accept the header if the unsigned or the signed byte sum matches */
+	for (j = 0; j < 148; ++j) {
+		cksum += ((unsigned char *)&th)[j];
+		ckksum += ((signed char *)&th)[j];
+	}
+	for (j = 156; j < 500; ++j) {
+		cksum += ((unsigned char *)&th)[j];
+		ckksum += ((signed char *)&th)[j];
+	}
+	if (csum != cksum && csum != ckksum) {
+		erofs_err("chksum mismatch @ %llu", tar_offset);
+		ret = -EBADMSG;
+		goto out;
+	}
+
+	/* the file type comes from typeflag: keep the permission bits only */
+	st.st_mode = tarerofs_otoi(th.mode, sizeof(th.mode)) & 07777;
+	if (errno)
+		goto invalid_tar;
+
+	st.st_uid = tarerofs_otoi(th.uid, sizeof(th.uid));
+	if (errno)
+		goto invalid_tar;
+
+	st.st_gid = tarerofs_otoi(th.gid, sizeof(th.gid));
+	if (errno)
+		goto invalid_tar;
+
+	st.st_size = tarerofs_otoi(th.size, sizeof(th.size));
+	if (errno)
+		goto invalid_tar;
+
+	st.st_mtime = tarerofs_otoi(th.mtime, sizeof(th.mtime));
+	if (errno)
+		goto invalid_tar;
+
+	if (th.typeflag <= '7' && !eh.path) {
+		eh.path = path;
+		j = 0;
+		if (*th.prefix) {
+			memcpy(path, th.prefix, sizeof(th.prefix));
+			path[sizeof(th.prefix)] = '\0';
+			j = strlen(path);
+			if (path[j - 1] != '/') {
+				path[j] = '/';
+				path[++j] = '\0';
+			}
+		}
+		memcpy(path + j, th.name, sizeof(th.name));
+		path[j + sizeof(th.name)] = '\0';
+	}
+
+	data_offset = tar->offset;
+	tar->offset += st.st_size;
+	if (th.typeflag == '0' || th.typeflag == '7' || th.typeflag == '1' ||
+	    th.typeflag == '\0') {	/* '\0': regular file too */
+		st.st_mode |= S_IFREG;
+	} else if (th.typeflag == '2') {
+		st.st_mode |= S_IFLNK;
+	} else if (th.typeflag == '3') {
+		st.st_mode |= S_IFCHR;
+	} else if (th.typeflag == '4') {
+		st.st_mode |= S_IFBLK;
+	} else if (th.typeflag == '5') {
+		st.st_mode |= S_IFDIR;
+	} else if (th.typeflag == '6') {
+		st.st_mode |= S_IFIFO;
+	} else if (th.typeflag == 'g') {
+		ret = tarerofs_parse_pax_header(tar->fd, &tar->global, st.st_size);
+		if (ret)
+			goto out;
+		if (tar->global.path) {
+			free(eh.path);
+			eh.path = strdup(tar->global.path);
+		}
+		if (tar->global.link) {
+			free(eh.link);
+			eh.link = strdup(tar->global.link);
+		}
+		goto restart;
+	} else if (th.typeflag == 'x') {
+		ret = tarerofs_parse_pax_header(tar->fd, &eh, st.st_size);
+		if (ret)
+			goto out;
+		goto restart;
+	} else if (th.typeflag == 'K' || th.typeflag == 'L') {
+		/* GNU long link name ('K') or long file name ('L') */
+		if (st.st_size >= PATH_MAX)
+			goto invalid_tar;
+		name = malloc(st.st_size + 1);
+		if (!name) {
+			ret = -ENOMEM;
+			goto out;
+		}
+		if (st.st_size != erofs_read_from_fd(tar->fd, name, st.st_size)) {
+			free(name);
+			goto invalid_tar;
+		}
+		name[st.st_size] = '\0';
+		if (th.typeflag == 'K') {
+			free(eh.link);
+			eh.link = name;
+		} else {
+			free(eh.path);
+			eh.path = name;
+		}
+		goto restart;
+	} else {
+		/* neither a name nor a file type to go by: skip the member */
+		erofs_warn("unsupported typeflag 0x%x @ %llu, skipped",
+			   th.typeflag, tar_offset);
+		ret = erofs_lskip(tar->fd, st.st_size) ? -EIO : 0;
+		goto out;
+	}
+
+	/* hard links name their target in linkname just like symlinks */
+	if ((th.typeflag == '1' || th.typeflag == '2') && !eh.link) {
+		eh.link = strndup(th.linkname, sizeof(th.linkname));
+		if (!eh.link) {
+			ret = -ENOMEM;
+			goto out;
+		}
+	}
+
+	if (erofs_blkoff(tar_offset + sizeof(th))) {
+		erofs_err("invalid tar data alignment @%llu", tar_offset);
+		ret = -EIO;
+		goto out;
+	}
+
+	/* members (directories, mostly) may be named with trailing slashes */
+	j = strlen(eh.path);
+	while (j && eh.path[j - 1] == '/')
+		eh.path[--j] = '\0';
+
+	erofs_dbg("parsing %s (mode %05o)", eh.path, st.st_mode);
+
+	d = tarerofs_get_dentry(root, eh.path, tar->aufs, &whout);
+	if (IS_ERR(d)) {
+		ret = PTR_ERR(d);
+		goto out;
+	}
+	if (!d) {
+		/* e.g. "./": no entry to set up, there's only data to skip */
+		ret = erofs_lskip(tar->fd, st.st_size) ? -EIO : 0;
+		goto out;
+	}
+	if (th.typeflag == '1') {	/* hard link cases */
+		struct erofs_dentry *d2;
+		bool dumb;
+
+		if (d->type != EROFS_FT_UNKNOWN) {
+			struct erofs_inode *old_inode = d->inode;
+
+			--old_inode->i_nlink;
+			erofs_iput(old_inode);
+			d->inode = NULL;
+		}
+		d2 = tarerofs_get_dentry(root, eh.link, tar->aufs, &dumb);
+		if (IS_ERR(d2)) {
+			ret = PTR_ERR(d2);
+			goto out;
+		}
+		if (!d2 || d2->type == EROFS_FT_UNKNOWN) {
+			ret = -ENOENT;
+			goto out;
+		}
+		if (S_ISDIR(d2->inode->i_mode)) {
+			ret = -EISDIR;
+			goto out;
+		}
+		inode = erofs_igrab(d2->inode);
+		d->inode = inode;
+		d->type = d2->type;
+		++inode->i_nlink;
+		/*
+		 * The link shares the inode of its target: stop here so that
+		 * the metadata and data set up for the target stay untouched.
+		 */
+		ret = erofs_lskip(tar->fd, st.st_size) ? -EIO : 0;
+		goto out;
+	}
+
+	if (d->type == EROFS_FT_UNKNOWN) {
+		inode = erofs_new_inode();
+		if (IS_ERR(inode)) {
+			ret = PTR_ERR(inode);
+			goto out;
+		}
+		inode->i_parent = d->inode;
+		d->inode = inode;
+		fresh = true;
+	} else {
+		inode = d->inode;
+	}
+
+	if (whout) {
+		inode->i_mode = (inode->i_mode & ~S_IFMT) | S_IFCHR;
+		inode->u.i_rdev = EROFS_WHITEOUT_DEV;
+	} else {
+		inode->i_mode = st.st_mode;
+	}
+	/* keep the dirent type in line with what the inode is now */
+	d->type = erofs_mode_to_ftype(inode->i_mode);
+
+	name = strdup(eh.path);
+	if (!name) {
+		ret = -ENOMEM;
+		goto out;
+	}
+	if (!fresh)	/* an earlier member of that name is overridden */
+		free(inode->i_srcpath);
+	inode->i_srcpath = name;
+	inode->i_uid = st.st_uid;
+	inode->i_gid = st.st_gid;
+	inode->i_size = st.st_size;
+	inode->i_mtime = st.st_mtime;
+
+	if (!S_ISDIR(inode->i_mode)) {
+		if (!fresh) {
+			/* release the payload of the overridden member */
+			if (inode->with_tmpfile)
+				fclose(inode->i_tmpfile);
+			else
+				free(inode->i_link);
+			inode->with_tmpfile = false;
+		}
+		inode->i_link = NULL;
+
+		if (S_ISLNK(inode->i_mode)) {
+			inode->i_link = strdup(eh.link);
+			if (!inode->i_link) {
+				ret = -ENOMEM;
+				goto out;
+			}
+			/* the target is what gets written for a symlink */
+			inode->i_size = strlen(inode->i_link);
+		} else if (!tar->index_mode) {
+			inode->i_tmpfile = tmpfile();
+			if (!inode->i_tmpfile) {
+				ret = -errno;
+				goto out;
+			}
+			inode->with_tmpfile = true;
+
+			/* count in 64 bits: members may be 4GiB or larger */
+			for (left = inode->i_size; left; left -= rem) {
+				rem = min_t(erofs_off_t, sizeof(erofs_libbuf), left);
+
+				if (erofs_read_from_fd(tar->fd, erofs_libbuf, rem) != rem ||
+				    fwrite(erofs_libbuf, rem, 1, inode->i_tmpfile) != 1) {
+					ret = -EIO;
+					goto out;
+				}
+			}
+			/* flush: the file is to be read through its descriptor */
+			if (fflush(inode->i_tmpfile) ||
+			    fseek(inode->i_tmpfile, 0, SEEK_SET)) {
+				ret = -EIO;
+				goto out;
+			}
+		} else {
+			ret = tarerofs_write_chunk_indexes(inode,
+							   erofs_blknr(data_offset));
+			if (ret)
+				goto out;
+			if (erofs_lskip(tar->fd, inode->i_size)) {
+				ret = -EIO;
+				goto out;
+			}
+		}
+		if (fresh)
+			inode->i_nlink++;
+		ret = 0;
+		goto out;
+	}
+
+	if (!inode->i_nlink)
+		ret = tarerofs_init_empty_dir(inode);
+	else
+		ret = 0;
+
+out:
+	if (eh.path != path)
+		free(eh.path);
+	free(eh.link);
+	return ret;
+
+invalid_tar:
+	erofs_err("invalid tar @ %llu", tar_offset);
+	ret = -EIO;
+	goto out;
+}
+
+/* buffer head of the device table kept for tarerofs_write_devtable() */
+static struct erofs_buffer_head *bh_devt;
+
+/*
+ * Allocate room for @devices device slots and flag the device table in the
+ * superblock; the slots themselves are written by tarerofs_write_devtable().
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int tarerofs_reverse_devtable(unsigned int devices)
+{
+	if (!devices)
+		return 0;
+
+	bh_devt = erofs_balloc(DEVT,
+		sizeof(struct erofs_deviceslot) * devices, 0, 0);
+	if (IS_ERR(bh_devt))
+		return PTR_ERR(bh_devt);
+
+	erofs_mapbh(bh_devt->block);
+	bh_devt->op = &erofs_skip_write_bhops;
+	sbi.devt_slotoff = erofs_btell(bh_devt, false) / EROFS_DEVT_SLOT_SIZE;
+	sbi.extra_devices = devices;
+	erofs_sb_set_device_table();
+	return 0;
+}
+
+/*
+ * Write the device slots reserved by tarerofs_reverse_devtable(), each one
+ * sized after the number of tar bytes consumed so far (@tar->offset).
+ *
+ * Returns 0 on success or the error of dev_write().
+ */
+int tarerofs_write_devtable(struct erofs_tarfile *tar)
+{
+	erofs_off_t pos_out;
+	unsigned int i;
+
+	if (!sbi.extra_devices)
+		return 0;
+	pos_out = erofs_btell(bh_devt, false);
+	for (i = 0; i < sbi.extra_devices; ++i) {
+		struct erofs_deviceslot dis = {
+			.blocks = cpu_to_le32(erofs_blknr(tar->offset)),
+		};
+		int ret;
+
+		ret = dev_write(&dis, pos_out, sizeof(dis));
+		if (ret)
+			return ret;
+		pos_out += sizeof(dis);
+	}
+	bh_devt->op = &erofs_drop_directly_bhops;
+	erofs_bdrop(bh_devt, false);
+	return 0;
+}
diff --git a/mkfs/main.c b/mkfs/main.c
index 27e3f03..f3af725 100644
--- a/mkfs/main.c
+++ 
b/mkfs/main.c @@ -16,6 +16,7 @@ #include "erofs/print.h" #include "erofs/cache.h" #include "erofs/inode.h" +#include "erofs/tar.h" #include "erofs/io.h" #include "erofs/compress.h" #include "erofs/dedupe.h" @@ -56,6 +57,7 @@ static struct option long_options[] = { {"preserve-mtime", no_argument, NULL, 15}, {"uid-offset", required_argument, NULL, 16}, {"gid-offset", required_argument, NULL, 17}, + {"tar", required_argument, NULL, 19}, {"mount-point", required_argument, NULL, 512}, #ifdef WITH_ANDROID {"product-out", required_argument, NULL, 513}, @@ -128,6 +130,8 @@ static void usage(void) } static unsigned int pclustersize_packed, pclustersize_max; +static struct erofs_tarfile erofstar; +static bool tar_mode; static int parse_extended_opts(const char *opts) { @@ -475,6 +479,11 @@ static int mkfs_parse_options_cfg(int argc, char *argv[]) return -EINVAL; } break; + case 19: + if (!strcmp(optarg, "index") || !strcmp(optarg, "0")) + erofstar.index_mode = true; + tar_mode = true; + break; case 1: usage(); exit(0); @@ -506,20 +515,24 @@ static int mkfs_parse_options_cfg(int argc, char *argv[]) return -ENOMEM; if (optind >= argc) { - erofs_err("missing argument: DIRECTORY"); - return -EINVAL; - } - - cfg.c_src_path = realpath(argv[optind++], NULL); - if (!cfg.c_src_path) { - erofs_err("failed to parse source directory: %s", - erofs_strerror(-errno)); - return -ENOENT; - } + if (!tar_mode) { + erofs_err("missing argument: DIRECTORY"); + return -EINVAL; + } else { + erofstar.fd = STDIN_FILENO; + } + }else { + cfg.c_src_path = realpath(argv[optind++], NULL); + if (!cfg.c_src_path) { + erofs_err("failed to parse source directory: %s", + erofs_strerror(-errno)); + return -ENOENT; + } - if (optind < argc) { - erofs_err("unexpected argument: %s\n", argv[optind]); - return -EINVAL; + if (optind < argc) { + erofs_err("unexpected argument: %s\n", argv[optind]); + return -EINVAL; + } } if (quiet) { cfg.c_dbg_lvl = EROFS_ERR; @@ -733,14 +746,24 @@ int main(int argc, char **argv) 
return 1; } - err = lstat(cfg.c_src_path, &st); - if (err) - return 1; - if (!S_ISDIR(st.st_mode)) { - erofs_err("root of the filesystem is not a directory - %s", - cfg.c_src_path); - usage(); - return 1; + if (!tar_mode) { + err = lstat(cfg.c_src_path, &st); + if (err) + return 1; + if (!S_ISDIR(st.st_mode)) { + erofs_err("root of the filesystem is not a directory - %s", + cfg.c_src_path); + usage(); + return 1; + } + erofs_set_fs_root(cfg.c_src_path); + } else if (cfg.c_src_path) { + erofstar.fd = open(cfg.c_src_path, O_RDONLY); + if (erofstar.fd < 0) { + erofs_err("failed to open file: %s", cfg.c_src_path); + usage(); + return 1; + } } if (cfg.c_unix_timestamp != -1) { @@ -783,11 +806,14 @@ int main(int argc, char **argv) } if (cfg.c_dedupe) erofs_warn("EXPERIMENTAL data deduplication feature in use. Use at your own risk!"); - erofs_set_fs_root(cfg.c_src_path); + #ifndef NDEBUG if (cfg.c_random_pclusterblks) srand(time(NULL)); #endif + + if (tar_mode) + sbi.blkszbits = 9; sb_bh = erofs_buffer_init(); if (IS_ERR(sb_bh)) { err = PTR_ERR(sb_bh); @@ -843,7 +869,10 @@ int main(int argc, char **argv) return 1; } - err = erofs_generate_devtable(); + if (tar_mode && erofstar.index_mode) + err = tarerofs_reverse_devtable(1); + else + err = erofs_generate_devtable(); if (err) { erofs_err("failed to generate device table: %s", erofs_strerror(err)); @@ -856,22 +885,46 @@ int main(int argc, char **argv) erofs_inode_manager_init(); - err = erofs_build_shared_xattrs_from_path(cfg.c_src_path); - if (err) { - erofs_err("failed to build shared xattrs: %s", - erofs_strerror(err)); - goto exit; - } + if (!tar_mode) { + err = erofs_build_shared_xattrs_from_path(cfg.c_src_path); + if (err) { + erofs_err("failed to build shared xattrs: %s", + erofs_strerror(err)); + goto exit; + } - root_inode = erofs_mkfs_build_tree_from_path(cfg.c_src_path); - if (IS_ERR(root_inode)) { - err = PTR_ERR(root_inode); - goto exit; - } + root_inode = erofs_mkfs_build_tree_from_path(cfg.c_src_path); + if 
(IS_ERR(root_inode)) { + err = PTR_ERR(root_inode); + goto exit; + } + } else { + root_inode = erofs_new_inode(); + if (IS_ERR(root_inode)) { + err = PTR_ERR(root_inode); + goto exit; + } + root_inode->i_srcpath = strdup("/"); + root_inode->i_mode = S_IFDIR | 0777; + root_inode->i_parent = root_inode; + root_inode->i_mtime = sbi.build_time; + root_inode->i_mtime_nsec = sbi.build_time_nsec; + tarerofs_init_empty_dir(root_inode); + while (!(err = tarerofs_parse_tar(root_inode, &erofstar))); + + if (err < 0) + goto exit; + + err = tarerofs_dump_tree(root_inode); + if (err < 0) + goto exit; + } root_nid = erofs_lookupnid(root_inode); erofs_iput(root_inode); + if (tar_mode) + tarerofs_write_devtable(&erofstar); if (cfg.c_chunkbits) { erofs_info("total metadata: %u blocks", erofs_mapbh(NULL)); err = erofs_blob_remap(); -- 2.24.4
