This patch introduces a thread-safe metadata cache to reduce redundant I/O and decompression overhead during fsck extraction.
To ensure it remains highly concurrent for worker threads extracting
pclusters, the cache utilizes a bucketed, rw-semaphore protected
architecture modeled after the existing fragment cache.

Entries are keyed by (sbi, nid, offset), so several metadata blobs read
from the same inode at different offsets never alias each other.

Furthermore, to prevent out-of-memory (OOM) scenarios on exceptionally
large EROFS images, the cache implements a global Least Recently Used
(LRU) eviction policy.  The maximum cache size is configurable via the
new '--cache-size' parameter, which defaults to 32 MB.  Entries larger
than the configured capacity (including a capacity of 0) bypass the
cache entirely.

Signed-off-by: Nithurshen <[email protected]>
---
Review notes (this section is ignored by git-am):
 - fix use-after-free: a cache hit no longer re-links an item that the
   evictor has already unlinked and is about to free;
 - key entries by (sbi, nid, offset) instead of "nid ? nid : offset";
 - link new items into the LRU before they become visible to lookups,
   and drop duplicate insertions from racing readers;
 - only examine meta_cache_inited / meta_cache_bytes under their locks;
 - reject empty and out-of-range --cache-size values;
 - keep the trailing newline of lib/data.c;
 - whitespace of the context lines was reconstructed and the post-image
   blob hashes on the "index" lines were NOT regenerated: please
   re-create the patch with git format-patch before sending.

 fsck/main.c              |  17 ++++
 include/erofs/internal.h |   2 +
 lib/data.c               | 194 ++++++++++++++++++++++++++++++++++++++-
 3 files changed, 211 insertions(+), 2 deletions(-)

diff --git a/fsck/main.c b/fsck/main.c
index ffe7e29..7a1e573 100644
--- a/fsck/main.c
+++ b/fsck/main.c
@@ -67,6 +67,7 @@ static struct option long_options[] = {
 	{"no-xattrs", no_argument, 0, 14},
 	{"nid", required_argument, 0, 15},
 	{"path", required_argument, 0, 16},
+	{"cache-size", required_argument, 0, 17},
 	{"no-sbcrc", no_argument, 0, 512},
 	{0, 0, 0, 0},
 };
@@ -120,6 +121,7 @@ static void usage(int argc, char **argv)
 		" --offset=#             skip # bytes at the beginning of IMAGE\n"
 		" --nid=#                check or extract from the target inode of nid #\n"
 		" --path=X               check or extract from the target inode of path X\n"
+		" --cache-size=#         set maximum metadata cache size in bytes (default 32MB)\n"
 		" --no-sbcrc             bypass the superblock checksum verification\n"
 		" --[no-]xattrs          whether to dump extended attributes (default off)\n"
 		"\n"
@@ -261,6 +263,21 @@ static int erofsfsck_parse_options_cfg(int argc, char **argv)
 		case 16:
 			fsckcfg.inode_path = optarg;
 			break;
+		case 17: {
+			unsigned long cache_size;
+			char *endptr;
+
+			/* strtoul() reports overflow through errno only */
+			errno = 0;
+			cache_size = strtoul(optarg, &endptr, 0);
+			if (endptr == optarg || *endptr != '\0' ||
+			    errno == ERANGE) {
+				erofs_err("invalid metadata cache size %s",
+					  optarg);
+				return -EINVAL;
+			}
+			erofs_meta_cache_set_capacity(cache_size);
+			break;
+		}
 		case 512:
 			fsckcfg.nosbcrc = true;
 			break;
diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index 94f14da..34b7eb3 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -459,6 +459,8 @@ struct z_erofs_read_ctx {
 
 void z_erofs_read_ctx_enqueue(struct z_erofs_read_ctx *ctx);
 
+void erofs_meta_cache_set_capacity(unsigned long bytes);
+
 int liberofs_global_init(void);
 void liberofs_global_exit(void);
 
diff --git a/lib/data.c b/lib/data.c
index e9d2218..9acf2bf 100644
--- a/lib/data.c
+++ b/lib/data.c
@@ -29,6 +29,118 @@ struct z_erofs_decompress_task {
 	unsigned int nr_reqs;
 };
 
+#define META_HASHSIZE		65536
+#define META_HASH(c)		((c) & (META_HASHSIZE - 1))
+
+struct erofs_meta_bucket {
+	struct list_head hash;
+	erofs_rwsem_t lock;
+};
+
+/*
+ * A cached metadata blob, identified by (sbi, nid, offset).  "next" is the
+ * offset the underlying reader left behind; it is replayed on cache hits.
+ */
+struct erofs_meta_item {
+	struct list_head list;	/* bucket chain, protected by the bucket lock */
+	struct list_head lru;	/* global LRU, protected by meta_lru_lock */
+	struct erofs_sb_info *sbi;
+	erofs_nid_t nid;
+	erofs_off_t offset;
+	erofs_off_t next;
+	char *data;
+	int length;
+	bool evicting;		/* written under meta_lru_lock only */
+};
+
+static struct erofs_meta_bucket meta_bks[META_HASHSIZE];
+static bool meta_cache_inited;
+static EROFS_DEFINE_MUTEX(meta_cache_init_lock);
+
+static EROFS_DEFINE_MUTEX(meta_lru_lock);
+static struct list_head meta_lru_list;
+static unsigned long meta_cache_bytes;
+static unsigned long meta_cache_max_bytes = 32 * 1024 * 1024;
+
+/* Call before any worker thread starts reading metadata. */
+void erofs_meta_cache_set_capacity(unsigned long bytes)
+{
+	meta_cache_max_bytes = bytes;
+}
+
+/* One-time setup; the flag is only examined with the init mutex held. */
+static void erofs_meta_cache_init(void)
+{
+	int i;
+
+	erofs_mutex_lock(&meta_cache_init_lock);
+	if (!meta_cache_inited) {
+		for (i = 0; i < META_HASHSIZE; ++i) {
+			init_list_head(&meta_bks[i].hash);
+			erofs_init_rwsem(&meta_bks[i].lock);
+		}
+		init_list_head(&meta_lru_list);
+		meta_cache_inited = true;
+	}
+	erofs_mutex_unlock(&meta_cache_init_lock);
+}
+
+static struct erofs_meta_bucket *erofs_meta_bucket_of(erofs_nid_t nid,
+						      erofs_off_t offset)
+{
+	u64 h = ((u64)nid << 20) ^ (u64)offset;
+
+	return &meta_bks[META_HASH(h ^ (h >> 16))];
+}
+
+/* The caller must hold bk->lock, shared or exclusive. */
+static struct erofs_meta_item *erofs_meta_find(struct erofs_meta_bucket *bk,
+					       struct erofs_sb_info *sbi,
+					       erofs_nid_t nid,
+					       erofs_off_t offset)
+{
+	struct erofs_meta_item *item;
+
+	list_for_each_entry(item, &bk->hash, list) {
+		if (item->sbi == sbi && item->nid == nid &&
+		    item->offset == offset)
+			return item;
+	}
+	return NULL;
+}
+
+static void erofs_meta_cache_evict(void)
+{
+	struct erofs_meta_item *item;
+	struct erofs_meta_bucket *bk;
+
+	erofs_mutex_lock(&meta_lru_lock);
+	while (meta_cache_bytes > meta_cache_max_bytes &&
+	       !list_empty(&meta_lru_list)) {
+		/* the least recently used item sits at the tail */
+		item = list_last_entry(&meta_lru_list,
+				       struct erofs_meta_item, lru);
+		/* keep concurrent cache hits from re-linking it */
+		item->evicting = true;
+		list_del(&item->lru);
+		init_list_head(&item->lru);
+		meta_cache_bytes -= item->length;
+		erofs_mutex_unlock(&meta_lru_lock);
+
+		/* waits for readers still copying out of item->data */
+		bk = erofs_meta_bucket_of(item->nid, item->offset);
+		erofs_down_write(&bk->lock);
+		list_del(&item->list);
+		erofs_up_write(&bk->lock);
+
+		free(item->data);
+		free(item);
+
+		erofs_mutex_lock(&meta_lru_lock);
+	}
+	erofs_mutex_unlock(&meta_lru_lock);
+}
+
 static void z_erofs_decompress_worker(struct erofs_work *work, void *tlsp)
 {
 	struct z_erofs_decompress_task *task = (struct z_erofs_decompress_task *)work;
@@ -604,7 +716,85 @@ static void *erofs_read_metadata_bdi(struct erofs_sb_info *sbi,
 void *erofs_read_metadata(struct erofs_sb_info *sbi, erofs_nid_t nid,
 			  erofs_off_t *offset, int *lengthp)
 {
+	const erofs_off_t start = *offset;
+	struct erofs_meta_bucket *bk;
+	struct erofs_meta_item *item;
+	void *buffer = NULL;
+	bool over;
+
+	erofs_meta_cache_init();
+	bk = erofs_meta_bucket_of(nid, start);
+
+	erofs_down_read(&bk->lock);
+	item = erofs_meta_find(bk, sbi, nid, start);
+	if (item)
+		buffer = malloc(item->length);
+	if (buffer) {
+		memcpy(buffer, item->data, item->length);
+		*lengthp = item->length;
+		*offset = item->next;
+
+		erofs_mutex_lock(&meta_lru_lock);
+		/* an item already picked for eviction must stay unlinked */
+		if (!item->evicting) {
+			list_del(&item->lru);
+			list_add(&item->lru, &meta_lru_list);
+		}
+		erofs_mutex_unlock(&meta_lru_lock);
+	}
+	erofs_up_read(&bk->lock);
+
+	if (buffer)
+		return buffer;
+
 	if (nid)
-		return erofs_read_metadata_nid(sbi, nid, offset, lengthp);
-	return erofs_read_metadata_bdi(sbi, offset, lengthp);
+		buffer = erofs_read_metadata_nid(sbi, nid, offset, lengthp);
+	else
+		buffer = erofs_read_metadata_bdi(sbi, offset, lengthp);
+
+	/* caching is best-effort: skip whatever cannot be cached */
+	if (!buffer || IS_ERR(buffer) || *lengthp <= 0 ||
+	    (unsigned long)*lengthp > meta_cache_max_bytes)
+		return buffer;
+
+	item = malloc(sizeof(*item));
+	if (!item)
+		return buffer;
+	item->data = malloc(*lengthp);
+	if (!item->data) {
+		free(item);
+		return buffer;
+	}
+	memcpy(item->data, buffer, *lengthp);
+	item->sbi = sbi;
+	item->nid = nid;
+	item->offset = start;
+	item->next = *offset;
+	item->length = *lengthp;
+	item->evicting = false;
+
+	erofs_down_write(&bk->lock);
+	if (erofs_meta_find(bk, sbi, nid, start)) {
+		/* another thread cached the same entry in the meantime */
+		erofs_up_write(&bk->lock);
+		free(item->data);
+		free(item);
+		return buffer;
+	}
+	list_add_tail(&item->list, &bk->hash);
+	/*
+	 * Link into the LRU before dropping the bucket lock so that lookups
+	 * never see an item whose ->lru is uninitialized.  The lock order is
+	 * bucket -> LRU, the same as on the hit path.
+	 */
+	erofs_mutex_lock(&meta_lru_lock);
+	list_add(&item->lru, &meta_lru_list);
+	meta_cache_bytes += *lengthp;
+	over = meta_cache_bytes > meta_cache_max_bytes;
+	erofs_mutex_unlock(&meta_lru_lock);
+	erofs_up_write(&bk->lock);
+
+	if (over)
+		erofs_meta_cache_evict();
+	return buffer;
 }
-- 
2.52.0
