This patch introduces a thread-safe userspace metadata cache to reduce
redundant decompression cycles and the overhead of repetitive pread()
syscalls across multiple background worker threads.

To ensure it remains highly concurrent for worker threads extracting
pclusters, the cache utilizes a bucketed, rw-semaphore protected
architecture modeled after the existing fragment cache.

While a userspace cache inherently increases the memory footprint
compared to relying solely on the kernel's page cache, this patch
implements a global Least Recently Used (LRU) eviction policy to bound
this additional memory overhead: whenever an insertion pushes the cache
past its limit, the least recently used entries are dropped. This
prevents the cache from growing unbounded on exceptionally large EROFS
images. The maximum cache capacity is given in bytes through the new
'--cache-size' fsck option and defaults to 32 MiB.

Signed-off-by: Nithurshen <[email protected]>
---
 fsck/main.c              |  12 ++++
 include/erofs/internal.h |   2 +
 lib/data.c               | 150 ++++++++++++++++++++++++++++++++++++++-
 3 files changed, 161 insertions(+), 3 deletions(-)

diff --git a/fsck/main.c b/fsck/main.c
index ffe7e29..7a1e573 100644
--- a/fsck/main.c
+++ b/fsck/main.c
@@ -67,6 +67,7 @@ static struct option long_options[] = {
        {"no-xattrs", no_argument, 0, 14},
        {"nid", required_argument, 0, 15},
        {"path", required_argument, 0, 16},
+       {"cache-size", required_argument, 0, 17},
        {"no-sbcrc", no_argument, 0, 512},
        {0, 0, 0, 0},
 };
@@ -120,6 +121,7 @@ static void usage(int argc, char **argv)
                " --offset=#             skip # bytes at the beginning of 
IMAGE\n"
                " --nid=#                check or extract from the target inode 
of nid #\n"
                " --path=X               check or extract from the target inode 
of path X\n"
+               " --cache-size=#        set maximum metadata cache size in 
bytes (default 32MB)\n"
                " --no-sbcrc             bypass the superblock checksum 
verification\n"
                " --[no-]xattrs          whether to dump extended attributes 
(default off)\n"
                "\n"
@@ -261,6 +263,16 @@ static int erofsfsck_parse_options_cfg(int argc, char 
**argv)
                case 16:
                        fsckcfg.inode_path = optarg;
                        break;
+               case 17: {
+                       /* --cache-size=#: metadata cache limit in bytes */
+                       char *endptr;
+                       unsigned long cache_size;
+
+                       errno = 0;
+                       cache_size = strtoul(optarg, &endptr, 0);
+                       /*
+                        * strtoul() on its own accepts an empty string (as 0)
+                        * and silently wraps negative numbers, so insist on a
+                        * leading digit, a fully consumed string and a value
+                        * that fits in an unsigned long.
+                        */
+                       if (optarg[0] < '0' || optarg[0] > '9' ||
+                           *endptr != '\0' || errno == ERANGE) {
+                               erofs_err("invalid metadata cache size %s",
+                                         optarg);
+                               return -EINVAL;
+                       }
+                       erofs_meta_cache_set_capacity(cache_size);
+                       break;
+               }
                case 512:
                        fsckcfg.nosbcrc = true;
                        break;
diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index 94f14da..34b7eb3 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -459,6 +459,8 @@ struct z_erofs_read_ctx {
 
 void z_erofs_read_ctx_enqueue(struct z_erofs_read_ctx *ctx);
 
+/* Set the size limit, in bytes, of the userspace metadata cache */
+void erofs_meta_cache_set_capacity(unsigned long bytes);
+
 int liberofs_global_init(void);
 void liberofs_global_exit(void);
 
diff --git a/lib/data.c b/lib/data.c
index e9d2218..b8d81b3 100644
--- a/lib/data.c
+++ b/lib/data.c
@@ -29,6 +29,84 @@ struct z_erofs_decompress_task {
        unsigned int nr_reqs;
 };
 
+/* Number of hash buckets; a power of two, as META_HASH() masks with it */
+#define META_HASHSIZE          65536
+/* Map a cache key onto a bucket index in [0, META_HASHSIZE) */
+#define META_HASH(c)           ((c) & (META_HASHSIZE - 1))
+
+/* One hash bucket of the metadata cache */
+struct erofs_meta_bucket {
+       struct list_head hash;  /* chain of erofs_meta_item.list entries */
+       erofs_rwsem_t lock;     /* read for lookups, write to (un)link items */
+};
+
+/* One cached metadata record, linked into a hash bucket and the global LRU */
+struct erofs_meta_item {
+       struct list_head list;  /* node in the owning bucket's hash chain */
+       struct list_head lru;   /* node in meta_lru_list */
+       u64 key;                /* lookup key; META_HASH(key) picks the bucket */
+       char *data;             /* heap buffer owned by the item, freed on eviction */
+       int length;             /* record size in bytes, as counted in meta_cache_bytes */
+       bool evicting;          /* set by eviction; cache hits then leave ->lru alone */
+};
+
+/* Hash table of cached metadata records, indexed by META_HASH(key) */
+static struct erofs_meta_bucket meta_bks[META_HASHSIZE];
+/* Becomes true, under meta_cache_init_lock, once the lists/locks are set up */
+static bool meta_cache_inited = false;
+/* File-local like everything else here, hence static */
+static EROFS_DEFINE_MUTEX(meta_cache_init_lock);
+
+/*
+ * Global LRU state: the list is kept most-recently-used first, and both it
+ * and meta_cache_bytes are updated with meta_lru_lock held.
+ */
+static EROFS_DEFINE_MUTEX(meta_lru_lock);
+static struct list_head meta_lru_list;
+static unsigned long meta_cache_bytes = 0;
+/* Eviction threshold in bytes; 32 MiB unless overridden */
+static unsigned long meta_cache_max_bytes = 32 * 1024 * 1024;
+
+/*
+ * Set the eviction threshold of the metadata cache to @bytes.
+ *
+ * Nothing is evicted here; the new limit is only compared against the
+ * cache size by later insertions/evictions.
+ * NOTE(review): plain store without meta_lru_lock -- presumably only meant
+ * to be called during option parsing, before any worker runs; confirm.
+ */
+void erofs_meta_cache_set_capacity(unsigned long bytes)
+{
+       meta_cache_max_bytes = bytes;
+}
+
+/*
+ * Set up every hash bucket and the LRU list exactly once.  The whole
+ * function runs under meta_cache_init_lock, so concurrent callers either
+ * perform the setup or find meta_cache_inited already set and do nothing.
+ */
+static void erofs_meta_cache_init(void)
+{
+       struct erofs_meta_bucket *bk;
+
+       erofs_mutex_lock(&meta_cache_init_lock);
+       if (!meta_cache_inited) {
+               for (bk = meta_bks; bk < meta_bks + META_HASHSIZE; ++bk) {
+                       init_list_head(&bk->hash);
+                       erofs_init_rwsem(&bk->lock);
+               }
+               init_list_head(&meta_lru_list);
+               meta_cache_inited = true;
+       }
+       erofs_mutex_unlock(&meta_cache_init_lock);
+}
+
+/*
+ * Free least-recently-used items until meta_cache_bytes no longer exceeds
+ * meta_cache_max_bytes or the LRU list runs empty.
+ */
+static void erofs_meta_cache_evict(void)
+{
+       erofs_mutex_lock(&meta_lru_lock);
+       for (;;) {
+               struct erofs_meta_item *victim;
+               struct erofs_meta_bucket *bucket;
+
+               if (meta_cache_bytes <= meta_cache_max_bytes ||
+                   list_empty(&meta_lru_list))
+                       break;
+
+               /* the coldest item sits at the tail of the LRU list */
+               victim = list_last_entry(&meta_lru_list,
+                                        struct erofs_meta_item, lru);
+               /* flag it so that a concurrent cache hit won't relink it */
+               victim->evicting = true;
+               list_del(&victim->lru);
+               init_list_head(&victim->lru);
+               meta_cache_bytes -= victim->length;
+               erofs_mutex_unlock(&meta_lru_lock);
+
+               /*
+                * The LRU lock is dropped before the bucket lock is taken:
+                * lookups acquire them the other way around (bucket first).
+                */
+               bucket = &meta_bks[META_HASH(victim->key)];
+               erofs_down_write(&bucket->lock);
+               list_del(&victim->list);
+               erofs_up_write(&bucket->lock);
+
+               free(victim->data);
+               free(victim);
+
+               erofs_mutex_lock(&meta_lru_lock);
+       }
+       erofs_mutex_unlock(&meta_lru_lock);
+}
+
 static void z_erofs_decompress_worker(struct erofs_work *work, void *tlsp)
 {
        struct z_erofs_decompress_task *task = (struct z_erofs_decompress_task 
*)work;
@@ -604,7 +682,73 @@ static void *erofs_read_metadata_bdi(struct erofs_sb_info 
*sbi,
 void *erofs_read_metadata(struct erofs_sb_info *sbi, erofs_nid_t nid,
                          erofs_off_t *offset, int *lengthp)
 {
+       u64 key = nid ? nid : *offset;
+       struct erofs_meta_bucket *bk;
+       struct erofs_meta_item *item;
+       void *buffer = NULL;
+
+       if (__erofs_unlikely(!meta_cache_inited))
+               erofs_meta_cache_init();
+
+       bk = &meta_bks[META_HASH(key)];
+
+       erofs_down_read(&bk->lock);
+       list_for_each_entry(item, &bk->hash, list) {
+               if (item->key == key) {
+                       buffer = malloc(item->length);
+                       if (buffer) {
+                               memcpy(buffer, item->data, item->length);
+                               *lengthp = item->length;
+                               *offset = round_up(*offset, 4);
+                               *offset += sizeof(__le16) + item->length;
+                               
+                               erofs_mutex_lock(&meta_lru_lock);
+                               if (!item->evicting) {
+                                       list_del(&item->lru);
+                                       list_add(&item->lru, &meta_lru_list);
+                               }
+                               erofs_mutex_unlock(&meta_lru_lock);
+                       }
+                       break;
+               }
+       }
+       erofs_up_read(&bk->lock);
+
+       if (buffer)
+               return buffer;
+
        if (nid)
-               return erofs_read_metadata_nid(sbi, nid, offset, lengthp);
-       return erofs_read_metadata_bdi(sbi, offset, lengthp);
-}
+               buffer = erofs_read_metadata_nid(sbi, nid, offset, lengthp);
+       else
+               buffer = erofs_read_metadata_bdi(sbi, offset, lengthp);
+
+       if (IS_ERR(buffer))
+               return buffer;
+
+       item = malloc(sizeof(*item));
+       if (item) {
+               item->key = key;
+               item->length = *lengthp;
+               item->evicting = false;
+               item->data = malloc(*lengthp);
+               if (item->data) {
+                       memcpy(item->data, buffer, *lengthp);
+                       
+                       erofs_down_write(&bk->lock);
+                       list_add_tail(&item->list, &bk->hash);
+                       erofs_up_write(&bk->lock);
+
+                       erofs_mutex_lock(&meta_lru_lock);
+                       list_add(&item->lru, &meta_lru_list);
+                       meta_cache_bytes += *lengthp;
+                       erofs_mutex_unlock(&meta_lru_lock);
+
+                       if (meta_cache_bytes > meta_cache_max_bytes)
+                               erofs_meta_cache_evict();
+               } else {
+                       free(item);
+               }
+       }
+
+       return buffer;
+}
\ No newline at end of file
-- 
2.52.0


Reply via email to