This patch introduces a thread-safe metadata cache to reduce redundant
I/O and decompression overhead during fsck extraction.

To ensure it remains highly concurrent for worker threads extracting
pclusters, the cache utilizes a bucketed, rw-semaphore protected
architecture modeled after the existing fragment cache.

Furthermore, to prevent out-of-memory (OOM) conditions on exceptionally
large EROFS images, the cache applies a global least-recently-used (LRU)
eviction policy: whenever an insertion pushes the cache over its limit,
the least recently used entries are dropped until it fits again. The
maximum cache size, in bytes, can be set with the new '--cache-size'
option and defaults to 32 MiB.

Signed-off-by: Nithurshen <[email protected]>
---
 fsck/main.c              |  12 ++++
 include/erofs/internal.h |   2 +
 lib/data.c               | 149 ++++++++++++++++++++++++++++++++++++++-
 3 files changed, 160 insertions(+), 3 deletions(-)

diff --git a/fsck/main.c b/fsck/main.c
index ffe7e29..7a1e573 100644
--- a/fsck/main.c
+++ b/fsck/main.c
@@ -67,6 +67,7 @@ static struct option long_options[] = {
        {"no-xattrs", no_argument, 0, 14},
        {"nid", required_argument, 0, 15},
        {"path", required_argument, 0, 16},
+       {"cache-size", required_argument, 0, 17},
        {"no-sbcrc", no_argument, 0, 512},
        {0, 0, 0, 0},
 };
@@ -120,6 +121,7 @@ static void usage(int argc, char **argv)
                " --offset=#             skip # bytes at the beginning of 
IMAGE\n"
                " --nid=#                check or extract from the target inode 
of nid #\n"
                " --path=X               check or extract from the target inode 
of path X\n"
+               " --cache-size=#        set maximum metadata cache size in 
bytes (default 32MB)\n"
                " --no-sbcrc             bypass the superblock checksum 
verification\n"
                " --[no-]xattrs          whether to dump extended attributes 
(default off)\n"
                "\n"
@@ -261,6 +263,16 @@ static int erofsfsck_parse_options_cfg(int argc, char 
**argv)
                case 16:
                        fsckcfg.inode_path = optarg;
                        break;
+               case 17: {
+                       char *endptr;
+                       unsigned long cache_size = strtoul(optarg, &endptr, 0);
+                       if (*endptr != '\0') {
+                               erofs_err("invalid metadata cache size %s", 
optarg);
+                               return -EINVAL;
+                       }
+                       erofs_meta_cache_set_capacity(cache_size);
+                       break;
+               }
                case 512:
                        fsckcfg.nosbcrc = true;
                        break;
diff --git a/include/erofs/internal.h b/include/erofs/internal.h
index 94f14da..34b7eb3 100644
--- a/include/erofs/internal.h
+++ b/include/erofs/internal.h
@@ -459,6 +459,8 @@ struct z_erofs_read_ctx {
 
 void z_erofs_read_ctx_enqueue(struct z_erofs_read_ctx *ctx);
 
+void erofs_meta_cache_set_capacity(unsigned long bytes); /* limit, in bytes */
+
 int liberofs_global_init(void);
 void liberofs_global_exit(void);
 
diff --git a/lib/data.c b/lib/data.c
index e9d2218..9acf2bf 100644
--- a/lib/data.c
+++ b/lib/data.c
@@ -29,6 +29,84 @@ struct z_erofs_decompress_task {
        unsigned int nr_reqs;
 };
 
+#define META_HASHSIZE          65536   /* bucket count; power of two */
+#define META_HASH(c)           ((c) & (META_HASHSIZE - 1)) /* key -> index */
+
+struct erofs_meta_bucket {     /* one slot of the meta_bks[] hash table */
+       struct list_head hash;  /* chain of erofs_meta_item.list nodes */
+       erofs_rwsem_t lock;     /* guards the `hash` chain */
+};
+
+struct erofs_meta_item {       /* one cached metadata blob */
+       struct list_head list;  /* link in its bucket's hash chain */
+       struct list_head lru;   /* link in meta_lru_list */
+       u64 key;                /* lookup key; META_HASH(key) picks bucket */
+       char *data;             /* heap copy of the blob, freed on eviction */
+       int length;             /* size of `data` in bytes */
+       bool evicting;          /* set by the evictor once off the LRU */
+};
+
+/* Hash table of cached metadata blobs, indexed by META_HASH(key). */
+static struct erofs_meta_bucket meta_bks[META_HASHSIZE];
+
+/* One-time setup state; the flag is set while holding the init lock. */
+static bool meta_cache_inited = false;
+static EROFS_DEFINE_MUTEX(meta_cache_init_lock);
+
+/*
+ * Global LRU bookkeeping.  The list (most recently used entry first) and
+ * the running byte count are updated under meta_lru_lock; the limit
+ * defaults to 32 MiB and is changed by erofs_meta_cache_set_capacity().
+ */
+static EROFS_DEFINE_MUTEX(meta_lru_lock);
+static struct list_head meta_lru_list;
+static unsigned long meta_cache_bytes = 0;
+static unsigned long meta_cache_max_bytes = 32 * 1024 * 1024;
+
+void erofs_meta_cache_set_capacity(unsigned long bytes)
+{      /* Set the cache size limit; takes no lock while doing so. */
+       meta_cache_max_bytes = bytes;   /* compared with meta_cache_bytes */
+}
+
+/*
+ * Prepare every hash bucket and the LRU list exactly once.  Callers may
+ * race here: the init lock serializes them and the flag makes all but the
+ * first call a no-op.
+ */
+static void erofs_meta_cache_init(void)
+{
+       struct erofs_meta_bucket *bk;
+
+       erofs_mutex_lock(&meta_cache_init_lock);
+       if (!meta_cache_inited) {
+               for (bk = meta_bks; bk < meta_bks + META_HASHSIZE; ++bk) {
+                       init_list_head(&bk->hash);
+                       erofs_init_rwsem(&bk->lock);
+               }
+               init_list_head(&meta_lru_list);
+               meta_cache_inited = true;
+       }
+       erofs_mutex_unlock(&meta_cache_init_lock);
+}
+
+/*
+ * Drop entries from the cold end of the LRU list until the cache is back
+ * within meta_cache_max_bytes or the list is empty.
+ *
+ * meta_lru_lock is released before the bucket lock is taken, so the two
+ * locks are never held together on this path.
+ */
+static void erofs_meta_cache_evict(void)
+{
+       erofs_mutex_lock(&meta_lru_lock);
+       for (;;) {
+               struct erofs_meta_bucket *bucket;
+               struct erofs_meta_item *victim;
+
+               if (meta_cache_bytes <= meta_cache_max_bytes ||
+                   list_empty(&meta_lru_list))
+                       break;
+
+               /* the tail is the entry touched longest ago */
+               victim = list_last_entry(&meta_lru_list,
+                                        struct erofs_meta_item, lru);
+               /* flag it first so a concurrent hit will not re-link it */
+               victim->evicting = true;
+               list_del(&victim->lru);
+               init_list_head(&victim->lru);
+               meta_cache_bytes -= victim->length;
+               erofs_mutex_unlock(&meta_lru_lock);
+
+               /* unlink from the hash chain; waits for in-flight readers */
+               bucket = &meta_bks[META_HASH(victim->key)];
+               erofs_down_write(&bucket->lock);
+               list_del(&victim->list);
+               erofs_up_write(&bucket->lock);
+
+               free(victim->data);
+               free(victim);
+
+               erofs_mutex_lock(&meta_lru_lock);
+       }
+       erofs_mutex_unlock(&meta_lru_lock);
+}
+
 static void z_erofs_decompress_worker(struct erofs_work *work, void *tlsp)
 {
        struct z_erofs_decompress_task *task = (struct z_erofs_decompress_task 
*)work;
@@ -604,7 +682,72 @@ static void *erofs_read_metadata_bdi(struct erofs_sb_info 
*sbi,
 void *erofs_read_metadata(struct erofs_sb_info *sbi, erofs_nid_t nid,
                          erofs_off_t *offset, int *lengthp)
 {
+       u64 key = nid ? nid : *offset;
+       struct erofs_meta_bucket *bk;
+       struct erofs_meta_item *item;
+       void *buffer = NULL;
+
+       if (__erofs_unlikely(!meta_cache_inited))
+               erofs_meta_cache_init();
+
+       bk = &meta_bks[META_HASH(key)];
+
+       erofs_down_read(&bk->lock);
+       list_for_each_entry(item, &bk->hash, list) {
+               if (item->key == key) {
+                       buffer = malloc(item->length);
+                       if (buffer) {
+                               memcpy(buffer, item->data, item->length);
+                               *lengthp = item->length;
+                               *offset = round_up(*offset, 4);
+                               *offset += sizeof(__le16) + item->length;
+                               
+                               erofs_mutex_lock(&meta_lru_lock);
+                if (!item->evicting)
+                    list_del(&item->lru);
+                                       list_add(&item->lru, &meta_lru_list);
+                erofs_mutex_unlock(&meta_lru_lock);
+                       }
+                       break;
+               }
+       }
+       erofs_up_read(&bk->lock);
+
+       if (buffer)
+               return buffer;
+
        if (nid)
-               return erofs_read_metadata_nid(sbi, nid, offset, lengthp);
-       return erofs_read_metadata_bdi(sbi, offset, lengthp);
-}
+               buffer = erofs_read_metadata_nid(sbi, nid, offset, lengthp);
+       else
+               buffer = erofs_read_metadata_bdi(sbi, offset, lengthp);
+
+       if (IS_ERR(buffer))
+               return buffer;
+
+       item = malloc(sizeof(*item));
+       if (item) {
+               item->key = key;
+               item->length = *lengthp;
+               item->evicting = false;
+               item->data = malloc(*lengthp);
+               if (item->data) {
+                       memcpy(item->data, buffer, *lengthp);
+                       
+                       erofs_down_write(&bk->lock);
+                       list_add_tail(&item->list, &bk->hash);
+                       erofs_up_write(&bk->lock);
+
+                       erofs_mutex_lock(&meta_lru_lock);
+            list_add(&item->lru, &meta_lru_list);
+            meta_cache_bytes += *lengthp;
+            erofs_mutex_unlock(&meta_lru_lock);
+
+                       if (meta_cache_bytes > meta_cache_max_bytes)
+                               erofs_meta_cache_evict();
+               } else {
+                       free(item);
+               }
+       }
+
+       return buffer;
+}
\ No newline at end of file
-- 
2.52.0


Reply via email to