Hi Xiang,

Following up on our discussion regarding the multi-threaded fsck
pipeline and the need to cache metadata to reduce bottlenecking, I
noticed your TODO in lib/data.c about introducing a metabox cache.
I have put together this PoC. To ensure it remains highly concurrent
and thread-safe for the upcoming worker threads extracting pclusters,
I modeled it directly after the bucketed, erofs_rwsem_t approach used
in lib/fragments.c.

Testing on an LZ4HC 4K EROFS image of the Linux 6.7 source tree showed
a significant drop in I/O overhead:
Baseline Extraction: 1.538s
With Meta Cache PoC: 1.090s (~29% reduction)

In the current PoC, the cache grows without bound. Before turning
this into a formal patch, I plan to add an LRU eviction policy to keep
the memory footprint bounded on large images.

I would love your thoughts on this approach and whether it aligns with
your vision for the metadata caching prerequisite.

Regards,
Nithurshen

On Wed, Mar 4, 2026 at 8:17 AM Nithurshen <[email protected]> wrote:
>
> This PoC introduces a thread-safe metadata cache to reduce redundant I/O
> and decompression overhead during fsck extraction. It directly addresses
> the TODO in erofs_bread by modeling a bucketed, rw-semaphore protected
> cache after the existing fragment cache implementation.
>
> Baseline (LZ4HC 4K pclusters, Linux 6.7 tree):
> Extraction time: 1.538s
>
> With Meta Cache PoC:
> Extraction time: 1.090s (~29% reduction)
>
> Signed-off-by: Nithurshen <[email protected]>
> ---
>  lib/data.c | 84 ++++++++++++++++++++++++++++++++++++++++++++++++++++--
>  1 file changed, 81 insertions(+), 3 deletions(-)
>
> diff --git a/lib/data.c b/lib/data.c
> index 6fd1389..bcd8d17 100644
> --- a/lib/data.c
> +++ b/lib/data.c
> @@ -9,6 +9,35 @@
>  #include "erofs/trace.h"
>  #include "erofs/decompress.h"
>  #include "liberofs_fragments.h"
> +#include "erofs/lock.h"
> +
> +#define META_HASHSIZE          65536
> +#define META_HASH(c)           ((c) & (META_HASHSIZE - 1))
> +
> +struct erofs_meta_bucket {
> +       struct list_head hash;
> +       erofs_rwsem_t lock;
> +};
> +
> +struct erofs_meta_item {
> +       struct list_head list;
> +       u64 key;
> +       char *data;
> +       int length;
> +};
> +
> +static struct erofs_meta_bucket meta_bks[META_HASHSIZE];
> +static bool meta_cache_inited = false;
> +
> +static void erofs_meta_cache_init(void)
> +{
> +       int i;
> +       for (i = 0; i < META_HASHSIZE; ++i) {
> +               init_list_head(&meta_bks[i].hash);
> +               erofs_init_rwsem(&meta_bks[i].lock);
> +       }
> +       meta_cache_inited = true;
> +}
>
>  void *erofs_bread(struct erofs_buf *buf, erofs_off_t offset, bool need_kmap)
>  {
> @@ -500,7 +529,56 @@ static void *erofs_read_metadata_bdi(struct erofs_sb_info *sbi,
>  void *erofs_read_metadata(struct erofs_sb_info *sbi, erofs_nid_t nid,
>                           erofs_off_t *offset, int *lengthp)
>  {
> +       u64 key = nid ? nid : *offset;
> +       struct erofs_meta_bucket *bk;
> +       struct erofs_meta_item *item;
> +       void *buffer = NULL;
> +
> +       if (__erofs_unlikely(!meta_cache_inited))
> +               erofs_meta_cache_init();
> +
> +       bk = &meta_bks[META_HASH(key)];
> +
> +       erofs_down_read(&bk->lock);
> +       list_for_each_entry(item, &bk->hash, list) {
> +               if (item->key == key) {
> +                       buffer = malloc(item->length);
> +                       if (buffer) {
> +                               memcpy(buffer, item->data, item->length);
> +                               *lengthp = item->length;
> +                               *offset = round_up(*offset, 4);
> +                               *offset += sizeof(__le16) + item->length;
> +                       }
> +                       break;
> +               }
> +       }
> +       erofs_up_read(&bk->lock);
> +
> +       if (buffer)
> +               return buffer;
> +
>         if (nid)
> -               return erofs_read_metadata_nid(sbi, nid, offset, lengthp);
> -       return erofs_read_metadata_bdi(sbi, offset, lengthp);
> -}
> +               buffer = erofs_read_metadata_nid(sbi, nid, offset, lengthp);
> +       else
> +               buffer = erofs_read_metadata_bdi(sbi, offset, lengthp);
> +
> +       if (IS_ERR(buffer))
> +               return buffer;
> +
> +       item = malloc(sizeof(*item));
> +       if (item) {
> +               item->key = key;
> +               item->length = *lengthp;
> +               item->data = malloc(*lengthp);
> +               if (item->data) {
> +                       memcpy(item->data, buffer, *lengthp);
> +                       erofs_down_write(&bk->lock);
> +                       list_add_tail(&item->list, &bk->hash);
> +                       erofs_up_write(&bk->lock);
> +               } else {
> +                       free(item);
> +               }
> +       }
> +
> +       return buffer;
> +}
> \ No newline at end of file
> --
> 2.51.0
>

Reply via email to