From: Hu Weiwen <[email protected]>

When using EROFS to pack our dataset which consists of millions of
files, mkfs.erofs is very slow compared with mksquashfs.

The bottleneck is `erofs_balloc` and `erofs_mapbh` function, which
iterate over all previously allocated buffer blocks, making the
complexity of the algrithm O(N^2) where N is the number of files.

With this patch:

* global `last_mapped_block` is mantained to avoid full scan in
  `erofs_mapbh` function.

* global `non_full_buffer_blocks` mantains a list of buffer block for
  each type and each possible remaining bytes in the block. Then it is
  used to identify the most suitable blocks in future `erofs_balloc`,
  avoiding full scan.

Some new data structure is allocated in this patch, more RAM usage is
expected, but not much. When I test it with ImageNet dataset (1.33M
files), 7GiB RAM is consumed, and it takes about 4 hours. Most time is
spent on IO.

Signed-off-by: Hu Weiwen <[email protected]>
Signed-off-by: Gao Xiang <[email protected]>
---
 include/erofs/cache.h |  1 +
 lib/cache.c           | 97 ++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 87 insertions(+), 11 deletions(-)

diff --git a/include/erofs/cache.h b/include/erofs/cache.h
index f8dff67b9736..611ca5b8432b 100644
--- a/include/erofs/cache.h
+++ b/include/erofs/cache.h
@@ -39,6 +39,7 @@ struct erofs_buffer_head {
 
 struct erofs_buffer_block {
        struct list_head list;
+       struct list_head mapped_list;
 
        erofs_blk_t blkaddr;
        int type;
diff --git a/lib/cache.c b/lib/cache.c
index 32a58311f563..17b2b096632c 100644
--- a/lib/cache.c
+++ b/lib/cache.c
@@ -18,6 +18,11 @@ static struct erofs_buffer_block blkh = {
 };
 static erofs_blk_t tail_blkaddr;
 
+/* buckets for all mapped buffer blocks to boost up allocation */
+static struct list_head mapped_buckets[2][EROFS_BLKSIZ];
+/* last mapped buffer block to accelerate erofs_mapbh() */
+static struct erofs_buffer_block *last_mapped_block = &blkh;
+
 static bool erofs_bh_flush_drop_directly(struct erofs_buffer_head *bh)
 {
        return erofs_bh_flush_generic_end(bh);
@@ -62,12 +67,17 @@ struct erofs_bhops erofs_buf_write_bhops = {
 /* return buffer_head of erofs super block (with size 0) */
 struct erofs_buffer_head *erofs_buffer_init(void)
 {
+       int i, j;
        struct erofs_buffer_head *bh = erofs_balloc(META, 0, 0, 0);
 
        if (IS_ERR(bh))
                return bh;
 
        bh->op = &erofs_skip_write_bhops;
+
+       for (i = 0; i < ARRAY_SIZE(mapped_buckets); i++)
+               for (j = 0; j < ARRAY_SIZE(mapped_buckets[0]); j++)
+                       init_list_head(&mapped_buckets[i][j]);
        return bh;
 }
 
@@ -132,20 +142,61 @@ struct erofs_buffer_head *erofs_balloc(int type, 
erofs_off_t size,
        struct erofs_buffer_block *cur, *bb;
        struct erofs_buffer_head *bh;
        unsigned int alignsize, used0, usedmax;
+       unsigned int used_before, used;
 
        int ret = get_alignsize(type, &type);
 
        if (ret < 0)
                return ERR_PTR(ret);
+
+       DBG_BUGON(type < 0 || type > META);
        alignsize = ret;
 
        used0 = (size + required_ext) % EROFS_BLKSIZ + inline_ext;
        usedmax = 0;
        bb = NULL;
 
-       list_for_each_entry(cur, &blkh.list, list) {
-               unsigned int used_before, used;
+       if (used0 == 0 || alignsize == EROFS_BLKSIZ)
+               goto alloc;
+
+       /* Try find a most fit mapped buffer block first. */
+       for (used_before = 1; used_before < EROFS_BLKSIZ; ++used_before) {
+               struct list_head *bt = mapped_buckets[type] + used_before;
+
+               if (list_empty(bt))
+                       continue;
+               cur = list_first_entry(bt, struct erofs_buffer_block,
+                                      mapped_list);
+
+               ret = __erofs_battach(cur, NULL, size, alignsize,
+                                     required_ext + inline_ext, true);
+               if (ret < 0)
+                       continue;
+
+               used = (ret + required_ext) % EROFS_BLKSIZ + inline_ext;
+
+               /* should contain inline data in current block */
+               if (used > EROFS_BLKSIZ)
+                       continue;
+
+               /*
+                * remaining should be smaller than before or
+                * larger than allocating a new buffer block
+                */
+               if (used < used_before && used < used0)
+                       continue;
+
+               if (usedmax < used) {
+                       bb = cur;
+                       usedmax = used;
+               }
+       }
 
+       /* try to start from the last mapped one, which can be extended */
+       cur = last_mapped_block;
+       if (cur == &blkh)
+               cur = list_next_entry(cur, list);
+       for (; cur != &blkh; cur = list_next_entry(cur, list)) {
                used_before = cur->buffers.off % EROFS_BLKSIZ;
 
                /* skip if buffer block is just full */
@@ -187,6 +238,7 @@ struct erofs_buffer_head *erofs_balloc(int type, 
erofs_off_t size,
                goto found;
        }
 
+alloc:
        /* allocate a new buffer block */
        if (used0 > EROFS_BLKSIZ)
                return ERR_PTR(-ENOSPC);
@@ -200,6 +252,7 @@ struct erofs_buffer_head *erofs_balloc(int type, 
erofs_off_t size,
        bb->buffers.off = 0;
        init_list_head(&bb->buffers.list);
        list_add_tail(&bb->list, &blkh.list);
+       init_list_head(&bb->mapped_list);
 
        bh = malloc(sizeof(struct erofs_buffer_head));
        if (!bh) {
@@ -214,6 +267,18 @@ found:
        return bh;
 }
 
+static void erofs_bupdate_mapped(struct erofs_buffer_block *bb)
+{
+       struct list_head *bkt;
+
+       if (bb->blkaddr == NULL_ADDR)
+               return;
+
+       bkt = mapped_buckets[bb->type] + bb->buffers.off % EROFS_BLKSIZ;
+       list_del(&bb->mapped_list);
+       list_add_tail(&bb->mapped_list, bkt);
+}
+
 struct erofs_buffer_head *erofs_battach(struct erofs_buffer_head *bh,
                                        int type, unsigned int size)
 {
@@ -239,6 +304,7 @@ struct erofs_buffer_head *erofs_battach(struct 
erofs_buffer_head *bh,
                free(nbh);
                return ERR_PTR(ret);
        }
+       erofs_bupdate_mapped(bb);
        return nbh;
 
 }
@@ -247,8 +313,11 @@ static erofs_blk_t __erofs_mapbh(struct erofs_buffer_block 
*bb)
 {
        erofs_blk_t blkaddr;
 
-       if (bb->blkaddr == NULL_ADDR)
+       if (bb->blkaddr == NULL_ADDR) {
                bb->blkaddr = tail_blkaddr;
+               last_mapped_block = bb;
+               erofs_bupdate_mapped(bb);
+       }
 
        blkaddr = bb->blkaddr + BLK_ROUND_UP(bb->buffers.off);
        if (blkaddr > tail_blkaddr)
@@ -259,15 +328,16 @@ static erofs_blk_t __erofs_mapbh(struct 
erofs_buffer_block *bb)
 
 erofs_blk_t erofs_mapbh(struct erofs_buffer_block *bb)
 {
-       struct erofs_buffer_block *t, *nt;
+       struct erofs_buffer_block *t = last_mapped_block;
 
-       if (!bb || bb->blkaddr == NULL_ADDR) {
-               list_for_each_entry_safe(t, nt, &blkh.list, list) {
-                       (void)__erofs_mapbh(t);
-                       if (t == bb)
-                               break;
-               }
-       }
+       do {
+               t = list_next_entry(t, list);
+               if (t == &blkh)
+                       break;
+
+               DBG_BUGON(t->blkaddr != NULL_ADDR);
+               (void)__erofs_mapbh(t);
+       } while (t != bb);
        return tail_blkaddr;
 }
 
@@ -309,6 +379,7 @@ bool erofs_bflush(struct erofs_buffer_block *bb)
 
                erofs_dbg("block %u to %u flushed", p->blkaddr, blkaddr - 1);
 
+               list_del(&p->mapped_list);
                list_del(&p->list);
                free(p);
        }
@@ -332,6 +403,10 @@ void erofs_bdrop(struct erofs_buffer_head *bh, bool 
tryrevoke)
        if (!list_empty(&bb->buffers.list))
                return;
 
+       if (bb == last_mapped_block)
+               last_mapped_block = list_prev_entry(bb, list);
+
+       list_del(&bb->mapped_list);
        list_del(&bb->list);
        free(bb);
 
-- 
2.24.0

Reply via email to