From: Gao Xiang <[email protected]>

Previously, the EROFS decompression strategy strictly followed the
requested read length. However, partial decompression is quite
inefficient when a non-negligible amount of data in big pclusters has
to be handled anyway, especially for the upcoming LZMA algorithm.

Let's decompress more in such cases, as other fses already do.

Signed-off-by: Gao Xiang <[email protected]>
---
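For reviewers: a small userspace sketch (not part of the patch) of the
boundary arithmetic z_erofs_pcluster_readmore() performs for the trailing
edge; offsets are made up and 4KiB pages are assumed:

#include <stdio.h>
#include <stdint.h>

#define PAGE_SIZE       4096ULL
#define round_up(x, y)  ((((x) + (y) - 1) / (y)) * (y))

int main(void)
{
        /* requested read: one page at offset 40960 (page index 10) */
        uint64_t req_end = 40960 + PAGE_SIZE - 1;
        /* pretend the mapped (compressed) extent spans [32768, 98304) */
        uint64_t m_la = 32768, m_llen = 65536;
        /* walk pages from the extent end down to the requested end */
        uint64_t end = round_up(req_end, PAGE_SIZE);
        uint64_t cur = m_la + m_llen - 1;

        while (cur >= end) {
                printf("readmore page index %llu\n",
                       (unsigned long long)(cur / PAGE_SIZE));
                if (cur < PAGE_SIZE)
                        break;
                cur = (cur / PAGE_SIZE) * PAGE_SIZE - 1;
        }
        return 0;
}

With these numbers the loop visits page indices 23 down to 11, i.e. the
rest of the pcluster beyond the single requested page.
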
 fs/erofs/internal.h | 16 ++++++++
 fs/erofs/zdata.c    | 94 ++++++++++++++++++++++++++++++++++++---------
 fs/erofs/zmap.c     |  4 +-
 3 files changed, 94 insertions(+), 20 deletions(-)
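
Also for reviewers, the gist of the new mapping policy in
z_erofs_map_blocks_iter() as a standalone sketch; the flag values and the
threshold are copied from this patch (4KiB blocks assumed), and the helper
name is made up for illustration:

#include <stdbool.h>
#include <stdio.h>

#define EROFS_BLKSIZ                    4096u
#define EROFS_GET_BLOCKS_FIEMAP         0x0002u
#define EROFS_GET_BLOCKS_READMORE       0x0004u
#define Z_EROFS_LZ4_READMORE_THRESHOLD  (9 * EROFS_BLKSIZ)

/* fetch the full decompressed extent length only when it is likely to
 * pay off: FIEMAP, or readmore on a big enough extent */
static bool map_full_extent(unsigned int flags, unsigned int m_llen)
{
        return (flags & EROFS_GET_BLOCKS_FIEMAP) ||
               ((flags & EROFS_GET_BLOCKS_READMORE) &&
                m_llen >= Z_EROFS_LZ4_READMORE_THRESHOLD);
}

int main(void)
{
        /* small extent: keep the old partial behaviour (prints 0) */
        printf("%d\n", map_full_extent(EROFS_GET_BLOCKS_READMORE,
                                       4 * EROFS_BLKSIZ));
        /* big extent: map (and later decompress) it in full (prints 1) */
        printf("%d\n", map_full_extent(EROFS_GET_BLOCKS_READMORE,
                                       16 * EROFS_BLKSIZ));
        return 0;
}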

diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 48bfc6eb2b02..e7378795a26c 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -307,6 +307,19 @@ static inline unsigned int erofs_inode_datalayout(unsigned int value)
                              EROFS_I_DATALAYOUT_BITS);
 }
 
+/*
+ * Different from grab_cache_page_nowait(), reclaiming is never triggered
+ * when allocating new pages.
+ */
+static inline
+struct page *erofs_grab_cache_page_nowait(struct address_space *mapping,
+                                         pgoff_t index)
+{
+       return pagecache_get_page(mapping, index,
+                       FGP_LOCK|FGP_CREAT|FGP_NOFS|FGP_NOWAIT,
+                       readahead_gfp_mask(mapping) & ~__GFP_RECLAIM);
+}
+
 extern const struct super_operations erofs_sops;
 
 extern const struct address_space_operations erofs_raw_access_aops;
@@ -368,6 +381,8 @@ struct erofs_map_blocks {
  * approach instead if possible since it's more metadata lightweight.)
  */
 #define EROFS_GET_BLOCKS_FIEMAP        0x0002
+/* Used to map the whole extent if non-negligible data is already requested */
+#define EROFS_GET_BLOCKS_READMORE      0x0004
 
 enum {
        Z_EROFS_COMPRESSION_SHIFTED = Z_EROFS_COMPRESSION_MAX,
@@ -375,6 +390,7 @@ enum {
 };
 
 /* zmap.c */
+#define Z_EROFS_LZ4_READMORE_THRESHOLD         (9 * EROFS_BLKSIZ)
 extern const struct iomap_ops z_erofs_iomap_report_ops;
 
 #ifdef CONFIG_EROFS_FS_ZIP
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 5c34ef66677f..a1861a9acfd0 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -1377,6 +1377,67 @@ static void z_erofs_runqueue(struct super_block *sb,
        z_erofs_decompress_queue(&io[JQ_SUBMIT], pagepool);
 }
 
+static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f,
+                                     struct readahead_control *rac,
+                                     erofs_off_t end,
+                                     struct list_head *pagepool,
+                                     bool backmost)
+{
+       struct inode *const inode = f->inode;
+       struct erofs_map_blocks *const map = &f->map;
+       erofs_off_t cur;
+       int err;
+
+       if (backmost) {
+               map->m_la = end;
+               err = z_erofs_map_blocks_iter(inode, map,
+                                             EROFS_GET_BLOCKS_READMORE);
+               if (err)
+                       return;
+               /* expand ra for the trailing edge if readahead */
+               if (rac) {
+                       loff_t newstart = readahead_pos(rac);
+
+                       cur = round_up(map->m_la + map->m_llen, PAGE_SIZE);
+                       readahead_expand(rac, newstart, cur - newstart);
+                       return;
+               }
+               end = round_up(end, PAGE_SIZE);
+       } else {
+               end = round_up(map->m_la, PAGE_SIZE);
+
+               if (!map->m_llen)
+                       return;
+       }
+
+       cur = map->m_la + map->m_llen - 1;
+       while (cur >= end) {
+               pgoff_t index = cur >> PAGE_SHIFT;
+               struct page *page;
+
+               page = erofs_grab_cache_page_nowait(inode->i_mapping, index);
+               if (!page)
+                       goto skip;
+
+               if (PageUptodate(page)) {
+                       unlock_page(page);
+                       put_page(page);
+                       goto skip;
+               }
+
+               err = z_erofs_do_read_page(f, page, pagepool);
+               if (err)
+                       erofs_err(inode->i_sb,
+                                 "readmore error at page %lu @ nid %llu",
+                                 index, EROFS_I(inode)->nid);
+               put_page(page);
+skip:
+               if (cur < PAGE_SIZE)
+                       break;
+               cur = (index << PAGE_SHIFT) - 1;
+       }
+}
+
 static int z_erofs_readpage(struct file *file, struct page *page)
 {
        struct inode *const inode = page->mapping->host;
@@ -1385,10 +1446,13 @@ static int z_erofs_readpage(struct file *file, struct page *page)
        LIST_HEAD(pagepool);
 
        trace_erofs_readpage(page, false);
-
        f.headoffset = (erofs_off_t)page->index << PAGE_SHIFT;
 
+       z_erofs_pcluster_readmore(&f, NULL, f.headoffset + PAGE_SIZE - 1,
+                                 &pagepool, true);
        err = z_erofs_do_read_page(&f, page, &pagepool);
+       z_erofs_pcluster_readmore(&f, NULL, 0, &pagepool, false);
+
        (void)z_erofs_collector_end(&f.clt);
 
        /* if some compressed cluster ready, need submit them anyway */
@@ -1409,29 +1473,20 @@ static void z_erofs_readahead(struct readahead_control *rac)
{
 {
        struct inode *const inode = rac->mapping->host;
        struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
-
-       unsigned int nr_pages = readahead_count(rac);
-       bool sync = (sbi->ctx.readahead_sync_decompress &&
-                       nr_pages <= sbi->ctx.max_sync_decompress_pages);
        struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
        struct page *page, *head = NULL;
+       unsigned int nr_pages;
        LIST_HEAD(pagepool);
 
-       trace_erofs_readpages(inode, readahead_index(rac), nr_pages, false);
-
        f.readahead = true;
        f.headoffset = readahead_pos(rac);
 
-       while ((page = readahead_page(rac))) {
-               prefetchw(&page->flags);
-
-               /*
-                * A pure asynchronous readahead is indicated if
-                * a PG_readahead marked page is hitted at first.
-                * Let's also do asynchronous decompression for this case.
-                */
-               sync &= !(PageReadahead(page) && !head);
+       z_erofs_pcluster_readmore(&f, rac, f.headoffset +
+                                 readahead_length(rac) - 1, &pagepool, true);
+       nr_pages = readahead_count(rac);
+       trace_erofs_readpages(inode, readahead_index(rac), nr_pages, false);
 
+       while ((page = readahead_page(rac))) {
                set_page_private(page, (unsigned long)head);
                head = page;
        }
@@ -1450,11 +1505,12 @@ static void z_erofs_readahead(struct readahead_control *rac)
                                  page->index, EROFS_I(inode)->nid);
                put_page(page);
        }
-
+       z_erofs_pcluster_readmore(&f, rac, 0, &pagepool, false);
        (void)z_erofs_collector_end(&f.clt);
 
-       z_erofs_runqueue(inode->i_sb, &f, &pagepool, sync);
-
+       z_erofs_runqueue(inode->i_sb, &f, &pagepool,
+                        sbi->ctx.readahead_sync_decompress &&
+                        nr_pages <= sbi->ctx.max_sync_decompress_pages);
        if (f.map.mpage)
                put_page(f.map.mpage);
 
diff --git a/fs/erofs/zmap.c b/fs/erofs/zmap.c
index a61cc7f55ef0..7f42a1c8a338 100644
--- a/fs/erofs/zmap.c
+++ b/fs/erofs/zmap.c
@@ -673,7 +673,9 @@ int z_erofs_map_blocks_iter(struct inode *inode,
        else
                map->m_algorithmformat = vi->z_algorithmtype[0];
 
-       if (flags & EROFS_GET_BLOCKS_FIEMAP) {
+       if (flags & EROFS_GET_BLOCKS_FIEMAP ||
+           ((flags & EROFS_GET_BLOCKS_READMORE) &&
+            map->m_llen >= Z_EROFS_LZ4_READMORE_THRESHOLD)) {
                err = z_erofs_get_extent_decompressedlen(&m);
                if (!err)
                        map->m_flags |= EROFS_MAP_FULL_MAPPED;
-- 
2.20.1
