[PATCH 1/2] erofs: use workqueue decompression for atomic contexts only

2021-03-16 Thread Huang Jianan
z_erofs_decompressqueue_endio may not be executed in the atomic
context, for example, when dm-verity is turned on. In this scenario,
data can be decompressed directly to get rid of additional kworker
scheduling overhead.

Signed-off-by: Huang Jianan 
Signed-off-by: Guo Weichao 
Reviewed-by: Gao Xiang 
---
 fs/erofs/zdata.c | 9 -
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 6cb356c4217b..cf2d28582c14 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -706,6 +706,7 @@ static int z_erofs_do_read_page(struct 
z_erofs_decompress_frontend *fe,
goto out;
 }
 
+static void z_erofs_decompressqueue_work(struct work_struct *work);
 static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
   bool sync, int bios)
 {
@@ -720,8 +721,14 @@ static void z_erofs_decompress_kickoff(struct 
z_erofs_decompressqueue *io,
return;
}
 
-   if (!atomic_add_return(bios, &io->pending_bios))
+   if (atomic_add_return(bios, &io->pending_bios))
+   return;
+   /* Use workqueue decompression for atomic contexts only */
+   if (in_atomic() || irqs_disabled()) {
	queue_work(z_erofs_workqueue, &io->u.work);
+   return;
+   }
+   z_erofs_decompressqueue_work(&io->u.work);
 }
 
 static bool z_erofs_page_is_invalidated(struct page *page)
-- 
2.25.1



[PATCH 2/2] erofs: use sync decompression for atomic contexts only

2021-03-16 Thread Huang Jianan
Sync decompression was introduced to get rid of additional kworker
scheduling overhead. But there is no such overhead in non-atomic
contexts. Therefore, it should be better to turn off sync decompression
to avoid the current thread waiting in z_erofs_runqueue.

Signed-off-by: Huang Jianan 
Signed-off-by: Guo Weichao 
Reviewed-by: Gao Xiang 
---
 fs/erofs/internal.h | 2 ++
 fs/erofs/super.c| 1 +
 fs/erofs/zdata.c| 8 ++--
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 67a7ec945686..fbc4040715be 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -50,6 +50,8 @@ struct erofs_fs_context {
 #ifdef CONFIG_EROFS_FS_ZIP
/* current strategy of how to use managed cache */
unsigned char cache_strategy;
+   /* strategy of sync decompression (false - auto, true - force on) */
+   bool readahead_sync_decompress;
 
/* threshold for decompression synchronously */
unsigned int max_sync_decompress_pages;
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index d5a6b9b888a5..0445d09b6331 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -200,6 +200,7 @@ static void erofs_default_options(struct erofs_fs_context 
*ctx)
 #ifdef CONFIG_EROFS_FS_ZIP
ctx->cache_strategy = EROFS_ZIP_CACHE_READAROUND;
ctx->max_sync_decompress_pages = 3;
+   ctx->readahead_sync_decompress = false;
 #endif
 #ifdef CONFIG_EROFS_FS_XATTR
set_opt(ctx, XATTR_USER);
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index cf2d28582c14..25a0c4890d0a 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -710,6 +710,8 @@ static void z_erofs_decompressqueue_work(struct work_struct 
*work);
 static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
   bool sync, int bios)
 {
+   struct erofs_sb_info *const sbi = EROFS_SB(io->sb);
+
/* wake up the caller thread for sync decompression */
if (sync) {
unsigned long flags;
@@ -723,9 +725,10 @@ static void z_erofs_decompress_kickoff(struct 
z_erofs_decompressqueue *io,
 
	if (atomic_add_return(bios, &io->pending_bios))
	return;
-   /* Use workqueue decompression for atomic contexts only */
+   /* Use workqueue and sync decompression for atomic contexts only */
	if (in_atomic() || irqs_disabled()) {
	queue_work(z_erofs_workqueue, &io->u.work);
+   sbi->ctx.readahead_sync_decompress = true;
	return;
	}
	z_erofs_decompressqueue_work(&io->u.work);
@@ -1340,7 +1343,8 @@ static void z_erofs_readahead(struct readahead_control 
*rac)
struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
 
unsigned int nr_pages = readahead_count(rac);
-   bool sync = (nr_pages <= sbi->ctx.max_sync_decompress_pages);
+   bool sync = (sbi->ctx.readahead_sync_decompress &&
+   nr_pages <= sbi->ctx.max_sync_decompress_pages);
struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
struct page *page, *head = NULL;
LIST_HEAD(pagepool);
-- 
2.25.1



[PATCH 0/2] erofs: decompress in endio if possible

2021-03-16 Thread Huang Jianan
This patch set was separated from "erofs: decompress in endio if possible"
since it does these things:
- combine dm-verity and erofs workqueue
- change the policy of decompression in the thread context

Huang Jianan (2):
  erofs: use workqueue decompression for atomic contexts only
  erofs: use sync decompression for atomic contexts only

 fs/erofs/internal.h |  2 ++
 fs/erofs/super.c|  1 +
 fs/erofs/zdata.c| 15 +--
 3 files changed, 16 insertions(+), 2 deletions(-)

-- 
2.25.1



Re: [PATCH v6 2/2] erofs: decompress in endio if possible

2021-03-16 Thread Huang Jianan



On 2021/3/16 16:26, Chao Yu wrote:

Hi Jianan,

On 2021/3/16 11:15, Huang Jianan via Linux-erofs wrote:

z_erofs_decompressqueue_endio may not be executed in the atomic
context, for example, when dm-verity is turned on. In this scenario,
data can be decompressed directly to get rid of additional kworker
scheduling overhead. Also, it makes no sense to apply synchronous
decompression for such case.


It looks like this patch does more than one thing:
- combine dm-verity and erofs workqueue
- change the policy of decompression in the thread context

Normally, we do one thing in one patch; that way, we benefit when
backporting patches and bisecting a problematic patch with minimum
granularity, and it also helps reviewers focus on a single piece of
code logic by following the patch's goal.

So IMO, it would be better to separate this patch into two.


Thanks for the suggestion, I will send a new patch set.
One more thing: could you explain a little bit more about why we need to
change the policy of decompression in the thread context? For better
performance?


Sync decompression was introduced to get rid of additional kworker
scheduling overhead. But there is no such overhead if we try to decompress
directly in z_erofs_decompressqueue_endio. Therefore, it should be better
to turn off sync decompression to avoid the current thread waiting in
z_erofs_runqueue.
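
To make the trade-off concrete, here is a minimal standalone model of the
three completion paths being discussed; it is only an illustration under
the assumptions stated in the comments, not the actual erofs code, and the
names are made up for this sketch.

/* Toy model only: which path handles decompression after the last bio
 * completes, depending on whether a reader is waiting synchronously and
 * whether the completion runs in an atomic context. */
#include <stdbool.h>
#include <stdio.h>

enum path { WAKE_SYNC_WAITER, DEFER_TO_WORKQUEUE, DECOMPRESS_INLINE };

static enum path pick_path(bool sync_waiter, bool atomic_ctx)
{
	if (sync_waiter)	/* caller blocks in z_erofs_runqueue() and is woken */
		return WAKE_SYNC_WAITER;
	if (atomic_ctx)		/* e.g. a real bio IRQ completion */
		return DEFER_TO_WORKQUEUE;
	return DECOMPRESS_INLINE;	/* e.g. dm-verity's own workqueue context */
}

int main(void)
{
	printf("%d %d %d\n",
	       pick_path(true, true),	/* sync read: just wake the waiter */
	       pick_path(false, true),	/* atomic endio: a kworker does the work */
	       pick_path(false, false));	/* non-atomic endio: decompress here */
	return 0;
}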


BTW, code looks clean to me. :)

Thanks,



Signed-off-by: Huang Jianan 
Signed-off-by: Guo Weichao 
Reviewed-by: Gao Xiang 
---
  fs/erofs/internal.h |  2 ++
  fs/erofs/super.c    |  1 +
  fs/erofs/zdata.c    | 15 +--
  3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 67a7ec945686..fbc4040715be 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -50,6 +50,8 @@ struct erofs_fs_context {
  #ifdef CONFIG_EROFS_FS_ZIP
  /* current strategy of how to use managed cache */
  unsigned char cache_strategy;
+    /* strategy of sync decompression (false - auto, true - force on) */
+    bool readahead_sync_decompress;
    /* threshold for decompression synchronously */
  unsigned int max_sync_decompress_pages;
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index d5a6b9b888a5..0445d09b6331 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -200,6 +200,7 @@ static void erofs_default_options(struct 
erofs_fs_context *ctx)

  #ifdef CONFIG_EROFS_FS_ZIP
  ctx->cache_strategy = EROFS_ZIP_CACHE_READAROUND;
  ctx->max_sync_decompress_pages = 3;
+    ctx->readahead_sync_decompress = false;
  #endif
  #ifdef CONFIG_EROFS_FS_XATTR
  set_opt(ctx, XATTR_USER);
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 6cb356c4217b..25a0c4890d0a 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -706,9 +706,12 @@ static int z_erofs_do_read_page(struct 
z_erofs_decompress_frontend *fe,

  goto out;
  }
  +static void z_erofs_decompressqueue_work(struct work_struct *work);
  static void z_erofs_decompress_kickoff(struct 
z_erofs_decompressqueue *io,

 bool sync, int bios)
  {
+    struct erofs_sb_info *const sbi = EROFS_SB(io->sb);
+
  /* wake up the caller thread for sync decompression */
  if (sync) {
  unsigned long flags;
@@ -720,8 +723,15 @@ static void z_erofs_decompress_kickoff(struct 
z_erofs_decompressqueue *io,

  return;
  }
-    if (!atomic_add_return(bios, &io->pending_bios))
+    if (atomic_add_return(bios, &io->pending_bios))
+    return;
+    /* Use workqueue and sync decompression for atomic contexts only */
+    if (in_atomic() || irqs_disabled()) {
  queue_work(z_erofs_workqueue, &io->u.work);
+    sbi->ctx.readahead_sync_decompress = true;
+    return;
+    }
+    z_erofs_decompressqueue_work(&io->u.work);
  }
    static bool z_erofs_page_is_invalidated(struct page *page)
@@ -1333,7 +1343,8 @@ static void z_erofs_readahead(struct 
readahead_control *rac)

  struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
    unsigned int nr_pages = readahead_count(rac);
-    bool sync = (nr_pages <= sbi->ctx.max_sync_decompress_pages);
+    bool sync = (sbi->ctx.readahead_sync_decompress &&
+    nr_pages <= sbi->ctx.max_sync_decompress_pages);
  struct z_erofs_decompress_frontend f = 
DECOMPRESS_FRONTEND_INIT(inode);

  struct page *page, *head = NULL;
  LIST_HEAD(pagepool);



[PATCH v6 2/2] erofs: decompress in endio if possible

2021-03-15 Thread Huang Jianan
z_erofs_decompressqueue_endio may not be executed in the atomic
context, for example, when dm-verity is turned on. In this scenario,
data can be decompressed directly to get rid of additional kworker
scheduling overhead. Also, it makes no sense to apply synchronous
decompression for such case.

Signed-off-by: Huang Jianan 
Signed-off-by: Guo Weichao 
Reviewed-by: Gao Xiang 
---
 fs/erofs/internal.h |  2 ++
 fs/erofs/super.c|  1 +
 fs/erofs/zdata.c| 15 +--
 3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 67a7ec945686..fbc4040715be 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -50,6 +50,8 @@ struct erofs_fs_context {
 #ifdef CONFIG_EROFS_FS_ZIP
/* current strategy of how to use managed cache */
unsigned char cache_strategy;
+   /* strategy of sync decompression (false - auto, true - force on) */
+   bool readahead_sync_decompress;
 
/* threshold for decompression synchronously */
unsigned int max_sync_decompress_pages;
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index d5a6b9b888a5..0445d09b6331 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -200,6 +200,7 @@ static void erofs_default_options(struct erofs_fs_context 
*ctx)
 #ifdef CONFIG_EROFS_FS_ZIP
ctx->cache_strategy = EROFS_ZIP_CACHE_READAROUND;
ctx->max_sync_decompress_pages = 3;
+   ctx->readahead_sync_decompress = false;
 #endif
 #ifdef CONFIG_EROFS_FS_XATTR
set_opt(ctx, XATTR_USER);
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 6cb356c4217b..25a0c4890d0a 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -706,9 +706,12 @@ static int z_erofs_do_read_page(struct 
z_erofs_decompress_frontend *fe,
goto out;
 }
 
+static void z_erofs_decompressqueue_work(struct work_struct *work);
 static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
   bool sync, int bios)
 {
+   struct erofs_sb_info *const sbi = EROFS_SB(io->sb);
+
/* wake up the caller thread for sync decompression */
if (sync) {
unsigned long flags;
@@ -720,8 +723,15 @@ static void z_erofs_decompress_kickoff(struct 
z_erofs_decompressqueue *io,
return;
}
 
-   if (!atomic_add_return(bios, &io->pending_bios))
+   if (atomic_add_return(bios, &io->pending_bios))
+   return;
+   /* Use workqueue and sync decompression for atomic contexts only */
+   if (in_atomic() || irqs_disabled()) {
	queue_work(z_erofs_workqueue, &io->u.work);
+   sbi->ctx.readahead_sync_decompress = true;
+   return;
+   }
+   z_erofs_decompressqueue_work(&io->u.work);
 }
 
 static bool z_erofs_page_is_invalidated(struct page *page)
@@ -1333,7 +1343,8 @@ static void z_erofs_readahead(struct readahead_control 
*rac)
struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
 
unsigned int nr_pages = readahead_count(rac);
-   bool sync = (nr_pages <= sbi->ctx.max_sync_decompress_pages);
+   bool sync = (sbi->ctx.readahead_sync_decompress &&
+   nr_pages <= sbi->ctx.max_sync_decompress_pages);
struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
struct page *page, *head = NULL;
LIST_HEAD(pagepool);
-- 
2.25.1



[PATCH v6 1/2] erofs: avoid memory allocation failure during rolling decompression

2021-03-15 Thread Huang Jianan
Currently, err would be treated as io error. Therefore, it'd be
better to ensure memory allocation during rolling decompression
to avoid such io error.

In the long term, we might consider adding another !Uptodate case
for such case.

Signed-off-by: Huang Jianan 
Signed-off-by: Guo Weichao 
Reviewed-by: Gao Xiang 
---
 fs/erofs/decompressor.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index 1cb1ffd10569..34e73ff76f89 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -73,9 +73,8 @@ static int z_erofs_lz4_prepare_destpages(struct 
z_erofs_decompress_req *rq,
victim = availables[--top];
get_page(victim);
} else {
-   victim = erofs_allocpage(pagepool, GFP_KERNEL);
-   if (!victim)
-   return -ENOMEM;
+   victim = erofs_allocpage(pagepool,
+GFP_KERNEL | __GFP_NOFAIL);
set_page_private(victim, Z_EROFS_SHORTLIVED_PAGE);
}
rq->out[i] = victim;
-- 
2.25.1



Re: [LTP] [f2fs] 02eb84b96b: ltp.swapon03.fail

2021-03-09 Thread Huang Jianan

Hi Richard,

On 2021/3/9 12:01, Matthew Wilcox wrote:

On Tue, Mar 09, 2021 at 10:23:35AM +0800, Weichao Guo wrote:

Hi Richard,

On 2021/3/8 19:53, Richard Palethorpe wrote:

Hello,


kern  :err   : [  187.461914] F2FS-fs (sda1): Swapfile does not align to section
commit 02eb84b96bc1b382dd138bf60724edbefe77b025
Author: huangjia...@oppo.com 
Date:   Mon Mar 1 12:58:44 2021 +0800
  f2fs: check if swapfile is section-alligned
  If the swapfile isn't created by pin and fallocate, it can't be
  guaranteed section-aligned, so it may be selected by f2fs gc. When
  gc_pin_file_threshold is reached, the address of swapfile may change,
  but won't be synchronized to swap_extent, so swap will write to wrong
  address, which will cause data corruption.
  Signed-off-by: Huang Jianan 
  Signed-off-by: Guo Weichao 
  Reviewed-by: Chao Yu 
  Signed-off-by: Jaegeuk Kim 

The test uses fallocate to preallocate the swap file and writes zeros to
it. I'm not sure what pin refers to?

'pin' refers to the pinned file feature in F2FS: the LBA (Logical Block Address)
of a file is fixed after it is pinned. Without this operation before fallocate,
the LBAs may not align with a section (the F2FS GC unit), and some LBAs of the
file may be changed by F2FS GC in some extreme cases.

For this test case, how about pinning the swap file before fallocate for F2FS
as follows:

ioctl(fd, F2FS_IOC_SET_PIN_FILE, true);

No special ioctl should be needed.  f2fs_swap_activate() should pin the
file, just like it converts inline inodes and disables compression.


Now f2fs_swap_activate() will pin the file. The problem is that when
f2fs_swap_activate() is executed, the file has been created and may not be
section-aligned.

So I think it would be better to consider aligning the swapfile during
f2fs_swap_activate()?
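
For reference, a hedged userspace sketch of the pin-then-fallocate sequence
suggested earlier in this thread. The ioctl value is copied here on the
assumption that the toolchain's headers may not export it; it mirrors what
the f2fs driver used at the time, so double-check it against your kernel
headers. mkswap/swapon would still follow.

/* Sketch: create a pinned, preallocated swapfile on f2fs so GC cannot
 * relocate its blocks.  Illustrative only; error handling is minimal. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <linux/types.h>
#include <stdio.h>
#include <sys/ioctl.h>
#include <unistd.h>

/* Assumed values, mirroring fs/f2fs/f2fs.h at the time of this thread. */
#define F2FS_IOCTL_MAGIC	0xf5
#define F2FS_IOC_SET_PIN_FILE	_IOW(F2FS_IOCTL_MAGIC, 13, __u32)

int main(int argc, char **argv)
{
	const char *path = argc > 1 ? argv[1] : "swapfile";
	__u32 pin = 1;
	int fd = open(path, O_CREAT | O_WRONLY | O_TRUNC, 0600);

	if (fd < 0) {
		perror("open");
		return 1;
	}
	/* Pin first so the blocks allocated below keep their addresses. */
	if (ioctl(fd, F2FS_IOC_SET_PIN_FILE, &pin) < 0)
		perror("F2FS_IOC_SET_PIN_FILE");
	/* Preallocate 128 MiB; run mkswap/swapon on the file afterwards. */
	if (fallocate(fd, 0, 0, 128ULL << 20) < 0)
		perror("fallocate");
	close(fd);
	return 0;
}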




[PATCH v5 2/2] erofs: decompress in endio if possible

2021-03-05 Thread Huang Jianan
z_erofs_decompressqueue_endio may not be executed in the atomic
context, for example, when dm-verity is turned on. In this scenario,
data can be decompressed directly to get rid of additional kworker
scheduling overhead. Also, it makes no sense to apply synchronous
decompression for such case.

Signed-off-by: Huang Jianan 
Signed-off-by: Guo Weichao 
---
 fs/erofs/internal.h |  2 ++
 fs/erofs/super.c|  1 +
 fs/erofs/zdata.c| 16 +---
 3 files changed, 16 insertions(+), 3 deletions(-)

diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 67a7ec945686..e325da7be237 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -53,6 +53,8 @@ struct erofs_fs_context {
 
/* threshold for decompression synchronously */
unsigned int max_sync_decompress_pages;
+   /* decide whether to decompress synchronously */
+   bool readahead_sync_decompress;
 #endif
unsigned int mount_opt;
 };
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index d5a6b9b888a5..0445d09b6331 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -200,6 +200,7 @@ static void erofs_default_options(struct erofs_fs_context 
*ctx)
 #ifdef CONFIG_EROFS_FS_ZIP
ctx->cache_strategy = EROFS_ZIP_CACHE_READAROUND;
ctx->max_sync_decompress_pages = 3;
+   ctx->readahead_sync_decompress = false;
 #endif
 #ifdef CONFIG_EROFS_FS_XATTR
set_opt(ctx, XATTR_USER);
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 6cb356c4217b..b22cea78a9fd 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -706,9 +706,12 @@ static int z_erofs_do_read_page(struct 
z_erofs_decompress_frontend *fe,
goto out;
 }
 
+static void z_erofs_decompressqueue_work(struct work_struct *work);
 static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
   bool sync, int bios)
 {
+   struct erofs_sb_info *const sbi = EROFS_SB(io->sb);
+
/* wake up the caller thread for sync decompression */
if (sync) {
unsigned long flags;
@@ -720,8 +723,14 @@ static void z_erofs_decompress_kickoff(struct 
z_erofs_decompressqueue *io,
return;
}
 
-   if (!atomic_add_return(bios, &io->pending_bios))
-   queue_work(z_erofs_workqueue, &io->u.work);
+   if (!atomic_add_return(bios, &io->pending_bios)) {
+   if (in_atomic() || irqs_disabled()) {
+   queue_work(z_erofs_workqueue, &io->u.work);
+   sbi->ctx.readahead_sync_decompress = true;
+   } else {
+   z_erofs_decompressqueue_work(&io->u.work);
+   }
+   }
 }
 
 static bool z_erofs_page_is_invalidated(struct page *page)
@@ -1333,7 +1342,8 @@ static void z_erofs_readahead(struct readahead_control 
*rac)
struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
 
unsigned int nr_pages = readahead_count(rac);
-   bool sync = (nr_pages <= sbi->ctx.max_sync_decompress_pages);
+   bool sync = (sbi->ctx.readahead_sync_decompress &&
+   nr_pages <= sbi->ctx.max_sync_decompress_pages);
struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
struct page *page, *head = NULL;
LIST_HEAD(pagepool);
-- 
2.25.1



[PATCH v5 1/2] erofs: avoid memory allocation failure during rolling decompression

2021-03-05 Thread Huang Jianan
Currently, err would be treated as io error. Therefore, it'd be
better to ensure memory allocation during rolling decompression
to avoid such io error.

In the long term, we might consider adding another !Uptodate case
for such case.

Signed-off-by: Huang Jianan 
Signed-off-by: Guo Weichao 
---
 fs/erofs/decompressor.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index 1cb1ffd10569..3d276a8aad86 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -73,7 +73,8 @@ static int z_erofs_lz4_prepare_destpages(struct 
z_erofs_decompress_req *rq,
victim = availables[--top];
get_page(victim);
} else {
-   victim = erofs_allocpage(pagepool, GFP_KERNEL);
+   victim = erofs_allocpage(pagepool,
+GFP_KERNEL | __GFP_NOFAIL);
if (!victim)
return -ENOMEM;
set_page_private(victim, Z_EROFS_SHORTLIVED_PAGE);
-- 
2.25.1



[PATCH v4 2/2] erofs: decompress in endio if possible

2021-03-05 Thread Huang Jianan
z_erofs_decompressqueue_endio may not be executed in the atomic
context, for example, when dm-verity is turned on. In this scenario,
data can be decompressed directly to get rid of additional kworker
scheduling overhead. Also, it makes no sense to apply synchronous
decompression for such case.

Signed-off-by: Huang Jianan 
Signed-off-by: Guo Weichao 
---
 fs/erofs/internal.h |  3 +++
 fs/erofs/super.c|  4 
 fs/erofs/zdata.c| 16 +---
 3 files changed, 20 insertions(+), 3 deletions(-)

diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 67a7ec945686..b817cb85d67b 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -70,6 +70,9 @@ struct erofs_sb_info {
 
/* pseudo inode to manage cached pages */
struct inode *managed_cache;
+
+   /* decide whether to decompress synchronously */
+   bool readahead_sync_decompress;
 #endif /* CONFIG_EROFS_FS_ZIP */
u32 blocks;
u32 meta_blkaddr;
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index d5a6b9b888a5..77819efe9b15 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -175,6 +175,10 @@ static int erofs_read_superblock(struct super_block *sb)
sbi->root_nid = le16_to_cpu(dsb->root_nid);
sbi->inos = le64_to_cpu(dsb->inos);
 
+#ifdef CONFIG_EROFS_FS_ZIP
+   sbi->readahead_sync_decompress = false;
+#endif
+
sbi->build_time = le64_to_cpu(dsb->build_time);
sbi->build_time_nsec = le32_to_cpu(dsb->build_time_nsec);
 
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 6cb356c4217b..c21447c42eb0 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -706,9 +706,12 @@ static int z_erofs_do_read_page(struct 
z_erofs_decompress_frontend *fe,
goto out;
 }
 
+static void z_erofs_decompressqueue_work(struct work_struct *work);
 static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
   bool sync, int bios)
 {
+   struct erofs_sb_info *const sbi = EROFS_SB(io->sb);
+
/* wake up the caller thread for sync decompression */
if (sync) {
unsigned long flags;
@@ -720,8 +723,14 @@ static void z_erofs_decompress_kickoff(struct 
z_erofs_decompressqueue *io,
return;
}
 
-   if (!atomic_add_return(bios, &io->pending_bios))
-   queue_work(z_erofs_workqueue, &io->u.work);
+   if (!atomic_add_return(bios, &io->pending_bios)) {
+   if (in_atomic() || irqs_disabled()) {
+   queue_work(z_erofs_workqueue, &io->u.work);
+   sbi->readahead_sync_decompress = true;
+   } else {
+   z_erofs_decompressqueue_work(&io->u.work);
+   }
+   }
 }
 
 static bool z_erofs_page_is_invalidated(struct page *page)
@@ -1333,7 +1342,8 @@ static void z_erofs_readahead(struct readahead_control 
*rac)
struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
 
unsigned int nr_pages = readahead_count(rac);
-   bool sync = (nr_pages <= sbi->ctx.max_sync_decompress_pages);
+   bool sync = (sbi->readahead_sync_decompress &&
+   nr_pages <= sbi->ctx.max_sync_decompress_pages);
struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
struct page *page, *head = NULL;
LIST_HEAD(pagepool);
-- 
2.25.1



Re: [PATCH v2 2/2] erofs: decompress in endio if possible

2021-03-05 Thread Huang Jianan



On 2021/3/5 16:08, Huang Jianan wrote:

z_erofs_decompressqueue_endio may not be executed in the atomic
context, for example, when dm-verity is turned on. In this scenario,
data can be decompressed directly to get rid of additional kworker
scheduling overhead. Also, it makes no sense to apply synchronous
decompression for such case.

Signed-off-by: Huang Jianan 
Signed-off-by: Guo Weichao 
---
  fs/erofs/internal.h |  3 +++
  fs/erofs/super.c|  4 
  fs/erofs/zdata.c| 13 +++--
  3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 67a7ec945686..b817cb85d67b 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -70,6 +70,9 @@ struct erofs_sb_info {
  
  	/* pseudo inode to manage cached pages */

struct inode *managed_cache;
+
+   /* decide whether to decompress synchronously */
+   bool readahead_sync_decompress;
  #endif/* CONFIG_EROFS_FS_ZIP */
u32 blocks;
u32 meta_blkaddr;
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index d5a6b9b888a5..77819efe9b15 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -175,6 +175,10 @@ static int erofs_read_superblock(struct super_block *sb)
sbi->root_nid = le16_to_cpu(dsb->root_nid);
sbi->inos = le64_to_cpu(dsb->inos);
  
+#ifdef CONFIG_EROFS_FS_ZIP

+   sbi->readahead_sync_decompress = false;
+#endif
+
sbi->build_time = le64_to_cpu(dsb->build_time);
sbi->build_time_nsec = le32_to_cpu(dsb->build_time_nsec);
  
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c

index 6cb356c4217b..49ffc817dd9e 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -706,6 +706,7 @@ static int z_erofs_do_read_page(struct 
z_erofs_decompress_frontend *fe,
goto out;
  }
  
+static void z_erofs_decompressqueue_work(struct work_struct *work);

  static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
   bool sync, int bios)
  {
@@ -720,8 +721,14 @@ static void z_erofs_decompress_kickoff(struct 
z_erofs_decompressqueue *io,
return;
}
  
-	if (!atomic_add_return(bios, &io->pending_bios))
-   queue_work(z_erofs_workqueue, &io->u.work);
+   if (!atomic_add_return(bios, &io->pending_bios)) {
+   if (in_atomic() || irqs_disabled()) {
+   queue_work(z_erofs_workqueue, &io->u.work);
+   sbi->readahead_sync_decompress = true;
+   } else {
+   z_erofs_decompressqueue_work(&io->u.work);
+   }
+   }
  }
  
  static bool z_erofs_page_is_invalidated(struct page *page)

@@ -1333,6 +1340,8 @@ static void z_erofs_readahead(struct readahead_control 
*rac)
struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
  
  	unsigned int nr_pages = readahead_count(rac);

+   bool sync = (sbi->readahead_sync_decompress &&
+   nr_pages <= sbi->ctx.max_sync_decompress_pages);
bool sync = (nr_pages <= sbi->ctx.max_sync_decompress_pages);

sorry for my mistake here, i will send v3 to fix this.

struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
struct page *page, *head = NULL;


[PATCH v3 2/2] erofs: decompress in endio if possible

2021-03-05 Thread Huang Jianan
z_erofs_decompressqueue_endio may not be executed in the atomic
context, for example, when dm-verity is turned on. In this scenario,
data can be decompressed directly to get rid of additional kworker
scheduling overhead. Also, it makes no sense to apply synchronous
decompression for such case.

Signed-off-by: Huang Jianan 
Signed-off-by: Guo Weichao 
---
 fs/erofs/internal.h |  3 +++
 fs/erofs/super.c|  4 
 fs/erofs/zdata.c| 14 +++---
 3 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 67a7ec945686..b817cb85d67b 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -70,6 +70,9 @@ struct erofs_sb_info {
 
/* pseudo inode to manage cached pages */
struct inode *managed_cache;
+
+   /* decide whether to decompress synchronously */
+   bool readahead_sync_decompress;
 #endif /* CONFIG_EROFS_FS_ZIP */
u32 blocks;
u32 meta_blkaddr;
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index d5a6b9b888a5..77819efe9b15 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -175,6 +175,10 @@ static int erofs_read_superblock(struct super_block *sb)
sbi->root_nid = le16_to_cpu(dsb->root_nid);
sbi->inos = le64_to_cpu(dsb->inos);
 
+#ifdef CONFIG_EROFS_FS_ZIP
+   sbi->readahead_sync_decompress = false;
+#endif
+
sbi->build_time = le64_to_cpu(dsb->build_time);
sbi->build_time_nsec = le32_to_cpu(dsb->build_time_nsec);
 
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 6cb356c4217b..ca90d0993599 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -706,6 +706,7 @@ static int z_erofs_do_read_page(struct 
z_erofs_decompress_frontend *fe,
goto out;
 }
 
+static void z_erofs_decompressqueue_work(struct work_struct *work);
 static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
   bool sync, int bios)
 {
@@ -720,8 +721,14 @@ static void z_erofs_decompress_kickoff(struct 
z_erofs_decompressqueue *io,
return;
}
 
-   if (!atomic_add_return(bios, &io->pending_bios))
-   queue_work(z_erofs_workqueue, &io->u.work);
+   if (!atomic_add_return(bios, &io->pending_bios)) {
+   if (in_atomic() || irqs_disabled()) {
+   queue_work(z_erofs_workqueue, &io->u.work);
+   sbi->readahead_sync_decompress = true;
+   } else {
+   z_erofs_decompressqueue_work(&io->u.work);
+   }
+   }
 }
 
 static bool z_erofs_page_is_invalidated(struct page *page)
@@ -1333,7 +1340,8 @@ static void z_erofs_readahead(struct readahead_control 
*rac)
struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
 
unsigned int nr_pages = readahead_count(rac);
-   bool sync = (nr_pages <= sbi->ctx.max_sync_decompress_pages);
+   bool sync = (sbi->readahead_sync_decompress &&
+   nr_pages <= sbi->ctx.max_sync_decompress_pages);
struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
struct page *page, *head = NULL;
LIST_HEAD(pagepool);
-- 
2.25.1



[PATCH v2 2/2] erofs: decompress in endio if possible

2021-03-05 Thread Huang Jianan
z_erofs_decompressqueue_endio may not be executed in the atomic
context, for example, when dm-verity is turned on. In this scenario,
data can be decompressed directly to get rid of additional kworker
scheduling overhead. Also, it makes no sense to apply synchronous
decompression for such case.

Signed-off-by: Huang Jianan 
Signed-off-by: Guo Weichao 
---
 fs/erofs/internal.h |  3 +++
 fs/erofs/super.c|  4 
 fs/erofs/zdata.c| 13 +++--
 3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 67a7ec945686..b817cb85d67b 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -70,6 +70,9 @@ struct erofs_sb_info {
 
/* pseudo inode to manage cached pages */
struct inode *managed_cache;
+
+   /* decide whether to decompress synchronously */
+   bool readahead_sync_decompress;
 #endif /* CONFIG_EROFS_FS_ZIP */
u32 blocks;
u32 meta_blkaddr;
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index d5a6b9b888a5..77819efe9b15 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -175,6 +175,10 @@ static int erofs_read_superblock(struct super_block *sb)
sbi->root_nid = le16_to_cpu(dsb->root_nid);
sbi->inos = le64_to_cpu(dsb->inos);
 
+#ifdef CONFIG_EROFS_FS_ZIP
+   sbi->readahead_sync_decompress = false;
+#endif
+
sbi->build_time = le64_to_cpu(dsb->build_time);
sbi->build_time_nsec = le32_to_cpu(dsb->build_time_nsec);
 
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 6cb356c4217b..49ffc817dd9e 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -706,6 +706,7 @@ static int z_erofs_do_read_page(struct 
z_erofs_decompress_frontend *fe,
goto out;
 }
 
+static void z_erofs_decompressqueue_work(struct work_struct *work);
 static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
   bool sync, int bios)
 {
@@ -720,8 +721,14 @@ static void z_erofs_decompress_kickoff(struct 
z_erofs_decompressqueue *io,
return;
}
 
-   if (!atomic_add_return(bios, &io->pending_bios))
-   queue_work(z_erofs_workqueue, &io->u.work);
+   if (!atomic_add_return(bios, &io->pending_bios)) {
+   if (in_atomic() || irqs_disabled()) {
+   queue_work(z_erofs_workqueue, &io->u.work);
+   sbi->readahead_sync_decompress = true;
+   } else {
+   z_erofs_decompressqueue_work(&io->u.work);
+   }
+   }
 }
 
 static bool z_erofs_page_is_invalidated(struct page *page)
@@ -1333,6 +1340,8 @@ static void z_erofs_readahead(struct readahead_control 
*rac)
struct erofs_sb_info *const sbi = EROFS_I_SB(inode);
 
unsigned int nr_pages = readahead_count(rac);
+   bool sync = (sbi->readahead_sync_decompress &&
+   nr_pages <= sbi->ctx.max_sync_decompress_pages);
bool sync = (nr_pages <= sbi->ctx.max_sync_decompress_pages);
struct z_erofs_decompress_frontend f = DECOMPRESS_FRONTEND_INIT(inode);
struct page *page, *head = NULL;
-- 
2.25.1



[PATCH v2 1/2] erofs: avoid memory allocation failure during rolling decompression

2021-03-05 Thread Huang Jianan
Currently, err would be treated as io error. Therefore, it'd be
better to ensure memory allocation during rolling decompression
to avoid such io error.

In the long term, we might consider adding another !Uptodate case
for such case.

Signed-off-by: Huang Jianan 
Signed-off-by: Guo Weichao 
---
 fs/erofs/decompressor.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index 1cb1ffd10569..3d276a8aad86 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -73,7 +73,8 @@ static int z_erofs_lz4_prepare_destpages(struct 
z_erofs_decompress_req *rq,
victim = availables[--top];
get_page(victim);
} else {
-   victim = erofs_allocpage(pagepool, GFP_KERNEL);
+   victim = erofs_allocpage(pagepool,
+GFP_KERNEL | __GFP_NOFAIL);
if (!victim)
return -ENOMEM;
set_page_private(victim, Z_EROFS_SHORTLIVED_PAGE);
-- 
2.25.1



[PATCH 2/2] erofs: decompress in endio if possible

2021-03-04 Thread Huang Jianan
z_erofs_decompressqueue_endio may not be executed in the interrupt
context, for example, when dm-verity is turned on. In this scenario,
io should be decompressed directly to avoid additional scheduling
overhead. Also there is no need to wait for endio to execute
synchronous decompression.

Signed-off-by: Huang Jianan 
Signed-off-by: Guo Weichao 
---
 fs/erofs/internal.h |   3 ++
 fs/erofs/super.c|   1 +
 fs/erofs/zdata.c| 102 
 3 files changed, 60 insertions(+), 46 deletions(-)

diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 77965490dced..a19bcbb681fc 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -71,6 +71,9 @@ struct erofs_sb_info {
/* pseudo inode to manage cached pages */
struct inode *managed_cache;
 
+   /* decide whether to decompress synchronously */
+   bool sync_decompress;
+
/* # of pages needed for EROFS lz4 rolling decompression */
u16 lz4_max_distance_pages;
 #endif /* CONFIG_EROFS_FS_ZIP */
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index 37f1cc9d28cc..5b9a21d10a30 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -188,6 +188,7 @@ static int erofs_read_superblock(struct super_block *sb)
goto out;
}
 
+   sbi->sync_decompress = false;
/* parse on-disk compression configurations */
z_erofs_load_lz4_config(sbi, dsb);
ret = 0;
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index 6cb356c4217b..727dd01f55c1 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -706,56 +706,11 @@ static int z_erofs_do_read_page(struct 
z_erofs_decompress_frontend *fe,
goto out;
 }
 
-static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
-  bool sync, int bios)
-{
-   /* wake up the caller thread for sync decompression */
-   if (sync) {
-   unsigned long flags;
-
-   spin_lock_irqsave(&io->u.wait.lock, flags);
-   if (!atomic_add_return(bios, &io->pending_bios))
-   wake_up_locked(&io->u.wait);
-   spin_unlock_irqrestore(&io->u.wait.lock, flags);
-   return;
-   }
-
-   if (!atomic_add_return(bios, &io->pending_bios))
-   queue_work(z_erofs_workqueue, &io->u.work);
-}
-
 static bool z_erofs_page_is_invalidated(struct page *page)
 {
return !page->mapping && !z_erofs_is_shortlived_page(page);
 }
 
-static void z_erofs_decompressqueue_endio(struct bio *bio)
-{
-   tagptr1_t t = tagptr_init(tagptr1_t, bio->bi_private);
-   struct z_erofs_decompressqueue *q = tagptr_unfold_ptr(t);
-   blk_status_t err = bio->bi_status;
-   struct bio_vec *bvec;
-   struct bvec_iter_all iter_all;
-
-   bio_for_each_segment_all(bvec, bio, iter_all) {
-   struct page *page = bvec->bv_page;
-
-   DBG_BUGON(PageUptodate(page));
-   DBG_BUGON(z_erofs_page_is_invalidated(page));
-
-   if (err)
-   SetPageError(page);
-
-   if (erofs_page_is_managed(EROFS_SB(q->sb), page)) {
-   if (!err)
-   SetPageUptodate(page);
-   unlock_page(page);
-   }
-   }
-   z_erofs_decompress_kickoff(q, tagptr_unfold_tags(t), -1);
-   bio_put(bio);
-}
-
 static int z_erofs_decompress_pcluster(struct super_block *sb,
   struct z_erofs_pcluster *pcl,
   struct list_head *pagepool)
@@ -991,6 +946,60 @@ static void z_erofs_decompressqueue_work(struct 
work_struct *work)
kvfree(bgq);
 }
 
+static void z_erofs_decompress_kickoff(struct z_erofs_decompressqueue *io,
+  bool sync, int bios)
+{
+   struct erofs_sb_info *const sbi = EROFS_SB(io->sb);
+
+   /* wake up the caller thread for sync decompression */
+   if (sync) {
+   unsigned long flags;
+
+   spin_lock_irqsave(&io->u.wait.lock, flags);
+   if (!atomic_add_return(bios, &io->pending_bios))
+   wake_up_locked(&io->u.wait);
+   spin_unlock_irqrestore(&io->u.wait.lock, flags);
+   return;
+   }
+
+   if (!atomic_add_return(bios, &io->pending_bios)) {
+   if (in_atomic() || irqs_disabled()) {
+   queue_work(z_erofs_workqueue, &io->u.work);
+   if (unlikely(!sbi->sync_decompress))
+   sbi->sync_decompress = true;
+   }
+   else
+   z_erofs_decompressqueue_work(&io->u.work);
+   }
+}
+
+static void z_erofs_decompressqueue_endio(struct bio *bio)
+{
+   tagptr1_t t = tagptr_init(tagptr1_t, bio->bi_private);
+   struct z_erofs_decompressqueue *q = tagptr_unfold_ptr(t);
+   blk_status_t err

[PATCH 1/2] erofs: avoid memory allocation failure during rolling decompression

2021-03-04 Thread Huang Jianan
It should be better to ensure memory allocation during rolling
decompression to avoid io error.

Signed-off-by: Huang Jianan 
Signed-off-by: Guo Weichao 
---
 fs/erofs/decompressor.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index 49347e681a53..fb0fa4e5b9ea 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -86,7 +86,7 @@ static int z_erofs_lz4_prepare_destpages(struct 
z_erofs_decompress_req *rq,
victim = availables[--top];
get_page(victim);
} else {
-   victim = erofs_allocpage(pagepool, GFP_KERNEL);
+   victim = erofs_allocpage(pagepool, GFP_KERNEL | __GFP_NOFAIL);
if (!victim)
return -ENOMEM;
set_page_private(victim, Z_EROFS_SHORTLIVED_PAGE);
-- 
2.25.1



[PATCH v2 3/3] f2fs: check if swapfile is section-alligned

2021-02-28 Thread Huang Jianan
If the swapfile isn't created by pin and fallocate, it can't be
guaranteed to be section-aligned, so it may be selected by f2fs gc. When
gc_pin_file_threshold is reached, the address of the swapfile may change,
but won't be synchronized to swap_extent, so swap will write to the wrong
address, which will cause data corruption.

Signed-off-by: Huang Jianan 
Signed-off-by: Guo Weichao 
---
 fs/f2fs/data.c | 109 +++--
 1 file changed, 88 insertions(+), 21 deletions(-)
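
As an editorial aside, here is a standalone sketch of the alignment test
the hunks below introduce, with illustrative numbers only (4 KiB blocks,
2 MiB sections, i.e. blocks_per_sec = 512; f2fs section sizes are powers
of two, which is what makes the mask trick valid). It is not part of the
patch.

/* Illustration only. */
#include <stdbool.h>
#include <stdio.h>

static bool section_aligned(unsigned long pblock, unsigned long main_blkaddr,
			    unsigned long nr_pblocks, unsigned int blocks_per_sec)
{
	/* Both the extent start (relative to the main area) and its length
	 * must be multiples of blocks_per_sec. */
	return !((pblock - main_blkaddr) & (blocks_per_sec - 1)) &&
	       !(nr_pblocks & (blocks_per_sec - 1));
}

int main(void)
{
	/* Extent starting exactly one section into the main area: accepted. */
	printf("%d\n", section_aligned(1536, 1024, 1024, 512));	/* prints 1 */
	/* Same extent shifted by one block: the patch rejects it with -EINVAL. */
	printf("%d\n", section_aligned(1537, 1024, 1024, 512));	/* prints 0 */
	return 0;
}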

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 4dbc1cafc55d..d33085daa3dc 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -3781,11 +3781,64 @@ int f2fs_migrate_page(struct address_space *mapping,
 #endif
 
 #ifdef CONFIG_SWAP
+static int f2fs_is_file_aligned(struct inode *inode)
+{
+   struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+   block_t main_blkaddr = SM_I(sbi)->main_blkaddr;
+   block_t cur_lblock;
+   block_t last_lblock;
+   block_t pblock;
+   unsigned long nr_pblocks;
+   unsigned int blocks_per_sec = BLKS_PER_SEC(sbi);
+   int ret = 0;
+
+   cur_lblock = 0;
+   last_lblock = bytes_to_blks(inode, i_size_read(inode));
+
+   while (cur_lblock < last_lblock) {
+   struct f2fs_map_blocks map;
+
+   memset(&map, 0, sizeof(map));
+   map.m_lblk = cur_lblock;
+   map.m_len = last_lblock - cur_lblock;
+   map.m_next_pgofs = NULL;
+   map.m_next_extent = NULL;
+   map.m_seg_type = NO_CHECK_TYPE;
+   map.m_may_create = false;
+
+   ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP);
+   if (ret)
+   goto out;
+
+   /* hole */
+   if (!(map.m_flags & F2FS_MAP_FLAGS)) {
+   f2fs_err(sbi, "Swapfile has holes\n");
+   ret = -ENOENT;
+   goto out;
+   }
+
+   pblock = map.m_pblk;
+   nr_pblocks = map.m_len;
+
+   if ((pblock - main_blkaddr) & (blocks_per_sec - 1) ||
+   nr_pblocks & (blocks_per_sec - 1)) {
+   f2fs_err(sbi, "Swapfile does not align to section");
+   ret = -EINVAL;
+   goto out;
+   }
+
+   cur_lblock += nr_pblocks;
+   }
+out:
+   return ret;
+}
+
 static int check_swap_activate_fast(struct swap_info_struct *sis,
struct file *swap_file, sector_t *span)
 {
struct address_space *mapping = swap_file->f_mapping;
struct inode *inode = mapping->host;
+   struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
sector_t cur_lblock;
sector_t last_lblock;
sector_t pblock;
@@ -3793,8 +3846,8 @@ static int check_swap_activate_fast(struct 
swap_info_struct *sis,
sector_t highest_pblock = 0;
int nr_extents = 0;
unsigned long nr_pblocks;
-   u64 len;
-   int ret;
+   unsigned int blocks_per_sec = BLKS_PER_SEC(sbi);
+   int ret = 0;
 
/*
 * Map all the blocks into the extent list.  This code doesn't try
@@ -3802,31 +3855,41 @@ static int check_swap_activate_fast(struct 
swap_info_struct *sis,
 */
cur_lblock = 0;
last_lblock = bytes_to_blks(inode, i_size_read(inode));
-   len = i_size_read(inode);
 
while (cur_lblock + 1 <= last_lblock && cur_lblock < sis->max) {
struct f2fs_map_blocks map;
-   pgoff_t next_pgofs;
 
cond_resched();
 
		memset(&map, 0, sizeof(map));
map.m_lblk = cur_lblock;
-   map.m_len = bytes_to_blks(inode, len) - cur_lblock;
-   map.m_next_pgofs = &next_pgofs;
+   map.m_len = last_lblock - cur_lblock;
+   map.m_next_pgofs = NULL;
+   map.m_next_extent = NULL;
map.m_seg_type = NO_CHECK_TYPE;
+   map.m_may_create = false;
 
		ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP);
if (ret)
-   goto err_out;
+   goto out;
 
/* hole */
-   if (!(map.m_flags & F2FS_MAP_FLAGS))
-   goto err_out;
+   if (!(map.m_flags & F2FS_MAP_FLAGS)) {
+   f2fs_err(sbi, "Swapfile has holes\n");
+   ret = -ENOENT;
+   goto out;
+   }
 
pblock = map.m_pblk;
nr_pblocks = map.m_len;
 
+   if ((pblock - SM_I(sbi)->main_blkaddr) & (blocks_per_sec - 1) ||
+   nr_pblocks & (blocks_per_sec - 1)) {
+   f2fs_err(sbi, "Swapfile does not align to section");
+   ret = -EINVAL;
+

[PATCH 3/3] f2fs: check if swapfile is section-alligned

2021-02-27 Thread Huang Jianan
If the swapfile isn't created by pin and fallocate, it can't be
guaranteed to be section-aligned, so it may be selected by f2fs gc. When
gc_pin_file_threshold is reached, the address of the swapfile may change,
but won't be synchronized to swap_extent, so swap will write to the wrong
address, which will cause data corruption.

Signed-off-by: Huang Jianan 
Signed-off-by: Guo Weichao 
---
 fs/f2fs/data.c | 63 ++
 1 file changed, 63 insertions(+)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index 4dbc1cafc55d..3e523d6e4643 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -3781,11 +3781,63 @@ int f2fs_migrate_page(struct address_space *mapping,
 #endif
 
 #ifdef CONFIG_SWAP
+static int f2fs_check_file_aligned(struct inode *inode)
+{
+   struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
+   block_t main_blkaddr = SM_I(sbi)->main_blkaddr;
+   block_t cur_lblock;
+   block_t last_lblock;
+   block_t pblock;
+   unsigned long len;
+   unsigned long nr_pblocks;
+   unsigned int blocks_per_sec = sbi->blocks_per_seg * sbi->segs_per_sec;
+   int ret;
+
+   cur_lblock = 0;
+   last_lblock = bytes_to_blks(inode, i_size_read(inode));
+   len = i_size_read(inode);
+
+   while (cur_lblock < last_lblock) {
+   struct f2fs_map_blocks map;
+   pgoff_t next_pgofs;
+
+   memset(&map, 0, sizeof(map));
+   map.m_lblk = cur_lblock;
+   map.m_len = bytes_to_blks(inode, len) - cur_lblock;
+   map.m_next_pgofs = &next_pgofs;
+   map.m_seg_type = NO_CHECK_TYPE;
+
+   ret = f2fs_map_blocks(inode, &map, 0, F2FS_GET_BLOCK_FIEMAP);
+
+   if (ret)
+   goto err_out;
+
+   /* hole */
+   if (!(map.m_flags & F2FS_MAP_FLAGS))
+   goto err_out;
+
+   pblock = map.m_pblk;
+   nr_pblocks = map.m_len;
+
+   if ((pblock - main_blkaddr) & (blocks_per_sec - 1) ||
+   nr_pblocks & (blocks_per_sec - 1))
+   goto err_out;
+
+   cur_lblock += nr_pblocks;
+   }
+
+   return 0;
+err_out:
+   pr_err("swapon: swapfile isn't section-aligned\n");
+   return -EINVAL;
+}
+
 static int check_swap_activate_fast(struct swap_info_struct *sis,
struct file *swap_file, sector_t *span)
 {
struct address_space *mapping = swap_file->f_mapping;
struct inode *inode = mapping->host;
+   struct f2fs_sb_info *sbi = F2FS_I_SB(inode);
sector_t cur_lblock;
sector_t last_lblock;
sector_t pblock;
@@ -3793,6 +3845,7 @@ static int check_swap_activate_fast(struct 
swap_info_struct *sis,
sector_t highest_pblock = 0;
int nr_extents = 0;
unsigned long nr_pblocks;
+   unsigned int blocks_per_sec = sbi->blocks_per_seg * sbi->segs_per_sec;
u64 len;
int ret;
 
@@ -3827,6 +3880,13 @@ static int check_swap_activate_fast(struct 
swap_info_struct *sis,
pblock = map.m_pblk;
nr_pblocks = map.m_len;
 
+   if ((pblock - SM_I(sbi)->main_blkaddr) & (blocks_per_sec - 1) ||
+   nr_pblocks & (blocks_per_sec - 1)) {
+   pr_err("swapon: swapfile isn't section-aligned\n");
+   ret = -EINVAL;
+   goto out;
+   }
+
if (cur_lblock + nr_pblocks >= sis->max)
nr_pblocks = sis->max - cur_lblock;
 
@@ -3878,6 +3938,9 @@ static int check_swap_activate(struct swap_info_struct 
*sis,
if (PAGE_SIZE == F2FS_BLKSIZE)
return check_swap_activate_fast(sis, swap_file, span);
 
+   if (f2fs_check_file_aligned(inode))
+   return -EINVAL;
+
blocks_per_page = bytes_to_blks(inode, PAGE_SIZE);
 
/*
-- 
2.25.1



[PATCH 2/3] f2fs: fix last_lblock check in check_swap_activate_fast

2021-02-27 Thread Huang Jianan
Because page_no < sis->max guarantees that the while loop breaks out
normally, the wrong check condition here doesn't cause a problem.

Signed-off-by: Huang Jianan 
Signed-off-by: Guo Weichao 
---
 fs/f2fs/data.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index a1498a1a345c..4dbc1cafc55d 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -3804,7 +3804,7 @@ static int check_swap_activate_fast(struct 
swap_info_struct *sis,
last_lblock = bytes_to_blks(inode, i_size_read(inode));
len = i_size_read(inode);
 
-   while (cur_lblock <= last_lblock && cur_lblock < sis->max) {
+   while (cur_lblock + 1 <= last_lblock && cur_lblock < sis->max) {
struct f2fs_map_blocks map;
pgoff_t next_pgofs;
 
-- 
2.25.1



[PATCH 1/3] f2fs: remove unnecessary IS_SWAPFILE check

2021-02-27 Thread Huang Jianan
Now a swapfile in f2fs directly submits IO to the blockdev according to
the swapfile extents reported by f2fs at swapon time, so there is no
need to check IS_SWAPFILE when performing filesystem operations.

Signed-off-by: Huang Jianan 
Signed-off-by: Guo Weichao 
---
 fs/f2fs/data.c | 2 +-
 fs/f2fs/f2fs.h | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c
index b9721c8f116c..a1498a1a345c 100644
--- a/fs/f2fs/data.c
+++ b/fs/f2fs/data.c
@@ -1723,7 +1723,7 @@ static int get_data_block_dio_write(struct inode *inode, 
sector_t iblock,
return __get_data_block(inode, iblock, bh_result, create,
F2FS_GET_BLOCK_DIO, NULL,
f2fs_rw_hint_to_seg_type(inode->i_write_hint),
-   IS_SWAPFILE(inode) ? false : true);
+   true);
 }
 
 static int get_data_block_dio(struct inode *inode, sector_t iblock,
diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
index cccdfb1a40ab..3f65cfe11a0f 100644
--- a/fs/f2fs/f2fs.h
+++ b/fs/f2fs/f2fs.h
@@ -4176,8 +4176,7 @@ static inline bool f2fs_force_buffered_io(struct inode 
*inode,
if (F2FS_IO_ALIGNED(sbi))
return true;
}
-   if (is_sbi_flag_set(F2FS_I_SB(inode), SBI_CP_DISABLED) &&
-   !IS_SWAPFILE(inode))
+   if (is_sbi_flag_set(F2FS_I_SB(inode), SBI_CP_DISABLED))
return true;
 
return false;
-- 
2.25.1



[PATCH v3] erofs: support adjust lz4 history window size

2021-02-22 Thread Huang Jianan
lz4 uses LZ4_DISTANCE_MAX to record history preservation. When
using rolling decompression, a block with a higher compression
ratio will cause a larger memory allocation (up to 64k). It may
cause a large resource burden in extreme cases on devices with
small memory and a large number of concurrent IOs. So appropriately
reducing this value can improve performance.

Decreasing this value will reduce the compression ratio (except
when input_size 
Signed-off-by: Guo Weichao 
---

change since v2:
- use z_erofs_load_lz4_config to calculate lz4_distance_pages
- add description about the compatibility of the old kernel version
- drop useless comment
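
As a worked example of the saving claimed above, this standalone sketch
mirrors the page-count conversion done by z_erofs_load_lz4_config() in the
diff below, assuming 4 KiB pages and lz4's stock 64 KiB window (the extra
page roughly covers a window that straddles a page boundary); it is an
illustration, not part of the patch.

/* Illustration only. */
#include <stdio.h>

#define PAGE_SIZE		4096u
#define DIV_ROUND_UP(n, d)	(((n) + (d) - 1) / (d))

static unsigned int lz4_distance_pages(unsigned int max_distance)
{
	return DIV_ROUND_UP(max_distance, PAGE_SIZE) + 1;
}

int main(void)
{
	printf("%u\n", lz4_distance_pages(65535));	/* stock window  -> 17 pages */
	printf("%u\n", lz4_distance_pages(4096));	/* reduced window ->  2 pages */
	return 0;
}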

 fs/erofs/decompressor.c | 22 ++
 fs/erofs/erofs_fs.h |  3 ++-
 fs/erofs/internal.h |  7 +++
 fs/erofs/super.c|  2 ++
 4 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index 1cb1ffd10569..0bb7903e3f9b 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -28,6 +28,18 @@ struct z_erofs_decompressor {
char *name;
 };
 
+int z_erofs_load_lz4_config(struct erofs_sb_info *sbi,
+   struct erofs_super_block *dsb)
+{
+   u16 distance = le16_to_cpu(dsb->lz4_max_distance);
+
+   sbi->lz4_max_distance_pages = distance ?
+   (DIV_ROUND_UP(distance, PAGE_SIZE) + 1) 
:
+   LZ4_MAX_DISTANCE_PAGES;
+
+   return 0;
+}
+
 static int z_erofs_lz4_prepare_destpages(struct z_erofs_decompress_req *rq,
 struct list_head *pagepool)
 {
@@ -36,6 +48,8 @@ static int z_erofs_lz4_prepare_destpages(struct 
z_erofs_decompress_req *rq,
struct page *availables[LZ4_MAX_DISTANCE_PAGES] = { NULL };
unsigned long bounced[DIV_ROUND_UP(LZ4_MAX_DISTANCE_PAGES,
   BITS_PER_LONG)] = { 0 };
+   unsigned int lz4_max_distance_pages =
+   EROFS_SB(rq->sb)->lz4_max_distance_pages;
void *kaddr = NULL;
unsigned int i, j, top;
 
@@ -44,14 +58,14 @@ static int z_erofs_lz4_prepare_destpages(struct 
z_erofs_decompress_req *rq,
struct page *const page = rq->out[i];
struct page *victim;
 
-   if (j >= LZ4_MAX_DISTANCE_PAGES)
+   if (j >= lz4_max_distance_pages)
j = 0;
 
/* 'valid' bounced can only be tested after a complete round */
if (test_bit(j, bounced)) {
-   DBG_BUGON(i < LZ4_MAX_DISTANCE_PAGES);
-   DBG_BUGON(top >= LZ4_MAX_DISTANCE_PAGES);
-   availables[top++] = rq->out[i - LZ4_MAX_DISTANCE_PAGES];
+   DBG_BUGON(i < lz4_max_distance_pages);
+   DBG_BUGON(top >= lz4_max_distance_pages);
+   availables[top++] = rq->out[i - lz4_max_distance_pages];
}
 
if (page) {
diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h
index 9ad1615f4474..b27d0e4e4ab5 100644
--- a/fs/erofs/erofs_fs.h
+++ b/fs/erofs/erofs_fs.h
@@ -39,7 +39,8 @@ struct erofs_super_block {
__u8 uuid[16];  /* 128-bit uuid for volume */
__u8 volume_name[16];   /* volume name */
__le32 feature_incompat;
-   __u8 reserved2[44];
+   __le16 lz4_max_distance;
+   __u8 reserved2[42];
 };
 
 /*
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 67a7ec945686..4cb2395db45c 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -70,6 +70,9 @@ struct erofs_sb_info {
 
/* pseudo inode to manage cached pages */
struct inode *managed_cache;
+
+   /* # of pages needed for EROFS lz4 rolling decompression */
+   u16 lz4_max_distance_pages;
 #endif /* CONFIG_EROFS_FS_ZIP */
u32 blocks;
u32 meta_blkaddr;
@@ -420,6 +423,8 @@ int erofs_try_to_free_all_cached_pages(struct erofs_sb_info 
*sbi,
   struct erofs_workgroup *egrp);
 int erofs_try_to_free_cached_page(struct address_space *mapping,
  struct page *page);
+int z_erofs_load_lz4_config(struct erofs_sb_info *sbi,
+   struct erofs_super_block *dsb);
 #else
 static inline void erofs_shrinker_register(struct super_block *sb) {}
 static inline void erofs_shrinker_unregister(struct super_block *sb) {}
@@ -427,6 +432,8 @@ static inline int erofs_init_shrinker(void) { return 0; }
 static inline void erofs_exit_shrinker(void) {}
 static inline int z_erofs_init_zip_subsystem(void) { return 0; }
 static inline void z_erofs_exit_zip_subsystem(void) {}
+static inline int z_erofs_load_lz4_config(struct erofs_sb_info *sbi,
+ struct erofs_super_block *dsb) { return 0; }
 #endif /* !CONFIG_EROFS_FS_ZIP */
 
 #define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted 

[PATCH v2] erofs: support adjust lz4 history window size

2021-02-22 Thread Huang Jianan
From: huangjianan 

lz4 uses LZ4_DISTANCE_MAX to record history preservation. When
using rolling decompression, a block with a higher compression
ratio will cause a larger memory allocation (up to 64k). It may
cause a large resource burden in extreme cases on devices with
small memory and a large number of concurrent IOs. So appropriately
reducing this value can improve performance.

Decreasing this value will reduce the compression ratio (except
when input_size 
Signed-off-by: Guo Weichao 
---

changes since previous version
- change compr_alg to lz4_max_distance
- calculate lz4_max_distance_pages when reading super_block

 fs/erofs/decompressor.c | 12 
 fs/erofs/erofs_fs.h |  3 ++-
 fs/erofs/internal.h |  3 +++
 fs/erofs/super.c|  5 +
 4 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index 1cb1ffd10569..fb2b4f1b8806 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -36,22 +36,26 @@ static int z_erofs_lz4_prepare_destpages(struct 
z_erofs_decompress_req *rq,
struct page *availables[LZ4_MAX_DISTANCE_PAGES] = { NULL };
unsigned long bounced[DIV_ROUND_UP(LZ4_MAX_DISTANCE_PAGES,
   BITS_PER_LONG)] = { 0 };
+   unsigned int lz4_max_distance_pages = LZ4_MAX_DISTANCE_PAGES;
void *kaddr = NULL;
unsigned int i, j, top;
 
+   if (EROFS_SB(rq->sb)->lz4_max_distance_pages)
+   lz4_max_distance_pages = EROFS_SB(rq->sb)->lz4_max_distance_pages;
+
top = 0;
for (i = j = 0; i < nr; ++i, ++j) {
struct page *const page = rq->out[i];
struct page *victim;
 
-   if (j >= LZ4_MAX_DISTANCE_PAGES)
+   if (j >= lz4_max_distance_pages)
j = 0;
 
/* 'valid' bounced can only be tested after a complete round */
if (test_bit(j, bounced)) {
-   DBG_BUGON(i < LZ4_MAX_DISTANCE_PAGES);
-   DBG_BUGON(top >= LZ4_MAX_DISTANCE_PAGES);
-   availables[top++] = rq->out[i - LZ4_MAX_DISTANCE_PAGES];
+   DBG_BUGON(i < lz4_max_distance_pages);
+   DBG_BUGON(top >= lz4_max_distance_pages);
+   availables[top++] = rq->out[i - lz4_max_distance_pages];
}
 
if (page) {
diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h
index 9ad1615f4474..5eb37002b1a3 100644
--- a/fs/erofs/erofs_fs.h
+++ b/fs/erofs/erofs_fs.h
@@ -39,7 +39,8 @@ struct erofs_super_block {
__u8 uuid[16];  /* 128-bit uuid for volume */
__u8 volume_name[16];   /* volume name */
__le32 feature_incompat;
-   __u8 reserved2[44];
+   __le16 lz4_max_distance;/* lz4 max distance */
+   __u8 reserved2[42];
 };
 
 /*
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 67a7ec945686..7457710a763a 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -70,6 +70,9 @@ struct erofs_sb_info {
 
/* pseudo inode to manage cached pages */
struct inode *managed_cache;
+
+   /* lz4 max distance pages */
+   u16 lz4_max_distance_pages;
 #endif /* CONFIG_EROFS_FS_ZIP */
u32 blocks;
u32 meta_blkaddr;
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index d5a6b9b888a5..3a3d235de7cc 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -174,6 +174,11 @@ static int erofs_read_superblock(struct super_block *sb)
sbi->islotbits = ilog2(sizeof(struct erofs_inode_compact));
sbi->root_nid = le16_to_cpu(dsb->root_nid);
sbi->inos = le64_to_cpu(dsb->inos);
+#ifdef CONFIG_EROFS_FS_ZIP
+   if (dsb->lz4_max_distance)
+   sbi->lz4_max_distance_pages =
+   DIV_ROUND_UP(le16_to_cpu(dsb->lz4_max_distance), PAGE_SIZE) + 1;
+#endif
 
sbi->build_time = le64_to_cpu(dsb->build_time);
sbi->build_time_nsec = le32_to_cpu(dsb->build_time_nsec);
-- 
2.25.1



Re: [PATCH] erofs: support adjust lz4 history window size

2021-02-22 Thread Huang Jianan

Hi Xiang,

On 2021/2/22 12:44, Gao Xiang wrote:

Hi Jianan,

On Thu, Feb 18, 2021 at 08:00:49PM +0800, Huang Jianan via Linux-erofs wrote:

From: huangjianan 

lz4 uses LZ4_DISTANCE_MAX to record history preservation. When
using rolling decompression, a block with a higher compression
ratio will cause a larger memory allocation (up to 64k). It may
cause a large resource burden in extreme cases on devices with
small memory and a large number of concurrent IOs. So appropriately
reducing this value can improve performance.

Decreasing this value will reduce the compression ratio (except
when input_size 
Signed-off-by: Guo Weichao 
---
  fs/erofs/decompressor.c | 13 +
  fs/erofs/erofs_fs.h |  3 ++-
  fs/erofs/internal.h |  3 +++
  fs/erofs/super.c|  3 +++
  4 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index 1cb1ffd10569..94ae56b3ff71 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -36,22 +36,27 @@ static int z_erofs_lz4_prepare_destpages(struct 
z_erofs_decompress_req *rq,
struct page *availables[LZ4_MAX_DISTANCE_PAGES] = { NULL };
unsigned long bounced[DIV_ROUND_UP(LZ4_MAX_DISTANCE_PAGES,
   BITS_PER_LONG)] = { 0 };
+   unsigned int lz4_distance_pages = LZ4_MAX_DISTANCE_PAGES;
void *kaddr = NULL;
unsigned int i, j, top;
  
+	if (EROFS_SB(rq->sb)->compr_alg)

+   lz4_distance_pages = DIV_ROUND_UP(EROFS_SB(rq->sb)->compr_alg,
+ PAGE_SIZE) + 1;
+

Thanks for your patch, I agree that will reduce the runtime memory
footprint, and keeping the max sliding window on-disk in bytes (rather
than in blocks) is better, but could we calculate lz4_distance_pages
ahead of time when reading the super_block?

Thanks for the suggestion, I will update it soon.

Also, in the next cycle, I'd like to introduce a bitmap of available
algorithms (maximum 16 bits) for the upcoming LZMA algorithm, and for each
available algorithm introduce an on-disk variable array like below:

bitmap (16-bit)        ...    2      1      0
                       ...    LZMA   LZ4

__le16  compr_opt_off;      /* opt array start offset
                               (I think also in 4-byte units) */

compr alg 0 (lz4)           __le16  alg_opt_size;
                            /* next opt off = roundup(off + alg_opt_size, 4); */
                            __le16  lz4_max_distance;

                            /* 4-byte aligned */
compr alg x (if available)  u8      alg_opt_size;
...
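
A rough C rendering of that layout, as this editor reads the proposal; the
struct and field names are guesses for illustration, not a finalized
on-disk format.

/* Guesswork sketch of the proposed on-disk compression options. */
#include <linux/types.h>

struct erofs_compr_opts_hdr {
	__le16 available_compr_algs;	/* bitmap: bit 0 = LZ4, bit 1 = LZMA, ... */
	__le16 compr_opt_off;		/* start offset of the option array */
};

/* One entry per set bit, laid out back to back; the next entry starts at
 * roundup(off + alg_opt_size, 4), i.e. every entry is 4-byte aligned. */
struct erofs_lz4_compr_opt {
	__le16 alg_opt_size;
	__le16 lz4_max_distance;
};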

When reading the sb, it first scans the whole bitmap and gets all the
available algorithms in the image at once, and then reads the compr
opts one by one.
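
A rough sketch of how such a layout might be parsed when reading the sb is
shown below (all struct, field and helper names here are hypothetical
placeholders, not a committed on-disk format):

/* Hypothetical sketch: walk the 16-bit available-algorithm bitmap and the
 * variable-length per-algorithm option records, each record padded to
 * 4-byte alignment. erofs_read_meta_sketch() is a stand-in for reading
 * metadata bytes at a given offset.
 */
struct erofs_compr_opt_hdr {		/* hypothetical record header */
	__le16 alg_opt_size;		/* size of the payload that follows */
};

static int erofs_scan_compr_opts_sketch(struct super_block *sb,
					u16 available_algs, erofs_off_t off)
{
	unsigned int alg;

	for (alg = 0; alg < 16; alg++) {
		struct erofs_compr_opt_hdr hdr;

		if (!(available_algs & (1U << alg)))
			continue;
		if (erofs_read_meta_sketch(sb, off, &hdr, sizeof(hdr)))
			return -EIO;
		/* hand the following alg_opt_size bytes to the per-algorithm
		 * parser here, e.g. lz4_max_distance for the LZ4 slot
		 */
		off = round_up(off + sizeof(hdr) +
			       le16_to_cpu(hdr.alg_opt_size), 4);
	}
	return 0;
}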

Do you have some interest and extra time to implement it? :) That
would lighten my workload since I'm debugging mbpcluster compression now...


Sounds good, I will try to do this part of the work.

Thanks,

Jianan


Thanks,
Gao Xiang



[PATCH] erofs: support adjust lz4 history window size

2021-02-18 Thread Huang Jianan
From: huangjianan 

lz4 uses LZ4_DISTANCE_MAX to record how much history must be
preserved. When using rolling decompression, a block with a higher
compression ratio will cause a larger memory allocation (up to 64k).
This may become a heavy resource burden in extreme cases on devices
with small memory and a large number of concurrent IOs, so
appropriately reducing this value can improve performance.

Decreasing this value will reduce the compression ratio (except
when input_size 
Signed-off-by: Guo Weichao 
---
 fs/erofs/decompressor.c | 13 +
 fs/erofs/erofs_fs.h |  3 ++-
 fs/erofs/internal.h |  3 +++
 fs/erofs/super.c|  3 +++
 4 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/fs/erofs/decompressor.c b/fs/erofs/decompressor.c
index 1cb1ffd10569..94ae56b3ff71 100644
--- a/fs/erofs/decompressor.c
+++ b/fs/erofs/decompressor.c
@@ -36,22 +36,27 @@ static int z_erofs_lz4_prepare_destpages(struct z_erofs_decompress_req *rq,
struct page *availables[LZ4_MAX_DISTANCE_PAGES] = { NULL };
unsigned long bounced[DIV_ROUND_UP(LZ4_MAX_DISTANCE_PAGES,
   BITS_PER_LONG)] = { 0 };
+   unsigned int lz4_distance_pages = LZ4_MAX_DISTANCE_PAGES;
void *kaddr = NULL;
unsigned int i, j, top;
 
+   if (EROFS_SB(rq->sb)->compr_alg)
+   lz4_distance_pages = DIV_ROUND_UP(EROFS_SB(rq->sb)->compr_alg,
+ PAGE_SIZE) + 1;
+
top = 0;
for (i = j = 0; i < nr; ++i, ++j) {
struct page *const page = rq->out[i];
struct page *victim;
 
-   if (j >= LZ4_MAX_DISTANCE_PAGES)
+   if (j >= lz4_distance_pages)
j = 0;
 
/* 'valid' bounced can only be tested after a complete round */
if (test_bit(j, bounced)) {
-   DBG_BUGON(i < LZ4_MAX_DISTANCE_PAGES);
-   DBG_BUGON(top >= LZ4_MAX_DISTANCE_PAGES);
-   availables[top++] = rq->out[i - LZ4_MAX_DISTANCE_PAGES];
+   DBG_BUGON(i < lz4_distance_pages);
+   DBG_BUGON(top >= lz4_distance_pages);
+   availables[top++] = rq->out[i - lz4_distance_pages];
}
 
if (page) {
diff --git a/fs/erofs/erofs_fs.h b/fs/erofs/erofs_fs.h
index 9ad1615f4474..bffc02991f5a 100644
--- a/fs/erofs/erofs_fs.h
+++ b/fs/erofs/erofs_fs.h
@@ -39,7 +39,8 @@ struct erofs_super_block {
__u8 uuid[16];  /* 128-bit uuid for volume */
__u8 volume_name[16];   /* volume name */
__le32 feature_incompat;
-   __u8 reserved2[44];
+   __le16 compr_alg;   /* compression algorithm specific parameters */
+   __u8 reserved2[42];
 };
 
 /*
diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h
index 67a7ec945686..f1c99dc2659f 100644
--- a/fs/erofs/internal.h
+++ b/fs/erofs/internal.h
@@ -70,6 +70,9 @@ struct erofs_sb_info {
 
/* pseudo inode to manage cached pages */
struct inode *managed_cache;
+
+   /* compression algorithm specific parameters */
+   u16 compr_alg;
 #endif /* CONFIG_EROFS_FS_ZIP */
u32 blocks;
u32 meta_blkaddr;
diff --git a/fs/erofs/super.c b/fs/erofs/super.c
index d5a6b9b888a5..198435e3eb2d 100644
--- a/fs/erofs/super.c
+++ b/fs/erofs/super.c
@@ -174,6 +174,9 @@ static int erofs_read_superblock(struct super_block *sb)
sbi->islotbits = ilog2(sizeof(struct erofs_inode_compact));
sbi->root_nid = le16_to_cpu(dsb->root_nid);
sbi->inos = le64_to_cpu(dsb->inos);
+#ifdef CONFIG_EROFS_FS_ZIP
+   sbi->compr_alg = le16_to_cpu(dsb->compr_alg);
+#endif
 
sbi->build_time = le64_to_cpu(dsb->build_time);
sbi->build_time_nsec = le32_to_cpu(dsb->build_time_nsec);
-- 
2.25.1



Re: [fuse-devel] [PATCH] fuse: avoid deadlock when write fuse inode

2021-02-06 Thread Huang Jianan

friendly ping ... 

On 2021/2/2 12:11, Huang Jianan via fuse-devel wrote:

Hi all,


This patch works well in our product, but I am not sure this is the
correct way to solve this problem. I think that the inode->i_count
shouldn't be zero after iput is executed in dentry_unlink_inode, and
then the inode won't be written back. But I haven't found where the
iget is missing.


Thanks,

Jianan

On 2021/2/2 12:08, Huang Jianan wrote:

We found the following deadlock situation in low memory scenarios:

Thread A:
- __writeback_single_inode
 - fuse_write_inode
  - fuse_simple_request
   - __fuse_request_send
    - request_wait_answer

Thread B:
- fuse_dev_splice_read
 - fuse_copy_fill
  - __alloc_pages_direct_reclaim
   - do_shrink_slab
    - super_cache_scan
     - shrink_dentry_list
      - dentry_unlink_inode
       - iput_final
        - inode_wait_for_writeback

The request and inode processed by Thread A and Thread B are the same,
which causes a deadlock. To avoid this, we remove the __GFP_FS flag when
allocating memory in fuse_copy_fill, so there will be no memory
reclamation in super_cache_scan.

Signed-off-by: Huang Jianan 
Signed-off-by: Guo Weichao 
---
  fs/fuse/dev.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 588f8d1240aa..e580b9d04c25 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -721,7 +721,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)

  if (cs->nr_segs >= cs->pipe->max_usage)
  return -EIO;
  -    page = alloc_page(GFP_HIGHUSER);
+    page = alloc_page(GFP_HIGHUSER & ~__GFP_FS);
  if (!page)
  return -ENOMEM;





Re: [PATCH] fuse: avoid deadlock when write fuse inode

2021-02-01 Thread Huang Jianan

Hi all,


This patch works well in our product, but I am not sure this is the
correct way to solve this problem. I think that the inode->i_count
shouldn't be zero after iput is executed in dentry_unlink_inode, and
then the inode won't be written back. But I haven't found where the
iget is missing.


Thanks,

Jianan

On 2021/2/2 12:08, Huang Jianan wrote:

We found the following deadlock situation in low memory scenarios:

Thread A:
- __writeback_single_inode
 - fuse_write_inode
  - fuse_simple_request
   - __fuse_request_send
    - request_wait_answer

Thread B:
- fuse_dev_splice_read
 - fuse_copy_fill
  - __alloc_pages_direct_reclaim
   - do_shrink_slab
    - super_cache_scan
     - shrink_dentry_list
      - dentry_unlink_inode
       - iput_final
        - inode_wait_for_writeback

The request and inode processed by Thread A and Thread B are the same,
which causes a deadlock. To avoid this, we remove the __GFP_FS flag when
allocating memory in fuse_copy_fill, so there will be no memory
reclamation in super_cache_scan.

Signed-off-by: Huang Jianan 
Signed-off-by: Guo Weichao 
---
  fs/fuse/dev.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 588f8d1240aa..e580b9d04c25 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -721,7 +721,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
if (cs->nr_segs >= cs->pipe->max_usage)
return -EIO;
  
-			page = alloc_page(GFP_HIGHUSER);

+   page = alloc_page(GFP_HIGHUSER & ~__GFP_FS);
if (!page)
return -ENOMEM;
  


[PATCH] fuse: avoid deadlock when write fuse inode

2021-02-01 Thread Huang Jianan
We found the following deadlock situation in low memory scenarios:

Thread A:
- __writeback_single_inode
 - fuse_write_inode
  - fuse_simple_request
   - __fuse_request_send
    - request_wait_answer

Thread B:
- fuse_dev_splice_read
 - fuse_copy_fill
  - __alloc_pages_direct_reclaim
   - do_shrink_slab
    - super_cache_scan
     - shrink_dentry_list
      - dentry_unlink_inode
       - iput_final
        - inode_wait_for_writeback

The request and inode processed by Thread A and Thread B are the same,
which causes a deadlock. To avoid this, we remove the __GFP_FS flag when
allocating memory in fuse_copy_fill, so there will be no memory
reclamation in super_cache_scan.

Signed-off-by: Huang Jianan 
Signed-off-by: Guo Weichao 
---
 fs/fuse/dev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c
index 588f8d1240aa..e580b9d04c25 100644
--- a/fs/fuse/dev.c
+++ b/fs/fuse/dev.c
@@ -721,7 +721,7 @@ static int fuse_copy_fill(struct fuse_copy_state *cs)
if (cs->nr_segs >= cs->pipe->max_usage)
return -EIO;
 
-   page = alloc_page(GFP_HIGHUSER);
+   page = alloc_page(GFP_HIGHUSER & ~__GFP_FS);
if (!page)
return -ENOMEM;
 
-- 
2.25.1
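
As a side note on the approach above, a minimal sketch of the scoped
alternative is shown below, using memalloc_nofs_save()/memalloc_nofs_restore()
so that every allocation in the region implicitly drops __GFP_FS instead of
masking the flag at a single alloc_page() call site (the helper name is
hypothetical, not taken from fs/fuse/dev.c):

/* Hypothetical sketch (not the posted patch): enter the NOFS allocation
 * scope around the copy-buffer allocation.
 */
#include <linux/gfp.h>
#include <linux/sched/mm.h>

static struct page *fuse_copy_fill_page_sketch(void)
{
	unsigned int nofs_flags;
	struct page *page;

	nofs_flags = memalloc_nofs_save();	/* __GFP_FS is masked in this scope */
	page = alloc_page(GFP_HIGHUSER);	/* behaves like GFP_HIGHUSER & ~__GFP_FS */
	memalloc_nofs_restore(nofs_flags);

	return page;
}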



Re: [PATCH] erofs: support direct IO for uncompressed file

2020-12-23 Thread Huang Jianan

Hi Christoph,

The reason we use dio is that we need to deploy the patch on some
early kernel versions, so we haven't paid much attention to the changes
in iomap. Anyway, I will study the problem Gao Xiang mentioned and try
to convert the current patch to iomap.


Thanks,

Jianan


On Wed, Dec 23, 2020 at 03:39:01AM +0800, Gao Xiang wrote:

Hi Christoph,

On Tue, Dec 22, 2020 at 02:22:34PM +, Christoph Hellwig wrote:

Please do not add new callers of __blockdev_direct_IO and use the modern
iomap variant instead.

We've talked about this topic before. The current status is that iomap
doesn't support tail-packing inline data yet (Chao once sent out a version),
and erofs only cares about the read infrastructure for now (so we haven't
thought much about how to deal with the tail-packing inline write path).
Plus, the original patch once lacked an inline data regression test from
the gfs2 folks.

So resend Chao's prep patch as part of the series switching parts of
erofs to iomap. We need to move things off the old infrastructure instead
of adding more users, and everyone needs to help a little.
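
For context, the suggested direction is to expose uncompressed extents through
an ->iomap_begin() hook so that direct reads can go through iomap_dio_rw()
instead of __blockdev_direct_IO(). A rough sketch is shown below (a sketch
only: the hook wiring is hypothetical, tail-packing inline data is not
handled, and the iomap calling conventions differ between kernel releases):

/* Hypothetical sketch of an iomap_begin hook for uncompressed erofs
 * extents; not the series that was eventually merged.
 */
static int erofs_iomap_begin_sketch(struct inode *inode, loff_t offset,
				    loff_t length, unsigned int flags,
				    struct iomap *iomap, struct iomap *srcmap)
{
	struct erofs_map_blocks map = { .m_la = offset };
	int err;

	err = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW);
	if (err)
		return err;

	iomap->offset = map.m_la;
	iomap->length = map.m_llen;
	iomap->bdev = inode->i_sb->s_bdev;

	if (!(map.m_flags & EROFS_MAP_MAPPED)) {
		iomap->type = IOMAP_HOLE;
		iomap->addr = IOMAP_NULL_ADDR;
	} else {
		iomap->type = IOMAP_MAPPED;
		iomap->addr = map.m_pa;	/* physical byte address */
	}
	/* the read path would then call iomap_dio_rw() with these ops */
	return 0;
}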


[PATCH] fsstress: support direct IO

2020-12-14 Thread Huang Jianan
From: huangjianan 

Add a direct IO test for the stress tool which was mentioned here:
https://lore.kernel.org/linux-erofs/20200206135631.1491-1-hsiang...@aol.com/

Signed-off-by: Huang Jianan 
Signed-off-by: Guo Weichao 
---
 stress.c | 20 +---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/stress.c b/stress.c
index f4bf874..7e7cc93 100644
--- a/stress.c
+++ b/stress.c
@@ -4,12 +4,14 @@
  *
  * Copyright (C) 2019-2020 Gao Xiang 
  */
+#define _GNU_SOURCE
 #define _LARGEFILE64_SOURCE
 #include 
 #include 
 #include 
 #include 
 #include 
+#include 
 #include 
 #include 
 #include 
@@ -21,6 +23,7 @@
 #define MAX_CHUNKSIZE  (4 * 1024 * 1024)
 #define MAX_SCAN_CHUNKSIZE (256 * 1024)
 
+bool direct_io = false;
 unsigned int nprocs = 512;
 sig_atomic_t should_stop = 0;
 
@@ -98,7 +101,7 @@ int drop_file_cache(int fd, int mode)
 
 int tryopen(char *filename)
 {
-   int fd = open(filename, O_RDONLY);
+   int fd = open(filename, direct_io ? O_RDONLY | O_DIRECT : O_RDONLY);
 
if (fd < 0)
return -errno;
@@ -166,6 +169,13 @@ int randread(int fd, int chkfd, uint64_t filesize)
if (start + length > filesize)
length = filesize - start;
 
+   if (direct_io) {
+   length = (((length - 1) >> PAGE_SHIFT) + 1)
+   << PAGE_SHIFT;
+   if (!length || start + length > filesize)
+   return 0;
+   }
+
printf("randread(%u): %llu bytes @ %llu\n",
   getpid(), (unsigned long long)length,
   (unsigned long long)start);
@@ -212,7 +222,7 @@ int testfd(int fd, int chkfd, int mode)
err = doscan(fd, chkfd, filesize, chunksize);
if (err)
return err;
-   } else if (mode == RANDSCAN_UNALIGNED) {
+   } else if (mode == RANDSCAN_UNALIGNED && !direct_io) {
chunksize = (random() * random() % MAX_SCAN_CHUNKSIZE) + 1;
err = doscan(fd, chkfd, filesize, chunksize);
if (err)
@@ -252,8 +262,11 @@ static int parse_options(int argc, char *argv[])
 {
int opt;
 
-   while ((opt = getopt(argc, argv, "p:")) != -1) {
+   while ((opt = getopt(argc, argv, "dp:")) != -1) {
switch (opt) {
+   case 'd':
+   direct_io = true;
+   break;
case 'p':
nprocs = atoi(optarg);
if (nprocs < 0) {
@@ -281,6 +294,7 @@ void usage(void)
 {
fputs("usage: [options] TESTFILE [COMPRFILE]\n\n"
  "stress tester for read-only filesystems\n"
+ " -d  use direct io\n"
  " -p# set workers to #\n", stderr);
 }
 
-- 
2.7.4
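
For reference, the constraints that the new -d mode has to respect can be seen
in a small standalone program like the one below (a sketch only, assuming
4 KiB pages): O_DIRECT wants a suitably aligned buffer, offset and length, and
the length round-up mirrors the one added to randread() above.

/* Sketch: a single aligned O_DIRECT read. */
#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>

#define PAGE_SIZE_ASSUMED	4096UL

int main(int argc, char *argv[])
{
	unsigned long length = 5000;	/* arbitrary request size */
	void *buf;
	int fd;

	if (argc < 2)
		return 1;
	fd = open(argv[1], O_RDONLY | O_DIRECT);
	if (fd < 0)
		return 1;

	/* same rounding as randread(): round the length up to a page multiple */
	length = (((length - 1) / PAGE_SIZE_ASSUMED) + 1) * PAGE_SIZE_ASSUMED;

	/* the user buffer also needs to be aligned for O_DIRECT */
	if (posix_memalign(&buf, PAGE_SIZE_ASSUMED, length)) {
		close(fd);
		return 1;
	}

	printf("read %zd bytes\n", pread(fd, buf, length, 0));
	free(buf);
	close(fd);
	return 0;
}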



[PATCH] erofs: support direct IO for uncompressed file

2020-12-14 Thread Huang Jianan
Direct IO is useful in certain scenarios for uncompressed files.
For example, it can avoid double page cache when the uncompressed
file is used to mount an upper-layer filesystem.

In addition, another patch adds a direct IO test for the stress tool
which was mentioned here:
https://lore.kernel.org/linux-erofs/20200206135631.1491-1-hsiang...@aol.com/

Signed-off-by: Huang Jianan 
Signed-off-by: Guo Weichao 
---
 fs/erofs/data.c | 57 +
 1 file changed, 57 insertions(+)

diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index ea4f693bee22..3067aa3defff 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -6,6 +6,8 @@
  */
 #include "internal.h"
 #include 
+#include 
+#include 
 
 #include 
 
@@ -312,6 +314,60 @@ static void erofs_raw_access_readahead(struct readahead_control *rac)
submit_bio(bio);
 }
 
+static int erofs_get_block(struct inode *inode, sector_t iblock,
+  struct buffer_head *bh, int create)
+{
+   struct erofs_map_blocks map = {
+   .m_la = blknr_to_addr(iblock),
+   };
+   int err;
+
+   err = erofs_map_blocks(inode, , EROFS_GET_BLOCKS_RAW);
+   if (err)
+   return err;
+
+   if (map.m_flags & EROFS_MAP_MAPPED)
+   map_bh(bh, inode->i_sb, erofs_blknr(map.m_pa));
+
+   return err;
+}
+
+static int check_direct_IO(struct inode *inode, struct iov_iter *iter,
+  loff_t offset)
+{
+   unsigned i_blkbits = READ_ONCE(inode->i_blkbits);
+   unsigned blkbits = i_blkbits;
+   unsigned blocksize_mask = (1 << blkbits) - 1;
+   unsigned long align = offset | iov_iter_alignment(iter);
+   struct block_device *bdev = inode->i_sb->s_bdev;
+
+   if (align & blocksize_mask) {
+   if (bdev)
+   blkbits = blksize_bits(bdev_logical_block_size(bdev));
+   blocksize_mask = (1 << blkbits) - 1;
+   if (align & blocksize_mask)
+   return -EINVAL;
+   return 1;
+   }
+   return 0;
+}
+
+static ssize_t erofs_direct_IO(struct kiocb *iocb, struct iov_iter *iter)
+{
+   struct address_space *mapping = iocb->ki_filp->f_mapping;
+   struct inode *inode = mapping->host;
+   loff_t offset = iocb->ki_pos;
+   int err;
+
+   err = check_direct_IO(inode, iter, offset);
+   if (err)
+   return err < 0 ? err : 0;
+
+   return __blockdev_direct_IO(iocb, inode, inode->i_sb->s_bdev, iter,
+   erofs_get_block, NULL, NULL,
+   DIO_LOCKING | DIO_SKIP_HOLES);
+}
+
 static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
 {
struct inode *inode = mapping->host;
@@ -336,6 +392,7 @@ static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
 const struct address_space_operations erofs_raw_access_aops = {
.readpage = erofs_raw_access_readpage,
.readahead = erofs_raw_access_readahead,
+   .direct_IO = erofs_direct_IO,
.bmap = erofs_bmap,
 };
 
-- 
2.25.1



Re: [PATCH v5] erofs: avoid using generic_block_bmap

2020-12-09 Thread Huang Jianan

Thanks for reminding me, I will pay attention to this next time.

On 2020/12/10 10:36, Gao Xiang wrote:

Hi Jianan,

On Wed, Dec 09, 2020 at 07:57:40PM +0800, Huang Jianan wrote:

iblock indicates the number of i_blkbits-sized blocks rather than
sectors.

In addition, considering that buffer_head limits the mapped size to
32 bits, we should avoid using generic_block_bmap.

Fixes: 9da681e017a3 ("staging: erofs: support bmap")
Signed-off-by: Huang Jianan 
Signed-off-by: Guo Weichao 

Reviewed-by: Gao Xiang 

Also, I think Chao has sent his Reviewed-by in the previous reply ---
so unless some major modification happens, it needs to be attached to
all new versions as a common practice...

I will apply it later to for-next, thanks for your patch!

Thanks,
Gao Xiang


---
  fs/erofs/data.c | 26 +++---
  1 file changed, 7 insertions(+), 19 deletions(-)

diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index 347be146884c..ea4f693bee22 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -312,27 +312,12 @@ static void erofs_raw_access_readahead(struct readahead_control *rac)
submit_bio(bio);
  }
  
-static int erofs_get_block(struct inode *inode, sector_t iblock,

-  struct buffer_head *bh, int create)
-{
-   struct erofs_map_blocks map = {
-   .m_la = iblock << 9,
-   };
-   int err;
-
-   err = erofs_map_blocks(inode, , EROFS_GET_BLOCKS_RAW);
-   if (err)
-   return err;
-
-   if (map.m_flags & EROFS_MAP_MAPPED)
-   bh->b_blocknr = erofs_blknr(map.m_pa);
-
-   return err;
-}
-
  static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
  {
struct inode *inode = mapping->host;
+   struct erofs_map_blocks map = {
+   .m_la = blknr_to_addr(block),
+   };
  
  	if (EROFS_I(inode)->datalayout == EROFS_INODE_FLAT_INLINE) {

erofs_blk_t blks = i_size_read(inode) >> LOG_BLOCK_SIZE;
@@ -341,7 +326,10 @@ static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
return 0;
}
  
-	return generic_block_bmap(mapping, block, erofs_get_block);

+   if (!erofs_map_blocks(inode, , EROFS_GET_BLOCKS_RAW))
+   return erofs_blknr(map.m_pa);
+
+   return 0;
  }
  
  /* for uncompressed (aligned) files and raw access for other files */

--
2.25.1



[PATCH v5] erofs: avoid using generic_block_bmap

2020-12-09 Thread Huang Jianan
iblock indicates the number of i_blkbits-sized blocks rather than
sectors.

In addition, considering that buffer_head limits the mapped size to
32 bits, we should avoid using generic_block_bmap.

Fixes: 9da681e017a3 ("staging: erofs: support bmap")
Signed-off-by: Huang Jianan 
Signed-off-by: Guo Weichao 
---
 fs/erofs/data.c | 26 +++---
 1 file changed, 7 insertions(+), 19 deletions(-)

diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index 347be146884c..ea4f693bee22 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -312,27 +312,12 @@ static void erofs_raw_access_readahead(struct readahead_control *rac)
submit_bio(bio);
 }
 
-static int erofs_get_block(struct inode *inode, sector_t iblock,
-  struct buffer_head *bh, int create)
-{
-   struct erofs_map_blocks map = {
-   .m_la = iblock << 9,
-   };
-   int err;
-
-   err = erofs_map_blocks(inode, , EROFS_GET_BLOCKS_RAW);
-   if (err)
-   return err;
-
-   if (map.m_flags & EROFS_MAP_MAPPED)
-   bh->b_blocknr = erofs_blknr(map.m_pa);
-
-   return err;
-}
-
 static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
 {
struct inode *inode = mapping->host;
+   struct erofs_map_blocks map = {
+   .m_la = blknr_to_addr(block),
+   };
 
if (EROFS_I(inode)->datalayout == EROFS_INODE_FLAT_INLINE) {
erofs_blk_t blks = i_size_read(inode) >> LOG_BLOCK_SIZE;
@@ -341,7 +326,10 @@ static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
return 0;
}
 
-   return generic_block_bmap(mapping, block, erofs_get_block);
+   if (!erofs_map_blocks(inode, , EROFS_GET_BLOCKS_RAW))
+   return erofs_blknr(map.m_pa);
+
+   return 0;
 }
 
 /* for uncompressed (aligned) files and raw access for other files */
-- 
2.25.1
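
To illustrate the addressing mistake being fixed here: the old code fed
iblock << 9 (a 512-byte sector offset) into erofs_map_blocks(), while the map
expects a byte offset derived from the i_blkbits-sized block number. A tiny
sketch (assuming 4 KiB erofs blocks, i.e. LOG_BLOCK_SIZE == 12):

/* Sketch: byte offset passed to erofs_map_blocks() for logical block 10. */
#include <stdio.h>

#define LOG_BLOCK_SIZE	12

int main(void)
{
	unsigned long long block = 10;

	printf("old (iblock << 9):          %llu\n", block << 9);
	printf("new (blknr_to_addr(block)): %llu\n", block << LOG_BLOCK_SIZE);
	/* prints 5120 vs. 40960: only the latter is the start of block 10 */
	return 0;
}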



Re: [PATCH v4] erofs: avoid using generic_block_bmap

2020-12-09 Thread Huang Jianan

Thanks for the review, I will update it soon.

On 2020/12/9 19:39, Gao Xiang wrote:

Hi Jianan,

On Wed, Dec 09, 2020 at 06:08:41PM +0800, Chao Yu wrote:

On 2020/12/9 10:39, Huang Jianan wrote:

iblock indicates the number of i_blkbits-sized blocks rather than
sectors.

In addition, considering that buffer_head limits the mapped size to
32 bits, we should avoid using generic_block_bmap.

Fixes: 9da681e017a3 ("staging: erofs: support bmap")
Signed-off-by: Huang Jianan 
Signed-off-by: Guo Weichao 

Could you send out an updated version? I might need to pick a point to
freeze the dev branch since it needs some time on linux-next.

Thanks,
Gao Xiang


---
   fs/erofs/data.c | 30 ++
   1 file changed, 10 insertions(+), 20 deletions(-)

diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index 347be146884c..d6ea0a216b57 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -312,36 +312,26 @@ static void erofs_raw_access_readahead(struct readahead_control *rac)
submit_bio(bio);
   }
-static int erofs_get_block(struct inode *inode, sector_t iblock,
-  struct buffer_head *bh, int create)
-{
-   struct erofs_map_blocks map = {
-   .m_la = iblock << 9,
-   };
-   int err;
-
-   err = erofs_map_blocks(inode, , EROFS_GET_BLOCKS_RAW);
-   if (err)
-   return err;
-
-   if (map.m_flags & EROFS_MAP_MAPPED)
-   bh->b_blocknr = erofs_blknr(map.m_pa);
-
-   return err;
-}
-
   static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
   {
struct inode *inode = mapping->host;
+   struct erofs_map_blocks map = {
+   .m_la = blknr_to_addr(block),
+   };
+   sector_t blknr = 0;

It could be removed?


if (EROFS_I(inode)->datalayout == EROFS_INODE_FLAT_INLINE) {
erofs_blk_t blks = i_size_read(inode) >> LOG_BLOCK_SIZE;
if (block >> LOG_SECTORS_PER_BLOCK >= blks)
-   return 0;

return 0;


+   goto out;
}
-   return generic_block_bmap(mapping, block, erofs_get_block);
+   if (!erofs_map_blocks(inode, , EROFS_GET_BLOCKS_RAW))
+   blknr = erofs_blknr(map.m_pa);

return erofs_blknr(map.m_pa);


+
+out:
+   return blknr;

return 0;

Anyway, LGTM.

Reviewed-by: Chao Yu 

Thanks,


   }
   /* for uncompressed (aligned) files and raw access for other files */



[PATCH v4] erofs: avoid using generic_block_bmap

2020-12-08 Thread Huang Jianan
iblock indicates the number of i_blkbits-sized blocks rather than
sectors.

In addition, considering that buffer_head limits the mapped size to
32 bits, we should avoid using generic_block_bmap.

Fixes: 9da681e017a3 ("staging: erofs: support bmap")
Signed-off-by: Huang Jianan 
Signed-off-by: Guo Weichao 
---
 fs/erofs/data.c | 30 ++
 1 file changed, 10 insertions(+), 20 deletions(-)

diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index 347be146884c..d6ea0a216b57 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -312,36 +312,26 @@ static void erofs_raw_access_readahead(struct readahead_control *rac)
submit_bio(bio);
 }
 
-static int erofs_get_block(struct inode *inode, sector_t iblock,
-  struct buffer_head *bh, int create)
-{
-   struct erofs_map_blocks map = {
-   .m_la = iblock << 9,
-   };
-   int err;
-
-   err = erofs_map_blocks(inode, , EROFS_GET_BLOCKS_RAW);
-   if (err)
-   return err;
-
-   if (map.m_flags & EROFS_MAP_MAPPED)
-   bh->b_blocknr = erofs_blknr(map.m_pa);
-
-   return err;
-}
-
 static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
 {
struct inode *inode = mapping->host;
+   struct erofs_map_blocks map = {
+   .m_la = blknr_to_addr(block),
+   };
+   sector_t blknr = 0;
 
if (EROFS_I(inode)->datalayout == EROFS_INODE_FLAT_INLINE) {
erofs_blk_t blks = i_size_read(inode) >> LOG_BLOCK_SIZE;
 
if (block >> LOG_SECTORS_PER_BLOCK >= blks)
-   return 0;
+   goto out;
}
 
-   return generic_block_bmap(mapping, block, erofs_get_block);
+   if (!erofs_map_blocks(inode, , EROFS_GET_BLOCKS_RAW))
+   blknr = erofs_blknr(map.m_pa);
+
+out:
+   return blknr;
 }
 
 /* for uncompressed (aligned) files and raw access for other files */
-- 
2.25.1



Re: [PATCH v3] erofs: avoiding using generic_block_bmap

2020-12-08 Thread Huang Jianan



On 2020/12/8 21:11, Huang Jianan wrote:

iblock indicates the number of i_blkbits-sized blocks rather than
sectors.

In addition, considering that buffer_head limits the mapped size to
32 bits, we should avoid using generic_block_bmap.

Fixes: 9da681e017a3 ("staging: erofs: support bmap")
Signed-off-by: Huang Jianan 
Signed-off-by: Guo Weichao 
---
  fs/erofs/data.c | 30 ++
  1 file changed, 10 insertions(+), 20 deletions(-)

diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index 347be146884c..399ffd857c50 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -312,36 +312,26 @@ static void erofs_raw_access_readahead(struct readahead_control *rac)
submit_bio(bio);
  }
  
-static int erofs_get_block(struct inode *inode, sector_t iblock,

-  struct buffer_head *bh, int create)
-{
-   struct erofs_map_blocks map = {
-   .m_la = iblock << 9,
-   };
-   int err;
-
-   err = erofs_map_blocks(inode, , EROFS_GET_BLOCKS_RAW);
-   if (err)
-   return err;
-
-   if (map.m_flags & EROFS_MAP_MAPPED)
-   bh->b_blocknr = erofs_blknr(map.m_pa);
-
-   return err;
-}
-
  static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
  {
struct inode *inode = mapping->host;
+   struct erofs_map_blocks map = {
+   .m_la = blknr_to_addr(iblock),


Sorry for my mistake, it should be:

.m_la = blknr_to_addr(block),


+   };
+   sector_t blknr = 0;
  
  	if (EROFS_I(inode)->datalayout == EROFS_INODE_FLAT_INLINE) {

erofs_blk_t blks = i_size_read(inode) >> LOG_BLOCK_SIZE;
  
  		if (block >> LOG_SECTORS_PER_BLOCK >= blks)

-   return 0;
+   goto out;
}
  
-	return generic_block_bmap(mapping, block, erofs_get_block);

+   if (!erofs_map_blocks(inode, , EROFS_GET_BLOCKS_RAW))
+   blknr = erofs_blknr(map.m_pa);
+
+out:
+   return blknr;
  }
  
  /* for uncompressed (aligned) files and raw access for other files */


[PATCH v3] erofs: avoiding using generic_block_bmap

2020-12-08 Thread Huang Jianan
iblock indicates the number of i_blkbits-sized blocks rather than
sectors.

In addition, considering that buffer_head limits the mapped size to
32 bits, we should avoid using generic_block_bmap.

Fixes: 9da681e017a3 ("staging: erofs: support bmap")
Signed-off-by: Huang Jianan 
Signed-off-by: Guo Weichao 
---
 fs/erofs/data.c | 30 ++
 1 file changed, 10 insertions(+), 20 deletions(-)

diff --git a/fs/erofs/data.c b/fs/erofs/data.c
index 347be146884c..399ffd857c50 100644
--- a/fs/erofs/data.c
+++ b/fs/erofs/data.c
@@ -312,36 +312,26 @@ static void erofs_raw_access_readahead(struct readahead_control *rac)
submit_bio(bio);
 }
 
-static int erofs_get_block(struct inode *inode, sector_t iblock,
-  struct buffer_head *bh, int create)
-{
-   struct erofs_map_blocks map = {
-   .m_la = iblock << 9,
-   };
-   int err;
-
-   err = erofs_map_blocks(inode, , EROFS_GET_BLOCKS_RAW);
-   if (err)
-   return err;
-
-   if (map.m_flags & EROFS_MAP_MAPPED)
-   bh->b_blocknr = erofs_blknr(map.m_pa);
-
-   return err;
-}
-
 static sector_t erofs_bmap(struct address_space *mapping, sector_t block)
 {
struct inode *inode = mapping->host;
+   struct erofs_map_blocks map = {
+   .m_la = blknr_to_addr(iblock),
+   };
+   sector_t blknr = 0;
 
if (EROFS_I(inode)->datalayout == EROFS_INODE_FLAT_INLINE) {
erofs_blk_t blks = i_size_read(inode) >> LOG_BLOCK_SIZE;
 
if (block >> LOG_SECTORS_PER_BLOCK >= blks)
-   return 0;
+   goto out;
}
 
-   return generic_block_bmap(mapping, block, erofs_get_block);
+   if (!erofs_map_blocks(inode, , EROFS_GET_BLOCKS_RAW))
+   blknr = erofs_blknr(map.m_pa);
+
+out:
+   return blknr;
 }
 
 /* for uncompressed (aligned) files and raw access for other files */
-- 
2.25.1