Re: [f2fs-dev] [PATCH v1] f2fs: Fix system crash due to lack of free space in LFS

2023-03-16 Thread Jaegeuk Kim
On 03/14, Yonggil Song wrote:
> When f2fs tries to checkpoint during foreground gc in LFS mode, a system
> crash occurs due to lack of free space if the amount of dirty node and
> dentry pages generated by data migration exceeds the free space.
> The reproduction sequence is as follows.
> 
>  - 20GiB capacity block device (null_blk)
>  - format and mount with LFS mode
>  - create a file and write 20,000MiB
>  - 4k random write on full range of the file
> 
>  RIP: 0010:new_curseg+0x48a/0x510 [f2fs]
>  Code: 55 e7 f5 89 c0 48 0f af c3 48 8b 5d c0 48 c1 e8 20 83 c0 01 89 43 6c 
> 48 83 c4 28 5b 41 5c 41 5d 41 5e 41 5f 5d c3 cc cc cc cc <0f> 0b f0 41 80 4f 
> 48 04 45 85 f6 0f 84 ba fd ff ff e9 ef fe ff ff
>  RSP: 0018:977bc397b218 EFLAGS: 00010246
>  RAX: 27b9 RBX:  RCX: 27c0
>  RDX:  RSI: 27b9 RDI: 8c25ab4e74f8
>  RBP: 977bc397b268 R08: 27b9 R09: 8c29e4a34b40
>  R10: 0001 R11: 977bc397b0d8 R12: 
>  R13: 8c25b4dd81a0 R14:  R15: 8c2f667f9000
>  FS: () GS:8c344ec8() knlGS:
>  CS: 0010 DS:  ES:  CR0: 80050033
>  CR2: 00c00055d000 CR3: 000e30810003 CR4: 003706e0
>  DR0:  DR1:  DR2: 
>  DR3:  DR6: fffe0ff0 DR7: 0400
>  Call Trace:
>  
>  allocate_segment_by_default+0x9c/0x110 [f2fs]
>  f2fs_allocate_data_block+0x243/0xa30 [f2fs]
>  ? __mod_lruvec_page_state+0xa0/0x150
>  do_write_page+0x80/0x160 [f2fs]
>  f2fs_do_write_node_page+0x32/0x50 [f2fs]
>  __write_node_page+0x339/0x730 [f2fs]
>  f2fs_sync_node_pages+0x5a6/0x780 [f2fs]
>  block_operations+0x257/0x340 [f2fs]
>  f2fs_write_checkpoint+0x102/0x1050 [f2fs]
>  f2fs_gc+0x27c/0x630 [f2fs]
>  ? folio_mark_dirty+0x36/0x70
>  f2fs_balance_fs+0x16f/0x180 [f2fs]
> 
> This patch adds a check for whether there are enough free sections before
> writing a checkpoint during gc.
> 
> Signed-off-by: Yonggil Song 
> ---
>  fs/f2fs/gc.c  |  7 ++-
>  fs/f2fs/segment.h | 26 +-
>  2 files changed, 27 insertions(+), 6 deletions(-)
> 
> diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
> index 4546e01b2ee0..b22f49a6f128 100644
> --- a/fs/f2fs/gc.c
> +++ b/fs/f2fs/gc.c
> @@ -1773,6 +1773,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct 
> f2fs_gc_control *gc_control)
>   .iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
>   };
>   unsigned int skipped_round = 0, round = 0;
> + unsigned int nr_needed_secs = 0, node_blocks = 0, dent_blocks = 0;
>  
>   trace_f2fs_gc_begin(sbi->sb, gc_type, gc_control->no_bg_gc,
>   gc_control->nr_free_secs,
> @@ -1858,8 +1859,12 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct 
> f2fs_gc_control *gc_control)
>   }
>   }
>  
> + /* need three more sections for writer's data/node/dentry */
> + nr_needed_secs = get_min_need_secs(sbi, &node_blocks, &dent_blocks) + 3;

get_min_need_secs(&lower, &upper)
{
...

*lower = node_secs + dent_secs;
*upper = *lower + (node_blocks ? 1 : 0) + (dent_blocks ? 1 : 0);
}

> + nr_needed_secs += ((node_blocks ? 1 : 0) + (dent_blocks ? 1 : 0));
> +
>   /* Write checkpoint to reclaim prefree segments */
> - if (free_sections(sbi) < NR_CURSEG_PERSIST_TYPE &&
> + if (free_sections(sbi) <= nr_needed_secs &&

#define NR_GC_CHECKPOINT_SECS   (3) /* data/node/dentry sections */

if (free_sections(sbi) <= upper + NR_GC_CHECKPOINT_SECS &&

>   prefree_segments(sbi)) {
>   ret = f2fs_write_checkpoint(sbi, &cpc);
>   if (ret)
> diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
> index be8f2d7d007b..ac11c47bfe37 100644
> --- a/fs/f2fs/segment.h
> +++ b/fs/f2fs/segment.h
> @@ -605,8 +605,11 @@ static inline bool has_curseg_enough_space(struct 
> f2fs_sb_info *sbi,
>   return true;
>  }
>  
> -static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
> - int freed, int needed)
> +/*
> + * calculate the minimum number of sections (needed) for dirty node/dentry
> + */
> +static inline unsigned int get_min_need_secs(struct f2fs_sb_info *sbi,
> + unsigned int *node_blocks, unsigned int *dent_blocks)
>  {
>   unsigned int total_node_blocks = get_pages(sbi, F2FS_DIRTY_NODES) +
>   get_pages(sbi, F2FS_DIRTY_DENTS) +
> @@ -614,15 +617,28 @@ static inline bool has_not_enough_free_secs(struct 
> f2fs_sb_info *sbi,
>   unsigned int total_dent_blocks = get_pages(sbi, F2FS_DIRTY_DENTS);
>   unsigned int node_secs = total_node_blocks / CAP_BLKS_PER_SEC(sbi);
>   unsigned int dent_secs = total_dent_blocks / CAP_BLKS_PER_SEC(sbi);
> - unsigned int node_blocks = total_node_blocks % CAP_BLKS_PER_SEC(sbi);
> - 

[f2fs-dev] [PATCH v1] f2fs: Fix system crash due to lack of free space in LFS

2023-03-14 Thread Yonggil Song
When f2fs tries to checkpoint during foreground gc in LFS mode, a system
crash occurs due to lack of free space if the amount of dirty node and
dentry pages generated by data migration exceeds the free space.
The reproduction sequence is as follows.

 - 20GiB capacity block device (null_blk)
 - format and mount with LFS mode
 - create a file and write 20,000MiB
 - 4k random write on full range of the file

 RIP: 0010:new_curseg+0x48a/0x510 [f2fs]
 Code: 55 e7 f5 89 c0 48 0f af c3 48 8b 5d c0 48 c1 e8 20 83 c0 01 89 43 6c 48 
83 c4 28 5b 41 5c 41 5d 41 5e 41 5f 5d c3 cc cc cc cc <0f> 0b f0 41 80 4f 48 04 
45 85 f6 0f 84 ba fd ff ff e9 ef fe ff ff
 RSP: 0018:977bc397b218 EFLAGS: 00010246
 RAX: 27b9 RBX:  RCX: 27c0
 RDX:  RSI: 27b9 RDI: 8c25ab4e74f8
 RBP: 977bc397b268 R08: 27b9 R09: 8c29e4a34b40
 R10: 0001 R11: 977bc397b0d8 R12: 
 R13: 8c25b4dd81a0 R14:  R15: 8c2f667f9000
 FS: () GS:8c344ec8() knlGS:
 CS: 0010 DS:  ES:  CR0: 80050033
 CR2: 00c00055d000 CR3: 000e30810003 CR4: 003706e0
 DR0:  DR1:  DR2: 
 DR3:  DR6: fffe0ff0 DR7: 0400
 Call Trace:
 
 allocate_segment_by_default+0x9c/0x110 [f2fs]
 f2fs_allocate_data_block+0x243/0xa30 [f2fs]
 ? __mod_lruvec_page_state+0xa0/0x150
 do_write_page+0x80/0x160 [f2fs]
 f2fs_do_write_node_page+0x32/0x50 [f2fs]
 __write_node_page+0x339/0x730 [f2fs]
 f2fs_sync_node_pages+0x5a6/0x780 [f2fs]
 block_operations+0x257/0x340 [f2fs]
 f2fs_write_checkpoint+0x102/0x1050 [f2fs]
 f2fs_gc+0x27c/0x630 [f2fs]
 ? folio_mark_dirty+0x36/0x70
 f2fs_balance_fs+0x16f/0x180 [f2fs]

This patch adds a check for whether there are enough free sections before
writing a checkpoint during gc.

Signed-off-by: Yonggil Song 
---
 fs/f2fs/gc.c  |  7 ++-
 fs/f2fs/segment.h | 26 +-
 2 files changed, 27 insertions(+), 6 deletions(-)

diff --git a/fs/f2fs/gc.c b/fs/f2fs/gc.c
index 4546e01b2ee0..b22f49a6f128 100644
--- a/fs/f2fs/gc.c
+++ b/fs/f2fs/gc.c
@@ -1773,6 +1773,7 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct 
f2fs_gc_control *gc_control)
.iroot = RADIX_TREE_INIT(gc_list.iroot, GFP_NOFS),
};
unsigned int skipped_round = 0, round = 0;
+   unsigned int nr_needed_secs = 0, node_blocks = 0, dent_blocks = 0;
 
trace_f2fs_gc_begin(sbi->sb, gc_type, gc_control->no_bg_gc,
gc_control->nr_free_secs,
@@ -1858,8 +1859,12 @@ int f2fs_gc(struct f2fs_sb_info *sbi, struct 
f2fs_gc_control *gc_control)
}
}
 
+   /* need three more sections for writer's data/node/dentry */
+   nr_needed_secs = get_min_need_secs(sbi, &node_blocks, &dent_blocks) + 3;
+   nr_needed_secs += ((node_blocks ? 1 : 0) + (dent_blocks ? 1 : 0));
+
/* Write checkpoint to reclaim prefree segments */
-   if (free_sections(sbi) < NR_CURSEG_PERSIST_TYPE &&
+   if (free_sections(sbi) <= nr_needed_secs &&
prefree_segments(sbi)) {
ret = f2fs_write_checkpoint(sbi, &cpc);
if (ret)
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index be8f2d7d007b..ac11c47bfe37 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -605,8 +605,11 @@ static inline bool has_curseg_enough_space(struct 
f2fs_sb_info *sbi,
return true;
 }
 
-static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
-   int freed, int needed)
+/*
+ * calculate the minimum number of sections (needed) for dirty node/dentry
+ */
+static inline unsigned int get_min_need_secs(struct f2fs_sb_info *sbi,
+   unsigned int *node_blocks, unsigned int *dent_blocks)
 {
unsigned int total_node_blocks = get_pages(sbi, F2FS_DIRTY_NODES) +
get_pages(sbi, F2FS_DIRTY_DENTS) +
@@ -614,15 +617,28 @@ static inline bool has_not_enough_free_secs(struct 
f2fs_sb_info *sbi,
unsigned int total_dent_blocks = get_pages(sbi, F2FS_DIRTY_DENTS);
unsigned int node_secs = total_node_blocks / CAP_BLKS_PER_SEC(sbi);
unsigned int dent_secs = total_dent_blocks / CAP_BLKS_PER_SEC(sbi);
-   unsigned int node_blocks = total_node_blocks % CAP_BLKS_PER_SEC(sbi);
-   unsigned int dent_blocks = total_dent_blocks % CAP_BLKS_PER_SEC(sbi);
+
+   f2fs_bug_on(sbi, (!node_blocks || !dent_blocks));
+
+   *node_blocks = total_node_blocks % CAP_BLKS_PER_SEC(sbi);
+   *dent_blocks = total_dent_blocks % CAP_BLKS_PER_SEC(sbi);
+
+   return (node_secs + dent_secs);
+}
+
+static inline bool has_not_enough_free_secs(struct f2fs_sb_info *sbi,
+   int freed, int needed)
+{
+   unsigned int node_blocks = 0;
+   unsigned int dent_blocks = 0;