Re: [f2fs-dev] [PATCH v4] f2fs: introduce a method to make nat journal more fresh

2018-04-09 Thread heyunlei


>-Original Message-
>From: Jaegeuk Kim [mailto:jaeg...@kernel.org]
>Sent: Tuesday, April 10, 2018 12:14 PM
>To: heyunlei
>Cc: Yuchao (T); linux-f2fs-devel@lists.sourceforge.net; Wangbintian
>Subject: Re: [f2fs-dev][PATCH v4] f2fs: introduce a method to make nat journal 
>more fresh
>
>Yunlei,
>
>Could you please post all the related patches including Chao's diff?

Okay, I'll do it.

Thanks.
>
>On 03/16, Yunlei He wrote:
>> This patch introduce a method to make nat journal more fresh:
>> i.  sort set list using dirty entry number and cp version
>> average value.
>> ii. if meet with cache hit, update average version valus with
>> current cp version.
>>
>> With this patch, newly modified nat set will flush to journal,
>> and flush old nat set with same dirty entry number to nat area.
>>
>> Signed-off-by: Yunlei He 
>> ---
>>  fs/f2fs/node.c | 39 ---
>>  fs/f2fs/node.h |  4 +++-
>>  2 files changed, 27 insertions(+), 16 deletions(-)
>>
>> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
>> index 177c438..c511ef6 100644
>> --- a/fs/f2fs/node.c
>> +++ b/fs/f2fs/node.c
>> @@ -193,8 +193,8 @@ static void __del_from_nat_cache(struct f2fs_nm_info 
>> *nm_i, struct nat_entry *e)
>>  __free_nat_entry(e);
>>  }
>>
>> -static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
>> -struct nat_entry *ne)
>> +static void __set_nat_cache_dirty(struct f2fs_sb_info *sbi, bool 
>> remove_journal,
>> +struct f2fs_nm_info *nm_i, struct nat_entry *ne)
>>  {
>>  nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid);
>>  struct nat_entry_set *head;
>> @@ -207,12 +207,18 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info 
>> *nm_i,
>>  INIT_LIST_HEAD(>set_list);
>>  head->set = set;
>>  head->entry_cnt = 0;
>> +head->cp_ver = 0;
>>  f2fs_radix_tree_insert(_i->nat_set_root, set, head);
>>  }
>>
>>  if (get_nat_flag(ne, IS_DIRTY))
>>  goto refresh_list;
>>
>> +/* journal hit case, try to locate set in journal */
>> +if (!remove_journal && head->entry_cnt <= NAT_JOURNAL_ENTRIES)
>> +head->cp_ver += div_u64(cur_cp_version(F2FS_CKPT(sbi)),
>> +NAT_JOURNAL_ENTRIES);
>> +
>>  nm_i->dirty_nat_cnt++;
>>  head->entry_cnt++;
>>  set_nat_flag(ne, IS_DIRTY, true);
>> @@ -357,7 +363,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, 
>> struct node_info *ni,
>>  nat_set_blkaddr(e, new_blkaddr);
>>  if (new_blkaddr == NEW_ADDR || new_blkaddr == NULL_ADDR)
>>  set_nat_flag(e, IS_CHECKPOINTED, false);
>> -__set_nat_cache_dirty(nm_i, e);
>> +__set_nat_cache_dirty(sbi, false, nm_i, e);
>>
>>  /* update fsync_mark if its inode nat entry is still alive */
>>  if (ni->nid != ni->ino)
>> @@ -2395,14 +2401,14 @@ static void remove_nats_in_journal(struct 
>> f2fs_sb_info *sbi)
>>  spin_unlock(_i->nid_list_lock);
>>  }
>>
>> -__set_nat_cache_dirty(nm_i, ne);
>> +__set_nat_cache_dirty(sbi, true, nm_i, ne);
>>  }
>>  update_nats_in_cursum(journal, -i);
>>  up_write(>journal_rwsem);
>>  }
>>
>> -static void __adjust_nat_entry_set(struct nat_entry_set *nes,
>> -struct list_head *head, int max)
>> +static void __adjust_nat_entry_set(struct f2fs_sb_info *sbi,
>> +struct nat_entry_set *nes, struct list_head *head, int max)
>>  {
>>  struct nat_entry_set *cur;
>>
>> @@ -2410,7 +2416,9 @@ static void __adjust_nat_entry_set(struct 
>> nat_entry_set *nes,
>>  goto add_out;
>>
>>  list_for_each_entry(cur, head, set_list) {
>> -if (cur->entry_cnt >= nes->entry_cnt) {
>> +if (cur->entry_cnt > nes->entry_cnt ||
>> +(cur->entry_cnt == nes->entry_cnt &&
>> +cur->cp_ver < nes->cp_ver)) {
>>  list_add(>set_list, cur->set_list.prev);
>>  return;
>>  }
>> @@ -2458,7 +2466,6 @@ static void __flush_nat_entry_set(struct f2fs_sb_info 
>> *sbi,
>>  struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
>>  struct f2fs_journal *journal = curseg->journal;
>>  nid_t start_nid = set->set * NAT_ENTRY_PER_BLOCK;
>> -bool to_journal = true;
>>  struct f2fs_nat_block *nat_blk;
>>  struct nat_entry *ne, *cur;
>>  struct page *page = NULL;
>> @@ -2468,11 +2475,14 @@ static void __flush_nat_entry_set(struct 
>> f2fs_sb_info *sbi,
>>   * #1, flush nat entries to journal in current hot data summary block.
>>   * #2, flush nat entries to nat page.
>>   */
>> +
>> +set->to_journal = true;
>> +
>>  if (enabled_nat_bits(sbi, cpc) ||
>>  !__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
>> -to_journal = false;
>> + 

Re: [f2fs-dev] [PATCH] fsck.f2fs: recover nat bits feature default by fsck

2018-04-09 Thread heyunlei


>-Original Message-
>From: Jaegeuk Kim [mailto:jaeg...@kernel.org]
>Sent: Tuesday, April 10, 2018 12:01 PM
>To: heyunlei
>Cc: Yuchao (T); linux-f2fs-devel@lists.sourceforge.net; Wangbintian; 
>Zhangdianfang (Euler)
>Subject: Re: [f2fs-dev][PATCH] fsck.f2fs: recover nat bits feature default by 
>fsck
>
>On 04/09, Yunlei He wrote:
>> Now, nat bits feature is enabled by default, we will
>> meet with the following scenarios:
>>
>> i.   disabled, without CP_NAT_BITS_FLAG, if fsck find some
>>  fs errors, fix or write new checkpoint will then enable it.
>> ii.  enabled, with CP_NAT_BITS_FLAG, in the case of sudden
>>  power off, bitmap will get lost but CP_NAT_BITS_FLAG
>>  still exist, fsck will recover bitmap in f2fs_do_mount.
>> iii. enabled, with CP_NAT_BITS_FLAG, both of bitmap and
>>  CP_NAT_BITS_FLAG will get lost if not enough space for
>>  nat bits or nat bits check failed during mounting.
>>  SBI_NEED_FSCK is set, fsck will recover flag and bitmap
>>  before next mount.
>>
>> SBI_NEED_FSCK means fs is corrupted, is not suitable for
>> nat bits disabled. This patch try to recover nat bits all
>> by fsck, no need set SBI_NEED_FSCK flag in kernel.
>>
>> Signed-off-by: Yunlei He 
>> ---
>>  fsck/mount.c | 15 ++-
>>  1 file changed, 10 insertions(+), 5 deletions(-)
>>
>> diff --git a/fsck/mount.c b/fsck/mount.c
>> index e5574c5..2361ee0 100644
>> --- a/fsck/mount.c
>> +++ b/fsck/mount.c
>> @@ -2389,7 +2389,7 @@ int f2fs_do_mount(struct f2fs_sb_info *sbi)
>>  }
>>
>>  /* Check nat_bits */
>> -if (c.func != DUMP && is_set_ckpt_flags(cp, CP_NAT_BITS_FLAG)) {
>> +if (c.func != DUMP) {
>>  u_int32_t nat_bits_bytes, nat_bits_blocks;
>>  __le64 *kaddr;
>>  u_int32_t blk;
>> @@ -2406,10 +2406,15 @@ int f2fs_do_mount(struct f2fs_sb_info *sbi)
>>  kaddr = malloc(PAGE_SIZE);
>>  ret = dev_read_block(kaddr, blk);
>>  ASSERT(ret >= 0);
>> -if (*kaddr != get_cp_crc(cp))
>> -write_nat_bits(sbi, sb, cp, sbi->cur_cp);
>> -else
>> -MSG(0, "Info: Found valid nat_bits in checkpoint\n");
>> +if(is_set_ckpt_flags(cp, CP_NAT_BITS_FLAG)) {
>> +if (*kaddr != get_cp_crc(cp))
>> +write_nat_bits(sbi, sb, cp, sbi->cur_cp);
>> +else
>> +MSG(0, "Info: Found valid nat_bits in 
>> checkpoint\n");
>> +} else if (c.func == FSCK){
>> +ASSERT_MSG("Need to recover nat_bits.");
>> +c.fix_on = 1;
>
>What if kernel doesn't support this?

Fix or write checkpoint now will enable nat bits by default if cp space is 
enough,
So maybe it will not affect kernel not supporting nat bits?

>
>> +}
>>  free(kaddr);
>>  }
>>  return 0;
>> --
>> 1.9.1

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH v4] f2fs: stop issue discard if something wrong with f2fs

2018-04-09 Thread Yunlei He
This patch stop async thread and umount process to issue discard
if something wrong with f2fs, which is similar to fstrim.

Signed-off-by: Yunlei He 
---
 fs/f2fs/segment.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 5854cc4..1659985 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1202,6 +1202,9 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
int i, iter = 0, issued = 0;
bool io_interrupted = false;
 
+   if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
+   return issued;
+
for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
if (i + 1 < dpolicy->granularity)
break;
@@ -1410,6 +1413,8 @@ static int issue_discard_thread(void *data)
continue;
if (kthread_should_stop())
return 0;
+   if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
+   continue;
 
if (dcc->discard_wake)
dcc->discard_wake = 0;
-- 
1.9.1


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH v4] f2fs: introduce a method to make nat journal more fresh

2018-04-09 Thread Jaegeuk Kim
Yunlei,

Could you please post all the related patches including Chao's diff?

On 03/16, Yunlei He wrote:
> This patch introduce a method to make nat journal more fresh:
> i.  sort set list using dirty entry number and cp version
> average value.
> ii. if meet with cache hit, update average version valus with
> current cp version.
> 
> With this patch, newly modified nat set will flush to journal,
> and flush old nat set with same dirty entry number to nat area.
> 
> Signed-off-by: Yunlei He 
> ---
>  fs/f2fs/node.c | 39 ---
>  fs/f2fs/node.h |  4 +++-
>  2 files changed, 27 insertions(+), 16 deletions(-)
> 
> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> index 177c438..c511ef6 100644
> --- a/fs/f2fs/node.c
> +++ b/fs/f2fs/node.c
> @@ -193,8 +193,8 @@ static void __del_from_nat_cache(struct f2fs_nm_info 
> *nm_i, struct nat_entry *e)
>   __free_nat_entry(e);
>  }
>  
> -static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
> - struct nat_entry *ne)
> +static void __set_nat_cache_dirty(struct f2fs_sb_info *sbi, bool 
> remove_journal,
> + struct f2fs_nm_info *nm_i, struct nat_entry *ne)
>  {
>   nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid);
>   struct nat_entry_set *head;
> @@ -207,12 +207,18 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info 
> *nm_i,
>   INIT_LIST_HEAD(>set_list);
>   head->set = set;
>   head->entry_cnt = 0;
> + head->cp_ver = 0;
>   f2fs_radix_tree_insert(_i->nat_set_root, set, head);
>   }
>  
>   if (get_nat_flag(ne, IS_DIRTY))
>   goto refresh_list;
>  
> + /* journal hit case, try to locate set in journal */
> + if (!remove_journal && head->entry_cnt <= NAT_JOURNAL_ENTRIES)
> + head->cp_ver += div_u64(cur_cp_version(F2FS_CKPT(sbi)),
> + NAT_JOURNAL_ENTRIES);
> +
>   nm_i->dirty_nat_cnt++;
>   head->entry_cnt++;
>   set_nat_flag(ne, IS_DIRTY, true);
> @@ -357,7 +363,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, 
> struct node_info *ni,
>   nat_set_blkaddr(e, new_blkaddr);
>   if (new_blkaddr == NEW_ADDR || new_blkaddr == NULL_ADDR)
>   set_nat_flag(e, IS_CHECKPOINTED, false);
> - __set_nat_cache_dirty(nm_i, e);
> + __set_nat_cache_dirty(sbi, false, nm_i, e);
>  
>   /* update fsync_mark if its inode nat entry is still alive */
>   if (ni->nid != ni->ino)
> @@ -2395,14 +2401,14 @@ static void remove_nats_in_journal(struct 
> f2fs_sb_info *sbi)
>   spin_unlock(_i->nid_list_lock);
>   }
>  
> - __set_nat_cache_dirty(nm_i, ne);
> + __set_nat_cache_dirty(sbi, true, nm_i, ne);
>   }
>   update_nats_in_cursum(journal, -i);
>   up_write(>journal_rwsem);
>  }
>  
> -static void __adjust_nat_entry_set(struct nat_entry_set *nes,
> - struct list_head *head, int max)
> +static void __adjust_nat_entry_set(struct f2fs_sb_info *sbi,
> + struct nat_entry_set *nes, struct list_head *head, int max)
>  {
>   struct nat_entry_set *cur;
>  
> @@ -2410,7 +2416,9 @@ static void __adjust_nat_entry_set(struct nat_entry_set 
> *nes,
>   goto add_out;
>  
>   list_for_each_entry(cur, head, set_list) {
> - if (cur->entry_cnt >= nes->entry_cnt) {
> + if (cur->entry_cnt > nes->entry_cnt ||
> + (cur->entry_cnt == nes->entry_cnt &&
> + cur->cp_ver < nes->cp_ver)) {
>   list_add(>set_list, cur->set_list.prev);
>   return;
>   }
> @@ -2458,7 +2466,6 @@ static void __flush_nat_entry_set(struct f2fs_sb_info 
> *sbi,
>   struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
>   struct f2fs_journal *journal = curseg->journal;
>   nid_t start_nid = set->set * NAT_ENTRY_PER_BLOCK;
> - bool to_journal = true;
>   struct f2fs_nat_block *nat_blk;
>   struct nat_entry *ne, *cur;
>   struct page *page = NULL;
> @@ -2468,11 +2475,14 @@ static void __flush_nat_entry_set(struct f2fs_sb_info 
> *sbi,
>* #1, flush nat entries to journal in current hot data summary block.
>* #2, flush nat entries to nat page.
>*/
> +
> + set->to_journal = true;
> +
>   if (enabled_nat_bits(sbi, cpc) ||
>   !__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
> - to_journal = false;
> + set->to_journal = false;
>  
> - if (to_journal) {
> + if (set->to_journal) {
>   down_write(>journal_rwsem);
>   } else {
>   page = get_next_nat_page(sbi, start_nid);
> @@ -2488,7 +2498,7 @@ static void __flush_nat_entry_set(struct f2fs_sb_info 
> *sbi,
>  
>   f2fs_bug_on(sbi, nat_get_blkaddr(ne) == 

Re: [f2fs-dev] [PATCH] f2fs: enlarge block plug coverage

2018-04-09 Thread Jaegeuk Kim
On 04/10, Chao Yu wrote:
> On 2018/4/10 2:02, Jaegeuk Kim wrote:
> > On 04/08, Chao Yu wrote:
> >> On 2018/4/5 11:51, Jaegeuk Kim wrote:
> >>> On 04/04, Chao Yu wrote:
>  This patch enlarges block plug coverage in __issue_discard_cmd, in
>  order to collect more pending bios before issuing them, to avoid
>  being disturbed by previous discard I/O in IO aware discard mode.
> >>>
> >>> Hmm, then we need to wait for huge discard IO for over 10 secs, which
> >>
> >> We found that total discard latency is rely on total discard number we 
> >> issued
> >> last time instead of range or length discard covered. IMO, if we don't 
> >> change
> >> .max_requests value, we will not suffer longer latency.
> >>
> >>> will affect following read/write IOs accordingly. In order to avoid that,
> >>> we actually need to limit the discard size.
> 
> Do you mean limit discard count or discard length?

Both of them.

> 
> >>
> >> If you are worry about I/O interference in between discard and rw, I 
> >> suggest to
> >> decrease .max_requests value.
> > 
> > What do you mean? This will produce more pending requests in the queue?
> 
> I mean after applying this patch, we can queue more discard IOs in plug inside
> task, otherwise, previous issued discard in block layer can make is_idle() be 
> false,
> then it can stop IO awared user to issue pending discard command.

Then, unplug will issue lots of discard commands, which affects the following rw
latencies. My preference would be issuing discard commands one by one as much as
possible.

> 
> Thanks,
> 
> > 
> >>
> >> Thanks,
> >>
> >>>
> >>> Thanks,
> >>>
> 
>  Signed-off-by: Chao Yu 
>  ---
>   fs/f2fs/segment.c | 7 +--
>   1 file changed, 5 insertions(+), 2 deletions(-)
> 
>  diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>  index 8f0b5ba46315..4287e208c040 100644
>  --- a/fs/f2fs/segment.c
>  +++ b/fs/f2fs/segment.c
>  @@ -1208,10 +1208,12 @@ static int __issue_discard_cmd(struct 
>  f2fs_sb_info *sbi,
>   pend_list = >pend_list[i];
>   
>   mutex_lock(>cmd_lock);
>  +
>  +blk_start_plug();
>  +
>   if (list_empty(pend_list))
>   goto next;
>   f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, 
>  >root));
>  -blk_start_plug();
>   list_for_each_entry_safe(dc, tmp, pend_list, list) {
>   f2fs_bug_on(sbi, dc->state != D_PREP);
>   
>  @@ -1227,8 +1229,9 @@ static int __issue_discard_cmd(struct f2fs_sb_info 
>  *sbi,
>   if (++iter >= dpolicy->max_requests)
>   break;
>   }
>  -blk_finish_plug();
>   next:
>  +blk_finish_plug();
>  +
>   mutex_unlock(>cmd_lock);
>   
>   if (iter >= dpolicy->max_requests)
>  -- 
>  2.15.0.55.gc2ece9dc4de6
> >>>
> >>> .
> >>>
> > 
> > .
> > 

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] f2fs: disable nat bits without setting SBI_NEED_FSCK

2018-04-09 Thread Jaegeuk Kim
On 04/09, Yunlei He wrote:
> nat bits feature is recover default by fsck, no need to
> set SBI_NEED_FSCK flag.
> 
> Signed-off-by: Yunlei He 
> ---
>  fs/f2fs/f2fs.h | 2 --
>  1 file changed, 2 deletions(-)
> 
> diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h
> index 1df7f10..0ffaefc2 100644
> --- a/fs/f2fs/f2fs.h
> +++ b/fs/f2fs/f2fs.h
> @@ -1520,8 +1520,6 @@ static inline void disable_nat_bits(struct f2fs_sb_info 
> *sbi, bool lock)
>  {
>   unsigned long flags;
>  
> - set_sbi_flag(sbi, SBI_NEED_FSCK);
> -

In order to support old fsck.f2fs, we need to keep this.

>   if (lock)
>   spin_lock_irqsave(>cp_lock, flags);
>   __clear_ckpt_flags(F2FS_CKPT(sbi), CP_NAT_BITS_FLAG);
> -- 
> 1.9.1

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] fsck.f2fs: recover nat bits feature default by fsck

2018-04-09 Thread Jaegeuk Kim
On 04/09, Yunlei He wrote:
> Now, nat bits feature is enabled by default, we will
> meet with the following scenarios:
> 
> i.   disabled, without CP_NAT_BITS_FLAG, if fsck find some
>  fs errors, fix or write new checkpoint will then enable it.
> ii.  enabled, with CP_NAT_BITS_FLAG, in the case of sudden
>  power off, bitmap will get lost but CP_NAT_BITS_FLAG
>  still exist, fsck will recover bitmap in f2fs_do_mount.
> iii. enabled, with CP_NAT_BITS_FLAG, both of bitmap and
>  CP_NAT_BITS_FLAG will get lost if not enough space for
>  nat bits or nat bits check failed during mounting.
>  SBI_NEED_FSCK is set, fsck will recover flag and bitmap
>  before next mount.
> 
> SBI_NEED_FSCK means fs is corrupted, is not suitable for
> nat bits disabled. This patch try to recover nat bits all
> by fsck, no need set SBI_NEED_FSCK flag in kernel.
> 
> Signed-off-by: Yunlei He 
> ---
>  fsck/mount.c | 15 ++-
>  1 file changed, 10 insertions(+), 5 deletions(-)
> 
> diff --git a/fsck/mount.c b/fsck/mount.c
> index e5574c5..2361ee0 100644
> --- a/fsck/mount.c
> +++ b/fsck/mount.c
> @@ -2389,7 +2389,7 @@ int f2fs_do_mount(struct f2fs_sb_info *sbi)
>   }
>  
>   /* Check nat_bits */
> - if (c.func != DUMP && is_set_ckpt_flags(cp, CP_NAT_BITS_FLAG)) {
> + if (c.func != DUMP) {
>   u_int32_t nat_bits_bytes, nat_bits_blocks;
>   __le64 *kaddr;
>   u_int32_t blk;
> @@ -2406,10 +2406,15 @@ int f2fs_do_mount(struct f2fs_sb_info *sbi)
>   kaddr = malloc(PAGE_SIZE);
>   ret = dev_read_block(kaddr, blk);
>   ASSERT(ret >= 0);
> - if (*kaddr != get_cp_crc(cp))
> - write_nat_bits(sbi, sb, cp, sbi->cur_cp);
> - else
> - MSG(0, "Info: Found valid nat_bits in checkpoint\n");
> + if(is_set_ckpt_flags(cp, CP_NAT_BITS_FLAG)) {
> + if (*kaddr != get_cp_crc(cp))
> + write_nat_bits(sbi, sb, cp, sbi->cur_cp);
> + else
> + MSG(0, "Info: Found valid nat_bits in 
> checkpoint\n");
> + } else if (c.func == FSCK){
> + ASSERT_MSG("Need to recover nat_bits.");
> + c.fix_on = 1;

What if kernel doesn't support this?

> + }
>   free(kaddr);
>   }
>   return 0;
> -- 
> 1.9.1

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] f2fs: don't use GFP_ZERO for page caches

2018-04-09 Thread Jaegeuk Kim
On 04/10, Chao Yu wrote:
> On 2018/4/10 3:00, Jaegeuk Kim wrote:
> > From: Chao Yu 
> > 
> > Related to https://lkml.org/lkml/2018/4/8/661
> > 
> > Sometimes, we need to write meta data to new allocated block address,
> > then we will allocate a zeroed page in inner inode's address space, and
> > fill partial data in it, and leave other place with zero value which means
> > some fields are initial status.
> > 
> > There are two inner inodes (meta inode and node inode) setting __GFP_ZERO,
> > I have just checked them, for both of them, we can avoid using __GFP_ZERO,
> > and do initialization by ourselves to avoid unneeded/redundant zeroing
> > from mm.
> > 
> > Cc: 
> > Signed-off-by: Chao Yu 
> > Signed-off-by: Jaegeuk Kim 
> > ---
> >  fs/f2fs/inode.c| 4 ++--
> >  fs/f2fs/node.c | 6 --
> >  fs/f2fs/node.h | 7 ++-
> >  fs/f2fs/recovery.c | 3 +--
> >  4 files changed, 9 insertions(+), 11 deletions(-)
> > 
> > diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
> > index 417c9dcd0269..87535bf63421 100644
> > --- a/fs/f2fs/inode.c
> > +++ b/fs/f2fs/inode.c
> > @@ -320,10 +320,10 @@ struct inode *f2fs_iget(struct super_block *sb, 
> > unsigned long ino)
> >  make_now:
> > if (ino == F2FS_NODE_INO(sbi)) {
> > inode->i_mapping->a_ops = _node_aops;
> > -   mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
> > +   mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
> > } else if (ino == F2FS_META_INO(sbi)) {
> > inode->i_mapping->a_ops = _meta_aops;
> > -   mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
> > +   mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
> > } else if (S_ISREG(inode->i_mode)) {
> > inode->i_op = _file_inode_operations;
> > inode->i_fop = _file_operations;
> > diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> > index 9a99243054ba..6fc3311820ec 100644
> > --- a/fs/f2fs/node.c
> > +++ b/fs/f2fs/node.c
> > @@ -1096,7 +1096,8 @@ struct page *new_node_page(struct dnode_of_data *dn, 
> > unsigned int ofs)
> > set_node_addr(sbi, _ni, NEW_ADDR, false);
> >  
> > f2fs_wait_on_page_writeback(page, NODE, true);
> > -   fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
> > +   memset(F2FS_NODE(page), 0, PAGE_SIZE);
> > +   fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs);
> > set_cold_node(page, S_ISDIR(dn->inode->i_mode));
> > if (!PageUptodate(page))
> > SetPageUptodate(page);
> > @@ -2311,7 +2312,8 @@ int recover_inode_page(struct f2fs_sb_info *sbi, 
> > struct page *page)
> >  
> > if (!PageUptodate(ipage))
> > SetPageUptodate(ipage);
> > -   fill_node_footer(ipage, ino, ino, 0, true);
> > +   memset(F2FS_NODE(page), 0, PAGE_SIZE);
> 
> At a glance, should be memset(F2FS_NODE(ipage), 0, PAGE_SIZE);

Actually, we don't need to do this, since fill_node_footer(true) will reset the
page.

> 
> Sorry about that.
> 
> Thanks,
> 
> > +   fill_node_footer(ipage, ino, ino, 0);
> > set_cold_node(page, false);
> >  
> > src = F2FS_INODE(page);
> > diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
> > index b95e49e4a928..42cd081114ab 100644
> > --- a/fs/f2fs/node.h
> > +++ b/fs/f2fs/node.h
> > @@ -263,15 +263,12 @@ static inline block_t next_blkaddr_of_node(struct 
> > page *node_page)
> >  }
> >  
> >  static inline void fill_node_footer(struct page *page, nid_t nid,
> > -   nid_t ino, unsigned int ofs, bool reset)
> > +   nid_t ino, unsigned int ofs)
> >  {
> > struct f2fs_node *rn = F2FS_NODE(page);
> > unsigned int old_flag = 0;
> >  
> > -   if (reset)
> > -   memset(rn, 0, sizeof(*rn));
> > -   else
> > -   old_flag = le32_to_cpu(rn->footer.flag);
> > +   old_flag = le32_to_cpu(rn->footer.flag);
> >  
> > rn->footer.nid = cpu_to_le32(nid);
> > rn->footer.ino = cpu_to_le32(ino);
> > diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
> > index 1b23d3febe4c..de24f3247aa5 100644
> > --- a/fs/f2fs/recovery.c
> > +++ b/fs/f2fs/recovery.c
> > @@ -540,8 +540,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, 
> > struct inode *inode,
> > }
> >  
> > copy_node_footer(dn.node_page, page);
> > -   fill_node_footer(dn.node_page, dn.nid, ni.ino,
> > -   ofs_of_node(page), false);
> > +   fill_node_footer(dn.node_page, dn.nid, ni.ino, ofs_of_node(page));
> > set_page_dirty(dn.node_page);
> >  err:
> > f2fs_put_dnode();
> > 

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] f2fs: don't use GFP_ZERO for page caches

2018-04-09 Thread Chao Yu
On 2018/4/10 3:00, Jaegeuk Kim wrote:
> From: Chao Yu 
> 
> Related to https://lkml.org/lkml/2018/4/8/661
> 
> Sometimes, we need to write meta data to new allocated block address,
> then we will allocate a zeroed page in inner inode's address space, and
> fill partial data in it, and leave other place with zero value which means
> some fields are initial status.
> 
> There are two inner inodes (meta inode and node inode) setting __GFP_ZERO,
> I have just checked them, for both of them, we can avoid using __GFP_ZERO,
> and do initialization by ourselves to avoid unneeded/redundant zeroing
> from mm.
> 
> Cc: 
> Signed-off-by: Chao Yu 
> Signed-off-by: Jaegeuk Kim 
> ---
>  fs/f2fs/inode.c| 4 ++--
>  fs/f2fs/node.c | 6 --
>  fs/f2fs/node.h | 7 ++-
>  fs/f2fs/recovery.c | 3 +--
>  4 files changed, 9 insertions(+), 11 deletions(-)
> 
> diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
> index 417c9dcd0269..87535bf63421 100644
> --- a/fs/f2fs/inode.c
> +++ b/fs/f2fs/inode.c
> @@ -320,10 +320,10 @@ struct inode *f2fs_iget(struct super_block *sb, 
> unsigned long ino)
>  make_now:
>   if (ino == F2FS_NODE_INO(sbi)) {
>   inode->i_mapping->a_ops = _node_aops;
> - mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
> + mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
>   } else if (ino == F2FS_META_INO(sbi)) {
>   inode->i_mapping->a_ops = _meta_aops;
> - mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
> + mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
>   } else if (S_ISREG(inode->i_mode)) {
>   inode->i_op = _file_inode_operations;
>   inode->i_fop = _file_operations;
> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> index 9a99243054ba..6fc3311820ec 100644
> --- a/fs/f2fs/node.c
> +++ b/fs/f2fs/node.c
> @@ -1096,7 +1096,8 @@ struct page *new_node_page(struct dnode_of_data *dn, 
> unsigned int ofs)
>   set_node_addr(sbi, _ni, NEW_ADDR, false);
>  
>   f2fs_wait_on_page_writeback(page, NODE, true);
> - fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
> + memset(F2FS_NODE(page), 0, PAGE_SIZE);
> + fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs);
>   set_cold_node(page, S_ISDIR(dn->inode->i_mode));
>   if (!PageUptodate(page))
>   SetPageUptodate(page);
> @@ -2311,7 +2312,8 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct 
> page *page)
>  
>   if (!PageUptodate(ipage))
>   SetPageUptodate(ipage);
> - fill_node_footer(ipage, ino, ino, 0, true);
> + memset(F2FS_NODE(page), 0, PAGE_SIZE);

At a glance, should be memset(F2FS_NODE(ipage), 0, PAGE_SIZE);

Sorry about that.

Thanks,

> + fill_node_footer(ipage, ino, ino, 0);
>   set_cold_node(page, false);
>  
>   src = F2FS_INODE(page);
> diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
> index b95e49e4a928..42cd081114ab 100644
> --- a/fs/f2fs/node.h
> +++ b/fs/f2fs/node.h
> @@ -263,15 +263,12 @@ static inline block_t next_blkaddr_of_node(struct page 
> *node_page)
>  }
>  
>  static inline void fill_node_footer(struct page *page, nid_t nid,
> - nid_t ino, unsigned int ofs, bool reset)
> + nid_t ino, unsigned int ofs)
>  {
>   struct f2fs_node *rn = F2FS_NODE(page);
>   unsigned int old_flag = 0;
>  
> - if (reset)
> - memset(rn, 0, sizeof(*rn));
> - else
> - old_flag = le32_to_cpu(rn->footer.flag);
> + old_flag = le32_to_cpu(rn->footer.flag);
>  
>   rn->footer.nid = cpu_to_le32(nid);
>   rn->footer.ino = cpu_to_le32(ino);
> diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
> index 1b23d3febe4c..de24f3247aa5 100644
> --- a/fs/f2fs/recovery.c
> +++ b/fs/f2fs/recovery.c
> @@ -540,8 +540,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, 
> struct inode *inode,
>   }
>  
>   copy_node_footer(dn.node_page, page);
> - fill_node_footer(dn.node_page, dn.nid, ni.ino,
> - ofs_of_node(page), false);
> + fill_node_footer(dn.node_page, dn.nid, ni.ino, ofs_of_node(page));
>   set_page_dirty(dn.node_page);
>  err:
>   f2fs_put_dnode();
> 


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH v3] f2fs: don't use GFP_ZERO for page caches

2018-04-09 Thread Jaegeuk Kim
Change log from v2:
 - consider IO error case when dealing with metapage
 - memset by fill_node_footer

Change log from v1:
 - don't memset for recovered page
 
Related to https://lkml.org/lkml/2018/4/8/661

Sometimes, we need to write meta data to new allocated block address,
then we will allocate a zeroed page in inner inode's address space, and
fill partial data in it, and leave other place with zero value which means
some fields are initial status.

There are two inner inodes (meta inode and node inode) setting __GFP_ZERO,
I have just checked them, for both of them, we can avoid using __GFP_ZERO,
and do initialization by ourselves to avoid unneeded/redundant zeroing
from mm.

Cc: 
Signed-off-by: Chao Yu 
Signed-off-by: Jaegeuk Kim 
---
 fs/f2fs/checkpoint.c | 4 +++-
 fs/f2fs/inode.c  | 4 ++--
 fs/f2fs/segment.c| 3 +++
 fs/f2fs/segment.h| 1 +
 4 files changed, 9 insertions(+), 3 deletions(-)

diff --git a/fs/f2fs/checkpoint.c b/fs/f2fs/checkpoint.c
index bf779461df13..2e23b953d304 100644
--- a/fs/f2fs/checkpoint.c
+++ b/fs/f2fs/checkpoint.c
@@ -100,8 +100,10 @@ static struct page *__get_meta_page(struct f2fs_sb_info 
*sbi, pgoff_t index,
 * readonly and make sure do not write checkpoint with non-uptodate
 * meta page.
 */
-   if (unlikely(!PageUptodate(page)))
+   if (unlikely(!PageUptodate(page))) {
+   memset(page_address(page), 0, PAGE_SIZE);
f2fs_stop_checkpoint(sbi, false);
+   }
 out:
return page;
 }
diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 417c9dcd0269..87535bf63421 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -320,10 +320,10 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned 
long ino)
 make_now:
if (ino == F2FS_NODE_INO(sbi)) {
inode->i_mapping->a_ops = _node_aops;
-   mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
+   mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
} else if (ino == F2FS_META_INO(sbi)) {
inode->i_mapping->a_ops = _meta_aops;
-   mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
+   mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
} else if (S_ISREG(inode->i_mode)) {
inode->i_op = _file_inode_operations;
inode->i_fop = _file_operations;
diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index a4b8e3e24ccb..1f5db557ab96 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -2021,6 +2021,7 @@ static void write_current_sum_page(struct f2fs_sb_info 
*sbi,
struct f2fs_summary_block *dst;
 
dst = (struct f2fs_summary_block *)page_address(page);
+   memset(dst, 0, PAGE_SIZE);
 
mutex_lock(>curseg_mutex);
 
@@ -3117,6 +3118,7 @@ static void write_compacted_summaries(struct f2fs_sb_info 
*sbi, block_t blkaddr)
 
page = grab_meta_page(sbi, blkaddr++);
kaddr = (unsigned char *)page_address(page);
+   memset(kaddr, 0, PAGE_SIZE);
 
/* Step 1: write nat cache */
seg_i = CURSEG_I(sbi, CURSEG_HOT_DATA);
@@ -3141,6 +3143,7 @@ static void write_compacted_summaries(struct f2fs_sb_info 
*sbi, block_t blkaddr)
if (!page) {
page = grab_meta_page(sbi, blkaddr++);
kaddr = (unsigned char *)page_address(page);
+   memset(kaddr, 0, PAGE_SIZE);
written_size = 0;
}
summary = (struct f2fs_summary *)(kaddr + written_size);
diff --git a/fs/f2fs/segment.h b/fs/f2fs/segment.h
index 3325d0769723..492ad0c86fa9 100644
--- a/fs/f2fs/segment.h
+++ b/fs/f2fs/segment.h
@@ -375,6 +375,7 @@ static inline void seg_info_to_sit_page(struct f2fs_sb_info 
*sbi,
int i;
 
raw_sit = (struct f2fs_sit_block *)page_address(page);
+   memset(raw_sit, 0, PAGE_SIZE);
for (i = 0; i < end - start; i++) {
rs = _sit->entries[i];
se = get_seg_entry(sbi, start + i);
-- 
2.15.0.531.g2ccb3012c9-goog


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH 2/2] f2fs-tools: remove duplicated declaration of f2fs_configuration c

2018-04-09 Thread Sheng Yong
The variable `c' is declared twice in f2fs_fs.h. This patch removes
the second declaration.

Signed-off-by: Sheng Yong 
---
 include/f2fs_fs.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/include/f2fs_fs.h b/include/f2fs_fs.h
index 2b0be2d..cbfdab5 100644
--- a/include/f2fs_fs.h
+++ b/include/f2fs_fs.h
@@ -1232,8 +1232,6 @@ extern int f2fs_get_zone_blocks(int);
 extern int f2fs_check_zones(int);
 extern int f2fs_reset_zones(int);
 
-extern struct f2fs_configuration c;
-
 #define SIZE_ALIGN(val, size)  ((val) + (size) - 1) / (size)
 #define SEG_ALIGN(blks)SIZE_ALIGN(blks, c.blks_per_seg)
 #define ZONE_ALIGN(blks)   SIZE_ALIGN(blks, c.blks_per_seg * \
-- 
2.14.1


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH v2 1/2] f2fs-tools: introduce new option V to show version

2018-04-09 Thread Sheng Yong
This patch introduces a new option -V to show the version of f2fs tools
and exit after that.

Signed-off-by: Sheng Yong 
---
v2->v1: add -V for all f2fs tools

 fsck/main.c | 30 +-
 include/f2fs_fs.h   |  6 ++
 mkfs/f2fs_format_main.c |  6 +-
 3 files changed, 36 insertions(+), 6 deletions(-)

diff --git a/fsck/main.c b/fsck/main.c
index bbf82c3..ca3b789 100644
--- a/fsck/main.c
+++ b/fsck/main.c
@@ -58,6 +58,7 @@ void fsck_usage()
MSG(0, "  -t show directory tree\n");
MSG(0, "  -q preserve quota limits\n");
MSG(0, "  -y fix all the time\n");
+   MSG(0, "  -V print the version number and exit\n");
MSG(0, "  --dry-run do not really fix corruptions\n");
exit(1);
 }
@@ -73,6 +74,7 @@ void dump_usage()
MSG(0, "  -S sparse_mode\n");
MSG(0, "  -a [SSA dump segno from #1~#2 (decimal), for all 0~-1]\n");
MSG(0, "  -b blk_addr (in 4KB)\n");
+   MSG(0, "  -V print the version number and exit\n");
 
exit(1);
 }
@@ -87,6 +89,7 @@ void defrag_usage()
MSG(0, "  -l length [default:512 (2MB)]\n");
MSG(0, "  -t target block address [default: main_blkaddr + 2MB]\n");
MSG(0, "  -i set direction as shrink [default: expand]\n");
+   MSG(0, "  -V print the version number and exit\n");
exit(1);
 }
 
@@ -96,6 +99,7 @@ void resize_usage()
MSG(0, "[options]:\n");
MSG(0, "  -d debug level [default:0]\n");
MSG(0, "  -t target sectors [default: device size]\n");
+   MSG(0, "  -V print the version number and exit\n");
exit(1);
 }
 
@@ -111,6 +115,7 @@ void sload_usage()
MSG(0, "  -t mount point [prefix of target fs path, default:/]\n");
MSG(0, "  -T timestamp\n");
MSG(0, "  -d debug level [default:0]\n");
+   MSG(0, "  -V print the version number and exit\n");
exit(1);
 }
 
@@ -160,7 +165,7 @@ void f2fs_parse_options(int argc, char *argv[])
}
 
if (!strcmp("fsck.f2fs", prog)) {
-   const char *option_string = ":ad:fp:q:Sty";
+   const char *option_string = ":ad:fp:q:StyV";
int opt = 0;
struct option long_opt[] = {
{"dry-run", no_argument, 0, 1},
@@ -240,6 +245,9 @@ void f2fs_parse_options(int argc, char *argv[])
break;
}
break;
+   case 'V':
+   show_version(prog);
+   exit(0);
case '?':
option = optopt;
default:
@@ -250,7 +258,7 @@ void f2fs_parse_options(int argc, char *argv[])
break;
}
} else if (!strcmp("dump.f2fs", prog)) {
-   const char *option_string = "d:i:n:s:Sa:b:";
+   const char *option_string = "d:i:n:s:Sa:b:V";
static struct dump_option dump_opt = {
.nid = 0,   /* default root ino */
.start_nat = -1,
@@ -310,6 +318,9 @@ void f2fs_parse_options(int argc, char *argv[])
ret = sscanf(optarg, "%x",
_opt.blk_addr);
break;
+   case 'V':
+   show_version(prog);
+   exit(0);
default:
err = EUNKNOWN_OPT;
break;
@@ -321,7 +332,7 @@ void f2fs_parse_options(int argc, char *argv[])
 
c.private = _opt;
} else if (!strcmp("defrag.f2fs", prog)) {
-   const char *option_string = "d:s:Sl:t:i";
+   const char *option_string = "d:s:Sl:t:iV";
 
c.func = DEFRAG;
while ((option = getopt(argc, argv, option_string)) != EOF) {
@@ -367,6 +378,9 @@ void f2fs_parse_options(int argc, char *argv[])
case 'i':
c.defrag_shrink = 1;
break;
+   case 'V':
+   show_version(prog);
+   exit(0);
default:
err = EUNKNOWN_OPT;
break;
@@ -376,7 +390,7 @@ void f2fs_parse_options(int argc, char *argv[])
break;
}
} else if (!strcmp("resize.f2fs", prog)) {
-   const char *option_string = "d:t:";
+   const char *option_string = "d:t:V";
 
c.func = RESIZE;
while ((option = getopt(argc, argv, option_string)) != EOF) {
@@ -400,6 +414,9 @@ void f2fs_parse_options(int argc, char *argv[])
  

Re: [f2fs-dev] [PATCH v3] f2fs: stop issue discard if something wrong with f2fs

2018-04-09 Thread Chao Yu
On 2018/4/10 10:46, heyunlei wrote:
> 
> 
>> -Original Message-
>> From: Yuchao (T)
>> Sent: Tuesday, April 10, 2018 10:36 AM
>> To: heyunlei; jaeg...@kernel.org; linux-f2fs-devel@lists.sourceforge.net
>> Cc: Wangbintian; Zhangdianfang (Euler)
>> Subject: Re: [f2fs-dev][PATCH v3] f2fs: stop issue discard if something 
>> wrong with f2fs
>>
>> On 2018/4/9 19:27, Yunlei He wrote:
>>> This patch stop async thread and umount process to issue discard
>>> if something wrong with f2fs, which is similar to fstrim.
>>>
>>> v1->v2: add fs error check in not only discard thread but also umount 
>>> process
>>> v2->v3: remove redundant error message
>>>
>>> Signed-off-by: Yunlei He 
>>> ---
>>>  fs/f2fs/segment.c | 3 +++
>>>  1 file changed, 3 insertions(+)
>>>
>>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>>> index 5854cc4..e31d506 100644
>>> --- a/fs/f2fs/segment.c
>>> +++ b/fs/f2fs/segment.c
>>> @@ -1202,6 +1202,9 @@ static int __issue_discard_cmd(struct f2fs_sb_info 
>>> *sbi,
>>> int i, iter = 0, issued = 0;
>>> bool io_interrupted = false;
>>>
>>> +   if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
>>> +   return issued;
>>
>> Hmm.. except here, I mean we'd better add it into issue_discard_thread too?
> 
> Async thread will also use this function to issue discard, besides, in this 
> case
> __issue_discard_cmd will return zero to async thread, thread will wait max 
> Interval 60s. Anything I missed?

There is still locking overhead from sb_{start,end}_intwrite, although it's 
tiny,
but since there is no way to revert NEED_FSCK status, so just let discard thread
be aware of this to walk around is OK.

Thanks,

> 
> Thanks.
> 
>>
>> Thanks,
>>
>>> +
>>> for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
>>> if (i + 1 < dpolicy->granularity)
>>> break;
>>>
> 
> 
> .
> 


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH v2] f2fs: use cur_map instead of ckpt_map

2018-04-09 Thread Yunlei He
In flush_sit_entries::add_discard_addrs path, cur_map and ckpt_map
are the same, but in exist_trim_candidates::add_discard_addrs,
cur_map is updated one, and newer than ckpt_map, so we need to use
cur_map to check whether there are discard candidates.

v1->v2: one problem of check discard candidates reported by Chao,
besides, update commit message.

Signed-off-by: Yunlei He 
---
 fs/f2fs/segment.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 5854cc4..f5d0499 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1559,7 +1559,7 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, 
struct cp_control *cpc,
 
/* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
for (i = 0; i < entries; i++)
-   dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
+   dmap[i] = force ? ~cur_map[i] & ~discard_map[i] :
(cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
 
while (force || SM_I(sbi)->dcc_info->nr_discards <=
-- 
1.9.1


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] f2fs: use cur_map instead of ckpt_map

2018-04-09 Thread heyunlei


>-Original Message-
>From: Yuchao (T)
>Sent: Tuesday, April 10, 2018 10:45 AM
>To: heyunlei; jaeg...@kernel.org; linux-f2fs-devel@lists.sourceforge.net
>Cc: Wangbintian; Zhangdianfang (Euler)
>Subject: Re: [f2fs-dev][PATCH] f2fs: use cur_map instead of ckpt_map
>
>On 2018/4/9 19:36, Yunlei He wrote:
>> This patch use cur_map instead of ckpt_map for easy to
>> understand, no functional change.
>
>Actually, it does change the logic, but I think it can fix a issue..
>
>In flush_sit_entries::add_discard_addrs path, cur_map and ckpt_map are
>the same, but in exist_trim_candidates::add_discard_addrs, cur_map is
>updated one, and newer than ckpt_map, so we need to use cur_map to check
>whether there are discard candidates.
>
>Can you change commit message as described above and resend the patch?

Okay, I'll do it. 

Thanks.
>
>Thanks,
>
>>
>> Signed-off-by: Yunlei He 
>> ---
>>  fs/f2fs/segment.c | 2 +-
>>  1 file changed, 1 insertion(+), 1 deletion(-)
>>
>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>> index 5854cc4..f5d0499 100644
>> --- a/fs/f2fs/segment.c
>> +++ b/fs/f2fs/segment.c
>> @@ -1559,7 +1559,7 @@ static bool add_discard_addrs(struct f2fs_sb_info 
>> *sbi, struct cp_control *cpc,
>>
>>  /* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
>>  for (i = 0; i < entries; i++)
>> -dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
>> +dmap[i] = force ? ~cur_map[i] & ~discard_map[i] :
>>  (cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
>>
>>  while (force || SM_I(sbi)->dcc_info->nr_discards <=
>>


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] mm: workingset: fix NULL ptr dereference

2018-04-09 Thread Minchan Kim
On Mon, Apr 09, 2018 at 07:41:52PM -0700, Matthew Wilcox wrote:
> On Tue, Apr 10, 2018 at 11:33:39AM +0900, Minchan Kim wrote:
> > @@ -522,7 +532,7 @@ EXPORT_SYMBOL(radix_tree_preload);
> >   */
> >  int radix_tree_maybe_preload(gfp_t gfp_mask)
> >  {
> > -   if (gfpflags_allow_blocking(gfp_mask))
> > +   if (gfpflags_allow_blocking(gfp_mask) && !(gfp_mask & __GFP_ZERO))
> > return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
> > /* Preloading doesn't help anything with this gfp mask, skip it */
> > preempt_disable();
> 
> No, you've completely misunderstood what's going on in this function.

Okay, I hope this version clear current concerns.

>From fb37c41b90f7d3ead1798e5cb7baef76709afd94 Mon Sep 17 00:00:00 2001
From: Minchan Kim 
Date: Tue, 10 Apr 2018 11:54:57 +0900
Subject: [PATCH v3] mm: workingset: fix NULL ptr dereference

It assumes shadow entries of radix tree rely on the init state
that node->private_list allocated newly is list_empty state
for the working. Currently, it's initailized in SLAB constructor
which means node of radix tree would be initialized only when
*slub allocates new page*, not *slub alloctes new object*.

If some FS or subsystem pass gfp_mask to __GFP_ZERO, that means
newly allocated node can have !list_empty(node->private_list)
by memset of slab allocator. It ends up calling NULL deference
at workingset_update_node by failing list_empty check.

This patch fixes it.

Fixes: 449dd6984d0e ("mm: keep page cache radix tree nodes in check")
Cc: Johannes Weiner 
Cc: Jan Kara 
Cc: Matthew Wilcox 
Cc: Jaegeuk Kim 
Cc: Chao Yu 
Cc: Christopher Lameter 
Cc: linux-fsde...@vger.kernel.org
Cc: sta...@vger.kernel.org
Reported-by: Chris Fries 
Signed-off-by: Minchan Kim 
---
 lib/radix-tree.c | 9 +
 mm/filemap.c | 5 +++--
 2 files changed, 12 insertions(+), 2 deletions(-)

diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index da9e10c827df..7569e637dbaa 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -470,6 +470,15 @@ static __must_check int __radix_tree_preload(gfp_t 
gfp_mask, unsigned nr)
struct radix_tree_node *node;
int ret = -ENOMEM;
 
+   /*
+* New allocate node must have node->private_list as INIT_LIST_HEAD
+* state by workingset shadow memory implementation.
+* If user pass  __GFP_ZERO by mistake, slab allocator will clear
+* node->private_list, which makes a BUG. Rather than going Oops,
+* just fix and warn about it.
+*/
+   if (WARN_ON(gfp_mask & __GFP_ZERO))
+   gfp_mask &= ~__GFP_ZERO;
/*
 * Nodes preloaded by one cgroup can be be used by another cgroup, so
 * they should never be accounted to any particular memory cgroup.
diff --git a/mm/filemap.c b/mm/filemap.c
index ab77e19ab09c..b6de9d691c8a 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -786,7 +786,7 @@ int replace_page_cache_page(struct page *old, struct page 
*new, gfp_t gfp_mask)
VM_BUG_ON_PAGE(!PageLocked(new), new);
VM_BUG_ON_PAGE(new->mapping, new);
 
-   error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
+   error = radix_tree_preload(gfp_mask & ~(__GFP_HIGHMEM | __GFP_ZERO));
if (!error) {
struct address_space *mapping = old->mapping;
void (*freepage)(struct page *);
@@ -842,7 +842,8 @@ static int __add_to_page_cache_locked(struct page *page,
return error;
}
 
-   error = radix_tree_maybe_preload(gfp_mask & ~__GFP_HIGHMEM);
+   error = radix_tree_maybe_preload(gfp_mask &
+   ~(__GFP_HIGHMEM | __GFP_ZERO));
if (error) {
if (!huge)
mem_cgroup_cancel_charge(page, memcg, false);
-- 
2.17.0.484.g0c8726318c-goog



--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] mm: workingset: fix NULL ptr dereference

2018-04-09 Thread Matthew Wilcox
On Tue, Apr 10, 2018 at 11:33:39AM +0900, Minchan Kim wrote:
> @@ -522,7 +532,7 @@ EXPORT_SYMBOL(radix_tree_preload);
>   */
>  int radix_tree_maybe_preload(gfp_t gfp_mask)
>  {
> - if (gfpflags_allow_blocking(gfp_mask))
> + if (gfpflags_allow_blocking(gfp_mask) && !(gfp_mask & __GFP_ZERO))
>   return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
>   /* Preloading doesn't help anything with this gfp mask, skip it */
>   preempt_disable();

No, you've completely misunderstood what's going on in this function.

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] mm: workingset: fix NULL ptr dereference

2018-04-09 Thread Minchan Kim
On Tue, Apr 10, 2018 at 11:33:39AM +0900, Minchan Kim wrote:
> On Mon, Apr 09, 2018 at 06:12:11PM -0700, Matthew Wilcox wrote:
> > On Tue, Apr 10, 2018 at 08:04:09AM +0900, Minchan Kim wrote:
> > > On Mon, Apr 09, 2018 at 08:20:32AM -0700, Matthew Wilcox wrote:
> > > > I don't think this is something the radix tree should know about.
> > > 
> > > Because shadow entry implementation is hidden by radix tree implemetation.
> > > IOW, radix tree user cannot know how it works.
> > 
> > I have no idea what you mean.
> > 
> > > > SLAB should be checking for it (the patch I posted earlier in this
> > > 
> > > I don't think it's right approach. SLAB constructor can initialize
> > > some metadata for slab page populated as well as page zeroing.
> > > However, __GFP_ZERO means only clearing pages, not metadata.
> > > So it's different semantic. No need to mix out.
> > 
> > No, __GFP_ZERO is specified to clear the allocated memory whether
> > you're allocating from alloc_pages or from slab.  What makes no sense
> > is allocating an object from slab with a constructor *and* __GFP_ZERO.
> > They're in conflict, and slab can't fulfill both of those requirements.
> 
> It's a stable material. If you really think it does make sense,
> please submit patch separately.
> 
> > 
> > > > thread), but the right place to filter this out is in the caller of
> > > > radix_tree_maybe_preload -- it's already filtering out HIGHMEM pages,
> > > > and should filter out GFP_ZERO too.
> > > 
> > > radix_tree_[maybe]_preload is exported API, which are error-prone
> > > for out of modules or upcoming customers.
> > > 
> > > More proper place is __radix_tree_preload.
> > 
> > I could not disagree with you more.  It is the responsibility of the
> > callers of radix_tree_preload to avoid calling it with nonsense flags
> > like __GFP_DMA, __GFP_HIGHMEM or __GFP_ZERO.
> 
> How about this?
> 
> It would fix current problem and warn potential bugs as well.
> radix_tree_preload already has done such warning and
> radix_tree_maybe_preload has skipping for misbehaivor gfp.
> 
> From 27ecf7a009d3570d1155c528c7f08040ede68ed3 Mon Sep 17 00:00:00 2001
> From: Minchan Kim 
> Date: Tue, 10 Apr 2018 11:20:11 +0900
> Subject: [PATCH v2] mm: workingset: fix NULL ptr dereference
> 
> It assumes shadow entries of radix tree rely on the init state
> that node->private_list allocated newly is list_empty state
> for the working. Currently, it's initailized in SLAB constructor
> which means node of radix tree would be initialized only when
> *slub allocates new page*, not *slub alloctes new object*.
> 
> If some FS or subsystem pass gfp_mask to __GFP_ZERO, that means
> newly allocated node can have !list_empty(node->private_list)
> by memset of slab allocator. It ends up calling NULL deference
> at workingset_update_node by failing list_empty check.
> 
> This patch fixes it.
> 
> Fixes: 449dd6984d0e ("mm: keep page cache radix tree nodes in check")
> Cc: Johannes Weiner 
> Cc: Jan Kara 
> Cc: Matthew Wilcox 
> Cc: Jaegeuk Kim 
> Cc: Chao Yu 
> Cc: Christopher Lameter 
> Cc: linux-fsde...@vger.kernel.org
> Reported-by: Chris Fries 
> Signed-off-by: Minchan Kim 
> ---
>  lib/radix-tree.c | 12 +++-
>  1 file changed, 11 insertions(+), 1 deletion(-)
> 
> diff --git a/lib/radix-tree.c b/lib/radix-tree.c
> index da9e10c827df..9d68f2a7888e 100644
> --- a/lib/radix-tree.c
> +++ b/lib/radix-tree.c
> @@ -511,6 +511,16 @@ int radix_tree_preload(gfp_t gfp_mask)
>  {
>   /* Warn on non-sensical use... */
>   WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask));
> + /*
> +  * New allocate node must have node->private_list as INIT_LIST_HEAD
> +  * state by workingset shadow memory implementation.
> +  * If user pass  __GFP_ZERO by mistake, slab allocator will clear
> +  * node->private_list, which makes a BUG. Rather than going Oops,
> +  * just fix and warn about it.
> +  */
> + if (WARN_ON(gfp_mask & __GFP_ZERO))
> + gfp_mask &= ~GFP_ZERO
 
Build fail.

If others are okay for this patch, I will resend fixed patch with stable mark.
I will wait feedback from others.

Thanks.

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH v3] f2fs: stop issue discard if something wrong with f2fs

2018-04-09 Thread Chao Yu
On 2018/4/9 19:27, Yunlei He wrote:
> This patch stop async thread and umount process to issue discard
> if something wrong with f2fs, which is similar to fstrim.
> 
> v1->v2: add fs error check in not only discard thread but also umount process
> v2->v3: remove redundant error message
> 
> Signed-off-by: Yunlei He 
> ---
>  fs/f2fs/segment.c | 3 +++
>  1 file changed, 3 insertions(+)
> 
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 5854cc4..e31d506 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -1202,6 +1202,9 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
>   int i, iter = 0, issued = 0;
>   bool io_interrupted = false;
>  
> + if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
> + return issued;

Hmm.. except here, I mean we'd better add it into issue_discard_thread too?

Thanks,

> +
>   for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
>   if (i + 1 < dpolicy->granularity)
>   break;
> 


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] f2fs: enlarge block plug coverage

2018-04-09 Thread Chao Yu
On 2018/4/10 2:02, Jaegeuk Kim wrote:
> On 04/08, Chao Yu wrote:
>> On 2018/4/5 11:51, Jaegeuk Kim wrote:
>>> On 04/04, Chao Yu wrote:
 This patch enlarges block plug coverage in __issue_discard_cmd, in
 order to collect more pending bios before issuing them, to avoid
 being disturbed by previous discard I/O in IO aware discard mode.
>>>
>>> Hmm, then we need to wait for huge discard IO for over 10 secs, which
>>
>> We found that total discard latency is rely on total discard number we issued
>> last time instead of range or length discard covered. IMO, if we don't change
>> .max_requests value, we will not suffer longer latency.
>>
>>> will affect following read/write IOs accordingly. In order to avoid that,
>>> we actually need to limit the discard size.

Do you mean limit discard count or discard length?

>>
>> If you are worry about I/O interference in between discard and rw, I suggest 
>> to
>> decrease .max_requests value.
> 
> What do you mean? This will produce more pending requests in the queue?

I mean after applying this patch, we can queue more discard IOs in plug inside
task, otherwise, previous issued discard in block layer can make is_idle() be 
false,
then it can stop IO awared user to issue pending discard command.

Thanks,

> 
>>
>> Thanks,
>>
>>>
>>> Thanks,
>>>

 Signed-off-by: Chao Yu 
 ---
  fs/f2fs/segment.c | 7 +--
  1 file changed, 5 insertions(+), 2 deletions(-)

 diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
 index 8f0b5ba46315..4287e208c040 100644
 --- a/fs/f2fs/segment.c
 +++ b/fs/f2fs/segment.c
 @@ -1208,10 +1208,12 @@ static int __issue_discard_cmd(struct f2fs_sb_info 
 *sbi,
pend_list = >pend_list[i];
  
mutex_lock(>cmd_lock);
 +
 +  blk_start_plug();
 +
if (list_empty(pend_list))
goto next;
f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, >root));
 -  blk_start_plug();
list_for_each_entry_safe(dc, tmp, pend_list, list) {
f2fs_bug_on(sbi, dc->state != D_PREP);
  
 @@ -1227,8 +1229,9 @@ static int __issue_discard_cmd(struct f2fs_sb_info 
 *sbi,
if (++iter >= dpolicy->max_requests)
break;
}
 -  blk_finish_plug();
  next:
 +  blk_finish_plug();
 +
mutex_unlock(>cmd_lock);
  
if (iter >= dpolicy->max_requests)
 -- 
 2.15.0.55.gc2ece9dc4de6
>>>
>>> .
>>>
> 
> .
> 


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] mm: workingset: fix NULL ptr dereference

2018-04-09 Thread Minchan Kim
On Mon, Apr 09, 2018 at 06:12:11PM -0700, Matthew Wilcox wrote:
> On Tue, Apr 10, 2018 at 08:04:09AM +0900, Minchan Kim wrote:
> > On Mon, Apr 09, 2018 at 08:20:32AM -0700, Matthew Wilcox wrote:
> > > I don't think this is something the radix tree should know about.
> > 
> > Because shadow entry implementation is hidden by radix tree implemetation.
> > IOW, radix tree user cannot know how it works.
> 
> I have no idea what you mean.
> 
> > > SLAB should be checking for it (the patch I posted earlier in this
> > 
> > I don't think it's right approach. SLAB constructor can initialize
> > some metadata for slab page populated as well as page zeroing.
> > However, __GFP_ZERO means only clearing pages, not metadata.
> > So it's different semantic. No need to mix out.
> 
> No, __GFP_ZERO is specified to clear the allocated memory whether
> you're allocating from alloc_pages or from slab.  What makes no sense
> is allocating an object from slab with a constructor *and* __GFP_ZERO.
> They're in conflict, and slab can't fulfill both of those requirements.

It's a stable material. If you really think it does make sense,
please submit patch separately.

> 
> > > thread), but the right place to filter this out is in the caller of
> > > radix_tree_maybe_preload -- it's already filtering out HIGHMEM pages,
> > > and should filter out GFP_ZERO too.
> > 
> > radix_tree_[maybe]_preload is exported API, which are error-prone
> > for out of modules or upcoming customers.
> > 
> > More proper place is __radix_tree_preload.
> 
> I could not disagree with you more.  It is the responsibility of the
> callers of radix_tree_preload to avoid calling it with nonsense flags
> like __GFP_DMA, __GFP_HIGHMEM or __GFP_ZERO.

How about this?

It would fix current problem and warn potential bugs as well.
radix_tree_preload already has done such warning and
radix_tree_maybe_preload has skipping for misbehaivor gfp.

>From 27ecf7a009d3570d1155c528c7f08040ede68ed3 Mon Sep 17 00:00:00 2001
From: Minchan Kim 
Date: Tue, 10 Apr 2018 11:20:11 +0900
Subject: [PATCH v2] mm: workingset: fix NULL ptr dereference

It assumes shadow entries of radix tree rely on the init state
that node->private_list allocated newly is list_empty state
for the working. Currently, it's initailized in SLAB constructor
which means node of radix tree would be initialized only when
*slub allocates new page*, not *slub alloctes new object*.

If some FS or subsystem pass gfp_mask to __GFP_ZERO, that means
newly allocated node can have !list_empty(node->private_list)
by memset of slab allocator. It ends up calling NULL deference
at workingset_update_node by failing list_empty check.

This patch fixes it.

Fixes: 449dd6984d0e ("mm: keep page cache radix tree nodes in check")
Cc: Johannes Weiner 
Cc: Jan Kara 
Cc: Matthew Wilcox 
Cc: Jaegeuk Kim 
Cc: Chao Yu 
Cc: Christopher Lameter 
Cc: linux-fsde...@vger.kernel.org
Reported-by: Chris Fries 
Signed-off-by: Minchan Kim 
---
 lib/radix-tree.c | 12 +++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index da9e10c827df..9d68f2a7888e 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -511,6 +511,16 @@ int radix_tree_preload(gfp_t gfp_mask)
 {
/* Warn on non-sensical use... */
WARN_ON_ONCE(!gfpflags_allow_blocking(gfp_mask));
+   /*
+* New allocate node must have node->private_list as INIT_LIST_HEAD
+* state by workingset shadow memory implementation.
+* If user pass  __GFP_ZERO by mistake, slab allocator will clear
+* node->private_list, which makes a BUG. Rather than going Oops,
+* just fix and warn about it.
+*/
+   if (WARN_ON(gfp_mask & __GFP_ZERO))
+   gfp_mask &= ~GFP_ZERO
+
return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
 }
 EXPORT_SYMBOL(radix_tree_preload);
@@ -522,7 +532,7 @@ EXPORT_SYMBOL(radix_tree_preload);
  */
 int radix_tree_maybe_preload(gfp_t gfp_mask)
 {
-   if (gfpflags_allow_blocking(gfp_mask))
+   if (gfpflags_allow_blocking(gfp_mask) && !(gfp_mask & __GFP_ZERO))
return __radix_tree_preload(gfp_mask, RADIX_TREE_PRELOAD_SIZE);
/* Preloading doesn't help anything with this gfp mask, skip it */
preempt_disable();
-- 
2.17.0.484.g0c8726318c-goog



--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH v2] f2fs: don't use GFP_ZERO for page caches

2018-04-09 Thread Chao Yu
On 2018/4/10 9:35, Jaegeuk Kim wrote:
> On 04/09, Jaegeuk Kim wrote:
>> Change log from v1:
>>  - don't memset for recovered page
>>
>> Related to https://lkml.org/lkml/2018/4/8/661
>>
>> Sometimes, we need to write meta data to new allocated block address,
>> then we will allocate a zeroed page in inner inode's address space, and
>> fill partial data in it, and leave other place with zero value which means
>> some fields are initial status.
>>
>> There are two inner inodes (meta inode and node inode) setting __GFP_ZERO,
>> I have just checked them, for both of them, we can avoid using __GFP_ZERO,
>> and do initialization by ourselves to avoid unneeded/redundant zeroing
>> from mm.
> 
> Okay, it seems we're missing some more places to reset the memory, since
> I'm still getting a panic. :(

Alright, let me recheck this patch... ;)

Thanks,

> 
> Thanks,
> 
>>
>> Cc: 
>> Signed-off-by: Chao Yu 
>> Signed-off-by: Jaegeuk Kim 
>> ---
>>  fs/f2fs/inode.c | 4 ++--
>>  fs/f2fs/node.c  | 3 ++-
>>  2 files changed, 4 insertions(+), 3 deletions(-)
>>
>> diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
>> index 417c9dcd0269..87535bf63421 100644
>> --- a/fs/f2fs/inode.c
>> +++ b/fs/f2fs/inode.c
>> @@ -320,10 +320,10 @@ struct inode *f2fs_iget(struct super_block *sb, 
>> unsigned long ino)
>>  make_now:
>>  if (ino == F2FS_NODE_INO(sbi)) {
>>  inode->i_mapping->a_ops = _node_aops;
>> -mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
>> +mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
>>  } else if (ino == F2FS_META_INO(sbi)) {
>>  inode->i_mapping->a_ops = _meta_aops;
>> -mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
>> +mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
>>  } else if (S_ISREG(inode->i_mode)) {
>>  inode->i_op = _file_inode_operations;
>>  inode->i_fop = _file_operations;
>> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
>> index 9a99243054ba..5a4469093e43 100644
>> --- a/fs/f2fs/node.c
>> +++ b/fs/f2fs/node.c
>> @@ -1096,7 +1096,8 @@ struct page *new_node_page(struct dnode_of_data *dn, 
>> unsigned int ofs)
>>  set_node_addr(sbi, _ni, NEW_ADDR, false);
>>  
>>  f2fs_wait_on_page_writeback(page, NODE, true);
>> -fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
>> +memset(F2FS_NODE(page), 0, PAGE_SIZE);
>> +fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, false);
>>  set_cold_node(page, S_ISDIR(dn->inode->i_mode));
>>  if (!PageUptodate(page))
>>  SetPageUptodate(page);
>> -- 
>> 2.15.0.531.g2ccb3012c9-goog
>>
>>
>> --
>> Check out the vibrant tech community on one of the world's most
>> engaging tech sites, Slashdot.org! http://sdm.link/slashdot
>> ___
>> Linux-f2fs-devel mailing list
>> Linux-f2fs-devel@lists.sourceforge.net
>> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
> 
> .
> 


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] mkfs.f2fs: introduce new option V to show version

2018-04-09 Thread Sheng Yong



On 2018/4/10 1:55, Jaegeuk Kim wrote:

On 04/08, Chao Yu wrote:

On 2018/4/8 10:15, Sheng Yong wrote:

This patch introduces a new option -V to show the version of mkfs.f2fs
and exit after that.


BTW, should we add -V for other misc tools?


Yes, could you please add this for others?


OK. I'll do that.

thanks,
Sheng




Signed-off-by: Sheng Yong 


Reviewed-by: Chao Yu 

Thanks,


.




--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH v2] f2fs: don't use GFP_ZERO for page caches

2018-04-09 Thread Jaegeuk Kim
On 04/09, Jaegeuk Kim wrote:
> Change log from v1:
>  - don't memset for recovered page
> 
> Related to https://lkml.org/lkml/2018/4/8/661
> 
> Sometimes, we need to write meta data to new allocated block address,
> then we will allocate a zeroed page in inner inode's address space, and
> fill partial data in it, and leave other place with zero value which means
> some fields are initial status.
> 
> There are two inner inodes (meta inode and node inode) setting __GFP_ZERO,
> I have just checked them, for both of them, we can avoid using __GFP_ZERO,
> and do initialization by ourselves to avoid unneeded/redundant zeroing
> from mm.

Okay, it seems we're missing some more places to reset the memory, since
I'm still getting a panic. :(

Thanks,

> 
> Cc: 
> Signed-off-by: Chao Yu 
> Signed-off-by: Jaegeuk Kim 
> ---
>  fs/f2fs/inode.c | 4 ++--
>  fs/f2fs/node.c  | 3 ++-
>  2 files changed, 4 insertions(+), 3 deletions(-)
> 
> diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
> index 417c9dcd0269..87535bf63421 100644
> --- a/fs/f2fs/inode.c
> +++ b/fs/f2fs/inode.c
> @@ -320,10 +320,10 @@ struct inode *f2fs_iget(struct super_block *sb, 
> unsigned long ino)
>  make_now:
>   if (ino == F2FS_NODE_INO(sbi)) {
>   inode->i_mapping->a_ops = _node_aops;
> - mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
> + mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
>   } else if (ino == F2FS_META_INO(sbi)) {
>   inode->i_mapping->a_ops = _meta_aops;
> - mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
> + mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
>   } else if (S_ISREG(inode->i_mode)) {
>   inode->i_op = _file_inode_operations;
>   inode->i_fop = _file_operations;
> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> index 9a99243054ba..5a4469093e43 100644
> --- a/fs/f2fs/node.c
> +++ b/fs/f2fs/node.c
> @@ -1096,7 +1096,8 @@ struct page *new_node_page(struct dnode_of_data *dn, 
> unsigned int ofs)
>   set_node_addr(sbi, _ni, NEW_ADDR, false);
>  
>   f2fs_wait_on_page_writeback(page, NODE, true);
> - fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
> + memset(F2FS_NODE(page), 0, PAGE_SIZE);
> + fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, false);
>   set_cold_node(page, S_ISDIR(dn->inode->i_mode));
>   if (!PageUptodate(page))
>   SetPageUptodate(page);
> -- 
> 2.15.0.531.g2ccb3012c9-goog
> 
> 
> --
> Check out the vibrant tech community on one of the world's most
> engaging tech sites, Slashdot.org! http://sdm.link/slashdot
> ___
> Linux-f2fs-devel mailing list
> Linux-f2fs-devel@lists.sourceforge.net
> https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] mm: workingset: fix NULL ptr dereference

2018-04-09 Thread Matthew Wilcox
On Tue, Apr 10, 2018 at 08:04:09AM +0900, Minchan Kim wrote:
> On Mon, Apr 09, 2018 at 08:20:32AM -0700, Matthew Wilcox wrote:
> > I don't think this is something the radix tree should know about.
> 
> Because shadow entry implementation is hidden by radix tree implemetation.
> IOW, radix tree user cannot know how it works.

I have no idea what you mean.

> > SLAB should be checking for it (the patch I posted earlier in this
> 
> I don't think it's right approach. SLAB constructor can initialize
> some metadata for slab page populated as well as page zeroing.
> However, __GFP_ZERO means only clearing pages, not metadata.
> So it's different semantic. No need to mix out.

No, __GFP_ZERO is specified to clear the allocated memory whether
you're allocating from alloc_pages or from slab.  What makes no sense
is allocating an object from slab with a constructor *and* __GFP_ZERO.
They're in conflict, and slab can't fulfill both of those requirements.

> > thread), but the right place to filter this out is in the caller of
> > radix_tree_maybe_preload -- it's already filtering out HIGHMEM pages,
> > and should filter out GFP_ZERO too.
> 
> radix_tree_[maybe]_preload is exported API, which are error-prone
> for out of modules or upcoming customers.
> 
> More proper place is __radix_tree_preload.

I could not disagree with you more.  It is the responsibility of the
callers of radix_tree_preload to avoid calling it with nonsense flags
like __GFP_DMA, __GFP_HIGHMEM or __GFP_ZERO.

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] mm: workingset: fix NULL ptr dereference

2018-04-09 Thread Minchan Kim
On Mon, Apr 09, 2018 at 08:20:32AM -0700, Matthew Wilcox wrote:
> On Mon, Apr 09, 2018 at 11:49:58PM +0900, Minchan Kim wrote:
> > On Mon, Apr 09, 2018 at 08:25:06PM +0800, Chao Yu wrote:
> > > On 2018/4/9 19:25, Minchan Kim wrote:
> > > > On Mon, Apr 09, 2018 at 04:14:03AM -0700, Matthew Wilcox wrote:
> > > >> On Mon, Apr 09, 2018 at 12:09:30PM +0900, Minchan Kim wrote:
> > > >>> Look at fs/f2fs/inode.c
> > > >>> mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
> > > >>>
> > > >>> __add_to_page_cache_locked
> > > >>>   radix_tree_maybe_preload
> > > >>>
> > > >>> add_to_page_cache_lru
> > > 
> > > No, sometimes, we need to write meta data to new allocated block address,
> > > then we will allocate a zeroed page in inner inode's address space, and
> > > fill partial data in it, and leave other place with zero value which means
> > > some fields are initial status.
> > 
> > Thanks for the explaining.
> > 
> > > There are two inner inodes (meta inode and node inode) setting __GFP_ZERO,
> > > I have just checked them, for both of them, we can avoid using __GFP_ZERO,
> > > and do initialization by ourselves to avoid unneeded/redundant zeroing
> > > from mm.
> > 
> > Yub, it would be desirable for f2fs. Please go ahead for f2fs side.
> > However, I think current problem is orthgonal. Now, the problem is
> > radix_tree_node allocation is bind to page cache allocation.
> > Why does FS cannot allocate page cache with __GFP_ZERO?
> > I agree if the concern is only performance matter as Matthew mentioned.
> > But it is beyond that because it shouldn't do due to limitation
> > of workingset shadow entry implementation. I think such coupling is
> > not a good idea.
> > 
> > I think right approach to abstract shadow entry in radix_tree is
> > to mask off __GFP_ZERO in radix_tree's allocation APIs.
> 
> I don't think this is something the radix tree should know about.

Because shadow entry implementation is hidden by radix tree implemetation.
IOW, radix tree user cannot know how it works.

> SLAB should be checking for it (the patch I posted earlier in this

I don't think it's right approach. SLAB constructor can initialize
some metadata for slab page populated as well as page zeroing.
However, __GFP_ZERO means only clearing pages, not metadata.
So it's different semantic. No need to mix out.

> thread), but the right place to filter this out is in the caller of
> radix_tree_maybe_preload -- it's already filtering out HIGHMEM pages,
> and should filter out GFP_ZERO too.

radix_tree_[maybe]_preload is exported API, which are error-prone
for out of modules or upcoming customers.

More proper place is __radix_tree_preload.

> 
> diff --git a/mm/filemap.c b/mm/filemap.c
> index c2147682f4c3..a87a523eea8e 100644
> --- a/mm/filemap.c
> +++ b/mm/filemap.c
> @@ -785,7 +785,7 @@ int replace_page_cache_page(struct page *old, struct page 
> *new, gfp_t gfp_mask)
>   VM_BUG_ON_PAGE(!PageLocked(new), new);
>   VM_BUG_ON_PAGE(new->mapping, new);
>  
> - error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
> + error = radix_tree_preload(gfp_mask & ~(__GFP_HIGHMEM | __GFP_ZERO));
>   if (!error) {
>   struct address_space *mapping = old->mapping;
>   void (*freepage)(struct page *);
> @@ -841,7 +841,8 @@ static int __add_to_page_cache_locked(struct page *page,
>   return error;
>   }
>  
> - error = radix_tree_maybe_preload(gfp_mask & ~__GFP_HIGHMEM);
> + error = radix_tree_maybe_preload(gfp_mask &
> + ~(__GFP_HIGHMEM | __GFP_ZERO));
>   if (error) {
>   if (!huge)
>   mem_cgroup_cancel_charge(page, memcg, false);

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH v10 00/62] Convert page cache to XArray

2018-04-09 Thread Goldwyn Rodrigues
Hi Matthew,

On 03/29/2018 10:41 PM, Matthew Wilcox wrote:
> From: Matthew Wilcox 
> 
> I'd like to thank Andrew for taking the first eight XArray patches
> into -next.  He's understandably nervous about taking the rest of the
> patches into -next given how few of the remaining patches have review
> tags on them.  So ... if you're on the cc, I'd really appreciate a review
> on something that you feel somewhat responsible for, eg the particular
> filesystem (nilfs, f2fs, lustre) that I've touched, or something in the
> mm/ or fs/ directories that you've worked on recently.
> 
> This is against next-20180329.

I tried these patches against next-20180329 and added the patch for the
bug reported by Mike Kravetz. I am getting the following BUG on ext4 and
xfs, running generic/048 tests of fstests. Each trace is from a
different instance/run.

BTW, for my convenience, do you have these patches in a public git tree?

[  222.007071] BUG: Bad page state in process kswapd0  pfn:132f25
[  222.007108] page:d6f144cbc940 count:0 mapcount:0
mapping:94b2735e3918 index:0x1
[  222.007140] flags: 0x4000()
[  222.007157] raw: 4000 94b2735e3918 0001

[  222.007186] raw: dead0100 dead0200 

[  222.007216] page dumped because: non-NULL mapping
[  222.007288] CPU: 0 PID: 55 Comm: kswapd0 Tainted: GE
4.16.0-rc7-next-20180329-xarray #2
[  222.007289] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014
[  222.007290] Call Trace:
[  222.007297]  dump_stack+0x63/0x85
[  222.007300]  bad_page+0xd5/0x140
[  222.007302]  free_pages_check_bad+0x5f/0x70
[  222.007304]  free_pcppages_bulk+0x423/0x5c0
[  222.007308]  ? xas_load+0x3d/0xc0
[  222.007310]  free_unref_page_commit+0xad/0xd0
[  222.007312]  free_unref_page_list+0x101/0x190
[  222.007315]  release_pages+0x17c/0x3f0
[  222.007317]  __pagevec_release+0x2f/0x40
[  222.007319]  invalidate_mapping_pages+0x2d8/0x310
[  222.007323]  ? memcg_drain_all_list_lrus+0x120/0x120
[  222.007326]  inode_lru_isolate+0x131/0x180
[  222.007328]  __list_lru_walk_one.isra.7+0x92/0x150
[  222.007329]  ? iput+0x220/0x220
[  222.007331]  list_lru_walk_one+0x23/0x30
[  222.007332]  prune_icache_sb+0x40/0x60
[  222.007334]  super_cache_scan+0x137/0x1b0
[  222.007336]  shrink_slab.part.53+0x1ae/0x3a0
[  222.007338]  shrink_slab+0x35/0x40
[  222.007340]  shrink_node+0x158/0x490
[  222.007342]  balance_pgdat+0x149/0x320
[  222.007344]  kswapd+0x15f/0x400
[  222.007347]  ? wait_woken+0x80/0x80
[  222.007350]  kthread+0x121/0x140
[  222.007352]  ? balance_pgdat+0x320/0x320
[  222.007353]  ? kthread_create_worker_on_cpu+0x50/0x50
[  222.007356]  ret_from_fork+0x35/0x40
[  222.007357] Disabling lock debugging due to kernel taint




17252.906122] [ cut here ]
[17252.906124] kernel BUG at fs/inode.c:512!
[17252.906150] invalid opcode:  [#1] SMP PTI
[17252.906467] CPU: 2 PID: 31588 Comm: umount Tainted: GE
 4.16.0-rc7-next-20180329-xarray #2
[17252.906492] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996),
BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014
[17252.906523] RIP: 0010:clear_inode+0x8a/0xa0
[17252.906536] RSP: 0018:ba2302213d28 EFLAGS: 00010086
[17252.906552] RAX:  RBX: 8f1efb3976d8 RCX:

[17252.906571] RDX: 0001 RSI:  RDI:
8f1efb397858
[17252.906590] RBP: ba2302213d38 R08:  R09:

[17252.906609] R10: ba2302213ae8 R11: ba2302213ae8 R12:
8f1efb397858
[17252.906628] R13: c067e580 R14: 8f1dcc4281e8 R15:
8f1ef9fddc68
[17252.906648] FS:  7f6b9eae0fc0() GS:8f1effd0()
knlGS:
[17252.906670] CS:  0010 DS:  ES:  CR0: 80050033
[17252.906686] CR2: 55927e5f2118 CR3: ab29a000 CR4:
06e0
[17252.906708] DR0:  DR1:  DR2:

[17252.906728] DR3:  DR6: fffe0ff0 DR7:
0400
[17252.906747] Call Trace:
[17252.906786]  ext4_clear_inode+0x1a/0x80 [ext4]
[17252.906808]  ext4_evict_inode+0x54/0x590 [ext4]
[17252.906823]  evict+0xca/0x1a0
[17252.906833]  dispose_list+0x39/0x50
[17252.906844]  evict_inodes+0x158/0x170
[17252.906857]  generic_shutdown_super+0x44/0x120
[17252.906871]  kill_block_super+0x27/0x50
[17252.906883]  deactivate_locked_super+0x48/0x80
[17252.906897]  deactivate_super+0x40/0x60
[17252.906910]  cleanup_mnt+0x3f/0x80
[17252.906921]  __cleanup_mnt+0x12/0x20
[17252.906933]  task_work_run+0x9d/0xc0
[17252.907593]  exit_to_usermode_loop+0xa5/0xb0
[17252.908237]  do_syscall_64+0x14a/0x1e0
[17252.908884]  entry_SYSCALL_64_after_hwframe+0x3d/0xa2
[17252.909522] RIP: 0033:0x7f6b9e3b2a57
[17252.910154] RSP: 002b:71d2e6f8 EFLAGS: 0246 ORIG_RAX:
00a6
[17252.910810] RAX: 

Re: [f2fs-dev] [PATCH v2] f2fs: don't use GFP_ZERO for page caches

2018-04-09 Thread Jaegeuk Kim
Change log from v1:
 - don't memset for recovered page

Related to https://lkml.org/lkml/2018/4/8/661

Sometimes, we need to write meta data to new allocated block address,
then we will allocate a zeroed page in inner inode's address space, and
fill partial data in it, and leave other place with zero value which means
some fields are initial status.

There are two inner inodes (meta inode and node inode) setting __GFP_ZERO,
I have just checked them, for both of them, we can avoid using __GFP_ZERO,
and do initialization by ourselves to avoid unneeded/redundant zeroing
from mm.

Cc: 
Signed-off-by: Chao Yu 
Signed-off-by: Jaegeuk Kim 
---
 fs/f2fs/inode.c | 4 ++--
 fs/f2fs/node.c  | 3 ++-
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 417c9dcd0269..87535bf63421 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -320,10 +320,10 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned 
long ino)
 make_now:
if (ino == F2FS_NODE_INO(sbi)) {
inode->i_mapping->a_ops = _node_aops;
-   mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
+   mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
} else if (ino == F2FS_META_INO(sbi)) {
inode->i_mapping->a_ops = _meta_aops;
-   mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
+   mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
} else if (S_ISREG(inode->i_mode)) {
inode->i_op = _file_inode_operations;
inode->i_fop = _file_operations;
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 9a99243054ba..5a4469093e43 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1096,7 +1096,8 @@ struct page *new_node_page(struct dnode_of_data *dn, 
unsigned int ofs)
set_node_addr(sbi, _ni, NEW_ADDR, false);
 
f2fs_wait_on_page_writeback(page, NODE, true);
-   fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
+   memset(F2FS_NODE(page), 0, PAGE_SIZE);
+   fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, false);
set_cold_node(page, S_ISDIR(dn->inode->i_mode));
if (!PageUptodate(page))
SetPageUptodate(page);
-- 
2.15.0.531.g2ccb3012c9-goog


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] mm: workingset: fix NULL ptr dereference

2018-04-09 Thread Matthew Wilcox
On Mon, Apr 09, 2018 at 11:38:27AM -0700, Jaegeuk Kim wrote:
> On 04/09, Minchan Kim wrote:
> > On Mon, Apr 09, 2018 at 04:14:03AM -0700, Matthew Wilcox wrote:
> > > On Mon, Apr 09, 2018 at 12:09:30PM +0900, Minchan Kim wrote:
> > > > On Sun, Apr 08, 2018 at 07:49:25PM -0700, Matthew Wilcox wrote:
> > > > > On Mon, Apr 09, 2018 at 10:58:15AM +0900, Minchan Kim wrote:
> > > > > > It assumes shadow entry of radix tree relies on the init state
> > > > > > that node->private_list allocated should be list_empty state.
> > > > > > Currently, it's initailized in SLAB constructor which means
> > > > > > node of radix tree would be initialized only when *slub allocates
> > > > > > new page*, not *new object*. So, if some FS or subsystem pass
> > > > > > gfp_mask to __GFP_ZERO, slub allocator will do memset blindly.
> > > > > 
> > > > > Wait, what?  Who's declaring their radix tree with GFP_ZERO flags?
> > > > > I don't see anyone using INIT_RADIX_TREE or RADIX_TREE or 
> > > > > RADIX_TREE_INIT
> > > > > with GFP_ZERO.
> > > > 
> > > > Look at fs/f2fs/inode.c
> > > > mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
> > > > 
> > > > __add_to_page_cache_locked
> > > >   radix_tree_maybe_preload
> > > > 
> > > > add_to_page_cache_lru
> > > > 
> > > > What's the wrong with setting __GFP_ZERO with mapping->gfp_mask?
> > > 
> > > Because it's a stupid thing to do.  Pages are allocated and then filled
> > > from disk.  Zeroing them before DMAing to them is just a waste of time.
> > 
> > Every FSes do address_space to read pages from storage? I'm not sure.
> > 
> > If you're right, we need to insert WARN_ON to catch up __GFP_ZERO
> > on mapping_set_gfp_mask at the beginning and remove all of those
> > stupid thins. 
> > 
> > Jaegeuk, why do you need __GFP_ZERO? Could you explain?
> 
> Comment says "__GFP_ZERO returns a zeroed page on success."
> 
> The f2fs maintains two inodes to manage some metadata in the page cache,
> which requires zeroed data when introducing a new structure. It's not
> a big deal to avoid __GFP_ZERO for whatever performance reasons tho, does
> it only matters with f2fs?

This isn't a performance issue.

The problem is that the mapping gfp flags are used not only for allocating
pages, but also for allocating the page cache data structures that hold
the pages.  F2FS is the only filesystem that set the __GFP_ZERO bit,
so it's the first time anyone's noticed that the page cache passes the
__GFP_ZERO bit through to the radix tree allocation routines, which
causes the radix tree nodes to be zeroed instead of constructed.

I think the right solution to this is:

diff --git a/mm/filemap.c b/mm/filemap.c
index c2147682f4c3..a87a523eea8e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -785,7 +785,7 @@ int replace_page_cache_page(struct page *old, struct page 
*new, gfp_t gfp_mask)
VM_BUG_ON_PAGE(!PageLocked(new), new);
VM_BUG_ON_PAGE(new->mapping, new);
 
-   error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
+   error = radix_tree_preload(gfp_mask & ~(__GFP_HIGHMEM | __GFP_ZERO));
if (!error) {
struct address_space *mapping = old->mapping;
void (*freepage)(struct page *);
@@ -841,7 +841,8 @@ static int __add_to_page_cache_locked(struct page *page,
return error;
}
 
-   error = radix_tree_maybe_preload(gfp_mask & ~__GFP_HIGHMEM);
+   error = radix_tree_maybe_preload(gfp_mask &
+   ~(__GFP_HIGHMEM | __GFP_ZERO));
if (error) {
if (!huge)
mem_cgroup_cancel_charge(page, memcg, false);

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] f2fs: don't use GFP_ZERO for page caches

2018-04-09 Thread Jaegeuk Kim
Chao,

I have to test this for a while. Meanwhile, could you take a look at this?

On 04/09, Jaegeuk Kim wrote:
> From: Chao Yu 
> 
> Related to https://lkml.org/lkml/2018/4/8/661
> 
> Sometimes, we need to write meta data to new allocated block address,
> then we will allocate a zeroed page in inner inode's address space, and
> fill partial data in it, and leave other place with zero value which means
> some fields are initial status.
> 
> There are two inner inodes (meta inode and node inode) setting __GFP_ZERO,
> I have just checked them, for both of them, we can avoid using __GFP_ZERO,
> and do initialization by ourselves to avoid unneeded/redundant zeroing
> from mm.
> 
> Cc: 
> Signed-off-by: Chao Yu 
> Signed-off-by: Jaegeuk Kim 
> ---
>  fs/f2fs/inode.c| 4 ++--
>  fs/f2fs/node.c | 6 --
>  fs/f2fs/node.h | 7 ++-
>  fs/f2fs/recovery.c | 3 +--
>  4 files changed, 9 insertions(+), 11 deletions(-)
> 
> diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
> index 417c9dcd0269..87535bf63421 100644
> --- a/fs/f2fs/inode.c
> +++ b/fs/f2fs/inode.c
> @@ -320,10 +320,10 @@ struct inode *f2fs_iget(struct super_block *sb, 
> unsigned long ino)
>  make_now:
>   if (ino == F2FS_NODE_INO(sbi)) {
>   inode->i_mapping->a_ops = _node_aops;
> - mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
> + mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
>   } else if (ino == F2FS_META_INO(sbi)) {
>   inode->i_mapping->a_ops = _meta_aops;
> - mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
> + mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
>   } else if (S_ISREG(inode->i_mode)) {
>   inode->i_op = _file_inode_operations;
>   inode->i_fop = _file_operations;
> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> index 9a99243054ba..6fc3311820ec 100644
> --- a/fs/f2fs/node.c
> +++ b/fs/f2fs/node.c
> @@ -1096,7 +1096,8 @@ struct page *new_node_page(struct dnode_of_data *dn, 
> unsigned int ofs)
>   set_node_addr(sbi, _ni, NEW_ADDR, false);
>  
>   f2fs_wait_on_page_writeback(page, NODE, true);
> - fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
> + memset(F2FS_NODE(page), 0, PAGE_SIZE);
> + fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs);
>   set_cold_node(page, S_ISDIR(dn->inode->i_mode));
>   if (!PageUptodate(page))
>   SetPageUptodate(page);
> @@ -2311,7 +2312,8 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct 
> page *page)
>  
>   if (!PageUptodate(ipage))
>   SetPageUptodate(ipage);
> - fill_node_footer(ipage, ino, ino, 0, true);
> + memset(F2FS_NODE(page), 0, PAGE_SIZE);
> + fill_node_footer(ipage, ino, ino, 0);
>   set_cold_node(page, false);
>  
>   src = F2FS_INODE(page);
> diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
> index b95e49e4a928..42cd081114ab 100644
> --- a/fs/f2fs/node.h
> +++ b/fs/f2fs/node.h
> @@ -263,15 +263,12 @@ static inline block_t next_blkaddr_of_node(struct page 
> *node_page)
>  }
>  
>  static inline void fill_node_footer(struct page *page, nid_t nid,
> - nid_t ino, unsigned int ofs, bool reset)
> + nid_t ino, unsigned int ofs)
>  {
>   struct f2fs_node *rn = F2FS_NODE(page);
>   unsigned int old_flag = 0;
>  
> - if (reset)
> - memset(rn, 0, sizeof(*rn));
> - else
> - old_flag = le32_to_cpu(rn->footer.flag);
> + old_flag = le32_to_cpu(rn->footer.flag);
>  
>   rn->footer.nid = cpu_to_le32(nid);
>   rn->footer.ino = cpu_to_le32(ino);
> diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
> index 1b23d3febe4c..de24f3247aa5 100644
> --- a/fs/f2fs/recovery.c
> +++ b/fs/f2fs/recovery.c
> @@ -540,8 +540,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, 
> struct inode *inode,
>   }
>  
>   copy_node_footer(dn.node_page, page);
> - fill_node_footer(dn.node_page, dn.nid, ni.ino,
> - ofs_of_node(page), false);
> + fill_node_footer(dn.node_page, dn.nid, ni.ino, ofs_of_node(page));
>   set_page_dirty(dn.node_page);
>  err:
>   f2fs_put_dnode();
> -- 
> 2.15.0.531.g2ccb3012c9-goog

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [xfstests-bld PATCH] test-appliance: support f2fs-tools v1.9 and later

2018-04-09 Thread Eric Biggers via Linux-f2fs-devel
On Fri, Apr 06, 2018 at 09:30:36AM -0400, Theodore Y. Ts'o wrote:
> On Thu, Apr 05, 2018 at 03:21:41PM -0700, Eric Biggers wrote:
> > Pass the -f option to mkfs.f2fs when it appears to support it.  This is
> > required by f2fs-tools v1.9 and later in order to format the filesystem
> > even when an existing filesystem is detected.  But earlier versions did
> > not accept this option.
> > 
> > Signed-off-by: Eric Biggers 
> 
> Applied for now, but ugh.  Grepping strings out of binaries is not
> something I really like to depend upon.  Can we convince the f2fs
> folks to provide a "/sbin/mkfs.f2fs -V" which prints a version string,
> or some such?
> 

Yes, it's ugly.  As something maybe a bit better, I've proposed

mkfs.f2fs --help |& grep -q "[[:space:]]-f[[:space:]|]"

in v2 of the xfstests patch to common/config.  So if that gets accepted into
xfstests I'll change this to use the same method.

Thanks,

Eric

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH] f2fs: don't use GFP_ZERO for page caches

2018-04-09 Thread Jaegeuk Kim
From: Chao Yu 

Related to https://lkml.org/lkml/2018/4/8/661

Sometimes, we need to write meta data to new allocated block address,
then we will allocate a zeroed page in inner inode's address space, and
fill partial data in it, and leave other place with zero value which means
some fields are initial status.

There are two inner inodes (meta inode and node inode) setting __GFP_ZERO,
I have just checked them, for both of them, we can avoid using __GFP_ZERO,
and do initialization by ourselves to avoid unneeded/redundant zeroing
from mm.

Cc: 
Signed-off-by: Chao Yu 
Signed-off-by: Jaegeuk Kim 
---
 fs/f2fs/inode.c| 4 ++--
 fs/f2fs/node.c | 6 --
 fs/f2fs/node.h | 7 ++-
 fs/f2fs/recovery.c | 3 +--
 4 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index 417c9dcd0269..87535bf63421 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -320,10 +320,10 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned 
long ino)
 make_now:
if (ino == F2FS_NODE_INO(sbi)) {
inode->i_mapping->a_ops = _node_aops;
-   mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
+   mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
} else if (ino == F2FS_META_INO(sbi)) {
inode->i_mapping->a_ops = _meta_aops;
-   mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
+   mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
} else if (S_ISREG(inode->i_mode)) {
inode->i_op = _file_inode_operations;
inode->i_fop = _file_operations;
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 9a99243054ba..6fc3311820ec 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1096,7 +1096,8 @@ struct page *new_node_page(struct dnode_of_data *dn, 
unsigned int ofs)
set_node_addr(sbi, _ni, NEW_ADDR, false);
 
f2fs_wait_on_page_writeback(page, NODE, true);
-   fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
+   memset(F2FS_NODE(page), 0, PAGE_SIZE);
+   fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs);
set_cold_node(page, S_ISDIR(dn->inode->i_mode));
if (!PageUptodate(page))
SetPageUptodate(page);
@@ -2311,7 +2312,8 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct 
page *page)
 
if (!PageUptodate(ipage))
SetPageUptodate(ipage);
-   fill_node_footer(ipage, ino, ino, 0, true);
+   memset(F2FS_NODE(page), 0, PAGE_SIZE);
+   fill_node_footer(ipage, ino, ino, 0);
set_cold_node(page, false);
 
src = F2FS_INODE(page);
diff --git a/fs/f2fs/node.h b/fs/f2fs/node.h
index b95e49e4a928..42cd081114ab 100644
--- a/fs/f2fs/node.h
+++ b/fs/f2fs/node.h
@@ -263,15 +263,12 @@ static inline block_t next_blkaddr_of_node(struct page 
*node_page)
 }
 
 static inline void fill_node_footer(struct page *page, nid_t nid,
-   nid_t ino, unsigned int ofs, bool reset)
+   nid_t ino, unsigned int ofs)
 {
struct f2fs_node *rn = F2FS_NODE(page);
unsigned int old_flag = 0;
 
-   if (reset)
-   memset(rn, 0, sizeof(*rn));
-   else
-   old_flag = le32_to_cpu(rn->footer.flag);
+   old_flag = le32_to_cpu(rn->footer.flag);
 
rn->footer.nid = cpu_to_le32(nid);
rn->footer.ino = cpu_to_le32(ino);
diff --git a/fs/f2fs/recovery.c b/fs/f2fs/recovery.c
index 1b23d3febe4c..de24f3247aa5 100644
--- a/fs/f2fs/recovery.c
+++ b/fs/f2fs/recovery.c
@@ -540,8 +540,7 @@ static int do_recover_data(struct f2fs_sb_info *sbi, struct 
inode *inode,
}
 
copy_node_footer(dn.node_page, page);
-   fill_node_footer(dn.node_page, dn.nid, ni.ino,
-   ofs_of_node(page), false);
+   fill_node_footer(dn.node_page, dn.nid, ni.ino, ofs_of_node(page));
set_page_dirty(dn.node_page);
 err:
f2fs_put_dnode();
-- 
2.15.0.531.g2ccb3012c9-goog


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH v2] common/config: support f2fs-tools v1.9 and later

2018-04-09 Thread Eric Biggers via Linux-f2fs-devel
Pass the -f option to mkfs.f2fs when it appears to support it.  This is
required by f2fs-tools v1.9 and later in order to format the filesystem
even when an existing filesystem is detected.  But earlier versions did
not accept this option.

mkfs.f2fs doesn't yet have an option to print its version number.  So,
to detect a new enough version we grep for -f in the help output.  This
also works for mkfs.btrfs, so we switch that over to the same method
rather than grepping for "force overwrite" in the binary.

Signed-off-by: Eric Biggers 
---

v2: switch to grepping for -f in the help output

 common/config | 17 +++--
 1 file changed, 11 insertions(+), 6 deletions(-)

diff --git a/common/config b/common/config
index 20f0e5f3..cc318069 100644
--- a/common/config
+++ b/common/config
@@ -94,11 +94,16 @@ set_prog_path()
type -P $1
 }
 
-# Handle mkfs.btrfs which does (or does not) require -f to overwrite
-set_btrfs_mkfs_prog_path_with_opts()
+# Handle mkfs.$fstyp which does (or does not) require -f to overwrite
+set_mkfs_prog_path_with_opts()
 {
-   p=`set_prog_path mkfs.btrfs`
-   if [ "$p" != "" ] && grep -q 'force overwrite' $p; then
+   local fstyp=$1
+   local p=`set_prog_path mkfs.$fstyp`
+
+   # Note: mkfs.f2fs doesn't support the --help option yet, but it doesn't
+   # matter since it also prints the help when an invalid option is given.
+   if [ "$p" != "" ] && \
+   $p --help |& grep -q "[[:space:]]-f[[:space:]|]"; then
echo "$p -f"
else
echo $p
@@ -223,8 +228,8 @@ case "$HOSTOS" in
 export MKFS_XFS_PROG="`set_prog_path mkfs.xfs`"
 export MKFS_EXT4_PROG="`set_prog_path mkfs.ext4`"
 export MKFS_UDF_PROG="`set_prog_path mkudffs`"
-export MKFS_BTRFS_PROG="`set_btrfs_mkfs_prog_path_with_opts`"
-export MKFS_F2FS_PROG="`set_prog_path mkfs.f2fs`"
+export MKFS_BTRFS_PROG="`set_mkfs_prog_path_with_opts btrfs`"
+export MKFS_F2FS_PROG="`set_mkfs_prog_path_with_opts f2fs`"
 export DUMP_F2FS_PROG="`set_prog_path dump.f2fs`"
 export BTRFS_UTIL_PROG="`set_prog_path btrfs`"
 export BTRFS_SHOW_SUPER_PROG="`set_prog_path btrfs-show-super`"
-- 
2.17.0.484.g0c8726318c-goog


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] mm: workingset: fix NULL ptr dereference

2018-04-09 Thread Jaegeuk Kim
On 04/09, Minchan Kim wrote:
> On Mon, Apr 09, 2018 at 04:14:03AM -0700, Matthew Wilcox wrote:
> > On Mon, Apr 09, 2018 at 12:09:30PM +0900, Minchan Kim wrote:
> > > On Sun, Apr 08, 2018 at 07:49:25PM -0700, Matthew Wilcox wrote:
> > > > On Mon, Apr 09, 2018 at 10:58:15AM +0900, Minchan Kim wrote:
> > > > > It assumes shadow entry of radix tree relies on the init state
> > > > > that node->private_list allocated should be list_empty state.
> > > > > Currently, it's initailized in SLAB constructor which means
> > > > > node of radix tree would be initialized only when *slub allocates
> > > > > new page*, not *new object*. So, if some FS or subsystem pass
> > > > > gfp_mask to __GFP_ZERO, slub allocator will do memset blindly.
> > > > 
> > > > Wait, what?  Who's declaring their radix tree with GFP_ZERO flags?
> > > > I don't see anyone using INIT_RADIX_TREE or RADIX_TREE or 
> > > > RADIX_TREE_INIT
> > > > with GFP_ZERO.
> > > 
> > > Look at fs/f2fs/inode.c
> > > mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
> > > 
> > > __add_to_page_cache_locked
> > >   radix_tree_maybe_preload
> > > 
> > > add_to_page_cache_lru
> > > 
> > > What's the wrong with setting __GFP_ZERO with mapping->gfp_mask?
> > 
> > Because it's a stupid thing to do.  Pages are allocated and then filled
> > from disk.  Zeroing them before DMAing to them is just a waste of time.
> 
> Every FSes do address_space to read pages from storage? I'm not sure.
> 
> If you're right, we need to insert WARN_ON to catch up __GFP_ZERO
> on mapping_set_gfp_mask at the beginning and remove all of those
> stupid thins. 
> 
> Jaegeuk, why do you need __GFP_ZERO? Could you explain?

Comment says "__GFP_ZERO returns a zeroed page on success."

The f2fs maintains two inodes to manage some metadata in the page cache,
which requires zeroed data when introducing a new structure. It's not
a big deal to avoid __GFP_ZERO for whatever performance reasons tho, does
it only matters with f2fs?

Thanks,

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] f2fs: enlarge block plug coverage

2018-04-09 Thread Jaegeuk Kim
On 04/08, Chao Yu wrote:
> On 2018/4/5 11:51, Jaegeuk Kim wrote:
> > On 04/04, Chao Yu wrote:
> >> This patch enlarges block plug coverage in __issue_discard_cmd, in
> >> order to collect more pending bios before issuing them, to avoid
> >> being disturbed by previous discard I/O in IO aware discard mode.
> > 
> > Hmm, then we need to wait for huge discard IO for over 10 secs, which
> 
> We found that total discard latency is rely on total discard number we issued
> last time instead of range or length discard covered. IMO, if we don't change
> .max_requests value, we will not suffer longer latency.
> 
> > will affect following read/write IOs accordingly. In order to avoid that,
> > we actually need to limit the discard size.
> 
> If you are worry about I/O interference in between discard and rw, I suggest 
> to
> decrease .max_requests value.

What do you mean? This will produce more pending requests in the queue?

> 
> Thanks,
> 
> > 
> > Thanks,
> > 
> >>
> >> Signed-off-by: Chao Yu 
> >> ---
> >>  fs/f2fs/segment.c | 7 +--
> >>  1 file changed, 5 insertions(+), 2 deletions(-)
> >>
> >> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> >> index 8f0b5ba46315..4287e208c040 100644
> >> --- a/fs/f2fs/segment.c
> >> +++ b/fs/f2fs/segment.c
> >> @@ -1208,10 +1208,12 @@ static int __issue_discard_cmd(struct f2fs_sb_info 
> >> *sbi,
> >>pend_list = >pend_list[i];
> >>  
> >>mutex_lock(>cmd_lock);
> >> +
> >> +  blk_start_plug();
> >> +
> >>if (list_empty(pend_list))
> >>goto next;
> >>f2fs_bug_on(sbi, !__check_rb_tree_consistence(sbi, >root));
> >> -  blk_start_plug();
> >>list_for_each_entry_safe(dc, tmp, pend_list, list) {
> >>f2fs_bug_on(sbi, dc->state != D_PREP);
> >>  
> >> @@ -1227,8 +1229,9 @@ static int __issue_discard_cmd(struct f2fs_sb_info 
> >> *sbi,
> >>if (++iter >= dpolicy->max_requests)
> >>break;
> >>}
> >> -  blk_finish_plug();
> >>  next:
> >> +  blk_finish_plug();
> >> +
> >>mutex_unlock(>cmd_lock);
> >>  
> >>if (iter >= dpolicy->max_requests)
> >> -- 
> >> 2.15.0.55.gc2ece9dc4de6
> > 
> > .
> > 

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] mkfs.f2fs: introduce new option V to show version

2018-04-09 Thread Jaegeuk Kim
On 04/08, Chao Yu wrote:
> On 2018/4/8 10:15, Sheng Yong wrote:
> > This patch introduces a new option -V to show the version of mkfs.f2fs
> > and exit after that.
> 
> BTW, should we add -V for other misc tools?

Yes, could you please add this for others?

> 
> > 
> > Signed-off-by: Sheng Yong 
> 
> Reviewed-by: Chao Yu 
> 
> Thanks,

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] mm: workingset: fix NULL ptr dereference

2018-04-09 Thread David Sterba
On Mon, Apr 09, 2018 at 03:52:15PM +0200, Michal Hocko wrote:
> On Mon 09-04-18 06:41:14, Matthew Wilcox wrote:
> > On Mon, Apr 09, 2018 at 02:48:52PM +0200, Michal Hocko wrote:
> > > On Mon 09-04-18 20:25:06, Chao Yu wrote:
> > > [...]
> > > > diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
> > > > index c852e800..cc63f8c448f0 100644
> > > > --- a/fs/f2fs/inode.c
> > > > +++ b/fs/f2fs/inode.c
> > > > @@ -339,10 +339,10 @@ struct inode *f2fs_iget(struct super_block *sb, 
> > > > unsigned long ino)
> > > >  make_now:
> > > > if (ino == F2FS_NODE_INO(sbi)) {
> > > > inode->i_mapping->a_ops = _node_aops;
> > > > -   mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
> > > > +   mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
> > > 
> > > An unrelated question. Why do you make all allocations for the mapping
> > > NOFS automatically? What kind of reclaim recursion problems are you
> > > trying to prevent?
> > 
> > It's worth noting that this is endemic in filesystems.
> 
> Yes, and I have strong suspicion that this is a mindless copy
> Well, xfs had a good reason for it in the past - mostly to handle deep
> call stacks on complicated storage setups in the past when we used to
> trigger IO from the direct reclaim. I am not sure whether there are
> other reasons to keep the status quo except for finding somebody brave
> enough to post the patch, do all the due testing.
> 
> > $ git grep mapping_set_gfp_mask.*FS
> > drivers/block/loop.c:   mapping_set_gfp_mask(mapping, lo->old_gfp_mask & 
> > ~(__GFP_IO|__GFP_FS));
> > fs/btrfs/disk-io.c: 
> > mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, GFP_NOFS);

Thi was added in 1561deda687eef0
(https://git.kernel.org/linus/1561deda687eef0e9506) and probably after a
deadlock report.

The changelog mentions the potential recursion from fs -> allocation -> fs,
but I'm not sure if this still happens on the MM side today.

For the filesystem part, I think the key functions of the callchain are
still there.

The code was been added in 2011 and the 2nd hunk of the patch added a
code that's not present today AFAICS, so this is worth revisiting.

I still don't understand how it's related to the GFP_HIGHUSER_MOVABLE,
this patch is from time where the metadata pages were possibly allocated
from HIGHMEM but this was removed later in a65917156e345946db
(https://git.kernel.org/linus/a65917156e345946db).

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] mm: workingset: fix NULL ptr dereference

2018-04-09 Thread Matthew Wilcox
On Mon, Apr 09, 2018 at 11:49:58PM +0900, Minchan Kim wrote:
> On Mon, Apr 09, 2018 at 08:25:06PM +0800, Chao Yu wrote:
> > On 2018/4/9 19:25, Minchan Kim wrote:
> > > On Mon, Apr 09, 2018 at 04:14:03AM -0700, Matthew Wilcox wrote:
> > >> On Mon, Apr 09, 2018 at 12:09:30PM +0900, Minchan Kim wrote:
> > >>> Look at fs/f2fs/inode.c
> > >>> mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
> > >>>
> > >>> __add_to_page_cache_locked
> > >>>   radix_tree_maybe_preload
> > >>>
> > >>> add_to_page_cache_lru
> > 
> > No, sometimes, we need to write meta data to new allocated block address,
> > then we will allocate a zeroed page in inner inode's address space, and
> > fill partial data in it, and leave other place with zero value which means
> > some fields are initial status.
> 
> Thanks for the explaining.
> 
> > There are two inner inodes (meta inode and node inode) setting __GFP_ZERO,
> > I have just checked them, for both of them, we can avoid using __GFP_ZERO,
> > and do initialization by ourselves to avoid unneeded/redundant zeroing
> > from mm.
> 
> Yub, it would be desirable for f2fs. Please go ahead for f2fs side.
> However, I think current problem is orthgonal. Now, the problem is
> radix_tree_node allocation is bind to page cache allocation.
> Why does FS cannot allocate page cache with __GFP_ZERO?
> I agree if the concern is only performance matter as Matthew mentioned.
> But it is beyond that because it shouldn't do due to limitation
> of workingset shadow entry implementation. I think such coupling is
> not a good idea.
> 
> I think right approach to abstract shadow entry in radix_tree is
> to mask off __GFP_ZERO in radix_tree's allocation APIs.

I don't think this is something the radix tree should know about.
SLAB should be checking for it (the patch I posted earlier in this
thread), but the right place to filter this out is in the caller of
radix_tree_maybe_preload -- it's already filtering out HIGHMEM pages,
and should filter out GFP_ZERO too.

diff --git a/mm/filemap.c b/mm/filemap.c
index c2147682f4c3..a87a523eea8e 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -785,7 +785,7 @@ int replace_page_cache_page(struct page *old, struct page 
*new, gfp_t gfp_mask)
VM_BUG_ON_PAGE(!PageLocked(new), new);
VM_BUG_ON_PAGE(new->mapping, new);
 
-   error = radix_tree_preload(gfp_mask & ~__GFP_HIGHMEM);
+   error = radix_tree_preload(gfp_mask & ~(__GFP_HIGHMEM | __GFP_ZERO));
if (!error) {
struct address_space *mapping = old->mapping;
void (*freepage)(struct page *);
@@ -841,7 +841,8 @@ static int __add_to_page_cache_locked(struct page *page,
return error;
}
 
-   error = radix_tree_maybe_preload(gfp_mask & ~__GFP_HIGHMEM);
+   error = radix_tree_maybe_preload(gfp_mask &
+   ~(__GFP_HIGHMEM | __GFP_ZERO));
if (error) {
if (!huge)
mem_cgroup_cancel_charge(page, memcg, false);

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] mm: workingset: fix NULL ptr dereference

2018-04-09 Thread Minchan Kim
On Mon, Apr 09, 2018 at 08:25:06PM +0800, Chao Yu wrote:
> On 2018/4/9 19:25, Minchan Kim wrote:
> > On Mon, Apr 09, 2018 at 04:14:03AM -0700, Matthew Wilcox wrote:
> >> On Mon, Apr 09, 2018 at 12:09:30PM +0900, Minchan Kim wrote:
> >>> On Sun, Apr 08, 2018 at 07:49:25PM -0700, Matthew Wilcox wrote:
>  On Mon, Apr 09, 2018 at 10:58:15AM +0900, Minchan Kim wrote:
> > It assumes shadow entry of radix tree relies on the init state
> > that node->private_list allocated should be list_empty state.
> > Currently, it's initailized in SLAB constructor which means
> > node of radix tree would be initialized only when *slub allocates
> > new page*, not *new object*. So, if some FS or subsystem pass
> > gfp_mask to __GFP_ZERO, slub allocator will do memset blindly.
> 
>  Wait, what?  Who's declaring their radix tree with GFP_ZERO flags?
>  I don't see anyone using INIT_RADIX_TREE or RADIX_TREE or RADIX_TREE_INIT
>  with GFP_ZERO.
> >>>
> >>> Look at fs/f2fs/inode.c
> >>> mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
> >>>
> >>> __add_to_page_cache_locked
> >>>   radix_tree_maybe_preload
> >>>
> >>> add_to_page_cache_lru
> >>>
> >>> What's the wrong with setting __GFP_ZERO with mapping->gfp_mask?
> >>
> >> Because it's a stupid thing to do.  Pages are allocated and then filled
> >> from disk.  Zeroing them before DMAing to them is just a waste of time.
> > 
> > Every FSes do address_space to read pages from storage? I'm not sure.
> 
> No, sometimes, we need to write meta data to new allocated block address,
> then we will allocate a zeroed page in inner inode's address space, and
> fill partial data in it, and leave other place with zero value which means
> some fields are initial status.

Thanks for the explaining.

> 
> There are two inner inodes (meta inode and node inode) setting __GFP_ZERO,
> I have just checked them, for both of them, we can avoid using __GFP_ZERO,
> and do initialization by ourselves to avoid unneeded/redundant zeroing
> from mm.

Yub, it would be desirable for f2fs. Please go ahead for f2fs side.
However, I think current problem is orthgonal. Now, the problem is
radix_tree_node allocation is bind to page cache allocation.
Why does FS cannot allocate page cache with __GFP_ZERO?
I agree if the concern is only performance matter as Matthew mentioned.
But it is beyond that because it shouldn't do due to limitation
of workingset shadow entry implementation. I think such coupling is
not a good idea.

I think right approach to abstract shadow entry in radix_tree is
to mask off __GFP_ZERO in radix_tree's allocation APIs.

> 
> To Jaegeuk, if I missed something, please let me know.
> 
> ---
>  fs/f2fs/inode.c | 4 ++--
>  fs/f2fs/node.c  | 2 ++
>  2 files changed, 4 insertions(+), 2 deletions(-)
> 
> diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
> index c852e800..cc63f8c448f0 100644
> --- a/fs/f2fs/inode.c
> +++ b/fs/f2fs/inode.c
> @@ -339,10 +339,10 @@ struct inode *f2fs_iget(struct super_block *sb, 
> unsigned long ino)
>  make_now:
>   if (ino == F2FS_NODE_INO(sbi)) {
>   inode->i_mapping->a_ops = _node_aops;
> - mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
> + mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
>   } else if (ino == F2FS_META_INO(sbi)) {
>   inode->i_mapping->a_ops = _meta_aops;
> - mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
> + mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
>   } else if (S_ISREG(inode->i_mode)) {
>   inode->i_op = _file_inode_operations;
>   inode->i_fop = _file_operations;
> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
> index 9dedd4b5e077..31e5ecf98ffd 100644
> --- a/fs/f2fs/node.c
> +++ b/fs/f2fs/node.c
> @@ -1078,6 +1078,7 @@ struct page *new_node_page(struct dnode_of_data *dn, 
> unsigned int ofs)
>   set_node_addr(sbi, _ni, NEW_ADDR, false);
> 
>   f2fs_wait_on_page_writeback(page, NODE, true);
> + memset(F2FS_NODE(page), 0, PAGE_SIZE);
>   fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
>   set_cold_node(page, S_ISDIR(dn->inode->i_mode));
>   if (!PageUptodate(page))
> @@ -2321,6 +2322,7 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct 
> page *page)
> 
>   if (!PageUptodate(ipage))
>   SetPageUptodate(ipage);
> + memset(F2FS_NODE(page), 0, PAGE_SIZE);
>   fill_node_footer(ipage, ino, ino, 0, true);
>   set_cold_node(page, false);
> 
> -- 
> 
> > 
> > If you're right, we need to insert WARN_ON to catch up __GFP_ZERO
> > on mapping_set_gfp_mask at the beginning and remove all of those
> > stupid thins. 
> > 
> > Jaegeuk, why do you need __GFP_ZERO? Could you explain?
> > 
> > .
> > 
> 

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot

Re: [f2fs-dev] [PATCH] mm: workingset: fix NULL ptr dereference

2018-04-09 Thread Michal Hocko
On Mon 09-04-18 06:41:14, Matthew Wilcox wrote:
> On Mon, Apr 09, 2018 at 02:48:52PM +0200, Michal Hocko wrote:
> > On Mon 09-04-18 20:25:06, Chao Yu wrote:
> > [...]
> > > diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
> > > index c852e800..cc63f8c448f0 100644
> > > --- a/fs/f2fs/inode.c
> > > +++ b/fs/f2fs/inode.c
> > > @@ -339,10 +339,10 @@ struct inode *f2fs_iget(struct super_block *sb, 
> > > unsigned long ino)
> > >  make_now:
> > >   if (ino == F2FS_NODE_INO(sbi)) {
> > >   inode->i_mapping->a_ops = _node_aops;
> > > - mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
> > > + mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
> > 
> > An unrelated question. Why do you make all allocations for the mapping
> > NOFS automatically? What kind of reclaim recursion problems are you
> > trying to prevent?
> 
> It's worth noting that this is endemic in filesystems.

Yes, and I have strong suspicion that this is a mindless copy
Well, xfs had a good reason for it in the past - mostly to handle deep
call stacks on complicated storage setups in the past when we used to
trigger IO from the direct reclaim. I am not sure whether there are
other reasons to keep the status quo except for finding somebody brave
enough to post the patch, do all the due testing.

> $ git grep mapping_set_gfp_mask.*FS
> drivers/block/loop.c:   mapping_set_gfp_mask(mapping, lo->old_gfp_mask & 
> ~(__GFP_IO|__GFP_FS));
> fs/btrfs/disk-io.c: mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, 
> GFP_NOFS);
> fs/f2fs/inode.c:mapping_set_gfp_mask(inode->i_mapping, 
> GFP_F2FS_ZERO);
> fs/f2fs/inode.c:mapping_set_gfp_mask(inode->i_mapping, 
> GFP_F2FS_ZERO);
> fs/gfs2/glock.c:mapping_set_gfp_mask(mapping, GFP_NOFS);
> fs/gfs2/ops_fstype.c:   mapping_set_gfp_mask(mapping, GFP_NOFS);
> fs/jfs/jfs_imap.c:  mapping_set_gfp_mask(ip->i_mapping, GFP_NOFS);
> fs/jfs/super.c: mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
> fs/nilfs2/gcinode.c:mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
> fs/nilfs2/page.c:   mapping_set_gfp_mask(mapping, GFP_NOFS);
> fs/reiserfs/xattr.c:mapping_set_gfp_mask(mapping, GFP_NOFS);
> fs/xfs/xfs_iops.c:  mapping_set_gfp_mask(inode->i_mapping, (gfp_mask & 
> ~(__GFP_FS)));

-- 
Michal Hocko
SUSE Labs

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] mm: workingset: fix NULL ptr dereference

2018-04-09 Thread Christoph Hellwig
On Mon, Apr 09, 2018 at 06:41:14AM -0700, Matthew Wilcox wrote:
> It's worth noting that this is endemic in filesystems.

For the rationale in XFS take a look at commit 
ad22c7a043c2cc6792820e6c5da699935933e87d

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] mm: workingset: fix NULL ptr dereference

2018-04-09 Thread Matthew Wilcox
On Mon, Apr 09, 2018 at 02:48:52PM +0200, Michal Hocko wrote:
> On Mon 09-04-18 20:25:06, Chao Yu wrote:
> [...]
> > diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
> > index c852e800..cc63f8c448f0 100644
> > --- a/fs/f2fs/inode.c
> > +++ b/fs/f2fs/inode.c
> > @@ -339,10 +339,10 @@ struct inode *f2fs_iget(struct super_block *sb, 
> > unsigned long ino)
> >  make_now:
> > if (ino == F2FS_NODE_INO(sbi)) {
> > inode->i_mapping->a_ops = _node_aops;
> > -   mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
> > +   mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
> 
> An unrelated question. Why do you make all allocations for the mapping
> NOFS automatically? What kind of reclaim recursion problems are you
> trying to prevent?

It's worth noting that this is endemic in filesystems.

$ git grep mapping_set_gfp_mask.*FS
drivers/block/loop.c:   mapping_set_gfp_mask(mapping, lo->old_gfp_mask & 
~(__GFP_IO|__GFP_FS));
fs/btrfs/disk-io.c: mapping_set_gfp_mask(fs_info->btree_inode->i_mapping, 
GFP_NOFS);
fs/f2fs/inode.c:mapping_set_gfp_mask(inode->i_mapping, 
GFP_F2FS_ZERO);
fs/f2fs/inode.c:mapping_set_gfp_mask(inode->i_mapping, 
GFP_F2FS_ZERO);
fs/gfs2/glock.c:mapping_set_gfp_mask(mapping, GFP_NOFS);
fs/gfs2/ops_fstype.c:   mapping_set_gfp_mask(mapping, GFP_NOFS);
fs/jfs/jfs_imap.c:  mapping_set_gfp_mask(ip->i_mapping, GFP_NOFS);
fs/jfs/super.c: mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
fs/nilfs2/gcinode.c:mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
fs/nilfs2/page.c:   mapping_set_gfp_mask(mapping, GFP_NOFS);
fs/reiserfs/xattr.c:mapping_set_gfp_mask(mapping, GFP_NOFS);
fs/xfs/xfs_iops.c:  mapping_set_gfp_mask(inode->i_mapping, (gfp_mask & 
~(__GFP_FS)));


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] mm: workingset: fix NULL ptr dereference

2018-04-09 Thread Michal Hocko
On Mon 09-04-18 20:25:06, Chao Yu wrote:
[...]
> diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
> index c852e800..cc63f8c448f0 100644
> --- a/fs/f2fs/inode.c
> +++ b/fs/f2fs/inode.c
> @@ -339,10 +339,10 @@ struct inode *f2fs_iget(struct super_block *sb, 
> unsigned long ino)
>  make_now:
>   if (ino == F2FS_NODE_INO(sbi)) {
>   inode->i_mapping->a_ops = _node_aops;
> - mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
> + mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);

An unrelated question. Why do you make all allocations for the mapping
NOFS automatically? What kind of reclaim recursion problems are you
trying to prevent?
-- 
Michal Hocko
SUSE Labs

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] mm: workingset: fix NULL ptr dereference

2018-04-09 Thread Chao Yu
On 2018/4/9 19:25, Minchan Kim wrote:
> On Mon, Apr 09, 2018 at 04:14:03AM -0700, Matthew Wilcox wrote:
>> On Mon, Apr 09, 2018 at 12:09:30PM +0900, Minchan Kim wrote:
>>> On Sun, Apr 08, 2018 at 07:49:25PM -0700, Matthew Wilcox wrote:
 On Mon, Apr 09, 2018 at 10:58:15AM +0900, Minchan Kim wrote:
> It assumes shadow entry of radix tree relies on the init state
> that node->private_list allocated should be list_empty state.
> Currently, it's initailized in SLAB constructor which means
> node of radix tree would be initialized only when *slub allocates
> new page*, not *new object*. So, if some FS or subsystem pass
> gfp_mask to __GFP_ZERO, slub allocator will do memset blindly.

 Wait, what?  Who's declaring their radix tree with GFP_ZERO flags?
 I don't see anyone using INIT_RADIX_TREE or RADIX_TREE or RADIX_TREE_INIT
 with GFP_ZERO.
>>>
>>> Look at fs/f2fs/inode.c
>>> mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
>>>
>>> __add_to_page_cache_locked
>>>   radix_tree_maybe_preload
>>>
>>> add_to_page_cache_lru
>>>
>>> What's the wrong with setting __GFP_ZERO with mapping->gfp_mask?
>>
>> Because it's a stupid thing to do.  Pages are allocated and then filled
>> from disk.  Zeroing them before DMAing to them is just a waste of time.
> 
> Every FSes do address_space to read pages from storage? I'm not sure.

No, sometimes, we need to write meta data to new allocated block address,
then we will allocate a zeroed page in inner inode's address space, and
fill partial data in it, and leave other place with zero value which means
some fields are initial status.

There are two inner inodes (meta inode and node inode) setting __GFP_ZERO,
I have just checked them, for both of them, we can avoid using __GFP_ZERO,
and do initialization by ourselves to avoid unneeded/redundant zeroing
from mm.

To Jaegeuk, if I missed something, please let me know.

---
 fs/f2fs/inode.c | 4 ++--
 fs/f2fs/node.c  | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/f2fs/inode.c b/fs/f2fs/inode.c
index c852e800..cc63f8c448f0 100644
--- a/fs/f2fs/inode.c
+++ b/fs/f2fs/inode.c
@@ -339,10 +339,10 @@ struct inode *f2fs_iget(struct super_block *sb, unsigned 
long ino)
 make_now:
if (ino == F2FS_NODE_INO(sbi)) {
inode->i_mapping->a_ops = _node_aops;
-   mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
+   mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
} else if (ino == F2FS_META_INO(sbi)) {
inode->i_mapping->a_ops = _meta_aops;
-   mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
+   mapping_set_gfp_mask(inode->i_mapping, GFP_NOFS);
} else if (S_ISREG(inode->i_mode)) {
inode->i_op = _file_inode_operations;
inode->i_fop = _file_operations;
diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
index 9dedd4b5e077..31e5ecf98ffd 100644
--- a/fs/f2fs/node.c
+++ b/fs/f2fs/node.c
@@ -1078,6 +1078,7 @@ struct page *new_node_page(struct dnode_of_data *dn, 
unsigned int ofs)
set_node_addr(sbi, _ni, NEW_ADDR, false);

f2fs_wait_on_page_writeback(page, NODE, true);
+   memset(F2FS_NODE(page), 0, PAGE_SIZE);
fill_node_footer(page, dn->nid, dn->inode->i_ino, ofs, true);
set_cold_node(page, S_ISDIR(dn->inode->i_mode));
if (!PageUptodate(page))
@@ -2321,6 +2322,7 @@ int recover_inode_page(struct f2fs_sb_info *sbi, struct 
page *page)

if (!PageUptodate(ipage))
SetPageUptodate(ipage);
+   memset(F2FS_NODE(page), 0, PAGE_SIZE);
fill_node_footer(ipage, ino, ino, 0, true);
set_cold_node(page, false);

-- 

> 
> If you're right, we need to insert WARN_ON to catch up __GFP_ZERO
> on mapping_set_gfp_mask at the beginning and remove all of those
> stupid thins. 
> 
> Jaegeuk, why do you need __GFP_ZERO? Could you explain?
> 
> .
> 


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH] f2fs: use cur_map instead of ckpt_map

2018-04-09 Thread Yunlei He
This patch use cur_map instead of ckpt_map for easy to
understand, no functional change.

Signed-off-by: Yunlei He 
---
 fs/f2fs/segment.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 5854cc4..f5d0499 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1559,7 +1559,7 @@ static bool add_discard_addrs(struct f2fs_sb_info *sbi, 
struct cp_control *cpc,
 
/* SIT_VBLOCK_MAP_SIZE should be multiple of sizeof(unsigned long) */
for (i = 0; i < entries; i++)
-   dmap[i] = force ? ~ckpt_map[i] & ~discard_map[i] :
+   dmap[i] = force ? ~cur_map[i] & ~discard_map[i] :
(cur_map[i] ^ ckpt_map[i]) & ckpt_map[i];
 
while (force || SM_I(sbi)->dcc_info->nr_discards <=
-- 
1.9.1


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


[f2fs-dev] [PATCH v3] f2fs: stop issue discard if something wrong with f2fs

2018-04-09 Thread Yunlei He
This patch stop async thread and umount process to issue discard
if something wrong with f2fs, which is similar to fstrim.

v1->v2: add fs error check in not only discard thread but also umount process
v2->v3: remove redundant error message

Signed-off-by: Yunlei He 
---
 fs/f2fs/segment.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
index 5854cc4..e31d506 100644
--- a/fs/f2fs/segment.c
+++ b/fs/f2fs/segment.c
@@ -1202,6 +1202,9 @@ static int __issue_discard_cmd(struct f2fs_sb_info *sbi,
int i, iter = 0, issued = 0;
bool io_interrupted = false;
 
+   if (is_sbi_flag_set(sbi, SBI_NEED_FSCK))
+   return issued;
+
for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
if (i + 1 < dpolicy->granularity)
break;
-- 
1.9.1


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] mm: workingset: fix NULL ptr dereference

2018-04-09 Thread Minchan Kim
On Mon, Apr 09, 2018 at 04:14:03AM -0700, Matthew Wilcox wrote:
> On Mon, Apr 09, 2018 at 12:09:30PM +0900, Minchan Kim wrote:
> > On Sun, Apr 08, 2018 at 07:49:25PM -0700, Matthew Wilcox wrote:
> > > On Mon, Apr 09, 2018 at 10:58:15AM +0900, Minchan Kim wrote:
> > > > It assumes shadow entry of radix tree relies on the init state
> > > > that node->private_list allocated should be list_empty state.
> > > > Currently, it's initailized in SLAB constructor which means
> > > > node of radix tree would be initialized only when *slub allocates
> > > > new page*, not *new object*. So, if some FS or subsystem pass
> > > > gfp_mask to __GFP_ZERO, slub allocator will do memset blindly.
> > > 
> > > Wait, what?  Who's declaring their radix tree with GFP_ZERO flags?
> > > I don't see anyone using INIT_RADIX_TREE or RADIX_TREE or RADIX_TREE_INIT
> > > with GFP_ZERO.
> > 
> > Look at fs/f2fs/inode.c
> > mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
> > 
> > __add_to_page_cache_locked
> >   radix_tree_maybe_preload
> > 
> > add_to_page_cache_lru
> > 
> > What's the wrong with setting __GFP_ZERO with mapping->gfp_mask?
> 
> Because it's a stupid thing to do.  Pages are allocated and then filled
> from disk.  Zeroing them before DMAing to them is just a waste of time.

Every FSes do address_space to read pages from storage? I'm not sure.

If you're right, we need to insert WARN_ON to catch up __GFP_ZERO
on mapping_set_gfp_mask at the beginning and remove all of those
stupid thins. 

Jaegeuk, why do you need __GFP_ZERO? Could you explain?

--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH v2] f2fs: stop issue discard if something wrong with f2fs

2018-04-09 Thread heyunlei


>-Original Message-
>From: Yuchao (T)
>Sent: Monday, April 09, 2018 7:05 PM
>To: heyunlei; jaeg...@kernel.org; linux-f2fs-devel@lists.sourceforge.net
>Cc: Wangbintian; Zhangdianfang (Euler)
>Subject: Re: [f2fs-dev][PATCH v2] f2fs: stop issue discard if something wrong 
>with f2fs
>
>On 2018/4/9 11:54, Yunlei He wrote:
>> This patch stop discard thread to issue discard io if
>> something wrong with f2fs, which is similar to fstrim.
>>
>> v1->v2: add fs error check in not only discard thread but also umount process
>>
>> Signed-off-by: Yunlei He 
>> ---
>>  fs/f2fs/segment.c | 7 ++-
>>  1 file changed, 6 insertions(+), 1 deletion(-)
>>
>> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
>> index 5854cc4..7f45cc4 100644
>> --- a/fs/f2fs/segment.c
>> +++ b/fs/f2fs/segment.c
>> @@ -1202,6 +1202,12 @@ static int __issue_discard_cmd(struct f2fs_sb_info 
>> *sbi,
>>  int i, iter = 0, issued = 0;
>>  bool io_interrupted = false;
>>
>> +if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
>> +f2fs_msg(sbi->sb, KERN_WARNING,
>> +"Found FS corruption, run fsck to fix.");
>
>I think we don't need to print this message redundantly if we have print in all
>places we set the flag.
>
>So let's just skip issuing discard in async thread and __submit_discard_cmd.

Okay, I'll resend a new version.
 
>
>> +return issued;
>> +}
>> +
>>  for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
>>  if (i + 1 < dpolicy->granularity)
>>  break;
>> @@ -1410,7 +1416,6 @@ static int issue_discard_thread(void *data)
>>  continue;
>>  if (kthread_should_stop())
>>  return 0;
>> -
>
>We'd better to avoid meaningless blank line removal.

Sorry for this mistake.

Thanks.
>
>Thank,s
>
>>  if (dcc->discard_wake)
>>  dcc->discard_wake = 0;
>>
>>


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] mm: workingset: fix NULL ptr dereference

2018-04-09 Thread Matthew Wilcox
On Mon, Apr 09, 2018 at 12:09:30PM +0900, Minchan Kim wrote:
> On Sun, Apr 08, 2018 at 07:49:25PM -0700, Matthew Wilcox wrote:
> > On Mon, Apr 09, 2018 at 10:58:15AM +0900, Minchan Kim wrote:
> > > It assumes shadow entry of radix tree relies on the init state
> > > that node->private_list allocated should be list_empty state.
> > > Currently, it's initailized in SLAB constructor which means
> > > node of radix tree would be initialized only when *slub allocates
> > > new page*, not *new object*. So, if some FS or subsystem pass
> > > gfp_mask to __GFP_ZERO, slub allocator will do memset blindly.
> > 
> > Wait, what?  Who's declaring their radix tree with GFP_ZERO flags?
> > I don't see anyone using INIT_RADIX_TREE or RADIX_TREE or RADIX_TREE_INIT
> > with GFP_ZERO.
> 
> Look at fs/f2fs/inode.c
> mapping_set_gfp_mask(inode->i_mapping, GFP_F2FS_ZERO);
> 
> __add_to_page_cache_locked
>   radix_tree_maybe_preload
> 
> add_to_page_cache_lru
> 
> What's the wrong with setting __GFP_ZERO with mapping->gfp_mask?

Because it's a stupid thing to do.  Pages are allocated and then filled
from disk.  Zeroing them before DMAing to them is just a waste of time.


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH v4] f2fs: introduce a method to make nat journal more fresh

2018-04-09 Thread Chao Yu
On 2018/3/16 18:24, Yunlei He wrote:
> This patch introduce a method to make nat journal more fresh:
> i.  sort set list using dirty entry number and cp version
> average value.
> ii. if meet with cache hit, update average version valus with
> current cp version.
> 
> With this patch, newly modified nat set will flush to journal,
> and flush old nat set with same dirty entry number to nat area.
> 
> Signed-off-by: Yunlei He 

Reviewed-by: Chao Yu 

Thanks,


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH v2] f2fs: stop issue discard if something wrong with f2fs

2018-04-09 Thread Chao Yu
On 2018/4/9 11:54, Yunlei He wrote:
> This patch stop discard thread to issue discard io if
> something wrong with f2fs, which is similar to fstrim.
> 
> v1->v2: add fs error check in not only discard thread but also umount process
> 
> Signed-off-by: Yunlei He 
> ---
>  fs/f2fs/segment.c | 7 ++-
>  1 file changed, 6 insertions(+), 1 deletion(-)
> 
> diff --git a/fs/f2fs/segment.c b/fs/f2fs/segment.c
> index 5854cc4..7f45cc4 100644
> --- a/fs/f2fs/segment.c
> +++ b/fs/f2fs/segment.c
> @@ -1202,6 +1202,12 @@ static int __issue_discard_cmd(struct f2fs_sb_info 
> *sbi,
>   int i, iter = 0, issued = 0;
>   bool io_interrupted = false;
>  
> + if (is_sbi_flag_set(sbi, SBI_NEED_FSCK)) {
> + f2fs_msg(sbi->sb, KERN_WARNING,
> + "Found FS corruption, run fsck to fix.");

I think we don't need to print this message redundantly if we have print in all
places we set the flag.

So let's just skip issuing discard in async thread and __submit_discard_cmd.

> + return issued;
> + }
> +
>   for (i = MAX_PLIST_NUM - 1; i >= 0; i--) {
>   if (i + 1 < dpolicy->granularity)
>   break;
> @@ -1410,7 +1416,6 @@ static int issue_discard_thread(void *data)
>   continue;
>   if (kthread_should_stop())
>   return 0;
> -

We'd better to avoid meaningless blank line removal.

Thank,s

>   if (dcc->discard_wake)
>   dcc->discard_wake = 0;
>  
> 


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] f2fs: disable nat bits without setting SBI_NEED_FSCK

2018-04-09 Thread Chao Yu
On 2018/4/9 11:40, Yunlei He wrote:
> nat bits feature is recover default by fsck, no need to
> set SBI_NEED_FSCK flag.
> 
> Signed-off-by: Yunlei He 

Reviewed-by: Chao Yu 

Thanks,


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH] fsck.f2fs: recover nat bits feature default by fsck

2018-04-09 Thread Chao Yu
On 2018/4/9 11:34, Yunlei He wrote:
> Now, nat bits feature is enabled by default, we will
> meet with the following scenarios:
> 
> i.   disabled, without CP_NAT_BITS_FLAG, if fsck find some
>  fs errors, fix or write new checkpoint will then enable it.
> ii.  enabled, with CP_NAT_BITS_FLAG, in the case of sudden
>  power off, bitmap will get lost but CP_NAT_BITS_FLAG
>  still exist, fsck will recover bitmap in f2fs_do_mount.
> iii. enabled, with CP_NAT_BITS_FLAG, both of bitmap and
>  CP_NAT_BITS_FLAG will get lost if not enough space for
>  nat bits or nat bits check failed during mounting.
>  SBI_NEED_FSCK is set, fsck will recover flag and bitmap
>  before next mount.
> 
> SBI_NEED_FSCK means fs is corrupted, is not suitable for
> nat bits disabled. This patch try to recover nat bits all
> by fsck, no need set SBI_NEED_FSCK flag in kernel.
> 
> Signed-off-by: Yunlei He 

Reviewed-by: Chao Yu 

Thanks,


--
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
___
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel


Re: [f2fs-dev] [PATCH v4] f2fs: introduce a method to make nat journal more fresh

2018-04-09 Thread heyunlei


>-Original Message-
>From: Yuchao (T)
>Sent: Wednesday, March 28, 2018 9:28 AM
>To: heyunlei; jaeg...@kernel.org; linux-f2fs-devel@lists.sourceforge.net
>Cc: Wangbintian
>Subject: Re: [f2fs-dev][PATCH v4] f2fs: introduce a method to make nat journal 
>more fresh
>
>On 2018/3/16 18:24, Yunlei He wrote:
>> This patch introduce a method to make nat journal more fresh:
>> i.  sort set list using dirty entry number and cp version
>> average value.
>> ii. if meet with cache hit, update average version valus with
>> current cp version.
>>
>> With this patch, newly modified nat set will flush to journal,
>> and flush old nat set with same dirty entry number to nat area.
>>
>> Signed-off-by: Yunlei He 
>> ---
>>  fs/f2fs/node.c | 39 ---
>>  fs/f2fs/node.h |  4 +++-
>>  2 files changed, 27 insertions(+), 16 deletions(-)
>>
>> diff --git a/fs/f2fs/node.c b/fs/f2fs/node.c
>> index 177c438..c511ef6 100644
>> --- a/fs/f2fs/node.c
>> +++ b/fs/f2fs/node.c
>> @@ -193,8 +193,8 @@ static void __del_from_nat_cache(struct f2fs_nm_info 
>> *nm_i, struct nat_entry *e)
>>  __free_nat_entry(e);
>>  }
>>
>> -static void __set_nat_cache_dirty(struct f2fs_nm_info *nm_i,
>> -struct nat_entry *ne)
>> +static void __set_nat_cache_dirty(struct f2fs_sb_info *sbi, bool 
>> remove_journal,
>> +struct f2fs_nm_info *nm_i, struct nat_entry *ne)
>>  {
>>  nid_t set = NAT_BLOCK_OFFSET(ne->ni.nid);
>>  struct nat_entry_set *head;
>> @@ -207,12 +207,18 @@ static void __set_nat_cache_dirty(struct f2fs_nm_info 
>> *nm_i,
>>  INIT_LIST_HEAD(>set_list);
>>  head->set = set;
>>  head->entry_cnt = 0;
>> +head->cp_ver = 0;
>
>head->cp_ver = div_u64(cur_cp_version(F2FS_CKPT(sbi)), NAT_JOURNAL_ENTRIES)?
>
>Thanks,
>
>>  f2fs_radix_tree_insert(_i->nat_set_root, set, head);
>>  }
>>
>>  if (get_nat_flag(ne, IS_DIRTY))
>>  goto refresh_list;
>>
>> +/* journal hit case, try to locate set in journal */
>> +if (!remove_journal && head->entry_cnt <= NAT_JOURNAL_ENTRIES)
>> +head->cp_ver += div_u64(cur_cp_version(F2FS_CKPT(sbi)),
>> +NAT_JOURNAL_ENTRIES);

Cp version will increase here.

Thanks.
>> +
>>  nm_i->dirty_nat_cnt++;
>>  head->entry_cnt++;
>>  set_nat_flag(ne, IS_DIRTY, true);
>> @@ -357,7 +363,7 @@ static void set_node_addr(struct f2fs_sb_info *sbi, 
>> struct node_info *ni,
>>  nat_set_blkaddr(e, new_blkaddr);
>>  if (new_blkaddr == NEW_ADDR || new_blkaddr == NULL_ADDR)
>>  set_nat_flag(e, IS_CHECKPOINTED, false);
>> -__set_nat_cache_dirty(nm_i, e);
>> +__set_nat_cache_dirty(sbi, false, nm_i, e);
>>
>>  /* update fsync_mark if its inode nat entry is still alive */
>>  if (ni->nid != ni->ino)
>> @@ -2395,14 +2401,14 @@ static void remove_nats_in_journal(struct 
>> f2fs_sb_info *sbi)
>>  spin_unlock(_i->nid_list_lock);
>>  }
>>
>> -__set_nat_cache_dirty(nm_i, ne);
>> +__set_nat_cache_dirty(sbi, true, nm_i, ne);
>>  }
>>  update_nats_in_cursum(journal, -i);
>>  up_write(>journal_rwsem);
>>  }
>>
>> -static void __adjust_nat_entry_set(struct nat_entry_set *nes,
>> -struct list_head *head, int max)
>> +static void __adjust_nat_entry_set(struct f2fs_sb_info *sbi,
>> +struct nat_entry_set *nes, struct list_head *head, int max)
>>  {
>>  struct nat_entry_set *cur;
>>
>> @@ -2410,7 +2416,9 @@ static void __adjust_nat_entry_set(struct 
>> nat_entry_set *nes,
>>  goto add_out;
>>
>>  list_for_each_entry(cur, head, set_list) {
>> -if (cur->entry_cnt >= nes->entry_cnt) {
>> +if (cur->entry_cnt > nes->entry_cnt ||
>> +(cur->entry_cnt == nes->entry_cnt &&
>> +cur->cp_ver < nes->cp_ver)) {
>>  list_add(>set_list, cur->set_list.prev);
>>  return;
>>  }
>> @@ -2458,7 +2466,6 @@ static void __flush_nat_entry_set(struct f2fs_sb_info 
>> *sbi,
>>  struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
>>  struct f2fs_journal *journal = curseg->journal;
>>  nid_t start_nid = set->set * NAT_ENTRY_PER_BLOCK;
>> -bool to_journal = true;
>>  struct f2fs_nat_block *nat_blk;
>>  struct nat_entry *ne, *cur;
>>  struct page *page = NULL;
>> @@ -2468,11 +2475,14 @@ static void __flush_nat_entry_set(struct 
>> f2fs_sb_info *sbi,
>>   * #1, flush nat entries to journal in current hot data summary block.
>>   * #2, flush nat entries to nat page.
>>   */
>> +
>> +set->to_journal = true;
>> +
>>  if (enabled_nat_bits(sbi, cpc) ||
>>  !__has_cursum_space(journal, set->entry_cnt, NAT_JOURNAL))
>> -to_journal =