Hi Weichao,

This patch corrupts the checkpoint, showing dangling nids, when I run my test,
which injects faults in a shutdown loop.

Thanks,

On 07/28, Chao Yu wrote:
> On 2018/7/27 4:54, Weichao Guo wrote:
> > Both checkpoints may become invalid in the following case:
> > 1. write checkpoint A, B, C;
> > 2. sudden power-cut during write of checkpoint D;
> > 3. fsck changes the total block count of checkpoint C;
> > 4. sudden power-cut while fsck writes checkpoint C in place
> > 
> >  ---------           ---------
> > |  ver C  |         |  ver D  |
> > |   ...   |         |   ...   |
> > | content |         | content |
> > |   ...   |         |   ...   |
> > |  ver C  |         |         |
> > |  ver A  |         |  ver B  |
> >  ---------           ---------
> > 
> > As the total block count of checkpoint C is changed, an old cp block
> > like ver A or an invalid cp block may be referenced.
> > To avoid leaving both checkpoints invalid, and considering that fsck
> > should not update the checkpoint version, fsck could write the checkpoint
> > out of place first and then write the checkpoint in place. This
> > makes sure the file system is fixed by fsck and at least one
> > of the two checkpoints is valid.
> > 
> > Signed-off-by: Weichao Guo <guoweic...@huawei.com>
> > ---
> >  fsck/defrag.c |  2 +-
> >  fsck/fsck.c   | 14 +++++++++++++-
> >  fsck/fsck.h   |  1 +
> >  fsck/mount.c  | 16 ++++++++++++++--
> >  fsck/sload.c  |  2 +-
> >  5 files changed, 30 insertions(+), 5 deletions(-)
> > 
> > diff --git a/fsck/defrag.c b/fsck/defrag.c
> > index bea0293..9fc932f 100644
> > --- a/fsck/defrag.c
> > +++ b/fsck/defrag.c
> > @@ -96,7 +96,7 @@ int f2fs_defragment(struct f2fs_sb_info *sbi, u64 from, 
> > u64 len, u64 to, int lef
> >     /* flush dirty sit entries */
> >     flush_sit_entries(sbi);
> >  
> > -   write_checkpoint(sbi);
> > +   __write_checkpoint(sbi);
> >  
> >     return 0;
> >  }
> > diff --git a/fsck/fsck.c b/fsck/fsck.c
> > index 91c8529..f2ff4bc 100644
> > --- a/fsck/fsck.c
> > +++ b/fsck/fsck.c
> > @@ -1943,7 +1943,7 @@ static void flush_curseg_sit_entries(struct 
> > f2fs_sb_info *sbi)
> >     free(sit_blk);
> >  }
> >  
> > -static void fix_checkpoint(struct f2fs_sb_info *sbi)
> > +static void __fix_checkpoint(struct f2fs_sb_info *sbi)
> >  {
> >     struct f2fs_fsck *fsck = F2FS_FSCK(sbi);
> >     struct f2fs_super_block *sb = F2FS_RAW_SUPER(sbi);
> > @@ -2004,6 +2004,18 @@ static void fix_checkpoint(struct f2fs_sb_info *sbi)
> >             write_nat_bits(sbi, sb, cp, sbi->cur_cp);
> >  }
> >  
> > +static void fix_checkpoint(struct f2fs_sb_info *sbi)
> > +{
> > +   int i = 0;
> > +
> > +   for (i = 0; i < 2; i++) {
> > +           /* write checkpoint out of place first */
> > +           sbi->cur_cp = sbi->cur_cp % 2 + 1;
> > +           __fix_checkpoint(sbi);
> > +           f2fs_fsync_device();
> 
> The return value needs to be checked here.
> 
> Can we add the code below at the end of __fix_checkpoint()?
> 
>       ret = f2fs_fsync_device();
>       ASSERT(ret >= 0);
> 
> > +   }
> > +}
> > +
> >  int check_curseg_offset(struct f2fs_sb_info *sbi)
> >  {
> >     int i;
> > diff --git a/fsck/fsck.h b/fsck/fsck.h
> > index 8e133fa..068dd34 100644
> > --- a/fsck/fsck.h
> > +++ b/fsck/fsck.h
> > @@ -175,6 +175,7 @@ extern void flush_sit_entries(struct f2fs_sb_info *);
> >  extern void move_curseg_info(struct f2fs_sb_info *, u64);
> >  extern void write_curseg_info(struct f2fs_sb_info *);
> >  extern int find_next_free_block(struct f2fs_sb_info *, u64 *, int, int);
> > +extern void __write_checkpoint(struct f2fs_sb_info *);
> >  extern void write_checkpoint(struct f2fs_sb_info *);
> >  extern void update_data_blkaddr(struct f2fs_sb_info *, nid_t, u16, 
> > block_t);
> >  extern void update_nat_blkaddr(struct f2fs_sb_info *, nid_t, nid_t, 
> > block_t);
> > diff --git a/fsck/mount.c b/fsck/mount.c
> > index e5574c5..8a29421 100644
> > --- a/fsck/mount.c
> > +++ b/fsck/mount.c
> > @@ -1856,7 +1856,7 @@ void flush_journal_entries(struct f2fs_sb_info *sbi)
> >     int n_sits = flush_sit_journal_entries(sbi);
> >  
> >     if (n_nats || n_sits)
> > -           write_checkpoint(sbi);
> > +           __write_checkpoint(sbi);
> >  }
> >  
> >  void flush_sit_entries(struct f2fs_sb_info *sbi)
> > @@ -2079,7 +2079,7 @@ void nullify_nat_entry(struct f2fs_sb_info *sbi, u32 
> > nid)
> >     free(nat_block);
> >  }
> >  
> > -void write_checkpoint(struct f2fs_sb_info *sbi)
> > +void __write_checkpoint(struct f2fs_sb_info *sbi)
> >  {
> >     struct f2fs_checkpoint *cp = F2FS_CKPT(sbi);
> >     struct f2fs_super_block *sb = F2FS_RAW_SUPER(sbi);
> > @@ -2144,6 +2144,18 @@ void write_checkpoint(struct f2fs_sb_info *sbi)
> >     ASSERT(ret >= 0);
> >  }
> >  
> > +void write_checkpoint(struct f2fs_sb_info *sbi)
> > +{
> > +   int i = 0;
> > +
> > +   for (i = 0; i < 2; i++) {
> > +           /* write checkpoint out of place first */
> > +           sbi->cur_cp = sbi->cur_cp % 2 + 1;
> > +           __write_checkpoint(sbi);
> > +           f2fs_fsync_device();
> 
> Ditto.
> 
> Thanks,
> 
> > +   }
> > +}
> > +
> >  void build_nat_area_bitmap(struct f2fs_sb_info *sbi)
> >  {
> >     struct curseg_info *curseg = CURSEG_I(sbi, CURSEG_HOT_DATA);
> > diff --git a/fsck/sload.c b/fsck/sload.c
> > index 2842f2c..53d80fa 100644
> > --- a/fsck/sload.c
> > +++ b/fsck/sload.c
> > @@ -325,6 +325,6 @@ int f2fs_sload(struct f2fs_sb_info *sbi)
> >     /* flush dirty sit entries */
> >     flush_sit_entries(sbi);
> >  
> > -   write_checkpoint(sbi);
> > +   __write_checkpoint(sbi);
> >     return 0;
> >  }
> > 

------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most
engaging tech sites, Slashdot.org! http://sdm.link/slashdot
_______________________________________________
Linux-f2fs-devel mailing list
Linux-f2fs-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel

Reply via email to