On sudden f2fs shutdown, the zoned block device status and the f2fs current segment positions in the metadata can become inconsistent. When f2fs shuts down before write operations complete, the write pointers of the zoned block device may have advanced while the f2fs metadata still keeps the current segments at their positions before the write operations. After the f2fs is remounted, this inconsistency causes writes at positions other than the write pointers, and an "Unaligned write command" error is reported. This error was observed when xfstests test case generic/388 was run with f2fs on a zoned block device.
To avoid the error, have fsck.f2fs check the consistency between each current segment's position and the write pointer of the zone the current segment points to. If the write pointer is ahead of the current segment, fix the current segment position by setting it to the write pointer position. If the write pointer is at the zone end, find a new zone and set the current segment position to the start of the new zone. If the write pointer is behind the current segment, write zero data at the write pointer position so that the write pointer reaches the current segment position. When inconsistencies are found, turn on the c.bug_on flag in fsck_verify() to ask users whether to fix them. When inconsistencies get fixed, turn on the 'force' flag in fsck_verify() to enforce fixes in the following checks. This position fix is done at the beginning of the do_fsck() function so that the other checks reflect the current segment modification. Also add GET_SEC_FROM_SEG and GET_SEG_FROM_SEC macros in fsck/f2fs.h to simplify the code. 
Signed-off-by: Shin'ichiro Kawasaki <[email protected]> --- fsck/f2fs.h | 5 ++ fsck/fsck.c | 198 ++++++++++++++++++++++++++++++++++++++++++++++++++++ fsck/fsck.h | 3 + fsck/main.c | 2 + 4 files changed, 208 insertions(+) diff --git a/fsck/f2fs.h b/fsck/f2fs.h index 4dc6698..2c1c2b3 100644 --- a/fsck/f2fs.h +++ b/fsck/f2fs.h @@ -337,6 +337,11 @@ static inline block_t __end_block_addr(struct f2fs_sb_info *sbi) #define GET_BLKOFF_FROM_SEG0(sbi, blk_addr) \ (GET_SEGOFF_FROM_SEG0(sbi, blk_addr) & (sbi->blocks_per_seg - 1)) +#define GET_SEC_FROM_SEG(sbi, segno) \ + ((segno) / (sbi)->segs_per_sec) +#define GET_SEG_FROM_SEC(sbi, secno) \ + ((secno) * (sbi)->segs_per_sec) + #define FREE_I_START_SEGNO(sbi) \ GET_SEGNO_FROM_SEG0(sbi, SM_I(sbi)->main_blkaddr) #define GET_R2L_SEGNO(sbi, segno) (segno + FREE_I_START_SEGNO(sbi)) diff --git a/fsck/fsck.c b/fsck/fsck.c index 8953ca1..a0f6849 100644 --- a/fsck/fsck.c +++ b/fsck/fsck.c @@ -2574,6 +2574,190 @@ out: return cnt; } +/* + * Search a free section in main area. Start search from the section specified + * with segno argument toward main area end. Return first segment of the found + * section in segno argument. 
+ */ +static int find_next_free_section(struct f2fs_sb_info *sbi, + unsigned int *segno) +{ + unsigned int i, sec, section_valid_blocks; + unsigned int end_segno = GET_SEGNO(sbi, SM_I(sbi)->main_blkaddr) + + SM_I(sbi)->main_segments; + unsigned int end_sec = GET_SEC_FROM_SEG(sbi, end_segno); + struct seg_entry *se; + struct curseg_info *cs; + + for (sec = GET_SEC_FROM_SEG(sbi, *segno); sec < end_sec; sec++) { + /* skip sections that still hold valid blocks */ + section_valid_blocks = 0; + for (i = 0; i < sbi->segs_per_sec; i++) { + se = get_seg_entry(sbi, GET_SEG_FROM_SEC(sbi, sec) + i); + section_valid_blocks += se->valid_blocks; + } + if (section_valid_blocks) + continue; + + /* no curseg may use the section (TODO confirm NO_CHECK_TYPE == NR_CURSEG_TYPE) */ + for (i = 0; i < NO_CHECK_TYPE; i++) { + cs = &SM_I(sbi)->curseg_array[i]; + if (GET_SEC_FROM_SEG(sbi, cs->segno) == sec) + break; + } + if (i >= NR_CURSEG_TYPE) { + *segno = GET_SEG_FROM_SEC(sbi, sec); + return 0; + } + } + + return -1; +} + +struct write_pointer_check_data { + struct f2fs_sb_info *sbi; + struct device_info *dev; +}; + +static int fsck_chk_write_pointer(int i, struct blk_zone *blkz, void *opaque) +{ + struct write_pointer_check_data *wpd = opaque; + struct f2fs_sb_info *sbi = wpd->sbi; + struct device_info *dev = wpd->dev; + struct f2fs_fsck *fsck = F2FS_FSCK(sbi); + block_t zone_block, wp_block, wp_blkoff, cs_block, b; + unsigned int zone_segno, wp_segno, new_segno; + struct seg_entry *se; + struct curseg_info *cs; + int cs_index, ret; + int log_sectors_per_block = sbi->log_blocksize - SECTOR_SHIFT; + unsigned int segs_per_zone = sbi->segs_per_sec * sbi->secs_per_zone; + void *zero_blk; + + if (blk_zone_conv(blkz)) + return 0; + + zone_block = dev->start_blkaddr + + (blk_zone_sector(blkz) >> log_sectors_per_block); + zone_segno = GET_SEGNO(sbi, zone_block); + wp_block = dev->start_blkaddr + + (blk_zone_wp_sector(blkz) >> log_sectors_per_block); + wp_segno = GET_SEGNO(sbi, wp_block); + wp_blkoff = wp_block - START_BLOCK(sbi, 
wp_segno); + + /* find the curseg, if any, whose segment lies in this zone */ + for (cs_index = 0; cs_index < NO_CHECK_TYPE; cs_index++) { + cs = &SM_I(sbi)->curseg_array[cs_index]; + if (zone_segno <= cs->segno && + cs->segno < zone_segno + segs_per_zone) + break; + } + + if (cs_index >= NR_CURSEG_TYPE) + return 0; + + /* consistent if the write pointer is at the curseg next block */ + cs_block = START_BLOCK(sbi, cs->segno) + cs->next_blkoff; + if (wp_block == cs_block) + return 0; + + if (!c.fix_on) { + MSG(0, "Inconsistent write pointer: " + "curseg %d[0x%x,0x%x] wp[0x%x,0x%x]\n", + cs_index, cs->segno, cs->next_blkoff, wp_segno, wp_blkoff); + fsck->chk.wp_inconsistent_zones++; + return 0; + } + + /* + * If the curseg is ahead of the write pointer, write zero blocks to + * move the write pointer forward to the same position as the curseg. + */ + if (wp_block < cs_block) { + ret = 0; + zero_blk = calloc(BLOCK_SZ, 1); + if (!zero_blk) + return -EINVAL; + + FIX_MSG("Advance write pointer to match with curseg %d: " + "[0x%x,0x%x]->[0x%x,0x%x]", + cs_index, wp_segno, wp_blkoff, + cs->segno, cs->next_blkoff); + for (b = wp_block; b < cs_block && !ret; b++) + ret = dev_write_block(zero_blk, b); + + fsck->chk.wp_fixed_zones++; + free(zero_blk); + return ret; + } + + if (wp_segno == zone_segno + segs_per_zone) { + /* + * If the write pointer is ahead of the curseg and at the zone + * end (section end), search for a new free zone (section) from + * the write pointer position toward the main area end. 
+ */ + new_segno = wp_segno; + ret = find_next_free_section(sbi, &new_segno); + if (ret) { + /* search again from main area start */ + new_segno = GET_SEGNO(sbi, SM_I(sbi)->main_blkaddr); + ret = find_next_free_section(sbi, &new_segno); + } + if (ret) { + MSG(0, "Free section not found\n"); + return ret; + } + FIX_MSG("New section for curseg %d: [0x%x,0x%x]->[0x%x,0x%x]", + cs_index, cs->segno, cs->next_blkoff, new_segno, 0); + cs->segno = new_segno; + cs->next_blkoff = 0; + } else { + /* + * If the write pointer is ahead of the curseg but still within + * the zone, move the curseg position to the write pointer + * position. + */ + ASSERT(wp_segno < zone_segno + segs_per_zone); + FIX_MSG("Advance curseg %d: [0x%x,0x%x]->[0x%x,0x%x]", + cs_index, cs->segno, cs->next_blkoff, + wp_segno, wp_blkoff); + cs->segno = wp_segno; + cs->next_blkoff = wp_blkoff; + } + + se = get_seg_entry(sbi, cs->segno); + se->type = cs_index; + fsck->chk.wp_fixed_zones++; + + return 0; +} + +void fsck_chk_write_pointers(struct f2fs_sb_info *sbi) +{ + unsigned int i; + struct f2fs_fsck *fsck = F2FS_FSCK(sbi); + struct write_pointer_check_data wpd = { sbi, NULL }; + + if (c.zoned_model != F2FS_ZONED_HM) + return; + + for (i = 0; i < MAX_DEVICES; i++) { + if (!c.devices[i].path) + break; + + wpd.dev = c.devices + i; + if (f2fs_report_zones(i, fsck_chk_write_pointer, &wpd)) { + printf("[FSCK] Write pointer check failed: %s\n", + c.devices[i].path); + return; + } + } + + if (fsck->chk.wp_fixed_zones && c.fix_on) + write_curseg_info(sbi); +} + int fsck_chk_curseg_info(struct f2fs_sb_info *sbi) { struct curseg_info *curseg; @@ -2624,6 +2808,20 @@ int fsck_verify(struct f2fs_sb_info *sbi) printf("\n"); + if (c.zoned_model == F2FS_ZONED_HM) { + printf("[FSCK] Write pointers consistency "); + if (fsck->chk.wp_inconsistent_zones == 0x0) { + printf(" [Ok..]\n"); + } else { + printf(" [Fail] [0x%x]\n", + fsck->chk.wp_inconsistent_zones); + c.bug_on = 1; + } + + if (fsck->chk.wp_fixed_zones && c.fix_on) 
+ force = 1; + } + if (c.feature & cpu_to_le32(F2FS_FEATURE_LOST_FOUND)) { for (i = 0; i < fsck->nr_nat_entries; i++) if (f2fs_test_bit(i, fsck->nat_area_bitmap) != 0) diff --git a/fsck/fsck.h b/fsck/fsck.h index d38e8de..aa3dbe7 100644 --- a/fsck/fsck.h +++ b/fsck/fsck.h @@ -80,6 +80,8 @@ struct f2fs_fsck { u32 multi_hard_link_files; u64 sit_valid_blocks; u32 sit_free_segs; + u32 wp_fixed_zones; + u32 wp_inconsistent_zones; } chk; struct hard_link_node *hard_link_list_head; @@ -156,6 +158,7 @@ int fsck_chk_inline_dentries(struct f2fs_sb_info *, struct f2fs_node *, struct child_info *); void fsck_chk_checkpoint(struct f2fs_sb_info *sbi); int fsck_chk_meta(struct f2fs_sb_info *sbi); +void fsck_chk_write_pointers(struct f2fs_sb_info *); int fsck_chk_curseg_info(struct f2fs_sb_info *); void pretty_print_filename(const u8 *raw_name, u32 len, char out[F2FS_PRINT_NAMELEN], int enc_name); diff --git a/fsck/main.c b/fsck/main.c index 9aca024..4b4a789 100644 --- a/fsck/main.c +++ b/fsck/main.c @@ -584,6 +584,8 @@ static void do_fsck(struct f2fs_sb_info *sbi) print_cp_state(flag); + fsck_chk_write_pointers(sbi); + fsck_chk_curseg_info(sbi); if (!c.fix_on && !c.bug_on) { -- 2.21.0 _______________________________________________ Linux-f2fs-devel mailing list [email protected] https://lists.sourceforge.net/lists/listinfo/linux-f2fs-devel
