On Tue, 16 Apr 2013 12:04:05 +0400, Vyacheslav Dubeyko wrote:
> From: Vyacheslav Dubeyko <[email protected]>
> Subject: [PATCH] nilfs2: fix issue with flush kernel thread after remount in
> RO mode because of driver's internal error or metadata corruption
>
> DESCRIPTION:
> NILFS2 driver remounts itself in RO mode in the case of metadata corruption
> discovering (for example, in the case of broken bmap discovering). But,
> usually, it takes place file system operations before remounting in RO mode.
> Thereby, NILFS2 driver can be in RO mode with presence of dirty pages in
> modified inodes' address spaces. It results in flush kernel thread's infinite
> trying to flush dirty pages in RO mode. As a result, it is possible to see
> such side effects as: (1) flush kernel thread occupies 50% - 99% of CPU time;
> (2) system can't be shutdowned without manual power switch off.
>
> SYMPTOMS:
> (1) System log contains error message: "Remounting filesystem read-only".
> (2) The flush kernel thread occupies 50% - 99% of CPU time.
> (3) The system can't be shutdowned without manual power switch off.
>
> REPRODUCTION PATH:
> (1) Create volume group with name "unencrypted" by means of vgcreate utility.
> (2) Run script (prepared by Anthony Doggett <[email protected]>):
>
> ----------------[BEGIN SCRIPT]--------------------
> #!/bin/bash
>
> VG=unencrypted
> #apt-get install nilfs-tools darcs
> lvcreate --size 2G --name ntest $VG
> mkfs.nilfs2 -b 1024 -B 8192 /dev/mapper/$VG-ntest
> mkdir /var/tmp/n
> mkdir /var/tmp/n/ntest
> mount /dev/mapper/$VG-ntest /var/tmp/n/ntest
> mkdir /var/tmp/n/ntest/thedir
> cd /var/tmp/n/ntest/thedir
> sleep 2
> date
> darcs init
> sleep 2
> dmesg|tail -n 5
> date
> darcs whatsnew || true
> date
> sleep 2
> dmesg|tail -n 5
> ----------------[END SCRIPT]--------------------
>
> (3) Try to shutdown the system.
>
> REPRODUCIBILITY: 100%
>
> FIX:
> The patch implements checking mount state of NILFS2 driver in
> nilfs_writepage(), nilfs_writepages() and nilfs_mdt_write_page() methods. If
> it is detected the RO mount state then all dirty pages are simply discarded
> with warning messages is written in system log.
>
> Signed-off-by: Vyacheslav Dubeyko <[email protected]>
> CC: Ryusuke Konishi <[email protected]>
> Tested-by: Vyacheslav Dubeyko <[email protected]>
Thank you for posting this fix.
I'll comment on code inline.
> ---
> fs/nilfs2/inode.c | 17 +++++++++++++
> fs/nilfs2/mdt.c | 19 +++++++++++----
> fs/nilfs2/page.c | 69
> ++++++++++++++++++++++++++++++++++++++++++++++++++++-
> fs/nilfs2/page.h | 3 ++-
> 4 files changed, 102 insertions(+), 6 deletions(-)
>
> diff --git a/fs/nilfs2/inode.c b/fs/nilfs2/inode.c
> index 6b49f14..7f4db1d 100644
> --- a/fs/nilfs2/inode.c
> +++ b/fs/nilfs2/inode.c
> @@ -175,6 +175,11 @@ static int nilfs_writepages(struct address_space
> *mapping,
> struct inode *inode = mapping->host;
> int err = 0;
>
> + if (inode->i_sb->s_flags & MS_RDONLY) {
> + nilfs_clear_dirty_pages(mapping, false);
> + return -EROFS;
> + }
> +
> if (wbc->sync_mode == WB_SYNC_ALL)
> err = nilfs_construct_dsync_segment(inode->i_sb, inode,
> wbc->range_start,
> @@ -187,6 +192,18 @@ static int nilfs_writepage(struct page *page, struct
> writeback_control *wbc)
> struct inode *inode = page->mapping->host;
> int err;
>
> + if (inode && (inode->i_sb->s_flags & MS_RDONLY)) {
> + /*
> + * It means that filesystem was remounted in read-only
> + * mode because of error or metadata corruption. But we
> + * have dirty pages that try to be flushed in background.
> + * So, here we simply discard this dirty page.
> + */
> + err = nilfs_discard_page_in_ro_mode(page);
> + if (err == -EROFS) /* page is unlocked */
> + return err;
> + }
> +
> redirty_page_for_writepage(wbc, page);
> unlock_page(page);
>
> diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c
> index f9897d0..a386fb0 100644
> --- a/fs/nilfs2/mdt.c
> +++ b/fs/nilfs2/mdt.c
> @@ -375,14 +375,25 @@ int nilfs_mdt_fetch_dirty(struct inode *inode)
> static int
> nilfs_mdt_write_page(struct page *page, struct writeback_control *wbc)
> {
> - struct inode *inode;
> + struct inode *inode = page->mapping->host;
> struct super_block *sb;
> int err = 0;
>
> + if (inode && (inode->i_sb->s_flags & MS_RDONLY)) {
> + /*
> + * It means that filesystem was remounted in read-only
> + * mode because of error or metadata corruption. But we
> + * have dirty pages that try to be flushed in background.
> + * So, here we simply discard this dirty page.
> + */
> + err = nilfs_discard_page_in_ro_mode(page);
> + if (err == -EROFS) /* page is unlocked */
> + return err;
> + }
> +
> redirty_page_for_writepage(wbc, page);
> unlock_page(page);
>
> - inode = page->mapping->host;
> if (!inode)
> return 0;
>
> @@ -561,10 +572,10 @@ void nilfs_mdt_restore_from_shadow_map(struct inode
> *inode)
> if (mi->mi_palloc_cache)
> nilfs_palloc_clear_cache(inode);
>
> - nilfs_clear_dirty_pages(inode->i_mapping);
> + nilfs_clear_dirty_pages(inode->i_mapping, true);
> nilfs_copy_back_pages(inode->i_mapping, &shadow->frozen_data);
>
> - nilfs_clear_dirty_pages(&ii->i_btnode_cache);
> + nilfs_clear_dirty_pages(&ii->i_btnode_cache, true);
> nilfs_copy_back_pages(&ii->i_btnode_cache, &shadow->frozen_btnodes);
>
> nilfs_bmap_restore(ii->i_bmap, &shadow->bmap_store);
> diff --git a/fs/nilfs2/page.c b/fs/nilfs2/page.c
> index 07f76db..9aa6793 100644
> --- a/fs/nilfs2/page.c
> +++ b/fs/nilfs2/page.c
> @@ -108,6 +108,61 @@ void nilfs_forget_buffer(struct buffer_head *bh)
> }
>
> /**
> + * nilfs_discard_page_in_ro_mode - discard page dirty state in RO mode
> + * @page: discarded dirty page
> + */
> +int nilfs_discard_page_in_ro_mode(struct page *page)
> +{
> + struct inode *inode = page->mapping->host;
> + struct super_block *sb;
> +
> + if (!inode)
> + return -EINVAL;
This check is redundant. Please remove it.
> +
> + BUG_ON(!test_bit(PG_locked, &page->flags));
> +
> + sb = inode->i_sb;
The local variable "sb" looks unused.
> +
> + if (inode->i_sb->s_flags & MS_RDONLY) {
This verification is also redundant. Please remove it.
> + /*
> + * It means that filesystem was remounted in read-only
> + * mode because of error or metadata corruption. But we
> + * have dirty pages that try to be flushed in background.
> + * So, here we simply discard this dirty page.
> + */
> + nilfs_warning(inode->i_sb, __func__,
> + "discard dirty page: offset %lld, ino %lu\n",
> + page_offset(page), inode->i_ino);
> +
> + ClearPageUptodate(page);
> + ClearPageMappedToDisk(page);
> + if (page_has_buffers(page)) {
> + struct buffer_head *bh, *head;
> +
> + bh = head = page_buffers(page);
> + do {
> + lock_buffer(bh);
> + nilfs_warning(inode->i_sb, __func__,
> + "discard dirty block %llu, size %lu\n",
> + (u64)bh->b_blocknr, bh->b_size);
> + clear_buffer_dirty(bh);
> + clear_buffer_nilfs_volatile(bh);
> + clear_buffer_nilfs_checked(bh);
> + clear_buffer_nilfs_redirected(bh);
> + clear_buffer_uptodate(bh);
> + clear_buffer_mapped(bh);
> + unlock_buffer(bh);
> + } while (bh = bh->b_this_page, bh != head);
> + }
> + __nilfs_clear_page_dirty(page);
These lines are basically same as the loop body of
nilfs_clear_dirty_pages().
> + unlock_page(page);
This unlock_page() is confusing because it is asymmetric and
conditional. Please move it to callers.
> + } else
> + return 0;
> +
> + return -EROFS;
These return statements are eliminable by removing redundant checks
that I commented above.
After all, I think you should add
void nilfs_clear_dirty_page(struct page *page, bool silent);
which separates the loop body of the nilfs_clear_dirty_pages(),
and use it instead of nilfs_discard_page_in_ro_mode().
Thanks,
Ryusuke Konishi
> +}
> +
> +/**
> * nilfs_copy_buffer -- copy buffer data and flags
> * @dbh: destination buffer
> * @sbh: source buffer
> @@ -370,7 +425,7 @@ repeat:
> goto repeat;
> }
>
> -void nilfs_clear_dirty_pages(struct address_space *mapping)
> +void nilfs_clear_dirty_pages(struct address_space *mapping, bool silent)
> {
> struct pagevec pvec;
> unsigned int i;
> @@ -384,12 +439,24 @@ void nilfs_clear_dirty_pages(struct address_space
> *mapping)
> struct page *page = pvec.pages[i];
> struct buffer_head *bh, *head;
>
> + if (!silent) {
> + nilfs_warning(mapping->host->i_sb, __func__,
> + "discard page: offset %lld, ino %lu\n",
> + page_offset(page), mapping->host->i_ino);
> + }
> +
> lock_page(page);
> ClearPageUptodate(page);
> ClearPageMappedToDisk(page);
> bh = head = page_buffers(page);
> do {
> lock_buffer(bh);
> + if (!silent) {
> + nilfs_warning(mapping->host->i_sb,
> + __func__,
> + "discard block %llu, size %lu\n",
> + (u64)bh->b_blocknr, bh->b_size);
> + }
> clear_buffer_dirty(bh);
> clear_buffer_nilfs_volatile(bh);
> clear_buffer_nilfs_checked(bh);
> diff --git a/fs/nilfs2/page.h b/fs/nilfs2/page.h
> index fb7de71..3dcc5c8 100644
> --- a/fs/nilfs2/page.h
> +++ b/fs/nilfs2/page.h
> @@ -49,13 +49,14 @@ int __nilfs_clear_page_dirty(struct page *);
> struct buffer_head *nilfs_grab_buffer(struct inode *, struct address_space *,
> unsigned long, unsigned long);
> void nilfs_forget_buffer(struct buffer_head *);
> +int nilfs_discard_page_in_ro_mode(struct page *page);
> void nilfs_copy_buffer(struct buffer_head *, struct buffer_head *);
> int nilfs_page_buffers_clean(struct page *);
> void nilfs_page_bug(struct page *);
>
> int nilfs_copy_dirty_pages(struct address_space *, struct address_space *);
> void nilfs_copy_back_pages(struct address_space *, struct address_space *);
> -void nilfs_clear_dirty_pages(struct address_space *);
> +void nilfs_clear_dirty_pages(struct address_space *, bool);
> void nilfs_mapping_init(struct address_space *mapping, struct inode *inode,
> struct backing_dev_info *bdi);
> unsigned nilfs_page_count_clean_buffers(struct page *, unsigned, unsigned);
> --
> 1.7.9.5
>
>
>
> --
> To unsubscribe from this list: send the line "unsubscribe linux-nilfs" in
> the body of a message to [email protected]
> More majordomo info at http://vger.kernel.org/majordomo-info.html
--
To unsubscribe from this list: send the line "unsubscribe linux-nilfs" in
the body of a message to [email protected]
More majordomo info at http://vger.kernel.org/majordomo-info.html