If btrfs/125 is run with the nospace_cache or space_cache=v2 mount option,
btrfs will block with the following backtrace:

Call Trace:
 __schedule+0x2d4/0xae0
 schedule+0x3d/0x90
 btrfs_start_ordered_extent+0x160/0x200 [btrfs]
 ? wake_atomic_t_function+0x60/0x60
 btrfs_run_ordered_extent_work+0x25/0x40 [btrfs]
 btrfs_scrubparity_helper+0x1c1/0x620 [btrfs]
 btrfs_flush_delalloc_helper+0xe/0x10 [btrfs]
 process_one_work+0x2af/0x720
 ? process_one_work+0x22b/0x720
 worker_thread+0x4b/0x4f0
 kthread+0x10f/0x150
 ? process_one_work+0x720/0x720
 ? kthread_create_on_node+0x40/0x40
 ret_from_fork+0x2e/0x40

The direct cause is that the error handler in run_delalloc_nocow() doesn't
handle errors from btrfs_reloc_clone_csums() well.

The relevant part of the call path is:
__extent_writepage
|- writepage_delalloc()
|  |- run_delalloc_range()
|     |- run_delalloc_nocow()
|        |- btrfs_add_ordered_extent()
|        |  Now one ordered extent for a file range, e.g. [0, 1M), is inserted
|        |
|        |- btrfs_reloc_clone_csums()
|        |  Fails with -EIO, as RAID5/6 doesn't repair some csum tree
|        |  blocks
|        |
|        |- extent_clear_unlock_delalloc()
|           Error routine, unlock and clear page DIRTY, end page writeback
|           So the remaining 255 pages will not go through writeback
|
|- __extent_writepage_io()
   |- writepage_end_io_hook()  
      |- btrfs_dec_test_ordered_pending()
         Reduce ordered_extent->bytes_left by 4K.
         Still have (1M - 4K) to finish.

Since the remaining 255 pages will neither go through IO nor trigger
writepage_end_io_hook(), the ordered extent for [0, 1M) will
never finish, blocking the current transaction forever.

Although the root cause is still in RAID5/6, it won't hurt to fix the
error routine first.

This patch cleans up the ordered extent in the error routine, so at least
we won't cause a deadlock.

Signed-off-by: Qu Wenruo <[email protected]>
---
 fs/btrfs/extent_io.c    |  1 -
 fs/btrfs/inode.c        | 10 ++++++++--
 fs/btrfs/ordered-data.c | 25 +++++++++++++++++++++++++
 fs/btrfs/ordered-data.h | 10 ++++++++++
 4 files changed, 43 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index 4ac383a3a649..a14d1b0840c5 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -3258,7 +3258,6 @@ static noinline_for_stack int writepage_delalloc(struct inode *inode,
                                               delalloc_end,
                                               &page_started,
                                               nr_written);
-               /* File system has been set read-only */
                if (ret) {
                        SetPageError(page);
                        /* fill_delalloc should be return < 0 for error
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1e861a063721..3c3ade58afd7 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -1052,8 +1052,11 @@ static noinline int cow_file_range(struct inode *inode,
                    BTRFS_DATA_RELOC_TREE_OBJECTID) {
                        ret = btrfs_reloc_clone_csums(inode, start,
                                                      cur_alloc_size);
-                       if (ret)
+                       if (ret) {
+                               btrfs_clean_ordered_extent(inode, start,
+                                                          ram_size);
                                goto out_drop_extent_cache;
+                       }
                }
 
                btrfs_dec_block_group_reservations(fs_info, ins.objectid);
@@ -1538,7 +1541,7 @@ static noinline int run_delalloc_nocow(struct inode *inode,
        if (!ret)
                ret = err;
 
-       if (ret && cur_offset < end)
+       if (ret && cur_offset < end) {
                extent_clear_unlock_delalloc(inode, cur_offset, end, end,
                                             locked_page, EXTENT_LOCKED |
                                             EXTENT_DELALLOC | EXTENT_DEFRAG |
@@ -1546,6 +1549,9 @@ static noinline int run_delalloc_nocow(struct inode *inode,
                                             PAGE_CLEAR_DIRTY |
                                             PAGE_SET_WRITEBACK |
                                             PAGE_END_WRITEBACK);
+               btrfs_clean_ordered_extent(inode, cur_offset,
+                                          end - cur_offset + 1);
+       }
        btrfs_free_path(path);
        return ret;
 }
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 041c3326d109..dba1cf3464a7 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -650,6 +650,31 @@ void btrfs_remove_ordered_extent(struct inode *inode,
        wake_up(&entry->wait);
 }
 
+void btrfs_clean_ordered_extent(struct inode *inode, u64 file_offset,
+                               u64 ram_len)
+{
+       struct btrfs_ordered_extent *entry;
+       struct btrfs_root *root = BTRFS_I(inode)->root;
+
+       entry = btrfs_lookup_ordered_range(inode, file_offset, ram_len);
+       if (!entry || entry->file_offset != file_offset ||
+           entry->len != ram_len)
+               goto not_found;
+
+       /* Same as btrfs_finish_ordered_io() */
+       btrfs_remove_ordered_extent(inode, entry);
+       btrfs_put_ordered_extent(entry);
+       btrfs_put_ordered_extent(entry);
+       return;
+
+not_found:
+       WARN_ON(1);
+       btrfs_err(root->fs_info,
+       "failed to find and clean ordered extent: root %llu ino %llu 
file_offset %llu len %llu",
+                 root->objectid, btrfs_ino(inode), file_offset, ram_len);
+       return;
+}
+
 static void btrfs_run_ordered_extent_work(struct btrfs_work *work)
 {
        struct btrfs_ordered_extent *ordered;
diff --git a/fs/btrfs/ordered-data.h b/fs/btrfs/ordered-data.h
index 5f2b0ca28705..7a989778aa89 100644
--- a/fs/btrfs/ordered-data.h
+++ b/fs/btrfs/ordered-data.h
@@ -163,6 +163,16 @@ btrfs_ordered_inode_tree_init(struct btrfs_ordered_inode_tree *t)
 void btrfs_put_ordered_extent(struct btrfs_ordered_extent *entry);
 void btrfs_remove_ordered_extent(struct inode *inode,
                                struct btrfs_ordered_extent *entry);
+
+/*
+ * Clean up an allocated ordered extent in an error routine.
+ *
+ * As the error handler in run_delalloc_range() will clear all related pages
+ * and skip their IO, we have no way to finish the inserted ordered extent.
+ * So we must use this function to clean it up.
+ */
+void btrfs_clean_ordered_extent(struct inode *inode, u64 file_offset,
+                               u64 ram_len);
 int btrfs_dec_test_ordered_pending(struct inode *inode,
                                   struct btrfs_ordered_extent **cached,
                                   u64 file_offset, u64 io_size, int uptodate);
-- 
2.11.1



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to