On 2021/4/16 2:50 AM, Josef Bacik wrote:
On 4/15/21 1:04 AM, Qu Wenruo wrote:
The new function, end_bio_subpage_eb_writepage(), will handle the
metadata writeback endio.

The major differences involved are:
- How to grab the extent buffer
   Now that page::private is a pointer to btrfs_subpage, we can no longer
   grab the extent buffer directly.
   Thus we need to use bv_offset to locate the extent buffer manually and
   iterate through the whole range.

- Use the btrfs_subpage_clear_writeback() helper
   It will handle clearing the subpage writeback bits for us.

Since this function is executed in endio context, it can't take
eb->refs_lock when grabbing extent buffers, as that lock is not designed
to be taken in hardirq context.

So introduce a helper, find_extent_buffer_nospinlock(), for this
situation, and convert find_extent_buffer() to use it.
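
After the conversion, find_extent_buffer() becomes a thin wrapper around
the new helper, roughly like this (a sketch for illustration only, the
existing stale-eb handling is kept as-is):

struct extent_buffer *find_extent_buffer(struct btrfs_fs_info *fs_info,
                     u64 start)
{
    struct extent_buffer *eb;

    eb = find_extent_buffer_nospinlock(fs_info, start);
    if (!eb)
        return NULL;
    /*
     * Lock and unlock refs_lock to serialize against a racing
     * free_extent_buffer() when the eb is flagged stale.
     */
    if (test_bit(EXTENT_BUFFER_STALE, &eb->bflags)) {
        spin_lock(&eb->refs_lock);
        spin_unlock(&eb->refs_lock);
    }
    mark_extent_buffer_accessed(eb, NULL);
    return eb;
}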

Signed-off-by: Qu Wenruo <w...@suse.com>
---
  fs/btrfs/extent_io.c | 135 +++++++++++++++++++++++++++++++++----------
  1 file changed, 106 insertions(+), 29 deletions(-)

diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index a50adbd8808d..21a14b1cb065 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4080,13 +4080,97 @@ static void set_btree_ioerr(struct page *page, struct extent_buffer *eb)
      }
  }
+/*
+ * This is the endio specific version which won't take any spinlock that is
+ * unsafe in endio context.
+ */
+static struct extent_buffer *find_extent_buffer_nospinlock(
+        struct btrfs_fs_info *fs_info, u64 start)
+{
+    struct extent_buffer *eb;
+
+    rcu_read_lock();
+    eb = radix_tree_lookup(&fs_info->buffer_radix,
+                   start >> fs_info->sectorsize_bits);
+    if (eb && atomic_inc_not_zero(&eb->refs)) {
+        rcu_read_unlock();
+        return eb;
+    }
+    rcu_read_unlock();
+    return NULL;
+}
+/*
+ * The endio function for subpage extent buffer write.
+ *
+ * Unlike end_bio_extent_buffer_writepage(), we only call end_page_writeback()
+ * after all extent buffers in the page have finished their writeback.
+ */
+static void end_bio_subpage_eb_writepage(struct btrfs_fs_info *fs_info,
+                     struct bio *bio)
+{
+    struct bio_vec *bvec;
+    struct bvec_iter_all iter_all;
+
+    ASSERT(!bio_flagged(bio, BIO_CLONED));
+    bio_for_each_segment_all(bvec, bio, iter_all) {
+        struct page *page = bvec->bv_page;
+        u64 bvec_start = page_offset(page) + bvec->bv_offset;
+        u64 bvec_end = bvec_start + bvec->bv_len - 1;
+        u64 cur_bytenr = bvec_start;
+
+        ASSERT(IS_ALIGNED(bvec->bv_len, fs_info->nodesize));
+
+        /* Iterate through all extent buffers in the range */
+        while (cur_bytenr <= bvec_end) {
+            struct extent_buffer *eb;
+            int done;
+
+            /*
+             * Here we can't use find_extent_buffer(), as it may
+             * try to lock eb->refs_lock, which is not safe in endio
+             * context.
+             */
+            eb = find_extent_buffer_nospinlock(fs_info, cur_bytenr);
+            ASSERT(eb);
+
+            cur_bytenr = eb->start + eb->len;
+
+            ASSERT(test_bit(EXTENT_BUFFER_WRITEBACK, &eb->bflags));
+            done = atomic_dec_and_test(&eb->io_pages);
+            ASSERT(done);
+
+            if (bio->bi_status ||
+                test_bit(EXTENT_BUFFER_WRITE_ERR, &eb->bflags)) {
+                ClearPageUptodate(page);
+                set_btree_ioerr(page, eb);
+            }
+
+            btrfs_subpage_clear_writeback(fs_info, page, eb->start,
+                              eb->len);
+            end_extent_buffer_writeback(eb);
+            /*
+             * free_extent_buffer() would grab a spinlock, which is not
+             * safe in endio context. Thus we manually decrement the ref
+             * here.
+             */
+            atomic_dec(&eb->refs);
+        }
+    }
+    bio_put(bio);
+}
+
  static void end_bio_extent_buffer_writepage(struct bio *bio)
  {
+    struct btrfs_fs_info *fs_info;
      struct bio_vec *bvec;
      struct extent_buffer *eb;
      int done;
      struct bvec_iter_all iter_all;
+    fs_info = btrfs_sb(bio_first_page_all(bio)->mapping->host->i_sb);
+    if (fs_info->sectorsize < PAGE_SIZE)
+        return end_bio_subpage_eb_writepage(fs_info, bio);
+

You replace the write_one_eb() call with one specifically for subpage, so why not just use your special endio from there, without polluting the normal writepage helper? Thanks,
Josef

That makes sense, I'd go that direction.
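
Something like this, as a rough sketch (write_one_subpage_eb() is only an
illustrative name here, and the details are elided):

/* Make the subpage endio a proper bio_end_io_t so it can be hooked directly */
static void end_bio_subpage_eb_writepage(struct bio *bio)
{
    struct btrfs_fs_info *fs_info;
    struct bio_vec *bvec;
    struct bvec_iter_all iter_all;

    fs_info = btrfs_sb(bio_first_page_all(bio)->mapping->host->i_sb);
    ASSERT(fs_info->sectorsize < PAGE_SIZE);

    ASSERT(!bio_flagged(bio, BIO_CLONED));
    bio_for_each_segment_all(bvec, bio, iter_all) {
        ASSERT(IS_ALIGNED(bvec->bv_len, fs_info->nodesize));
        /* Same per-bvec extent buffer iteration as in this patch */
    }
    bio_put(bio);
}

static noinline_for_stack int write_one_subpage_eb(struct extent_buffer *eb,
                    struct writeback_control *wbc,
                    struct extent_page_data *epd)
{
    /*
     * Same flow as write_one_eb(), but submit the bio with
     * end_bio_subpage_eb_writepage() as the end_io callback, so
     * end_bio_extent_buffer_writepage() stays untouched.
     */
    return 0;
}

That would keep the sectorsize check out of the common endio path
completely.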

Thanks,
Qu
