From: Matthew Wilcox <mawil...@microsoft.com>

Eliminate the buffer_lock as the internal xa_lock provides all the
necessary protection.  We can remove the radix_tree_preload calls, but
I can't find a good way to use the 'exists' result from xa_cmpxchg().
We could resort to the advanced API to improve this, but it's a really
unlikely case (nothing in the xarray when we first look; something there
when we try to add the newly-allocated extent buffer), so I think it's
not worth optimising for.

Signed-off-by: Matthew Wilcox <mawil...@microsoft.com>
---
 fs/btrfs/ctree.h             |  5 ++-
 fs/btrfs/disk-io.c           |  3 +-
 fs/btrfs/extent_io.c         | 82 ++++++++++++++++++--------------------------
 fs/btrfs/tests/btrfs-tests.c | 26 +++-----------
 4 files changed, 40 insertions(+), 76 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index 272d099bed7e..87984ce3a4c2 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1058,9 +1058,8 @@ struct btrfs_fs_info {
        /* readahead works cnt */
        atomic_t reada_works_cnt;
 
-       /* Extent buffer radix tree */
-       spinlock_t buffer_lock;
-       struct radix_tree_root buffer_radix;
+       /* Extent buffer array */
+       struct xarray buffer_array;
 
        /* next backup root to be overwritten */
        int backup_root_index;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 1eae29045d43..650d1350b64d 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2429,7 +2429,7 @@ int open_ctree(struct super_block *sb,
        }
 
        xa_init(&fs_info->fs_roots);
-       INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
+       xa_init(&fs_info->buffer_array);
        INIT_LIST_HEAD(&fs_info->trans_list);
        INIT_LIST_HEAD(&fs_info->dead_roots);
        INIT_LIST_HEAD(&fs_info->delayed_iputs);
@@ -2442,7 +2442,6 @@ int open_ctree(struct super_block *sb,
        spin_lock_init(&fs_info->tree_mod_seq_lock);
        spin_lock_init(&fs_info->super_lock);
        spin_lock_init(&fs_info->qgroup_op_lock);
-       spin_lock_init(&fs_info->buffer_lock);
        spin_lock_init(&fs_info->unused_bgs_lock);
        rwlock_init(&fs_info->tree_mod_log_lock);
        mutex_init(&fs_info->unused_bg_unpin_mutex);
diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
index fd5e9d887328..2b43fa11c9e2 100644
--- a/fs/btrfs/extent_io.c
+++ b/fs/btrfs/extent_io.c
@@ -4884,8 +4884,7 @@ struct extent_buffer *find_extent_buffer(struct 
btrfs_fs_info *fs_info,
        struct extent_buffer *eb;
 
        rcu_read_lock();
-       eb = radix_tree_lookup(&fs_info->buffer_radix,
-                              start >> PAGE_SHIFT);
+       eb = xa_load(&fs_info->buffer_array, start >> PAGE_SHIFT);
        if (eb && atomic_inc_not_zero(&eb->refs)) {
                rcu_read_unlock();
                /*
@@ -4919,31 +4918,24 @@ struct extent_buffer *find_extent_buffer(struct 
btrfs_fs_info *fs_info,
 struct extent_buffer *alloc_test_extent_buffer(struct btrfs_fs_info *fs_info,
                                        u64 start)
 {
-       struct extent_buffer *eb, *exists = NULL;
-       int ret;
+       struct extent_buffer *exists, *eb = NULL;
 
-       eb = find_extent_buffer(fs_info, start);
-       if (eb)
-               return eb;
-       eb = alloc_dummy_extent_buffer(fs_info, start);
-       if (!eb)
-               return NULL;
-       eb->fs_info = fs_info;
 again:
-       ret = radix_tree_preload(GFP_NOFS);
-       if (ret)
+       exists = find_extent_buffer(fs_info, start);
+       if (exists)
                goto free_eb;
-       spin_lock(&fs_info->buffer_lock);
-       ret = radix_tree_insert(&fs_info->buffer_radix,
-                               start >> PAGE_SHIFT, eb);
-       spin_unlock(&fs_info->buffer_lock);
-       radix_tree_preload_end();
-       if (ret == -EEXIST) {
-               exists = find_extent_buffer(fs_info, start);
-               if (exists)
+       if (!eb)
+               eb = alloc_dummy_extent_buffer(fs_info, start);
+       if (!eb)
+               return NULL;
+       exists = xa_cmpxchg(&fs_info->buffer_array, start >> PAGE_SHIFT,
+                               NULL, eb, GFP_NOFS);
+       if (unlikely(exists)) {
+               if (xa_is_err(exists)) {
+                       exists = NULL;
                        goto free_eb;
-               else
-                       goto again;
+               }
+               goto again;
        }
        check_buffer_tree_ref(eb);
        set_bit(EXTENT_BUFFER_IN_TREE, &eb->bflags);
@@ -4957,7 +4949,8 @@ struct extent_buffer *alloc_test_extent_buffer(struct 
btrfs_fs_info *fs_info,
        atomic_inc(&eb->refs);
        return eb;
 free_eb:
-       btrfs_release_extent_buffer(eb);
+       if (eb)
+               btrfs_release_extent_buffer(eb);
        return exists;
 }
 #endif
@@ -4969,22 +4962,24 @@ struct extent_buffer *alloc_extent_buffer(struct 
btrfs_fs_info *fs_info,
        unsigned long num_pages = num_extent_pages(start, len);
        unsigned long i;
        unsigned long index = start >> PAGE_SHIFT;
-       struct extent_buffer *eb;
+       struct extent_buffer *eb = NULL;
        struct extent_buffer *exists = NULL;
        struct page *p;
        struct address_space *mapping = fs_info->btree_inode->i_mapping;
        int uptodate = 1;
-       int ret;
 
        if (!IS_ALIGNED(start, fs_info->sectorsize)) {
                btrfs_err(fs_info, "bad tree block start %llu", start);
                return ERR_PTR(-EINVAL);
        }
 
-       eb = find_extent_buffer(fs_info, start);
-       if (eb)
-               return eb;
+again:
+       exists = find_extent_buffer(fs_info, start);
+       if (exists)
+               goto free_eb;
 
+       if (eb)
+               goto add;
        eb = __alloc_extent_buffer(fs_info, start, len);
        if (!eb)
                return ERR_PTR(-ENOMEM);
@@ -5037,24 +5032,15 @@ struct extent_buffer *alloc_extent_buffer(struct 
btrfs_fs_info *fs_info,
        }
        if (uptodate)
                set_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags);
-again:
-       ret = radix_tree_preload(GFP_NOFS);
-       if (ret) {
-               exists = ERR_PTR(ret);
-               goto free_eb;
-       }
-
-       spin_lock(&fs_info->buffer_lock);
-       ret = radix_tree_insert(&fs_info->buffer_radix,
-                               start >> PAGE_SHIFT, eb);
-       spin_unlock(&fs_info->buffer_lock);
-       radix_tree_preload_end();
-       if (ret == -EEXIST) {
-               exists = find_extent_buffer(fs_info, start);
-               if (exists)
+add:
+       exists = xa_cmpxchg(&fs_info->buffer_array, start >> PAGE_SHIFT,
+                               NULL, eb, GFP_NOFS);
+       if (unlikely(exists)) {
+               if (xa_is_err(exists)) {
+                       exists = NULL;
                        goto free_eb;
-               else
-                       goto again;
+               }
+               goto again;
        }
        /* add one reference for the tree */
        check_buffer_tree_ref(eb);
@@ -5107,10 +5093,8 @@ static int release_extent_buffer(struct extent_buffer 
*eb)
 
                        spin_unlock(&eb->refs_lock);
 
-                       spin_lock(&fs_info->buffer_lock);
-                       radix_tree_delete(&fs_info->buffer_radix,
+                       xa_erase(&fs_info->buffer_array,
                                          eb->start >> PAGE_SHIFT);
-                       spin_unlock(&fs_info->buffer_lock);
                } else {
                        spin_unlock(&eb->refs_lock);
                }
diff --git a/fs/btrfs/tests/btrfs-tests.c b/fs/btrfs/tests/btrfs-tests.c
index 570bce31a301..f80fd54903e9 100644
--- a/fs/btrfs/tests/btrfs-tests.c
+++ b/fs/btrfs/tests/btrfs-tests.c
@@ -110,7 +110,6 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 
nodesize, u32 sectorsize)
                return NULL;
        }
 
-       spin_lock_init(&fs_info->buffer_lock);
        spin_lock_init(&fs_info->qgroup_lock);
        spin_lock_init(&fs_info->qgroup_op_lock);
        spin_lock_init(&fs_info->super_lock);
@@ -125,7 +124,7 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 
nodesize, u32 sectorsize)
        INIT_LIST_HEAD(&fs_info->dirty_qgroups);
        INIT_LIST_HEAD(&fs_info->dead_roots);
        INIT_LIST_HEAD(&fs_info->tree_mod_seq_list);
-       INIT_RADIX_TREE(&fs_info->buffer_radix, GFP_ATOMIC);
+       xa_init(&fs_info->buffer_array);
        xa_init(&fs_info->fs_roots);
        extent_io_tree_init(&fs_info->freed_extents[0], NULL);
        extent_io_tree_init(&fs_info->freed_extents[1], NULL);
@@ -139,8 +138,8 @@ struct btrfs_fs_info *btrfs_alloc_dummy_fs_info(u32 
nodesize, u32 sectorsize)
 
 void btrfs_free_dummy_fs_info(struct btrfs_fs_info *fs_info)
 {
-       struct radix_tree_iter iter;
-       void **slot;
+       struct extent_buffer *eb;
+       unsigned long index = 0;
 
        if (!fs_info)
                return;
@@ -151,25 +150,8 @@ void btrfs_free_dummy_fs_info(struct btrfs_fs_info 
*fs_info)
 
        test_mnt->mnt_sb->s_fs_info = NULL;
 
-       spin_lock(&fs_info->buffer_lock);
-       radix_tree_for_each_slot(slot, &fs_info->buffer_radix, &iter, 0) {
-               struct extent_buffer *eb;
-
-               eb = radix_tree_deref_slot_protected(slot, 
&fs_info->buffer_lock);
-               if (!eb)
-                       continue;
-               /* Shouldn't happen but that kind of thinking creates CVE's */
-               if (radix_tree_exception(eb)) {
-                       if (radix_tree_deref_retry(eb))
-                               slot = radix_tree_iter_retry(&iter);
-                       continue;
-               }
-               slot = radix_tree_iter_resume(slot, &iter);
-               spin_unlock(&fs_info->buffer_lock);
+       xa_for_each(&fs_info->buffer_array, eb, index, ULONG_MAX, XA_PRESENT)
                free_extent_buffer_stale(eb);
-               spin_lock(&fs_info->buffer_lock);
-       }
-       spin_unlock(&fs_info->buffer_lock);
 
        btrfs_free_qgroup_config(fs_info);
        btrfs_free_fs_roots(fs_info);
-- 
2.15.1

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to