qgroups need to have a consistent view of the references for a particular extent
record.  Currently they do this through sequence numbers on delayed refs, but
this is no longer acceptable.  So instead introduce lock_ref/unlock_ref.  This
will provide the qgroup code with a consistent view of the reference while it
does its accounting calculations without interfering with the delayed ref code.
Thanks,

Signed-off-by: Josef Bacik <jba...@fb.com>
---
 fs/btrfs/ctree.h       |  11 ++++++
 fs/btrfs/delayed-ref.c |   2 +
 fs/btrfs/delayed-ref.h |   1 +
 fs/btrfs/extent-tree.c | 102 +++++++++++++++++++++++++++++++++++++++++++++++--
 4 files changed, 113 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index a924274..8b3fd61 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1273,6 +1273,9 @@ struct btrfs_block_group_cache {
 
        /* For delayed block group creation */
        struct list_head new_bg_list;
+
+       /* For locking reference modifications */
+       struct extent_io_tree ref_lock;
 };
 
 /* delayed seq elem */
@@ -3319,6 +3322,14 @@ int btrfs_init_space_info(struct btrfs_fs_info *fs_info);
 int btrfs_delayed_refs_qgroup_accounting(struct btrfs_trans_handle *trans,
                                         struct btrfs_fs_info *fs_info);
 int __get_raid_index(u64 flags);
+int lock_ref(struct btrfs_fs_info *fs_info, u64 root_objectid, u64 bytenr,
+            u64 num_bytes, int for_cow,
+            struct btrfs_block_group_cache **block_group,
+            struct extent_state **cached_state);
+int unlock_ref(struct btrfs_fs_info *fs_info, u64 root_objectid, u64 bytenr,
+              u64 num_bytes, int for_cow,
+              struct btrfs_block_group_cache *block_group,
+              struct extent_state **cached_state);
 /* ctree.c */
 int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
                     int level, int *slot);
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index fab60c1..ee1c29d 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -680,6 +680,7 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info,
        ref->action = action;
        ref->is_head = 0;
        ref->in_tree = 1;
+       ref->for_cow = for_cow;
 
        if (need_ref_seq(for_cow, ref_root))
                seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
@@ -739,6 +740,7 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info,
        ref->action = action;
        ref->is_head = 0;
        ref->in_tree = 1;
+       ref->for_cow = for_cow;
 
        if (need_ref_seq(for_cow, ref_root))
                seq = btrfs_get_tree_mod_seq(fs_info, &trans->delayed_ref_elem);
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index a54c9d4..db71a37 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -52,6 +52,7 @@ struct btrfs_delayed_ref_node {
 
        unsigned int action:8;
        unsigned int type:8;
+       unsigned int for_cow:1;
        /* is this node still in the rbtree? */
        unsigned int is_head:1;
        unsigned int in_tree:1;
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index cd4d9ca..03b536c 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -672,6 +672,79 @@ struct btrfs_block_group_cache *btrfs_lookup_block_group(
        return cache;
 }
 
+
+/* This is used to lock the modification to an extent ref.  This only does
+ * something if the reference is a fs tree.
+ *
+ * @fs_info: the fs_info for this filesystem.
+ * @root_objectid: the root objectid that we are modifying for this extent.
+ * @bytenr: the byte we are modifying the reference for
+ * @num_bytes: the number of bytes we are locking.
+ * @for_cow: if this operation is for cow then we don't need to lock
+ * @block_group: we will store the block group we looked up so that the unlock
+ * doesn't have to do another search.
+ * @cached_state: this is for caching our location so when we unlock we don't
+ * have to do a tree search.
+ *
+ * This can return -ENOMEM if we cannot allocate our extent state.
+ */
+int lock_ref(struct btrfs_fs_info *fs_info, u64 root_objectid, u64 bytenr,
+            u64 num_bytes, int for_cow,
+            struct btrfs_block_group_cache **block_group,
+            struct extent_state **cached_state)
+{
+       struct btrfs_block_group_cache *cache;
+       int ret;
+
+       if (!fs_info->quota_enabled || !need_ref_seq(for_cow, root_objectid))
+               return 0;
+
+       cache = btrfs_lookup_block_group(fs_info, bytenr);
+       ASSERT(cache);
+       ASSERT(cache->key.objectid <= bytenr &&
+              (cache->key.objectid + cache->key.offset >=
+               bytenr + num_bytes));
+       ret = lock_extent_bits(&cache->ref_lock, bytenr,
+                              bytenr + num_bytes - 1, 0, cached_state);
+       if (!ret)
+               *block_group = cache;
+       else
+               btrfs_put_block_group(cache);
+       return ret;
+}
+
+/*
+ * Unlock the extent ref, this only does something if the reference is for an fs
+ * tree.
+ *
+ * @fs_info: the fs_info for this filesystem.
+ * @root_objectid: the root objectid that we are modifying for this extent.
+ * @bytenr: the byte we are modifying the reference for
+ * @num_bytes: the number of bytes we are locking.
+ * @for_cow: if this ref update is for cow we didn't take the lock.
+ * @block_group: the block_group we got from lock_ref.
+ * @cached_state: this is for caching our location so when we unlock we don't
+ * have to do a tree search.
+ *
+ * This can return -ENOMEM if we fail to allocate an extent state.
+ */
+int unlock_ref(struct btrfs_fs_info *fs_info, u64 root_objectid, u64 bytenr,
+              u64 num_bytes, int for_cow,
+              struct btrfs_block_group_cache *block_group,
+              struct extent_state **cached_state)
+{
+       int ret;
+
+       if (!fs_info->quota_enabled || !need_ref_seq(for_cow, root_objectid))
+               return 0;
+
+       ret = unlock_extent_cached(&block_group->ref_lock, bytenr,
+                                  bytenr + num_bytes - 1, cached_state,
+                                  GFP_NOFS);
+       btrfs_put_block_group(block_group);
+       return ret;
+}
+
 static struct btrfs_space_info *__find_space_info(struct btrfs_fs_info *info,
                                                  u64 flags)
 {
@@ -2024,10 +2097,13 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
 {
        int ret = 0;
        struct btrfs_delayed_data_ref *ref;
+       struct btrfs_block_group_cache *block_group;
+       struct extent_state *cached_state = NULL;
        struct btrfs_key ins;
        u64 parent = 0;
        u64 ref_root = 0;
        u64 flags = 0;
+       int err;
 
        ins.objectid = node->bytenr;
        ins.offset = node->num_bytes;
@@ -2041,6 +2117,10 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
        else
                ref_root = ref->root;
 
+       ret = lock_ref(root->fs_info, ref->root, node->bytenr, node->num_bytes,
+                      node->for_cow, &block_group, &cached_state);
+       if (ret)
+               return ret;
        if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
                if (extent_op)
                        flags |= extent_op->flags_to_set;
@@ -2063,7 +2143,10 @@ static int run_delayed_data_ref(struct btrfs_trans_handle *trans,
        } else {
                BUG();
        }
-       return ret;
+       err = unlock_ref(root->fs_info, ref->root, node->bytenr,
+                        node->num_bytes, node->for_cow, block_group,
+                        &cached_state);
+       return ret ? ret : err;
 }
 
 static void __run_delayed_extent_op(struct btrfs_delayed_extent_op *extent_op,
@@ -2185,9 +2268,12 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
 {
        int ret = 0;
        struct btrfs_delayed_tree_ref *ref;
+       struct btrfs_block_group_cache *block_group;
+       struct extent_state *cached_state = NULL;
        struct btrfs_key ins;
        u64 parent = 0;
        u64 ref_root = 0;
+       int err;
        bool skinny_metadata = btrfs_fs_incompat(root->fs_info,
                                                 SKINNY_METADATA);
 
@@ -2208,6 +2294,10 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
                ins.type = BTRFS_EXTENT_ITEM_KEY;
        }
 
+       ret = lock_ref(root->fs_info, ref->root, node->bytenr, node->num_bytes,
+                      node->for_cow, &block_group, &cached_state);
+       if (ret)
+               return ret;
        BUG_ON(node->ref_mod != 1);
        if (node->action == BTRFS_ADD_DELAYED_REF && insert_reserved) {
                BUG_ON(!extent_op || !extent_op->update_flags);
@@ -2227,7 +2317,10 @@ static int run_delayed_tree_ref(struct btrfs_trans_handle *trans,
        } else {
                BUG();
        }
-       return ret;
+       err = unlock_ref(root->fs_info, ref->root, node->bytenr,
+                        node->num_bytes, node->for_cow, block_group,
+                        &cached_state);
+       return ret ? ret : err;
 }
 
 /* helper function to actually process a single delayed ref entry */
@@ -8490,7 +8583,8 @@ int btrfs_read_block_groups(struct btrfs_root *root)
                cache->fs_info = info;
                INIT_LIST_HEAD(&cache->list);
                INIT_LIST_HEAD(&cache->cluster_list);
-
+               extent_io_tree_init(&cache->ref_lock,
+                                   info->btree_inode->i_mapping);
                if (need_clear) {
                        /*
                         * When we mount with old space cache, we need to
@@ -8689,6 +8783,8 @@ int btrfs_make_block_group(struct btrfs_trans_handle *trans,
        INIT_LIST_HEAD(&cache->list);
        INIT_LIST_HEAD(&cache->cluster_list);
        INIT_LIST_HEAD(&cache->new_bg_list);
+       extent_io_tree_init(&cache->ref_lock,
+                           root->fs_info->btree_inode->i_mapping);
 
        btrfs_init_free_space_ctl(cache);
 
-- 
1.8.3.1

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to