On Thu, Apr 12, 2018 at 06:00:03PM +0800, Qu Wenruo wrote:
> The original first key check is handled in
> btree_read_extent_buffer_pages(), which is good enough for level check,
> but to verify first key, we need at least read lock on the extent
> buffer, or we can hit some race and cause false alert.
> 
> Fix it by adding new verifcation function: btrfs_verify_first_key(),
> which will first check lock context of both parent and child extent
> buffer, and only when we have proper lock hold (write or read lock if
> extent buffer is not in commit root) we will verify the first key.
> 
> Most of the verification happens in ctree.c after function call like
> read_tree_block(), read_node_slot() and read_block_for_search(), and
> only call btrfs_verify_first_key() after we acquire read/write lock.
> 
> Also, since first_key check is not as good as level and could easily
> trigger false alerts due to lock context, put it under
> CONFIG_BTRFS_FS_CHECK_INTEGRITY to avoid damage to end user.
> 
> Signed-off-by: Qu Wenruo <w...@suse.com>
> ---
>  fs/btrfs/ctree.c       | 69 ++++++++++++++++++++++++++++++++++++++++++
>  fs/btrfs/ctree.h       | 58 +++++++++++++++++++++++++++++++++++
>  fs/btrfs/extent-tree.c |  6 ++++
>  fs/btrfs/qgroup.c      |  5 +++
>  fs/btrfs/ref-verify.c  |  6 ++++
>  fs/btrfs/relocation.c  | 13 ++++++++
>  6 files changed, 157 insertions(+)
> 
> diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
> index e044b51a2789..57721aead371 100644
> --- a/fs/btrfs/ctree.c
> +++ b/fs/btrfs/ctree.c
> @@ -1649,6 +1649,11 @@ int btrfs_realloc_node(struct btrfs_trans_handle 
> *trans,
>  
>               btrfs_tree_lock(cur);
>               btrfs_set_lock_blocking(cur);
> +             if (btrfs_verify_first_key(cur, parent, i)) {
> +                     err = -EUCLEAN;
> +                     btrfs_tree_unlock(cur);
> +                     free_extent_buffer(cur);
> +             }
>               err = __btrfs_cow_block(trans, root, cur, parent, i,
>                                       &cur, search_start,
>                                       min(16 * blocksize,
> @@ -1863,6 +1868,12 @@ static noinline int balance_level(struct 
> btrfs_trans_handle *trans,
>  
>               btrfs_tree_lock(child);
>               btrfs_set_lock_blocking(child);
> +             if (btrfs_verify_first_key(child, mid, 0)) {
> +                     ret = -EUCLEAN;
> +                     btrfs_tree_unlock(child);
> +                     free_extent_buffer(child);
> +                     goto enospc;
> +             }
>               ret = btrfs_cow_block(trans, root, child, mid, 0, &child);
>               if (ret) {
>                       btrfs_tree_unlock(child);
> @@ -1901,6 +1912,10 @@ static noinline int balance_level(struct 
> btrfs_trans_handle *trans,
>       if (left) {
>               btrfs_tree_lock(left);
>               btrfs_set_lock_blocking(left);
> +             if (btrfs_verify_first_key(left, parent, pslot - 1)) {
> +                     ret = -EUCLEAN;
> +                     goto enospc;
> +             }
>               wret = btrfs_cow_block(trans, root, left,
>                                      parent, pslot - 1, &left);
>               if (wret) {
> @@ -1916,6 +1931,10 @@ static noinline int balance_level(struct 
> btrfs_trans_handle *trans,
>       if (right) {
>               btrfs_tree_lock(right);
>               btrfs_set_lock_blocking(right);
> +             if (btrfs_verify_first_key(right, parent, pslot + 1)) {
> +                     ret = -EUCLEAN;
> +                     goto enospc;
> +             }
>               wret = btrfs_cow_block(trans, root, right,
>                                      parent, pslot + 1, &right);
>               if (wret) {
> @@ -2079,6 +2098,8 @@ static noinline int push_nodes_for_insert(struct 
> btrfs_trans_handle *trans,
>  
>               btrfs_tree_lock(left);
>               btrfs_set_lock_blocking(left);
> +             if (btrfs_verify_first_key(left, parent, pslot - 1))
> +                     goto left_out;
>  
>               left_nr = btrfs_header_nritems(left);
>               if (left_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) {
> @@ -2119,6 +2140,7 @@ static noinline int push_nodes_for_insert(struct 
> btrfs_trans_handle *trans,
>                       }
>                       return 0;
>               }
> +left_out:
>               btrfs_tree_unlock(left);
>               free_extent_buffer(left);
>       }
> @@ -2134,6 +2156,8 @@ static noinline int push_nodes_for_insert(struct 
> btrfs_trans_handle *trans,
>  
>               btrfs_tree_lock(right);
>               btrfs_set_lock_blocking(right);
> +             if (btrfs_verify_first_key(right, parent, pslot + 1))
> +                     goto right_out;
>  
>               right_nr = btrfs_header_nritems(right);
>               if (right_nr >= BTRFS_NODEPTRS_PER_BLOCK(fs_info) - 1) {
> @@ -2174,6 +2198,7 @@ static noinline int push_nodes_for_insert(struct 
> btrfs_trans_handle *trans,
>                       }
>                       return 0;
>               }
> +right_out:
>               btrfs_tree_unlock(right);
>               free_extent_buffer(right);
>       }
> @@ -2868,6 +2893,11 @@ int btrfs_search_slot(struct btrfs_trans_handle 
> *trans, struct btrfs_root *root,
>                                       p->locks[level] = BTRFS_READ_LOCK;
>                               }
>                               p->nodes[level] = b;
> +                             if (btrfs_verify_first_key(b, p->nodes[level + 
> 1],
> +                                                        slot)) {
> +                                     ret = -EUCLEAN;
> +                                     goto done;
> +                             }
>                       }
>               } else {
>                       p->slots[level] = slot;
> @@ -2981,6 +3011,12 @@ int btrfs_search_old_slot(struct btrfs_root *root, 
> const struct btrfs_key *key,
>                               goto done;
>                       }
>  
> +                     /*
> +                      * NOTE: both parent and child go through
> +                      * tree_mod_log_rewind(), first key check is no
> +                      * longer consistent. So no btrfs_verify_first_key()
> +                      * for this read_block_for_search().
> +                      */
>                       err = read_block_for_search(root, p, &b, level,
>                                                   slot, key);
>                       if (err == -EAGAIN)
> @@ -3753,6 +3789,8 @@ static int push_leaf_right(struct btrfs_trans_handle 
> *trans, struct btrfs_root
>  
>       btrfs_tree_lock(right);
>       btrfs_set_lock_blocking(right);
> +     if (btrfs_verify_first_key(right, upper, slot + 1))
> +             goto out_unlock;
>  
>       free_space = btrfs_leaf_free_space(fs_info, right);
>       if (free_space < data_size)
> @@ -3987,6 +4025,10 @@ static int push_leaf_left(struct btrfs_trans_handle 
> *trans, struct btrfs_root
>  
>       btrfs_tree_lock(left);
>       btrfs_set_lock_blocking(left);
> +     if (btrfs_verify_first_key(left, path->nodes[1], slot - 1)) {
> +             ret = 1;
> +             goto out;
> +     }
>  
>       free_space = btrfs_leaf_free_space(fs_info, left);
>       if (free_space < data_size) {
> @@ -5205,6 +5247,12 @@ int btrfs_search_forward(struct btrfs_root *root, 
> struct btrfs_key *min_key,
>               }
>  
>               btrfs_tree_read_lock(cur);
> +             if (btrfs_verify_first_key(cur, path->nodes[level], slot)) {
> +                     btrfs_tree_read_unlock(cur);
> +                     free_extent_buffer(cur);
> +                     ret = -EUCLEAN;
> +                     goto out;
> +             }
>  
>               path->locks[level - 1] = BTRFS_READ_LOCK;
>               path->nodes[level - 1] = cur;
> @@ -5232,6 +5280,11 @@ static int tree_move_down(struct btrfs_fs_info 
> *fs_info,
>       if (IS_ERR(eb))
>               return PTR_ERR(eb);
>  
> +     if (btrfs_verify_first_key(eb, path->nodes[*level],
> +                                path->slots[*level])) {
> +             free_extent_buffer(eb);
> +             return -EUCLEAN;
> +     }
>       path->nodes[*level - 1] = eb;
>       path->slots[*level - 1] = 0;
>       (*level)--;
> @@ -5786,6 +5839,14 @@ int btrfs_next_old_leaf(struct btrfs_root *root, 
> struct btrfs_path *path,
>                                                         BTRFS_READ_LOCK);
>                       }
>                       next_rw_lock = BTRFS_READ_LOCK;
> +                     if (btrfs_verify_first_key(next, path->nodes[level],
> +                                                slot)) {
> +                             ret = -EUCLEAN;
> +                             btrfs_tree_read_unlock(next);
> +                             free_extent_buffer(next);
> +                             btrfs_release_path(path);
> +                             goto done;
> +                     }
>               }
>               break;
>       }
> @@ -5823,6 +5884,14 @@ int btrfs_next_old_leaf(struct btrfs_root *root, 
> struct btrfs_path *path,
>                                                         BTRFS_READ_LOCK);
>                       }
>                       next_rw_lock = BTRFS_READ_LOCK;
> +                     if (btrfs_verify_first_key(next, path->nodes[level],
> +                                                0)) {
> +                             ret = -EUCLEAN;
> +                             btrfs_tree_read_unlock(next);
> +                             free_extent_buffer(next);
> +                             btrfs_release_path(path);
> +                             goto done;
> +                     }
>               }
>       }
>       ret = 0;
> diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
> index 0eb55825862a..033d8ae027c3 100644
> --- a/fs/btrfs/ctree.h
> +++ b/fs/btrfs/ctree.h
> @@ -44,6 +44,7 @@
>  #include "extent_io.h"
>  #include "extent_map.h"
>  #include "async-thread.h"
> +#include "print-tree.h"
>  
>  struct btrfs_trans_handle;
>  struct btrfs_transaction;
> @@ -3752,4 +3753,61 @@ static inline int btrfs_is_testing(struct 
> btrfs_fs_info *fs_info)
>  #endif
>       return 0;
>  }
> +
> +/*
> + * Verify the first key of an extent buffer matches with its parent pointer.
> + *
> + * For tree blocks in current transaction, caller should hold at least read 
> lock
> + * for both @eb and @parent to avoid race.
> + * While for tree blocks in commit root, no such requirement.
> + */
> +static inline int btrfs_verify_first_key(struct extent_buffer *eb,

Too big for a static inline and too much code for a header. Please add
an #ifdef that will export a static inline for { return 0; } and a
normal function that will be declared in a header and defined in .c
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to