On Mon, Jun 20, 2011 at 06:12:10PM +0800, Miao Xie wrote:
> >From 457f39393b2e3d475fbba029b90b6a4e17b94d43 Mon Sep 17 00:00:00 2001
> From: Miao Xie <[email protected]>
> Date: Mon, 20 Jun 2011 17:21:51 +0800
> Subject: [PATCH] btrfs: fix inconsonant inode information
> 
> When iputting the inode, We may leave the delayed nodes if they have some
> delayed items that have not been dealt with. So when the inode is read again,
> we must look up the relative delayed node, and use the information in it to
> initialize the inode. Or we will get inconsonant inode information.
> 
> Signed-off-by: Miao Xie <[email protected]>
> ---
>  fs/btrfs/delayed-inode.c |  104 
> +++++++++++++++++++++++++++++++++++-----------
>  fs/btrfs/delayed-inode.h |    1 +
>  fs/btrfs/inode.c         |   12 ++++-
>  3 files changed, 91 insertions(+), 26 deletions(-)
> 
> diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
> index f1cbd02..280755e 100644
> --- a/fs/btrfs/delayed-inode.c
> +++ b/fs/btrfs/delayed-inode.c
> @@ -82,19 +82,16 @@ static inline struct btrfs_delayed_root 
> *btrfs_get_delayed_root(
>       return root->fs_info->delayed_root;
>  }
>  
> -static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
> -                                                     struct inode *inode)
> +static struct btrfs_delayed_node *btrfs_get_delayed_node(struct inode *inode)
>  {
> -     struct btrfs_delayed_node *node;
>       struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
>       struct btrfs_root *root = btrfs_inode->root;
>       u64 ino = btrfs_ino(inode);
> -     int ret;
> +     struct btrfs_delayed_node *node;
>  
> -again:
>       node = ACCESS_ONCE(btrfs_inode->delayed_node);

do you still need the volatile access here, after the again: label has
been removed? it does not break things if it's there, but it raises
questions ...

>       if (node) {
> -             atomic_inc(&node->refs);        /* can be accessed */
> +             atomic_inc(&node->refs);
>               return node;
>       }
>  
> @@ -103,7 +100,9 @@ again:
>       if (node) {
>               if (btrfs_inode->delayed_node) {
>                       spin_unlock(&root->inode_lock);
> -                     goto again;
> +                     BUG_ON(btrfs_inode->delayed_node != node);
> +                     atomic_inc(&node->refs);        /* can be accessed */
> +                     return node;
>               }
>               btrfs_inode->delayed_node = node;
>               atomic_inc(&node->refs);        /* can be accessed */
> @@ -113,6 +112,23 @@ again:
>       }
>       spin_unlock(&root->inode_lock);
>  
> +     return NULL;
> +}
> +
> +static struct btrfs_delayed_node *btrfs_get_or_create_delayed_node(
> +                                                     struct inode *inode)
> +{
> +     struct btrfs_delayed_node *node;
> +     struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
> +     struct btrfs_root *root = btrfs_inode->root;
> +     u64 ino = btrfs_ino(inode);
> +     int ret;
> +
> +again:
> +     node = btrfs_get_delayed_node(inode);

... aha, it's been somehow moved here, which copies the original logic.
Now reading inode->delayed_node is inside a function and I do not think
that compiler could optimize reading value of btrfs_inode->delayed_node
that it would require ACCESS_ONCE.

And there is another ACCESS_ONCE in btrfs_remove_delayed_node. I wonder
what's the reason for that. Sorry to abuse this thread, but I'd like to
be sure about protection of the ->delayed_node members inside
btrfs_inode. Can you please comment on that?

If you recall one message from the original report:

[ 5447.554187] err add delayed dir index item(name: pglog_0.965_0)
into the insertion tree of the delayed node(root id:
262, inode id: 258, errno: -17)

(-17 == -EEXIST)

a printk after return from __btrfs_add_delayed_item (which is
able to return -EEXIST) in btrfs_insert_delayed_dir_index. I haven't
looked farther, but it seems that the item is being inserted (at least)
twice and I suspect missing locking or other type of protection.


thanks,
david

> +     if (node)
> +             return node;
> +
>       node = kmem_cache_alloc(delayed_node_cache, GFP_NOFS);
>       if (!node)
>               return ERR_PTR(-ENOMEM);
> @@ -548,19 +564,6 @@ struct btrfs_delayed_item *__btrfs_next_delayed_item(
>       return next;
>  }
>  
> -static inline struct btrfs_delayed_node *btrfs_get_delayed_node(
> -                                                     struct inode *inode)
> -{
> -     struct btrfs_inode *btrfs_inode = BTRFS_I(inode);
> -     struct btrfs_delayed_node *delayed_node;
> -
> -     delayed_node = btrfs_inode->delayed_node;
> -     if (delayed_node)
> -             atomic_inc(&delayed_node->refs);
> -
> -     return delayed_node;
> -}
> -
>  static inline struct btrfs_root *btrfs_get_fs_root(struct btrfs_root *root,
>                                                  u64 root_id)
>  {
> @@ -1404,8 +1407,7 @@ end:
>  
>  int btrfs_inode_delayed_dir_index_count(struct inode *inode)
>  {
> -     struct btrfs_delayed_node *delayed_node = BTRFS_I(inode)->delayed_node;
> -     int ret = 0;
> +     struct btrfs_delayed_node *delayed_node = btrfs_get_delayed_node(inode);
>  
>       if (!delayed_node)
>               return -ENOENT;
> @@ -1415,11 +1417,14 @@ int btrfs_inode_delayed_dir_index_count(struct inode 
> *inode)
>        * a new directory index is added into the delayed node and index_cnt
>        * is updated now. So we needn't lock the delayed node.
>        */
> -     if (!delayed_node->index_cnt)
> +     if (!delayed_node->index_cnt) {
> +             btrfs_release_delayed_node(delayed_node);
>               return -EINVAL;
> +     }
>  
>       BTRFS_I(inode)->index_cnt = delayed_node->index_cnt;
> -     return ret;
> +     btrfs_release_delayed_node(delayed_node);
> +     return 0;
>  }
>  
>  void btrfs_get_delayed_items(struct inode *inode, struct list_head *ins_list,
> @@ -1613,6 +1618,57 @@ static void fill_stack_inode_item(struct 
> btrfs_trans_handle *trans,
>                                     inode->i_ctime.tv_nsec);
>  }
>  
> +int btrfs_fill_inode(struct inode *inode, u32 *rdev)
> +{
> +     struct btrfs_delayed_node *delayed_node;
> +     struct btrfs_inode_item *inode_item;
> +     struct btrfs_timespec *tspec;
> +
> +     delayed_node = btrfs_get_delayed_node(inode);
> +     if (!delayed_node)
> +             return -ENOENT;
> +
> +     mutex_lock(&delayed_node->mutex);
> +     if (!delayed_node->inode_dirty) {
> +             mutex_unlock(&delayed_node->mutex);
> +             btrfs_release_delayed_node(delayed_node);
> +             return -ENOENT;
> +     }
> +
> +     inode_item = &delayed_node->inode_item;
> +
> +     inode->i_uid = btrfs_stack_inode_uid(inode_item);
> +     inode->i_gid = btrfs_stack_inode_gid(inode_item);
> +     btrfs_i_size_write(inode, btrfs_stack_inode_size(inode_item));
> +     inode->i_mode = btrfs_stack_inode_mode(inode_item);
> +     inode->i_nlink = btrfs_stack_inode_nlink(inode_item);
> +     inode_set_bytes(inode, btrfs_stack_inode_nbytes(inode_item));
> +     BTRFS_I(inode)->generation = btrfs_stack_inode_generation(inode_item);
> +     BTRFS_I(inode)->sequence = btrfs_stack_inode_sequence(inode_item);
> +     inode->i_rdev = 0;
> +     *rdev = btrfs_stack_inode_rdev(inode_item);
> +     BTRFS_I(inode)->flags = btrfs_stack_inode_flags(inode_item);
> +
> +     tspec = btrfs_inode_atime(inode_item);
> +     inode->i_atime.tv_sec = btrfs_stack_timespec_sec(tspec);
> +     inode->i_atime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
> +
> +     tspec = btrfs_inode_mtime(inode_item);
> +     inode->i_mtime.tv_sec = btrfs_stack_timespec_sec(tspec);
> +     inode->i_mtime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
> +
> +     tspec = btrfs_inode_ctime(inode_item);
> +     inode->i_ctime.tv_sec = btrfs_stack_timespec_sec(tspec);
> +     inode->i_ctime.tv_nsec = btrfs_stack_timespec_nsec(tspec);
> +
> +     inode->i_generation = BTRFS_I(inode)->generation;
> +     BTRFS_I(inode)->index_cnt = (u64)-1;
> +
> +     mutex_unlock(&delayed_node->mutex);
> +     btrfs_release_delayed_node(delayed_node);
> +     return 0;
> +}
> +
>  int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
>                              struct btrfs_root *root, struct inode *inode)
>  {
> diff --git a/fs/btrfs/delayed-inode.h b/fs/btrfs/delayed-inode.h
> index d1a6a29..8d27af4 100644
> --- a/fs/btrfs/delayed-inode.h
> +++ b/fs/btrfs/delayed-inode.h
> @@ -119,6 +119,7 @@ void btrfs_kill_delayed_inode_items(struct inode *inode);
>  
>  int btrfs_delayed_update_inode(struct btrfs_trans_handle *trans,
>                              struct btrfs_root *root, struct inode *inode);
> +int btrfs_fill_inode(struct inode *inode, u32 *rdev);
>  
>  /* Used for drop dead root */
>  void btrfs_kill_all_delayed_nodes(struct btrfs_root *root);
> diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
> index 5813dec..1d25a04 100644
> --- a/fs/btrfs/inode.c
> +++ b/fs/btrfs/inode.c
> @@ -2509,6 +2509,11 @@ static void btrfs_read_locked_inode(struct inode 
> *inode)
>       int maybe_acls;
>       u32 rdev;
>       int ret;
> +     bool filled = false;
> +
> +     ret = btrfs_fill_inode(inode, &rdev);
> +     if (!ret)
> +             filled = true;
>  
>       path = btrfs_alloc_path();
>       BUG_ON(!path);
> @@ -2520,6 +2525,10 @@ static void btrfs_read_locked_inode(struct inode 
> *inode)
>               goto make_bad;
>  
>       leaf = path->nodes[0];
> +
> +     if (filled)
> +             goto cache_acl;
> +
>       inode_item = btrfs_item_ptr(leaf, path->slots[0],
>                                   struct btrfs_inode_item);
>       if (!leaf->map_token)
> @@ -2556,7 +2565,7 @@ static void btrfs_read_locked_inode(struct inode *inode)
>  
>       BTRFS_I(inode)->index_cnt = (u64)-1;
>       BTRFS_I(inode)->flags = btrfs_inode_flags(leaf, inode_item);
> -
> +cache_acl:
>       /*
>        * try to precache a NULL acl entry for files that don't have
>        * any xattrs or acls
> @@ -2572,7 +2581,6 @@ static void btrfs_read_locked_inode(struct inode *inode)
>       }
>  
>       btrfs_free_path(path);
> -     inode_item = NULL;
>  
>       switch (inode->i_mode & S_IFMT) {
>       case S_IFREG:
> -- 
> 1.7.4
> 

--
To unsubscribe from this list: send the line "unsubscribe ceph-devel" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to