On Tue, May 22, 2012 at 12:29:59PM +0200, Christian Brunner wrote:
> 2012/5/21 Miao Xie <[email protected]>:
> > Hi Josef,
> >
> > On Fri, 18 May 2012 15:01:05 -0400, Josef Bacik wrote:
> >> diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
> >> index 9b9b15f..492c74f 100644
> >> --- a/fs/btrfs/btrfs_inode.h
> >> +++ b/fs/btrfs/btrfs_inode.h
> >> @@ -57,9 +57,6 @@ struct btrfs_inode {
> >>       /* used to order data wrt metadata */
> >>       struct btrfs_ordered_inode_tree ordered_tree;
> >>
> >> -     /* for keeping track of orphaned inodes */
> >> -     struct list_head i_orphan;
> >> -
> >>       /* list of all the delalloc inodes in the FS.  There are times we need
> >>        * to write all the delalloc pages to disk, and this list is used
> >>        * to walk them all.
> >> @@ -156,6 +153,8 @@ struct btrfs_inode {
> >>       unsigned dummy_inode:1;
> >>       unsigned in_defrag:1;
> >>       unsigned delalloc_meta_reserved:1;
> >> +     unsigned has_orphan_item:1;
> >> +     unsigned doing_truncate:1;
> >
> > I think the problem is that we should not use different locks to protect
> > bit fields that are stored in the same machine word; otherwise an update
> > to one bit field may overwrite the others when someone changes those
> > fields. Could you try declaring ->delalloc_meta_reserved and
> > ->has_orphan_item as integers?
> 
> I have tried changing it to:
> 
> struct btrfs_inode {
>         unsigned orphan_meta_reserved:1;
>         unsigned dummy_inode:1;
>         unsigned in_defrag:1;
> -       unsigned delalloc_meta_reserved:1;
> +       int delalloc_meta_reserved;
> +       int has_orphan_item;
> +       int doing_truncate;
> 
> The strange thing is that I'm no longer hitting the BUG_ON, but I am
> hitting the old WARNING instead (no additional messages):
> 

Yeah, you would also need to change orphan_meta_reserved.  I fixed this by just
taking the BTRFS_I(inode)->lock when modifying these bit fields, since we don't
want to take up all that space in the inode just for a marker.  I ran this patch
for 3 hours with no issues; let me know if it works for you.  Thanks,

Josef


diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index 3771b85..559e716 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -57,9 +57,6 @@ struct btrfs_inode {
        /* used to order data wrt metadata */
        struct btrfs_ordered_inode_tree ordered_tree;
 
-       /* for keeping track of orphaned inodes */
-       struct list_head i_orphan;
-
        /* list of all the delalloc inodes in the FS.  There are times we need
         * to write all the delalloc pages to disk, and this list is used
         * to walk them all.
@@ -153,6 +150,7 @@ struct btrfs_inode {
        unsigned dummy_inode:1;
        unsigned in_defrag:1;
        unsigned delalloc_meta_reserved:1;
+       unsigned has_orphan_item:1;
 
        /*
         * always compress this one file
diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
index ba8743b..72cdf98 100644
--- a/fs/btrfs/ctree.h
+++ b/fs/btrfs/ctree.h
@@ -1375,7 +1375,7 @@ struct btrfs_root {
        struct list_head root_list;
 
        spinlock_t orphan_lock;
-       struct list_head orphan_list;
+       atomic_t orphan_inodes;
        struct btrfs_block_rsv *orphan_block_rsv;
        int orphan_item_inserted;
        int orphan_cleanup_state;
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 19f5b45..25dba7a 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -1153,7 +1153,6 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
        root->orphan_block_rsv = NULL;
 
        INIT_LIST_HEAD(&root->dirty_list);
-       INIT_LIST_HEAD(&root->orphan_list);
        INIT_LIST_HEAD(&root->root_list);
        spin_lock_init(&root->orphan_lock);
        spin_lock_init(&root->inode_lock);
@@ -1166,6 +1165,7 @@ static void __setup_root(u32 nodesize, u32 leafsize, u32 sectorsize,
        atomic_set(&root->log_commit[0], 0);
        atomic_set(&root->log_commit[1], 0);
        atomic_set(&root->log_writers, 0);
+       atomic_set(&root->orphan_inodes, 0);
        root->log_batch = 0;
        root->log_transid = 0;
        root->last_log_commit = 0;
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 54ae3df..54f1b30 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -2104,12 +2104,12 @@ void btrfs_orphan_commit_root(struct btrfs_trans_handle *trans,
        struct btrfs_block_rsv *block_rsv;
        int ret;
 
-       if (!list_empty(&root->orphan_list) ||
+       if (atomic_read(&root->orphan_inodes) ||
            root->orphan_cleanup_state != ORPHAN_CLEANUP_DONE)
                return;
 
        spin_lock(&root->orphan_lock);
-       if (!list_empty(&root->orphan_list)) {
+       if (atomic_read(&root->orphan_inodes)) {
                spin_unlock(&root->orphan_lock);
                return;
        }
@@ -2166,8 +2166,9 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
                block_rsv = NULL;
        }
 
-       if (list_empty(&BTRFS_I(inode)->i_orphan)) {
-               list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
+       spin_lock(&BTRFS_I(inode)->lock);
+       if (!BTRFS_I(inode)->has_orphan_item) {
+               BTRFS_I(inode)->has_orphan_item = 1;
 #if 0
                /*
                 * For proper ENOSPC handling, we should do orphan
@@ -2180,12 +2181,14 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
                        insert = 1;
 #endif
                insert = 1;
+               atomic_inc(&root->orphan_inodes);
        }
 
        if (!BTRFS_I(inode)->orphan_meta_reserved) {
                BTRFS_I(inode)->orphan_meta_reserved = 1;
                reserve = 1;
        }
+       spin_unlock(&BTRFS_I(inode)->lock);
        spin_unlock(&root->orphan_lock);
 
        /* grab metadata reservation from transaction handle */
@@ -2198,6 +2201,9 @@ int btrfs_orphan_add(struct btrfs_trans_handle *trans, struct inode *inode)
        if (insert >= 1) {
                ret = btrfs_insert_orphan_item(trans, root, btrfs_ino(inode));
                if (ret && ret != -EEXIST) {
+                       spin_lock(&BTRFS_I(inode)->lock);
+                       BTRFS_I(inode)->has_orphan_item = 0;
+                       spin_unlock(&BTRFS_I(inode)->lock);
                        btrfs_abort_transaction(trans, root, ret);
                        return ret;
                }
@@ -2227,26 +2233,41 @@ int btrfs_orphan_del(struct btrfs_trans_handle *trans, struct inode *inode)
        int release_rsv = 0;
        int ret = 0;
 
-       spin_lock(&root->orphan_lock);
-       if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
-               list_del_init(&BTRFS_I(inode)->i_orphan);
-               delete_item = 1;
+       /*
+        * evict_inode gets called without holding the i_mutex so we need to
+        * take the inode lock to make sure we are safe in messing with these.
+        */
+       spin_lock(&BTRFS_I(inode)->lock);
+       if (BTRFS_I(inode)->has_orphan_item) {
+               if (trans) {
+                       BTRFS_I(inode)->has_orphan_item = 0;
+                       delete_item = 1;
+               } else {
+                       WARN_ON(1);
+               }
        }
 
-       if (BTRFS_I(inode)->orphan_meta_reserved) {
+       if (trans && BTRFS_I(inode)->orphan_meta_reserved) {
                BTRFS_I(inode)->orphan_meta_reserved = 0;
                release_rsv = 1;
        }
-       spin_unlock(&root->orphan_lock);
+       spin_unlock(&BTRFS_I(inode)->lock);
 
        if (trans && delete_item) {
                ret = btrfs_del_orphan_item(trans, root, btrfs_ino(inode));
+               if (ret)
+                       printk(KERN_ERR "couldn't find orphan item for %Lu, nlink %d, root %Lu, root being deleted %s\n",
+                              btrfs_ino(inode), inode->i_nlink, root->objectid,
+                              root->orphan_item_inserted ? "yes" : "no");
                BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */
        }
 
        if (release_rsv)
                btrfs_orphan_release_metadata(inode);
 
+       if (trans && delete_item)
+               atomic_dec(&root->orphan_inodes);
+
        return 0;
 }
 
@@ -2373,6 +2394,8 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
                                ret = PTR_ERR(trans);
                                goto out;
                        }
+                       printk(KERN_ERR "auto deleting %Lu\n",
+                              found_key.objectid);
                        ret = btrfs_del_orphan_item(trans, root,
                                                    found_key.objectid);
                        BUG_ON(ret); /* -ENOMEM or corruption (JDM: Recheck) */
@@ -2384,9 +2407,11 @@ int btrfs_orphan_cleanup(struct btrfs_root *root)
                 * add this inode to the orphan list so btrfs_orphan_del does
                 * the proper thing when we hit it
                 */
-               spin_lock(&root->orphan_lock);
-               list_add(&BTRFS_I(inode)->i_orphan, &root->orphan_list);
-               spin_unlock(&root->orphan_lock);
+               spin_lock(&BTRFS_I(inode)->lock);
+               atomic_inc(&root->orphan_inodes);
+               WARN_ON(BTRFS_I(inode)->has_orphan_item);
+               BTRFS_I(inode)->has_orphan_item = 1;
+               spin_unlock(&BTRFS_I(inode)->lock);
 
                /* if we have links, this was a truncate, lets do that */
                if (inode->i_nlink) {
@@ -3707,7 +3732,7 @@ void btrfs_evict_inode(struct inode *inode)
        btrfs_wait_ordered_range(inode, 0, (u64)-1);
 
        if (root->fs_info->log_root_recovering) {
-               BUG_ON(!list_empty(&BTRFS_I(inode)->i_orphan));
+               BUG_ON(!BTRFS_I(inode)->has_orphan_item);
                goto no_delete;
        }
 
@@ -6638,7 +6663,7 @@ static int btrfs_truncate(struct inode *inode)
 
        ret = btrfs_truncate_page(inode->i_mapping, inode->i_size);
        if (ret)
-               return ret;
+               goto real_out;
 
        btrfs_wait_ordered_range(inode, inode->i_size & (~mask), (u64)-1);
        btrfs_ordered_update_i_size(inode, inode->i_size, NULL);
@@ -6680,8 +6705,10 @@ static int btrfs_truncate(struct inode *inode)
         * updating the inode.
         */
        rsv = btrfs_alloc_block_rsv(root);
-       if (!rsv)
-               return -ENOMEM;
+       if (!rsv) {
+               ret = -ENOMEM;
+               goto real_out;
+       }
        rsv->size = min_size;
 
        /*
@@ -6800,7 +6827,7 @@ end_trans:
 
 out:
        btrfs_free_block_rsv(root, rsv);
-
+real_out:
        if (ret && !err)
                err = ret;
 
@@ -6866,6 +6893,7 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
        ei->dummy_inode = 0;
        ei->in_defrag = 0;
        ei->delalloc_meta_reserved = 0;
+       ei->has_orphan_item = 0;
        ei->force_compress = BTRFS_COMPRESS_NONE;
 
        ei->delayed_node = NULL;
@@ -6879,7 +6907,6 @@ struct inode *btrfs_alloc_inode(struct super_block *sb)
        mutex_init(&ei->log_mutex);
        mutex_init(&ei->delalloc_mutex);
        btrfs_ordered_inode_tree_init(&ei->ordered_tree);
-       INIT_LIST_HEAD(&ei->i_orphan);
        INIT_LIST_HEAD(&ei->delalloc_inodes);
        INIT_LIST_HEAD(&ei->ordered_operations);
        RB_CLEAR_NODE(&ei->rb_node);
@@ -6924,13 +6951,11 @@ void btrfs_destroy_inode(struct inode *inode)
                spin_unlock(&root->fs_info->ordered_extent_lock);
        }
 
-       spin_lock(&root->orphan_lock);
-       if (!list_empty(&BTRFS_I(inode)->i_orphan)) {
+       if (BTRFS_I(inode)->has_orphan_item) {
                printk(KERN_INFO "BTRFS: inode %llu still on the orphan list\n",
                       (unsigned long long)btrfs_ino(inode));
-               list_del_init(&BTRFS_I(inode)->i_orphan);
+               atomic_dec(&root->orphan_inodes);
        }
-       spin_unlock(&root->orphan_lock);
 
        while (1) {
                ordered = btrfs_lookup_first_ordered_extent(inode, (u64)-1);
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to [email protected]
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Reply via email to