[PATCH 6.1/6] improve the balancing code

2009-05-11 Thread Yan Zheng
This patch contains miscellaneous changes required by
the balancing code.
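
As a rough usage sketch (not part of the patch): the new
search_commit_root path flag lets the balancing code walk the commit
root read-only. The caller below is hypothetical; only the two path
flags and btrfs_search_slot come from the btrfs code itself, and the
search_commit_root field of struct btrfs_path is added elsewhere in
this series.

static int lookup_in_commit_root(struct btrfs_root *root,
                                 struct btrfs_key *key)
{
        struct btrfs_path *path;
        int ret;

        path = btrfs_alloc_path();
        if (!path)
                return -ENOMEM;

        path->search_commit_root = 1;   /* search root->commit_root */
        path->skip_locking = 1;         /* no tree locks on a read-only walk */

        /* trans == NULL: read-only search, nothing is cow'd */
        ret = btrfs_search_slot(NULL, root, key, path, 0, 0);

        btrfs_free_path(path);
        return ret;
}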

Signed-off-by: Yan Zheng 

---
diff -urpN 6/fs/btrfs/ctree.c 7/fs/btrfs/ctree.c
--- 6/fs/btrfs/ctree.c  2009-05-11 15:53:33.0 +0800
+++ 7/fs/btrfs/ctree.c  2009-05-11 16:02:34.0 +0800
@@ -594,7 +594,7 @@ static int comp_keys(struct btrfs_disk_k
 /*
  * same as comp_keys only with two btrfs_key's
  */
-static int comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
+int btrfs_comp_cpu_keys(struct btrfs_key *k1, struct btrfs_key *k2)
 {
if (k1->objectid > k2->objectid)
return 1;
@@ -970,6 +970,12 @@ static int bin_search(struct extent_buff
return -1;
 }
 
+int btrfs_bin_search(struct extent_buffer *eb, struct btrfs_key *key,
+int level, int *slot)
+{
+   return bin_search(eb, key, level, slot);
+}
+
 /* given a node and slot number, this reads the blocks it points to.  The
  * extent buffer is returned with a reference taken (but unlocked).
  * NULL is returned on error.
@@ -1709,10 +1715,17 @@ int btrfs_search_slot(struct btrfs_trans
lowest_unlock = 2;
 
 again:
-   if (p->skip_locking)
-   b = btrfs_root_node(root);
-   else
-   b = btrfs_lock_root_node(root);
+   if (p->search_commit_root) {
+   b = root->commit_root;
+   extent_buffer_get(b);
+   if (!p->skip_locking)
+   btrfs_tree_lock(b);
+   } else {
+   if (p->skip_locking)
+   b = btrfs_root_node(root);
+   else
+   b = btrfs_lock_root_node(root);
+   }
 
while (b) {
level = btrfs_header_level(b);
@@ -1852,138 +1865,6 @@ done:
return ret;
 }
 
-int btrfs_merge_path(struct btrfs_trans_handle *trans,
-struct btrfs_root *root,
-struct btrfs_key *node_keys,
-u64 *nodes, int lowest_level)
-{
-   struct extent_buffer *eb;
-   struct extent_buffer *parent;
-   struct btrfs_key key;
-   u64 bytenr;
-   u64 generation;
-   u32 blocksize;
-   int level;
-   int slot;
-   int key_match;
-   int ret;
-
-   eb = btrfs_lock_root_node(root);
-   ret = btrfs_cow_block(trans, root, eb, NULL, 0, &eb);
-   BUG_ON(ret);
-
-   btrfs_set_lock_blocking(eb);
-
-   parent = eb;
-   while (1) {
-   level = btrfs_header_level(parent);
-   if (level == 0 || level <= lowest_level)
-   break;
-
-   ret = bin_search(parent, &node_keys[lowest_level], level,
-&slot);
-   if (ret && slot > 0)
-   slot--;
-
-   bytenr = btrfs_node_blockptr(parent, slot);
-   if (nodes[level - 1] == bytenr)
-   break;
-
-   blocksize = btrfs_level_size(root, level - 1);
-   generation = btrfs_node_ptr_generation(parent, slot);
-   btrfs_node_key_to_cpu(eb, &key, slot);
-   key_match = !memcmp(&key, &node_keys[level - 1], sizeof(key));
-
-   if (generation == trans->transid) {
-   eb = read_tree_block(root, bytenr, blocksize,
-generation);
-   btrfs_tree_lock(eb);
-   btrfs_set_lock_blocking(eb);
-   }
-
-   /*
-* if node keys match and node pointer hasn't been modified
-* in the running transaction, we can merge the path. for
-* blocks owened by reloc trees, the node pointer check is
-* skipped, this is because these blocks are fully controlled
-* by the space balance code, no one else can modify them.
-*/
-   if (!nodes[level - 1] || !key_match ||
-   (generation == trans->transid &&
-btrfs_header_owner(eb) != BTRFS_TREE_RELOC_OBJECTID)) {
-   if (level == 1 || level == lowest_level + 1) {
-   if (generation == trans->transid) {
-   btrfs_tree_unlock(eb);
-   free_extent_buffer(eb);
-   }
-   break;
-   }
-
-   if (generation != trans->transid) {
-   eb = read_tree_block(root, bytenr, blocksize,
-   generation);
-   btrfs_tree_lock(eb);
-   btrfs_set_lock_blocking(eb);
-   }
-
-   ret = btrfs_cow_block(trans, root, eb, parent, slot,
- &eb);
-   BUG_ON(ret);
-
-   if (root->root

[PATCH 2/2] btrfs-progs: update fsck for the mixed back ref

2009-05-11 Thread Yan Zheng
This patch adds a check to btrfsck that verifies the correctness
of the mixed back ref format. The check verifies that all tree
blocks that use the new back ref format are referenced by their
owner trees.
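
For context, the reworked backref records below embed a common
struct extent_backref node inside the type-specific structs. A
purely illustrative helper (not in the patch; it assumes the usual
container_of from kerncompat.h and the printf already available in
btrfsck.c) would get back to the full record like this:

static void print_backref(struct extent_backref *node)
{
        if (node->is_data) {
                struct data_backref *dback;

                dback = container_of(node, struct data_backref, node);
                printf("data backref owner %llu offset %llu refs %u\n",
                       (unsigned long long)dback->owner,
                       (unsigned long long)dback->offset,
                       dback->num_refs);
        } else {
                struct tree_backref *tback;

                tback = container_of(node, struct tree_backref, node);
                printf("tree backref root %llu parent %llu\n",
                       (unsigned long long)tback->root,
                       (unsigned long long)tback->parent);
        }
}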

Signed-off-by: Yan Zheng 

---
diff -urp btrfs-progs-unstable/btrfsck.c btrfs-progs-2/btrfsck.c
--- btrfs-progs-unstable/btrfsck.c  2009-01-23 06:01:44.064370471 +0800
+++ btrfs-progs-2/btrfsck.c 2009-05-06 13:16:50.0 +0800
@@ -32,19 +32,35 @@
 static u64 bytes_used = 0;
 static u64 total_csum_bytes = 0;
 static u64 total_btree_bytes = 0;
+static u64 total_fs_tree_bytes = 0;
 static u64 btree_space_waste = 0;
 static u64 data_bytes_allocated = 0;
 static u64 data_bytes_referenced = 0;
 
 struct extent_backref {
struct list_head list;
-   u64 parent;
-   u64 root;
-   u64 generation;
+   unsigned int is_data:1;
+   unsigned int found_extent_tree:1;
+   unsigned int full_backref:1;
+   unsigned int found_ref:1;
+};
+
+struct data_backref {
+   struct extent_backref node;
+   union {
+   u64 parent;
+   u64 root;
+   };
u64 owner;
+   u64 offset;
u32 num_refs;
u32 found_ref;
-   int found_extent_tree;
+};
+
+struct tree_backref {
+   struct extent_backref node;
+   u64 parent;
+   u64 root;
 };
 
 struct extent_record {
@@ -53,9 +69,12 @@ struct extent_record {
struct btrfs_disk_key parent_key;
u64 start;
u64 nr;
-   u32 refs;
-   u32 extent_item_refs;
-   int checked;
+   u64 refs;
+   u64 extent_item_refs;
+   u64 block_owner;
+   unsigned int content_checked:1;
+   unsigned int owner_ref_checked:1;
+   unsigned int is_root:1;
 };
 
 struct inode_backref {
@@ -84,6 +103,7 @@ struct inode_backref {
 struct inode_record {
struct list_head backrefs;
unsigned int checked:1;
+   unsigned int merging:1;
unsigned int found_inode_item:1;
unsigned int found_dir_item:1;
unsigned int found_file_extent:1;
@@ -120,6 +140,7 @@ struct inode_record {
 #define I_ERR_FILE_NBYTES_WRONG(1 << 10)
 #define I_ERR_ODD_CSUM_ITEM(1 << 11)
 #define I_ERR_SOME_CSUM_MISSING(1 << 12)
+#define I_ERR_LINK_COUNT_WRONG (1 << 13)
 
 struct ptr_node {
struct cache_extent cache;
@@ -258,7 +279,7 @@ static void maybe_free_inode_rec(struct 
}
}
 
-   if (!rec->checked)
+   if (!rec->checked || rec->merging)
return;
 
if (S_ISDIR(rec->imode)) {
@@ -425,6 +446,7 @@ static int merge_inode_recs(struct inode
struct inode_backref *backref;
struct cache_tree *dst_cache = &dst_node->inode_cache;
 
+   dst->merging = 1;
list_for_each_entry(backref, &src->backrefs, list) {
if (backref->found_dir_index) {
add_inode_backref(dst_cache, dst->ino, backref->dir,
@@ -492,6 +514,7 @@ static int merge_inode_recs(struct inode
if (dst_node->current == dst)
dst_node->current = NULL;
}
+   dst->merging = 0;
maybe_free_inode_rec(dst_cache, dst);
return 0;
 }
@@ -1001,13 +1024,13 @@ static int walk_down_tree(struct btrfs_r
struct extent_buffer *cur;
u32 blocksize;
int ret;
-   u32 refs;
+   u64 refs;
 
WARN_ON(*level < 0);
WARN_ON(*level >= BTRFS_MAX_LEVEL);
-   ret = btrfs_lookup_extent_ref(NULL, root,
- path->nodes[*level]->start,
- path->nodes[*level]->len, &refs);
+   ret = btrfs_lookup_extent_info(NULL, root,
+  path->nodes[*level]->start,
+  path->nodes[*level]->len, &refs, NULL);
BUG_ON(ret);
if (refs > 1) {
ret = enter_shared_node(root, path->nodes[*level]->start,
@@ -1033,8 +1056,8 @@ static int walk_down_tree(struct btrfs_r
bytenr = btrfs_node_blockptr(cur, path->slots[*level]);
ptr_gen = btrfs_node_ptr_generation(cur, path->slots[*level]);
blocksize = btrfs_level_size(root, *level - 1);
-   ret = btrfs_lookup_extent_ref(NULL, root, bytenr, blocksize,
- &refs);
+   ret = btrfs_lookup_extent_info(NULL, root, bytenr, blocksize,
+  &refs, NULL);
BUG_ON(ret);
 
if (refs > 1) {
@@ -1159,6 +1182,8 @@ static int check_inode_recs(struct btrfs
error++;
if (!rec->found_inode_item)
rec->errors |= I_ERR_NO_INODE_ITEM;
+   if (rec->found_link != rec->nlink)
+   rec->errors |= I_ERR_LINK_COUNT_WRONG;
fprintf(stderr, "root %llu inode %llu errors %x\n",
root->root_key.

Re: [PATCH] Spelling fix in btrfs code comment

2009-05-11 Thread Jiri Kosina
On Sat, 9 May 2009, Sankar P wrote:

> Fix a trivial spelling error in a comment
> 
> Signed-off-by: Sankar P 
> 
> ---
> diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
> index e496644..3e2c7c7 100644
> --- a/fs/btrfs/extent-tree.c
> +++ b/fs/btrfs/extent-tree.c
> @@ -312,7 +312,7 @@ btrfs_lookup_first_block_group(struct btrfs_fs_info *info, u64 bytenr)
>  }
>  
>  /*
> - * return the block group that contains teh given bytenr
> + * return the block group that contains the given bytenr
>   */
>  struct btrfs_block_group_cache *btrfs_lookup_block_group(
>struct btrfs_fs_info *info,

Applied to trivial tree. If it has already been applied to the btrfs
pileup, please let me know so that I can drop it.

Thanks,

-- 
Jiri Kosina
SUSE Labs


[PATCH 1/6] btrfs: Avoid producing dead subvolume trees

2009-05-11 Thread Yan Zheng
When a non-shared tree block is cow'd, the reference count of
all blocks it points to is increased by one. After the commit
finishes, the old block is freed when the dead tree is dropped,
and at that time the reference count of all blocks the old
block points to is decreased by one, so the increments and
decrements cancel out. The COW operation can be done in a
smarter way: when a non-shared tree block is cow'd, we free
the old block at once and the new block inherits the old
block's references; when a shared tree block is cow'd, we
decrease its reference count by one and increase the reference
count of all blocks it points to by one on behalf of the new
block.
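
To make the bookkeeping concrete, here is a small userspace model
(illustrative only, not kernel code) that tracks the reference count
of a single child block under the old and the new scheme:

#include <stdio.h>

int main(void)
{
        int child_refs = 1;     /* one ref held through the old parent */

        /* old scheme, non-shared parent: COW bumps every child, and
         * dropping the dead tree after commit undoes it again */
        child_refs += 1;
        child_refs -= 1;
        printf("old scheme, non-shared parent: %d\n", child_refs);  /* 1 */

        /* new scheme, non-shared parent: the old block is freed at
         * once and the new block inherits its references, so the
         * child's count does not move at all */
        printf("new scheme, non-shared parent: %d\n", child_refs);  /* 1 */

        /* new scheme, shared parent: the new copy takes one ref per
         * child (the shared old parent itself drops one of its own) */
        child_refs += 1;
        printf("new scheme, shared parent: %d\n", child_refs);      /* 2 */
        return 0;
}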

Signed-off-by: Yan Zheng 

---
diff -urp 1/fs/btrfs/ctree.c 2/fs/btrfs/ctree.c
--- 1/fs/btrfs/ctree.c  2009-04-21 07:03:51.343320128 +0800
+++ 2/fs/btrfs/ctree.c  2009-05-11 15:40:07.0 +0800
@@ -244,6 +244,147 @@ int btrfs_copy_root(struct btrfs_trans_h
 }
 
 /*
+ * check if the tree block can be shared by multiple trees
+ */
+int btrfs_block_can_be_shared(struct btrfs_root *root,
+ struct extent_buffer *buf)
+{
+   /*
+* Tree blocks not in reference counted trees and tree roots
+* are never shared. If a block was allocated after the last
+* snapshot and the block was not allocated by tree relocation,
+* we know the block is not shared.
+*/
+   if (root->ref_cows &&
+   buf != root->node && buf != root->commit_root &&
+   (btrfs_header_generation(buf) <=
+btrfs_root_last_snapshot(&root->root_item) ||
+btrfs_header_flag(buf, BTRFS_HEADER_FLAG_RELOC)))
+   return 1;
+   return 0;
+}
+
+static noinline int update_ref_for_cow(struct btrfs_trans_handle *trans,
+  struct btrfs_root *root,
+  struct extent_buffer *buf,
+  struct extent_buffer *cow)
+{
+   u64 refs;
+   u64 owner;
+   u64 flags;
+   u64 new_flags = 0;
+   int ret;
+
+   /*
+* Backrefs update rules:
+*
+* Always use full backrefs for extent pointers in tree block
+* allocated by tree relocation.
+*
+* If a shared tree block is no longer referenced by its owner
+* tree (btrfs_header_owner(buf) == root->root_key.objectid),
+* set the BTRFS_BLOCK_NO_OWNER_REF extent flag and use full
+* backrefs for extent pointers in tree block.
+*
+* If a tree block is being relocated
+* (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID),
+* use full backrefs for extent pointers in tree block.
+* The reason for this is that some operations (such as drop tree)
+* used by the relocation code are only allowed for the full
+* backrefs format.
+*/
+
+   owner = btrfs_header_owner(buf);
+   BUG_ON(owner == BTRFS_TREE_RELOC_OBJECTID);
+
+   if (!btrfs_block_can_be_shared(root, buf)) {
+   if (root->root_key.objectid ==
+   BTRFS_TREE_RELOC_OBJECTID) {
+   ret = btrfs_inc_ref(trans, root, cow, 1);
+   BUG_ON(ret);
+   ret = btrfs_dec_ref(trans, root, buf, 1);
+   BUG_ON(ret);
+   }
+   clean_tree_block(trans, root, buf);
+   return 0;
+   }
+
+   ret = btrfs_lookup_extent_info(trans, root, buf->start,
+  buf->len, &refs, &flags);
+   BUG_ON(ret);
+   BUG_ON(refs == 0);
+
+   /* relocation doesn't change blocks' BTRFS_BLOCK_NO_OWNER_REF flag */
+   if ((flags & BTRFS_BLOCK_NO_OWNER_REF) &&
+   root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
+   ret = btrfs_set_block_flags(trans, root, cow->start, cow->len,
+   BTRFS_BLOCK_NO_OWNER_REF);
+   BUG_ON(ret);
+   }
+
+   if (refs > 1) {
+   if ((owner == root->root_key.objectid ||
+root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) &&
+   !(flags & BTRFS_BLOCK_FULL_BACKREF)) {
+   ret = btrfs_inc_ref(trans, root, buf, 1);
+   BUG_ON(ret);
+
+   if (root->root_key.objectid ==
+   BTRFS_TREE_RELOC_OBJECTID) {
+   ret = btrfs_dec_ref(trans, root, buf, 0);
+   BUG_ON(ret);
+   ret = btrfs_inc_ref(trans, root, cow, 1);
+   BUG_ON(ret);
+   } else {
+   /* owner == root->root_key.objectid */
+   BUG_ON(flags & BTRFS_BLOCK_NO_OWNER_REF);
+   new_flags |= BTRFS_BLOCK_NO_OWNER_REF;
+   }
+   new_flags |=

[PATCH 5/6] btrfs: add per subvolume cached inode tree

2009-05-11 Thread Yan Zheng
This patch adds a per-subvolume red-black tree to keep track of cached
inodes. The red-black tree helps the balancing code find cached
inodes whose inode numbers fall within a given range.
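
A simplified sketch of how the balancing code could use the tree (the
function name is made up, and inode reference counting such as igrab
is deliberately left out; the rb_node, inode_tree and inode_lock
members are the ones added by this patch):

static struct inode *find_cached_inode_ge(struct btrfs_root *root,
                                          u64 min_ino)
{
        struct rb_node *node;
        struct btrfs_inode *entry;
        struct inode *found = NULL;

        spin_lock(&root->inode_lock);
        node = root->inode_tree.rb_node;
        while (node) {
                entry = rb_entry(node, struct btrfs_inode, rb_node);
                if (min_ino < entry->vfs_inode.i_ino) {
                        /* candidate; keep looking for a smaller inode
                         * number that still satisfies the lower bound */
                        found = &entry->vfs_inode;
                        node = node->rb_left;
                } else if (min_ino > entry->vfs_inode.i_ino) {
                        node = node->rb_right;
                } else {
                        found = &entry->vfs_inode;
                        break;
                }
        }
        spin_unlock(&root->inode_lock);
        return found;
}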

Signed-off-by: Yan Zheng 

---
diff -urp 5/fs/btrfs/btrfs_inode.h 6/fs/btrfs/btrfs_inode.h
--- 5/fs/btrfs/btrfs_inode.h  2009-05-11 15:28:11.0 +0800
+++ 6/fs/btrfs/btrfs_inode.h  2009-05-11 15:59:02.0 +0800
@@ -72,6 +72,9 @@ struct btrfs_inode {
 */
struct list_head ordered_operations;
 
+   /* node for the red-black tree that links inodes in subvolume root */
+   struct rb_node rb_node;
+
/* the space_info for where this inode's data allocations are done */
struct btrfs_space_info *space_info;
 
diff -urp 5/fs/btrfs/ctree.h 6/fs/btrfs/ctree.h
--- 5/fs/btrfs/ctree.h  2009-05-11 16:23:00.0 +0800
+++ 6/fs/btrfs/ctree.h  2009-05-11 15:53:33.0 +0800
@@ -981,6 +981,10 @@ struct btrfs_root {
spinlock_t list_lock;
struct list_head orphan_list;
 
+   spinlock_t inode_lock;
+   /* red-black tree that keeps track of in-memory inodes */
+   struct rb_root inode_tree;
+
/*
 * right now this just gets used so that a root has its own devid
 * for stat.  It may be used for more later
@@ -2175,7 +2179,6 @@ int btrfs_page_mkwrite(struct vm_area_st
 int btrfs_readpage(struct file *file, struct page *page);
 void btrfs_delete_inode(struct inode *inode);
 void btrfs_put_inode(struct inode *inode);
-void btrfs_read_locked_inode(struct inode *inode);
 int btrfs_write_inode(struct inode *inode, int wait);
 void btrfs_dirty_inode(struct inode *inode);
 struct inode *btrfs_alloc_inode(struct super_block *sb);
@@ -2183,12 +2186,8 @@ void btrfs_destroy_inode(struct inode *i
 int btrfs_init_cachep(void);
 void btrfs_destroy_cachep(void);
 long btrfs_ioctl_trans_end(struct file *file);
-struct inode *btrfs_ilookup(struct super_block *s, u64 objectid,
-   struct btrfs_root *root, int wait);
-struct inode *btrfs_iget_locked(struct super_block *s, u64 objectid,
-   struct btrfs_root *root);
 struct inode *btrfs_iget(struct super_block *s, struct btrfs_key *location,
-struct btrfs_root *root, int *is_new);
+struct btrfs_root *root);
 int btrfs_commit_write(struct file *file, struct page *page,
   unsigned from, unsigned to);
 struct extent_map *btrfs_get_extent(struct inode *inode, struct page *page,
diff -urp 5/fs/btrfs/disk-io.c 6/fs/btrfs/disk-io.c
--- 5/fs/btrfs/disk-io.c  2009-05-11 16:16:11.0 +0800
+++ 6/fs/btrfs/disk-io.c  2009-05-11 15:59:02.0 +0800
@@ -1598,6 +1598,7 @@ struct btrfs_root *open_ctree(struct sup
fs_info->btree_inode->i_mapping->a_ops = &btree_aops;
fs_info->btree_inode->i_mapping->backing_dev_info = &fs_info->bdi;
 
+   RB_CLEAR_NODE(&BTRFS_I(fs_info->btree_inode)->rb_node);
extent_io_tree_init(&BTRFS_I(fs_info->btree_inode)->io_tree,
 fs_info->btree_inode->i_mapping,
 GFP_NOFS);
@@ -2181,6 +2182,7 @@ int write_ctree_super(struct btrfs_trans
 
 int btrfs_free_fs_root(struct btrfs_fs_info *fs_info, struct btrfs_root *root)
 {
+   WARN_ON(!RB_EMPTY_ROOT(&root->inode_tree));
radix_tree_delete(&fs_info->fs_roots_radix,
  (unsigned long)root->root_key.objectid);
if (root->anon_super.s_dev) {
diff -urp 5/fs/btrfs/export.c 6/fs/btrfs/export.c
--- 5/fs/btrfs/export.c 2009-05-11 15:28:11.0 +0800
+++ 6/fs/btrfs/export.c 2009-05-11 15:59:02.0 +0800
@@ -78,7 +78,7 @@ static struct dentry *btrfs_get_dentry(s
btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
key.offset = 0;
 
-   inode = btrfs_iget(sb, &key, root, NULL);
+   inode = btrfs_iget(sb, &key, root);
if (IS_ERR(inode))
return (void *)inode;
 
@@ -192,7 +192,7 @@ static struct dentry *btrfs_get_parent(s
btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
key.offset = 0;
 
-   return d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root, NULL));
+   return d_obtain_alias(btrfs_iget(root->fs_info->sb, &key, root));
 }
 
 const struct export_operations btrfs_export_ops = {
diff -urp 5/fs/btrfs/inode.c 6/fs/btrfs/inode.c
--- 5/fs/btrfs/inode.c  2009-05-11 16:16:11.0 +0800
+++ 6/fs/btrfs/inode.c  2009-05-11 15:59:02.0 +0800
@@ -3098,6 +3098,45 @@ static int fixup_tree_root_location(stru
return 0;
 }
 
+static void inode_tree_add(struct inode *inode)
+{
+   struct btrfs_root *root = BTRFS_I(inode)->root;
+   struct btrfs_inode *entry;
+   struct rb_node **p = &root->inode_tree.rb_node;
+   struct rb_node *parent = NULL;
+
+   spin_lock(&root->inode_lock);
+   while (*p) {
+   parent = *p;
+   entry = rb_entry(parent, struct btrfs_inode, rb_node

[PATCH 4/6] btrfs: various changes for the mixed back ref

2009-05-11 Thread Yan Zheng
This patch contains various updates for the mixed back ref.
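
One recurring pattern worth calling out: newly written tree blocks are
accounted with full backrefs when the destination is the relocation
tree and with normal, owner-keyed backrefs otherwise. Condensed from
the btrfs_copy_root hunk below (reading the last btrfs_inc_ref
argument as the full-backref flag, consistent with update_ref_for_cow
in patch 1/6):

        if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
                ret = btrfs_inc_ref(trans, root, cow, 1);  /* full backref */
        else
                ret = btrfs_inc_ref(trans, root, cow, 0);  /* normal backref */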



Signed-off-by: Yan Zheng 

---
diff -urp 4/fs/btrfs/ctree.c 5/fs/btrfs/ctree.c
--- 4/fs/btrfs/ctree.c  2009-05-11 16:38:07.0 +0800
+++ 5/fs/btrfs/ctree.c  2009-05-11 16:16:11.0 +0800
@@ -197,14 +197,7 @@ int btrfs_copy_root(struct btrfs_trans_h
u32 nritems;
int ret = 0;
int level;
-   struct btrfs_root *new_root;
-
-   new_root = kmalloc(sizeof(*new_root), GFP_NOFS);
-   if (!new_root)
-   return -ENOMEM;
-
-   memcpy(new_root, root, sizeof(*new_root));
-   new_root->root_key.objectid = new_root_objectid;
+   struct btrfs_disk_key disk_key;
 
WARN_ON(root->ref_cows && trans->transid !=
root->fs_info->running_transaction->transid);
@@ -212,28 +205,36 @@ int btrfs_copy_root(struct btrfs_trans_h
 
level = btrfs_header_level(buf);
nritems = btrfs_header_nritems(buf);
+   if (level == 0)
+   btrfs_item_key(buf, &disk_key, 0);
+   else
+   btrfs_node_key(buf, &disk_key, 0);
 
-   cow = btrfs_alloc_free_block(trans, new_root, buf->len, 0,
-new_root_objectid, trans->transid,
-level, buf->start, 0);
-   if (IS_ERR(cow)) {
-   kfree(new_root);
+   cow = btrfs_alloc_free_block(trans, root, buf->len, 0,
+new_root_objectid, &disk_key, level,
+buf->start, 0);
+   if (IS_ERR(cow))
return PTR_ERR(cow);
-   }
 
copy_extent_buffer(cow, buf, 0, 0, cow->len);
btrfs_set_header_bytenr(cow, cow->start);
btrfs_set_header_generation(cow, trans->transid);
-   btrfs_set_header_owner(cow, new_root_objectid);
-   btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN);
+   btrfs_clear_header_flag(cow, BTRFS_HEADER_FLAG_WRITTEN |
+BTRFS_HEADER_FLAG_RELOC);
+   if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
+   btrfs_set_header_flag(cow, BTRFS_HEADER_FLAG_RELOC);
+   else
+   btrfs_set_header_owner(cow, new_root_objectid);
 
write_extent_buffer(cow, root->fs_info->fsid,
(unsigned long)btrfs_header_fsid(cow),
BTRFS_FSID_SIZE);
 
WARN_ON(btrfs_header_generation(buf) > trans->transid);
-   ret = btrfs_inc_ref(trans, new_root, buf, cow, NULL);
-   kfree(new_root);
+   if (new_root_objectid == BTRFS_TREE_RELOC_OBJECTID)
+   ret = btrfs_inc_ref(trans, root, cow, 1);
+   else
+   ret = btrfs_inc_ref(trans, root, cow, 0);
 
if (ret)
return ret;
@@ -403,34 +404,39 @@ static noinline int __btrfs_cow_block(st
 struct extent_buffer **cow_ret,
 u64 search_start, u64 empty_size)
 {
-   u64 parent_start;
+   struct btrfs_disk_key disk_key;
struct extent_buffer *cow;
-   u32 nritems;
-   int ret = 0;
int level;
int unlock_orig = 0;
+   u64 parent_start;
 
if (*cow_ret == buf)
unlock_orig = 1;
 
btrfs_assert_tree_locked(buf);
 
-   if (parent)
-   parent_start = parent->start;
-   else
-   parent_start = 0;
-
WARN_ON(root->ref_cows && trans->transid !=
root->fs_info->running_transaction->transid);
WARN_ON(root->ref_cows && trans->transid != root->last_trans);
 
level = btrfs_header_level(buf);
-   nritems = btrfs_header_nritems(buf);
 
-   cow = btrfs_alloc_free_block(trans, root, buf->len,
-parent_start, root->root_key.objectid,
-trans->transid, level,
-search_start, empty_size);
+   if (level == 0)
+   btrfs_item_key(buf, &disk_key, 0);
+   else
+   btrfs_node_key(buf, &disk_key, 0);
+
+   if (root->root_key.objectid == BTRFS_TREE_RELOC_OBJECTID) {
+   if (parent)
+   parent_start = parent->start;
+   else
+   parent_start = 0;
+   } else
+   parent_start = 0;
+
+   cow = btrfs_alloc_free_block(trans, root, buf->len, parent_start,
+root->root_key.objectid, &disk_key,
+level, search_start, empty_size);
if (IS_ERR(cow))
return PTR_ERR(cow);
 
@@ -1040,13 +1046,6 @@ static noinline int balance_level(struct
root->node = child;
spin_unlock(&root->node_lock);
 
-   ret = btrfs_update_extent_ref(trans, root, child->start,
- child->len,
- mid->start, child->start,
-

[PATCH 3/6] btrfs: update delayed back ref for the mixed back ref

2009-05-11 Thread Yan Zheng
This patch updates the delayed back ref code for the mixed back ref
format. It defines separate data structures and interfaces for tree
blocks and file data.
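
As an illustration of the split (the helper name below is made up;
the node types, key constants and conversion helpers are the ones
used in the hunks that follow), a consumer can dispatch on the new
delayed ref node types like this:

static u64 delayed_ref_root_or_parent(struct btrfs_delayed_ref_node *node)
{
        if (node->type == BTRFS_TREE_BLOCK_REF_KEY)
                return btrfs_delayed_node_to_tree_ref(node)->root;
        if (node->type == BTRFS_SHARED_BLOCK_REF_KEY)
                return btrfs_delayed_node_to_tree_ref(node)->parent;
        if (node->type == BTRFS_EXTENT_DATA_REF_KEY)
                return btrfs_delayed_node_to_data_ref(node)->root;
        if (node->type == BTRFS_SHARED_DATA_REF_KEY)
                return btrfs_delayed_node_to_data_ref(node)->parent;
        return 0;       /* ref head: carries no backref-specific payload */
}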

Signed-off-by: Yan Zheng 

---
diff -urp 3/fs/btrfs/delayed-ref.c 4/fs/btrfs/delayed-ref.c
--- 3/fs/btrfs/delayed-ref.c  2009-05-11 16:54:52.0 +0800
+++ 4/fs/btrfs/delayed-ref.c  2009-05-11 16:44:39.0 +0800
@@ -29,27 +29,87 @@
  * add extents in the middle of btrfs_search_slot, and it allows
  * us to buffer up frequently modified backrefs in an rb tree instead
  * of hammering updates on the extent allocation tree.
- *
- * Right now this code is only used for reference counted trees, but
- * the long term goal is to get rid of the similar code for delayed
- * extent tree modifications.
  */
 
 /*
- * entries in the rb tree are ordered by the byte number of the extent
- * and by the byte number of the parent block.
+ * compare two delayed tree backrefs with same bytenr and type
+ */
+static int comp_tree_refs(struct btrfs_delayed_tree_ref *ref2,
+ struct btrfs_delayed_tree_ref *ref1)
+{
+   if (ref1->node.type == BTRFS_TREE_BLOCK_REF_KEY) {
+   if (ref1->root < ref2->root)
+   return -1;
+   if (ref1->root > ref2->root)
+   return 1;
+   } else {
+   if (ref1->parent < ref2->parent)
+   return -1;
+   if (ref1->parent > ref2->parent)
+   return 1;
+   }
+   return 0;
+}
+
+/*
+ * compare two delayed data backrefs with same bytenr and type
+ */
+static int comp_data_refs(struct btrfs_delayed_data_ref *ref2,
+ struct btrfs_delayed_data_ref *ref1)
+{
+   if (ref1->node.type == BTRFS_EXTENT_DATA_REF_KEY) {
+   if (ref1->root < ref2->root)
+   return -1;
+   if (ref1->root > ref2->root)
+   return 1;
+   if (ref1->objectid < ref2->objectid)
+   return -1;
+   if (ref1->objectid > ref2->objectid)
+   return 1;
+   if (ref1->offset < ref2->offset)
+   return -1;
+   if (ref1->offset > ref2->offset)
+   return 1;
+   } else {
+   if (ref1->parent < ref2->parent)
+   return -1;
+   if (ref1->parent > ref2->parent)
+   return 1;
+   }
+   return 0;
+}
+
+/*
+ * entries in the rb tree are ordered by the byte number of the extent,
+ * type of the delayed backrefs and content of delayed backrefs.
  */
-static int comp_entry(struct btrfs_delayed_ref_node *ref,
- u64 bytenr, u64 parent)
+static int comp_entry(struct btrfs_delayed_ref_node *ref2,
+ struct btrfs_delayed_ref_node *ref1)
 {
-   if (bytenr < ref->bytenr)
+   if (ref1->bytenr < ref2->bytenr)
return -1;
-   if (bytenr > ref->bytenr)
+   if (ref1->bytenr > ref2->bytenr)
return 1;
-   if (parent < ref->parent)
+   if (ref1->is_head && ref2->is_head)
+   return 0;
+   if (ref2->is_head)
+   return -1;
+   if (ref1->is_head)
+   return 1;
+   if (ref1->type < ref2->type)
return -1;
-   if (parent > ref->parent)
+   if (ref1->type > ref2->type)
return 1;
+   if (ref1->type == BTRFS_TREE_BLOCK_REF_KEY ||
+   ref1->type == BTRFS_SHARED_BLOCK_REF_KEY) {
+   return comp_tree_refs(btrfs_delayed_node_to_tree_ref(ref2),
+ btrfs_delayed_node_to_tree_ref(ref1));
+   } else if (ref1->type == BTRFS_EXTENT_DATA_REF_KEY ||
+  ref1->type == BTRFS_SHARED_DATA_REF_KEY) {
+   return comp_data_refs(btrfs_delayed_node_to_data_ref(ref2),
+ btrfs_delayed_node_to_data_ref(ref1));
+   }
+   BUG();
return 0;
 }
 
@@ -59,20 +119,21 @@ static int comp_entry(struct btrfs_delay
  * inserted.
  */
 static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root,
- u64 bytenr, u64 parent,
  struct rb_node *node)
 {
struct rb_node **p = &root->rb_node;
struct rb_node *parent_node = NULL;
struct btrfs_delayed_ref_node *entry;
+   struct btrfs_delayed_ref_node *ins;
int cmp;
 
+   ins = rb_entry(node, struct btrfs_delayed_ref_node, rb_node);
while (*p) {
parent_node = *p;
entry = rb_entry(parent_node, struct btrfs_delayed_ref_node,
 rb_node);
 
-   cmp = comp_entry(entry, bytenr, parent);
+   cmp = comp_entry(entry, ins);
if (cmp < 0)
p = &(*p)->rb_left;
else if (cmp >