Hello,

This patch fixes the nodatacow check. The new test function can always
detect extents referenced by multiple snapshots. If an extent was
allocated in the two most recent transactions and no snapshot was taken
during those two transactions, we can always avoid COW. To check a given
extent's references, the test function walks down the old tree root and
then checks the backref info along the path. This initial version takes
the old tree root from root->commit_root. A new version using the oldest
tree root will be sent later.

Regards
YZ
diff -r 3f0eee804974 ctree.h
--- a/ctree.h	Thu Jun 26 10:34:20 2008 -0400
+++ b/ctree.h	Fri Jul 18 03:22:34 2008 +0800
@@ -1348,9 +1348,6 @@ static inline struct dentry *fdentry(str
 }

 /* extent-tree.c */
-u32 btrfs_count_snapshots_in_path(struct btrfs_root *root,
-				  struct btrfs_path *count_path,
-				  u64 expected_owner, u64 first_extent);
 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
 			 struct btrfs_root *root);
 int btrfs_copy_pinned(struct btrfs_root *root, struct extent_io_tree *copy);
@@ -1405,6 +1402,8 @@ int btrfs_make_block_group(struct btrfs_
 			   struct btrfs_root *root, u64 bytes_used,
 			   u64 type, u64 chunk_objectid, u64 chunk_offset,
 			   u64 size);
+int btrfs_has_cross_reference(struct btrfs_root *root,
+			      struct btrfs_key *key, u64 bytenr);
 /* ctree.c */
 int btrfs_previous_item(struct btrfs_root *root,
 			struct btrfs_path *path, u64 min_objectid,
diff -r 3f0eee804974 extent-tree.c
--- a/extent-tree.c	Thu Jun 26 10:34:20 2008 -0400
+++ b/extent-tree.c	Fri Jul 18 03:22:34 2008 +0800
@@ -793,70 +793,58 @@ out:
 	return 0;
 }

-u32 btrfs_count_snapshots_in_path(struct btrfs_root *root,
-				  struct btrfs_path *count_path,
-				  u64 expected_owner,
-				  u64 first_extent)
+static int get_reference_status(struct btrfs_root *root,
+				u64 parent_owner, u64 parent_gen,
+			        u64 ref_objectid, u64 bytenr,
+			        u64 *min_generation, u32 *ref_count)
 {
 	struct btrfs_root *extent_root = root->fs_info->extent_root;
 	struct btrfs_path *path;
-	u64 bytenr;
-	u64 found_objectid;
-	u64 found_owner;
-	u64 root_objectid = root->root_key.objectid;
-	u32 total_count = 0;
-	u32 extent_refs;
-	u32 cur_count;
-	u32 nritems;
-	int ret;
+	struct extent_buffer *leaf;
+	struct btrfs_extent_ref *ref_item;
 	struct btrfs_key key;
 	struct btrfs_key found_key;
-	struct extent_buffer *l;
-	struct btrfs_extent_item *item;
-	struct btrfs_extent_ref *ref_item;
-	int level = -1;
-
-	/* FIXME, needs locking */
-	BUG();
-
-	mutex_lock(&root->fs_info->alloc_mutex);
-	path = btrfs_alloc_path();
-again:
-	if (level == -1)
-		bytenr = first_extent;
-	else
-		bytenr = count_path->nodes[level]->start;
-
-	cur_count = 0;
+	u64 ref_generation;
+	u32 nritems;
+	int ret;
+
 	key.objectid = bytenr;
 	key.offset = 0;
-
-	btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
+	key.type = BTRFS_EXTENT_ITEM_KEY;
+
+	path = btrfs_alloc_path();
+	mutex_lock(&root->fs_info->alloc_mutex);
 	ret = btrfs_search_slot(NULL, extent_root, &key, path, 0, 0);
 	if (ret < 0)
 		goto out;
 	BUG_ON(ret == 0);

-	l = path->nodes[0];
-	btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
+	leaf = path->nodes[0];
+	btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);

 	if (found_key.objectid != bytenr ||
 	    found_key.type != BTRFS_EXTENT_ITEM_KEY) {
+		ret = 1;
 		goto out;
 	}

-	item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
-	extent_refs = btrfs_extent_refs(l, item);
+	if (ref_count)
+		*ref_count = 0;
+	if (min_generation)
+		*min_generation = (u64)-1;
+
 	while (1) {
-		l = path->nodes[0];
-		nritems = btrfs_header_nritems(l);
+		leaf = path->nodes[0];
+		nritems = btrfs_header_nritems(leaf);
 		if (path->slots[0] >= nritems) {
 			ret = btrfs_next_leaf(extent_root, path);
+			if (ret < 0)
+				goto out;
 			if (ret == 0)
 				continue;
 			break;
 		}
-		btrfs_item_key_to_cpu(l, &found_key, path->slots[0]);
+		btrfs_item_key_to_cpu(leaf, &found_key, path->slots[0]);
 		if (found_key.objectid != bytenr)
 			break;

@@ -865,57 +853,126 @@ again:
 			continue;
 		}

-		cur_count++;
-		ref_item = btrfs_item_ptr(l, path->slots[0],
+		ref_item = btrfs_item_ptr(leaf, path->slots[0],
 					  struct btrfs_extent_ref);
-		found_objectid = btrfs_ref_root(l, ref_item);
-
-		if (found_objectid != root_objectid) {
-			total_count = 2;
+		ref_generation = btrfs_ref_generation(leaf, ref_item);
+		/*
+		 * For (parent_gen > 0 && parent_gen > ref_gen):
+		 *
+		 * we reach here through the oldest root, therefore
+		 * all other reference from same snapshot should have
+		 * a larger generation.
+		 */
+		if ((parent_owner != btrfs_ref_root(leaf, ref_item)) ||
+		    (parent_gen > 0 && parent_gen > ref_generation) ||
+		    (ref_objectid >= BTRFS_FIRST_FREE_OBJECTID &&
+		     ref_objectid != btrfs_ref_objectid(leaf, ref_item))) {
+			if (ref_count)
+				*ref_count = 2;
+			break;
+		}
+		if (ref_count)
+			*ref_count = 1;
+		if (min_generation && *min_generation > ref_generation)
+			*min_generation = ref_generation;
+
+		path->slots[0]++;
+	}
+	ret = 0;
+out:
+	mutex_unlock(&root->fs_info->alloc_mutex);
+	btrfs_free_path(path);
+	return ret;
+}
+
+int btrfs_has_cross_reference(struct btrfs_root *root,
+			      struct btrfs_key *key, u64 bytenr)
+{
+	struct btrfs_trans_handle *trans;
+	struct btrfs_path *path = NULL;
+	struct btrfs_root *oldest_root;
+	struct extent_buffer *eb;
+	struct btrfs_file_extent_item *item;
+	u64 ref_owner = root->root_key.objectid;
+	u64 ref_generation;
+	u64 min_generation;
+	u64 extent_start;
+	u32 ref_count;
+	int level;
+	int ret;
+
+	BUG_ON(key->type != BTRFS_EXTENT_DATA_KEY);
+
+	ret = get_reference_status(root, ref_owner, 0, key->objectid,
+				   bytenr, &min_generation, &ref_count);
+	if (ret)
+		return ret;
+
+	if (ref_count != 1)
+		return 1;
+
+	oldest_root = kmalloc(sizeof(*oldest_root), GFP_NOFS);
+	BUG_ON(!oldest_root);
+
+	trans = btrfs_start_transaction(root, 0);
+	BUG_ON(!trans);
+
+	memcpy(oldest_root, root, sizeof(*oldest_root));
+	oldest_root->node = root->commit_root;
+	oldest_root->commit_root = NULL;
+	ref_generation = oldest_root->root_key.offset;
+
+	/* all references are created after the latest snapshot */
+	if (min_generation > ref_generation) {
+		ret = 0;
+		goto out;
+	}
+
+	path = btrfs_alloc_path();
+	path->skip_locking = 1;
+
+	/* if no item found, the extent is referenced by other snapshot */
+	ret = btrfs_search_slot(NULL, oldest_root, key, path, 0, 0);
+	if (ret)
+		goto out;
+
+	eb = path->nodes[0];
+	item = btrfs_item_ptr(eb, path->slots[0],
+			      struct btrfs_file_extent_item);
+	if (btrfs_file_extent_type(eb, item) != BTRFS_FILE_EXTENT_REG ||
+	    btrfs_file_extent_disk_bytenr(eb, item) != bytenr) {
+		ret = 1;
+		goto out;
+	}
+
+	for (level = BTRFS_MAX_LEVEL - 1; level >= -1; level--) {
+		if (level >= 0) {
+			eb = path->nodes[level];
+			if (!eb)
+				continue;
+			extent_start = eb->start;
+		} else
+			extent_start = bytenr;
+
+		ret = get_reference_status(root, ref_owner, ref_generation, 0,
+					   extent_start, NULL, &ref_count);
+		if (ret)
 			goto out;
-		}
-		if (level == -1) {
-			found_owner = btrfs_ref_objectid(l, ref_item);
-			if (found_owner != expected_owner) {
-				total_count = 2;
-				goto out;
-			}
-			/*
-			 * nasty.  we don't count a reference held by
-			 * the running transaction.  This allows nodatacow
-			 * to avoid cow most of the time
-			 */
-			if (found_owner >= BTRFS_FIRST_FREE_OBJECTID &&
-			    btrfs_ref_generation(l, ref_item) ==
-			    root->fs_info->generation) {
-				extent_refs--;
-			}
-		}
-		total_count = 1;
-		path->slots[0]++;
-	}
-	/*
-	 * if there is more than one reference against a data extent,
-	 * we have to assume the other ref is another snapshot
-	 */
-	if (level == -1 && extent_refs > 1) {
-		total_count = 2;
-		goto out;
-	}
-	if (cur_count == 0) {
-		total_count = 0;
-		goto out;
-	}
-	if (level >= 0 && root->node == count_path->nodes[level])
-		goto out;
-	level++;
-	btrfs_release_path(root, path);
-	goto again;
-
+
+		if (ref_count != 1) {
+			ret = 1;
+			goto out;
+		}
+		if (level >= 0)
+			ref_generation = btrfs_header_generation(eb);
+	}
+	ret = 0;
 out:
-	btrfs_free_path(path);
-	mutex_unlock(&root->fs_info->alloc_mutex);
-	return total_count;
+	if (path)
+		btrfs_free_path(path);
+	btrfs_end_transaction(trans, root);
+	kfree(oldest_root);
+	return ret;
 }

 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
diff -r 3f0eee804974 inode.c
--- a/inode.c	Thu Jun 26 10:34:20 2008 -0400
+++ b/inode.c	Fri Jul 18 03:22:34 2008 +0800
@@ -232,8 +232,7 @@ again:
 		if (bytenr == 0)
 			goto not_found;

-		if (btrfs_count_snapshots_in_path(root, path, inode->i_ino,
-						  bytenr) != 1) {
+		if (btrfs_has_cross_reference(root, &found_key, bytenr)) {
 			goto not_found;
 		}

@@ -260,6 +259,7 @@ loop:
 	goto again;

 not_found:
+	btrfs_release_path(root, path);
 	cow_file_range(inode, start, end);
 	start = end + 1;
 	goto loop;

Reply via email to