[PATCH v2 3/6] Btrfs: cleanup code of btrfs_balance_delayed_items()

2013-12-25 Thread Miao Xie
- move the condition check for wait into a function
- use wait_event_interruptible instead of prepare-schedule-finish process

Signed-off-by: Miao Xie mi...@cn.fujitsu.com
---
Changelog v1 - v2:
- None.
---
 fs/btrfs/delayed-inode.c | 34 +++---
 1 file changed, 11 insertions(+), 23 deletions(-)

diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index bb101b0..5229106 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1349,52 +1349,40 @@ void btrfs_assert_delayed_root_empty(struct btrfs_root 
*root)
WARN_ON(btrfs_first_delayed_node(delayed_root));
 }
 
-static int refs_newer(struct btrfs_delayed_root *delayed_root,
-		      int seq, int count)
+static int could_end_wait(struct btrfs_delayed_root *delayed_root, int seq)
 {
	int val = atomic_read(&delayed_root->items_seq);

-	if (val < seq || val >= seq + count)
+	if (val < seq || val >= seq + BTRFS_DELAYED_BATCH)
		return 1;
+
+	if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
+		return 1;
+
	return 0;
 }
 
 void btrfs_balance_delayed_items(struct btrfs_root *root)
 {
	struct btrfs_delayed_root *delayed_root;
-	int seq;

	delayed_root = btrfs_get_delayed_root(root);

	if (atomic_read(&delayed_root->items) < BTRFS_DELAYED_BACKGROUND)
		return;

-	seq = atomic_read(&delayed_root->items_seq);
-
	if (atomic_read(&delayed_root->items) >= BTRFS_DELAYED_WRITEBACK) {
+		int seq;
		int ret;
-		DEFINE_WAIT(__wait);
+
+		seq = atomic_read(&delayed_root->items_seq);

		ret = btrfs_wq_run_delayed_node(delayed_root, root, 0);
		if (ret)
			return;

-		while (1) {
-			prepare_to_wait(&delayed_root->wait, &__wait,
-					TASK_INTERRUPTIBLE);
-
-			if (refs_newer(delayed_root, seq,
-				       BTRFS_DELAYED_BATCH) ||
-			    atomic_read(&delayed_root->items) <
-			    BTRFS_DELAYED_BACKGROUND) {
-				break;
-			}
-			if (!signal_pending(current))
-				schedule();
-			else
-				break;
-		}
-		finish_wait(&delayed_root->wait, &__wait);
+		wait_event_interruptible(delayed_root->wait,
+					 could_end_wait(delayed_root, seq));
		return;
	}
 
-- 
1.8.3.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 2/6] Btrfs: don't run delayed nodes again after all nodes flush

2013-12-25 Thread Miao Xie
If the number of the delayed items is greater than the upper limit, we will
try to flush all the delayed items. After that, it is unnecessary to run
them again because they are being dealt with by the workers or the number of
them is less than the lower limit.

Signed-off-by: Miao Xie mi...@cn.fujitsu.com
---
Changelog v1 - v2:
- None.
---
 fs/btrfs/delayed-inode.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index e832621..bb101b0 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1395,6 +1395,7 @@ void btrfs_balance_delayed_items(struct btrfs_root *root)
			break;
		}
		finish_wait(&delayed_root->wait, &__wait);
+		return;
	}

	btrfs_wq_run_delayed_node(delayed_root, root, BTRFS_DELAYED_BATCH);
-- 
1.8.3.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 6/6] Btrfs: introduce the delayed inode ref deletion for the single link inode

2013-12-25 Thread Miao Xie
The inode reference item is close to the inode item, so we insert it simultaneously
with the inode item insertion when we create a file/directory. In fact, we can also
handle the inode reference deletion in the same way. So we made this patch to
introduce the delayed inode reference deletion for the single-link inode (in most
cases, a file doesn't have hard links, so we don't take hard links into
account).

This function is based on the delayed inode mechanism. After applying this 
patch,
we can reduce the time of the file/directory deletion by ~10%.

Signed-off-by: Miao Xie mi...@cn.fujitsu.com
---
Changelog v1 - v2:
- Use ASSERT instead of BUG_ON(), pointed by Josef.
---
 fs/btrfs/btrfs_inode.h   |   3 ++
 fs/btrfs/delayed-inode.c | 103 +--
 fs/btrfs/delayed-inode.h |   2 +
 fs/btrfs/inode.c |  55 -
 4 files changed, 157 insertions(+), 6 deletions(-)

diff --git a/fs/btrfs/btrfs_inode.h b/fs/btrfs/btrfs_inode.h
index ac0b39d..661b0ac 100644
--- a/fs/btrfs/btrfs_inode.h
+++ b/fs/btrfs/btrfs_inode.h
@@ -135,6 +135,9 @@ struct btrfs_inode {
 */
u64 index_cnt;
 
+   /* Cache the directory index number to speed the dir/file remove */
+   u64 dir_index;
+
/* the fsync log has some corner cases that mean we have to check
 * directories to see if any unlinks have been done before
 * the directory was logged.  See tree-log.c for all the
diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 979db56..a79a363 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1015,6 +1015,18 @@ static void btrfs_release_delayed_inode(struct 
btrfs_delayed_node *delayed_node)
}
 }
 
+static void btrfs_release_delayed_iref(struct btrfs_delayed_node *delayed_node)
+{
+	struct btrfs_delayed_root *delayed_root;
+
+	ASSERT(delayed_node->root);
+	clear_bit(BTRFS_DELAYED_NODE_DEL_IREF, &delayed_node->flags);
+	delayed_node->count--;
+
+	delayed_root = delayed_node->root->fs_info->delayed_root;
+	finish_one_item(delayed_root);
+}
+
 static int __btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
struct btrfs_root *root,
struct btrfs_path *path,
@@ -1023,13 +1035,19 @@ static int __btrfs_update_delayed_inode(struct 
btrfs_trans_handle *trans,
	struct btrfs_key key;
	struct btrfs_inode_item *inode_item;
	struct extent_buffer *leaf;
+	int mod;
	int ret;

	key.objectid = node->inode_id;
	btrfs_set_key_type(&key, BTRFS_INODE_ITEM_KEY);
	key.offset = 0;

-	ret = btrfs_lookup_inode(trans, root, path, &key, 1);
+	if (test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
+		mod = -1;
+	else
+		mod = 1;
+
+	ret = btrfs_lookup_inode(trans, root, path, &key, mod);
	if (ret > 0) {
		btrfs_release_path(path);
		return -ENOENT;
@@ -1037,19 +1055,58 @@ static int __btrfs_update_delayed_inode(struct 
btrfs_trans_handle *trans,
return ret;
}
 
-	btrfs_unlock_up_safe(path, 1);
	leaf = path->nodes[0];
	inode_item = btrfs_item_ptr(leaf, path->slots[0],
				    struct btrfs_inode_item);
	write_extent_buffer(leaf, &node->inode_item, (unsigned long)inode_item,
			    sizeof(struct btrfs_inode_item));
	btrfs_mark_buffer_dirty(leaf);
-	btrfs_release_path(path);

+	if (!test_bit(BTRFS_DELAYED_NODE_DEL_IREF, &node->flags))
+		goto no_iref;
+
+	path->slots[0]++;
+	if (path->slots[0] >= btrfs_header_nritems(leaf))
+		goto search;
+again:
+	btrfs_item_key_to_cpu(leaf, &key, path->slots[0]);
+	if (key.objectid != node->inode_id)
+		goto out;
+
+	if (key.type != BTRFS_INODE_REF_KEY &&
+	    key.type != BTRFS_INODE_EXTREF_KEY)
+		goto out;
+
+	/*
+	 * Delayed iref deletion is for the inode who has only one link,
+	 * so there is only one iref. The case that several irefs are
+	 * in the same item doesn't exist.
+	 */
+	btrfs_del_item(trans, root, path);
+out:
+	btrfs_release_delayed_iref(node);
+no_iref:
+	btrfs_release_path(path);
+err_out:
	btrfs_delayed_inode_release_metadata(root, node);
	btrfs_release_delayed_inode(node);

-	return 0;
+	return ret;
+
+search:
+	btrfs_release_path(path);
+
+	btrfs_set_key_type(&key, BTRFS_INODE_EXTREF_KEY);
+	key.offset = -1;
+	ret = btrfs_search_slot(trans, root, &key, path, -1, 1);
+	if (ret < 0)
+		goto err_out;
+	ASSERT(ret);
+
+	ret = 0;
+	leaf = path->nodes[0];
+	path->slots[0]--;
+	goto again;
 }
 
 static inline int btrfs_update_delayed_inode(struct btrfs_trans_handle *trans,
@@ 

[PATCH v2 1/6] Btrfs: remove residual code in delayed inode async helper

2013-12-25 Thread Miao Xie
Before applying the patch
  commit de3cb945db4d8eb3b046dc7a5ea89a893372750c
  title: Btrfs: improve the delayed inode throttling

We needed to requeue the async work after the current work was done, and it
introduced a deadlock problem. So we wrote the code that this patch
removes to avoid the above problem. But after applying the above
patch, the deadlock problem no longer exists. So we should remove that
fix code.

Signed-off-by: Miao Xie mi...@cn.fujitsu.com
---
Changelog v1 - v2:
- None.
---
 fs/btrfs/delayed-inode.c | 27 ---
 1 file changed, 27 deletions(-)

diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 8d292fb..e832621 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1300,33 +1300,6 @@ again:
	trans->block_rsv = root->fs_info->delayed_block_rsv;
 
__btrfs_commit_inode_delayed_items(trans, path, delayed_node);
-   /*
-* Maybe new delayed items have been inserted, so we need requeue
-* the work. Besides that, we must dequeue the empty delayed nodes
-* to avoid the race between delayed items balance and the worker.
-* The race like this:
-*  Task1   Worker thread
-*  count == 0, needn't requeue
-*also needn't insert the
-*delayed node into prepare
-*list again.
-*  add lots of delayed items
-*  queue the delayed node
-*already in the list,
-*and not in the prepare
-*list, it means the delayed
-*node is being dealt with
-*by the worker.
-*  do delayed items balance
-*the delayed node is being
-*dealt with by the worker
-*now, just wait.
-*  the worker goto idle.
-* Task1 will sleep until the transaction is commited.
-*/
-	mutex_lock(&delayed_node->mutex);
-	btrfs_dequeue_delayed_node(root->fs_info->delayed_root, delayed_node);
-	mutex_unlock(&delayed_node->mutex);
 
trans-block_rsv = block_rsv;
btrfs_end_transaction_dmeta(trans, root);
-- 
1.8.3.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 5/6] Btrfs: use flags instead of the bool variants in delayed node

2013-12-25 Thread Miao Xie
Signed-off-by: Miao Xie mi...@cn.fujitsu.com
---
Changelog v1 - v2:
- None.
---
 fs/btrfs/delayed-inode.c | 33 +
 fs/btrfs/delayed-inode.h |  6 --
 2 files changed, 21 insertions(+), 18 deletions(-)

diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 7d55443..979db56 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -55,8 +55,7 @@ static inline void btrfs_init_delayed_node(
	delayed_node->inode_id = inode_id;
	atomic_set(&delayed_node->refs, 0);
	delayed_node->count = 0;
-	delayed_node->in_list = 0;
-	delayed_node->inode_dirty = 0;
+	delayed_node->flags = 0;
	delayed_node->ins_root = RB_ROOT;
	delayed_node->del_root = RB_ROOT;
	mutex_init(&delayed_node->mutex);
@@ -172,7 +171,7 @@ static void btrfs_queue_delayed_node(struct 
btrfs_delayed_root *root,
 int mod)
 {
	spin_lock(&root->lock);
-	if (node->in_list) {
+	if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
		if (!list_empty(&node->p_list))
			list_move_tail(&node->p_list, &root->prepare_list);
		else if (mod)
@@ -182,7 +181,7 @@ static void btrfs_queue_delayed_node(struct btrfs_delayed_root *root,
		list_add_tail(&node->p_list, &root->prepare_list);
		atomic_inc(&node->refs);	/* inserted into list */
		root->nodes++;
-		node->in_list = 1;
+		set_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
	}
	spin_unlock(&root->lock);
 }
@@ -192,13 +191,13 @@ static void btrfs_dequeue_delayed_node(struct 
btrfs_delayed_root *root,
   struct btrfs_delayed_node *node)
 {
	spin_lock(&root->lock);
-	if (node->in_list) {
+	if (test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
		root->nodes--;
		atomic_dec(&node->refs);	/* not in the list */
		list_del_init(&node->n_list);
		if (!list_empty(&node->p_list))
			list_del_init(&node->p_list);
-		node->in_list = 0;
+		clear_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags);
	}
	spin_unlock(&root->lock);
 }
 }
@@ -231,7 +230,8 @@ static struct btrfs_delayed_node *btrfs_next_delayed_node(

	delayed_root = node->root->fs_info->delayed_root;
	spin_lock(&delayed_root->lock);
-	if (!node->in_list) {	/* not in the list */
+	if (!test_bit(BTRFS_DELAYED_NODE_IN_LIST, &node->flags)) {
+		/* not in the list */
		if (list_empty(&delayed_root->node_list))
			goto out;
		p = delayed_root->node_list.next;
@@ -1004,9 +1004,10 @@ static void btrfs_release_delayed_inode(struct 
btrfs_delayed_node *delayed_node)
 {
	struct btrfs_delayed_root *delayed_root;

-	if (delayed_node && delayed_node->inode_dirty) {
+	if (delayed_node &&
+	    test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
		BUG_ON(!delayed_node->root);
-		delayed_node->inode_dirty = 0;
+		clear_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags);
		delayed_node->count--;

		delayed_root = delayed_node->root->fs_info->delayed_root;
@@ -1059,7 +1060,7 @@ static inline int btrfs_update_delayed_inode(struct 
btrfs_trans_handle *trans,
	int ret;

	mutex_lock(&node->mutex);
-	if (!node->inode_dirty) {
+	if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &node->flags)) {
		mutex_unlock(&node->mutex);
		return 0;
	}
@@ -1203,7 +1204,7 @@ int btrfs_commit_inode_delayed_inode(struct inode *inode)
		return 0;

	mutex_lock(&delayed_node->mutex);
-	if (!delayed_node->inode_dirty) {
+	if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
		mutex_unlock(&delayed_node->mutex);
		btrfs_release_delayed_node(delayed_node);
		return 0;
@@ -1227,7 +1228,7 @@ int btrfs_commit_inode_delayed_inode(struct inode *inode)
	trans->block_rsv = delayed_node->root->fs_info->delayed_block_rsv;

	mutex_lock(&delayed_node->mutex);
-	if (delayed_node->inode_dirty)
+	if (test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags))
		ret = __btrfs_update_delayed_inode(trans, delayed_node->root,
						   path, delayed_node);
	else
@@ -1721,7 +1722,7 @@ int btrfs_fill_inode(struct inode *inode, u32 *rdev)
		return -ENOENT;

	mutex_lock(&delayed_node->mutex);
-	if (!delayed_node->inode_dirty) {
+	if (!test_bit(BTRFS_DELAYED_NODE_INODE_DIRTY, &delayed_node->flags)) {
		mutex_unlock(&delayed_node->mutex);
		btrfs_release_delayed_node(delayed_node);
		return -ENOENT;
@@ -1772,7 +1773,7 @@ int 

[PATCH v2 4/6] Btrfs: remove btrfs_end_transaction_dmeta()

2013-12-25 Thread Miao Xie
Two reasons:
- btrfs_end_transaction_dmeta() is the same as btrfs_end_transaction_throttle()
  so it is unnecessary.
- All the delayed items should be dealt in the current transaction, so the
  workers should not commit the transaction, instead, deal with the delayed
  items as many as possible.

So we can remove btrfs_end_transaction_dmeta()

Signed-off-by: Miao Xie mi...@cn.fujitsu.com
---
Changelog v1 - v2:
- Add the detail changelog.
---
 fs/btrfs/delayed-inode.c | 2 +-
 fs/btrfs/transaction.c   | 6 --
 fs/btrfs/transaction.h   | 2 --
 3 files changed, 1 insertion(+), 9 deletions(-)

diff --git a/fs/btrfs/delayed-inode.c b/fs/btrfs/delayed-inode.c
index 5229106..7d55443 100644
--- a/fs/btrfs/delayed-inode.c
+++ b/fs/btrfs/delayed-inode.c
@@ -1302,7 +1302,7 @@ again:
	__btrfs_commit_inode_delayed_items(trans, path, delayed_node);

	trans->block_rsv = block_rsv;
-	btrfs_end_transaction_dmeta(trans, root);
+	btrfs_end_transaction(trans, root);
	btrfs_btree_balance_dirty_nodelay(root);

 release_path:
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index c6a872a..df919b4 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -788,12 +788,6 @@ int btrfs_end_transaction_throttle(struct 
btrfs_trans_handle *trans,
return __btrfs_end_transaction(trans, root, 1);
 }
 
-int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans,
-				struct btrfs_root *root)
-{
-	return __btrfs_end_transaction(trans, root, 1);
-}
-
 /*
  * when btree blocks are allocated, they have some corresponding bits set for
  * them in one of two extent_io trees.  This is used to make sure all of
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 7657d11..d05b601 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -154,8 +154,6 @@ int btrfs_commit_transaction_async(struct 
btrfs_trans_handle *trans,
   int wait_for_unblock);
 int btrfs_end_transaction_throttle(struct btrfs_trans_handle *trans,
   struct btrfs_root *root);
-int btrfs_end_transaction_dmeta(struct btrfs_trans_handle *trans,
-				struct btrfs_root *root);
 int btrfs_should_end_transaction(struct btrfs_trans_handle *trans,
 struct btrfs_root *root);
 void btrfs_throttle(struct btrfs_root *root);
-- 
1.8.3.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/3] Btrfs: remove unused argument from select_reloc_root()

2013-12-25 Thread Wang Shilong
@nr is no longer used, remove it from select_reloc_root()

Signed-off-by: Wang Shilong wangsl.f...@cn.fujitsu.com
---
 fs/btrfs/relocation.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 8cf99c4..277b8e3 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2454,7 +2454,7 @@ static noinline_for_stack
 struct btrfs_root *select_reloc_root(struct btrfs_trans_handle *trans,
 struct reloc_control *rc,
 struct backref_node *node,
-struct backref_edge *edges[], int *nr)
+struct backref_edge *edges[])
 {
struct backref_node *next;
struct btrfs_root *root;
@@ -2496,7 +2496,6 @@ struct btrfs_root *select_reloc_root(struct 
btrfs_trans_handle *trans,
if (!root)
return NULL;
 
-   *nr = index;
next = node;
/* setup backref node path for btrfs_reloc_cow_block */
while (1) {
@@ -2643,7 +2642,6 @@ static int do_relocation(struct btrfs_trans_handle *trans,
u32 blocksize;
u64 bytenr;
u64 generation;
-   int nr;
int slot;
int ret;
int err = 0;
@@ -2656,7 +2654,7 @@ static int do_relocation(struct btrfs_trans_handle *trans,
cond_resched();
 
upper = edge-node[UPPER];
-   root = select_reloc_root(trans, rc, upper, edges, nr);
+   root = select_reloc_root(trans, rc, upper, edges);
BUG_ON(!root);
 
if (upper-eb  !upper-locked) {
-- 
1.8.3.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC PATCH 3/3] Btrfs: improve balance relocation with ENOSPC case

2013-12-25 Thread Wang Shilong
Previously, we would try to merge reloc roots even if an ENOSPC
error happened, and this made the merging process very slow.

We improve ENOSPC case by two ways.

If we fail to reserve metadata space, we will skip merging relocation
roots, just do the cleanup work that drops relocation tree.

If error happens, @rc-merge_error will be set which will avoid
unnecessary relocation tree creation while we are still dropping relocation
tree.

Signed-off-by: Wang Shilong wangsl.f...@cn.fujitsu.com
---
 fs/btrfs/relocation.c | 147 +-
 1 file changed, 111 insertions(+), 36 deletions(-)

diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 9189f9e..b530ed9 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -187,6 +187,7 @@ struct reloc_control {
unsigned int create_reloc_tree:1;
unsigned int merge_reloc_tree:1;
unsigned int found_file_extent:1;
+   unsigned int merge_error:1;
 };
 
 /* stages of data relocation */
@@ -2271,6 +2272,8 @@ again:
}
 
	rc->merge_reloc_tree = 1;
+	if (err)
+		rc->merge_error = 1;

	while (!list_empty(&rc->reloc_roots)) {
		reloc_root = list_entry(rc->reloc_roots.next,
@@ -2288,6 +2291,8 @@ again:
		 */
		if (!err)
			btrfs_set_root_refs(&reloc_root->root_item, 1);
+		else
+			btrfs_set_root_refs(&reloc_root->root_item, 0);
		btrfs_update_reloc_root(trans, root);

		list_add(&reloc_root->root_list, &reloc_roots);
@@ -2314,15 +2319,104 @@ void free_reloc_roots(struct list_head *list)
}
 }
 
+static int drop_relocation_tree(struct reloc_control *rc,
+				struct btrfs_root *reloc_root)
+{
+	u64 last_snap;
+	u64 otransid;
+	u64 objectid;
+	struct btrfs_root *root;
+	struct btrfs_trans_handle *trans;
+	int ret;
+
+	/*
+	 * we keep the old last snapshot transid in rtransid when we
+	 * created the relocation tree.
+	 */
+	last_snap = btrfs_root_rtransid(&reloc_root->root_item);
+	otransid = btrfs_root_otransid(&reloc_root->root_item);
+	objectid = reloc_root->root_key.offset;
+
+	ret = btrfs_drop_snapshot(reloc_root, rc->block_rsv, 0, 1);
+	if (ret)
+		return ret;
+	/*
+	 * recover the last snapshot transid to avoid
+	 * the space balance break NOCOW.
+	 */
+	root = read_fs_root(rc->extent_root->fs_info,
+			    objectid);
+	if (IS_ERR(root))
+		return 0;
+
+	trans = btrfs_join_transaction(root);
+	ASSERT(!IS_ERR(trans));
+
+	/* Check if the fs/file tree was snapshotted or not. */
+	if (btrfs_root_last_snapshot(&root->root_item) ==
+	    otransid - 1)
+		btrfs_set_root_last_snapshot(&root->root_item,
+					     last_snap);
+	btrfs_end_transaction(trans, root);
+	return 0;
+}
+
+static noinline_for_stack
+int clean_reloc_roots(struct reloc_control *rc)
+{
+	struct btrfs_root *root;
+	struct btrfs_root *reloc_root;
+	LIST_HEAD(reloc_roots);
+	int ret = 0;
+	root = rc->extent_root;
+
+	mutex_lock(&root->fs_info->reloc_mutex);
+	list_splice_init(&rc->reloc_roots, &reloc_roots);
+	mutex_unlock(&root->fs_info->reloc_mutex);
+
+	while (!list_empty(&reloc_roots)) {
+		reloc_root = list_entry(reloc_roots.next,
+					struct btrfs_root, root_list);
+		if (btrfs_root_refs(&reloc_root->root_item) > 0) {
+			root = read_fs_root(reloc_root->fs_info,
+					    reloc_root->root_key.offset);
+			ASSERT(!IS_ERR(root));
+			ASSERT(root->reloc_root == reloc_root);
+
+			mutex_lock(&root->fs_info->reloc_mutex);
+			root->reloc_root = NULL;
+			mutex_unlock(&root->fs_info->reloc_mutex);
+
+			__del_reloc_root(reloc_root);
+		} else {
+			list_del_init(&reloc_root->root_list);
+		}
+		ret = drop_relocation_tree(rc, reloc_root);
+		if (ret) {
+			if (list_empty(&reloc_root->root_list))
+				list_add_tail(&reloc_root->root_list,
+					      &reloc_roots);
+			goto out;
+		}
+	}
+
+out:
+	if (ret) {
+		btrfs_std_error(root->fs_info, ret);
+		if (!list_empty(&reloc_roots))
+			free_reloc_roots(&reloc_roots);
+	}
+
+	BUG_ON(!RB_EMPTY_ROOT(&rc->reloc_root_tree.rb_root));
+	return ret;
+}
+
+
 static noinline_for_stack
 int merge_reloc_roots(struct reloc_control *rc)
 {
-   struct 

[PATCH 2/3] Btrfs: fix an oops when we fail to merge reloc roots

2013-12-25 Thread Wang Shilong
Previously, we would free the reloc root memory and then force the filesystem
to be read-only. The problem is that there may be another thread committing a
transaction which will try to access the freed reloc root during the
merging-reloc-roots process.

To keep the space shared with snapshots consistent, we should allow the snapshot
to finish if possible, so here we don't free the reloc root memory.

Signed-off-by: Wang Shilong wangsl.f...@cn.fujitsu.com
---
 fs/btrfs/relocation.c | 10 +++---
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 277b8e3..9189f9e 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -2311,9 +2311,6 @@ void free_reloc_roots(struct list_head *list)
		reloc_root = list_entry(list->next, struct btrfs_root,
					root_list);
		__del_reloc_root(reloc_root);
-		free_extent_buffer(reloc_root->node);
-		free_extent_buffer(reloc_root->commit_root);
-		kfree(reloc_root);
	}
 }
 
@@ -2355,10 +2352,9 @@ again:

			ret = merge_reloc_root(rc, root);
			if (ret) {
-				__del_reloc_root(reloc_root);
-				free_extent_buffer(reloc_root->node);
-				free_extent_buffer(reloc_root->commit_root);
-				kfree(reloc_root);
+				if (list_empty(&reloc_root->root_list))
+					list_add_tail(&reloc_root->root_list,
+						      &reloc_roots);
				goto out;
			}
		} else {
-- 
1.8.3.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Barrier remount failure

2013-12-25 Thread Daniel J Blueman
On 3.13-rc5, it's possible to remount a mounted BTRFS filesystem with
'nobarrier', but not possible to remount with 'barrier'.

Is this expected?

Many thanks,
  Daniel
-- 
Daniel J Blueman
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Barrier remount failure

2013-12-25 Thread Qu Wenruo

On thu, 26 Dec 2013 13:31:08 +0800, Daniel J Blueman wrote:

On 3.13-rc5, it's possible to remount a mounted BTRFS filesystem with
'nobarrier', but not possible to remount with 'barrier'.

Is this expected?

Many thanks,
   Daniel

Not quite sure, but from the source of btrfs_parse_options,
barrier is not in the options that btrfs supports.

Other fs like ext4 supports it, so I think btrfs should also support it.
I'll try to add the barrier option soon.

For now, you can umount and mount again without any barrier related 
options, and btrfs will enable barrier by default.


Qu
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] btrfs: Add barrier option to support -o remount,barrier

2013-12-25 Thread Qu Wenruo
Btrfs can be remounted without barriers, but there is no barrier option,
so nobody can remount btrfs back with barriers on. Only umounting and
mounting again can re-enable barriers. (Quite awkward)

Reported-by: Daniel Blueman dan...@quora.org
Signed-off-by: Qu Wenruo quwen...@cn.fujitsu.com
---
 Documentation/filesystems/btrfs.txt | 6 ++
 fs/btrfs/super.c| 8 +++-
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/Documentation/filesystems/btrfs.txt 
b/Documentation/filesystems/btrfs.txt
index 5dd282d..f6f71d6 100644
--- a/Documentation/filesystems/btrfs.txt
+++ b/Documentation/filesystems/btrfs.txt
@@ -51,6 +51,12 @@ Unless otherwise specified, all options default to off.
defrag process.  Works best for small files; Not well suited for
large database workloads.
 
+  barrier
+   Enable the use of block layer write barriers.  Write barriers ensure
+   that certain IOs make it through the device cache and are on persistent
+   storage.
+   Barriers are enabled by default.
+   
   check_int
   check_int_data
   check_int_print_mask=value
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index e9c13fb..c8f4aca 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -323,7 +323,7 @@ enum {
Opt_no_space_cache, Opt_recovery, Opt_skip_balance,
Opt_check_integrity, Opt_check_integrity_including_extent_data,
Opt_check_integrity_print_mask, Opt_fatal_errors, Opt_rescan_uuid_tree,
-   Opt_commit_interval,
+   Opt_commit_interval, Opt_barrier,
Opt_err,
 };
 
@@ -335,6 +335,7 @@ static match_table_t tokens = {
	{Opt_nodatasum, "nodatasum"},
	{Opt_nodatacow, "nodatacow"},
	{Opt_nobarrier, "nobarrier"},
+	{Opt_barrier, "barrier"},
	{Opt_max_inline, "max_inline=%s"},
	{Opt_alloc_start, "alloc_start=%s"},
	{Opt_thread_pool, "thread_pool=%d"},
@@ -498,6 +499,11 @@ int btrfs_parse_options(struct btrfs_root *root, char *options)
			btrfs_info(root->fs_info, "turning off barriers");
			btrfs_set_opt(info->mount_opt, NOBARRIER);
			break;
+		case Opt_barrier:
+			if (btrfs_test_opt(root, NOBARRIER))
+				btrfs_info(root->fs_info, "turning on barriers");
+			btrfs_clear_opt(info->mount_opt, NOBARRIER);
+			break;
		case Opt_thread_pool:
			ret = match_int(&args[0], &intarg);
			if (ret) {
-- 
1.8.5.2

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html