[PATCH] _test_generic_punch: Extend $testfile's size to work with 64k block.

2013-08-08 Thread chandan
From cf6e1fc3a8d7806a97055b5f483cf50f58c8294f Mon Sep 17 00:00:00 2001
From: chandan chan...@linux.vnet.ibm.com
Date: Thu, 8 Aug 2013 11:33:10 +0530
Subject: [PATCH] _test_generic_punch: Extend $testfile's size to work with 64k
 block.

The current script does not work with 64k block size. This patch fixes it
by creating a larger $testfile.

Signed-off-by: chandan chan...@linux.vnet.ibm.com
---
 common/punch  | 119 +++--
 tests/generic/255.out | 476 +-
 tests/xfs/242.out | 118 ++---
 tests/xfs/252.out | 476 +-
 4 files changed, 594 insertions(+), 595 deletions(-)

diff --git a/common/punch b/common/punch
index d8f73d0..e6f0b2a 100644
--- a/common/punch
+++ b/common/punch
@@ -316,8 +316,8 @@ _test_generic_punch()
if [ $remove_testfile ]; then
rm -f $testfile
fi
-   $XFS_IO_PROG -f -c truncate 20k \
-   -c $zero_cmd 4k 8k \
+   $XFS_IO_PROG -f -c truncate 320k \
+   -c $zero_cmd 64k 128k \
-c $map_cmd -v $testfile | $filter_cmd
[ $? -ne 0 ]  die_now
_md5_checksum $testfile
@@ -326,9 +326,9 @@ _test_generic_punch()
if [ $remove_testfile ]; then
rm -f $testfile
fi
-   $XFS_IO_PROG -f -c truncate 20k \
-   -c pwrite 0 20k $sync_cmd \
-   -c $zero_cmd 4k 8k \
+   $XFS_IO_PROG -f -c truncate 320k \
+   -c pwrite 0 320k $sync_cmd \
+   -c $zero_cmd 64k 128k \
-c $map_cmd -v $testfile | $filter_cmd
[ $? -ne 0 ]  die_now
_md5_checksum $testfile
@@ -337,9 +337,9 @@ _test_generic_punch()
if [ $remove_testfile ]; then
rm -f $testfile
fi
-   $XFS_IO_PROG -f -c truncate 20k \
-   -c $alloc_cmd 0 20k \
-   -c $zero_cmd 4k 8k \
+   $XFS_IO_PROG -f -c truncate 320k \
+   -c $alloc_cmd 0 320k \
+   -c $zero_cmd 64k 128k \
-c $map_cmd -v $testfile | $filter_cmd
[ $? -ne 0 ]  die_now
_md5_checksum $testfile
@@ -348,9 +348,9 @@ _test_generic_punch()
if [ $remove_testfile ]; then
rm -f $testfile
fi
-   $XFS_IO_PROG -f -c truncate 20k \
-   -c pwrite 8k 8k $sync_cmd \
-   -c $zero_cmd 4k 8k \
+   $XFS_IO_PROG -f -c truncate 320k \
+   -c pwrite 128k 128k $sync_cmd \
+   -c $zero_cmd 64k 128k \
-c $map_cmd -v $testfile | $filter_cmd
[ $? -ne 0 ]  die_now
_md5_checksum $testfile
@@ -359,9 +359,9 @@ _test_generic_punch()
if [ $remove_testfile ]; then
rm -f $testfile
fi
-   $XFS_IO_PROG -f -c truncate 20k \
-   -c $alloc_cmd 8k 8k \
-   -c $zero_cmd 4k 8k \
+   $XFS_IO_PROG -f -c truncate 320k \
+   -c $alloc_cmd 128k 128k \
+   -c $zero_cmd 64k 128k \
-c $map_cmd -v $testfile | $filter_cmd
[ $? -ne 0 ]  die_now
_md5_checksum $testfile
@@ -370,9 +370,9 @@ _test_generic_punch()
if [ $remove_testfile ]; then
rm -f $testfile
fi
-   $XFS_IO_PROG -f -c truncate 20k \
-   -c pwrite 0 8k $sync_cmd \
-   -c $zero_cmd 4k 8k \
+   $XFS_IO_PROG -f -c truncate 320k \
+   -c pwrite 0 128k $sync_cmd \
+   -c $zero_cmd 64k 128k \
-c $map_cmd -v $testfile | $filter_cmd
[ $? -ne 0 ]  die_now
_md5_checksum $testfile
@@ -381,10 +381,10 @@ _test_generic_punch()
if [ $remove_testfile ]; then
rm -f $testfile
fi
-   $XFS_IO_PROG -f -c truncate 20k \
-   -c pwrite 0 8k $sync_cmd \
-   -c $alloc_cmd 8k 8k \
-   -c $zero_cmd 4k 8k \
+   $XFS_IO_PROG -f -c truncate 320k \
+   -c pwrite 0 128k $sync_cmd \
+   -c $alloc_cmd 128k 128k \
+   -c $zero_cmd 64k 128k \
-c $map_cmd -v $testfile | $filter_cmd
[ $? -ne 0 ]  die_now
_md5_checksum $testfile
@@ -393,9 +393,9 @@ _test_generic_punch()
if [ $remove_testfile ]; then
rm -f $testfile
fi
-   $XFS_IO_PROG -f -c truncate 20k \
-   -c $alloc_cmd 0 8k \
-   -c $zero_cmd 4k 8k \
+   $XFS_IO_PROG -f -c truncate 320k \
+   -c $alloc_cmd 0 128k \
+   -c $zero_cmd 64k 128k \
-c $map_cmd -v $testfile | $filter_cmd
[ $? -ne 0 ]  die_now
_md5_checksum $testfile
@@ -404,10 +404,10 @@ _test_generic_punch()
if [ $remove_testfile ]; then
rm -f $testfile
fi
-   $XFS_IO_PROG -f -c truncate 20k \
-   -c $alloc_cmd 0 8k \
-   -c pwrite 8k 8k $sync_cmd \
-   -c $zero_cmd 4k 8k \
+   

Re: [PATCH] Btrfs: stop using GFP_ATOMIC when allocating rewind ebs

2013-08-08 Thread Jan Schmidt
 
On Wed, August 07, 2013 at 23:11 (+0200), Josef Bacik wrote:
 There is no reason we can't just set the path to blocking and then do normal
 GFP_NOFS allocations for these extent buffers.  Thanks,
 
 Signed-off-by: Josef Bacik jba...@fusionio.com
 ---
  fs/btrfs/ctree.c |   16 ++--
  fs/btrfs/extent_io.c |8 
  2 files changed, 14 insertions(+), 10 deletions(-)
 
 diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
 index 1dd8a71..414a2d7 100644
 --- a/fs/btrfs/ctree.c
 +++ b/fs/btrfs/ctree.c
 @@ -1191,8 +1191,8 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, 
 struct extent_buffer *eb,
   * is freed (its refcount is decremented).
   */
  static struct extent_buffer *
 -tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb,
 - u64 time_seq)
 +tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
 + struct extent_buffer *eb, u64 time_seq)
  {
   struct extent_buffer *eb_rewin;
   struct tree_mod_elem *tm;
 @@ -1207,12 +1207,15 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, 
 struct extent_buffer *eb,
   if (!tm)
   return eb;
  
 + btrfs_set_path_blocking(path);
 + btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
 +
   if (tm-op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
   BUG_ON(tm-slot != 0);
   eb_rewin = alloc_dummy_extent_buffer(eb-start,
   fs_info-tree_root-nodesize);
   if (!eb_rewin) {
 - btrfs_tree_read_unlock(eb);
 + btrfs_tree_read_unlock_blocking(eb);
   free_extent_buffer(eb);
   return NULL;
   }
 @@ -1224,13 +1227,14 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, 
 struct extent_buffer *eb,
   } else {
   eb_rewin = btrfs_clone_extent_buffer(eb);
   if (!eb_rewin) {
 - btrfs_tree_read_unlock(eb);
 + btrfs_tree_read_unlock_blocking(eb);
   free_extent_buffer(eb);
   return NULL;
   }
   }
  
 - btrfs_tree_read_unlock(eb);
 + btrfs_clear_path_blocking(path, NULL, BTRFS_READ_LOCK);
 + btrfs_tree_read_unlock_blocking(eb);

unlock_blocking? Rest looks ok to me.

Thanks,
-Jan

   free_extent_buffer(eb);
  
   extent_buffer_get(eb_rewin);
 @@ -2779,7 +2783,7 @@ again:
   btrfs_clear_path_blocking(p, b,
 BTRFS_READ_LOCK);
   }
 - b = tree_mod_log_rewind(root-fs_info, b, time_seq);
 + b = tree_mod_log_rewind(root-fs_info, p, b, time_seq);
   if (!b) {
   ret = -ENOMEM;
   goto done;
 diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
 index b422cba..beda5a8 100644
 --- a/fs/btrfs/extent_io.c
 +++ b/fs/btrfs/extent_io.c
 @@ -4340,12 +4340,12 @@ struct extent_buffer 
 *btrfs_clone_extent_buffer(struct extent_buffer *src)
   struct extent_buffer *new;
   unsigned long num_pages = num_extent_pages(src-start, src-len);
  
 - new = __alloc_extent_buffer(NULL, src-start, src-len, GFP_ATOMIC);
 + new = __alloc_extent_buffer(NULL, src-start, src-len, GFP_NOFS);
   if (new == NULL)
   return NULL;
  
   for (i = 0; i  num_pages; i++) {
 - p = alloc_page(GFP_ATOMIC);
 + p = alloc_page(GFP_NOFS);
   if (!p) {
   btrfs_release_extent_buffer(new);
   return NULL;
 @@ -4369,12 +4369,12 @@ struct extent_buffer *alloc_dummy_extent_buffer(u64 
 start, unsigned long len)
   unsigned long num_pages = num_extent_pages(0, len);
   unsigned long i;
  
 - eb = __alloc_extent_buffer(NULL, start, len, GFP_ATOMIC);
 + eb = __alloc_extent_buffer(NULL, start, len, GFP_NOFS);
   if (!eb)
   return NULL;
  
   for (i = 0; i  num_pages; i++) {
 - eb-pages[i] = alloc_page(GFP_ATOMIC);
 + eb-pages[i] = alloc_page(GFP_NOFS);
   if (!eb-pages[i])
   goto err;
   }
 
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 6/7] btrfs: cleanup: removed unused 'btrfs_reada_detach'

2013-08-08 Thread Arne Jansen
On 07.08.2013 23:43, Sergei Trofimovich wrote:
 From: Sergei Trofimovich sly...@gentoo.org
 
 Found by uselex.rb:
 btrfs_reada_detach: [R]: exported from: fs/btrfs/btrfs.o fs/btrfs/built-in.o 
 fs/btrfs/reada.o

Even though the function is currently unused, I'm hesitant to remove it,
as it's part of the reada API and might be handy for anyone going to use
the API in the future.

-Arne

 
 Signed-off-by: Sergei Trofimovich sly...@gentoo.org
 ---
  fs/btrfs/ctree.h | 1 -
  fs/btrfs/reada.c | 9 +
  2 files changed, 1 insertion(+), 9 deletions(-)
 
 diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h
 index e91ab9e..f35e086 100644
 --- a/fs/btrfs/ctree.h
 +++ b/fs/btrfs/ctree.h
 @@ -3861,7 +3861,6 @@ struct reada_control {
  struct reada_control *btrfs_reada_add(struct btrfs_root *root,
 struct btrfs_key *start, struct btrfs_key *end);
  int btrfs_reada_wait(void *handle);
 -void btrfs_reada_detach(void *handle);
  int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb,
u64 start, int err);
  
 diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c
 index 1031b69..c41d470 100644
 --- a/fs/btrfs/reada.c
 +++ b/fs/btrfs/reada.c
 @@ -37,7 +37,7 @@
   * To trigger a readahead, btrfs_reada_add must be called. It will start
   * a read ahead for the given range [start, end) on tree root. The returned
   * handle can either be used to wait on the readahead to finish
 - * (btrfs_reada_wait), or to send it to the background (btrfs_reada_detach).
 + * (btrfs_reada_wait).
   *
   * The read ahead works as follows:
   * On btrfs_reada_add, the root of the tree is inserted into a radix_tree.
 @@ -979,10 +979,3 @@ int btrfs_reada_wait(void *handle)
   return 0;
  }
  #endif
 -
 -void btrfs_reada_detach(void *handle)
 -{
 - struct reada_control *rc = handle;
 -
 - kref_put(rc-refcnt, reada_control_release);
 -}

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: deal with enomem in the rewind path V3

2013-08-08 Thread Jan Schmidt
On Wed, August 07, 2013 at 23:03 (+0200), Josef Bacik wrote:
 We can get ENOMEM trying to allocate dummy bufs for the rewind operation of 
 the
 tree mod log.  Instead of BUG_ON()'ing in this case pass up ENOMEM.  I looked
 back through the callers and I'm pretty sure I got everybody who did 
 BUG_ON(ret)
 in this path.  Thanks,
 
 Signed-off-by: Josef Bacik jba...@fusionio.com
 ---
 V2-V3:
 -unlock and free the original buffer on error
 -return NULL instead of ERR_PTR(-ENOMEM)
 V1-V2: missed a BUG_ON() for alloc_dummy_extent_buffer.
 
  fs/btrfs/ctree.c |   16 +-
  fs/btrfs/extent_io.c |  145 +
  2 files changed, 88 insertions(+), 73 deletions(-)
 
 diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
 index 0d5c686..1dd8a71 100644
 --- a/fs/btrfs/ctree.c
 +++ b/fs/btrfs/ctree.c
 @@ -1211,7 +1211,11 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, 
 struct extent_buffer *eb,
   BUG_ON(tm-slot != 0);
   eb_rewin = alloc_dummy_extent_buffer(eb-start,
   fs_info-tree_root-nodesize);
 - BUG_ON(!eb_rewin);
 + if (!eb_rewin) {
 + btrfs_tree_read_unlock(eb);
 + free_extent_buffer(eb);
 + return NULL;
 + }
   btrfs_set_header_bytenr(eb_rewin, eb-start);
   btrfs_set_header_backref_rev(eb_rewin,
btrfs_header_backref_rev(eb));
 @@ -1219,7 +1223,11 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, 
 struct extent_buffer *eb,
   btrfs_set_header_level(eb_rewin, btrfs_header_level(eb));
   } else {
   eb_rewin = btrfs_clone_extent_buffer(eb);
 - BUG_ON(!eb_rewin);
 + if (!eb_rewin) {
 + btrfs_tree_read_unlock(eb);
 + free_extent_buffer(eb);
 + return NULL;
 + }
   }
  
   btrfs_tree_read_unlock(eb);
 @@ -2772,6 +2780,10 @@ again:
 BTRFS_READ_LOCK);
   }
   b = tree_mod_log_rewind(root-fs_info, b, time_seq);
 + if (!b) {
 + ret = -ENOMEM;
 + goto done;
 + }
   p-locks[level] = BTRFS_READ_LOCK;
   p-nodes[level] = b;
   } else {
 diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
 index deaea9c..b422cba 100644
 --- a/fs/btrfs/extent_io.c
 +++ b/fs/btrfs/extent_io.c
 @@ -4222,6 +4222,76 @@ static void __free_extent_buffer(struct extent_buffer 
 *eb)
   kmem_cache_free(extent_buffer_cache, eb);
  }
  
 +static int extent_buffer_under_io(struct extent_buffer *eb)
 +{
 + return (atomic_read(eb-io_pages) ||
 + test_bit(EXTENT_BUFFER_WRITEBACK, eb-bflags) ||
 + test_bit(EXTENT_BUFFER_DIRTY, eb-bflags));
 +}
 +
 +/*
 + * Helper for releasing extent buffer page.
 + */
 +static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
 + unsigned long start_idx)
 +{
 + unsigned long index;
 + unsigned long num_pages;
 + struct page *page;
 + int mapped = !test_bit(EXTENT_BUFFER_DUMMY, eb-bflags);
 +
 + BUG_ON(extent_buffer_under_io(eb));
 +
 + num_pages = num_extent_pages(eb-start, eb-len);
 + index = start_idx + num_pages;
 + if (start_idx = index)
 + return;
 +
 + do {
 + index--;
 + page = extent_buffer_page(eb, index);
 + if (page  mapped) {
 + spin_lock(page-mapping-private_lock);
 + /*
 +  * We do this since we'll remove the pages after we've
 +  * removed the eb from the radix tree, so we could race
 +  * and have this page now attached to the new eb.  So
 +  * only clear page_private if it's still connected to
 +  * this eb.
 +  */
 + if (PagePrivate(page) 
 + page-private == (unsigned long)eb) {
 + BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, 
 eb-bflags));
 + BUG_ON(PageDirty(page));
 + BUG_ON(PageWriteback(page));
 + /*
 +  * We need to make sure we haven't be attached
 +  * to a new eb.
 +  */
 + ClearPagePrivate(page);
 + set_page_private(page, 0);
 + /* One for the page private */
 + page_cache_release(page);
 + }
 + spin_unlock(page-mapping-private_lock);

Re: [PATCH] Btrfs: pass gfp_t to __add_prelim_ref() to avoid always using GFP_ATOMIC

2013-08-08 Thread Jan Schmidt
On Tue, August 06, 2013 at 04:29 (+0200), Wang Shilong wrote:
 Currently, only add_delayed_refs have to allocate with GFP_ATOMIC,
 So just pass arg 'gfp_t' to decide which allocation mode.
 
 Signed-off-by: Wang Shilong wangsl.f...@cn.fujitsu.com
 Reviewed-by: Miao Xie mi...@cn.fujitsu.com
 ---
  fs/btrfs/backref.c | 30 +++---
  1 file changed, 15 insertions(+), 15 deletions(-)
 
 diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
 index 8bc5e8c..cb73a12 100644
 --- a/fs/btrfs/backref.c
 +++ b/fs/btrfs/backref.c
 @@ -160,12 +160,12 @@ struct __prelim_ref {
  
  static int __add_prelim_ref(struct list_head *head, u64 root_id,
   struct btrfs_key *key, int level,
 - u64 parent, u64 wanted_disk_byte, int count)
 + u64 parent, u64 wanted_disk_byte, int count,
 + gfp_t gfp_mask)
  {
   struct __prelim_ref *ref;
  
 - /* in case we're adding delayed refs, we're holding the refs spinlock */
 - ref = kmalloc(sizeof(*ref), GFP_ATOMIC);
 + ref = kmalloc(sizeof(*ref), gfp_mask);
   if (!ref)
   return -ENOMEM;
  
 @@ -548,7 +548,7 @@ static int __add_delayed_refs(struct 
 btrfs_delayed_ref_head *head, u64 seq,
   ref = btrfs_delayed_node_to_tree_ref(node);
   ret = __add_prelim_ref(prefs, ref-root, op_key,
  ref-level + 1, 0, node-bytenr,
 -node-ref_mod * sgn);
 +node-ref_mod * sgn, GFP_ATOMIC);
   break;
   }
   case BTRFS_SHARED_BLOCK_REF_KEY: {
 @@ -558,7 +558,7 @@ static int __add_delayed_refs(struct 
 btrfs_delayed_ref_head *head, u64 seq,
   ret = __add_prelim_ref(prefs, ref-root, NULL,
  ref-level + 1, ref-parent,
  node-bytenr,
 -node-ref_mod * sgn);
 +node-ref_mod * sgn, GFP_ATOMIC);
   break;
   }
   case BTRFS_EXTENT_DATA_REF_KEY: {
 @@ -570,7 +570,7 @@ static int __add_delayed_refs(struct 
 btrfs_delayed_ref_head *head, u64 seq,
   key.offset = ref-offset;
   ret = __add_prelim_ref(prefs, ref-root, key, 0, 0,
  node-bytenr,
 -node-ref_mod * sgn);
 +node-ref_mod * sgn, GFP_ATOMIC);
   break;
   }
   case BTRFS_SHARED_DATA_REF_KEY: {
 @@ -583,7 +583,7 @@ static int __add_delayed_refs(struct 
 btrfs_delayed_ref_head *head, u64 seq,
   key.offset = ref-offset;
   ret = __add_prelim_ref(prefs, ref-root, key, 0,
  ref-parent, node-bytenr,
 -node-ref_mod * sgn);
 +node-ref_mod * sgn, GFP_ATOMIC);
   break;
   }
   default:
 @@ -657,7 +657,7 @@ static int __add_inline_refs(struct btrfs_fs_info 
 *fs_info,
   case BTRFS_SHARED_BLOCK_REF_KEY:
   ret = __add_prelim_ref(prefs, 0, NULL,
   *info_level + 1, offset,
 - bytenr, 1);
 + bytenr, 1, GFP_NOFS);
   break;
   case BTRFS_SHARED_DATA_REF_KEY: {
   struct btrfs_shared_data_ref *sdref;
 @@ -666,13 +666,13 @@ static int __add_inline_refs(struct btrfs_fs_info 
 *fs_info,
   sdref = (struct btrfs_shared_data_ref *)(iref + 1);
   count = btrfs_shared_data_ref_count(leaf, sdref);
   ret = __add_prelim_ref(prefs, 0, NULL, 0, offset,
 -bytenr, count);
 +bytenr, count, GFP_NOFS);
   break;
   }
   case BTRFS_TREE_BLOCK_REF_KEY:
   ret = __add_prelim_ref(prefs, offset, NULL,
  *info_level + 1, 0,
 -bytenr, 1);
 +bytenr, 1, GFP_NOFS);
   break;
   case BTRFS_EXTENT_DATA_REF_KEY: {
   struct btrfs_extent_data_ref *dref;
 @@ -687,7 +687,7 @@ static int __add_inline_refs(struct btrfs_fs_info 
 *fs_info,
   key.offset = btrfs_extent_data_ref_offset(leaf, dref);
   root = btrfs_extent_data_ref_root(leaf, dref);
   

[PATCH 0/2 v2] introduce btrfs filesystem show --kernel

2013-08-08 Thread Anand Jain
This patch set introduces the --kernel option for filesystem show,
for the reasons given in patch 2/2 below.
Patch 1/2 is the preparatory patch.

Anand Jain (2):
  btrfs-progs: move out print in cmd_df to another function
  btrfs-progs: introduce btrfs filesystem show --kernel

 cmds-filesystem.c | 355 --
 ctree.h   |  11 ++
 man/btrfs.8.in|   5 +-
 3 files changed, 281 insertions(+), 90 deletions(-)

-- 
1.8.1.191.g414c78c

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 1/2] btrfs-progs: move out print in cmd_df to another function

2013-08-08 Thread Anand Jain
This is preparatory work for the following btrfs fi show command
fixes, so that we have a function get_df to get the fs sizes.

v2:
combined the other patches as below and rebase
 btrfs-progs: get string for the group profile and type

Signed-off-by: Anand Jain anand.j...@oracle.com
---
 cmds-filesystem.c | 190 +++---
 ctree.h   |  11 
 2 files changed, 122 insertions(+), 79 deletions(-)

diff --git a/cmds-filesystem.c b/cmds-filesystem.c
index a4e30ea..be8afde 100644
--- a/cmds-filesystem.c
+++ b/cmds-filesystem.c
@@ -44,28 +44,51 @@ static const char * const cmd_df_usage[] = {
NULL
 };
 
-static int cmd_df(int argc, char **argv)
+static char * group_type_str(u64 flag)
 {
-   struct btrfs_ioctl_space_args *sargs, *sargs_orig;
-   u64 count = 0, i;
-   int ret;
-   int fd;
-   int e;
-   char *path;
-   DIR  *dirstream = NULL;
-
-   if (check_argc_exact(argc, 2))
-   usage(cmd_df_usage);
-
-   path = argv[1];
+   switch (flag  BTRFS_BLOCK_GROUP_TYPE_MASK) {
+   case BTRFS_BLOCK_GROUP_DATA:
+   return data;
+   case BTRFS_BLOCK_GROUP_SYSTEM:
+   return system;
+   case BTRFS_BLOCK_GROUP_METADATA:
+   return metadata;
+   case BTRFS_BLOCK_GROUP_DATA|BTRFS_BLOCK_GROUP_METADATA:
+   return mixed;
+   default:
+   return unknown;
+   }
+}
 
-   fd = open_file_or_dir(path, dirstream);
-   if (fd  0) {
-   fprintf(stderr, ERROR: can't access to '%s'\n, path);
-   return 12;
+static char * group_profile_str(u64 flag)
+{
+   switch (flag  BTRFS_BLOCK_GROUP_PROFILE_MASK) {
+   case 0:
+   return single;
+   case BTRFS_BLOCK_GROUP_RAID0:
+   return RAID0;
+   case BTRFS_BLOCK_GROUP_RAID1:
+   return RAID1;
+   case BTRFS_BLOCK_GROUP_RAID5:
+   return RAID5;
+   case BTRFS_BLOCK_GROUP_RAID6:
+   return RAID6;
+   case BTRFS_BLOCK_GROUP_DUP:
+   return DUP;
+   case BTRFS_BLOCK_GROUP_RAID10:
+   return RAID10;
+   default:
+   return unknown;
}
+}
+
+static int get_df(int fd, struct btrfs_ioctl_space_args **sargs_ret)
+{
+   u64 count = 0;
+   int ret, e;
+   struct btrfs_ioctl_space_args *sargs;
 
-   sargs_orig = sargs = malloc(sizeof(struct btrfs_ioctl_space_args));
+   sargs = malloc(sizeof(struct btrfs_ioctl_space_args));
if (!sargs)
return -ENOMEM;
 
@@ -75,89 +98,98 @@ static int cmd_df(int argc, char **argv)
ret = ioctl(fd, BTRFS_IOC_SPACE_INFO, sargs);
e = errno;
if (ret) {
-   fprintf(stderr, ERROR: couldn't get space info on '%s' - %s\n,
-   path, strerror(e));
-   goto out;
+   fprintf(stderr, ERROR: couldn't get space info - %s\n,
+   strerror(e));
+   free(sargs);
+   return ret;
}
if (!sargs-total_spaces) {
-   ret = 0;
-   goto out;
+   free(sargs);
+   return 0;
}
-
count = sargs-total_spaces;
+   free(sargs);
 
-   sargs = realloc(sargs, sizeof(struct btrfs_ioctl_space_args) +
+   sargs = malloc(sizeof(struct btrfs_ioctl_space_args) +
(count * sizeof(struct btrfs_ioctl_space_info)));
-   if (!sargs) {
-   sargs = sargs_orig;
+   if (!sargs)
ret = -ENOMEM;
-   goto out;
-   }
 
sargs-space_slots = count;
sargs-total_spaces = 0;
-
ret = ioctl(fd, BTRFS_IOC_SPACE_INFO, sargs);
e = errno;
if (ret) {
-   fprintf(stderr, ERROR: couldn't get space info on '%s' - %s\n,
-   path, strerror(e));
-   goto out;
+   fprintf(stderr, ERROR: get space info count %llu - %s\n,
+   count, strerror(e));
+   free(sargs);
+   return ret;
}
+   *sargs_ret = sargs;
+   return 0;
+}
 
-   for (i = 0; i  sargs-total_spaces; i++) {
-   char description[80];
-   int written = 0;
-   u64 flags = sargs-spaces[i].flags;
+static void print_df(struct btrfs_ioctl_space_args *sargs)
+{
+   char description[80];
+   char *total_bytes;
+   char *used_bytes;
+   u64 flags;
+   u64 i;
+   int written;
+   char g_str[64];
+   int g_sz;
 
+   for (i = 0; i  sargs-total_spaces; i++) {
+   flags = sargs-spaces[i].flags;
+   written = 0;
memset(description, 0, 80);
 
-   if (flags  BTRFS_BLOCK_GROUP_DATA) {
-   if (flags  BTRFS_BLOCK_GROUP_METADATA) {
-   snprintf(description, 14, %s,
- 

[PATCH 2/2] btrfs-progs: introduce btrfs filesystem show --kernel

2013-08-08 Thread Anand Jain
As of now, btrfs filesystem show reads directly from
disks, so the output can sometimes be stale, mainly when
users want to verify their last operation, such as
labeling or device delete or add.

This patch adds --kernel option to the 'filesystem show'
subcli, which will read from the kernel instead of
the disks directly.

This patch also adds the group profile info to the
output.

eg:
-
btrfs fi show --kernel
Label: none  uuid: 39f55f14-e5ca-4a01-899d-915fd35bde05 mounted: /btrfs
Group profile: metadata: RAID1  data: RAID1
Total devices 2 FS bytes used 7.40GB
devid1 size 48.23GB used 11.04GB path /dev/dm-5
devid2 size 44.99GB used 11.03GB path /dev/mapper/mpathe

Label: none  uuid: a0beeb78-0019-4bdf-8002-0900a123ee07 mounted: /btrfs1
Group profile: mixed: single
Total devices 1 FS bytes used 7.40GB
devid1 size 15.00GB used 9.01GB path /dev/mapper/mpathbp1

btrfs fi show --kernel /btrfs2
Label: none  uuid: 9d6a347e-e8a0-44fe-9d2a-d28ee45ef33f mounted: /btrfs2
Group profile: metadata: DUP  data: single
Total devices 1 FS bytes used 2.22MB
devid1 size 15.00GB used 1.32GB path /dev/mapper/mpathcp1

btrfs fi show --kernel 9d6a347e-e8a0-44fe-9d2a-d28ee45ef33f
Label: none  uuid: 9d6a347e-e8a0-44fe-9d2a-d28ee45ef33f mounted: /btrfs2
Group profile: metadata: DUP  data: single
Total devices 1 FS bytes used 2.22MB
devid1 size 15.00GB used 1.32GB path /dev/mapper/mpathcp1


v3-v4:
dropped the dependence of used_bytes from the ioctl
kernel, Instead used the get_df to calculate the
used space.
dropped the function device_list_add_from_kernel
to update the original device_list_add instead
I have my own print and device filters, this way I
can add the group profile information in the show
output.
v2-v3:
Do the work without adding a new ioctl
new dependencies: this patch also depends on
patches 9/13 to 12/13, also sent here.
v1-v2:
code optimized to remove redundancy

Signed-off-by: Anand Jain anand.j...@oracle.com
---
 cmds-filesystem.c | 165 +++---
 man/btrfs.8.in|   5 +-
 2 files changed, 159 insertions(+), 11 deletions(-)

diff --git a/cmds-filesystem.c b/cmds-filesystem.c
index be8afde..74ad30b 100644
--- a/cmds-filesystem.c
+++ b/cmds-filesystem.c
@@ -22,6 +22,9 @@
 #include errno.h
 #include uuid/uuid.h
 #include ctype.h
+#include mntent.h
+#include fcntl.h
+#include linux/limits.h
 
 #include kerncompat.h
 #include ctree.h
@@ -251,8 +254,124 @@ static void print_one_uuid(struct btrfs_fs_devices 
*fs_devices)
printf(\n);
 }
 
+/* adds up all the used spaces as reported by the space info ioctl
+ */
+static u64 cal_used_bytes(struct btrfs_ioctl_space_args *si)
+{
+   u64 ret = 0;
+   int i;
+   for (i = 0; i  si-total_spaces; i++)
+   ret += si-spaces[i].used_bytes;
+   return ret;
+}
+
+static int print_one_fs(struct btrfs_ioctl_fs_info_args *fi,
+   struct btrfs_ioctl_dev_info_args *di_n,
+   struct btrfs_ioctl_space_args *si_n, char *label, char *path)
+{
+   int i;
+   char uuidbuf[37];
+   struct btrfs_ioctl_dev_info_args *di = di_n;
+   u64 flags;
+
+   uuid_unparse(fi-fsid, uuidbuf);
+   printf(Label: %s  uuid: %s mounted: %s\n,
+   strlen(label)?label:none, uuidbuf, path);
+   printf(\tGroup profile:);
+   for (i = si_n-total_spaces - 1; i = 0; i--) {
+   flags = si_n-spaces[i].flags;
+   if (flags  BTRFS_BLOCK_GROUP_SYSTEM)
+   continue;
+   printf( %s: %s, group_type_str(flags),
+   group_profile_str(flags));
+   printf( );
+   }
+   printf(\n);
+
+   printf(\tTotal devices %llu FS bytes used %s\n,
+   fi-num_devices,
+   pretty_size(cal_used_bytes(si_n)));
+
+   for (i = 0; i  fi-num_devices; i++) {
+   di = (struct btrfs_ioctl_dev_info_args *)di_n[i];
+   printf(\tdevid%llu size %s used %s path %s\n,
+   di-devid,
+   pretty_size(di-total_bytes),
+   pretty_size(di-bytes_used),
+   di-path);
+   }
+
+   printf(\n);
+   return 0;
+}
+
+/* This function checks if the given input parameter is
+ * an uuid or a path
+ * return -1: some error in the given input
+ * return 0: unknown input
+ * return 1: given input is uuid
+ * return 2: given input is path
+ */
+static int check_arg_type(char *input, u8 *processed)
+{
+   int ret = 0;
+   if (!uuid_parse(input, processed))
+   ret = 1;
+   else if (realpath(input, (char *)processed))
+   ret = 2;
+   return ret;
+}
+
+static int btrfs_scan_kernel(void 

[PATCH 1/2] btrfs-progs: btrfs_scan_one_dir not to skip links when /dev/mapper is provided

2013-08-08 Thread Anand Jain
This is preparatory work to introduce /dev/mapper path usage

We need btrfs_scan_one_dir to scan devs under /dev/mapper,
but /dev/mapper has links to the actual devs and the current implementation
of btrfs_scan_one_dir skips links, so it does not pick up any
dev under /dev/mapper. Skipping links is fine when scanning the whole of
/dev, but not when we just want to scan /dev/mapper.

This patch just adds a check for whether we are scanning /dev or
/dev/mapper only; in the latter case it will not skip links.

Thanks

v2: changes as per David review

Signed-off-by: Anand Jain anand.j...@oracle.com
---
 utils.c | 19 ---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/utils.c b/utils.c
index 8a57967..038e599 100644
--- a/utils.c
+++ b/utils.c
@@ -1039,13 +1039,26 @@ int btrfs_scan_one_dir(char *dirname, int run_ioctl)
struct list_head pending_list;
struct btrfs_fs_devices *tmp_devices;
u64 num_devices;
+   int skip_link = 1;
+   char rdir[PATH_MAX];
+   char rdirp = NULL;
 
INIT_LIST_HEAD(pending_list);
 
pending = malloc(sizeof(*pending));
if (!pending)
return -ENOMEM;
-   strcpy(pending-name, dirname);
+
+   rdirp = realpath(dirname, rdir);
+   if (!rdirp) {
+   free(pending);
+   return -errno;
+   }
+
+   strcpy(pending-name, rdir);
+
+   if (!strcmp(rdir, /dev/mapper))
+   skip_link = 0;
 
 again:
dirname_len = strlen(pending-name);
@@ -1078,7 +1091,7 @@ again:
fprintf(stderr, failed to stat %s\n, fullpath);
continue;
}
-   if (S_ISLNK(st.st_mode))
+   if (skip_link  S_ISLNK(st.st_mode))
continue;
if (S_ISDIR(st.st_mode)) {
struct pending_dir *next = malloc(sizeof(*next));
@@ -1089,7 +1102,7 @@ again:
strcpy(next-name, fullpath);
list_add_tail(next-list, pending_list);
}
-   if (!S_ISBLK(st.st_mode)) {
+   if (skip_link  !S_ISBLK(st.st_mode)) {
continue;
}
fd = open(fullpath, O_RDONLY);
-- 
1.8.1.191.g414c78c

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 0/2] scan /dev/mapper in filesystem show and device scan

2013-08-08 Thread Anand Jain
This patch set lets /dev/mapper be used as the path for
the btrfs kernel through dev scan.
1/2 is the preparatory patch.

Anand Jain (2):
  btrfs-progs: btrfs_scan_one_dir not to skip links when /dev/mapper is
provided
  btrfs-progs: scan /dev/mapper in filesystem show and device scan

 cmds-device.c |  8 +++-
 cmds-filesystem.c |  7 +--
 man/btrfs.8.in| 22 --
 utils.c   | 22 +++---
 utils.h   |  1 +
 5 files changed, 44 insertions(+), 16 deletions(-)

-- 
1.8.1.191.g414c78c

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 2/2] btrfs-progs: scan /dev/mapper in filesystem show and device scan

2013-08-08 Thread Anand Jain
Currently, btrfs fi show and btrfs dev scan use
/proc/partitions by default (which gives priority
to dm-x over sdy paths), and with --all-devices they
scan /dev only (skipping links under /dev/mapper).

However, using /dev/mapper paths is common practice
with mount, fstab, and lvm, so it's better to be consistent
with them.

This patch adds a --mapper option to the btrfs device scan and
btrfs filesystem show commands; when used, they look for btrfs
devs under /dev/mapper and use the links provided
under /dev/mapper.

eg:
btrfs fi show --mapper
Label: none  uuid: 0a62-ad84-4d80-842a-dd9c1c60bf51
Total devices 2 FS bytes used 1.17MB
devid1 size 44.99GB used 2.04GB path /dev/mapper/mpathe
devid2 size 48.23GB used 2.03GB path /dev/mapper/mpathd

Label: none  uuid: bad9105f-bdc6-4626-9ba7-80bd97aebe19
Total devices 1 FS bytes used 28.00KB
devid1 size 15.00GB used 2.04GB path /dev/mapper/mpathbp1

In the long run, the mapper path, when present (along with /proc/partitions),
can be the default option to scan for btrfs devs.
(/proc/partitions must be scanned as well in order to
include the devs blacklisted from mapper.)

Signed-off-by: Anand Jain anand.j...@oracle.com
---
 cmds-device.c |  8 +++-
 cmds-filesystem.c |  7 +--
 man/btrfs.8.in| 22 --
 utils.c   |  3 +++
 utils.h   |  1 +
 5 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/cmds-device.c b/cmds-device.c
index be2aaff..6d1b378 100644
--- a/cmds-device.c
+++ b/cmds-device.c
@@ -186,7 +186,7 @@ static int cmd_rm_dev(int argc, char **argv)
 }
 
 static const char * const cmd_scan_dev_usage[] = {
-   btrfs device scan [--all-devices|device [device...]],
+   btrfs device scan [--all-devices|--mapper|device [device...]],
Scan devices for a btrfs filesystem,
NULL
 };
@@ -203,6 +203,12 @@ static int cmd_scan_dev(int argc, char **argv)
 
where = BTRFS_SCAN_DEV;
devstart += 1;
+   } else if( argc  1  !strcmp(argv[1],--mapper)){
+   if (check_argc_max(argc, 2))
+   usage(cmd_scan_dev_usage);
+
+   where = BTRFS_SCAN_MAPPER;
+   devstart += 1;
}
 
if(argc=devstart){
diff --git a/cmds-filesystem.c b/cmds-filesystem.c
index 74ad30b..88cace3 100644
--- a/cmds-filesystem.c
+++ b/cmds-filesystem.c
@@ -371,7 +371,7 @@ static int btrfs_scan_kernel(void *input, int type)
 }
 
 static const char * const cmd_show_usage[] = {
-   btrfs filesystem show [--all-devices|--mapper|--kernel|uuid],
+   btrfs filesystem show [--all-devices|--mapper|--kernel] 
[uuid|path],
Show the structure of a filesystem,
If no argument is given, structure of all present filesystems is 
shown.,
NULL
@@ -388,9 +388,12 @@ static int cmd_show(int argc, char **argv)
int searchstart = 1;
u8 processed[PATH_MAX];
 
-   if( argc  1  !strcmp(argv[1], --all-devices)){
+   if (argc  1  !strcmp(argv[1], --all-devices)){
where = BTRFS_SCAN_DEV;
searchstart += 1;
+   } else if (argc  1  !strcmp(argv[1], --mapper)) {
+   where = BTRFS_SCAN_MAPPER;
+   searchstart += 1;
} else if (argc  1  !strcmp(argv[1], --kernel)) {
where = 0;
searchstart += 1;
diff --git a/man/btrfs.8.in b/man/btrfs.8.in
index 6383469..821f138 100644
--- a/man/btrfs.8.in
+++ b/man/btrfs.8.in
@@ -25,7 +25,7 @@ btrfs \- control a btrfs filesystem
 .PP
 \fBbtrfs\fP \fBfilesystem df\fP\fI path\fP
 .PP
-\fBbtrfs\fP \fBfilesystem show\fP\fI [--all-devices|--kernel] 
[\fIuuid|path]\fP\fP
+\fBbtrfs\fP \fBfilesystem show 
[\fP\fI--all-devices\fP|\fI--mapper\fP|\fI--kernel\fP] 
[\fIuuid\fP|\fIpath\fP]
 .PP
 \fBbtrfs\fP \fBfilesystem sync\fP\fI path \fP
 .PP
@@ -51,7 +51,7 @@ btrfs \- control a btrfs filesystem
 .PP
 \fBbtrfs\fP \fBdevice delete\fP \fIdevice\fP [\fIdevice...\fP] \fIpath\fP
 .PP
-\fBbtrfs\fP \fBdevice scan\fP [--all-devices|\fIdevice \fP[\fIdevice...\fP]
+\fBbtrfs\fP \fBdevice scan\fP 
[\fI--all-devices\fP|\fI--mapper\fP|\fIdevice\fP [\fIdevice...\fP]
 .PP
 \fBbtrfs\fP \fBdevice ready\fP\fI device\fP
 .PP
@@ -254,12 +254,13 @@ Show information of a given subvolume in the \fIpath\fR.
 Show space usage information for a mount point.
 .TP
 
-\fBfilesystem show\fR [--all-devices|--kernel] [\fIuuid|path]\fP\fP
-Show the btrfs filesystem with some additional info. If no \fIUUID\fP or
-\fIlabel\fP is passed, \fBbtrfs\fR show info of all the btrfs filesystem.
-If \fB--all-devices\fP is passed, all the devices under /dev are scanned;
+\fBfilesystem show\fR [\fI--all-devices\fP|\fI--mapper\fP|\fI--kernel\fP] 
[\fIuuid\fP|\fIpath\fP]\fP
+Show the btrfs filesystem with some additional info. If no \fIuuid\fP
+is passed, it will show info of all the btrfs filesystem.
+If \fI--all-devices\fP is passed, all the devices under /dev are 

Re: [PATCH 0/2] scan /dev/mapper in filesystem show and device scan

2013-08-08 Thread anand jain



 Oh, I missed the libblkid part of David's recommendation.
 I will be rewriting this patch set. Sorry about that.

Thanks, Anand

On 08/08/2013 16:09, Anand Jain wrote:

This patch set allows device paths under /dev/mapper to be the paths
registered with the btrfs kernel module through device scan.
Patch 1/2 is the preparatory patch.

Anand Jain (2):
   btrfs-progs: btrfs_scan_one_dir not to skip links when /dev/mapper is
 provided
   btrfs-progs: scan /dev/mapper in filesystem show and device scan

  cmds-device.c |  8 +++-
  cmds-filesystem.c |  7 +--
  man/btrfs.8.in| 22 --
  utils.c   | 22 +++---
  utils.h   |  1 +
  5 files changed, 44 insertions(+), 16 deletions(-)


--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v3 2/2] xfstests btrfs/316: test send / receive

2013-08-08 Thread Jan Schmidt
Basic send / receive functionality test for btrfs. Requires current
version of fsstress built (-x support). Relies on fssum tool but can
skip the test if it failed to build.

Signed-off-by: Jan Schmidt list@jan-o-sch.net
Reviewed-by: Josef Bacik jba...@fusionio.com
---
 tests/btrfs/316 |  113 +++
 tests/btrfs/316.out |4 ++
 tests/btrfs/group   |1 +
 3 files changed, 118 insertions(+), 0 deletions(-)
 create mode 100755 tests/btrfs/316
 create mode 100644 tests/btrfs/316.out

diff --git a/tests/btrfs/316 b/tests/btrfs/316
new file mode 100755
index 000..087978a
--- /dev/null
+++ b/tests/btrfs/316
@@ -0,0 +1,113 @@
+#! /bin/bash
+# FSQA Test No. 316
+#
+# Run fsstress to create a reasonably strange file system, make a
+# snapshot (base) and run more fsstress. Then take another snapshot
+# (incr) and send both snapshots to a temp file. Remake the file
+# system and receive from the files. Check both states with fssum.
+#
+#---
+# Copyright (C) 2013 STRATO.  All rights reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#
+#---
+#
+# creator
+owner=list.bt...@jan-o-sch.net
+
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo QA output created by $seq
+
+here=`pwd`
+tmp=`mktemp -d`
+status=1
+
+_cleanup()
+{
+   echo *** unmount
+   umount $SCRATCH_MNT 2/dev/null
+   rm -f $tmp.*
+}
+trap _cleanup; exit \$status 0 1 2 3 15
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+
+# real QA test starts here
+_need_to_be_root
+_supported_fs btrfs
+_supported_os Linux
+_require_scratch
+_require_command $FSSUM_PROG fssum
+
+rm -f $seqres.full
+
+workout()
+{
+   fsz=$1
+   ops=$2
+
+   umount $SCRATCH_DEV /dev/null 21
+   echo *** mkfs -dsize=$fsz$seqres.full
+   echo  $seqres.full
+   _scratch_mkfs_sized $fsz $seqres.full 21 \
+   || _fail size=$fsz mkfs failed
+   run_check _scratch_mount -o noatime
+
+   run_check $FSSTRESS_PROG -d $SCRATCH_MNT -n $ops $FSSTRESS_AVOID -x \
+   $BTRFS_UTIL_PROG subvol snap -r $SCRATCH_MNT $SCRATCH_MNT/base
+
+   run_check $BTRFS_UTIL_PROG subvol snap -r $SCRATCH_MNT $SCRATCH_MNT/incr
+
+   echo # $BTRFS_UTIL_PROG send $SCRATCH_MNT/base  $tmp/base.snap \
+$seqres.full
+   $BTRFS_UTIL_PROG send $SCRATCH_MNT/base  $tmp/base.snap 2 
$seqres.full \
+   || _fail failed: '$@'
+   echo # $BTRFS_UTIL_PROG send -p $SCRATCH_MNT/base\
+   $SCRATCH_MNT/incr  $tmp/incr.snap  $seqres.full
+   $BTRFS_UTIL_PROG send -p $SCRATCH_MNT/base \
+   $SCRATCH_MNT/incr  $tmp/incr.snap 2 $seqres.full \
+   || _fail failed: '$@'
+
+   run_check $FSSUM_PROG -A -f -w $tmp/base.fssum $SCRATCH_MNT/base
+   run_check $FSSUM_PROG -A -f -w $tmp/incr.fssum -x 
$SCRATCH_MNT/incr/base \
+   $SCRATCH_MNT/incr
+
+   umount $SCRATCH_DEV /dev/null 21
+   echo *** mkfs -dsize=$fsz$seqres.full
+   echo  $seqres.full
+   _scratch_mkfs_sized $fsz $seqres.full 21 \
+   || _fail size=$fsz mkfs failed
+   run_check _scratch_mount -o noatime
+
+   run_check $BTRFS_UTIL_PROG receive $SCRATCH_MNT  $tmp/base.snap
+   run_check $FSSUM_PROG -r $tmp/base.fssum $SCRATCH_MNT/base
+
+   run_check $BTRFS_UTIL_PROG receive $SCRATCH_MNT  $tmp/incr.snap
+   run_check $FSSUM_PROG -r $tmp/incr.fssum $SCRATCH_MNT/incr
+}
+
+echo *** test send / receive
+
+fssize=`expr 2000 \* 1024 \* 1024`
+ops=200
+
+workout $fssize $ops
+
+echo *** done
+status=0
+exit
diff --git a/tests/btrfs/316.out b/tests/btrfs/316.out
new file mode 100644
index 000..4564c85
--- /dev/null
+++ b/tests/btrfs/316.out
@@ -0,0 +1,4 @@
+QA output created by 316
+*** test send / receive
+*** done
+*** unmount
diff --git a/tests/btrfs/group b/tests/btrfs/group
index bc6c256..11d708a 100644
--- a/tests/btrfs/group
+++ b/tests/btrfs/group
@@ -9,3 +9,4 @@
 276 auto rw metadata
 284 auto
 307 auto quick
+316 auto rw metadata
-- 
1.7.2.5

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More 

[PATCH v3 1/2] xfstests: add fssum tool

2013-08-08 Thread Jan Schmidt
fssum is a tool to build a recursive checksum for a file system. The home
repository of fssum is

git://git.kernel.org/pub/scm/linux/kernel/git/arne/far-progs.git

It is added as an optional target, because it depends on glibc >= 2.15 for
SEEK_HOLE / SEEK_DATA. The test to be added using fssum will just be skipped
if fssum wasn't built.

Signed-off-by: Jan Schmidt list@jan-o-sch.net
---
 .gitignore|1 +
 common/config |2 +
 src/Makefile  |   11 +-
 src/fssum.c   |  819 +
 4 files changed, 832 insertions(+), 1 deletions(-)
 create mode 100644 src/fssum.c

diff --git a/.gitignore b/.gitignore
index 11594aa..c2fc6e3 100644
--- a/.gitignore
+++ b/.gitignore
@@ -45,6 +45,7 @@
 /src/fill
 /src/fill2
 /src/fs_perms
+/src/fssum
 /src/fstest
 /src/fsync-tester
 /src/ftrunc
diff --git a/common/config b/common/config
index 67c1498..c8bee29 100644
--- a/common/config
+++ b/common/config
@@ -146,6 +146,8 @@ export SED_PROG=`set_prog_path sed`
 export BC_PROG=`set_prog_path bc`
 [ $BC_PROG =  ]  _fatal bc not found
 
+export FSSUM_PROG=`set_prog_path fssum $here/src/fssum`
+
 export PS_ALL_FLAGS=-ef
 
 export DF_PROG=`set_prog_path df`
diff --git a/src/Makefile b/src/Makefile
index cc679e8..10a4d3c 100644
--- a/src/Makefile
+++ b/src/Makefile
@@ -20,10 +20,14 @@ LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize 
preallo_rw_pattern_reader \
stale_handle pwrite_mmap_blocked t_dir_offset2 seek_sanity_test \
seek_copy_test t_readdir_1 t_readdir_2 fsync-tester
 
+OPT_TARGETS = fssum
+
 SUBDIRS =
 
 LLDLIBS = $(LIBATTR) $(LIBHANDLE) $(LIBACL)
 
+OPT_LDLIBS = -lssl -lcrypto
+
 ifeq ($(HAVE_XLOG_ASSIGN_LSN), true)
 LINUX_TARGETS += loggen
 endif
@@ -60,7 +64,7 @@ CFILES = $(TARGETS:=.c)
 LDIRT = $(TARGETS)
 
 
-default: depend $(TARGETS) $(SUBDIRS)
+default: depend $(TARGETS) $(OPT_TARGETS) $(SUBDIRS)
 
 depend: .dep
 
@@ -70,11 +74,16 @@ $(TARGETS): $(LIBTEST)
@echo [CC]$@
$(Q)$(LTLINK) $@.c -o $@ $(CFLAGS) $(LDFLAGS) $(LDLIBS) $(LIBTEST)
 
+$(OPT_TARGETS): $(LIBTEST)
+   @echo [CC]$@
+   -$(Q)$(LTLINK) $@.c -o $@ $(CFLAGS) $(LDFLAGS) $(LDLIBS) $(OPT_LDLIBS) 
$(LIBTEST)
+
 LINKTEST = $(LTLINK) $@.c -o $@ $(CFLAGS) $(LDFLAGS)
 
 install: default $(addsuffix -install,$(SUBDIRS))
$(INSTALL) -m 755 -d $(PKG_LIB_DIR)/src
$(LTINSTALL) -m 755 $(TARGETS) $(PKG_LIB_DIR)/src
+   -$(LTINSTALL) -m 755 $(OPT_TARGETS) $(PKG_LIB_DIR)/src
$(LTINSTALL) -m 755 fill2attr fill2fs fill2fs_check scaleread.sh 
$(PKG_LIB_DIR)/src
$(LTINSTALL) -m 644 dumpfile $(PKG_LIB_DIR)/src
 
diff --git a/src/fssum.c b/src/fssum.c
new file mode 100644
index 000..ecddb6a
--- /dev/null
+++ b/src/fssum.c
@@ -0,0 +1,819 @@
+/*
+ * Copyright (C) 2012 STRATO AG.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+#define _BSD_SOURCE
+#define _LARGEFILE64_SOURCE
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
+#include stdio.h
+#include stdlib.h
+#include unistd.h
+#include string.h
+#include fcntl.h
+#include dirent.h
+#include errno.h
+#include sys/types.h
+#include sys/stat.h
+#ifdef __SOLARIS__
+#include sys/mkdev.h
+#endif
+#include openssl/md5.h
+#include netinet/in.h
+#include inttypes.h
+#include assert.h
+
+#define CS_SIZE 16
+#define CHUNKS 128
+
+#if __BYTE_ORDER == __LITTLE_ENDIAN
+#define htonll(x) __bswap_64 (x)
+#endif
+
+/* TODO: add hardlink recognition */
+/* TODO: add xattr/acl */
+
+struct excludes {
+   char *path;
+   int len;
+};
+
+typedef struct _sum {
+   MD5_CTX md5;
+   unsigned char   out[16];
+} sum_t;
+
+typedef int (*sum_file_data_t)(int fd, sum_t *dst);
+
+int gen_manifest = 0;
+int in_manifest = 0;
+char *checksum = NULL;
+struct excludes *excludes;
+int n_excludes = 0;
+int verbose = 0;
+FILE *out_fp;
+FILE *in_fp;
+
+enum _flags {
+   FLAG_UID,
+   FLAG_GID,
+   FLAG_MODE,
+   FLAG_ATIME,
+   FLAG_MTIME,
+   FLAG_CTIME,
+   FLAG_DATA,
+   FLAG_OPEN_ERROR,
+   FLAG_STRUCTURE,
+   NUM_FLAGS
+};
+
+const char flchar[] = ugoamcdes;
+char line[65536];
+
+int flags[NUM_FLAGS] = {1, 1, 1, 1, 1, 0, 1, 0, 0};
+
+char *
+getln(char *buf, int size, FILE *fp)
+{
+   char *p;
+   int l;
+
+   p = fgets(buf, size, fp);
+   if (!p)
+   return NULL;
+
+   l 

[PATCH v3 0/2] xfstest btrfs/316: test send / receive

2013-08-08 Thread Jan Schmidt
These two patches add the announced tests for btrfs send / receive. As
requested, the fssum tool is now included.

One drawback is that I'm unable to edit configure.ac or whatever needs
to be modified in an autotools preferred way. Any hints appreciated,
preferably hints containing all the modifications required to introduce
something like HAVE_SEEK_HOLE.

I do not want to make modifications to fssum.c here. If that's
absolutely required (because one /could/ get along using linux/fs.h,
which is not the way I would like to go), I'd like to have that changed
in the far-progs repository, where fssum.c comes from, as well.

--
v1->v2:
 - included fssum
 - test number is now 316 (was 314)
v2->v3:
 - added missing -lcrypto to build fssum
 - removed obsolete change in README now that fssum is included
 - fixed comment in test/btrfs/316's header (314 -> 316)

Jan Schmidt (2):
  xfstests: add fssum tool
  xfstests btrfs/316: test send / receive

 .gitignore  |1 +
 common/config   |2 +
 src/Makefile|   11 +-
 src/fssum.c |  819 +++
 tests/btrfs/316 |  113 +++
 tests/btrfs/316.out |4 +
 tests/btrfs/group   |1 +
 7 files changed, 950 insertions(+), 1 deletions(-)
 create mode 100644 src/fssum.c
 create mode 100755 tests/btrfs/316
 create mode 100644 tests/btrfs/316.out

-- 
1.7.2.5

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC PATCH v6 1/5] Btrfs: skip merge part for delayed data refs

2013-08-08 Thread Liu Bo
When data deduplication is on, we can hang in the merge part, because it
needs to verify every queued delayed data ref related to this disk
offset, and there may be millions of such refs.

And in the case of delayed data refs, we don't usually have many
data refs to merge.

So it's safe to skip the merge for data refs.

Signed-off-by: Liu Bo bo.li@oracle.com
---
 fs/btrfs/delayed-ref.c |7 +++
 1 files changed, 7 insertions(+), 0 deletions(-)

diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index f7be9f7..fc4ce8b 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -320,6 +320,13 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle 
*trans,
struct rb_node *node;
u64 seq = 0;
 
+   /*
+* We don't have too much refs to merge in the case of delayed data
+* refs.
+*/
+   if (head-is_data)
+   return;
+
spin_lock(fs_info-tree_mod_seq_lock);
if (!list_empty(fs_info-tree_mod_seq_list)) {
struct seq_list *elem;
-- 
1.7.7

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC PATCH v6 3/5] Btrfs: introduce a head ref rbtree

2013-08-08 Thread Liu Bo
The way we process delayed refs is:
1) get a bunch of head refs,
2) pick up one head ref,
3) go one node back for any delayed ref updates.

The head refs are linked in the same rbtree as the delayed refs,
so in stage 1) we have to walk the nodes one by one, visiting not only
head refs but also delayed refs.

When we have a great number of delayed refs pending to be processed,
this costs a lot of time.

Here we introduce an rbtree specific to head refs; it contains only head
refs, so the trouble goes away.

Signed-off-by: Liu Bo bo.li@oracle.com
---
 fs/btrfs/delayed-ref.c |  124 
 fs/btrfs/delayed-ref.h |5 ++
 fs/btrfs/disk-io.c |3 +
 fs/btrfs/extent-tree.c |   21 +---
 fs/btrfs/transaction.c |4 +-
 5 files changed, 98 insertions(+), 59 deletions(-)

diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 198b7ad..73a3e55 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -161,35 +161,61 @@ static struct btrfs_delayed_ref_node *tree_insert(struct 
rb_root *root,
return NULL;
 }
 
+/* insert a new ref to head ref rbtree */
+static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root,
+  struct rb_node *node)
+{
+   struct rb_node **p = root-rb_node;
+   struct rb_node *parent_node = NULL;
+   struct btrfs_delayed_ref_head *entry;
+   struct btrfs_delayed_ref_head *ins;
+   u64 bytenr;
+
+   ins = rb_entry(node, struct btrfs_delayed_ref_head, href_node);
+   bytenr = ins-node.bytenr;
+   while (*p) {
+   parent_node = *p;
+   entry = rb_entry(parent_node, struct btrfs_delayed_ref_head,
+href_node);
+
+   if (bytenr  entry-node.bytenr)
+   p = (*p)-rb_left;
+   else if (bytenr  entry-node.bytenr)
+   p = (*p)-rb_right;
+   else
+   return entry;
+   }
+
+   rb_link_node(node, parent_node, p);
+   rb_insert_color(node, root);
+   return NULL;
+}
+
 /*
  * find an head entry based on bytenr. This returns the delayed ref
  * head if it was able to find one, or NULL if nothing was in that spot.
  * If return_bigger is given, the next bigger entry is returned if no exact
  * match is found.
  */
-static struct btrfs_delayed_ref_node *find_ref_head(struct rb_root *root,
- u64 bytenr,
- struct btrfs_delayed_ref_node **last,
- int return_bigger)
+static struct btrfs_delayed_ref_head *
+find_ref_head(struct rb_root *root, u64 bytenr,
+ struct btrfs_delayed_ref_head **last, int return_bigger)
 {
struct rb_node *n;
-   struct btrfs_delayed_ref_node *entry;
+   struct btrfs_delayed_ref_head *entry;
int cmp = 0;
 
 again:
n = root-rb_node;
entry = NULL;
while (n) {
-   entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node);
-   WARN_ON(!entry-in_tree);
+   entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node);
if (last)
*last = entry;
 
-   if (bytenr  entry-bytenr)
+   if (bytenr  entry-node.bytenr)
cmp = -1;
-   else if (bytenr  entry-bytenr)
-   cmp = 1;
-   else if (!btrfs_delayed_ref_is_head(entry))
+   else if (bytenr  entry-node.bytenr)
cmp = 1;
else
cmp = 0;
@@ -203,12 +229,12 @@ again:
}
if (entry  return_bigger) {
if (cmp  0) {
-   n = rb_next(entry-rb_node);
+   n = rb_next(entry-href_node);
if (!n)
n = rb_first(root);
-   entry = rb_entry(n, struct btrfs_delayed_ref_node,
-rb_node);
-   bytenr = entry-bytenr;
+   entry = rb_entry(n, struct btrfs_delayed_ref_head,
+href_node);
+   bytenr = entry-node.bytenr;
return_bigger = 0;
goto again;
}
@@ -246,6 +272,12 @@ static void inline drop_delayed_ref(struct 
btrfs_trans_handle *trans,
struct btrfs_delayed_ref_node *ref)
 {
rb_erase(ref-rb_node, delayed_refs-root);
+   if (btrfs_delayed_ref_is_head(ref)) {
+   struct btrfs_delayed_ref_head *head;
+
+   head = btrfs_delayed_node_to_head(ref);
+   rb_erase(head-href_node, delayed_refs-href_root);
+   }
ref-in_tree = 0;
btrfs_put_delayed_ref(ref);
delayed_refs-num_entries--;
@@ -386,42 +418,35 @@ int 

[RFC PATCH v6 0/5] Online data deduplication

2013-08-08 Thread Liu Bo
Data deduplication is a specialized data compression technique for eliminating
duplicate copies of repeating data.[1]

This patch set is also related to Content based storage in project ideas[2].

PATCH 1 is a fix for a hang with deduplication on, but it's also useful
without dedup in practice.

PATCH 2 and 3 are targeting delayed refs' scalability problems, which are
uncovered by the dedup feature.

PATCH 4 is a speed-up improvement, which is about dedup and quota.

PATCH 5 is full of real things, all details about implementation of dedup.

Plus, there is also a btrfs-progs patch which helps to enable/disable dedup
feature.

TODO:
* a bit-to-bit comparison callback.

All comments are welcome!

[1]: http://en.wikipedia.org/wiki/Data_deduplication
[2]: https://btrfs.wiki.kernel.org/index.php/Project_ideas#Content_based_storage

v5->v6:
- remove BUG_ON()s and use proper error handling.
- make dedup hash endian safe on disk.
- refactor dedup tree item.
- fix a bug of deleting file extents with dedup disabled.
- some cleanups
- add manpage for dedup subcommand.

v4->v5:
- go back to one dedup key with a special backref for dedup tree because
  the disk format understands backref well.
- fix a fsync hang with dedup enabled.
- rebase onto the latest btrfs.


Liu Bo (5):
  Btrfs: skip merge part for delayed data refs
  Btrfs: improve the delayed refs process in rm case
  Btrfs: introduce a head ref rbtree
  Btrfs: disable qgroups accounting when quata_enable is 0
  Btrfs: online data deduplication

 fs/btrfs/backref.c |9 +
 fs/btrfs/ctree.c   |2 +-
 fs/btrfs/ctree.h   |   82 ++
 fs/btrfs/delayed-ref.c |  159 +++
 fs/btrfs/delayed-ref.h |8 +
 fs/btrfs/disk-io.c |   31 ++
 fs/btrfs/extent-tree.c |  190 +++--
 fs/btrfs/extent_io.c   |   29 ++-
 fs/btrfs/extent_io.h   |   16 +
 fs/btrfs/file-item.c   |  211 ++
 fs/btrfs/inode.c   |  673 +++-
 fs/btrfs/ioctl.c   |   93 ++
 fs/btrfs/ordered-data.c|   38 ++-
 fs/btrfs/ordered-data.h|   13 +-
 fs/btrfs/qgroup.c  |3 +
 fs/btrfs/relocation.c  |3 +
 fs/btrfs/super.c   |   27 ++-
 fs/btrfs/transaction.c |4 +-
 include/uapi/linux/btrfs.h |5 +
 19 files changed, 1420 insertions(+), 176 deletions(-)

-- 
1.7.7

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[RFC PATCH v6 4/5] Btrfs: disable qgroups accounting when quata_enable is 0

2013-08-08 Thread Liu Bo
It's unnecessary to do qgroups accounting without enabling quota.

Signed-off-by: Liu Bo bo.li@oracle.com
---
v6:
* don't record seq for qgroups with quota disabled as we do not need to,
  and keep the checker of qgroups.

 fs/btrfs/ctree.c   |2 +-
 fs/btrfs/delayed-ref.c |   18 ++
 fs/btrfs/qgroup.c  |3 +++
 3 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
index d5387dd..2d22ddf 100644
--- a/fs/btrfs/ctree.c
+++ b/fs/btrfs/ctree.c
@@ -408,7 +408,7 @@ u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info,
 
tree_mod_log_write_lock(fs_info);
spin_lock(fs_info-tree_mod_seq_lock);
-   if (!elem-seq) {
+   if (elem  !elem-seq) {
elem-seq = btrfs_inc_tree_mod_seq_major(fs_info);
list_add_tail(elem-list, fs_info-tree_mod_seq_list);
}
diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index 73a3e55..af57cfc 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -691,8 +691,13 @@ static noinline void add_delayed_tree_ref(struct 
btrfs_fs_info *fs_info,
ref-is_head = 0;
ref-in_tree = 1;
 
-   if (need_ref_seq(for_cow, ref_root))
-   seq = btrfs_get_tree_mod_seq(fs_info, trans-delayed_ref_elem);
+   if (need_ref_seq(for_cow, ref_root)) {
+   struct seq_list *elem = NULL;
+
+   if (fs_info-quota_enabled)
+   elem = trans-delayed_ref_elem;
+   seq = btrfs_get_tree_mod_seq(fs_info, elem);
+   }
ref-seq = seq;
 
full_ref = btrfs_delayed_node_to_tree_ref(ref);
@@ -750,8 +755,13 @@ static noinline void add_delayed_data_ref(struct 
btrfs_fs_info *fs_info,
ref-is_head = 0;
ref-in_tree = 1;
 
-   if (need_ref_seq(for_cow, ref_root))
-   seq = btrfs_get_tree_mod_seq(fs_info, trans-delayed_ref_elem);
+   if (need_ref_seq(for_cow, ref_root)) {
+   struct seq_list *elem = NULL;
+
+   if (fs_info-quota_enabled)
+   elem = trans-delayed_ref_elem;
+   seq = btrfs_get_tree_mod_seq(fs_info, elem);
+   }
ref-seq = seq;
 
full_ref = btrfs_delayed_node_to_data_ref(ref);
diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c
index 1280eff..780ff14 100644
--- a/fs/btrfs/qgroup.c
+++ b/fs/btrfs/qgroup.c
@@ -1200,6 +1200,9 @@ int btrfs_qgroup_record_ref(struct btrfs_trans_handle 
*trans,
 {
struct qgroup_update *u;
 
+   if (!trans-root-fs_info-quota_enabled)
+   return 0;
+
BUG_ON(!trans-delayed_ref_elem.seq);
u = kmalloc(sizeof(*u), GFP_NOFS);
if (!u)
-- 
1.7.7

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2] Btrfs-progs: add dedup subcommand

2013-08-08 Thread Liu Bo
This adds a deduplication subcommand, 'btrfs dedup <command> <path>',
where <command> is 'register' or 'unregister'.

It can be used to enable or disable dedup support for a filesystem.

Signed-off-by: Liu Bo bo.li@oracle.com
---
v2: add manpage

 Makefile   |2 +-
 btrfs.c|1 +
 cmds-dedup.c   |  101 
 commands.h |2 +
 ctree.h|2 +
 ioctl.h|5 +++
 man/btrfs.8.in |   12 +++
 7 files changed, 124 insertions(+), 1 deletions(-)
 create mode 100644 cmds-dedup.c

diff --git a/Makefile b/Makefile
index da7438e..5b4a07d 100644
--- a/Makefile
+++ b/Makefile
@@ -10,7 +10,7 @@ objects = ctree.o disk-io.o radix-tree.o extent-tree.o 
print-tree.o \
 cmds_objects = cmds-subvolume.o cmds-filesystem.o cmds-device.o cmds-scrub.o \
   cmds-inspect.o cmds-balance.o cmds-send.o cmds-receive.o \
   cmds-quota.o cmds-qgroup.o cmds-replace.o cmds-check.o \
-  cmds-restore.o
+  cmds-restore.o cmds-dedup.o
 libbtrfs_objects = send-stream.o send-utils.o rbtree.o btrfs-list.o crc32c.o
 libbtrfs_headers = send-stream.h send-utils.h send.h rbtree.h btrfs-list.h \
   crc32c.h list.h kerncompat.h radix-tree.h extent-cache.h \
diff --git a/btrfs.c b/btrfs.c
index 691adef..956905c 100644
--- a/btrfs.c
+++ b/btrfs.c
@@ -254,6 +254,7 @@ const struct cmd_group btrfs_cmd_group = {
{ quota, cmd_quota, NULL, quota_cmd_group, 0 },
{ qgroup, cmd_qgroup, NULL, qgroup_cmd_group, 0 },
{ replace, cmd_replace, NULL, replace_cmd_group, 0 },
+   { dedup, cmd_dedup, NULL, dedup_cmd_group, 0 },
{ help, cmd_help, cmd_help_usage, NULL, 0 },
{ version, cmd_version, cmd_version_usage, NULL, 0 },
{ 0, 0, 0, 0, 0 }
diff --git a/cmds-dedup.c b/cmds-dedup.c
new file mode 100644
index 000..a977585
--- /dev/null
+++ b/cmds-dedup.c
@@ -0,0 +1,101 @@
+/*
+ * Copyright (C) 2013 Oracle.  All rights reserved.
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public
+ * License v2 as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public
+ * License along with this program; if not, write to the
+ * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
+ * Boston, MA 021110-1307, USA.
+ */
+
+#include sys/ioctl.h
+#include unistd.h
+
+#include ctree.h
+#include ioctl.h
+
+#include commands.h
+#include utils.h
+
+static const char * const dedup_cmd_group_usage[] = {
+   btrfs dedup command [options] path,
+   NULL
+};
+
+int dedup_ctl(int cmd, int argc, char **argv)
+{
+   int ret = 0;
+   int fd;
+   int e;
+   char *path = argv[1];
+
+   if (check_argc_exact(argc, 2))
+   return -1;
+
+   fd = open_file_or_dir(path);
+   if (fd  0) {
+   fprintf(stderr, ERROR: can't access '%s'\n, path);
+   return -EACCES;
+   }
+
+   ret = ioctl(fd, BTRFS_IOC_DEDUP_CTL, cmd);
+   e = errno;
+   close(fd);
+   if (ret  0) {
+   fprintf(stderr, ERROR: dedup command failed: %s\n,
+   strerror(e));
+   if (cmd == BTRFS_DEDUP_CTL_UNREG)
+   fprintf(stderr, please refer to 'dmesg | tail' for 
more info\n);
+   return -EINVAL;
+   }
+   return 0;
+}
+
+static const char * const cmd_dedup_reg_usage[] = {
+   btrfs dedup register path,
+   Enable data deduplication support for a filesystem.,
+   NULL
+};
+
+static int cmd_dedup_reg(int argc, char **argv)
+{
+   int ret = dedup_ctl(BTRFS_DEDUP_CTL_REG, argc, argv);
+   if (ret  0)
+   usage(cmd_dedup_reg_usage);
+   return ret;
+}
+
+static const char * const cmd_dedup_unreg_usage[] = {
+   btrfs dedup unregister path,
+   Disable data deduplication support for a filesystem.,
+   NULL
+};
+
+static int cmd_dedup_unreg(int argc, char **argv)
+{
+   int ret = dedup_ctl(BTRFS_DEDUP_CTL_UNREG, argc, argv);
+   if (ret  0)
+   usage(cmd_dedup_unreg_usage);
+   return ret;
+}
+
+const struct cmd_group dedup_cmd_group = {
+   dedup_cmd_group_usage, NULL, {
+   { register, cmd_dedup_reg, cmd_dedup_reg_usage, NULL, 0 },
+   { unregister, cmd_dedup_unreg, cmd_dedup_unreg_usage, 0, 0 },
+   { 0, 0, 0, 0, 0 }
+   }
+};
+
+int cmd_dedup(int argc, char **argv)
+{
+   return handle_command_group(dedup_cmd_group, argc, argv);
+}
diff --git a/commands.h b/commands.h
index 15c616d..d31afa4 100644
--- a/commands.h
+++ b/commands.h
@@ 

[RFC PATCH v6 2/5] Btrfs: improve the delayed refs process in rm case

2013-08-08 Thread Liu Bo
While removing a file with dedup extents, we could have a great number of
delayed refs pending to be processed, and each of these refs drops
a ref of the extent, i.e. it is of BTRFS_DROP_DELAYED_REF type.

But in order to prevent an extent's ref count from going down to zero when
there still are pending delayed refs, we first select the refs that add
a ref, i.e. those of BTRFS_ADD_DELAYED_REF type.

So in the removal case, all of our delayed refs are of BTRFS_DROP_DELAYED_REF
type, but we still have to walk all the refs issued to the extent looking for
BTRFS_ADD_DELAYED_REF refs, only to find there are none, and then start
over again to find a BTRFS_DROP_DELAYED_REF.

This is really unnecessary, we can improve this by tracking how many
BTRFS_ADD_DELAYED_REF refs we have and search by the right type.

Signed-off-by: Liu Bo bo.li@oracle.com
---
 fs/btrfs/delayed-ref.c |   10 ++
 fs/btrfs/delayed-ref.h |3 +++
 fs/btrfs/extent-tree.c |   17 -
 3 files changed, 29 insertions(+), 1 deletions(-)

diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c
index fc4ce8b..198b7ad 100644
--- a/fs/btrfs/delayed-ref.c
+++ b/fs/btrfs/delayed-ref.c
@@ -543,6 +543,10 @@ update_existing_head_ref(struct btrfs_delayed_ref_node 
*existing,
 * update the reference mod on the head to reflect this new operation
 */
existing-ref_mod += update-ref_mod;
+
+   WARN_ON_ONCE(update-ref_mod  1);
+   if (update-ref_mod == 1)
+   existing_ref-add_cnt++;
 }
 
 /*
@@ -604,6 +608,12 @@ static noinline void add_delayed_ref_head(struct 
btrfs_fs_info *fs_info,
head_ref-must_insert_reserved = must_insert_reserved;
head_ref-is_data = is_data;
 
+   /* track added ref, more comments in select_delayed_ref() */
+   if (count_mod == 1)
+   head_ref-add_cnt = 1;
+   else
+   head_ref-add_cnt = 0;
+
INIT_LIST_HEAD(head_ref-cluster);
mutex_init(head_ref-mutex);
 
diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h
index 70b962c..9377b27 100644
--- a/fs/btrfs/delayed-ref.h
+++ b/fs/btrfs/delayed-ref.h
@@ -84,6 +84,9 @@ struct btrfs_delayed_ref_head {
struct list_head cluster;
 
struct btrfs_delayed_extent_op *extent_op;
+
+   int add_cnt;
+
/*
 * when a new extent is allocated, it is just reserved in memory
 * The actual extent isn't inserted into the extent allocation tree
diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c
index 70002ea..2b8729e 100644
--- a/fs/btrfs/extent-tree.c
+++ b/fs/btrfs/extent-tree.c
@@ -2260,6 +2260,16 @@ select_delayed_ref(struct btrfs_delayed_ref_head *head)
struct rb_node *node;
struct btrfs_delayed_ref_node *ref;
int action = BTRFS_ADD_DELAYED_REF;
+
+   /*
+* track the count of BTRFS_ADD_DELAYED_REF,
+* in the case that there's no BTRFS_ADD_DELAYED_REF while there're a
+* a great number of BTRFS_DROP_DELAYED_REF,
+* it'll waste time on searching BTRFS_ADD_DELAYED_REF, usually this
+* happens with dedup enabled.
+*/
+   if (head-add_cnt == 0)
+   action = BTRFS_DROP_DELAYED_REF;
 again:
/*
 * select delayed ref of type BTRFS_ADD_DELAYED_REF first.
@@ -2274,8 +2284,11 @@ again:
rb_node);
if (ref-bytenr != head-node.bytenr)
break;
-   if (ref-action == action)
+   if (ref-action == action) {
+   if (action == BTRFS_ADD_DELAYED_REF)
+   head-add_cnt--;
return ref;
+   }
node = rb_prev(node);
}
if (action == BTRFS_ADD_DELAYED_REF) {
@@ -2351,6 +2364,8 @@ static noinline int run_clustered_refs(struct 
btrfs_trans_handle *trans,
 * there are still refs with lower seq numbers in the
 * process of being added. Don't run this ref yet.
 */
+   if (ref-action == BTRFS_ADD_DELAYED_REF)
+   locked_ref-add_cnt++;
list_del_init(locked_ref-cluster);
btrfs_delayed_ref_unlock(locked_ref);
locked_ref = NULL;
-- 
1.7.7

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/3] Btrfs: catch error return value from find_extent_in_eb()

2013-08-08 Thread Filipe David Manana
On Thu, Aug 8, 2013 at 6:04 AM, Wang Shilong wangsl.f...@cn.fujitsu.com wrote:
 find_extent_in_eb() may return ENOMEM, catch this error return value.

 Signed-off-by: Wang Shilong wangsl.f...@cn.fujitsu.com
 Reviewed-by: Miao Xie mi...@cn.fujitsu.com
 ---
  fs/btrfs/backref.c | 4 
  1 file changed, 4 insertions(+)

 diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
 index 54e7610..f7781e6 100644
 --- a/fs/btrfs/backref.c
 +++ b/fs/btrfs/backref.c
 @@ -934,6 +934,10 @@ again:
 }
 ret = find_extent_in_eb(eb, bytenr,
 *extent_item_pos, 
 eie);
 +   if (ret) {
 +   free_extent_buffer(eb);
 +   goto out;
 +   }
 ref-inode_list = eie;
 free_extent_buffer(eb);
 }

Hello, this is a duplicate of:  https://patchwork.kernel.org/patch/2835989/

thanks
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Btrfs-progs: fix a regression in mkfs.btrfs

2013-08-08 Thread Stefan Behrens
Commit 55061a98 adds a cut & paste error that makes mkfs.btrfs fail
if leafsize != sectorsize.

Signed-off-by: Stefan Behrens sbehr...@giantdisaster.de
---
 utils.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/utils.c b/utils.c
index 15b991f..691b075 100644
--- a/utils.c
+++ b/utils.c
@@ -430,7 +430,7 @@ int make_btrfs(int fd, const char *device, const char 
*label,
ret = pwrite(fd, buf-data, sectorsize, blocks[0]);
if (ret  0)
return -errno;
-   else if (ret != leafsize)
+   else if (ret != sectorsize)
return -EIO;
 
free(buf);
-- 
1.8.3.4

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/5] Btrfs-progs: return error on write failure in make_btrfs()

2013-08-08 Thread Stefan Behrens
On Thu,  4 Jul 2013 10:48:39 +0100, Filipe David Borba Manana wrote:
 Instead of aborting with a BUG_ON() statement, return a
 negated errno code. Also updated mkfs and convert tools
 to print a nicer error message when make_btrfs() returns
 an error.
 
 Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
[...]
   ret = pwrite(fd, buf-data, sectorsize, blocks[0]);
 - BUG_ON(ret != sectorsize);
 -
 + if (ret  0)
 + return -errno;
 + else if (ret != leafsize)
 + return -EIO;

mkfs.btrfs with leafsize != sectorsize fails. I've sent a patch for it.

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs-progs: fix a regression in mkfs.btrfs

2013-08-08 Thread Filipe David Manana
On Thu, Aug 8, 2013 at 11:51 AM, Stefan Behrens
sbehr...@giantdisaster.de wrote:
 Commit 55061a98 adds a cut & paste error that makes mkfs.btrfs fail
 if leafsize != sectorsize.

 Signed-off-by: Stefan Behrens sbehr...@giantdisaster.de
 ---
  utils.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

 diff --git a/utils.c b/utils.c
 index 15b991f..691b075 100644
 --- a/utils.c
 +++ b/utils.c
 @@ -430,7 +430,7 @@ int make_btrfs(int fd, const char *device, const char 
 *label,
 ret = pwrite(fd, buf-data, sectorsize, blocks[0]);
 if (ret  0)
 return -errno;
 -   else if (ret != leafsize)
 +   else if (ret != sectorsize)
 return -EIO;

Reviewed-by: Filipe Manana fdman...@gmail.com


 free(buf);
 --
 1.8.3.4

 --
 To unsubscribe from this list: send the line unsubscribe linux-btrfs in
 the body of a message to majord...@vger.kernel.org
 More majordomo info at  http://vger.kernel.org/majordomo-info.html



-- 
Filipe David Manana,

Reasonable men adapt themselves to the world.
 Unreasonable men adapt the world to themselves.
 That's why all progress depends on unreasonable men.
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/5] Btrfs-progs: return error on write failure in make_btrfs()

2013-08-08 Thread Filipe David Manana
On Thu, Aug 8, 2013 at 11:52 AM, Stefan Behrens
sbehr...@giantdisaster.de wrote:
 On Thu,  4 Jul 2013 10:48:39 +0100, Filipe David Borba Manana wrote:
 Instead of aborting with a BUG_ON() statement, return a
 negated errno code. Also updated mkfs and convert tools
 to print a nicer error message when make_btrfs() returns
 an error.

 Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
 [...]
   ret = pwrite(fd, buf-data, sectorsize, blocks[0]);
 - BUG_ON(ret != sectorsize);
 -
 + if (ret  0)
 + return -errno;
 + else if (ret != leafsize)
 + return -EIO;

 mkfs.btrfs with leafsize != sectorsize fails. I've sent a patch for it.

Copy paste error :(
Thanks for catching and fixing it.





-- 
Filipe David Manana,

Reasonable men adapt themselves to the world.
 Unreasonable men adapt the world to themselves.
 That's why all progress depends on unreasonable men.
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: btrfs-progs: drop weird indirections dead code from send/receive

2013-08-08 Thread Stefan Behrens
On Fri, 02 Aug 2013 20:24:55 -0500, Eric Sandeen wrote:
 cmds-receive.c & cmds-send.c seem to have weird wrappers and
 indirections, and groups of commands which have only
 one member, which are never referenced in the code.
 
 I think these can be removed.
 
 Signed-off-by: Eric Sandeen sand...@redhat.com
 ---
 
  cmds-receive.c |   19 +--
  cmds-send.c|   12 +---
  commands.h |2 --
  3 files changed, 2 insertions(+), 31 deletions(-)
 
 Stefan, I'd appreciate your review & testing of this though!
 TBH it's an old patch I had laying around, but I have
 not re-tested it recently.

Reviewed & tested without issues.


 diff --git a/cmds-receive.c b/cmds-receive.c
 index 4e480f9..7abce76 100644
 --- a/cmds-receive.c
 +++ b/cmds-receive.c
 @@ -907,7 +907,7 @@ out:
   return ret;
  }
  
 -static int do_cmd_receive(int argc, char **argv)
 +int cmd_receive(int argc, char **argv)
  {
   int c;
   char *tomnt = NULL;
 @@ -960,11 +960,6 @@ static int do_cmd_receive(int argc, char **argv)
   return ret;
  }
  
 -static const char * const receive_cmd_group_usage[] = {
 - btrfs receive command args,
 - NULL
 -};
 -
  const char * const cmd_receive_usage[] = {
   btrfs receive [-ve] [-f infile] mount,
   Receive subvolumes from stdin.,
 @@ -988,15 +983,3 @@ const char * const cmd_receive_usage[] = {
is recognized or on EOF.,
   NULL
  };
 -
 -const struct cmd_group receive_cmd_group = {
 - receive_cmd_group_usage, NULL, {
 - { receive, do_cmd_receive, cmd_receive_usage, NULL, 0 },
 - { 0, 0, 0, 0, 0 },
 -},
 -};
 -
 -int cmd_receive(int argc, char **argv)
 -{
 - return do_cmd_receive(argc, argv);
 -}
 diff --git a/cmds-send.c b/cmds-send.c
 index 7209aba..f9899f4 100644
 --- a/cmds-send.c
 +++ b/cmds-send.c
 @@ -465,7 +465,7 @@ out:
   return ret;
  }
  
 -int cmd_send_start(int argc, char **argv)
 +int cmd_send(int argc, char **argv)
  {
   char *subvol = NULL;
   int c;
 @@ -718,11 +718,6 @@ out:
   return ret;
  }
  
 -static const char * const send_cmd_group_usage[] = {
 - btrfs send command args,
 - NULL
 -};
 -
  const char * const cmd_send_usage[] = {
   btrfs send [-ve] [-p parent] [-c clone-src] subvol,
   Send the subvolume to stdout.,
 @@ -750,8 +745,3 @@ const char * const cmd_send_usage[] = {
use pipes.,
   NULL
  };
 -
 -int cmd_send(int argc, char **argv)
 -{
 - return cmd_send_start(argc, argv);
 -}
 diff --git a/commands.h b/commands.h
 index 65829f4..3f12fab 100644
 --- a/commands.h
 +++ b/commands.h
 @@ -85,8 +85,6 @@ extern const struct cmd_group balance_cmd_group;
  extern const struct cmd_group device_cmd_group;
  extern const struct cmd_group scrub_cmd_group;
  extern const struct cmd_group inspect_cmd_group;
 -extern const struct cmd_group send_cmd_group;
 -extern const struct cmd_group receive_cmd_group;
  extern const struct cmd_group quota_cmd_group;
  extern const struct cmd_group qgroup_cmd_group;
  extern const struct cmd_group replace_cmd_group;
 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/3] Btrfs: catch error return value from find_extent_in_eb()

2013-08-08 Thread Jan Schmidt
 
On Thu, August 08, 2013 at 12:24 (+0200), Filipe David Manana wrote:
 On Thu, Aug 8, 2013 at 6:04 AM, Wang Shilong wangsl.f...@cn.fujitsu.com 
 wrote:
 find_extent_in_eb() may return ENOMEM, catch this error return value.

 Signed-off-by: Wang Shilong wangsl.f...@cn.fujitsu.com
 Reviewed-by: Miao Xie mi...@cn.fujitsu.com
 ---
  fs/btrfs/backref.c | 4 
  1 file changed, 4 insertions(+)

 diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
 index 54e7610..f7781e6 100644
 --- a/fs/btrfs/backref.c
 +++ b/fs/btrfs/backref.c
 @@ -934,6 +934,10 @@ again:
 }
 ret = find_extent_in_eb(eb, bytenr,
 *extent_item_pos, 
 eie);
 +   if (ret) {
 +   free_extent_buffer(eb);
 +   goto out;
 +   }
 ref-inode_list = eie;
 free_extent_buffer(eb);
 }
 
 Hello, this is a duplicate of:  https://patchwork.kernel.org/patch/2835989/

Your linked patch checks for ret < 0, which is a safer option since there are
functions down the stack returning > 0 or 0 for success and < 0 for errors.
Currently, find_extent_in_eb doesn't return their return values, but I'd rather
be a bit more on the safe side and use your patch.

Thanks,
-Jan
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/3] Btrfs: fix possible memory leak in find_parent_nodes()

2013-08-08 Thread Wang Shilong
On 08/08/2013 07:02 PM, Jan Schmidt wrote:
  
 On Thu, August 08, 2013 at 07:04 (+0200), Wang Shilong wrote:
 Signed-off-by: Wang Shilong wangsl.f...@cn.fujitsu.com
 Reviewed-by: Miao Xie mi...@cn.fujitsu.com
 ---
  fs/btrfs/backref.c | 2 +-
  1 file changed, 1 insertion(+), 1 deletion(-)

 diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
 index cb73a12..54e7610 100644
 --- a/fs/btrfs/backref.c
 +++ b/fs/btrfs/backref.c
 @@ -911,7 +911,6 @@ again:
  
  while (!list_empty(prefs)) {
  ref = list_first_entry(prefs, struct __prelim_ref, list);
 -list_del(ref-list);
  WARN_ON(ref-count  0);
  if (ref-count  ref-root_id  ref-parent == 0) {
  /* no parent == root of tree */
 @@ -954,6 +953,7 @@ again:
  eie-next = ref-inode_list;
  }
  }
 +list_del(ref-list);
  kfree(ref);
  }
  

 
 I'm not convinced, you're not calling kfree() more often. Can you please add
 some patch description?

Yeah. i will add more description in V2.

Thanks
Wang
 
 -Jan
 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/3] Btrfs: catch error return value from find_extent_in_eb()

2013-08-08 Thread Wang Shilong
On 08/08/2013 06:24 PM, Filipe David Manana wrote:
 On Thu, Aug 8, 2013 at 6:04 AM, Wang Shilong wangsl.f...@cn.fujitsu.com 
 wrote:
 find_extent_in_eb() may return ENOMEM, catch this error return value.

 Signed-off-by: Wang Shilong wangsl.f...@cn.fujitsu.com
 Reviewed-by: Miao Xie mi...@cn.fujitsu.com
 ---
  fs/btrfs/backref.c | 4 
  1 file changed, 4 insertions(+)

 diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
 index 54e7610..f7781e6 100644
 --- a/fs/btrfs/backref.c
 +++ b/fs/btrfs/backref.c
 @@ -934,6 +934,10 @@ again:
 }
 ret = find_extent_in_eb(eb, bytenr,
 *extent_item_pos, 
 eie);
 +   if (ret) {
 +   free_extent_buffer(eb);
 +   goto out;
 +   }
 ref-inode_list = eie;
 free_extent_buffer(eb);
 }
 
 Hello, this is a duplicate of:  https://patchwork.kernel.org/patch/2835989/

Yeah, just ignore my patch.

Thanks,
Wang
 
 thanks
 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/3] Btrfs: allocate prelim_ref with a slab allocater

2013-08-08 Thread Jan Schmidt
 
On Thu, August 08, 2013 at 07:04 (+0200), Wang Shilong wrote:
 struct __prelim_ref is allocated and freed frequently when
 walking the backref tree; using a slab allocator can not only
 speed up allocation but also help detect memory leaks.
 
 Signed-off-by: Wang Shilong wangsl.f...@cn.fujitsu.com
 Reviewed-by: Miao Xie mi...@cn.fujitsu.com
 ---
  fs/btrfs/backref.c | 30 +-
  fs/btrfs/backref.h |  2 ++
  fs/btrfs/super.c   |  8 
  3 files changed, 35 insertions(+), 5 deletions(-)
 
 diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
 index f7781e6..916e4f1 100644
 --- a/fs/btrfs/backref.c
 +++ b/fs/btrfs/backref.c
 @@ -119,6 +119,26 @@ struct __prelim_ref {
   u64 wanted_disk_byte;
  };
  
 +static struct kmem_cache *prelim_ref_cache;
 +
 +int __init btrfs_prelim_ref_init(void)
 +{
 + prelim_ref_cache = kmem_cache_create(btrfs_prelim_ref,
 + sizeof(struct __prelim_ref),
 + 0,
 + SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
 + NULL);
 + if (!prelim_ref_cache)
 + return -ENOMEM;
 + return 0;
 +}
 +
 +void btrfs_prelim_ref_exit(void)
 +{
 + if (prelim_ref_cache)
 + kmem_cache_destroy(prelim_ref_cache);
 +}
 +
  /*
   * the rules for all callers of this function are:
   * - obtaining the parent is the goal
 @@ -165,7 +185,7 @@ static int __add_prelim_ref(struct list_head *head, u64 
 root_id,
  {
   struct __prelim_ref *ref;
  
 - ref = kmalloc(sizeof(*ref), gfp_mask);
 + ref = kmem_cache_alloc(prelim_ref_cache, gfp_mask);
   if (!ref)
   return -ENOMEM;
  
 @@ -493,7 +513,7 @@ static void __merge_refs(struct list_head *head, int mode)
   ref1-count += ref2-count;
  
   list_del(ref2-list);
 - kfree(ref2);
 + kmem_cache_free(prelim_ref_cache, ref2);
   }
  
   }
 @@ -958,7 +978,7 @@ again:
   }
   }
   list_del(ref-list);
 - kfree(ref);
 + kmem_cache_free(prelim_ref_cache, ref);
   }
  
  out:
 @@ -966,13 +986,13 @@ out:
   while (!list_empty(prefs)) {
   ref = list_first_entry(prefs, struct __prelim_ref, list);
   list_del(ref-list);
 - kfree(ref);
 + kmem_cache_free(prelim_ref_cache, ref);
   }
   while (!list_empty(prefs_delayed)) {
   ref = list_first_entry(prefs_delayed, struct __prelim_ref,
  list);
   list_del(ref-list);
 - kfree(ref);
 + kmem_cache_free(prelim_ref_cache, ref);
   }
  
   return ret;
 diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
 index 8f2e767..a910b27 100644
 --- a/fs/btrfs/backref.h
 +++ b/fs/btrfs/backref.h
 @@ -72,4 +72,6 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 
 inode_objectid,
 struct btrfs_inode_extref **ret_extref,
 u64 *found_off);
  
 +int __init btrfs_prelim_ref_init(void);
 +void btrfs_prelim_ref_exit(void);
  #endif
 diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
 index b64d762..de7eb3d 100644
 --- a/fs/btrfs/super.c
 +++ b/fs/btrfs/super.c
 @@ -56,6 +56,7 @@
  #include rcu-string.h
  #include dev-replace.h
  #include free-space-cache.h
 +#include backref.h
  
  #define CREATE_TRACE_POINTS
  #include trace/events/btrfs.h
 @@ -1774,6 +1775,10 @@ static int __init init_btrfs_fs(void)
   if (err)
   goto free_auto_defrag;
  
 + err = btrfs_prelim_ref_init();
 + if (err)
 + goto free_prelim_ref;
 +
   err = btrfs_interface_init();
   if (err)
   goto free_delayed_ref;
 @@ -1791,6 +1796,8 @@ static int __init init_btrfs_fs(void)
  
  unregister_ioctl:
   btrfs_interface_exit();
 +free_prelim_ref:
 + btrfs_prelim_ref_exit();
  free_delayed_ref:
   btrfs_delayed_ref_exit();
  free_auto_defrag:
 @@ -1817,6 +1824,7 @@ static void __exit exit_btrfs_fs(void)
   btrfs_delayed_ref_exit();
   btrfs_auto_defrag_exit();
   btrfs_delayed_inode_exit();
 + btrfs_prelim_ref_exit();
   ordered_data_exit();
   extent_map_exit();
   extent_io_exit();
 

I generally like the idea of using a custom cache here. What about this one?

 324 static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
[...]
 367 /* additional parents require new refs being added here */
 368 while ((node = ulist_next(parents, uiter))) {
 369 new_ref = kmalloc(sizeof(*new_ref), GFP_NOFS);

That new_ref will also be freed with kmem_cache_free after your patch, I think.

Thanks,
-Jan
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  

Re: [PATCH 3/3] Btrfs: allocate prelim_ref with a slab allocater

2013-08-08 Thread Wang Shilong

 On Thu, August 08, 2013 at 07:04 (+0200), Wang Shilong wrote:
 struct __prelim_ref is allocated and freed frequently when
 walking backref tree, using slab allocater can not only
 speed up allocating but also detect memory leaks.
 
 Signed-off-by: Wang Shilong wangsl.f...@cn.fujitsu.com
 Reviewed-by: Miao Xie mi...@cn.fujitsu.com
 ---
 fs/btrfs/backref.c | 30 +-
 fs/btrfs/backref.h |  2 ++
 fs/btrfs/super.c   |  8 
 3 files changed, 35 insertions(+), 5 deletions(-)
 
 diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
 index f7781e6..916e4f1 100644
 --- a/fs/btrfs/backref.c
 +++ b/fs/btrfs/backref.c
 @@ -119,6 +119,26 @@ struct __prelim_ref {
  u64 wanted_disk_byte;
 };
 
 +static struct kmem_cache *prelim_ref_cache;
 +
 +int __init btrfs_prelim_ref_init(void)
 +{
 +prelim_ref_cache = kmem_cache_create(btrfs_prelim_ref,
 +sizeof(struct __prelim_ref),
 +0,
 +SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
 +NULL);
 +if (!prelim_ref_cache)
 +return -ENOMEM;
 +return 0;
 +}
 +
 +void btrfs_prelim_ref_exit(void)
 +{
 +if (prelim_ref_cache)
 +kmem_cache_destroy(prelim_ref_cache);
 +}
 +
 /*
  * the rules for all callers of this function are:
  * - obtaining the parent is the goal
 @@ -165,7 +185,7 @@ static int __add_prelim_ref(struct list_head *head, u64 
 root_id,
 {
  struct __prelim_ref *ref;
 
 -ref = kmalloc(sizeof(*ref), gfp_mask);
 +ref = kmem_cache_alloc(prelim_ref_cache, gfp_mask);
  if (!ref)
  return -ENOMEM;
 
 @@ -493,7 +513,7 @@ static void __merge_refs(struct list_head *head, int 
 mode)
  ref1-count += ref2-count;
 
  list_del(ref2-list);
 -kfree(ref2);
 +kmem_cache_free(prelim_ref_cache, ref2);
  }
 
  }
 @@ -958,7 +978,7 @@ again:
  }
  }
  list_del(ref-list);
 -kfree(ref);
 +kmem_cache_free(prelim_ref_cache, ref);
  }
 
 out:
 @@ -966,13 +986,13 @@ out:
  while (!list_empty(prefs)) {
  ref = list_first_entry(prefs, struct __prelim_ref, list);
  list_del(ref-list);
 -kfree(ref);
 +kmem_cache_free(prelim_ref_cache, ref);
  }
  while (!list_empty(prefs_delayed)) {
  ref = list_first_entry(prefs_delayed, struct __prelim_ref,
 list);
  list_del(ref-list);
 -kfree(ref);
 +kmem_cache_free(prelim_ref_cache, ref);
  }
 
  return ret;
 diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
 index 8f2e767..a910b27 100644
 --- a/fs/btrfs/backref.h
 +++ b/fs/btrfs/backref.h
 @@ -72,4 +72,6 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 
 inode_objectid,
struct btrfs_inode_extref **ret_extref,
u64 *found_off);
 
 +int __init btrfs_prelim_ref_init(void);
 +void btrfs_prelim_ref_exit(void);
 #endif
 diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
 index b64d762..de7eb3d 100644
 --- a/fs/btrfs/super.c
 +++ b/fs/btrfs/super.c
 @@ -56,6 +56,7 @@
 #include rcu-string.h
 #include dev-replace.h
 #include free-space-cache.h
 +#include backref.h
 
 #define CREATE_TRACE_POINTS
 #include trace/events/btrfs.h
 @@ -1774,6 +1775,10 @@ static int __init init_btrfs_fs(void)
  if (err)
  goto free_auto_defrag;
 
 +err = btrfs_prelim_ref_init();
 +if (err)
 +goto free_prelim_ref;
 +
  err = btrfs_interface_init();
  if (err)
  goto free_delayed_ref;
 @@ -1791,6 +1796,8 @@ static int __init init_btrfs_fs(void)
 
 unregister_ioctl:
  btrfs_interface_exit();
 +free_prelim_ref:
 +btrfs_prelim_ref_exit();
 free_delayed_ref:
  btrfs_delayed_ref_exit();
 free_auto_defrag:
 @@ -1817,6 +1824,7 @@ static void __exit exit_btrfs_fs(void)
  btrfs_delayed_ref_exit();
  btrfs_auto_defrag_exit();
  btrfs_delayed_inode_exit();
 +btrfs_prelim_ref_exit();
  ordered_data_exit();
  extent_map_exit();
  extent_io_exit();
 
 
 I generally like the idea of using a custom cache here. What about this one?
 
 324 static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info,
 [...]
 367 /* additional parents require new refs being added here */
 368 while ((node = ulist_next(parents, uiter))) {
 369 new_ref = kmalloc(sizeof(*new_ref), GFP_NOFS);
 
 That new_ref will also be freed with kmem_cache_free after your patch, I 
 think.

Yeah, you are right. I just have a question: why did it not cause problems
when I freed it with kmem_cache_free during my test? ~_~


Thanks,
Wang
 
 Thanks,
 -Jan
 --
 To unsubscribe from this list: send the line unsubscribe linux-btrfs in
 the body of a 

Re: [PATCH][RESEND] vfs: allow /proc/PID/maps to get device from stat

2013-08-08 Thread Christoph Hellwig
On Wed, Aug 07, 2013 at 04:51:46PM -0400, Josef Bacik wrote:
 Not possible, this will break other things as subvolumes have their own inode
 space, it will confuse applications that get multiples of an inode number for
 different devices with the same st_dev.  Each subvolume has it's own anonymous
 dev to segregate things.  Thanks,

Yes, it's the same old issue of btrfs volumes misbehaving, and the
solution is still the same as 5 years ago: make sure each subvolume
has its own sb, vfsmount and gets automounted, similar to what nfs4
does for this case.
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH][RESEND] vfs: allow /proc/PID/maps to get device from stat

2013-08-08 Thread Josef Bacik
On Thu, Aug 08, 2013 at 05:13:49AM -0700, Christoph Hellwig wrote:
 On Wed, Aug 07, 2013 at 04:51:46PM -0400, Josef Bacik wrote:
  Not possible, this will break other things as subvolumes have their own 
  inode
  space, it will confuse applications that get multiples of an inode number 
  for
  different devices with the same st_dev.  Each subvolume has it's own 
  anonymous
  dev to segregate things.  Thanks,
 
 Yes, it's the same old issue of btrfs volumes misbehaving, and the
 solution is still the same as 5 years ago: make sure each subvolume
 has it's own sb, vfsmount and gets automounted, similar to what nfs4
 does for this case.

This won't work, try having 1 subvolumes with dirty inodes and do sync then
go skiing, you'll have time :).  Thanks,

Josef
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: deal with enomem in the rewind path V3

2013-08-08 Thread Josef Bacik
On Thu, Aug 08, 2013 at 09:36:52AM +0200, Jan Schmidt wrote:
 On Wed, August 07, 2013 at 23:03 (+0200), Josef Bacik wrote:
  We can get ENOMEM trying to allocate dummy bufs for the rewind operation of 
  the
  tree mod log.  Instead of BUG_ON()'ing in this case pass up ENOMEM.  I 
  looked
  back through the callers and I'm pretty sure I got everybody who did 
  BUG_ON(ret)
  in this path.  Thanks,
  
  Signed-off-by: Josef Bacik jba...@fusionio.com
  ---
  V2-V3:
  -unlock and free the original buffer on error
  -return NULL instead of ERR_PTR(-ENOMEM)
  V1-V2: missed a BUG_ON() for alloc_dummy_extent_buffer.
  
   fs/btrfs/ctree.c |   16 +-
   fs/btrfs/extent_io.c |  145 
  +
   2 files changed, 88 insertions(+), 73 deletions(-)
  
  diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
  index 0d5c686..1dd8a71 100644
  --- a/fs/btrfs/ctree.c
  +++ b/fs/btrfs/ctree.c
  @@ -1211,7 +1211,11 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, 
  struct extent_buffer *eb,
  BUG_ON(tm-slot != 0);
  eb_rewin = alloc_dummy_extent_buffer(eb-start,
  fs_info-tree_root-nodesize);
  -   BUG_ON(!eb_rewin);
  +   if (!eb_rewin) {
  +   btrfs_tree_read_unlock(eb);
  +   free_extent_buffer(eb);
  +   return NULL;
  +   }
  btrfs_set_header_bytenr(eb_rewin, eb-start);
  btrfs_set_header_backref_rev(eb_rewin,
   btrfs_header_backref_rev(eb));
  @@ -1219,7 +1223,11 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, 
  struct extent_buffer *eb,
  btrfs_set_header_level(eb_rewin, btrfs_header_level(eb));
  } else {
  eb_rewin = btrfs_clone_extent_buffer(eb);
  -   BUG_ON(!eb_rewin);
  +   if (!eb_rewin) {
  +   btrfs_tree_read_unlock(eb);
  +   free_extent_buffer(eb);
  +   return NULL;
  +   }
  }
   
  btrfs_tree_read_unlock(eb);
  @@ -2772,6 +2780,10 @@ again:
BTRFS_READ_LOCK);
  }
  b = tree_mod_log_rewind(root-fs_info, b, time_seq);
  +   if (!b) {
  +   ret = -ENOMEM;
  +   goto done;
  +   }
  p-locks[level] = BTRFS_READ_LOCK;
  p-nodes[level] = b;
  } else {
  diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c
  index deaea9c..b422cba 100644
  --- a/fs/btrfs/extent_io.c
  +++ b/fs/btrfs/extent_io.c
  @@ -4222,6 +4222,76 @@ static void __free_extent_buffer(struct 
  extent_buffer *eb)
  kmem_cache_free(extent_buffer_cache, eb);
   }
   
  +static int extent_buffer_under_io(struct extent_buffer *eb)
  +{
  +   return (atomic_read(eb-io_pages) ||
  +   test_bit(EXTENT_BUFFER_WRITEBACK, eb-bflags) ||
  +   test_bit(EXTENT_BUFFER_DIRTY, eb-bflags));
  +}
  +
  +/*
  + * Helper for releasing extent buffer page.
  + */
  +static void btrfs_release_extent_buffer_page(struct extent_buffer *eb,
  +   unsigned long start_idx)
  +{
  +   unsigned long index;
  +   unsigned long num_pages;
  +   struct page *page;
  +   int mapped = !test_bit(EXTENT_BUFFER_DUMMY, eb-bflags);
  +
  +   BUG_ON(extent_buffer_under_io(eb));
  +
  +   num_pages = num_extent_pages(eb-start, eb-len);
  +   index = start_idx + num_pages;
  +   if (start_idx = index)
  +   return;
  +
  +   do {
  +   index--;
  +   page = extent_buffer_page(eb, index);
  +   if (page  mapped) {
  +   spin_lock(page-mapping-private_lock);
  +   /*
  +* We do this since we'll remove the pages after we've
  +* removed the eb from the radix tree, so we could race
  +* and have this page now attached to the new eb.  So
  +* only clear page_private if it's still connected to
  +* this eb.
  +*/
  +   if (PagePrivate(page) 
  +   page-private == (unsigned long)eb) {
  +   BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, 
  eb-bflags));
  +   BUG_ON(PageDirty(page));
  +   BUG_ON(PageWriteback(page));
  +   /*
  +* We need to make sure we haven't be attached
  +* to a new eb.
  +*/
  +   ClearPagePrivate(page);
  +   set_page_private(page, 0);
  +   /* One for the page private */
  +   page_cache_release(page);
  +   }
  +

Re: [PATCH] Btrfs: stop using GFP_ATOMIC when allocating rewind ebs

2013-08-08 Thread Josef Bacik
On Thu, Aug 08, 2013 at 09:23:06AM +0200, Jan Schmidt wrote:
  
 On Wed, August 07, 2013 at 23:11 (+0200), Josef Bacik wrote:
  There is no reason we can't just set the path to blocking and then do normal
  GFP_NOFS allocations for these extent buffers.  Thanks,
  
  Signed-off-by: Josef Bacik jba...@fusionio.com
  ---
   fs/btrfs/ctree.c |   16 ++--
   fs/btrfs/extent_io.c |8 
   2 files changed, 14 insertions(+), 10 deletions(-)
  
  diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
  index 1dd8a71..414a2d7 100644
  --- a/fs/btrfs/ctree.c
  +++ b/fs/btrfs/ctree.c
  @@ -1191,8 +1191,8 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, 
  struct extent_buffer *eb,
* is freed (its refcount is decremented).
*/
   static struct extent_buffer *
  -tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer 
  *eb,
  -   u64 time_seq)
  +tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
  +   struct extent_buffer *eb, u64 time_seq)
   {
  struct extent_buffer *eb_rewin;
  struct tree_mod_elem *tm;
  @@ -1207,12 +1207,15 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, 
  struct extent_buffer *eb,
  if (!tm)
  return eb;
   
  +   btrfs_set_path_blocking(path);
  +   btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
  +
  if (tm-op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
  BUG_ON(tm-slot != 0);
  eb_rewin = alloc_dummy_extent_buffer(eb-start,
  fs_info-tree_root-nodesize);
  if (!eb_rewin) {
  -   btrfs_tree_read_unlock(eb);
  +   btrfs_tree_read_unlock_blocking(eb);
  free_extent_buffer(eb);
  return NULL;
  }
  @@ -1224,13 +1227,14 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, 
  struct extent_buffer *eb,
  } else {
  eb_rewin = btrfs_clone_extent_buffer(eb);
  if (!eb_rewin) {
  -   btrfs_tree_read_unlock(eb);
  +   btrfs_tree_read_unlock_blocking(eb);
  free_extent_buffer(eb);
  return NULL;
  }
  }
   
  -   btrfs_tree_read_unlock(eb);
  +   btrfs_clear_path_blocking(path, NULL, BTRFS_READ_LOCK);
  +   btrfs_tree_read_unlock_blocking(eb);
 
 unlock_blocking? Rest looks ok to me.
 

Yeah I change the lock to blocking above, so I have to do read_unlock_blocking
here.  Thanks,

Josef
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: stop using GFP_ATOMIC when allocating rewind ebs

2013-08-08 Thread Jan Schmidt
On Thu, August 08, 2013 at 15:12 (+0200), Josef Bacik wrote:
 On Thu, Aug 08, 2013 at 09:23:06AM +0200, Jan Schmidt wrote:
  
 On Wed, August 07, 2013 at 23:11 (+0200), Josef Bacik wrote:
 There is no reason we can't just set the path to blocking and then do normal
 GFP_NOFS allocations for these extent buffers.  Thanks,

 Signed-off-by: Josef Bacik jba...@fusionio.com
 ---
  fs/btrfs/ctree.c |   16 ++--
  fs/btrfs/extent_io.c |8 
  2 files changed, 14 insertions(+), 10 deletions(-)

 diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c
 index 1dd8a71..414a2d7 100644
 --- a/fs/btrfs/ctree.c
 +++ b/fs/btrfs/ctree.c
 @@ -1191,8 +1191,8 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, 
 struct extent_buffer *eb,
   * is freed (its refcount is decremented).
   */
  static struct extent_buffer *
 -tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer 
 *eb,
 -   u64 time_seq)
 +tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path,
 +   struct extent_buffer *eb, u64 time_seq)
  {
 struct extent_buffer *eb_rewin;
 struct tree_mod_elem *tm;
 @@ -1207,12 +1207,15 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, 
 struct extent_buffer *eb,
 if (!tm)
 return eb;
  
 +   btrfs_set_path_blocking(path);
 +   btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK);
 +
 if (tm-op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) {
 BUG_ON(tm-slot != 0);
 eb_rewin = alloc_dummy_extent_buffer(eb-start,
 fs_info-tree_root-nodesize);
 if (!eb_rewin) {
 -   btrfs_tree_read_unlock(eb);
 +   btrfs_tree_read_unlock_blocking(eb);
 free_extent_buffer(eb);
 return NULL;
 }
 @@ -1224,13 +1227,14 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, 
 struct extent_buffer *eb,
 } else {
 eb_rewin = btrfs_clone_extent_buffer(eb);
 if (!eb_rewin) {
 -   btrfs_tree_read_unlock(eb);
 +   btrfs_tree_read_unlock_blocking(eb);
 free_extent_buffer(eb);
 return NULL;
 }
 }
  
 -   btrfs_tree_read_unlock(eb);
 +   btrfs_clear_path_blocking(path, NULL, BTRFS_READ_LOCK);
 +   btrfs_tree_read_unlock_blocking(eb);

 unlock_blocking? Rest looks ok to me.

 
 Yeah I change the lock to blocking above, so I have to do read_unlock_blocking
 here.  Thanks,

Uh, obviously. Got confused by the btrfs_clear_path_blocking above, but of
course we're locking eb explicitly ourselves.

Reviewed-by: Jan Schmidt list.bt...@jan-o-sch.net

Thanks!
-Jan
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/3] Btrfs: fix oops when writing dirty qgroups to disk

2013-08-08 Thread Josef Bacik
On Wed, Aug 07, 2013 at 01:12:29PM +0800, Wang Shilong wrote:
 When disabling quota, we should clear out list 'dirty_qgroups', otherwise,
 we will get oops if enabling quota again. Fix this by abstracting similar
 code from del_qgroup_rb().
 
 Signed-off-by: Wang Shilong wangsl.f...@cn.fujitsu.com
 Reviewed-by: Miao Xie mi...@cn.fujitsu.com

Can we get an xfstest for this, or at the very least a generic xfstest to
exercise qgroups in general so I can be sure all these qgroup patches I take
don't cause regressions?  Thanks,

Josef
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 6/7] btrfs: cleanup: removed unused 'btrfs_reada_detach'

2013-08-08 Thread David Sterba
On Thu, Aug 08, 2013 at 09:33:14AM +0200, Arne Jansen wrote:
 On 07.08.2013 23:43, Sergei Trofimovich wrote:
  From: Sergei Trofimovich sly...@gentoo.org
  
  Found by uselex.rb:
  btrfs_reada_detach: [R]: exported from: fs/btrfs/btrfs.o 
  fs/btrfs/built-in.o fs/btrfs/reada.o
 
 even though the function is currently unused, I'm hesitating to remove it
 as it's part of the reada-API and might be handy for anyone going to use
 the API in the future.

I agree. As replied here,
http://www.mail-archive.com/linux-btrfs@vger.kernel.org/msg24047.html
please keep the function.

david
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 4/7] btrfs: cleanup: removed unused 'btrfs_start_transaction_lflush'

2013-08-08 Thread David Sterba
On Thu, Aug 08, 2013 at 12:43:20AM +0300, Sergei Trofimovich wrote:
 From: Sergei Trofimovich sly...@gentoo.org
 
 Found by uselex.rb:
  btrfs_start_transaction_lflush: [R]: exported from: fs/btrfs/btrfs.o 
  fs/btrfs/transaction.o fs/btrfs/built-in.o

http://www.mail-archive.com/linux-btrfs@vger.kernel.org/msg24047.html

 btrfs_start_transaction_lflush()

Transaction API, removing the func does not make sense without removing
BTRFS_RESERVE_FLUSH_LIMIT at the same time.


Miao introduced this function in 08e007d2e57744472a9424735a to enhance
flushing logic to avoid deadlocks.

david
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/7] btrfs: cleanup: mark 'btrfs_write_and_wait_marked_extents' as static

2013-08-08 Thread David Sterba
On Thu, Aug 08, 2013 at 12:43:19AM +0300, Sergei Trofimovich wrote:
 From: Sergei Trofimovich sly...@gentoo.org
 
 Found by uselex.rb:
  btrfs_write_and_wait_marked_extents: [R]: exported from: fs/btrfs/btrfs.o 
  fs/btrfs/transaction.o fs/btrfs/built-in.o
 
 Signed-off-by: Sergei Trofimovich sly...@gentoo.org
 ---
  fs/btrfs/transaction.c | 4 ++--
  fs/btrfs/transaction.h | 2 --
  2 files changed, 2 insertions(+), 4 deletions(-)
 
 diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
 index d58cce7..ff891d2 100644
 --- a/fs/btrfs/transaction.c
 +++ b/fs/btrfs/transaction.c
 @@ -837,8 +837,8 @@ int btrfs_wait_marked_extents(struct btrfs_root *root,
   * them in one of two extent_io trees.  This is used to make sure all of
   * those extents are on disk for transaction or log commit
   */
 -int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
 - struct extent_io_tree *dirty_pages, int mark)
 +static int btrfs_write_and_wait_marked_extents(struct btrfs_root *root,
 +struct extent_io_tree 
 *dirty_pages, int mark)

You may want to run the output through checkpatch.pl and fix obvious
style violations (line too long).

david
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH][RESEND] vfs: allow /proc/PID/maps to get device from stat

2013-08-08 Thread Christoph Hellwig
On Thu, Aug 08, 2013 at 09:02:07AM -0400, Josef Bacik wrote:
 This won't work, try having 1 subvolumes with dirty inodes and do sync 
 then
 go skiing, you'll have time :).  Thanks,

Why would the dirty inodes make any difference?  If you share the bdi
between the subvolumes the sync workflow should be exactly the same
still.

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 7/7] btrfs: cleanup: removed unused 'btrfs_get_inode_ref_index'

2013-08-08 Thread David Sterba
On Thu, Aug 08, 2013 at 12:43:23AM +0300, Sergei Trofimovich wrote:
 From: Sergei Trofimovich sly...@gentoo.org
 
 Found by uselex.rb:
  btrfs_get_inode_ref_index: [R]: exported from: fs/btrfs/inode-item.o 
  fs/btrfs/btrfs.o fs/btrfs/built-in.o
 
 Signed-off-by: Sergei Trofimovich sly...@gentoo.org

Safe to remove.

Reviewed-by: David Sterba dste...@suse.cz
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 3/3] Btrfs: allocate prelim_ref with a slab allocater

2013-08-08 Thread David Sterba
On Thu, Aug 08, 2013 at 01:04:19PM +0800, Wang Shilong wrote:
 struct __prelim_ref is allocated and freed frequently when
 walking backref tree, using slab allocator can not only
 speed up allocating but also detect memory leaks.
 
 Signed-off-by: Wang Shilong wangsl.f...@cn.fujitsu.com
 Reviewed-by: Miao Xie mi...@cn.fujitsu.com
 ---
  fs/btrfs/backref.c | 30 +-
  fs/btrfs/backref.h |  2 ++
  fs/btrfs/super.c   |  8 
  3 files changed, 35 insertions(+), 5 deletions(-)
 
 diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
 index f7781e6..916e4f1 100644
 --- a/fs/btrfs/backref.c
 +++ b/fs/btrfs/backref.c
 @@ -119,6 +119,26 @@ struct __prelim_ref {
   u64 wanted_disk_byte;
  };
  
 +static struct kmem_cache *prelim_ref_cache;
 +
 +int __init btrfs_prelim_ref_init(void)
 +{
 + prelim_ref_cache = kmem_cache_create(btrfs_prelim_ref,
 + sizeof(struct __prelim_ref),

Would be nice to give it a name that matches the slab cache,
btrfs_prelim_ref.

david
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: deal with enomem in the rewind path V3

2013-08-08 Thread David Sterba
On Thu, Aug 08, 2013 at 09:36:52AM +0200, Jan Schmidt wrote:
 Weird patch formatting concerning extent_io.c, I assume there are no changes 
 in
 extent_buffer_under_io and btrfs_release_extent_buffer_page, you just moved
 btrfs_clone_extent_buffer, right? Perhaps --patience or --minimal could do
 better? Otherwise,

git diff --patience produces identical result for me (1.8.3.1).

 Reviewed-by: Jan Schmidt list@jan-o-sch.net
 ^^^
xfs? :)
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: deal with enomem in the rewind path V3

2013-08-08 Thread Jan Schmidt
On Thu, August 08, 2013 at 16:28 (+0200), David Sterba wrote:
 On Thu, Aug 08, 2013 at 09:36:52AM +0200, Jan Schmidt wrote:
 Weird patch formatting concerning extent_io.c, I assume there are no changes 
 in
 extent_buffer_under_io and btrfs_release_extent_buffer_page, you just moved
 btrfs_clone_extent_buffer, right? Perhaps --patience or --minimal could do
 better? Otherwise,
 
 git diff --patience produces identical result for me (1.8.3.1).

Yeah, I expected that after Josef said that he actually moved the other two
functions, so the structure really changed in a way git cannot diff any better.

 Reviewed-by: Jan Schmidt list@jan-o-sch.net
  ^^^
 xfs? :)

Whoops :-) Replace that by btrfs if you wish.

-Jan
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH][RESEND] vfs: allow /proc/PID/maps to get device from stat

2013-08-08 Thread Josef Bacik
On Thu, Aug 08, 2013 at 06:48:05AM -0700, Christoph Hellwig wrote:
 On Thu, Aug 08, 2013 at 09:02:07AM -0400, Josef Bacik wrote:
  This won't work, try having 1 subvolumes with dirty inodes and do sync 
  then
  go skiing, you'll have time :).  Thanks,
 
 Why would the dirty inodes make any difference?  If you share the bdi
 between the subvolumes the sync workflow should be exactly the same
 still.
 

The inodes are in the per-sb list, so we may start all the writing but we don't
wait all at once, so in the case of btrfs we will write all the dirty inodes,
and then wait on the ones in whatever sb we have, and then sync, which will
commit the transaction.  Then we go to the next sb and wait on those inodes
which will dirty metadata which means we'll have another transaction and we'll
commit the transaction and so on and so forth.  This means we write the
superblock 1 times for one sync when we could have just done it once.

Now we could probably get around this by having ->sync_fs wait itself for all of
the inodes to complete and then commit the transaction once, but we're still
going to get called the 9 times for the same damned file system that has
already had everything done.

And this is just one example, IIRC there were a few other issues that popped up
because we assume sb == completely separate file system, freeze I think is one
of those things.  I'm sure there were other ones but the last time I tried to do
this was 2010/2011 and many brain cells have died since then.  Thanks,

Josef
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH][RESEND] vfs: allow /proc/PID/maps to get device from stat

2013-08-08 Thread Josef Bacik
On Thu, Aug 08, 2013 at 06:48:05AM -0700, Christoph Hellwig wrote:
 On Thu, Aug 08, 2013 at 09:02:07AM -0400, Josef Bacik wrote:
  This won't work, try having 1 subvolumes with dirty inodes and do sync 
  then
  go skiing, you'll have time :).  Thanks,
 
 Why would the dirty inodes make any difference?  If you share the bdi
 between the subvolumes the sync workflow should be exactly the same
 still.
 

If we could dis-entangle vfsmounts from sb's and have it so you could have
multiple vfsmounts with just one sb that would solve at least the in-kernel
confusion, but I think we still have the userspace confusion.  Thanks,

Josef
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 0/4] btrfs: out-of-band (aka offline) dedupe v4

2013-08-08 Thread David Sterba
On Tue, Aug 06, 2013 at 11:42:47AM -0700, Mark Fasheh wrote:
 The following series of patches implements in btrfs an ioctl to do
 out-of-band deduplication of file extents.

Reviewed-by: David Sterba dste...@suse.cz
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Why does btrfs benchmark so badly in this case?

2013-08-08 Thread John Williams
Phoronix periodically runs benchmarks on filesystems, and one thing I
have noticed is that btrfs always does terribly on their fio Intel
IOMeter fileserver access pattern benchmark:

http://www.phoronix.com/scan.php?page=article&item=linux_310_10fs&num=2

Here, btrfs is more than 6 times slower than ext4, and about 3 times
slower than XFS.

Lest we attribute it to an unavoidable downside of COW filesystems and
move on...no, we cannot do that, because ZFS does well here -- btrfs
is about 6 times slower than ZFS!

Note that btrfs does quite well in the other Phoronix benchmarks. It
is just the fio fileserver benchmark that btrfs has problems with.

What is going on here? Why is btrfs doing so poorly?
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Why does btrfs benchmark so badly in this case?

2013-08-08 Thread Josef Bacik
On Thu, Aug 08, 2013 at 09:13:04AM -0700, John Williams wrote:
 Phoronix periodically runs benchmarks on filesystems, and one thing I
 have noticed is that btrfs always does terribly on their fio Intel
 IOMeter fileserver access pattern benchmark:
 
 http://www.phoronix.com/scan.php?page=article&item=linux_310_10fs&num=2
 
 Here, btrfs is more than 6 times slower than ext4, and about 3 times
 slower than XFS.
 
 Lest we attribute it to an unavoidable downside of COW filesystems and
 move on...no, we cannot do that, because ZFS does well here -- btrfs
 is about 6 times slower than ZFS!
 
 Note that btrfs does quite well in the other Phoronix benchmarks. It
 is just the fio fileserver benchmark that btrfs has problems with.
 
 What is going on here? Why is btrfs doing so poorly?

Excellent question, I'll get back to you on that.  Thanks,

Josef
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 0/2] xfstest btrfs/316: test send / receive

2013-08-08 Thread Eric Sandeen
On 8/8/13 3:17 AM, Jan Schmidt wrote:
 These two patches add the announced tests for btrfs send / receive. As
 requested, the fssum tool is now included.
 
 One drawback is that I'm unable to edit configure.ac or whatever needs
 to be modified in an autotools preferred way. Any hints appreciated,
 preferably hints containing all the modifications required to introduce
 something like HAVE_SEEK_HOLE.

Other tests in the tree just add:

#ifndef SEEK_DATA
#define SEEK_DATA   3
#define SEEK_HOLE   4
#endif

 I do not want to make modifications to fssum.c here, if that's
 absolutely required (because one /could/ get along using linux/fs.h,
 which is not the way I would like to go), I'd like to have that changed
 in the far-progs repository where fssum.c comes from as well.

Well, unfortunately it breaks the build w/o some change or other,
on older distros:

Building src
[CC]fssum
fssum.c: In function 'sum_file_data_permissive':
fssum.c:243: error: 'SEEK_DATA' undeclared (first use in this function)

so this can't be merged as-is.

Adding the 4 lines above to the xfstests copy seems like a pretty obvious fix
to get the tool building and move this along.

Then, to simply skip this test if the kernel we're running on doesn't grok
SEEK_DATA, add:

_require_seek_data_hole

to your new test in patch 2.

Thanks,
-Eric

 --
 v1-v2:
  - included fssum
  - test number is now 316 (was 314)
 v2-v3:
  - added missing -lcrypto to build fssum
  - removed obsolete change in README now that fssum is included
  - fixed comment in test/btrfs/316's header (314 - 316)
 
 Jan Schmidt (2):
   xfstests: add fssum tool
   xfstests btrfs/316: test send / receive
 
  .gitignore  |1 +
  common/config   |2 +
  src/Makefile|   11 +-
  src/fssum.c |  819 
 +++
  tests/btrfs/316 |  113 +++
  tests/btrfs/316.out |4 +
  tests/btrfs/group   |1 +
  7 files changed, 950 insertions(+), 1 deletions(-)
  create mode 100644 src/fssum.c
  create mode 100755 tests/btrfs/316
  create mode 100644 tests/btrfs/316.out
 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 1/2] xfstests: add fssum tool

2013-08-08 Thread Eric Sandeen
On 8/8/13 3:17 AM, Jan Schmidt wrote:
 fssum is a tool to build a recursive checksum for a file system. The home
 repository of fssum is
 
 git://git.kernel.org/pub/scm/linux/kernel/git/arne/far-progs.git
 
 It is added as an optional target, because it depends on glibc >= 2.15 for
 SEEK_HOLE / SEEK_DATA. The test to be added using fssum will just be skipped
 if fssum wasn't built.
 
 Signed-off-by: Jan Schmidt list@jan-o-sch.net
 ---
  .gitignore|1 +
  common/config |2 +
  src/Makefile  |   11 +-
  src/fssum.c   |  819 
 +
  4 files changed, 832 insertions(+), 1 deletions(-)
  create mode 100644 src/fssum.c
 
 diff --git a/.gitignore b/.gitignore
 index 11594aa..c2fc6e3 100644
 --- a/.gitignore
 +++ b/.gitignore
 @@ -45,6 +45,7 @@
  /src/fill
  /src/fill2
  /src/fs_perms
 +/src/fssum
  /src/fstest
  /src/fsync-tester
  /src/ftrunc
 diff --git a/common/config b/common/config
 index 67c1498..c8bee29 100644
 --- a/common/config
 +++ b/common/config
 @@ -146,6 +146,8 @@ export SED_PROG=`set_prog_path sed`
  export BC_PROG=`set_prog_path bc`
  [ $BC_PROG =  ]  _fatal bc not found
  
 +export FSSUM_PROG=`set_prog_path fssum $here/src/fssum`

So this will pick up a local copy of fssum if it exists;
is that really desired?  (If there's any difference in
behavior, then the one in src/ presumably would need to
be fixed...)

 +
  export PS_ALL_FLAGS=-ef
  
  export DF_PROG=`set_prog_path df`
 diff --git a/src/Makefile b/src/Makefile
 index cc679e8..10a4d3c 100644
 --- a/src/Makefile
 +++ b/src/Makefile
 @@ -20,10 +20,14 @@ LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize 
 preallo_rw_pattern_reader \
   stale_handle pwrite_mmap_blocked t_dir_offset2 seek_sanity_test \
   seek_copy_test t_readdir_1 t_readdir_2 fsync-tester
  
 +OPT_TARGETS = fssum
 +

I'm not sure how this helps . . .

  SUBDIRS =
  
  LLDLIBS = $(LIBATTR) $(LIBHANDLE) $(LIBACL)
  
 +OPT_LDLIBS = -lssl -lcrypto

Hm, new deps.  I guess it's not a huge problem, these should always
be available, right?

  ifeq ($(HAVE_XLOG_ASSIGN_LSN), true)
  LINUX_TARGETS += loggen
  endif
 @@ -60,7 +64,7 @@ CFILES = $(TARGETS:=.c)
  LDIRT = $(TARGETS)
  
  
 -default: depend $(TARGETS) $(SUBDIRS)
 +default: depend $(TARGETS) $(OPT_TARGETS) $(SUBDIRS)

Anyway, OPT_TARGETS isn't optional, because you still build it by default.  :)

  depend: .dep
  
 @@ -70,11 +74,16 @@ $(TARGETS): $(LIBTEST)
   @echo [CC]$@
   $(Q)$(LTLINK) $@.c -o $@ $(CFLAGS) $(LDFLAGS) $(LDLIBS) $(LIBTEST)
  
 +$(OPT_TARGETS): $(LIBTEST)
 + @echo [CC]$@
 + -$(Q)$(LTLINK) $@.c -o $@ $(CFLAGS) $(LDFLAGS) $(LDLIBS) $(OPT_LDLIBS) 
 $(LIBTEST)

Oh, I see, you ignore the error.  Well, that's still pretty ugly.
I'd really rather you just add the #defines as I suggested in my
reply to [PATCH 0/2], so it'll build for everyone.

Thanks,
-Eric

 +
  LINKTEST = $(LTLINK) $@.c -o $@ $(CFLAGS) $(LDFLAGS)
  
  install: default $(addsuffix -install,$(SUBDIRS))
   $(INSTALL) -m 755 -d $(PKG_LIB_DIR)/src
   $(LTINSTALL) -m 755 $(TARGETS) $(PKG_LIB_DIR)/src
 + -$(LTINSTALL) -m 755 $(OPT_TARGETS) $(PKG_LIB_DIR)/src
   $(LTINSTALL) -m 755 fill2attr fill2fs fill2fs_check scaleread.sh 
 $(PKG_LIB_DIR)/src
   $(LTINSTALL) -m 644 dumpfile $(PKG_LIB_DIR)/src
  

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3 2/2] xfstests btrfs/316: test send / receive

2013-08-08 Thread Eric Sandeen
On 8/8/13 3:17 AM, Jan Schmidt wrote:
 Basic send / receive functionality test for btrfs. Requires current
 version of fsstress built (-x support). Relies on fssum tool but can
 skip the test if it failed to build.
 
 Signed-off-by: Jan Schmidt list@jan-o-sch.net
 Reviewed-by: Josef Bacik jba...@fusionio.com
 ---
  tests/btrfs/316 |  113 
 +++
  tests/btrfs/316.out |4 ++
  tests/btrfs/group   |1 +
  3 files changed, 118 insertions(+), 0 deletions(-)
  create mode 100755 tests/btrfs/316
  create mode 100644 tests/btrfs/316.out
 
 diff --git a/tests/btrfs/316 b/tests/btrfs/316
 new file mode 100755
 index 000..087978a
 --- /dev/null
 +++ b/tests/btrfs/316
 @@ -0,0 +1,113 @@
 +#! /bin/bash
 +# FSQA Test No. 316
 +#
 +# Run fsstress to create a reasonably strange file system, make a
 +# snapshot (base) and run more fsstress. Then take another snapshot
 +# (incr) and send both snapshots to a temp file. Remake the file
 +# system and receive from the files. Check both states with fssum.
 +#
 +#---
 +# Copyright (C) 2013 STRATO.  All rights reserved.
 +#
 +# This program is free software; you can redistribute it and/or
 +# modify it under the terms of the GNU General Public License as
 +# published by the Free Software Foundation.
 +#
 +# This program is distributed in the hope that it would be useful,
 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 +# GNU General Public License for more details.
 +#
 +# You should have received a copy of the GNU General Public License
 +# along with this program; if not, write the Free Software Foundation,
 +# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 +#
 +#---
 +#
 +# creator
 +owner=list.bt...@jan-o-sch.net
 +
 +seq=`basename $0`
 +seqres=$RESULT_DIR/$seq
 +echo QA output created by $seq
 +
 +here=`pwd`
 +tmp=`mktemp -d`
 +status=1
 +
 +_cleanup()
 +{
 + echo *** unmount
 + umount $SCRATCH_MNT 2/dev/null
 + rm -f $tmp.*
 +}
 +trap _cleanup; exit \$status 0 1 2 3 15
 +
 +# get standard environment, filters and checks
 +. ./common/rc
 +. ./common/filter
 +
 +# real QA test starts here
 +_need_to_be_root
 +_supported_fs btrfs
 +_supported_os Linux
 +_require_scratch

_require_seek_data_hole

 +_require_command $FSSUM_PROG fssum

Usually for local binaries in src/ we'd just do:

FSSUM_PROG=$here/src/fssum
[ -x $FSSUM_PROG ] || _notrun fssum not built

There's no other src/* binary that gets set in common/config;
every test just does it locally, so probably best to
stick with that convention.

I think we're almost there!  :)

-Eric

 +
 +rm -f $seqres.full
 +
 +workout()
 +{
 + fsz=$1
 + ops=$2
 +
 + umount $SCRATCH_DEV /dev/null 21
 + echo *** mkfs -dsize=$fsz$seqres.full
 + echo  $seqres.full
 + _scratch_mkfs_sized $fsz $seqres.full 21 \
 + || _fail size=$fsz mkfs failed
 + run_check _scratch_mount -o noatime
 +
 + run_check $FSSTRESS_PROG -d $SCRATCH_MNT -n $ops $FSSTRESS_AVOID -x \
 + $BTRFS_UTIL_PROG subvol snap -r $SCRATCH_MNT $SCRATCH_MNT/base
 +
 + run_check $BTRFS_UTIL_PROG subvol snap -r $SCRATCH_MNT $SCRATCH_MNT/incr
 +
 + echo # $BTRFS_UTIL_PROG send $SCRATCH_MNT/base  $tmp/base.snap \
 +  $seqres.full
 + $BTRFS_UTIL_PROG send $SCRATCH_MNT/base  $tmp/base.snap 2 
 $seqres.full \
 + || _fail failed: '$@'
 + echo # $BTRFS_UTIL_PROG send -p $SCRATCH_MNT/base\
 + $SCRATCH_MNT/incr  $tmp/incr.snap  $seqres.full
 + $BTRFS_UTIL_PROG send -p $SCRATCH_MNT/base \
 + $SCRATCH_MNT/incr  $tmp/incr.snap 2 $seqres.full \
 + || _fail failed: '$@'
 +
 + run_check $FSSUM_PROG -A -f -w $tmp/base.fssum $SCRATCH_MNT/base
 + run_check $FSSUM_PROG -A -f -w $tmp/incr.fssum -x 
 $SCRATCH_MNT/incr/base \
 + $SCRATCH_MNT/incr
 +
 + umount $SCRATCH_DEV /dev/null 21
 + echo *** mkfs -dsize=$fsz$seqres.full
 + echo  $seqres.full
 + _scratch_mkfs_sized $fsz $seqres.full 21 \
 + || _fail size=$fsz mkfs failed
 + run_check _scratch_mount -o noatime
 +
 + run_check $BTRFS_UTIL_PROG receive $SCRATCH_MNT  $tmp/base.snap
 + run_check $FSSUM_PROG -r $tmp/base.fssum $SCRATCH_MNT/base
 +
 + run_check $BTRFS_UTIL_PROG receive $SCRATCH_MNT  $tmp/incr.snap
 + run_check $FSSUM_PROG -r $tmp/incr.fssum $SCRATCH_MNT/incr
 +}
 +
 +echo *** test send / receive
 +
 +fssize=`expr 2000 \* 1024 \* 1024`
 +ops=200
 +
 +workout $fssize $ops
 +
 +echo *** done
 +status=0
 +exit
 diff --git a/tests/btrfs/316.out b/tests/btrfs/316.out
 new file mode 100644
 index 000..4564c85
 --- /dev/null
 +++ b/tests/btrfs/316.out
 @@ 

Re: [PATCH 6/7] btrfs: cleanup: removed unused 'btrfs_reada_detach'

2013-08-08 Thread Zach Brown
  even though the function is currently unused, I'm hesitating to remove it
  as it's part of the reada-API and might be handy for anyone going to use
  the API in the future.
 
 I agree. As replied here,
 http://www.mail-archive.com/linux-btrfs@vger.kernel.org/msg24047.html
 please keep the function.

If we're keeping score, put me down for being in favour of removing dead
untested code.  git resurrection is easy.

- z
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 2/2] btrfs-progs: introduce btrfs filesystem show --kernel

2013-08-08 Thread Zach Brown
On Thu, Aug 08, 2013 at 04:07:07PM +0800, Anand Jain wrote:
 As of now btrfs filesystem show reads directly from
 disks. So sometimes output can be stale, mainly when
 user want to verify their last operation like,
 labeling or device delete or add... etc.
 
 This patch adds --kernel option to the 'filesystem show'
 subcli, which will read from the kernel instead of
 the disks directly.

Why should this be an option?

When mounted, the kernel cache is authoritative.  It was always a bug to
read stale data from disk.

The kernel should be read first, and if that isn't available it can fall
back to offering unreliable data from disk with a giant warning.

Right?

- z
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Why does btrfs benchmark so badly in this case?

2013-08-08 Thread Clemens Eisserer
 What is going on here? Why is btrfs doing so poorly?

Funny thing, I was thinking exactly the same when reading the article ;)

Regards
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 6/7] btrfs: cleanup: removed unused 'btrfs_reada_detach'

2013-08-08 Thread Arne Jansen
On 08/08/13 19:46, Zach Brown wrote:
 even though the function is currently unused, I'm hesitating to remove it
 as it's part of the reada-API and might be handy for anyone going to use
 the API in the future.

 I agree. As replied here,
 http://www.mail-archive.com/linux-btrfs@vger.kernel.org/msg24047.html
 please keep the function.
 
 If we're keeping score, put me down for being in favour of removing dead
 untested code.  git resurrection is easy.

It's not really untested, it has been in use some time ago. But of
course there's a chance that some changes broke it.
Yes, git resurrection is easy. To inform potential users, you might
just leave a comment like this:

/*
 * There has been a function once to detach from a running reada.
 * If you need such functionality, just revert the commit that
 * added this comment.
 */

-Arne

 
 - z
 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Why does btrfs benchmark so badly in this case?

2013-08-08 Thread Josef Bacik
On Thu, Aug 08, 2013 at 09:13:04AM -0700, John Williams wrote:
 Phoronix periodically runs benchmarks on filesystems, and one thing I
 have noticed is that btrfs always does terribly on their fio Intel
 IOMeter fileserver access pattern benchmark:
 
 http://www.phoronix.com/scan.php?page=article&item=linux_310_10fs&num=2
 
 Here, btrfs is more than 6 times slower than ext4, and about 3 times
 slower than XFS.
 
 Lest we attribute it to an unavoidable downside of COW filesystems and
 move on...no, we cannot do that, because ZFS does well here -- btrfs
 is about 6 times slower than ZFS!
 
 Note that btrfs does quite well in the other Phoronix benchmarks. It
 is just the fio fileserver benchmark that btrfs has problems with.
 
 What is going on here? Why is btrfs doing so poorly?

So the reason this workload sucks for btrfs is because we fall back on buffered
IO because fio does not do block size aligned writes for this workload.  If you
add

ba=4k

to the iometer fio file then we go the same speed as xfs and ext4.  Not a whole
lot we can do about this since unaligned writes means we have to read in pages
to cow the block properly, which is why we fall back to buffered.  Once we do
that we end up having a lot of page locking stuff that gets in the way and makes
us twice as slow.  Thanks,

Josef
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Btrfs: fix race between removing a dev and writing sbs

2013-08-08 Thread Filipe David Borba Manana
Since all code paths that update the number of devices in the
super copy (fs_info->super_copy) first lock the device list
(fs_info->fs_devices->device_list_mutex), and write_all_supers()
also needs to lock the devices list mutex, make write_all_supers()
read the number of devices from the super copy after it locks
the device list mutex (and before unlocking it of course).

The only code path that doesn't lock the device list mutex
before updating the number of devices in the super copy is
disk-io.c:next_root_backup(), called by open_ctree() during
mount time where concurrency issues can't happen.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 fs/btrfs/disk-io.c |2 +-
 fs/btrfs/volumes.c |   11 ---
 2 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 254cdc8..c4b24c7 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -3313,7 +3313,6 @@ static int write_all_supers(struct btrfs_root *root, int 
max_mirrors)
int total_errors = 0;
u64 flags;
 
-   max_errors = btrfs_super_num_devices(root-fs_info-super_copy) - 1;
do_barriers = !btrfs_test_opt(root, NOBARRIER);
backup_super_roots(root-fs_info);
 
@@ -3322,6 +3321,7 @@ static int write_all_supers(struct btrfs_root *root, int 
max_mirrors)
 
mutex_lock(root-fs_info-fs_devices-device_list_mutex);
head = root-fs_info-fs_devices-devices;
+   max_errors = btrfs_super_num_devices(root-fs_info-super_copy) - 1;
 
if (do_barriers) {
ret = barrier_all_devices(root-fs_info);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index 090f57c..eddf386 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -1568,11 +1568,6 @@ int btrfs_rm_device(struct btrfs_root *root, char 
*device_path)
if (ret)
goto error_undo;
 
-   /*
-* TODO: the superblock still includes this device in its num_devices
-* counter although write_all_supers() is not locked out. This
-* could give a filesystem state which requires a degraded mount.
-*/
ret = btrfs_rm_dev_item(root-fs_info-chunk_root, device);
if (ret)
goto error_undo;
@@ -1588,7 +1583,9 @@ int btrfs_rm_device(struct btrfs_root *root, char 
*device_path)
/*
 * the device list mutex makes sure that we don't change
 * the device list while someone else is writing out all
-* the device supers.
+* the device supers. Whoever is writing all supers, should
+* lock the device list mutex before getting the number of
+* devices in the super block (super_copy).
 */
 
cur_devices = device-fs_devices;
@@ -1612,10 +1609,10 @@ int btrfs_rm_device(struct btrfs_root *root, char 
*device_path)
device-fs_devices-open_devices--;
 
call_rcu(device-rcu, free_device);
-   mutex_unlock(root-fs_info-fs_devices-device_list_mutex);
 
num_devices = btrfs_super_num_devices(root-fs_info-super_copy) - 1;
btrfs_set_super_num_devices(root-fs_info-super_copy, num_devices);
+   mutex_unlock(root-fs_info-fs_devices-device_list_mutex);
 
if (cur_devices-open_devices == 0) {
struct btrfs_fs_devices *fs_devices;
-- 
1.7.9.5

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: btrfs zero divide

2013-08-08 Thread Thorsten Glaser
tl;dr: we got the faulty code pinned down, it's m68k specific,
except the m68k specific part didn’t change from 3.2…


Joe Perches dixit:

Something like this maybe. (uncompiled/untested)

I tried this:

--- div64.h.orig2013-08-08 19:34:32.663540965 +
+++ -   2013-08-08 19:47:30.309776791 +
@@ -6,6 +6,8 @@
 #else

 #include linux/types.h
+#include linux/bug.h
+#include linux/printk.h

 /* n = n / base; return rem; */

@@ -16,6 +18,11 @@
} __n;  \
unsigned long __rem, __upper;   \
\
+if (base == 0) { \
+WARN(1, Attempted division by 0\n); \
+dump_stack(); \
+__rem = 0; \
+} else { \
__n.n64 = (n);  \
if ((__upper = __n.n32[0])) {   \
asm (divul.l %2,%1:%0 \
@@ -26,6 +33,7 @@
: =d (__n.n32[1]), =d (__rem)   \
: d (base), 1 (__upper), 0 (__n.n32[1])); \
(n) = __n.n64;  \
+} \
__rem;  \
 })



It didn’t trigger, apparently:

[817508.37] bio: create slab bio-1 at 1
[817508.51] Btrfs loaded
[817524.11] loop: module loaded
[817534.86] device fsid 01cfa645-5cde-4e4c-9b0b-df7b37bdc495 devid 1 
transid 4 /dev/loop0
[817534.86] btrfs: disk space caching is enabled
[817534.86] *** ZERO DIVIDE ***   FORMAT=2
[817534.86] Current process id is 32312
[817534.86] BAD KERNEL TRAP: 
[817534.86] Modules linked in: loop btrfs lzo_compress zlib_deflate 
raid6_pq crc32c libcrc32c xor ipv6 evdev mac_hid ext3 mbcache jbd [last 
unloaded: btrfs]
[817534.86] PC: [31c46612] __btrfs_map_block+0x134/0x147a [btrfs]
[817534.86] SR: 2000  SP: 0249fab0  a2: 3010f660
[817534.86] d0: d1: 00022000d2: d3: 
[817534.86] d4: 0001d5: 0001a0: 021777a4a1: 021777a4
[817534.86] Process mount (pid: 32312, task=3010f660)
[817534.86] Frame format=2 instr addr=31c4660e
[817534.86] Stack from 0249fae8:
 0020  1000  00022000 0766a928 07621800
00415d84 0070 077a97c0 0070 0249fb68 0009e250 00d106c0 00011220
0070 0020  00022000 00ff 0009 1000 
 021777a4  0020  0249fd14 0009e26c 0020
0003  0009dd8a 3007c02c 0766a928 00415d84 1000 
 0110 31c417ae 0766a928 00415d84 1000  
[817534.86] Call Trace: [1000] kernel_pg_dir+0x0/0x1000
[817534.86]  [00022000] _060_fpsp_effadd+0xb2c0/0xd518
[817534.86]  [0009e250] bvec_alloc+0xa2/0xbe
[817534.86]  [00011220] sasin+0x87c/0x944
[817534.86]  [00022000] _060_fpsp_effadd+0xb2c0/0xd518
[817534.86]  [1000] kernel_pg_dir+0x0/0x1000
[817534.86]  [0009e26c] bio_alloc_bioset+0x0/0x12e
[817534.86]  [0009dd8a] bio_add_page+0x4a/0x58
[817534.86]  [1000] kernel_pg_dir+0x0/0x1000
[817534.86]  [31c417ae] submit_extent_page.isra.44+0x170/0x1bc [btrfs]
[817534.86]  [1000] kernel_pg_dir+0x0/0x1000
[817534.86]  [1000] kernel_pg_dir+0x0/0x1000
[817534.86]  [31c4cbfe] btrfs_map_bio+0x60/0x48c [btrfs]
[817534.86]  [00022000] _060_fpsp_effadd+0xb2c0/0xd518
[817534.86]  [00022000] _060_fpsp_effadd+0xb2c0/0xd518
[817534.86]  [31c24bb2] btree_submit_bio_hook+0x0/0xae [btrfs]
[817534.86]  [31c41ae4] end_bio_extent_readpage+0x0/0x69c [btrfs]
[817534.86]  [1000] kernel_pg_dir+0x0/0x1000
[817534.86]  [31c24984] btrfs_bio_wq_end_io+0x16/0x50 [btrfs]
[817534.86]  [31c24c0e] btree_submit_bio_hook+0x5c/0xae [btrfs]
[817534.87]  [00022000] _060_fpsp_effadd+0xb2c0/0xd518
[817534.87]  [31c3ed7a] submit_one_bio+0x7c/0xb2 [btrfs]
[817534.87]  [00022000] _060_fpsp_effadd+0xb2c0/0xd518
[817534.87]  [31c421b8] __extent_read_full_page+0x0/0x70a [btrfs]
[817534.87]  [00058828] unlock_page+0x0/0x26
[817534.87]  [31c44780] read_extent_buffer_pages+0x1a8/0x218 [btrfs]
[817534.88]  [31c4c3b2] btrfs_num_copies+0x0/0x142 [btrfs]
[817534.88]  [31c23aa6] 
btree_read_extent_buffer_pages.constprop.52+0x42/0xca [btrfs]
[817534.88]  [31c22802] btree_get_extent+0x0/0x102 [btrfs]
[817534.88]  [00022000] _060_fpsp_effadd+0xb2c0/0xd518
[817534.88]  [1000] kernel_pg_dir+0x0/0x1000
[817534.88]  [31c2525e] read_tree_block+0x38/0x48 [btrfs]
[817534.88]  [31c25226] read_tree_block+0x0/0x48 [btrfs]
[817534.89]  [31c26d40] open_ctree+0xe80/0x15e6 [btrfs]
[817534.89]  [00022000] _060_fpsp_effadd+0xb2c0/0xd518
[817534.89]  [1000] kernel_pg_dir+0x0/0x1000
[817534.89]  [1000] kernel_pg_dir+0x0/0x1000
[817534.89]  [1000] kernel_pg_dir+0x0/0x1000

Re: Why does btrfs benchmark so badly in this case?

2013-08-08 Thread John Williams
On Thu, Aug 8, 2013 at 12:40 PM, Josef Bacik jba...@fusionio.com wrote:
 On Thu, Aug 08, 2013 at 09:13:04AM -0700, John Williams wrote:
 Phoronix periodically runs benchmarks on filesystems, and one thing I
 have noticed is that btrfs always does terribly on their fio Intel
 IOMeter fileserver access pattern benchmark:

 http://www.phoronix.com/scan.php?page=articleitem=linux_310_10fsnum=2

 So the reason this workload sucks for btrfs is because we fall back on 
 buffered
 IO because fio does not do block size aligned writes for this workload.  If 
 you
 add

 ba=4k

 to the iometer fio file then we go the same speed as xfs and ext4.  Not a 
 whole
 lot we can do about this since unaligned writes means we have to read in pages
 to cow the block properly, which is why we fall back to buffered.  Once we do
 that we end up having a lot of page locking stuff that gets in the way and 
 makes
 us twice as slow.  Thanks,

Thanks for looking into it.

So I guess the reason that ZFS does well with that workload is that
ZFS is using smaller blocks, maybe just 512B ?

I wonder how common these types of non-4K aligned workloads are.
Apparently, people with such workloads should avoid btrfs, but maybe
these types of workloads are very rare?
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Why does btrfs benchmark so badly in this case?

2013-08-08 Thread Josef Bacik
On Thu, Aug 08, 2013 at 01:23:22PM -0700, John Williams wrote:
 On Thu, Aug 8, 2013 at 12:40 PM, Josef Bacik jba...@fusionio.com wrote:
  On Thu, Aug 08, 2013 at 09:13:04AM -0700, John Williams wrote:
  Phoronix periodically runs benchmarks on filesystems, and one thing I
  have noticed is that btrfs always does terribly on their fio Intel
  IOMeter fileserver access pattern benchmark:
 
  http://www.phoronix.com/scan.php?page=articleitem=linux_310_10fsnum=2
 
  So the reason this workload sucks for btrfs is because we fall back on 
  buffered
  IO because fio does not do block size aligned writes for this workload.  If 
  you
  add
 
  ba=4k
 
  to the iometer fio file then we go the same speed as xfs and ext4.  Not a 
  whole
  lot we can do about this since unaligned writes means we have to read in 
  pages
  to cow the block properly, which is why we fall back to buffered.  Once we 
  do
  that we end up having a lot of page locking stuff that gets in the way and 
  makes
  us twice as slow.  Thanks,
 
 Thanks for looking into it.
 
 So I guess the reason that ZFS does well with that workload is that
 ZFS is using smaller blocks, maybe just 512B ?
 

Yeah I'm not sure what ZFS does, but if you are writing over a block and the
size/offset isn't aligned then you'd see similar issues with ZFS since it would
have to read+modify+write.  It is likely that ZFS just is using a smaller
blocksize.

 I wonder how common these type of non-4K aligned workloads are.
 Apparently, people with such workloads should avoid btrfs, but maybe
 these types of workloads are very rare?

So most people who use AIO/O_DIRECT have really specific setups which generally
can adjust how they align stuff (databases for example this would be the db page
and those are usually large, like 16k-32k), or with virtual images which will
hopefully be doing things in block aligned io's, but this depends on the host
OS.  Like I said there isn't a whole lot we can do about this, you can do NOCOW
if you want to get around it without changing your application or you can change
the app to be blocksize aligned.  Thanks,

Josef
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [XFSTESTS PATCH] btrfs: Test deduplication

2013-08-08 Thread Josef Bacik
On Thu, Jun 27, 2013 at 12:40:30AM +0200, Gabriel de Perthuis wrote:
 ---
 The matching kernel patch is here:
 https://github.com/g2p/linux/tree/v3.10%2Bextent-same (rebased on 3.10, 
 fixing a small conflict)
 Requires the btrfs-extent-same command:
 
 - http://permalink.gmane.org/gmane.comp.file-systems.btrfs/26579
 - https://github.com/markfasheh/duperemove
 

Sorry it took me so long to get to this, but I wanted to have the dedup patches
merged before I looked at this.  So first of all just copy btrfs-extent-same
into xfstests since it's not part of a normally installed package.

 
  tests/btrfs/313 | 93 
 +
  tests/btrfs/313.out | 25 ++
  tests/btrfs/group   |  1 +
  3 files changed, 119 insertions(+)
  create mode 100755 tests/btrfs/313
  create mode 100644 tests/btrfs/313.out
 
 diff --git a/tests/btrfs/313 b/tests/btrfs/313
 new file mode 100755
 index 000..04e4ccb
 --- /dev/null
 +++ b/tests/btrfs/313
 @@ -0,0 +1,93 @@
 +#! /bin/bash
 +# FS QA Test No. 313
 +#
 +# Test the deduplication syscall
 +#
 +#---
 +# Copyright (c) 2013 Red Hat, Inc.  All Rights Reserved.
 +#
 +# This program is free software; you can redistribute it and/or
 +# modify it under the terms of the GNU General Public License as
 +# published by the Free Software Foundation.
 +#
 +# This program is distributed in the hope that it would be useful,
 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 +# GNU General Public License for more details.
 +#
 +# You should have received a copy of the GNU General Public License
 +# along with this program; if not, write the Free Software Foundation,
 +# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
 +#---
 +#
 +
 +seq=`basename $0`
 +seqres=$RESULT_DIR/$seq
 +echo QA output created by $seq
 +
 +here=`pwd`
 +tmp=/tmp/$$
 +status=1 # failure is the default!
 +trap _cleanup; exit \$status 0 1 2 3 15
 +
 +_cleanup()
 +{
 +cd /
 +rm -f $tmp.*
 +}
 +
 +. ./common/rc
 +. ./common/filter
 +
 +ESAME=`set_prog_path btrfs-extent-same`
 +
 +_need_to_be_root
 +_supported_fs btrfs
 +_supported_os Linux
 +_require_command $ESAME
 +_require_command $XFS_IO_PROG
 +_require_scratch
 +
 +_scratch_mkfs /dev/null
 +_scratch_mount $seqres.full 21
 +
 +fiemap() {
 +xfs_io -r -c fiemap $1 |tail -n+2
 +}
 +
 +dedup() {
 +! diff -q (fiemap $1) (fiemap $2)
 +$ESAME $(stat -c %s $1) $1 0 $2 0
 +diff -u (fiemap $1) (fiemap $2)

These are spitting out the full path to SCRATCH, so you will want to use
something like _filter_scratch so that the output is consistent across people
running it.

 +}
 +
 +echo Silence is golden
 +set -e
 +
 +v1=$SCRATCH_MNT/v1
 +v2=$SCRATCH_MNT/v2
 +v3=$SCRATCH_MNT/v3
 +
 +$BTRFS_UTIL_PROG subvolume create $v1
 +$BTRFS_UTIL_PROG subvolume create $v2

Redirect the output of these commands to /dev/null

 +
 +dd bs=1M status=none if=/dev/urandom of=$v1/file1 count=1
 +dd bs=1M status=none if=/dev/urandom of=$v1/file2 count=1
 +dd bs=1M status=none if=$v1/file1 of=$v2/file3
 +dd bs=1M status=none if=$v1/file1 of=$v2/file4
 +

status=none doesn't work on my copy of dd, so don't use this, just do

dd ... >> $seqres.full 2>&1

or to /dev/null.

 +$BTRFS_UTIL_PROG subvolume snapshot -r $v2 $v3
 +
 +# identical, multiple volumes
 +dedup $v1/file1 $v2/file3
 +
 +# not identical, same volume
 +! $ESAME $((2**20)) $v1/file1 0 $v1/file2 0
 +
 +# identical, second file on a frozen volume
 +dedup $v1/file1 $v3/file4
 +
 +_scratch_unmount
 +_check_scratch_fs
 +status=0
 +exit
 diff --git a/tests/btrfs/313.out b/tests/btrfs/313.out
 new file mode 100644
 index 000..eabe6be
 --- /dev/null
 +++ b/tests/btrfs/313.out
 @@ -0,0 +1,25 @@
 +QA output created by 313
 +Silence is golden
 +Create subvolume 'sdir/v1'
 +Create subvolume 'sdir/v2'
 +Create a readonly snapshot of 'sdir/v2' in 'sdir/v3'
 +Files /dev/fd/63 and /dev/fd/62 differ
 +Deduping 2 total files
 +(0, 1048576): sdir/v1/file1
 +(0, 1048576): sdir/v2/file3
 +1 files asked to be deduped
 +i: 0, status: 0, bytes_deduped: 1048576
 +1048576 total bytes deduped in this operation
 +Deduping 2 total files
 +(0, 1048576): sdir/v1/file1
 +(0, 1048576): sdir/v1/file2
 +1 files asked to be deduped
 +i: 0, status: 1, bytes_deduped: 0
 +0 total bytes deduped in this operation
 +Files /dev/fd/63 and /dev/fd/62 differ
 +Deduping 2 total files
 +(0, 1048576): sdir/v1/file1
 +(0, 1048576): sdir/v3/file4
 +1 files asked to be deduped
 +i: 0, status: 0, bytes_deduped: 1048576
 +1048576 total bytes deduped in this operation
 diff --git a/tests/btrfs/group b/tests/btrfs/group
 index bc6c256..4c868c8 100644
 --- a/tests/btrfs/group
 +++ b/tests/btrfs/group
 @@ -7,5 +7,6 @@
  264 auto
  265 auto
  276 auto rw metadata
  284 auto
  307 auto quick
 

Re: Why does btrfs benchmark so badly in this case?

2013-08-08 Thread Chris Murphy

On Aug 8, 2013, at 2:23 PM, John Williams jwilliams4...@gmail.com wrote:
 
 So I guess the reason that ZFS does well with that workload is that
 ZFS is using smaller blocks, maybe just 512B ?

Likely. It uses a variable block size.


 I wonder how common these type of non-4K aligned workloads are.
 Apparently, people with such workloads should avoid btrfs, but maybe
 these types of workloads are very rare?

I can't directly answer the question, but all of the typical file systems on OS 
X, Linux, and Windows have defaulted to 4KB block sizes for many years now, baked in 
at creation time. On OS X, the block size varies automatically with respect to 
volume size at fs creation time (it goes to 8KB block sizes above 2TB, and 
scales up to 1MB block sizes), but still isn't ever less than 4KB unless 
manually created this way. So I'd think such workloads are rare.

I also don't know if any common use fs has an optimization whereby just the 
modified sector(s) is overwritten, rather than all sectors making up the file 
system block being modified.

Chris Murphy--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Why does btrfs benchmark so badly in this case?

2013-08-08 Thread Zach Brown
 I also don't know if any common use fs has an optimization whereby
 just the modified sector(s) is overwritten, rather than all sectors
 making up the file system block being modified.

Most of them do.  The generic direct io path allows sector sized dio.
The very first bit of do_blockdev_direct_IO() is testing first for file
system block size alignment then for block device sector size alignment.

You can see this easily with dd conv=notrunc oflag=direct and blktrace.

# blockdev --getss /dev/sda
512
# blockdev --getbsz /dev/sda
4096

# blktrace -d /dev/sda -a issue -o - | blkparse -i - 

$ dd if=/dev/zero of=file bs=4096 count=1 oflag=direct conv=notrunc
  8,03   1435.957320002 17941  D  WS 137297704 + 8 [dd]

$ dd if=/dev/zero of=file bs=512 count=1 oflag=direct conv=notrunc
  8,01431.405641362 17940  D  WS 137297704 + 1 [dd]

- z
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Btrfs: set default max_inline to 8KiB instead of 8MiB

2013-08-08 Thread Filipe David Borba Manana
8MiB is way too large and likely set by mistake. This is not
a significant issue as in practice the max amount of data
added to an inline extent is also limited by the page cache
and btree leaf sizes.

Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
---
 fs/btrfs/disk-io.c |2 +-
 fs/btrfs/disk-io.h |2 ++
 fs/btrfs/super.c   |2 +-
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 5de9ad7..aff37bd 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -2189,7 +2189,7 @@ int open_ctree(struct super_block *sb,
atomic_set(fs_info-defrag_running, 0);
atomic64_set(fs_info-tree_mod_seq, 0);
fs_info-sb = sb;
-   fs_info-max_inline = 8192 * 1024;
+   fs_info-max_inline = BTRFS_DEFAULT_MAX_INLINE;
fs_info-metadata_ratio = 0;
fs_info-defrag_inodes = RB_ROOT;
fs_info-free_chunk_space = 0;
diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
index b71acd6e..e76c1a2 100644
--- a/fs/btrfs/disk-io.h
+++ b/fs/btrfs/disk-io.h
@@ -25,6 +25,8 @@
 #define BTRFS_SUPER_MIRROR_MAX  3
 #define BTRFS_SUPER_MIRROR_SHIFT 12
 
+#define BTRFS_DEFAULT_MAX_INLINE 8192
+
 enum {
BTRFS_WQ_ENDIO_DATA = 0,
BTRFS_WQ_ENDIO_METADATA = 1,
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 1967903..7359a9e 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -941,7 +941,7 @@ static int btrfs_show_options(struct seq_file *seq, struct 
dentry *dentry)
seq_puts(seq, ,nodatacow);
if (btrfs_test_opt(root, NOBARRIER))
seq_puts(seq, ,nobarrier);
-   if (info-max_inline != 8192 * 1024)
+   if (info-max_inline != BTRFS_DEFAULT_MAX_INLINE)
seq_printf(seq, ,max_inline=%llu,
   (unsigned long long)info-max_inline);
if (info-alloc_start != 0)
-- 
1.7.9.5

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 6/7] btrfs: cleanup: removed unused 'btrfs_reada_detach'

2013-08-08 Thread David Sterba
On Thu, Aug 08, 2013 at 09:11:01PM +0200, Arne Jansen wrote:
 On 08/08/13 19:46, Zach Brown wrote:
  even though the function is currently unused, I'm hesitating to remove it
  as it's part of the reada-API and might be handy for anyone going to use
  the API in the future.
 
  I agree. As replied here,
  http://www.mail-archive.com/linux-btrfs@vger.kernel.org/msg24047.html
  please keep the function.
  
  If we're keeping score, put me down for being in favour of removing dead
  untested code.  git resurrection is easy. 
 
 It's not really untested, it has been in use some time ago. But of
 course there's a chance that some changes broke it.
 Yes, git resurrection is easy. To inform potential users, you might
 just leave a comment like this:
 
 /*
  * There has been a function once to detach from a running reada.
  * If you need such functionality, just revert the commit that
  * added this comment.
  */

And please write the exact commit sha1 instead of 'the commit' :)

I've used the _detach function when prototyping readdir readahead, that
did not bring the speedup as expected so more work is needed, that's why
I'm concerned about removing it. But, if Arne is ok with that, so be it.

david
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: set default max_inline to 8KiB instead of 8MiB

2013-08-08 Thread David Sterba
On Thu, Aug 08, 2013 at 10:45:48PM +0100, Filipe David Borba Manana wrote:
 8MiB is way too large and likely set by mistake. This is not
 a significant issue as in practice the max amount of data
 added to an inline extent is also limited by the page cache
 and btree leaf sizes.
 
 Signed-off-by: Filipe David Borba Manana fdman...@gmail.com

Reviewed-by: David Sterba dste...@suse.cz
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH RFC] Btrfs: add support for persistent mount options

2013-08-08 Thread David Sterba
On Wed, Aug 07, 2013 at 03:46:20PM +0200, Martin Steigerwald wrote:
  Because really, the motivation sounds like it's primarily for significant
  on-disk format changes controlled by mount options.  I understand that
  motivation more than being able to persist something like noatime.
 
 For a hotplug-able SSD having noatime stored persistently IMHO makes a lot of 
 sense as well.

I agree, and we can let btrfs understand noatime (or ro) even if they get
processed by vfs layer.
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [v2 2/8] Btrfs-progs: add missing man information for btrfs-debug-tree

2013-08-08 Thread David Sterba
Thanks for updating the license. One comment that's applicable to all
patches:

On Wed, Aug 07, 2013 at 01:54:03PM +0800, Wang Shilong wrote:
 +.SH AVAILABILITY
 +.B btrfs-debug-tree
 +is part of btrfs-progs. Btrfs is currently under heavy development,
 +and not suitable for any uses other than benchmarking and review.

That's (arguably) not true anymore and I'd rather see it removed or
updated closer to current status.

 +Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for
 +further details.
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH RFC] Btrfs: track compression algorithm on inodes

2013-08-08 Thread David Sterba
On Wed, Aug 07, 2013 at 12:29:44PM +0100, Filipe David Borba Manana wrote:
 Currently the compression settings (algorithm and force mode) need
 to be specified at mount time in order to have newly created files
 compressed.
[...]

I think we should take the top-down approach and start with the UI for how
to set these attributes, then think about where to store the information
(existing structures, xattrs). Tweaking compression per-file is
desirable, but with your patch it's required to set it via a mount
option and that's not very practical (only via remount, root required).

david
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH RFC] Btrfs: add support for persistent mount options

2013-08-08 Thread David Sterba
On Wed, Aug 07, 2013 at 12:33:09PM +0100, Filipe David Manana wrote:
 Thanks, I missed to find that before.
 The implementation is very different from the one I proposed.

That's one of the fundamental questions: how to store the information,
inside existing structures, via xattrs, or under new tree items. Each one
has pros and cons.

  Designing and merging the properties feature takes time, but we want to tune
  simple things now. The wiki project mentions ‘tune2fs’ as an example, but 
  the
  project details are not always accurate about how to do the things, it’s 
  more
  like ideas what to do. If you’re going to work on that, please claim the
  project on the wiki, and possibly write more details about the design.
 
 I will.

The project is titled as persistent mount options, are you willing to
take the more general per-object properties task? IMHO there's not
much difference, the UI should be the same, just that it implements
per-fs or per-subvolume properties like mount options. The rest of the
object properties have to be collected and agreed on. I'm sure there's
community knowledge of what's desired, so it's a matter of writing it
down and bikeshe^Wagreement on the naming syntax.

david
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[patch v2 1/2] Btrfs: fix possible memory leak in find_parent_nodes()

2013-08-08 Thread Wang Shilong
The original code dealt with 'ref' in the following steps:
|-list_del(ref-list)
|-some operations
|-kfree(ref)

If the operations failed, the code would goto label 'out' without freeing
this 'ref', and a memory leak would happen. Moving list_del() down to right
before kfree() fixes the problem: on failure 'ref' is still on the list, so
the cleanup loop at 'out' frees it.

Signed-off-by: Wang Shilong wangsl.f...@cn.fujitsu.com
Reviewed-by: Miao Xie mi...@cn.fujitsu.com
---
V1-V2: add explanations to changelog
---
 fs/btrfs/backref.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 68048d6..7b55c95 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -911,7 +911,6 @@ again:
 
while (!list_empty(prefs)) {
ref = list_first_entry(prefs, struct __prelim_ref, list);
-   list_del(ref-list);
WARN_ON(ref-count  0);
if (ref-count  ref-root_id  ref-parent == 0) {
/* no parent == root of tree */
@@ -956,6 +955,7 @@ again:
eie-next = ref-inode_list;
}
}
+   list_del(ref-list);
kfree(ref);
}
 
-- 
1.8.0.1

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[patch v2 2/2] Btrfs: allocate prelim_ref with a slab allocater

2013-08-08 Thread Wang Shilong
struct __prelim_ref is allocated and freed frequently when
walking the backref tree; using a slab allocator can not only
speed up allocation but also help detect memory leaks.

Signed-off-by: Wang Shilong wangsl.f...@cn.fujitsu.com
Reviewed-by: Miao Xie mi...@cn.fujitsu.com
---
V1-V2: 
1.fix a missing allocating case that should be used by 
kmem_cache_alloc()
  spotted by Jan Schmidt
2.rename prelim_ref to btrfs_prelim_ref addressed by David
---
 fs/btrfs/backref.c | 33 +++--
 fs/btrfs/backref.h |  2 ++
 fs/btrfs/super.c   |  8 
 3 files changed, 37 insertions(+), 6 deletions(-)

diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 7b55c95..b352d15 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -119,6 +119,26 @@ struct __prelim_ref {
u64 wanted_disk_byte;
 };
 
+static struct kmem_cache *btrfs_prelim_ref_cache;
+
+int __init btrfs_prelim_ref_init(void)
+{
+   btrfs_prelim_ref_cache = kmem_cache_create(btrfs_prelim_ref,
+   sizeof(struct __prelim_ref),
+   0,
+   SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD,
+   NULL);
+   if (!btrfs_prelim_ref_cache)
+   return -ENOMEM;
+   return 0;
+}
+
+void btrfs_prelim_ref_exit(void)
+{
+   if (btrfs_prelim_ref_cache)
+   kmem_cache_destroy(btrfs_prelim_ref_cache);
+}
+
 /*
  * the rules for all callers of this function are:
  * - obtaining the parent is the goal
@@ -165,7 +185,7 @@ static int __add_prelim_ref(struct list_head *head, u64 
root_id,
 {
struct __prelim_ref *ref;
 
-   ref = kmalloc(sizeof(*ref), gfp_mask);
+   ref = kmem_cache_alloc(btrfs_prelim_ref_cache, gfp_mask);
if (!ref)
return -ENOMEM;
 
@@ -369,7 +389,8 @@ static int __resolve_indirect_refs(struct btrfs_fs_info 
*fs_info,
 
/* additional parents require new refs being added here */
while ((node = ulist_next(parents, uiter))) {
-   new_ref = kmalloc(sizeof(*new_ref), GFP_NOFS);
+   new_ref = kmem_cache_alloc(btrfs_prelim_ref_cache,
+  GFP_NOFS);
if (!new_ref) {
ret = -ENOMEM;
goto out;
@@ -493,7 +514,7 @@ static void __merge_refs(struct list_head *head, int mode)
ref1-count += ref2-count;
 
list_del(ref2-list);
-   kfree(ref2);
+   kmem_cache_free(btrfs_prelim_ref_cache, ref2);
}
 
}
@@ -956,7 +977,7 @@ again:
}
}
list_del(ref-list);
-   kfree(ref);
+   kmem_cache_free(btrfs_prelim_ref_cache, ref);
}
 
 out:
@@ -964,13 +985,13 @@ out:
while (!list_empty(prefs)) {
ref = list_first_entry(prefs, struct __prelim_ref, list);
list_del(ref-list);
-   kfree(ref);
+   kmem_cache_free(btrfs_prelim_ref_cache, ref);
}
while (!list_empty(prefs_delayed)) {
ref = list_first_entry(prefs_delayed, struct __prelim_ref,
   list);
list_del(ref-list);
-   kfree(ref);
+   kmem_cache_free(btrfs_prelim_ref_cache, ref);
}
 
return ret;
diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h
index 8f2e767..a910b27 100644
--- a/fs/btrfs/backref.h
+++ b/fs/btrfs/backref.h
@@ -72,4 +72,6 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 
inode_objectid,
  struct btrfs_inode_extref **ret_extref,
  u64 *found_off);
 
+int __init btrfs_prelim_ref_init(void);
+void btrfs_prelim_ref_exit(void);
 #endif
diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
index 1967903..812ab3d 100644
--- a/fs/btrfs/super.c
+++ b/fs/btrfs/super.c
@@ -56,6 +56,7 @@
 #include rcu-string.h
 #include dev-replace.h
 #include free-space-cache.h
+#include backref.h
 
 #define CREATE_TRACE_POINTS
 #include trace/events/btrfs.h
@@ -1800,6 +1801,10 @@ static int __init init_btrfs_fs(void)
if (err)
goto free_auto_defrag;
 
+   err = btrfs_prelim_ref_init();
+   if (err)
+   goto free_prelim_ref;
+
err = btrfs_interface_init();
if (err)
goto free_delayed_ref;
@@ -1817,6 +1822,8 @@ static int __init init_btrfs_fs(void)
 
 unregister_ioctl:
btrfs_interface_exit();
+free_prelim_ref:
+   btrfs_prelim_ref_exit();
 free_delayed_ref:
btrfs_delayed_ref_exit();
 free_auto_defrag:
@@ -1843,6 +1850,7 @@ static void __exit exit_btrfs_fs(void)
btrfs_delayed_ref_exit();
btrfs_auto_defrag_exit();
btrfs_delayed_inode_exit();
+   

btrfs qgroup destroy - ERROR: unable to create quota group: Device or resource busy

2013-08-08 Thread Tomasz Chmielewski
I'm using qgroups and have created a few hundred subvolumes in the
past.

It seems that btrfs automatically assigns a qgroup to a newly created
snapshot/subvolume, but does not destroy the qgroup when the subvolume
is deleted.

So I've tried to destroy the unused qgroups, with mixed success. I was
able to destroy most of them, but some are still failing, i.e.:

# btrfs qgroup destroy 4494 /mnt/lxc1
ERROR: unable to create quota group: Device or resource busy


Note the negative number below, but I also have qgroups where both
numbers are positive, which I'm not able to destroy either:

# btrfs qgroup show /mnt/lxc1 | grep 4494
0/4494 839516160 -69632


qgroup 4494 is not used by any subvolume:

# btrfs sub list /mnt/lxc1 | grep 4494


I did run btrfs quota rescan for this filesystem, hoping it would fix
the problem, but it didn't.


Any advice?


-- 
Tomasz Chmielewski
http://wpkg.org
--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH] Btrfs: set default max_inline to 8KiB instead of 8MiB

2013-08-08 Thread Miao Xie
On Thu, 8 Aug 2013 22:45:48 +0100, Filipe David Borba Manana wrote:
> 8MiB is way too large and likely set by mistake. This is not
> a significant issue as in practice the max amount of data
> added to an inline extent is also limited by the page cache
> and btree leaf sizes.

I don't think 8KB is a reasonable default for the max inline size,
because it makes no sense on a machine whose page size is 4KB.

I think 4KB is a reasonable value: because we may mount the fs on
machines with different page sizes in the future, we should use the
minimum page size as the max inline size in order to avoid
compatibility problems.

Thanks
Miao

 Signed-off-by: Filipe David Borba Manana fdman...@gmail.com
 ---
  fs/btrfs/disk-io.c |2 +-
  fs/btrfs/disk-io.h |2 ++
  fs/btrfs/super.c   |2 +-
  3 files changed, 4 insertions(+), 2 deletions(-)
 
 diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
 index 5de9ad7..aff37bd 100644
 --- a/fs/btrfs/disk-io.c
 +++ b/fs/btrfs/disk-io.c
 @@ -2189,7 +2189,7 @@ int open_ctree(struct super_block *sb,
   atomic_set(fs_info-defrag_running, 0);
   atomic64_set(fs_info-tree_mod_seq, 0);
   fs_info-sb = sb;
 - fs_info-max_inline = 8192 * 1024;
 + fs_info-max_inline = BTRFS_DEFAULT_MAX_INLINE;
   fs_info-metadata_ratio = 0;
   fs_info-defrag_inodes = RB_ROOT;
   fs_info-free_chunk_space = 0;
 diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h
 index b71acd6e..e76c1a2 100644
 --- a/fs/btrfs/disk-io.h
 +++ b/fs/btrfs/disk-io.h
 @@ -25,6 +25,8 @@
  #define BTRFS_SUPER_MIRROR_MAX3
  #define BTRFS_SUPER_MIRROR_SHIFT 12
  
 +#define BTRFS_DEFAULT_MAX_INLINE 8192
 +
  enum {
   BTRFS_WQ_ENDIO_DATA = 0,
   BTRFS_WQ_ENDIO_METADATA = 1,
 diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c
 index 1967903..7359a9e 100644
 --- a/fs/btrfs/super.c
 +++ b/fs/btrfs/super.c
 @@ -941,7 +941,7 @@ static int btrfs_show_options(struct seq_file *seq, 
 struct dentry *dentry)
   seq_puts(seq, ,nodatacow);
   if (btrfs_test_opt(root, NOBARRIER))
   seq_puts(seq, ,nobarrier);
 - if (info-max_inline != 8192 * 1024)
 + if (info-max_inline != BTRFS_DEFAULT_MAX_INLINE)
   seq_printf(seq, ,max_inline=%llu,
  (unsigned long long)info-max_inline);
   if (info-alloc_start != 0)
 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: btrfs qgroup destroy - ERROR: unable to create quota group: Device or resource busy

2013-08-08 Thread Wang Shilong
Hello,

On 08/09/2013 01:39 PM, Tomasz Chmielewski wrote:
 I'm using qgroups and have created a few hundreds of subvolumes in the
 past.
 
 It seems that btrfs automatically assigns a qgroup to newly created
 snapshot/subvolume, but does not destroy the qgroup when the subvolume
 is deleted.

This should be implemented, and will be soon.

 
 So I've tried to destroy the unused qgroups, with mixed success. I was
 able to destroy most of them, but some are still failing, i.e.:
 
 # btrfs qgroup destroy 4494 /mnt/lxc1
 ERROR: unable to create quota group: Device or resource busy

Just remove qgroup(4494)'s parent qgroup; then it can be removed.
Anyway, I think this is unnecessary.

Thanks,
Wang
 
 
 Note the negative number here, but I also have qgroups with both
 positive numbers, which I'm not able to destroy as well:
 
 # btrfs qgroup show /mnt/lxc1 | grep 4494
 0/4494 839516160 -69632
 
 
 qgroup 4494 is not used by any subvolume:
 
 # btrfs sub list /mnt/lxc1 | grep 4494
 
 
 I did run btrfs quota rescan for this filesystem, hoping it will fix
 the problem, but it didn't.
 
 
 Any advice?
 
 

--
To unsubscribe from this list: send the line unsubscribe linux-btrfs in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html