[PATCH] _test_generic_punch: Extend $testfile's size to work with 64k block.
From cf6e1fc3a8d7806a97055b5f483cf50f58c8294f Mon Sep 17 00:00:00 2001 From: chandan chan...@linux.vnet.ibm.com Date: Thu, 8 Aug 2013 11:33:10 +0530 Subject: [PATCH] _test_generic_punch: Extend $testfile's size to work with 64k block. The current script does not work with 64k block size. This patch fixes it by creating a larger $testfile. Signed-off-by: chandan chan...@linux.vnet.ibm.com --- common/punch | 119 +++-- tests/generic/255.out | 476 +- tests/xfs/242.out | 118 ++--- tests/xfs/252.out | 476 +- 4 files changed, 594 insertions(+), 595 deletions(-) diff --git a/common/punch b/common/punch index d8f73d0..e6f0b2a 100644 --- a/common/punch +++ b/common/punch @@ -316,8 +316,8 @@ _test_generic_punch() if [ $remove_testfile ]; then rm -f $testfile fi - $XFS_IO_PROG -f -c truncate 20k \ - -c $zero_cmd 4k 8k \ + $XFS_IO_PROG -f -c truncate 320k \ + -c $zero_cmd 64k 128k \ -c $map_cmd -v $testfile | $filter_cmd [ $? -ne 0 ] die_now _md5_checksum $testfile @@ -326,9 +326,9 @@ _test_generic_punch() if [ $remove_testfile ]; then rm -f $testfile fi - $XFS_IO_PROG -f -c truncate 20k \ - -c pwrite 0 20k $sync_cmd \ - -c $zero_cmd 4k 8k \ + $XFS_IO_PROG -f -c truncate 320k \ + -c pwrite 0 320k $sync_cmd \ + -c $zero_cmd 64k 128k \ -c $map_cmd -v $testfile | $filter_cmd [ $? -ne 0 ] die_now _md5_checksum $testfile @@ -337,9 +337,9 @@ _test_generic_punch() if [ $remove_testfile ]; then rm -f $testfile fi - $XFS_IO_PROG -f -c truncate 20k \ - -c $alloc_cmd 0 20k \ - -c $zero_cmd 4k 8k \ + $XFS_IO_PROG -f -c truncate 320k \ + -c $alloc_cmd 0 320k \ + -c $zero_cmd 64k 128k \ -c $map_cmd -v $testfile | $filter_cmd [ $? -ne 0 ] die_now _md5_checksum $testfile @@ -348,9 +348,9 @@ _test_generic_punch() if [ $remove_testfile ]; then rm -f $testfile fi - $XFS_IO_PROG -f -c truncate 20k \ - -c pwrite 8k 8k $sync_cmd \ - -c $zero_cmd 4k 8k \ + $XFS_IO_PROG -f -c truncate 320k \ + -c pwrite 128k 128k $sync_cmd \ + -c $zero_cmd 64k 128k \ -c $map_cmd -v $testfile | $filter_cmd [ $? -ne 0 ] die_now _md5_checksum $testfile @@ -359,9 +359,9 @@ _test_generic_punch() if [ $remove_testfile ]; then rm -f $testfile fi - $XFS_IO_PROG -f -c truncate 20k \ - -c $alloc_cmd 8k 8k \ - -c $zero_cmd 4k 8k \ + $XFS_IO_PROG -f -c truncate 320k \ + -c $alloc_cmd 128k 128k \ + -c $zero_cmd 64k 128k \ -c $map_cmd -v $testfile | $filter_cmd [ $? -ne 0 ] die_now _md5_checksum $testfile @@ -370,9 +370,9 @@ _test_generic_punch() if [ $remove_testfile ]; then rm -f $testfile fi - $XFS_IO_PROG -f -c truncate 20k \ - -c pwrite 0 8k $sync_cmd \ - -c $zero_cmd 4k 8k \ + $XFS_IO_PROG -f -c truncate 320k \ + -c pwrite 0 128k $sync_cmd \ + -c $zero_cmd 64k 128k \ -c $map_cmd -v $testfile | $filter_cmd [ $? -ne 0 ] die_now _md5_checksum $testfile @@ -381,10 +381,10 @@ _test_generic_punch() if [ $remove_testfile ]; then rm -f $testfile fi - $XFS_IO_PROG -f -c truncate 20k \ - -c pwrite 0 8k $sync_cmd \ - -c $alloc_cmd 8k 8k \ - -c $zero_cmd 4k 8k \ + $XFS_IO_PROG -f -c truncate 320k \ + -c pwrite 0 128k $sync_cmd \ + -c $alloc_cmd 128k 128k \ + -c $zero_cmd 64k 128k \ -c $map_cmd -v $testfile | $filter_cmd [ $? -ne 0 ] die_now _md5_checksum $testfile @@ -393,9 +393,9 @@ _test_generic_punch() if [ $remove_testfile ]; then rm -f $testfile fi - $XFS_IO_PROG -f -c truncate 20k \ - -c $alloc_cmd 0 8k \ - -c $zero_cmd 4k 8k \ + $XFS_IO_PROG -f -c truncate 320k \ + -c $alloc_cmd 0 128k \ + -c $zero_cmd 64k 128k \ -c $map_cmd -v $testfile | $filter_cmd [ $? -ne 0 ] die_now _md5_checksum $testfile @@ -404,10 +404,10 @@ _test_generic_punch() if [ $remove_testfile ]; then rm -f $testfile fi - $XFS_IO_PROG -f -c truncate 20k \ - -c $alloc_cmd 0 8k \ - -c pwrite 8k 8k $sync_cmd \ - -c $zero_cmd 4k 8k \ +
Re: [PATCH] Btrfs: stop using GFP_ATOMIC when allocating rewind ebs
On Wed, August 07, 2013 at 23:11 (+0200), Josef Bacik wrote: There is no reason we can't just set the path to blocking and then do normal GFP_NOFS allocations for these extent buffers. Thanks, Signed-off-by: Josef Bacik jba...@fusionio.com --- fs/btrfs/ctree.c | 16 ++-- fs/btrfs/extent_io.c |8 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 1dd8a71..414a2d7 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1191,8 +1191,8 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, * is freed (its refcount is decremented). */ static struct extent_buffer * -tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, - u64 time_seq) +tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path, + struct extent_buffer *eb, u64 time_seq) { struct extent_buffer *eb_rewin; struct tree_mod_elem *tm; @@ -1207,12 +1207,15 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, if (!tm) return eb; + btrfs_set_path_blocking(path); + btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); + if (tm-op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) { BUG_ON(tm-slot != 0); eb_rewin = alloc_dummy_extent_buffer(eb-start, fs_info-tree_root-nodesize); if (!eb_rewin) { - btrfs_tree_read_unlock(eb); + btrfs_tree_read_unlock_blocking(eb); free_extent_buffer(eb); return NULL; } @@ -1224,13 +1227,14 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, } else { eb_rewin = btrfs_clone_extent_buffer(eb); if (!eb_rewin) { - btrfs_tree_read_unlock(eb); + btrfs_tree_read_unlock_blocking(eb); free_extent_buffer(eb); return NULL; } } - btrfs_tree_read_unlock(eb); + btrfs_clear_path_blocking(path, NULL, BTRFS_READ_LOCK); + btrfs_tree_read_unlock_blocking(eb); unlock_blocking? Rest looks ok to me. Thanks, -Jan free_extent_buffer(eb); extent_buffer_get(eb_rewin); @@ -2779,7 +2783,7 @@ again: btrfs_clear_path_blocking(p, b, BTRFS_READ_LOCK); } - b = tree_mod_log_rewind(root-fs_info, b, time_seq); + b = tree_mod_log_rewind(root-fs_info, p, b, time_seq); if (!b) { ret = -ENOMEM; goto done; diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index b422cba..beda5a8 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4340,12 +4340,12 @@ struct extent_buffer *btrfs_clone_extent_buffer(struct extent_buffer *src) struct extent_buffer *new; unsigned long num_pages = num_extent_pages(src-start, src-len); - new = __alloc_extent_buffer(NULL, src-start, src-len, GFP_ATOMIC); + new = __alloc_extent_buffer(NULL, src-start, src-len, GFP_NOFS); if (new == NULL) return NULL; for (i = 0; i num_pages; i++) { - p = alloc_page(GFP_ATOMIC); + p = alloc_page(GFP_NOFS); if (!p) { btrfs_release_extent_buffer(new); return NULL; @@ -4369,12 +4369,12 @@ struct extent_buffer *alloc_dummy_extent_buffer(u64 start, unsigned long len) unsigned long num_pages = num_extent_pages(0, len); unsigned long i; - eb = __alloc_extent_buffer(NULL, start, len, GFP_ATOMIC); + eb = __alloc_extent_buffer(NULL, start, len, GFP_NOFS); if (!eb) return NULL; for (i = 0; i num_pages; i++) { - eb-pages[i] = alloc_page(GFP_ATOMIC); + eb-pages[i] = alloc_page(GFP_NOFS); if (!eb-pages[i]) goto err; } -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 6/7] btrfs: cleanup: removed unused 'btrfs_reada_detach'
On 07.08.2013 23:43, Sergei Trofimovich wrote: From: Sergei Trofimovich sly...@gentoo.org Found by uselex.rb: btrfs_reada_detach: [R]: exported from: fs/btrfs/btrfs.o fs/btrfs/built-in.o fs/btrfs/reada.o even though the function is currently unused, I'm hesitating to remove it as it's part of the reada-API and might be handy for anyone going to use the API in the future. -Arne Signed-off-by: Sergei Trofimovich sly...@gentoo.org --- fs/btrfs/ctree.h | 1 - fs/btrfs/reada.c | 9 + 2 files changed, 1 insertion(+), 9 deletions(-) diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index e91ab9e..f35e086 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -3861,7 +3861,6 @@ struct reada_control { struct reada_control *btrfs_reada_add(struct btrfs_root *root, struct btrfs_key *start, struct btrfs_key *end); int btrfs_reada_wait(void *handle); -void btrfs_reada_detach(void *handle); int btree_readahead_hook(struct btrfs_root *root, struct extent_buffer *eb, u64 start, int err); diff --git a/fs/btrfs/reada.c b/fs/btrfs/reada.c index 1031b69..c41d470 100644 --- a/fs/btrfs/reada.c +++ b/fs/btrfs/reada.c @@ -37,7 +37,7 @@ * To trigger a readahead, btrfs_reada_add must be called. It will start * a read ahead for the given range [start, end) on tree root. The returned * handle can either be used to wait on the readahead to finish - * (btrfs_reada_wait), or to send it to the background (btrfs_reada_detach). + * (btrfs_reada_wait). * * The read ahead works as follows: * On btrfs_reada_add, the root of the tree is inserted into a radix_tree. @@ -979,10 +979,3 @@ int btrfs_reada_wait(void *handle) return 0; } #endif - -void btrfs_reada_detach(void *handle) -{ - struct reada_control *rc = handle; - - kref_put(rc-refcnt, reada_control_release); -} -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Btrfs: deal with enomem in the rewind path V3
On Wed, August 07, 2013 at 23:03 (+0200), Josef Bacik wrote: We can get ENOMEM trying to allocate dummy bufs for the rewind operation of the tree mod log. Instead of BUG_ON()'ing in this case pass up ENOMEM. I looked back through the callers and I'm pretty sure I got everybody who did BUG_ON(ret) in this path. Thanks, Signed-off-by: Josef Bacik jba...@fusionio.com --- V2-V3: -unlock and free the original buffer on error -return NULL instead of ERR_PTR(-ENOMEM) V1-V2: missed a BUG_ON() for alloc_dummy_extent_buffer. fs/btrfs/ctree.c | 16 +- fs/btrfs/extent_io.c | 145 + 2 files changed, 88 insertions(+), 73 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 0d5c686..1dd8a71 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1211,7 +1211,11 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, BUG_ON(tm-slot != 0); eb_rewin = alloc_dummy_extent_buffer(eb-start, fs_info-tree_root-nodesize); - BUG_ON(!eb_rewin); + if (!eb_rewin) { + btrfs_tree_read_unlock(eb); + free_extent_buffer(eb); + return NULL; + } btrfs_set_header_bytenr(eb_rewin, eb-start); btrfs_set_header_backref_rev(eb_rewin, btrfs_header_backref_rev(eb)); @@ -1219,7 +1223,11 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, btrfs_set_header_level(eb_rewin, btrfs_header_level(eb)); } else { eb_rewin = btrfs_clone_extent_buffer(eb); - BUG_ON(!eb_rewin); + if (!eb_rewin) { + btrfs_tree_read_unlock(eb); + free_extent_buffer(eb); + return NULL; + } } btrfs_tree_read_unlock(eb); @@ -2772,6 +2780,10 @@ again: BTRFS_READ_LOCK); } b = tree_mod_log_rewind(root-fs_info, b, time_seq); + if (!b) { + ret = -ENOMEM; + goto done; + } p-locks[level] = BTRFS_READ_LOCK; p-nodes[level] = b; } else { diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index deaea9c..b422cba 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4222,6 +4222,76 @@ static void __free_extent_buffer(struct extent_buffer *eb) kmem_cache_free(extent_buffer_cache, eb); } +static int extent_buffer_under_io(struct extent_buffer *eb) +{ + return (atomic_read(eb-io_pages) || + test_bit(EXTENT_BUFFER_WRITEBACK, eb-bflags) || + test_bit(EXTENT_BUFFER_DIRTY, eb-bflags)); +} + +/* + * Helper for releasing extent buffer page. + */ +static void btrfs_release_extent_buffer_page(struct extent_buffer *eb, + unsigned long start_idx) +{ + unsigned long index; + unsigned long num_pages; + struct page *page; + int mapped = !test_bit(EXTENT_BUFFER_DUMMY, eb-bflags); + + BUG_ON(extent_buffer_under_io(eb)); + + num_pages = num_extent_pages(eb-start, eb-len); + index = start_idx + num_pages; + if (start_idx = index) + return; + + do { + index--; + page = extent_buffer_page(eb, index); + if (page mapped) { + spin_lock(page-mapping-private_lock); + /* + * We do this since we'll remove the pages after we've + * removed the eb from the radix tree, so we could race + * and have this page now attached to the new eb. So + * only clear page_private if it's still connected to + * this eb. + */ + if (PagePrivate(page) + page-private == (unsigned long)eb) { + BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, eb-bflags)); + BUG_ON(PageDirty(page)); + BUG_ON(PageWriteback(page)); + /* + * We need to make sure we haven't be attached + * to a new eb. + */ + ClearPagePrivate(page); + set_page_private(page, 0); + /* One for the page private */ + page_cache_release(page); + } + spin_unlock(page-mapping-private_lock);
Re: [PATCH] Btrfs: pass gfp_t to __add_prelim_ref() to avoid always using GFP_ATOMIC
On Tue, August 06, 2013 at 04:29 (+0200), Wang Shilong wrote: Currently, only add_delayed_refs have to allocate with GFP_ATOMIC, So just pass arg 'gfp_t' to decide which allocation mode. Signed-off-by: Wang Shilong wangsl.f...@cn.fujitsu.com Reviewed-by: Miao Xie mi...@cn.fujitsu.com --- fs/btrfs/backref.c | 30 +++--- 1 file changed, 15 insertions(+), 15 deletions(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 8bc5e8c..cb73a12 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -160,12 +160,12 @@ struct __prelim_ref { static int __add_prelim_ref(struct list_head *head, u64 root_id, struct btrfs_key *key, int level, - u64 parent, u64 wanted_disk_byte, int count) + u64 parent, u64 wanted_disk_byte, int count, + gfp_t gfp_mask) { struct __prelim_ref *ref; - /* in case we're adding delayed refs, we're holding the refs spinlock */ - ref = kmalloc(sizeof(*ref), GFP_ATOMIC); + ref = kmalloc(sizeof(*ref), gfp_mask); if (!ref) return -ENOMEM; @@ -548,7 +548,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, ref = btrfs_delayed_node_to_tree_ref(node); ret = __add_prelim_ref(prefs, ref-root, op_key, ref-level + 1, 0, node-bytenr, -node-ref_mod * sgn); +node-ref_mod * sgn, GFP_ATOMIC); break; } case BTRFS_SHARED_BLOCK_REF_KEY: { @@ -558,7 +558,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, ret = __add_prelim_ref(prefs, ref-root, NULL, ref-level + 1, ref-parent, node-bytenr, -node-ref_mod * sgn); +node-ref_mod * sgn, GFP_ATOMIC); break; } case BTRFS_EXTENT_DATA_REF_KEY: { @@ -570,7 +570,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, key.offset = ref-offset; ret = __add_prelim_ref(prefs, ref-root, key, 0, 0, node-bytenr, -node-ref_mod * sgn); +node-ref_mod * sgn, GFP_ATOMIC); break; } case BTRFS_SHARED_DATA_REF_KEY: { @@ -583,7 +583,7 @@ static int __add_delayed_refs(struct btrfs_delayed_ref_head *head, u64 seq, key.offset = ref-offset; ret = __add_prelim_ref(prefs, ref-root, key, 0, ref-parent, node-bytenr, -node-ref_mod * sgn); +node-ref_mod * sgn, GFP_ATOMIC); break; } default: @@ -657,7 +657,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, case BTRFS_SHARED_BLOCK_REF_KEY: ret = __add_prelim_ref(prefs, 0, NULL, *info_level + 1, offset, - bytenr, 1); + bytenr, 1, GFP_NOFS); break; case BTRFS_SHARED_DATA_REF_KEY: { struct btrfs_shared_data_ref *sdref; @@ -666,13 +666,13 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, sdref = (struct btrfs_shared_data_ref *)(iref + 1); count = btrfs_shared_data_ref_count(leaf, sdref); ret = __add_prelim_ref(prefs, 0, NULL, 0, offset, -bytenr, count); +bytenr, count, GFP_NOFS); break; } case BTRFS_TREE_BLOCK_REF_KEY: ret = __add_prelim_ref(prefs, offset, NULL, *info_level + 1, 0, -bytenr, 1); +bytenr, 1, GFP_NOFS); break; case BTRFS_EXTENT_DATA_REF_KEY: { struct btrfs_extent_data_ref *dref; @@ -687,7 +687,7 @@ static int __add_inline_refs(struct btrfs_fs_info *fs_info, key.offset = btrfs_extent_data_ref_offset(leaf, dref); root = btrfs_extent_data_ref_root(leaf, dref);
[PATCH 0/2 v2] introduce btrfs filesystem show --kernel
This patch set introduces --kernel option for filesystem show for the reason as mentioned in the patch 1/2 below 1/1 is the preparatory patch Anand Jain (2): btrfs-progs: move out print in cmd_df to another function btrfs-progs: introduce btrfs filesystem show --kernel cmds-filesystem.c | 355 -- ctree.h | 11 ++ man/btrfs.8.in| 5 +- 3 files changed, 281 insertions(+), 90 deletions(-) -- 1.8.1.191.g414c78c -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/2] btrfs-progs: move out print in cmd_df to another function
This is a prepatory work for the following btrfs fi show command fixes. So that we have a function get_df to get the fs sizes v2: combined the other patches as below and rebase btrfs-progs: get string for the group profile and type Signed-off-by: Anand Jain anand.j...@oracle.com --- cmds-filesystem.c | 190 +++--- ctree.h | 11 2 files changed, 122 insertions(+), 79 deletions(-) diff --git a/cmds-filesystem.c b/cmds-filesystem.c index a4e30ea..be8afde 100644 --- a/cmds-filesystem.c +++ b/cmds-filesystem.c @@ -44,28 +44,51 @@ static const char * const cmd_df_usage[] = { NULL }; -static int cmd_df(int argc, char **argv) +static char * group_type_str(u64 flag) { - struct btrfs_ioctl_space_args *sargs, *sargs_orig; - u64 count = 0, i; - int ret; - int fd; - int e; - char *path; - DIR *dirstream = NULL; - - if (check_argc_exact(argc, 2)) - usage(cmd_df_usage); - - path = argv[1]; + switch (flag BTRFS_BLOCK_GROUP_TYPE_MASK) { + case BTRFS_BLOCK_GROUP_DATA: + return data; + case BTRFS_BLOCK_GROUP_SYSTEM: + return system; + case BTRFS_BLOCK_GROUP_METADATA: + return metadata; + case BTRFS_BLOCK_GROUP_DATA|BTRFS_BLOCK_GROUP_METADATA: + return mixed; + default: + return unknown; + } +} - fd = open_file_or_dir(path, dirstream); - if (fd 0) { - fprintf(stderr, ERROR: can't access to '%s'\n, path); - return 12; +static char * group_profile_str(u64 flag) +{ + switch (flag BTRFS_BLOCK_GROUP_PROFILE_MASK) { + case 0: + return single; + case BTRFS_BLOCK_GROUP_RAID0: + return RAID0; + case BTRFS_BLOCK_GROUP_RAID1: + return RAID1; + case BTRFS_BLOCK_GROUP_RAID5: + return RAID5; + case BTRFS_BLOCK_GROUP_RAID6: + return RAID6; + case BTRFS_BLOCK_GROUP_DUP: + return DUP; + case BTRFS_BLOCK_GROUP_RAID10: + return RAID10; + default: + return unknown; } +} + +static int get_df(int fd, struct btrfs_ioctl_space_args **sargs_ret) +{ + u64 count = 0; + int ret, e; + struct btrfs_ioctl_space_args *sargs; - sargs_orig = sargs = malloc(sizeof(struct btrfs_ioctl_space_args)); + sargs = malloc(sizeof(struct btrfs_ioctl_space_args)); if (!sargs) return -ENOMEM; @@ -75,89 +98,98 @@ static int cmd_df(int argc, char **argv) ret = ioctl(fd, BTRFS_IOC_SPACE_INFO, sargs); e = errno; if (ret) { - fprintf(stderr, ERROR: couldn't get space info on '%s' - %s\n, - path, strerror(e)); - goto out; + fprintf(stderr, ERROR: couldn't get space info - %s\n, + strerror(e)); + free(sargs); + return ret; } if (!sargs-total_spaces) { - ret = 0; - goto out; + free(sargs); + return 0; } - count = sargs-total_spaces; + free(sargs); - sargs = realloc(sargs, sizeof(struct btrfs_ioctl_space_args) + + sargs = malloc(sizeof(struct btrfs_ioctl_space_args) + (count * sizeof(struct btrfs_ioctl_space_info))); - if (!sargs) { - sargs = sargs_orig; + if (!sargs) ret = -ENOMEM; - goto out; - } sargs-space_slots = count; sargs-total_spaces = 0; - ret = ioctl(fd, BTRFS_IOC_SPACE_INFO, sargs); e = errno; if (ret) { - fprintf(stderr, ERROR: couldn't get space info on '%s' - %s\n, - path, strerror(e)); - goto out; + fprintf(stderr, ERROR: get space info count %llu - %s\n, + count, strerror(e)); + free(sargs); + return ret; } + *sargs_ret = sargs; + return 0; +} - for (i = 0; i sargs-total_spaces; i++) { - char description[80]; - int written = 0; - u64 flags = sargs-spaces[i].flags; +static void print_df(struct btrfs_ioctl_space_args *sargs) +{ + char description[80]; + char *total_bytes; + char *used_bytes; + u64 flags; + u64 i; + int written; + char g_str[64]; + int g_sz; + for (i = 0; i sargs-total_spaces; i++) { + flags = sargs-spaces[i].flags; + written = 0; memset(description, 0, 80); - if (flags BTRFS_BLOCK_GROUP_DATA) { - if (flags BTRFS_BLOCK_GROUP_METADATA) { - snprintf(description, 14, %s, -
[PATCH 2/2] btrfs-progs: introduce btrfs filesystem show --kernel
As of now btrfs filesystem show reads directly from disks. So sometimes output can be stale, mainly when user want to verify their last operation like, labeling or device delete or add... etc. This patch adds --kernel option to the 'filesystem show' subcli, which will read from the kernel instead of the disks directly. also this path adds the group profile info to the output eg: - btrfs fi show --kernel Label: none uuid: 39f55f14-e5ca-4a01-899d-915fd35bde05 mounted: /btrfs Group profile: metadata: RAID1 data: RAID1 Total devices 2 FS bytes used 7.40GB devid1 size 48.23GB used 11.04GB path /dev/dm-5 devid2 size 44.99GB used 11.03GB path /dev/mapper/mpathe Label: none uuid: a0beeb78-0019-4bdf-8002-0900a123ee07 mounted: /btrfs1 Group profile: mixed: single Total devices 1 FS bytes used 7.40GB devid1 size 15.00GB used 9.01GB path /dev/mapper/mpathbp1 btrfs fi show --kernel /btrfs2 Label: none uuid: 9d6a347e-e8a0-44fe-9d2a-d28ee45ef33f mounted: /btrfs2 Group profile: metadata: DUP data: single Total devices 1 FS bytes used 2.22MB devid1 size 15.00GB used 1.32GB path /dev/mapper/mpathcp1 btrfs fi show --kernel 9d6a347e-e8a0-44fe-9d2a-d28ee45ef33f Label: none uuid: 9d6a347e-e8a0-44fe-9d2a-d28ee45ef33f mounted: /btrfs2 Group profile: metadata: DUP data: single Total devices 1 FS bytes used 2.22MB devid1 size 15.00GB used 1.32GB path /dev/mapper/mpathcp1 v3-v4: dropped the dependence of used_bytes from the ioctl kernel, Instead used the get_df to calculate the used space. dropped the function device_list_add_from_kernel to update the original device_list_add instead I have my own print and device filters, this way I can add the group profile information in the show output. v2-v3: Do the stuffs without adding new ioctl new dependencies: this patch also depends on path 9/13 to 12/13 also sent here. v1-v2: code optimized to remove redundancy Signed-off-by: Anand Jain anand.j...@oracle.com --- cmds-filesystem.c | 165 +++--- man/btrfs.8.in| 5 +- 2 files changed, 159 insertions(+), 11 deletions(-) diff --git a/cmds-filesystem.c b/cmds-filesystem.c index be8afde..74ad30b 100644 --- a/cmds-filesystem.c +++ b/cmds-filesystem.c @@ -22,6 +22,9 @@ #include errno.h #include uuid/uuid.h #include ctype.h +#include mntent.h +#include fcntl.h +#include linux/limits.h #include kerncompat.h #include ctree.h @@ -251,8 +254,124 @@ static void print_one_uuid(struct btrfs_fs_devices *fs_devices) printf(\n); } +/* adds up all the used spaces as reported by the space info ioctl + */ +static u64 cal_used_bytes(struct btrfs_ioctl_space_args *si) +{ + u64 ret = 0; + int i; + for (i = 0; i si-total_spaces; i++) + ret += si-spaces[i].used_bytes; + return ret; +} + +static int print_one_fs(struct btrfs_ioctl_fs_info_args *fi, + struct btrfs_ioctl_dev_info_args *di_n, + struct btrfs_ioctl_space_args *si_n, char *label, char *path) +{ + int i; + char uuidbuf[37]; + struct btrfs_ioctl_dev_info_args *di = di_n; + u64 flags; + + uuid_unparse(fi-fsid, uuidbuf); + printf(Label: %s uuid: %s mounted: %s\n, + strlen(label)?label:none, uuidbuf, path); + printf(\tGroup profile:); + for (i = si_n-total_spaces - 1; i = 0; i--) { + flags = si_n-spaces[i].flags; + if (flags BTRFS_BLOCK_GROUP_SYSTEM) + continue; + printf( %s: %s, group_type_str(flags), + group_profile_str(flags)); + printf( ); + } + printf(\n); + + printf(\tTotal devices %llu FS bytes used %s\n, + fi-num_devices, + pretty_size(cal_used_bytes(si_n))); + + for (i = 0; i fi-num_devices; i++) { + di = (struct btrfs_ioctl_dev_info_args *)di_n[i]; + printf(\tdevid%llu size %s used %s path %s\n, + di-devid, + pretty_size(di-total_bytes), + pretty_size(di-bytes_used), + di-path); + } + + printf(\n); + return 0; +} + +/* This function checks if the given input parameter is + * an uuid or a path + * return -1: some error in the given input + * return 0: unknow input + * return 1: given input is uuid + * return 2: given input is path + */ +static int check_arg_type(char *input, u8 *processed) +{ + int ret = 0; + if (!uuid_parse(input, processed)) + ret = 1; + else if (realpath(input, (char *)processed)) + ret = 2; + return ret; +} + +static int btrfs_scan_kernel(void
[PATCH 1/2] btrfs-progs: btrfs_scan_one_dir not to skip links when /dev/mapper is provided
This is preparatory work to introduce /dev/mapper path usage we need btrfs_scan_one_dir to san devs under /dev/mapper, but /dev/mapper has links to the actual devs and current implementation of btrfs_scan_one_dir skips links so it does not pick any dev under /dev/mapper. skip the links are fine when scanning whole of /dev But not when we just want to scan /dev/mapper This patch just adds to check if we are scanning devs or /dev/mapper only, if when latter it will not skip links Thanks v2: changes as per David review Signed-off-by: Anand Jain anand.j...@oracle.com --- utils.c | 19 --- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/utils.c b/utils.c index 8a57967..038e599 100644 --- a/utils.c +++ b/utils.c @@ -1039,13 +1039,26 @@ int btrfs_scan_one_dir(char *dirname, int run_ioctl) struct list_head pending_list; struct btrfs_fs_devices *tmp_devices; u64 num_devices; + int skip_link = 1; + char rdir[PATH_MAX]; + char rdirp = NULL; INIT_LIST_HEAD(pending_list); pending = malloc(sizeof(*pending)); if (!pending) return -ENOMEM; - strcpy(pending-name, dirname); + + rdirp = realpath(dirname, rdir); + if (!rdirp) { + free(pending); + return -errno; + } + + strcpy(pending-name, rdir); + + if (!strcmp(rdir, /dev/mapper)) + skip_link = 0; again: dirname_len = strlen(pending-name); @@ -1078,7 +1091,7 @@ again: fprintf(stderr, failed to stat %s\n, fullpath); continue; } - if (S_ISLNK(st.st_mode)) + if (skip_link S_ISLNK(st.st_mode)) continue; if (S_ISDIR(st.st_mode)) { struct pending_dir *next = malloc(sizeof(*next)); @@ -1089,7 +1102,7 @@ again: strcpy(next-name, fullpath); list_add_tail(next-list, pending_list); } - if (!S_ISBLK(st.st_mode)) { + if (skip_link !S_ISBLK(st.st_mode)) { continue; } fd = open(fullpath, O_RDONLY); -- 1.8.1.191.g414c78c -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 0/2] scan /dev/mapper in filesystem show and device scan
This patch brings the /dev/mapper to be used as the path for the btrfs kernel through dev scan 1/2 is the preparatory patch Anand Jain (2): btrfs-progs: btrfs_scan_one_dir not to skip links when /dev/mapper is provided btrfs-progs: scan /dev/mapper in filesystem show and device scan cmds-device.c | 8 +++- cmds-filesystem.c | 7 +-- man/btrfs.8.in| 22 -- utils.c | 22 +++--- utils.h | 1 + 5 files changed, 44 insertions(+), 16 deletions(-) -- 1.8.1.191.g414c78c -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 2/2] btrfs-progs: scan /dev/mapper in filesystem show and device scan
Currently, btrsf fi show and btrfs dev scan uses /proc/partitions (by default) (which gives priority to dm-x over sdy paths) and with --all-devices it will scan /dev only (where it skips links under /dev/mapper). However using /dev/mapper paths are in common practice with mount, fstab, and lvm, so its better to be consistent with them. This patch adds --mapper option to btrfs device scan and btrfs filesystem show cli, when used will look for btrfs devs under /dev/mapper and will use the links provided under the /dev/mapper. eg: btrfs fi show --mapper Label: none uuid: 0a62-ad84-4d80-842a-dd9c1c60bf51 Total devices 2 FS bytes used 1.17MB devid1 size 44.99GB used 2.04GB path /dev/mapper/mpathe devid2 size 48.23GB used 2.03GB path /dev/mapper/mpathd Label: none uuid: bad9105f-bdc6-4626-9ba7-80bd97aebe19 Total devices 1 FS bytes used 28.00KB devid1 size 15.00GB used 2.04GB path /dev/mapper/mpathbp1 In the long run mapper path when present (along with /proc/partitions) can be the default option to scan for the btrfs devs. (/proc/partitions must be scanned as well because to include the mapper blacklisted (from mapper) devs.) Signed-off-by: Anand Jain anand.j...@oracle.com --- cmds-device.c | 8 +++- cmds-filesystem.c | 7 +-- man/btrfs.8.in| 22 -- utils.c | 3 +++ utils.h | 1 + 5 files changed, 28 insertions(+), 13 deletions(-) diff --git a/cmds-device.c b/cmds-device.c index be2aaff..6d1b378 100644 --- a/cmds-device.c +++ b/cmds-device.c @@ -186,7 +186,7 @@ static int cmd_rm_dev(int argc, char **argv) } static const char * const cmd_scan_dev_usage[] = { - btrfs device scan [--all-devices|device [device...]], + btrfs device scan [--all-devices|--mapper|device [device...]], Scan devices for a btrfs filesystem, NULL }; @@ -203,6 +203,12 @@ static int cmd_scan_dev(int argc, char **argv) where = BTRFS_SCAN_DEV; devstart += 1; + } else if( argc 1 !strcmp(argv[1],--mapper)){ + if (check_argc_max(argc, 2)) + usage(cmd_scan_dev_usage); + + where = BTRFS_SCAN_MAPPER; + devstart += 1; } if(argc=devstart){ diff --git a/cmds-filesystem.c b/cmds-filesystem.c index 74ad30b..88cace3 100644 --- a/cmds-filesystem.c +++ b/cmds-filesystem.c @@ -371,7 +371,7 @@ static int btrfs_scan_kernel(void *input, int type) } static const char * const cmd_show_usage[] = { - btrfs filesystem show [--all-devices|--mapper|--kernel|uuid], + btrfs filesystem show [--all-devices|--mapper|--kernel] [uuid|path], Show the structure of a filesystem, If no argument is given, structure of all present filesystems is shown., NULL @@ -388,9 +388,12 @@ static int cmd_show(int argc, char **argv) int searchstart = 1; u8 processed[PATH_MAX]; - if( argc 1 !strcmp(argv[1], --all-devices)){ + if (argc 1 !strcmp(argv[1], --all-devices)){ where = BTRFS_SCAN_DEV; searchstart += 1; + } else if (argc 1 !strcmp(argv[1], --mapper)) { + where = BTRFS_SCAN_MAPPER; + searchstart += 1; } else if (argc 1 !strcmp(argv[1], --kernel)) { where = 0; searchstart += 1; diff --git a/man/btrfs.8.in b/man/btrfs.8.in index 6383469..821f138 100644 --- a/man/btrfs.8.in +++ b/man/btrfs.8.in @@ -25,7 +25,7 @@ btrfs \- control a btrfs filesystem .PP \fBbtrfs\fP \fBfilesystem df\fP\fI path\fP .PP -\fBbtrfs\fP \fBfilesystem show\fP\fI [--all-devices|--kernel] [\fIuuid|path]\fP\fP +\fBbtrfs\fP \fBfilesystem show [\fP\fI--all-devices\fP|\fI--mapper\fP|\fI--kernel\fP] [\fIuuid\fP|\fIpath\fP] .PP \fBbtrfs\fP \fBfilesystem sync\fP\fI path \fP .PP @@ -51,7 +51,7 @@ btrfs \- control a btrfs filesystem .PP \fBbtrfs\fP \fBdevice delete\fP \fIdevice\fP [\fIdevice...\fP] \fIpath\fP .PP -\fBbtrfs\fP \fBdevice scan\fP [--all-devices|\fIdevice \fP[\fIdevice...\fP] +\fBbtrfs\fP \fBdevice scan\fP [\fI--all-devices\fP|\fI--mapper\fP|\fIdevice\fP [\fIdevice...\fP] .PP \fBbtrfs\fP \fBdevice ready\fP\fI device\fP .PP @@ -254,12 +254,13 @@ Show information of a given subvolume in the \fIpath\fR. Show space usage information for a mount point. .TP -\fBfilesystem show\fR [--all-devices|--kernel] [\fIuuid|path]\fP\fP -Show the btrfs filesystem with some additional info. If no \fIUUID\fP or -\fIlabel\fP is passed, \fBbtrfs\fR show info of all the btrfs filesystem. -If \fB--all-devices\fP is passed, all the devices under /dev are scanned; +\fBfilesystem show\fR [\fI--all-devices\fP|\fI--mapper\fP|\fI--kernel\fP] [\fIuuid\fP|\fIpath\fP]\fP +Show the btrfs filesystem with some additional info. If no \fIuuid\fP +is passed, it will show info of all the btrfs filesystem. +If \fI--all-devices\fP is passed, all the devices under /dev are
Re: [PATCH 0/2] scan /dev/mapper in filesystem show and device scan
Oh., I missed the libblkid part of David recommendation. I will be rewriting this patch set. sorry about that. Thanks, Anand On 08/08/2013 16:09, Anand Jain wrote: This patch brings the /dev/mapper to be used as the path for the btrfs kernel through dev scan 1/2 is the preparatory patch Anand Jain (2): btrfs-progs: btrfs_scan_one_dir not to skip links when /dev/mapper is provided btrfs-progs: scan /dev/mapper in filesystem show and device scan cmds-device.c | 8 +++- cmds-filesystem.c | 7 +-- man/btrfs.8.in| 22 -- utils.c | 22 +++--- utils.h | 1 + 5 files changed, 44 insertions(+), 16 deletions(-) -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v3 2/2] xfstests btrfs/316: test send / receive
Basic send / receive functionality test for btrfs. Requires current version of fsstress built (-x support). Relies on fssum tool but can skip the test if it failed to build. Signed-off-by: Jan Schmidt list@jan-o-sch.net Reviewed-by: Josef Bacik jba...@fusionio.com --- tests/btrfs/316 | 113 +++ tests/btrfs/316.out |4 ++ tests/btrfs/group |1 + 3 files changed, 118 insertions(+), 0 deletions(-) create mode 100755 tests/btrfs/316 create mode 100644 tests/btrfs/316.out diff --git a/tests/btrfs/316 b/tests/btrfs/316 new file mode 100755 index 000..087978a --- /dev/null +++ b/tests/btrfs/316 @@ -0,0 +1,113 @@ +#! /bin/bash +# FSQA Test No. 316 +# +# Run fsstress to create a reasonably strange file system, make a +# snapshot (base) and run more fsstress. Then take another snapshot +# (incr) and send both snapshots to a temp file. Remake the file +# system and receive from the files. Check both states with fssum. +# +#--- +# Copyright (C) 2013 STRATO. All rights reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +# +#--- +# +# creator +owner=list.bt...@jan-o-sch.net + +seq=`basename $0` +seqres=$RESULT_DIR/$seq +echo QA output created by $seq + +here=`pwd` +tmp=`mktemp -d` +status=1 + +_cleanup() +{ + echo *** unmount + umount $SCRATCH_MNT 2/dev/null + rm -f $tmp.* +} +trap _cleanup; exit \$status 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common/rc +. ./common/filter + +# real QA test starts here +_need_to_be_root +_supported_fs btrfs +_supported_os Linux +_require_scratch +_require_command $FSSUM_PROG fssum + +rm -f $seqres.full + +workout() +{ + fsz=$1 + ops=$2 + + umount $SCRATCH_DEV /dev/null 21 + echo *** mkfs -dsize=$fsz$seqres.full + echo $seqres.full + _scratch_mkfs_sized $fsz $seqres.full 21 \ + || _fail size=$fsz mkfs failed + run_check _scratch_mount -o noatime + + run_check $FSSTRESS_PROG -d $SCRATCH_MNT -n $ops $FSSTRESS_AVOID -x \ + $BTRFS_UTIL_PROG subvol snap -r $SCRATCH_MNT $SCRATCH_MNT/base + + run_check $BTRFS_UTIL_PROG subvol snap -r $SCRATCH_MNT $SCRATCH_MNT/incr + + echo # $BTRFS_UTIL_PROG send $SCRATCH_MNT/base $tmp/base.snap \ +$seqres.full + $BTRFS_UTIL_PROG send $SCRATCH_MNT/base $tmp/base.snap 2 $seqres.full \ + || _fail failed: '$@' + echo # $BTRFS_UTIL_PROG send -p $SCRATCH_MNT/base\ + $SCRATCH_MNT/incr $tmp/incr.snap $seqres.full + $BTRFS_UTIL_PROG send -p $SCRATCH_MNT/base \ + $SCRATCH_MNT/incr $tmp/incr.snap 2 $seqres.full \ + || _fail failed: '$@' + + run_check $FSSUM_PROG -A -f -w $tmp/base.fssum $SCRATCH_MNT/base + run_check $FSSUM_PROG -A -f -w $tmp/incr.fssum -x $SCRATCH_MNT/incr/base \ + $SCRATCH_MNT/incr + + umount $SCRATCH_DEV /dev/null 21 + echo *** mkfs -dsize=$fsz$seqres.full + echo $seqres.full + _scratch_mkfs_sized $fsz $seqres.full 21 \ + || _fail size=$fsz mkfs failed + run_check _scratch_mount -o noatime + + run_check $BTRFS_UTIL_PROG receive $SCRATCH_MNT $tmp/base.snap + run_check $FSSUM_PROG -r $tmp/base.fssum $SCRATCH_MNT/base + + run_check $BTRFS_UTIL_PROG receive $SCRATCH_MNT $tmp/incr.snap + run_check $FSSUM_PROG -r $tmp/incr.fssum $SCRATCH_MNT/incr +} + +echo *** test send / receive + +fssize=`expr 2000 \* 1024 \* 1024` +ops=200 + +workout $fssize $ops + +echo *** done +status=0 +exit diff --git a/tests/btrfs/316.out b/tests/btrfs/316.out new file mode 100644 index 000..4564c85 --- /dev/null +++ b/tests/btrfs/316.out @@ -0,0 +1,4 @@ +QA output created by 316 +*** test send / receive +*** done +*** unmount diff --git a/tests/btrfs/group b/tests/btrfs/group index bc6c256..11d708a 100644 --- a/tests/btrfs/group +++ b/tests/btrfs/group @@ -9,3 +9,4 @@ 276 auto rw metadata 284 auto 307 auto quick +316 auto rw metadata -- 1.7.2.5 -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More
[PATCH v3 1/2] xfstests: add fssum tool
fssum is a tool to build a recursive checksum for a file system. The home repository of fssum is git://git.kernel.org/pub/scm/linux/kernel/git/arne/far-progs.git It is added as an optional target, because it depends on glibc = 2.15 for SEEK_HOLE / SEEK_DATA. The test to be added using fssum will just be skipped if fssum wasn't built. Signed-off-by: Jan Schmidt list@jan-o-sch.net --- .gitignore|1 + common/config |2 + src/Makefile | 11 +- src/fssum.c | 819 + 4 files changed, 832 insertions(+), 1 deletions(-) create mode 100644 src/fssum.c diff --git a/.gitignore b/.gitignore index 11594aa..c2fc6e3 100644 --- a/.gitignore +++ b/.gitignore @@ -45,6 +45,7 @@ /src/fill /src/fill2 /src/fs_perms +/src/fssum /src/fstest /src/fsync-tester /src/ftrunc diff --git a/common/config b/common/config index 67c1498..c8bee29 100644 --- a/common/config +++ b/common/config @@ -146,6 +146,8 @@ export SED_PROG=`set_prog_path sed` export BC_PROG=`set_prog_path bc` [ $BC_PROG = ] _fatal bc not found +export FSSUM_PROG=`set_prog_path fssum $here/src/fssum` + export PS_ALL_FLAGS=-ef export DF_PROG=`set_prog_path df` diff --git a/src/Makefile b/src/Makefile index cc679e8..10a4d3c 100644 --- a/src/Makefile +++ b/src/Makefile @@ -20,10 +20,14 @@ LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \ stale_handle pwrite_mmap_blocked t_dir_offset2 seek_sanity_test \ seek_copy_test t_readdir_1 t_readdir_2 fsync-tester +OPT_TARGETS = fssum + SUBDIRS = LLDLIBS = $(LIBATTR) $(LIBHANDLE) $(LIBACL) +OPT_LDLIBS = -lssl -lcrypto + ifeq ($(HAVE_XLOG_ASSIGN_LSN), true) LINUX_TARGETS += loggen endif @@ -60,7 +64,7 @@ CFILES = $(TARGETS:=.c) LDIRT = $(TARGETS) -default: depend $(TARGETS) $(SUBDIRS) +default: depend $(TARGETS) $(OPT_TARGETS) $(SUBDIRS) depend: .dep @@ -70,11 +74,16 @@ $(TARGETS): $(LIBTEST) @echo [CC]$@ $(Q)$(LTLINK) $@.c -o $@ $(CFLAGS) $(LDFLAGS) $(LDLIBS) $(LIBTEST) +$(OPT_TARGETS): $(LIBTEST) + @echo [CC]$@ + -$(Q)$(LTLINK) $@.c -o $@ $(CFLAGS) $(LDFLAGS) $(LDLIBS) $(OPT_LDLIBS) $(LIBTEST) + LINKTEST = $(LTLINK) $@.c -o $@ $(CFLAGS) $(LDFLAGS) install: default $(addsuffix -install,$(SUBDIRS)) $(INSTALL) -m 755 -d $(PKG_LIB_DIR)/src $(LTINSTALL) -m 755 $(TARGETS) $(PKG_LIB_DIR)/src + -$(LTINSTALL) -m 755 $(OPT_TARGETS) $(PKG_LIB_DIR)/src $(LTINSTALL) -m 755 fill2attr fill2fs fill2fs_check scaleread.sh $(PKG_LIB_DIR)/src $(LTINSTALL) -m 644 dumpfile $(PKG_LIB_DIR)/src diff --git a/src/fssum.c b/src/fssum.c new file mode 100644 index 000..ecddb6a --- /dev/null +++ b/src/fssum.c @@ -0,0 +1,819 @@ +/* + * Copyright (C) 2012 STRATO AG. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ +#define _BSD_SOURCE +#define _LARGEFILE64_SOURCE +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include stdio.h +#include stdlib.h +#include unistd.h +#include string.h +#include fcntl.h +#include dirent.h +#include errno.h +#include sys/types.h +#include sys/stat.h +#ifdef __SOLARIS__ +#include sys/mkdev.h +#endif +#include openssl/md5.h +#include netinet/in.h +#include inttypes.h +#include assert.h + +#define CS_SIZE 16 +#define CHUNKS 128 + +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define htonll(x) __bswap_64 (x) +#endif + +/* TODO: add hardlink recognition */ +/* TODO: add xattr/acl */ + +struct excludes { + char *path; + int len; +}; + +typedef struct _sum { + MD5_CTX md5; + unsigned char out[16]; +} sum_t; + +typedef int (*sum_file_data_t)(int fd, sum_t *dst); + +int gen_manifest = 0; +int in_manifest = 0; +char *checksum = NULL; +struct excludes *excludes; +int n_excludes = 0; +int verbose = 0; +FILE *out_fp; +FILE *in_fp; + +enum _flags { + FLAG_UID, + FLAG_GID, + FLAG_MODE, + FLAG_ATIME, + FLAG_MTIME, + FLAG_CTIME, + FLAG_DATA, + FLAG_OPEN_ERROR, + FLAG_STRUCTURE, + NUM_FLAGS +}; + +const char flchar[] = ugoamcdes; +char line[65536]; + +int flags[NUM_FLAGS] = {1, 1, 1, 1, 1, 0, 1, 0, 0}; + +char * +getln(char *buf, int size, FILE *fp) +{ + char *p; + int l; + + p = fgets(buf, size, fp); + if (!p) + return NULL; + + l
[PATCH v3 0/2] xfstest btrfs/316: test send / receive
These two patches add the announced tests for btrfs send / receive. As requested, the fssum tool is now included. One drawback is that I'm unable to edit configure.ac or whatever needs to be modified in an autotools preferred way. Any hints appreciated, preferrably hints containing all the modifications required to introduce something like HAVE_SEEK_HOLE. I do not want to make modifications to fssum.c here, if that's absolutely required (because one /could/ get along using linux/fs.h, which is not the way I would like to go), I'd like to have that changed in the far-progs repository where fssum.c comes from as well. -- v1-v2: - included fssum - test number is now 316 (was 314) v2-v3: - added missing -lcrypto to build fssum - removed obsolete change in README now that fssum is included - fixed comment in test/btrfs/316's header (314 - 316) Jan Schmidt (2): xfstests: add fssum tool xfstests btrfs/316: test send / receive .gitignore |1 + common/config |2 + src/Makefile| 11 +- src/fssum.c | 819 +++ tests/btrfs/316 | 113 +++ tests/btrfs/316.out |4 + tests/btrfs/group |1 + 7 files changed, 950 insertions(+), 1 deletions(-) create mode 100644 src/fssum.c create mode 100755 tests/btrfs/316 create mode 100644 tests/btrfs/316.out -- 1.7.2.5 -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC PATCH v6 1/5] Btrfs: skip merge part for delayed data refs
When we have data deduplication on, we'll hang on the merge part because it needs to verify every queued delayed data refs related to this disk offset but we may have millions refs. And in the case of delayed data refs, we don't usually have too much data refs to merge. So it's safe to shut it down for data refs. Signed-off-by: Liu Bo bo.li@oracle.com --- fs/btrfs/delayed-ref.c |7 +++ 1 files changed, 7 insertions(+), 0 deletions(-) diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index f7be9f7..fc4ce8b 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -320,6 +320,13 @@ void btrfs_merge_delayed_refs(struct btrfs_trans_handle *trans, struct rb_node *node; u64 seq = 0; + /* +* We don't have too much refs to merge in the case of delayed data +* refs. +*/ + if (head-is_data) + return; + spin_lock(fs_info-tree_mod_seq_lock); if (!list_empty(fs_info-tree_mod_seq_list)) { struct seq_list *elem; -- 1.7.7 -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC PATCH v6 3/5] Btrfs: introduce a head ref rbtree
The way how we process delayed refs is 1) get a bunch of head refs, 2) pick up one head ref, 3) go one node back for any delayed ref updates. The head ref is also linked in the same rbtree as the delayed ref is, so in 1) stage, we have to walk one by one including not only head refs, but delayed refs. When we have a great number of delayed refs pending to process, this'll cost time a lot. Here we introduce a head ref specific rbtree, it only has head refs, so troubles go away. Signed-off-by: Liu Bo bo.li@oracle.com --- fs/btrfs/delayed-ref.c | 124 fs/btrfs/delayed-ref.h |5 ++ fs/btrfs/disk-io.c |3 + fs/btrfs/extent-tree.c | 21 +--- fs/btrfs/transaction.c |4 +- 5 files changed, 98 insertions(+), 59 deletions(-) diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 198b7ad..73a3e55 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -161,35 +161,61 @@ static struct btrfs_delayed_ref_node *tree_insert(struct rb_root *root, return NULL; } +/* insert a new ref to head ref rbtree */ +static struct btrfs_delayed_ref_head *htree_insert(struct rb_root *root, + struct rb_node *node) +{ + struct rb_node **p = root-rb_node; + struct rb_node *parent_node = NULL; + struct btrfs_delayed_ref_head *entry; + struct btrfs_delayed_ref_head *ins; + u64 bytenr; + + ins = rb_entry(node, struct btrfs_delayed_ref_head, href_node); + bytenr = ins-node.bytenr; + while (*p) { + parent_node = *p; + entry = rb_entry(parent_node, struct btrfs_delayed_ref_head, +href_node); + + if (bytenr entry-node.bytenr) + p = (*p)-rb_left; + else if (bytenr entry-node.bytenr) + p = (*p)-rb_right; + else + return entry; + } + + rb_link_node(node, parent_node, p); + rb_insert_color(node, root); + return NULL; +} + /* * find an head entry based on bytenr. This returns the delayed ref * head if it was able to find one, or NULL if nothing was in that spot. * If return_bigger is given, the next bigger entry is returned if no exact * match is found. */ -static struct btrfs_delayed_ref_node *find_ref_head(struct rb_root *root, - u64 bytenr, - struct btrfs_delayed_ref_node **last, - int return_bigger) +static struct btrfs_delayed_ref_head * +find_ref_head(struct rb_root *root, u64 bytenr, + struct btrfs_delayed_ref_head **last, int return_bigger) { struct rb_node *n; - struct btrfs_delayed_ref_node *entry; + struct btrfs_delayed_ref_head *entry; int cmp = 0; again: n = root-rb_node; entry = NULL; while (n) { - entry = rb_entry(n, struct btrfs_delayed_ref_node, rb_node); - WARN_ON(!entry-in_tree); + entry = rb_entry(n, struct btrfs_delayed_ref_head, href_node); if (last) *last = entry; - if (bytenr entry-bytenr) + if (bytenr entry-node.bytenr) cmp = -1; - else if (bytenr entry-bytenr) - cmp = 1; - else if (!btrfs_delayed_ref_is_head(entry)) + else if (bytenr entry-node.bytenr) cmp = 1; else cmp = 0; @@ -203,12 +229,12 @@ again: } if (entry return_bigger) { if (cmp 0) { - n = rb_next(entry-rb_node); + n = rb_next(entry-href_node); if (!n) n = rb_first(root); - entry = rb_entry(n, struct btrfs_delayed_ref_node, -rb_node); - bytenr = entry-bytenr; + entry = rb_entry(n, struct btrfs_delayed_ref_head, +href_node); + bytenr = entry-node.bytenr; return_bigger = 0; goto again; } @@ -246,6 +272,12 @@ static void inline drop_delayed_ref(struct btrfs_trans_handle *trans, struct btrfs_delayed_ref_node *ref) { rb_erase(ref-rb_node, delayed_refs-root); + if (btrfs_delayed_ref_is_head(ref)) { + struct btrfs_delayed_ref_head *head; + + head = btrfs_delayed_node_to_head(ref); + rb_erase(head-href_node, delayed_refs-href_root); + } ref-in_tree = 0; btrfs_put_delayed_ref(ref); delayed_refs-num_entries--; @@ -386,42 +418,35 @@ int
[RFC PATCH v6 0/5] Online data deduplication
Data deduplication is a specialized data compression technique for eliminating duplicate copies of repeating data.[1] This patch set is also related to Content based storage in project ideas[2]. PATCH 1 is a hang fix with deduplication on, but it's also useful without dedup in practice use. PATCH 2 and 3 are targetting delayed refs' scalability problems, which are uncovered by the dedup feature. PATCH 4 is a speed-up improvement, which is about dedup and quota. PATCH 5 is full of real things, all details about implementation of dedup. Plus, there is also a btrfs-progs patch which helps to enable/disable dedup feature. TODO: * a bit-to-bit comparison callback. All comments are welcome! [1]: http://en.wikipedia.org/wiki/Data_deduplication [2]: https://btrfs.wiki.kernel.org/index.php/Project_ideas#Content_based_storage v5-v6: - remove BUG_ON()s and use proper error handling. - make dedup hash endian safe on disk. - refractor dedup tree item. - fix a bug of deleting file extents with dedup disabled. - some cleanups - add manpage for dedup subcommand. v4-v5: - go back to one dedup key with a special backref for dedup tree because the disk format understands backref well. - fix a fsync hang with dedup enabled. - rebase onto the latest btrfs. Liu Bo (5): Btrfs: skip merge part for delayed data refs Btrfs: improve the delayed refs process in rm case Btrfs: introduce a head ref rbtree Btrfs: disable qgroups accounting when quata_enable is 0 Btrfs: online data deduplication fs/btrfs/backref.c |9 + fs/btrfs/ctree.c |2 +- fs/btrfs/ctree.h | 82 ++ fs/btrfs/delayed-ref.c | 159 +++ fs/btrfs/delayed-ref.h |8 + fs/btrfs/disk-io.c | 31 ++ fs/btrfs/extent-tree.c | 190 +++-- fs/btrfs/extent_io.c | 29 ++- fs/btrfs/extent_io.h | 16 + fs/btrfs/file-item.c | 211 ++ fs/btrfs/inode.c | 673 +++- fs/btrfs/ioctl.c | 93 ++ fs/btrfs/ordered-data.c| 38 ++- fs/btrfs/ordered-data.h| 13 +- fs/btrfs/qgroup.c |3 + fs/btrfs/relocation.c |3 + fs/btrfs/super.c | 27 ++- fs/btrfs/transaction.c |4 +- include/uapi/linux/btrfs.h |5 + 19 files changed, 1420 insertions(+), 176 deletions(-) -- 1.7.7 -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[RFC PATCH v6 4/5] Btrfs: disable qgroups accounting when quata_enable is 0
It's unnecessary to do qgroups accounting without enabling quota. Signed-off-by: Liu Bo bo.li@oracle.com --- v6: * don't record seq for qgroups with quota disabled as we do not need to, and keep the checker of qgroups. fs/btrfs/ctree.c |2 +- fs/btrfs/delayed-ref.c | 18 ++ fs/btrfs/qgroup.c |3 +++ 3 files changed, 18 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index d5387dd..2d22ddf 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -408,7 +408,7 @@ u64 btrfs_get_tree_mod_seq(struct btrfs_fs_info *fs_info, tree_mod_log_write_lock(fs_info); spin_lock(fs_info-tree_mod_seq_lock); - if (!elem-seq) { + if (elem !elem-seq) { elem-seq = btrfs_inc_tree_mod_seq_major(fs_info); list_add_tail(elem-list, fs_info-tree_mod_seq_list); } diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index 73a3e55..af57cfc 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -691,8 +691,13 @@ static noinline void add_delayed_tree_ref(struct btrfs_fs_info *fs_info, ref-is_head = 0; ref-in_tree = 1; - if (need_ref_seq(for_cow, ref_root)) - seq = btrfs_get_tree_mod_seq(fs_info, trans-delayed_ref_elem); + if (need_ref_seq(for_cow, ref_root)) { + struct seq_list *elem = NULL; + + if (fs_info-quota_enabled) + elem = trans-delayed_ref_elem; + seq = btrfs_get_tree_mod_seq(fs_info, elem); + } ref-seq = seq; full_ref = btrfs_delayed_node_to_tree_ref(ref); @@ -750,8 +755,13 @@ static noinline void add_delayed_data_ref(struct btrfs_fs_info *fs_info, ref-is_head = 0; ref-in_tree = 1; - if (need_ref_seq(for_cow, ref_root)) - seq = btrfs_get_tree_mod_seq(fs_info, trans-delayed_ref_elem); + if (need_ref_seq(for_cow, ref_root)) { + struct seq_list *elem = NULL; + + if (fs_info-quota_enabled) + elem = trans-delayed_ref_elem; + seq = btrfs_get_tree_mod_seq(fs_info, elem); + } ref-seq = seq; full_ref = btrfs_delayed_node_to_data_ref(ref); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 1280eff..780ff14 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -1200,6 +1200,9 @@ int btrfs_qgroup_record_ref(struct btrfs_trans_handle *trans, { struct qgroup_update *u; + if (!trans-root-fs_info-quota_enabled) + return 0; + BUG_ON(!trans-delayed_ref_elem.seq); u = kmalloc(sizeof(*u), GFP_NOFS); if (!u) -- 1.7.7 -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH v2] Btrfs-progs: add dedup subcommand
This aims to add deduplication subcommand, 'btrfs dedup command path', ie. register/unregister'. It can be used to enable or disable dedup support for a filesystem. Signed-off-by: Liu Bo bo.li@oracle.com --- v2: add manpage Makefile |2 +- btrfs.c|1 + cmds-dedup.c | 101 commands.h |2 + ctree.h|2 + ioctl.h|5 +++ man/btrfs.8.in | 12 +++ 7 files changed, 124 insertions(+), 1 deletions(-) create mode 100644 cmds-dedup.c diff --git a/Makefile b/Makefile index da7438e..5b4a07d 100644 --- a/Makefile +++ b/Makefile @@ -10,7 +10,7 @@ objects = ctree.o disk-io.o radix-tree.o extent-tree.o print-tree.o \ cmds_objects = cmds-subvolume.o cmds-filesystem.o cmds-device.o cmds-scrub.o \ cmds-inspect.o cmds-balance.o cmds-send.o cmds-receive.o \ cmds-quota.o cmds-qgroup.o cmds-replace.o cmds-check.o \ - cmds-restore.o + cmds-restore.o cmds-dedup.o libbtrfs_objects = send-stream.o send-utils.o rbtree.o btrfs-list.o crc32c.o libbtrfs_headers = send-stream.h send-utils.h send.h rbtree.h btrfs-list.h \ crc32c.h list.h kerncompat.h radix-tree.h extent-cache.h \ diff --git a/btrfs.c b/btrfs.c index 691adef..956905c 100644 --- a/btrfs.c +++ b/btrfs.c @@ -254,6 +254,7 @@ const struct cmd_group btrfs_cmd_group = { { quota, cmd_quota, NULL, quota_cmd_group, 0 }, { qgroup, cmd_qgroup, NULL, qgroup_cmd_group, 0 }, { replace, cmd_replace, NULL, replace_cmd_group, 0 }, + { dedup, cmd_dedup, NULL, dedup_cmd_group, 0 }, { help, cmd_help, cmd_help_usage, NULL, 0 }, { version, cmd_version, cmd_version_usage, NULL, 0 }, { 0, 0, 0, 0, 0 } diff --git a/cmds-dedup.c b/cmds-dedup.c new file mode 100644 index 000..a977585 --- /dev/null +++ b/cmds-dedup.c @@ -0,0 +1,101 @@ +/* + * Copyright (C) 2013 Oracle. All rights reserved. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public + * License along with this program; if not, write to the + * Free Software Foundation, Inc., 59 Temple Place - Suite 330, + * Boston, MA 021110-1307, USA. + */ + +#include sys/ioctl.h +#include unistd.h + +#include ctree.h +#include ioctl.h + +#include commands.h +#include utils.h + +static const char * const dedup_cmd_group_usage[] = { + btrfs dedup command [options] path, + NULL +}; + +int dedup_ctl(int cmd, int argc, char **argv) +{ + int ret = 0; + int fd; + int e; + char *path = argv[1]; + + if (check_argc_exact(argc, 2)) + return -1; + + fd = open_file_or_dir(path); + if (fd 0) { + fprintf(stderr, ERROR: can't access '%s'\n, path); + return -EACCES; + } + + ret = ioctl(fd, BTRFS_IOC_DEDUP_CTL, cmd); + e = errno; + close(fd); + if (ret 0) { + fprintf(stderr, ERROR: dedup command failed: %s\n, + strerror(e)); + if (cmd == BTRFS_DEDUP_CTL_UNREG) + fprintf(stderr, please refer to 'dmesg | tail' for more info\n); + return -EINVAL; + } + return 0; +} + +static const char * const cmd_dedup_reg_usage[] = { + btrfs dedup register path, + Enable data deduplication support for a filesystem., + NULL +}; + +static int cmd_dedup_reg(int argc, char **argv) +{ + int ret = dedup_ctl(BTRFS_DEDUP_CTL_REG, argc, argv); + if (ret 0) + usage(cmd_dedup_reg_usage); + return ret; +} + +static const char * const cmd_dedup_unreg_usage[] = { + btrfs dedup unregister path, + Disable data deduplication support for a filesystem., + NULL +}; + +static int cmd_dedup_unreg(int argc, char **argv) +{ + int ret = dedup_ctl(BTRFS_DEDUP_CTL_UNREG, argc, argv); + if (ret 0) + usage(cmd_dedup_unreg_usage); + return ret; +} + +const struct cmd_group dedup_cmd_group = { + dedup_cmd_group_usage, NULL, { + { register, cmd_dedup_reg, cmd_dedup_reg_usage, NULL, 0 }, + { unregister, cmd_dedup_unreg, cmd_dedup_unreg_usage, 0, 0 }, + { 0, 0, 0, 0, 0 } + } +}; + +int cmd_dedup(int argc, char **argv) +{ + return handle_command_group(dedup_cmd_group, argc, argv); +} diff --git a/commands.h b/commands.h index 15c616d..d31afa4 100644 --- a/commands.h +++ b/commands.h @@
[RFC PATCH v6 2/5] Btrfs: improve the delayed refs process in rm case
While removing a file with dedup extents, we could have a great number of delayed refs pending to process, and these refs refer to droping a ref of the extent, which is of BTRFS_DROP_DELAYED_REF type. But in order to prevent an extent's ref count from going down to zero when there still are pending delayed refs, we first select those adding a ref ones, which is of BTRFS_ADD_DELAYED_REF type. So in removing case, all of our delayed refs are of BTRFS_DROP_DELAYED_REF type, but we have to walk all the refs issued to the extent to find any BTRFS_ADD_DELAYED_REF types and end up there is no such thing, and then start over again to find BTRFS_DROP_DELAYED_REF. This is really unnecessary, we can improve this by tracking how many BTRFS_ADD_DELAYED_REF refs we have and search by the right type. Signed-off-by: Liu Bo bo.li@oracle.com --- fs/btrfs/delayed-ref.c | 10 ++ fs/btrfs/delayed-ref.h |3 +++ fs/btrfs/extent-tree.c | 17 - 3 files changed, 29 insertions(+), 1 deletions(-) diff --git a/fs/btrfs/delayed-ref.c b/fs/btrfs/delayed-ref.c index fc4ce8b..198b7ad 100644 --- a/fs/btrfs/delayed-ref.c +++ b/fs/btrfs/delayed-ref.c @@ -543,6 +543,10 @@ update_existing_head_ref(struct btrfs_delayed_ref_node *existing, * update the reference mod on the head to reflect this new operation */ existing-ref_mod += update-ref_mod; + + WARN_ON_ONCE(update-ref_mod 1); + if (update-ref_mod == 1) + existing_ref-add_cnt++; } /* @@ -604,6 +608,12 @@ static noinline void add_delayed_ref_head(struct btrfs_fs_info *fs_info, head_ref-must_insert_reserved = must_insert_reserved; head_ref-is_data = is_data; + /* track added ref, more comments in select_delayed_ref() */ + if (count_mod == 1) + head_ref-add_cnt = 1; + else + head_ref-add_cnt = 0; + INIT_LIST_HEAD(head_ref-cluster); mutex_init(head_ref-mutex); diff --git a/fs/btrfs/delayed-ref.h b/fs/btrfs/delayed-ref.h index 70b962c..9377b27 100644 --- a/fs/btrfs/delayed-ref.h +++ b/fs/btrfs/delayed-ref.h @@ -84,6 +84,9 @@ struct btrfs_delayed_ref_head { struct list_head cluster; struct btrfs_delayed_extent_op *extent_op; + + int add_cnt; + /* * when a new extent is allocated, it is just reserved in memory * The actual extent isn't inserted into the extent allocation tree diff --git a/fs/btrfs/extent-tree.c b/fs/btrfs/extent-tree.c index 70002ea..2b8729e 100644 --- a/fs/btrfs/extent-tree.c +++ b/fs/btrfs/extent-tree.c @@ -2260,6 +2260,16 @@ select_delayed_ref(struct btrfs_delayed_ref_head *head) struct rb_node *node; struct btrfs_delayed_ref_node *ref; int action = BTRFS_ADD_DELAYED_REF; + + /* +* track the count of BTRFS_ADD_DELAYED_REF, +* in the case that there's no BTRFS_ADD_DELAYED_REF while there're a +* a great number of BTRFS_DROP_DELAYED_REF, +* it'll waste time on searching BTRFS_ADD_DELAYED_REF, usually this +* happens with dedup enabled. +*/ + if (head-add_cnt == 0) + action = BTRFS_DROP_DELAYED_REF; again: /* * select delayed ref of type BTRFS_ADD_DELAYED_REF first. @@ -2274,8 +2284,11 @@ again: rb_node); if (ref-bytenr != head-node.bytenr) break; - if (ref-action == action) + if (ref-action == action) { + if (action == BTRFS_ADD_DELAYED_REF) + head-add_cnt--; return ref; + } node = rb_prev(node); } if (action == BTRFS_ADD_DELAYED_REF) { @@ -2351,6 +2364,8 @@ static noinline int run_clustered_refs(struct btrfs_trans_handle *trans, * there are still refs with lower seq numbers in the * process of being added. Don't run this ref yet. */ + if (ref-action == BTRFS_ADD_DELAYED_REF) + locked_ref-add_cnt++; list_del_init(locked_ref-cluster); btrfs_delayed_ref_unlock(locked_ref); locked_ref = NULL; -- 1.7.7 -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/3] Btrfs: catch error return value from find_extent_in_eb()
On Thu, Aug 8, 2013 at 6:04 AM, Wang Shilong wangsl.f...@cn.fujitsu.com wrote: find_extent_in_eb() may return ENOMEM, catch this error return value. Signed-off-by: Wang Shilong wangsl.f...@cn.fujitsu.com Reviewed-by: Miao Xie mi...@cn.fujitsu.com --- fs/btrfs/backref.c | 4 1 file changed, 4 insertions(+) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 54e7610..f7781e6 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -934,6 +934,10 @@ again: } ret = find_extent_in_eb(eb, bytenr, *extent_item_pos, eie); + if (ret) { + free_extent_buffer(eb); + goto out; + } ref-inode_list = eie; free_extent_buffer(eb); } Hello, this is a duplicate of: https://patchwork.kernel.org/patch/2835989/ thanks -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] Btrfs-progs: fix a regression in mkfs.btrfs
Commit 55061a98 adds a cut paste error that makes mkfs.btrfs fail if leafsize != sectorsize. Signed-off-by: Stefan Behrens sbehr...@giantdisaster.de --- utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils.c b/utils.c index 15b991f..691b075 100644 --- a/utils.c +++ b/utils.c @@ -430,7 +430,7 @@ int make_btrfs(int fd, const char *device, const char *label, ret = pwrite(fd, buf-data, sectorsize, blocks[0]); if (ret 0) return -errno; - else if (ret != leafsize) + else if (ret != sectorsize) return -EIO; free(buf); -- 1.8.3.4 -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 4/5] Btrfs-progs: return error on write failure in make_btrfs()
On Thu, 4 Jul 2013 10:48:39 +0100, Filipe David Borba Manana wrote: Instead of aborting with a BUG_ON() statement, return a negated errno code. Also updated mkfs and convert tools to print a nicer error message when make_btrfs() returns an error. Signed-off-by: Filipe David Borba Manana fdman...@gmail.com [...] ret = pwrite(fd, buf-data, sectorsize, blocks[0]); - BUG_ON(ret != sectorsize); - + if (ret 0) + return -errno; + else if (ret != leafsize) + return -EIO; mkfs.btrfs with leafsize != sectorsize fails. I've sent a patch for it. -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Btrfs-progs: fix a regression in mkfs.btrfs
On Thu, Aug 8, 2013 at 11:51 AM, Stefan Behrens sbehr...@giantdisaster.de wrote: Commit 55061a98 adds a cut paste error that makes mkfs.btrfs fail if leafsize != sectorsize. Signed-off-by: Stefan Behrens sbehr...@giantdisaster.de --- utils.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils.c b/utils.c index 15b991f..691b075 100644 --- a/utils.c +++ b/utils.c @@ -430,7 +430,7 @@ int make_btrfs(int fd, const char *device, const char *label, ret = pwrite(fd, buf-data, sectorsize, blocks[0]); if (ret 0) return -errno; - else if (ret != leafsize) + else if (ret != sectorsize) return -EIO; Reviewed-by: Filipe Manana fdman...@gmail.com free(buf); -- 1.8.3.4 -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html -- Filipe David Manana, Reasonable men adapt themselves to the world. Unreasonable men adapt the world to themselves. That's why all progress depends on unreasonable men. -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 4/5] Btrfs-progs: return error on write failure in make_btrfs()
On Thu, Aug 8, 2013 at 11:52 AM, Stefan Behrens sbehr...@giantdisaster.de wrote: On Thu, 4 Jul 2013 10:48:39 +0100, Filipe David Borba Manana wrote: Instead of aborting with a BUG_ON() statement, return a negated errno code. Also updated mkfs and convert tools to print a nicer error message when make_btrfs() returns an error. Signed-off-by: Filipe David Borba Manana fdman...@gmail.com [...] ret = pwrite(fd, buf-data, sectorsize, blocks[0]); - BUG_ON(ret != sectorsize); - + if (ret 0) + return -errno; + else if (ret != leafsize) + return -EIO; mkfs.btrfs with leafsize != sectorsize fails. I've sent a patch for it. Copy paste error :( Thanks for catching and fixing it. -- Filipe David Manana, Reasonable men adapt themselves to the world. Unreasonable men adapt the world to themselves. That's why all progress depends on unreasonable men. -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: btrfs-progs: drop weird indirections dead code from send/receive
On Fri, 02 Aug 2013 20:24:55 -0500, Eric Sandeen wrote: cmds-recieve.c cmds-send.c seem to have weird wrappers and indirections, and groups of commands which have only one member, which are never referenced in the code. I think these can be removed. Signed-off-by: Eric Sandeen sand...@redhat.com --- cmds-receive.c | 19 +-- cmds-send.c| 12 +--- commands.h |2 -- 3 files changed, 2 insertions(+), 31 deletions(-) Stefan, I'd appreciate your review testing of this though! TBH it's an old patch I had laying around, but I have not re-tested it recently. Reviewed tested without issues. diff --git a/cmds-receive.c b/cmds-receive.c index 4e480f9..7abce76 100644 --- a/cmds-receive.c +++ b/cmds-receive.c @@ -907,7 +907,7 @@ out: return ret; } -static int do_cmd_receive(int argc, char **argv) +int cmd_receive(int argc, char **argv) { int c; char *tomnt = NULL; @@ -960,11 +960,6 @@ static int do_cmd_receive(int argc, char **argv) return ret; } -static const char * const receive_cmd_group_usage[] = { - btrfs receive command args, - NULL -}; - const char * const cmd_receive_usage[] = { btrfs receive [-ve] [-f infile] mount, Receive subvolumes from stdin., @@ -988,15 +983,3 @@ const char * const cmd_receive_usage[] = { is recognized or on EOF., NULL }; - -const struct cmd_group receive_cmd_group = { - receive_cmd_group_usage, NULL, { - { receive, do_cmd_receive, cmd_receive_usage, NULL, 0 }, - { 0, 0, 0, 0, 0 }, -}, -}; - -int cmd_receive(int argc, char **argv) -{ - return do_cmd_receive(argc, argv); -} diff --git a/cmds-send.c b/cmds-send.c index 7209aba..f9899f4 100644 --- a/cmds-send.c +++ b/cmds-send.c @@ -465,7 +465,7 @@ out: return ret; } -int cmd_send_start(int argc, char **argv) +int cmd_send(int argc, char **argv) { char *subvol = NULL; int c; @@ -718,11 +718,6 @@ out: return ret; } -static const char * const send_cmd_group_usage[] = { - btrfs send command args, - NULL -}; - const char * const cmd_send_usage[] = { btrfs send [-ve] [-p parent] [-c clone-src] subvol, Send the subvolume to stdout., @@ -750,8 +745,3 @@ const char * const cmd_send_usage[] = { use pipes., NULL }; - -int cmd_send(int argc, char **argv) -{ - return cmd_send_start(argc, argv); -} diff --git a/commands.h b/commands.h index 65829f4..3f12fab 100644 --- a/commands.h +++ b/commands.h @@ -85,8 +85,6 @@ extern const struct cmd_group balance_cmd_group; extern const struct cmd_group device_cmd_group; extern const struct cmd_group scrub_cmd_group; extern const struct cmd_group inspect_cmd_group; -extern const struct cmd_group send_cmd_group; -extern const struct cmd_group receive_cmd_group; extern const struct cmd_group quota_cmd_group; extern const struct cmd_group qgroup_cmd_group; extern const struct cmd_group replace_cmd_group; -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/3] Btrfs: catch error return value from find_extent_in_eb()
On Thu, August 08, 2013 at 12:24 (+0200), Filipe David Manana wrote: On Thu, Aug 8, 2013 at 6:04 AM, Wang Shilong wangsl.f...@cn.fujitsu.com wrote: find_extent_in_eb() may return ENOMEM, catch this error return value. Signed-off-by: Wang Shilong wangsl.f...@cn.fujitsu.com Reviewed-by: Miao Xie mi...@cn.fujitsu.com --- fs/btrfs/backref.c | 4 1 file changed, 4 insertions(+) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 54e7610..f7781e6 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -934,6 +934,10 @@ again: } ret = find_extent_in_eb(eb, bytenr, *extent_item_pos, eie); + if (ret) { + free_extent_buffer(eb); + goto out; + } ref-inode_list = eie; free_extent_buffer(eb); } Hello, this is a duplicate of: https://patchwork.kernel.org/patch/2835989/ Your linked patch checks for ret 0, which is a safer option since there are functions down the stack returning 0 or 0 for success and 0 for errors. Currently, find_extent_in_eb doesn't return their return values, but I'd rather be a bit more on the safe side and use your patch. Thanks, -Jan -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/3] Btrfs: fix possible memory leak in find_parent_nodes()
On 08/08/2013 07:02 PM, Jan Schmidt wrote: On Thu, August 08, 2013 at 07:04 (+0200), Wang Shilong wrote: Signed-off-by: Wang Shilong wangsl.f...@cn.fujitsu.com Reviewed-by: Miao Xie mi...@cn.fujitsu.com --- fs/btrfs/backref.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index cb73a12..54e7610 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -911,7 +911,6 @@ again: while (!list_empty(prefs)) { ref = list_first_entry(prefs, struct __prelim_ref, list); -list_del(ref-list); WARN_ON(ref-count 0); if (ref-count ref-root_id ref-parent == 0) { /* no parent == root of tree */ @@ -954,6 +953,7 @@ again: eie-next = ref-inode_list; } } +list_del(ref-list); kfree(ref); } I'm not convinced, you're not calling kfree() more often. Can you please add some patch description? Yeah. i will add more description in V2. Thanks Wang -Jan -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/3] Btrfs: catch error return value from find_extent_in_eb()
On 08/08/2013 06:24 PM, Filipe David Manana wrote: On Thu, Aug 8, 2013 at 6:04 AM, Wang Shilong wangsl.f...@cn.fujitsu.com wrote: find_extent_in_eb() may return ENOMEM, catch this error return value. Signed-off-by: Wang Shilong wangsl.f...@cn.fujitsu.com Reviewed-by: Miao Xie mi...@cn.fujitsu.com --- fs/btrfs/backref.c | 4 1 file changed, 4 insertions(+) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 54e7610..f7781e6 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -934,6 +934,10 @@ again: } ret = find_extent_in_eb(eb, bytenr, *extent_item_pos, eie); + if (ret) { + free_extent_buffer(eb); + goto out; + } ref-inode_list = eie; free_extent_buffer(eb); } Hello, this is a duplicate of: https://patchwork.kernel.org/patch/2835989/ Yeah, just ignore my patch. Thanks, Wang thanks -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 3/3] Btrfs: allocate prelim_ref with a slab allocater
On Thu, August 08, 2013 at 07:04 (+0200), Wang Shilong wrote: struct __prelim_ref is allocated and freed frequently when walking backref tree, using slab allocater can not only speed up allocating but also detect memory leaks. Signed-off-by: Wang Shilong wangsl.f...@cn.fujitsu.com Reviewed-by: Miao Xie mi...@cn.fujitsu.com --- fs/btrfs/backref.c | 30 +- fs/btrfs/backref.h | 2 ++ fs/btrfs/super.c | 8 3 files changed, 35 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index f7781e6..916e4f1 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -119,6 +119,26 @@ struct __prelim_ref { u64 wanted_disk_byte; }; +static struct kmem_cache *prelim_ref_cache; + +int __init btrfs_prelim_ref_init(void) +{ + prelim_ref_cache = kmem_cache_create(btrfs_prelim_ref, + sizeof(struct __prelim_ref), + 0, + SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, + NULL); + if (!prelim_ref_cache) + return -ENOMEM; + return 0; +} + +void btrfs_prelim_ref_exit(void) +{ + if (prelim_ref_cache) + kmem_cache_destroy(prelim_ref_cache); +} + /* * the rules for all callers of this function are: * - obtaining the parent is the goal @@ -165,7 +185,7 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id, { struct __prelim_ref *ref; - ref = kmalloc(sizeof(*ref), gfp_mask); + ref = kmem_cache_alloc(prelim_ref_cache, gfp_mask); if (!ref) return -ENOMEM; @@ -493,7 +513,7 @@ static void __merge_refs(struct list_head *head, int mode) ref1-count += ref2-count; list_del(ref2-list); - kfree(ref2); + kmem_cache_free(prelim_ref_cache, ref2); } } @@ -958,7 +978,7 @@ again: } } list_del(ref-list); - kfree(ref); + kmem_cache_free(prelim_ref_cache, ref); } out: @@ -966,13 +986,13 @@ out: while (!list_empty(prefs)) { ref = list_first_entry(prefs, struct __prelim_ref, list); list_del(ref-list); - kfree(ref); + kmem_cache_free(prelim_ref_cache, ref); } while (!list_empty(prefs_delayed)) { ref = list_first_entry(prefs_delayed, struct __prelim_ref, list); list_del(ref-list); - kfree(ref); + kmem_cache_free(prelim_ref_cache, ref); } return ret; diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 8f2e767..a910b27 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -72,4 +72,6 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid, struct btrfs_inode_extref **ret_extref, u64 *found_off); +int __init btrfs_prelim_ref_init(void); +void btrfs_prelim_ref_exit(void); #endif diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index b64d762..de7eb3d 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -56,6 +56,7 @@ #include rcu-string.h #include dev-replace.h #include free-space-cache.h +#include backref.h #define CREATE_TRACE_POINTS #include trace/events/btrfs.h @@ -1774,6 +1775,10 @@ static int __init init_btrfs_fs(void) if (err) goto free_auto_defrag; + err = btrfs_prelim_ref_init(); + if (err) + goto free_prelim_ref; + err = btrfs_interface_init(); if (err) goto free_delayed_ref; @@ -1791,6 +1796,8 @@ static int __init init_btrfs_fs(void) unregister_ioctl: btrfs_interface_exit(); +free_prelim_ref: + btrfs_prelim_ref_exit(); free_delayed_ref: btrfs_delayed_ref_exit(); free_auto_defrag: @@ -1817,6 +1824,7 @@ static void __exit exit_btrfs_fs(void) btrfs_delayed_ref_exit(); btrfs_auto_defrag_exit(); btrfs_delayed_inode_exit(); + btrfs_prelim_ref_exit(); ordered_data_exit(); extent_map_exit(); extent_io_exit(); I generally like the idea of using a custom cache here. What about this one? 324 static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, [...] 367 /* additional parents require new refs being added here */ 368 while ((node = ulist_next(parents, uiter))) { 369 new_ref = kmalloc(sizeof(*new_ref), GFP_NOFS); That new_ref will also be freed with kmem_cache_free after your patch, I think. Thanks, -Jan -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at
Re: [PATCH 3/3] Btrfs: allocate prelim_ref with a slab allocater
On Thu, August 08, 2013 at 07:04 (+0200), Wang Shilong wrote: struct __prelim_ref is allocated and freed frequently when walking backref tree, using slab allocater can not only speed up allocating but also detect memory leaks. Signed-off-by: Wang Shilong wangsl.f...@cn.fujitsu.com Reviewed-by: Miao Xie mi...@cn.fujitsu.com --- fs/btrfs/backref.c | 30 +- fs/btrfs/backref.h | 2 ++ fs/btrfs/super.c | 8 3 files changed, 35 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index f7781e6..916e4f1 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -119,6 +119,26 @@ struct __prelim_ref { u64 wanted_disk_byte; }; +static struct kmem_cache *prelim_ref_cache; + +int __init btrfs_prelim_ref_init(void) +{ +prelim_ref_cache = kmem_cache_create(btrfs_prelim_ref, +sizeof(struct __prelim_ref), +0, +SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, +NULL); +if (!prelim_ref_cache) +return -ENOMEM; +return 0; +} + +void btrfs_prelim_ref_exit(void) +{ +if (prelim_ref_cache) +kmem_cache_destroy(prelim_ref_cache); +} + /* * the rules for all callers of this function are: * - obtaining the parent is the goal @@ -165,7 +185,7 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id, { struct __prelim_ref *ref; -ref = kmalloc(sizeof(*ref), gfp_mask); +ref = kmem_cache_alloc(prelim_ref_cache, gfp_mask); if (!ref) return -ENOMEM; @@ -493,7 +513,7 @@ static void __merge_refs(struct list_head *head, int mode) ref1-count += ref2-count; list_del(ref2-list); -kfree(ref2); +kmem_cache_free(prelim_ref_cache, ref2); } } @@ -958,7 +978,7 @@ again: } } list_del(ref-list); -kfree(ref); +kmem_cache_free(prelim_ref_cache, ref); } out: @@ -966,13 +986,13 @@ out: while (!list_empty(prefs)) { ref = list_first_entry(prefs, struct __prelim_ref, list); list_del(ref-list); -kfree(ref); +kmem_cache_free(prelim_ref_cache, ref); } while (!list_empty(prefs_delayed)) { ref = list_first_entry(prefs_delayed, struct __prelim_ref, list); list_del(ref-list); -kfree(ref); +kmem_cache_free(prelim_ref_cache, ref); } return ret; diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 8f2e767..a910b27 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -72,4 +72,6 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid, struct btrfs_inode_extref **ret_extref, u64 *found_off); +int __init btrfs_prelim_ref_init(void); +void btrfs_prelim_ref_exit(void); #endif diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index b64d762..de7eb3d 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -56,6 +56,7 @@ #include rcu-string.h #include dev-replace.h #include free-space-cache.h +#include backref.h #define CREATE_TRACE_POINTS #include trace/events/btrfs.h @@ -1774,6 +1775,10 @@ static int __init init_btrfs_fs(void) if (err) goto free_auto_defrag; +err = btrfs_prelim_ref_init(); +if (err) +goto free_prelim_ref; + err = btrfs_interface_init(); if (err) goto free_delayed_ref; @@ -1791,6 +1796,8 @@ static int __init init_btrfs_fs(void) unregister_ioctl: btrfs_interface_exit(); +free_prelim_ref: +btrfs_prelim_ref_exit(); free_delayed_ref: btrfs_delayed_ref_exit(); free_auto_defrag: @@ -1817,6 +1824,7 @@ static void __exit exit_btrfs_fs(void) btrfs_delayed_ref_exit(); btrfs_auto_defrag_exit(); btrfs_delayed_inode_exit(); +btrfs_prelim_ref_exit(); ordered_data_exit(); extent_map_exit(); extent_io_exit(); I generally like the idea of using a custom cache here. What about this one? 324 static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, [...] 367 /* additional parents require new refs being added here */ 368 while ((node = ulist_next(parents, uiter))) { 369 new_ref = kmalloc(sizeof(*new_ref), GFP_NOFS); That new_ref will also be freed with kmem_cache_free after your patch, I think. Yeah, you are right, i just have a question, why i can not cause problems when i free it with kmem_cahce_free during my test ~_~. Thanks, Wang Thanks, -Jan -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a
Re: [PATCH][RESEND] vfs: allow /proc/PID/maps to get device from stat
On Wed, Aug 07, 2013 at 04:51:46PM -0400, Josef Bacik wrote: Not possible, this will break other things as subvolumes have their own inode space, it will confuse applications that get multiples of an inode number for different devices with the same st_dev. Each subvolume has it's own anonymous dev to segregate things. Thanks, Yes, it's the same old issue of btrfs volumes misbehaving, and the solution is still the same as 5 years ago: make sure each subvolume has it's own sb, vfsmount and gets automounted, similar to what nfs4 does for this case. -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][RESEND] vfs: allow /proc/PID/maps to get device from stat
On Thu, Aug 08, 2013 at 05:13:49AM -0700, Christoph Hellwig wrote: On Wed, Aug 07, 2013 at 04:51:46PM -0400, Josef Bacik wrote: Not possible, this will break other things as subvolumes have their own inode space, it will confuse applications that get multiples of an inode number for different devices with the same st_dev. Each subvolume has it's own anonymous dev to segregate things. Thanks, Yes, it's the same old issue of btrfs volumes misbehaving, and the solution is still the same as 5 years ago: make sure each subvolume has it's own sb, vfsmount and gets automounted, similar to what nfs4 does for this case. This won't work, try having 1 subvolumes with dirty inodes and do sync then go skiing, you'll have time :). Thanks, Josef -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Btrfs: deal with enomem in the rewind path V3
On Thu, Aug 08, 2013 at 09:36:52AM +0200, Jan Schmidt wrote: On Wed, August 07, 2013 at 23:03 (+0200), Josef Bacik wrote: We can get ENOMEM trying to allocate dummy bufs for the rewind operation of the tree mod log. Instead of BUG_ON()'ing in this case pass up ENOMEM. I looked back through the callers and I'm pretty sure I got everybody who did BUG_ON(ret) in this path. Thanks, Signed-off-by: Josef Bacik jba...@fusionio.com --- V2-V3: -unlock and free the original buffer on error -return NULL instead of ERR_PTR(-ENOMEM) V1-V2: missed a BUG_ON() for alloc_dummy_extent_buffer. fs/btrfs/ctree.c | 16 +- fs/btrfs/extent_io.c | 145 + 2 files changed, 88 insertions(+), 73 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 0d5c686..1dd8a71 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1211,7 +1211,11 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, BUG_ON(tm-slot != 0); eb_rewin = alloc_dummy_extent_buffer(eb-start, fs_info-tree_root-nodesize); - BUG_ON(!eb_rewin); + if (!eb_rewin) { + btrfs_tree_read_unlock(eb); + free_extent_buffer(eb); + return NULL; + } btrfs_set_header_bytenr(eb_rewin, eb-start); btrfs_set_header_backref_rev(eb_rewin, btrfs_header_backref_rev(eb)); @@ -1219,7 +1223,11 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, btrfs_set_header_level(eb_rewin, btrfs_header_level(eb)); } else { eb_rewin = btrfs_clone_extent_buffer(eb); - BUG_ON(!eb_rewin); + if (!eb_rewin) { + btrfs_tree_read_unlock(eb); + free_extent_buffer(eb); + return NULL; + } } btrfs_tree_read_unlock(eb); @@ -2772,6 +2780,10 @@ again: BTRFS_READ_LOCK); } b = tree_mod_log_rewind(root-fs_info, b, time_seq); + if (!b) { + ret = -ENOMEM; + goto done; + } p-locks[level] = BTRFS_READ_LOCK; p-nodes[level] = b; } else { diff --git a/fs/btrfs/extent_io.c b/fs/btrfs/extent_io.c index deaea9c..b422cba 100644 --- a/fs/btrfs/extent_io.c +++ b/fs/btrfs/extent_io.c @@ -4222,6 +4222,76 @@ static void __free_extent_buffer(struct extent_buffer *eb) kmem_cache_free(extent_buffer_cache, eb); } +static int extent_buffer_under_io(struct extent_buffer *eb) +{ + return (atomic_read(eb-io_pages) || + test_bit(EXTENT_BUFFER_WRITEBACK, eb-bflags) || + test_bit(EXTENT_BUFFER_DIRTY, eb-bflags)); +} + +/* + * Helper for releasing extent buffer page. + */ +static void btrfs_release_extent_buffer_page(struct extent_buffer *eb, + unsigned long start_idx) +{ + unsigned long index; + unsigned long num_pages; + struct page *page; + int mapped = !test_bit(EXTENT_BUFFER_DUMMY, eb-bflags); + + BUG_ON(extent_buffer_under_io(eb)); + + num_pages = num_extent_pages(eb-start, eb-len); + index = start_idx + num_pages; + if (start_idx = index) + return; + + do { + index--; + page = extent_buffer_page(eb, index); + if (page mapped) { + spin_lock(page-mapping-private_lock); + /* +* We do this since we'll remove the pages after we've +* removed the eb from the radix tree, so we could race +* and have this page now attached to the new eb. So +* only clear page_private if it's still connected to +* this eb. +*/ + if (PagePrivate(page) + page-private == (unsigned long)eb) { + BUG_ON(test_bit(EXTENT_BUFFER_DIRTY, eb-bflags)); + BUG_ON(PageDirty(page)); + BUG_ON(PageWriteback(page)); + /* +* We need to make sure we haven't be attached +* to a new eb. +*/ + ClearPagePrivate(page); + set_page_private(page, 0); + /* One for the page private */ + page_cache_release(page); + } +
Re: [PATCH] Btrfs: stop using GFP_ATOMIC when allocating rewind ebs
On Thu, Aug 08, 2013 at 09:23:06AM +0200, Jan Schmidt wrote: On Wed, August 07, 2013 at 23:11 (+0200), Josef Bacik wrote: There is no reason we can't just set the path to blocking and then do normal GFP_NOFS allocations for these extent buffers. Thanks, Signed-off-by: Josef Bacik jba...@fusionio.com --- fs/btrfs/ctree.c | 16 ++-- fs/btrfs/extent_io.c |8 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 1dd8a71..414a2d7 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1191,8 +1191,8 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, * is freed (its refcount is decremented). */ static struct extent_buffer * -tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, - u64 time_seq) +tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path, + struct extent_buffer *eb, u64 time_seq) { struct extent_buffer *eb_rewin; struct tree_mod_elem *tm; @@ -1207,12 +1207,15 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, if (!tm) return eb; + btrfs_set_path_blocking(path); + btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); + if (tm-op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) { BUG_ON(tm-slot != 0); eb_rewin = alloc_dummy_extent_buffer(eb-start, fs_info-tree_root-nodesize); if (!eb_rewin) { - btrfs_tree_read_unlock(eb); + btrfs_tree_read_unlock_blocking(eb); free_extent_buffer(eb); return NULL; } @@ -1224,13 +1227,14 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, } else { eb_rewin = btrfs_clone_extent_buffer(eb); if (!eb_rewin) { - btrfs_tree_read_unlock(eb); + btrfs_tree_read_unlock_blocking(eb); free_extent_buffer(eb); return NULL; } } - btrfs_tree_read_unlock(eb); + btrfs_clear_path_blocking(path, NULL, BTRFS_READ_LOCK); + btrfs_tree_read_unlock_blocking(eb); unlock_blocking? Rest looks ok to me. Yeah I change the lock to blocking above, so I have to do read_unlock_blocking here. Thanks, Josef -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Btrfs: stop using GFP_ATOMIC when allocating rewind ebs
On Thu, August 08, 2013 at 15:12 (+0200), Josef Bacik wrote: On Thu, Aug 08, 2013 at 09:23:06AM +0200, Jan Schmidt wrote: On Wed, August 07, 2013 at 23:11 (+0200), Josef Bacik wrote: There is no reason we can't just set the path to blocking and then do normal GFP_NOFS allocations for these extent buffers. Thanks, Signed-off-by: Josef Bacik jba...@fusionio.com --- fs/btrfs/ctree.c | 16 ++-- fs/btrfs/extent_io.c |8 2 files changed, 14 insertions(+), 10 deletions(-) diff --git a/fs/btrfs/ctree.c b/fs/btrfs/ctree.c index 1dd8a71..414a2d7 100644 --- a/fs/btrfs/ctree.c +++ b/fs/btrfs/ctree.c @@ -1191,8 +1191,8 @@ __tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, * is freed (its refcount is decremented). */ static struct extent_buffer * -tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, - u64 time_seq) +tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct btrfs_path *path, + struct extent_buffer *eb, u64 time_seq) { struct extent_buffer *eb_rewin; struct tree_mod_elem *tm; @@ -1207,12 +1207,15 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, if (!tm) return eb; + btrfs_set_path_blocking(path); + btrfs_set_lock_blocking_rw(eb, BTRFS_READ_LOCK); + if (tm-op == MOD_LOG_KEY_REMOVE_WHILE_FREEING) { BUG_ON(tm-slot != 0); eb_rewin = alloc_dummy_extent_buffer(eb-start, fs_info-tree_root-nodesize); if (!eb_rewin) { - btrfs_tree_read_unlock(eb); + btrfs_tree_read_unlock_blocking(eb); free_extent_buffer(eb); return NULL; } @@ -1224,13 +1227,14 @@ tree_mod_log_rewind(struct btrfs_fs_info *fs_info, struct extent_buffer *eb, } else { eb_rewin = btrfs_clone_extent_buffer(eb); if (!eb_rewin) { - btrfs_tree_read_unlock(eb); + btrfs_tree_read_unlock_blocking(eb); free_extent_buffer(eb); return NULL; } } - btrfs_tree_read_unlock(eb); + btrfs_clear_path_blocking(path, NULL, BTRFS_READ_LOCK); + btrfs_tree_read_unlock_blocking(eb); unlock_blocking? Rest looks ok to me. Yeah I change the lock to blocking above, so I have to do read_unlock_blocking here. Thanks, Uh, obviously. Got confused by the btrfs_clear_path_blocking above, but of course we're locking eb explicitly ourselves. Reviewed-by: Jan Schmidt list.bt...@jan-o-sch.net Thanks! -Jan -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/3] Btrfs: fix oops when writing dirty qgroups to disk
On Wed, Aug 07, 2013 at 01:12:29PM +0800, Wang Shilong wrote: When disabling quota, we should clear out list 'dirty_qgroups',otherwise, we will get oops if enabling quota again. Fix this by abstracting similar code from del_qgroup_rb(). Signed-off-by: Wang Shilong wangsl.f...@cn.fujitsu.com Reviewed-by: Miao Xie mi...@cn.fujitsu.com Can we get an xfstest for this, or at the very least a generic xfstest to exercise qgroups in general so I can be sure all these qgroup patches I take don't cause regressions? Thanks, Josef -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 6/7] btrfs: cleanup: removed unused 'btrfs_reada_detach'
On Thu, Aug 08, 2013 at 09:33:14AM +0200, Arne Jansen wrote: On 07.08.2013 23:43, Sergei Trofimovich wrote: From: Sergei Trofimovich sly...@gentoo.org Found by uselex.rb: btrfs_reada_detach: [R]: exported from: fs/btrfs/btrfs.o fs/btrfs/built-in.o fs/btrfs/reada.o even though the function is currently unused, I'm hesitating to remove it as it's part of the reada-API and might be handy for anyone going to use the API in the future. I agree. As replied here, http://www.mail-archive.com/linux-btrfs@vger.kernel.org/msg24047.html please keep the function. david -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 4/7] btrfs: cleanup: removed unused 'btrfs_start_transaction_lflush'
On Thu, Aug 08, 2013 at 12:43:20AM +0300, Sergei Trofimovich wrote: From: Sergei Trofimovich sly...@gentoo.org Found by uselex.rb: btrfs_start_transaction_lflush: [R]: exported from: fs/btrfs/btrfs.o fs/btrfs/transaction.o fs/btrfs/built-in.o http://www.mail-archive.com/linux-btrfs@vger.kernel.org/msg24047.html btrfs_start_transaction_lflush() Transcaction API, removing the func does not make sense without removing BTRFS_RESERVE_FLUSH_LIMIT at the same time. Miao introduced this function in 08e007d2e57744472a9424735a to enhance flushing logic to avoid deadlocks. david -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 3/7] btrfs: cleanup: mark 'btrfs_write_and_wait_marked_extents' as static
On Thu, Aug 08, 2013 at 12:43:19AM +0300, Sergei Trofimovich wrote: From: Sergei Trofimovich sly...@gentoo.org Found by uselex.rb: btrfs_write_and_wait_marked_extents: [R]: exported from: fs/btrfs/btrfs.o fs/btrfs/transaction.o fs/btrfs/built-in.o Signed-off-by: Sergei Trofimovich sly...@gentoo.org --- fs/btrfs/transaction.c | 4 ++-- fs/btrfs/transaction.h | 2 -- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c index d58cce7..ff891d2 100644 --- a/fs/btrfs/transaction.c +++ b/fs/btrfs/transaction.c @@ -837,8 +837,8 @@ int btrfs_wait_marked_extents(struct btrfs_root *root, * them in one of two extent_io trees. This is used to make sure all of * those extents are on disk for transaction or log commit */ -int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, - struct extent_io_tree *dirty_pages, int mark) +static int btrfs_write_and_wait_marked_extents(struct btrfs_root *root, +struct extent_io_tree *dirty_pages, int mark) You may want to run the output through checkpatch.pl and fix obvious style violations (line too long). david -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][RESEND] vfs: allow /proc/PID/maps to get device from stat
On Thu, Aug 08, 2013 at 09:02:07AM -0400, Josef Bacik wrote: This won't work, try having 1 subvolumes with dirty inodes and do sync then go skiing, you'll have time :). Thanks, Why would the dirty inodes make any difference? If you share the bdi between the subvolumes the sync workflow should be exactly the same still. -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 7/7] btrfs: cleanup: removed unused 'btrfs_get_inode_ref_index'
On Thu, Aug 08, 2013 at 12:43:23AM +0300, Sergei Trofimovich wrote: From: Sergei Trofimovich sly...@gentoo.org Found by uselex.rb: btrfs_get_inode_ref_index: [R]: exported from: fs/btrfs/inode-item.o fs/btrfs/btrfs.o fs/btrfs/built-in.o Signed-off-by: Sergei Trofimovich sly...@gentoo.org Safe to remove. Reviewed-by: David Sterba dste...@suse.cz -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 3/3] Btrfs: allocate prelim_ref with a slab allocater
On Thu, Aug 08, 2013 at 01:04:19PM +0800, Wang Shilong wrote: struct __prelim_ref is allocated and freed frequently when walking backref tree, using slab allocater can not only speed up allocating but also detect memory leaks. Signed-off-by: Wang Shilong wangsl.f...@cn.fujitsu.com Reviewed-by: Miao Xie mi...@cn.fujitsu.com --- fs/btrfs/backref.c | 30 +- fs/btrfs/backref.h | 2 ++ fs/btrfs/super.c | 8 3 files changed, 35 insertions(+), 5 deletions(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index f7781e6..916e4f1 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -119,6 +119,26 @@ struct __prelim_ref { u64 wanted_disk_byte; }; +static struct kmem_cache *prelim_ref_cache; + +int __init btrfs_prelim_ref_init(void) +{ + prelim_ref_cache = kmem_cache_create(btrfs_prelim_ref, + sizeof(struct __prelim_ref), Would be nice to give it a name that matches the slab cache, btrfs_prelim_ref. david -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Btrfs: deal with enomem in the rewind path V3
On Thu, Aug 08, 2013 at 09:36:52AM +0200, Jan Schmidt wrote: Weird patch formatting concerning extent_io.c, I assume there are no changes in extent_buffer_under_io and btrfs_release_extent_buffer_page, you just moved btrfs_clone_extent_buffer, right? Perhaps --patience or --minimal could do better? Otherwise, git diff --patience produces identical result for me (1.8.3.1). Reviewed-by: Jan Schmidt list@jan-o-sch.net ^^^ xfs? :) -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Btrfs: deal with enomem in the rewind path V3
On Thu, August 08, 2013 at 16:28 (+0200), David Sterba wrote: On Thu, Aug 08, 2013 at 09:36:52AM +0200, Jan Schmidt wrote: Weird patch formatting concerning extent_io.c, I assume there are no changes in extent_buffer_under_io and btrfs_release_extent_buffer_page, you just moved btrfs_clone_extent_buffer, right? Perhaps --patience or --minimal could do better? Otherwise, git diff --patience produces identical result for me (1.8.3.1). Yeah, I expected that after Josef said that he actually moved the other two functions, so the structure really changed in a way git cannot diff any better. Reviewed-by: Jan Schmidt list@jan-o-sch.net ^^^ xfs? :) Whoops :-) Replace that by btrfs if you wish. -Jan -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][RESEND] vfs: allow /proc/PID/maps to get device from stat
On Thu, Aug 08, 2013 at 06:48:05AM -0700, Christoph Hellwig wrote: On Thu, Aug 08, 2013 at 09:02:07AM -0400, Josef Bacik wrote: This won't work, try having 1 subvolumes with dirty inodes and do sync then go skiing, you'll have time :). Thanks, Why would the dirty inodes make any difference? If you share the bdi between the subvolumes the sync workflow should be exactly the same still. The inodes are in the per-sb list, so we may start all the writing but we don't wait all at once, so in the case of btrfs we will write all the dirty inodes, and then wait on the ones in whatever sb we have, and then sync, which will commit the transaction. Then we go to the next sb and wait on those inodes which will dirty metadata which means we'll have another transaction and we'll commit the transaction and so on and so forth. This means we write the superblock 1 times for one sync when we could have just done it once. Now we could probably get around this by having -sync_fs wait itself for all of the inodes to complete and then commit the transaction once, but we're still going to get called the 9 times for the same damned file system that has already had everything done. And this is just one example, IIRC there were a few other issues that popped up because we assume sb == completely separate file system, freeze I think is one of those things. I'm sure there were other ones but the last time I tried to do this was 2010/2011 and many brain cells have died since then. Thanks, Josef -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH][RESEND] vfs: allow /proc/PID/maps to get device from stat
On Thu, Aug 08, 2013 at 06:48:05AM -0700, Christoph Hellwig wrote: On Thu, Aug 08, 2013 at 09:02:07AM -0400, Josef Bacik wrote: This won't work, try having 1 subvolumes with dirty inodes and do sync then go skiing, you'll have time :). Thanks, Why would the dirty inodes make any difference? If you share the bdi between the subvolumes the sync workflow should be exactly the same still. If we could dis-entangle vfsmounts from sb's and have it so you could have multiple vfsmounts with just one sb that would solve at least the in-kernel confusion, but I think we still have the userspace confusion. Thanks, Josef -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 0/4] btrfs: out-of-band (aka offline) dedupe v4
On Tue, Aug 06, 2013 at 11:42:47AM -0700, Mark Fasheh wrote: The following series of patches implements in btrfs an ioctl to do out-of-band deduplication of file extents. Reviewed-by: David Sterba dste...@suse.cz -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Why does btrfs benchmark so badly in this case?
Phoronix periodically runs benchmarks on filesystems, and one thing I have noticed is that btrfs always does terribly on their fio Intel IOMeter fileserver access pattern benchmark: http://www.phoronix.com/scan.php?page=articleitem=linux_310_10fsnum=2 Here, btrfs is more than 6 times slower than ext4, and about 3 times slower than XFS. Lest we attribute it to an unavoidable downside of COW filesystems and move on...no, we cannot do that, because ZFS does well here -- btrfs is about 6 times slower than ZFS! Note that btrfs does quite well in the other Phoronix benchmarks. It is just the fio fileserver benchmark that btrfs has problems with. What is going on here? Why is btrfs doing so poorly? -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Why does btrfs benchmark so badly in this case?
On Thu, Aug 08, 2013 at 09:13:04AM -0700, John Williams wrote: Phoronix periodically runs benchmarks on filesystems, and one thing I have noticed is that btrfs always does terribly on their fio Intel IOMeter fileserver access pattern benchmark: http://www.phoronix.com/scan.php?page=articleitem=linux_310_10fsnum=2 Here, btrfs is more than 6 times slower than ext4, and about 3 times slower than XFS. Lest we attribute it to an unavoidable downside of COW filesystems and move on...no, we cannot do that, because ZFS does well here -- btrfs is about 6 times slower than ZFS! Note that btrfs does quite well in the other Phoronix benchmarks. It is just the fio fileserver benchmark that btrfs has problems with. What is going on here? Why is btrfs doing so poorly? Excellent question, I'll get back to you on that. Thanks, Josef -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v3 0/2] xfstest btrfs/316: test send / receive
On 8/8/13 3:17 AM, Jan Schmidt wrote: These two patches add the announced tests for btrfs send / receive. As requested, the fssum tool is now included. One drawback is that I'm unable to edit configure.ac or whatever needs to be modified in an autotools preferred way. Any hints appreciated, preferrably hints containing all the modifications required to introduce something like HAVE_SEEK_HOLE. Other tests in the tree just add: #ifndef SEEK_DATA #define SEEK_DATA 3 #define SEEK_HOLE 4 #endif I do not want to make modifications to fssum.c here, if that's absolutely required (because one /could/ get along using linux/fs.h, which is not the way I would like to go), I'd like to have that changed in the far-progs repository where fssum.c comes from as well. Well, unfortunately it breaks the build w/o some change or other, on older distros: Building src [CC]fssum fssum.c: In function 'sum_file_data_permissive': fssum.c:243: error: 'SEEK_DATA' undeclared (first use in this function) so this can't be merged as-is. Adding the 4 lines above to the xfstests copy seems like a pretty obvious fix to get the tool building and move this along. Then, to simply skip this test if the kernel we're running on doesn't grok SEEK_DATA, add: _require_seek_data_hole to your new test in patch 2. Thanks, -Eric -- v1-v2: - included fssum - test number is now 316 (was 314) v2-v3: - added missing -lcrypto to build fssum - removed obsolete change in README now that fssum is included - fixed comment in test/btrfs/316's header (314 - 316) Jan Schmidt (2): xfstests: add fssum tool xfstests btrfs/316: test send / receive .gitignore |1 + common/config |2 + src/Makefile| 11 +- src/fssum.c | 819 +++ tests/btrfs/316 | 113 +++ tests/btrfs/316.out |4 + tests/btrfs/group |1 + 7 files changed, 950 insertions(+), 1 deletions(-) create mode 100644 src/fssum.c create mode 100755 tests/btrfs/316 create mode 100644 tests/btrfs/316.out -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v3 1/2] xfstests: add fssum tool
On 8/8/13 3:17 AM, Jan Schmidt wrote: fssum is a tool to build a recursive checksum for a file system. The home repository of fssum is git://git.kernel.org/pub/scm/linux/kernel/git/arne/far-progs.git It is added as an optional target, because it depends on glibc = 2.15 for SEEK_HOLE / SEEK_DATA. The test to be added using fssum will just be skipped if fssum wasn't built. Signed-off-by: Jan Schmidt list@jan-o-sch.net --- .gitignore|1 + common/config |2 + src/Makefile | 11 +- src/fssum.c | 819 + 4 files changed, 832 insertions(+), 1 deletions(-) create mode 100644 src/fssum.c diff --git a/.gitignore b/.gitignore index 11594aa..c2fc6e3 100644 --- a/.gitignore +++ b/.gitignore @@ -45,6 +45,7 @@ /src/fill /src/fill2 /src/fs_perms +/src/fssum /src/fstest /src/fsync-tester /src/ftrunc diff --git a/common/config b/common/config index 67c1498..c8bee29 100644 --- a/common/config +++ b/common/config @@ -146,6 +146,8 @@ export SED_PROG=`set_prog_path sed` export BC_PROG=`set_prog_path bc` [ $BC_PROG = ] _fatal bc not found +export FSSUM_PROG=`set_prog_path fssum $here/src/fssum` So this will pick up a local copy of fssum if it exists; is that really desired? (If there's any difference in behavior, then the one in src/ presumably would need to be fixed...) + export PS_ALL_FLAGS=-ef export DF_PROG=`set_prog_path df` diff --git a/src/Makefile b/src/Makefile index cc679e8..10a4d3c 100644 --- a/src/Makefile +++ b/src/Makefile @@ -20,10 +20,14 @@ LINUX_TARGETS = xfsctl bstat t_mtab getdevicesize preallo_rw_pattern_reader \ stale_handle pwrite_mmap_blocked t_dir_offset2 seek_sanity_test \ seek_copy_test t_readdir_1 t_readdir_2 fsync-tester +OPT_TARGETS = fssum + I'm not sure how this helps . . . SUBDIRS = LLDLIBS = $(LIBATTR) $(LIBHANDLE) $(LIBACL) +OPT_LDLIBS = -lssl -lcrypto Hm, new deps. I guess it's not a huge problem, these should always be available, right? ifeq ($(HAVE_XLOG_ASSIGN_LSN), true) LINUX_TARGETS += loggen endif @@ -60,7 +64,7 @@ CFILES = $(TARGETS:=.c) LDIRT = $(TARGETS) -default: depend $(TARGETS) $(SUBDIRS) +default: depend $(TARGETS) $(OPT_TARGETS) $(SUBDIRS) Anyway, OPT_TARGETS isn't optional, because you still build it by default. :) depend: .dep @@ -70,11 +74,16 @@ $(TARGETS): $(LIBTEST) @echo [CC]$@ $(Q)$(LTLINK) $@.c -o $@ $(CFLAGS) $(LDFLAGS) $(LDLIBS) $(LIBTEST) +$(OPT_TARGETS): $(LIBTEST) + @echo [CC]$@ + -$(Q)$(LTLINK) $@.c -o $@ $(CFLAGS) $(LDFLAGS) $(LDLIBS) $(OPT_LDLIBS) $(LIBTEST) Oh, I see, you ignore the error. Well, that's still pretty ugly. I'd really rather you just add the #defines as I suggested in my reply to [PATCH 0/2], so it'll build for everyone. Thanks, -Eric + LINKTEST = $(LTLINK) $@.c -o $@ $(CFLAGS) $(LDFLAGS) install: default $(addsuffix -install,$(SUBDIRS)) $(INSTALL) -m 755 -d $(PKG_LIB_DIR)/src $(LTINSTALL) -m 755 $(TARGETS) $(PKG_LIB_DIR)/src + -$(LTINSTALL) -m 755 $(OPT_TARGETS) $(PKG_LIB_DIR)/src $(LTINSTALL) -m 755 fill2attr fill2fs fill2fs_check scaleread.sh $(PKG_LIB_DIR)/src $(LTINSTALL) -m 644 dumpfile $(PKG_LIB_DIR)/src -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH v3 2/2] xfstests btrfs/316: test send / receive
On 8/8/13 3:17 AM, Jan Schmidt wrote: Basic send / receive functionality test for btrfs. Requires current version of fsstress built (-x support). Relies on fssum tool but can skip the test if it failed to build. Signed-off-by: Jan Schmidt list@jan-o-sch.net Reviewed-by: Josef Bacik jba...@fusionio.com --- tests/btrfs/316 | 113 +++ tests/btrfs/316.out |4 ++ tests/btrfs/group |1 + 3 files changed, 118 insertions(+), 0 deletions(-) create mode 100755 tests/btrfs/316 create mode 100644 tests/btrfs/316.out diff --git a/tests/btrfs/316 b/tests/btrfs/316 new file mode 100755 index 000..087978a --- /dev/null +++ b/tests/btrfs/316 @@ -0,0 +1,113 @@ +#! /bin/bash +# FSQA Test No. 316 +# +# Run fsstress to create a reasonably strange file system, make a +# snapshot (base) and run more fsstress. Then take another snapshot +# (incr) and send both snapshots to a temp file. Remake the file +# system and receive from the files. Check both states with fssum. +# +#--- +# Copyright (C) 2013 STRATO. All rights reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +# +#--- +# +# creator +owner=list.bt...@jan-o-sch.net + +seq=`basename $0` +seqres=$RESULT_DIR/$seq +echo QA output created by $seq + +here=`pwd` +tmp=`mktemp -d` +status=1 + +_cleanup() +{ + echo *** unmount + umount $SCRATCH_MNT 2/dev/null + rm -f $tmp.* +} +trap _cleanup; exit \$status 0 1 2 3 15 + +# get standard environment, filters and checks +. ./common/rc +. ./common/filter + +# real QA test starts here +_need_to_be_root +_supported_fs btrfs +_supported_os Linux +_require_scratch _require_seek_data_hole +_require_command $FSSUM_PROG fssum Usually for local binaries in src/ we'd just do: FSSUM_PROG=$here/src/fssum [ -x $FSSUM_PROG ] || _notrun fssum not built There's no other src/* binary that gets set in common/config; every test just does it locally, so probably best to stick with that convention. I think we're almost there! :) -Eric + +rm -f $seqres.full + +workout() +{ + fsz=$1 + ops=$2 + + umount $SCRATCH_DEV /dev/null 21 + echo *** mkfs -dsize=$fsz$seqres.full + echo $seqres.full + _scratch_mkfs_sized $fsz $seqres.full 21 \ + || _fail size=$fsz mkfs failed + run_check _scratch_mount -o noatime + + run_check $FSSTRESS_PROG -d $SCRATCH_MNT -n $ops $FSSTRESS_AVOID -x \ + $BTRFS_UTIL_PROG subvol snap -r $SCRATCH_MNT $SCRATCH_MNT/base + + run_check $BTRFS_UTIL_PROG subvol snap -r $SCRATCH_MNT $SCRATCH_MNT/incr + + echo # $BTRFS_UTIL_PROG send $SCRATCH_MNT/base $tmp/base.snap \ + $seqres.full + $BTRFS_UTIL_PROG send $SCRATCH_MNT/base $tmp/base.snap 2 $seqres.full \ + || _fail failed: '$@' + echo # $BTRFS_UTIL_PROG send -p $SCRATCH_MNT/base\ + $SCRATCH_MNT/incr $tmp/incr.snap $seqres.full + $BTRFS_UTIL_PROG send -p $SCRATCH_MNT/base \ + $SCRATCH_MNT/incr $tmp/incr.snap 2 $seqres.full \ + || _fail failed: '$@' + + run_check $FSSUM_PROG -A -f -w $tmp/base.fssum $SCRATCH_MNT/base + run_check $FSSUM_PROG -A -f -w $tmp/incr.fssum -x $SCRATCH_MNT/incr/base \ + $SCRATCH_MNT/incr + + umount $SCRATCH_DEV /dev/null 21 + echo *** mkfs -dsize=$fsz$seqres.full + echo $seqres.full + _scratch_mkfs_sized $fsz $seqres.full 21 \ + || _fail size=$fsz mkfs failed + run_check _scratch_mount -o noatime + + run_check $BTRFS_UTIL_PROG receive $SCRATCH_MNT $tmp/base.snap + run_check $FSSUM_PROG -r $tmp/base.fssum $SCRATCH_MNT/base + + run_check $BTRFS_UTIL_PROG receive $SCRATCH_MNT $tmp/incr.snap + run_check $FSSUM_PROG -r $tmp/incr.fssum $SCRATCH_MNT/incr +} + +echo *** test send / receive + +fssize=`expr 2000 \* 1024 \* 1024` +ops=200 + +workout $fssize $ops + +echo *** done +status=0 +exit diff --git a/tests/btrfs/316.out b/tests/btrfs/316.out new file mode 100644 index 000..4564c85 --- /dev/null +++ b/tests/btrfs/316.out @@
Re: [PATCH 6/7] btrfs: cleanup: removed unused 'btrfs_reada_detach'
even though the function is currently unused, I'm hesitating to remove it as it's part of the reada-API and might be handy for anyone going to use the API in the future. I agree. As replied here, http://www.mail-archive.com/linux-btrfs@vger.kernel.org/msg24047.html please keep the function. If we're keeping score, put me down for being in favour of removing dead untested code. git ressurection is easy. - z -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/2] btrfs-progs: introduce btrfs filesystem show --kernel
On Thu, Aug 08, 2013 at 04:07:07PM +0800, Anand Jain wrote: As of now btrfs filesystem show reads directly from disks. So sometimes output can be stale, mainly when user want to verify their last operation like, labeling or device delete or add... etc. This patch adds --kernel option to the 'filesystem show' subcli, which will read from the kernel instead of the disks directly. Why should this be an option? When mounted, the kernel cache is authoritative. It was always a bug to read stale data from disk. The kernel should be read first, and if that isn't available it can fall back to offering unreliable data from disk with a giant wraning. Right? - z -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Why does btrfs benchmark so badly in this case?
What is going on here? Why is btrfs doing so poorly? Funny thing, I was thinking exactly the same when reading the article ;) Regards -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 6/7] btrfs: cleanup: removed unused 'btrfs_reada_detach'
On 08/08/13 19:46, Zach Brown wrote: even though the function is currently unused, I'm hesitating to remove it as it's part of the reada-API and might be handy for anyone going to use the API in the future. I agree. As replied here, http://www.mail-archive.com/linux-btrfs@vger.kernel.org/msg24047.html please keep the function. If we're keeping score, put me down for being in favour of removing dead untested code. git ressurection is easy. It's not really untested, it has been in use some time ago. But of course there's a chance that some changes broke it. Yes, git ressurection is easy. To inform potential users, you might just leave a comment like this: /* * There has been a function once to detach from a running reada. * If you need such functionality, just revert the commit that * added this comment. */ -Arne - z -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Why does btrfs benchmark so badly in this case?
On Thu, Aug 08, 2013 at 09:13:04AM -0700, John Williams wrote: Phoronix periodically runs benchmarks on filesystems, and one thing I have noticed is that btrfs always does terribly on their fio Intel IOMeter fileserver access pattern benchmark: http://www.phoronix.com/scan.php?page=articleitem=linux_310_10fsnum=2 Here, btrfs is more than 6 times slower than ext4, and about 3 times slower than XFS. Lest we attribute it to an unavoidable downside of COW filesystems and move on...no, we cannot do that, because ZFS does well here -- btrfs is about 6 times slower than ZFS! Note that btrfs does quite well in the other Phoronix benchmarks. It is just the fio fileserver benchmark that btrfs has problems with. What is going on here? Why is btrfs doing so poorly? So the reason this workload sucks for btrfs is because we fall back on buffered IO because fio does not do block size aligned writes for this workload. If you add ba=4k to the iometer fio file then we go the same speed as xfs and ext4. Not a whole lot we can do about this since unaligned writes means we have to read in pages to cow the block properly, which is why we fall back to buffered. Once we do that we end up having a lot of page locking stuff that gets in the way and makes us twice as slow. Thanks, Josef -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] Btrfs: fix race between removing a dev and writing sbs
Since all code paths that update the number of devices in the super copy (fs_info-super_copy) first lock the device list (fs_info-fs_devices-device_list_mutex), and write_all_supers() also needs to lock the devices list mutex, make write_all_supers() read the number of devices from the super copy after it locks the device list mutex (and before unlocking it of course). The only code path that doesn't lock the device list mutex before updating the number of devices in the super copy is disk-io.c:next_root_backup(), called by open_ctree() during mount time where concurrency issues can't happen. Signed-off-by: Filipe David Borba Manana fdman...@gmail.com --- fs/btrfs/disk-io.c |2 +- fs/btrfs/volumes.c | 11 --- 2 files changed, 5 insertions(+), 8 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 254cdc8..c4b24c7 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -3313,7 +3313,6 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) int total_errors = 0; u64 flags; - max_errors = btrfs_super_num_devices(root-fs_info-super_copy) - 1; do_barriers = !btrfs_test_opt(root, NOBARRIER); backup_super_roots(root-fs_info); @@ -3322,6 +3321,7 @@ static int write_all_supers(struct btrfs_root *root, int max_mirrors) mutex_lock(root-fs_info-fs_devices-device_list_mutex); head = root-fs_info-fs_devices-devices; + max_errors = btrfs_super_num_devices(root-fs_info-super_copy) - 1; if (do_barriers) { ret = barrier_all_devices(root-fs_info); diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 090f57c..eddf386 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -1568,11 +1568,6 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) if (ret) goto error_undo; - /* -* TODO: the superblock still includes this device in its num_devices -* counter although write_all_supers() is not locked out. This -* could give a filesystem state which requires a degraded mount. -*/ ret = btrfs_rm_dev_item(root-fs_info-chunk_root, device); if (ret) goto error_undo; @@ -1588,7 +1583,9 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) /* * the device list mutex makes sure that we don't change * the device list while someone else is writing out all -* the device supers. +* the device supers. Whoever is writing all supers, should +* lock the device list mutex before getting the number of +* devices in the super block (super_copy). */ cur_devices = device-fs_devices; @@ -1612,10 +1609,10 @@ int btrfs_rm_device(struct btrfs_root *root, char *device_path) device-fs_devices-open_devices--; call_rcu(device-rcu, free_device); - mutex_unlock(root-fs_info-fs_devices-device_list_mutex); num_devices = btrfs_super_num_devices(root-fs_info-super_copy) - 1; btrfs_set_super_num_devices(root-fs_info-super_copy, num_devices); + mutex_unlock(root-fs_info-fs_devices-device_list_mutex); if (cur_devices-open_devices == 0) { struct btrfs_fs_devices *fs_devices; -- 1.7.9.5 -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: btrfs zero divide
tl;dr: we got the faulty code pinned down, it's m68k specific, except the m68k specific part didn’t change from 3.2… Joe Perches dixit: Something like this maybe. (uncompiled/untested) I tried this: --- div64.h.orig2013-08-08 19:34:32.663540965 + +++ - 2013-08-08 19:47:30.309776791 + @@ -6,6 +6,8 @@ #else #include linux/types.h +#include linux/bug.h +#include linux/printk.h /* n = n / base; return rem; */ @@ -16,6 +18,11 @@ } __n; \ unsigned long __rem, __upper; \ \ +if (base == 0) { \ +WARN(1, Attempted division by 0\n); \ +dump_stack(); \ +__rem = 0; \ +} else { \ __n.n64 = (n); \ if ((__upper = __n.n32[0])) { \ asm (divul.l %2,%1:%0 \ @@ -26,6 +33,7 @@ : =d (__n.n32[1]), =d (__rem) \ : d (base), 1 (__upper), 0 (__n.n32[1])); \ (n) = __n.n64; \ +} \ __rem; \ }) It didn’t trigger, apparently: [817508.37] bio: create slab bio-1 at 1 [817508.51] Btrfs loaded [817524.11] loop: module loaded [817534.86] device fsid 01cfa645-5cde-4e4c-9b0b-df7b37bdc495 devid 1 transid 4 /dev/loop0 [817534.86] btrfs: disk space caching is enabled [817534.86] *** ZERO DIVIDE *** FORMAT=2 [817534.86] Current process id is 32312 [817534.86] BAD KERNEL TRAP: [817534.86] Modules linked in: loop btrfs lzo_compress zlib_deflate raid6_pq crc32c libcrc32c xor ipv6 evdev mac_hid ext3 mbcache jbd [last unloaded: btrfs] [817534.86] PC: [31c46612] __btrfs_map_block+0x134/0x147a [btrfs] [817534.86] SR: 2000 SP: 0249fab0 a2: 3010f660 [817534.86] d0: d1: 00022000d2: d3: [817534.86] d4: 0001d5: 0001a0: 021777a4a1: 021777a4 [817534.86] Process mount (pid: 32312, task=3010f660) [817534.86] Frame format=2 instr addr=31c4660e [817534.86] Stack from 0249fae8: 0020 1000 00022000 0766a928 07621800 00415d84 0070 077a97c0 0070 0249fb68 0009e250 00d106c0 00011220 0070 0020 00022000 00ff 0009 1000 021777a4 0020 0249fd14 0009e26c 0020 0003 0009dd8a 3007c02c 0766a928 00415d84 1000 0110 31c417ae 0766a928 00415d84 1000 [817534.86] Call Trace: [1000] kernel_pg_dir+0x0/0x1000 [817534.86] [00022000] _060_fpsp_effadd+0xb2c0/0xd518 [817534.86] [0009e250] bvec_alloc+0xa2/0xbe [817534.86] [00011220] sasin+0x87c/0x944 [817534.86] [00022000] _060_fpsp_effadd+0xb2c0/0xd518 [817534.86] [1000] kernel_pg_dir+0x0/0x1000 [817534.86] [0009e26c] bio_alloc_bioset+0x0/0x12e [817534.86] [0009dd8a] bio_add_page+0x4a/0x58 [817534.86] [1000] kernel_pg_dir+0x0/0x1000 [817534.86] [31c417ae] submit_extent_page.isra.44+0x170/0x1bc [btrfs] [817534.86] [1000] kernel_pg_dir+0x0/0x1000 [817534.86] [1000] kernel_pg_dir+0x0/0x1000 [817534.86] [31c4cbfe] btrfs_map_bio+0x60/0x48c [btrfs] [817534.86] [00022000] _060_fpsp_effadd+0xb2c0/0xd518 [817534.86] [00022000] _060_fpsp_effadd+0xb2c0/0xd518 [817534.86] [31c24bb2] btree_submit_bio_hook+0x0/0xae [btrfs] [817534.86] [31c41ae4] end_bio_extent_readpage+0x0/0x69c [btrfs] [817534.86] [1000] kernel_pg_dir+0x0/0x1000 [817534.86] [31c24984] btrfs_bio_wq_end_io+0x16/0x50 [btrfs] [817534.86] [31c24c0e] btree_submit_bio_hook+0x5c/0xae [btrfs] [817534.87] [00022000] _060_fpsp_effadd+0xb2c0/0xd518 [817534.87] [31c3ed7a] submit_one_bio+0x7c/0xb2 [btrfs] [817534.87] [00022000] _060_fpsp_effadd+0xb2c0/0xd518 [817534.87] [31c421b8] __extent_read_full_page+0x0/0x70a [btrfs] [817534.87] [00058828] unlock_page+0x0/0x26 [817534.87] [31c44780] read_extent_buffer_pages+0x1a8/0x218 [btrfs] [817534.88] [31c4c3b2] btrfs_num_copies+0x0/0x142 [btrfs] [817534.88] [31c23aa6] btree_read_extent_buffer_pages.constprop.52+0x42/0xca [btrfs] [817534.88] [31c22802] btree_get_extent+0x0/0x102 [btrfs] [817534.88] [00022000] _060_fpsp_effadd+0xb2c0/0xd518 [817534.88] [1000] kernel_pg_dir+0x0/0x1000 [817534.88] [31c2525e] read_tree_block+0x38/0x48 [btrfs] [817534.88] [31c25226] read_tree_block+0x0/0x48 [btrfs] [817534.89] [31c26d40] open_ctree+0xe80/0x15e6 [btrfs] [817534.89] [00022000] _060_fpsp_effadd+0xb2c0/0xd518 [817534.89] [1000] kernel_pg_dir+0x0/0x1000 [817534.89] [1000] kernel_pg_dir+0x0/0x1000 [817534.89] [1000] kernel_pg_dir+0x0/0x1000
Re: Why does btrfs benchmark so badly in this case?
On Thu, Aug 8, 2013 at 12:40 PM, Josef Bacik jba...@fusionio.com wrote: On Thu, Aug 08, 2013 at 09:13:04AM -0700, John Williams wrote: Phoronix periodically runs benchmarks on filesystems, and one thing I have noticed is that btrfs always does terribly on their fio Intel IOMeter fileserver access pattern benchmark: http://www.phoronix.com/scan.php?page=articleitem=linux_310_10fsnum=2 So the reason this workload sucks for btrfs is because we fall back on buffered IO because fio does not do block size aligned writes for this workload. If you add ba=4k to the iometer fio file then we go the same speed as xfs and ext4. Not a whole lot we can do about this since unaligned writes means we have to read in pages to cow the block properly, which is why we fall back to buffered. Once we do that we end up having a lot of page locking stuff that gets in the way and makes us twice as slow. Thanks, Thanks for looking into it. So I guess the reason that ZFS does well with that workload is that ZFS is using smaller blocks, maybe just 512B ? I wonder how common these type of non-4K aligned workloads are. Apparently, people with such workloads should avoid btrfs, but maybe these types of workloads are very rare? -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Why does btrfs benchmark so badly in this case?
On Thu, Aug 08, 2013 at 01:23:22PM -0700, John Williams wrote: On Thu, Aug 8, 2013 at 12:40 PM, Josef Bacik jba...@fusionio.com wrote: On Thu, Aug 08, 2013 at 09:13:04AM -0700, John Williams wrote: Phoronix periodically runs benchmarks on filesystems, and one thing I have noticed is that btrfs always does terribly on their fio Intel IOMeter fileserver access pattern benchmark: http://www.phoronix.com/scan.php?page=articleitem=linux_310_10fsnum=2 So the reason this workload sucks for btrfs is because we fall back on buffered IO because fio does not do block size aligned writes for this workload. If you add ba=4k to the iometer fio file then we go the same speed as xfs and ext4. Not a whole lot we can do about this since unaligned writes means we have to read in pages to cow the block properly, which is why we fall back to buffered. Once we do that we end up having a lot of page locking stuff that gets in the way and makes us twice as slow. Thanks, Thanks for looking into it. So I guess the reason that ZFS does well with that workload is that ZFS is using smaller blocks, maybe just 512B ? Yeah I'm not sure what ZFS does, but if you are writing over a block and the size/offset isn't aligned then you'd see similar issues with ZFS since it would have to read+modify+write. It is likely that ZFS just is using a smaller blocksize. I wonder how common these type of non-4K aligned workloads are. Apparently, people with such workloads should avoid btrfs, but maybe these types of workloads are very rare? So most people who use AIO/O_DIRECT have really specific setups which generally can adjust how they align stuff (databases for example this would be the db page and those are usually large, like 16k-32k), or with virtual images which will hopefully be doing things in block aligned io's, but this depends on the host OS. Like I said there isn't a whole lot we can do about this, you can do NOCOW if you want to get around it without changing your application or you can change the app to be blocksize aligned. Thanks, Josef -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [XFSTESTS PATCH] btrfs: Test deduplication
On Thu, Jun 27, 2013 at 12:40:30AM +0200, Gabriel de Perthuis wrote: --- The matching kernel patch is here: https://github.com/g2p/linux/tree/v3.10%2Bextent-same (rebased on 3.10, fixing a small conflict) Requires the btrfs-extent-same command: - http://permalink.gmane.org/gmane.comp.file-systems.btrfs/26579 - https://github.com/markfasheh/duperemove Sorry it took me so long to get to this, but I wanted to have the dedup patches merged before I looked at this. So first of all just copy btrfs-extent-same into xfstests since it's not part of a normally installed package. tests/btrfs/313 | 93 + tests/btrfs/313.out | 25 ++ tests/btrfs/group | 1 + 3 files changed, 119 insertions(+) create mode 100755 tests/btrfs/313 create mode 100644 tests/btrfs/313.out diff --git a/tests/btrfs/313 b/tests/btrfs/313 new file mode 100755 index 000..04e4ccb --- /dev/null +++ b/tests/btrfs/313 @@ -0,0 +1,93 @@ +#! /bin/bash +# FS QA Test No. 313 +# +# Test the deduplication syscall +# +#--- +# Copyright (c) 2013 Red Hat, Inc. All Rights Reserved. +# +# This program is free software; you can redistribute it and/or +# modify it under the terms of the GNU General Public License as +# published by the Free Software Foundation. +# +# This program is distributed in the hope that it would be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write the Free Software Foundation, +# Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA +#--- +# + +seq=`basename $0` +seqres=$RESULT_DIR/$seq +echo QA output created by $seq + +here=`pwd` +tmp=/tmp/$$ +status=1 # failure is the default! +trap _cleanup; exit \$status 0 1 2 3 15 + +_cleanup() +{ +cd / +rm -f $tmp.* +} + +. ./common/rc +. ./common/filter + +ESAME=`set_prog_path btrfs-extent-same` + +_need_to_be_root +_supported_fs btrfs +_supported_os Linux +_require_command $ESAME +_require_command $XFS_IO_PROG +_require_scratch + +_scratch_mkfs /dev/null +_scratch_mount $seqres.full 21 + +fiemap() { +xfs_io -r -c fiemap $1 |tail -n+2 +} + +dedup() { +! diff -q (fiemap $1) (fiemap $2) +$ESAME $(stat -c %s $1) $1 0 $2 0 +diff -u (fiemap $1) (fiemap $2) These are spitting out the full path to SCRATCH, so you will want to use something like _filter_scratch so that the output is consistent across people running it. +} + +echo Silence is golden +set -e + +v1=$SCRATCH_MNT/v1 +v2=$SCRATCH_MNT/v2 +v3=$SCRATCH_MNT/v3 + +$BTRFS_UTIL_PROG subvolume create $v1 +$BTRFS_UTIL_PROG subvolume create $v2 Redirect the output of these commands to /dev/null + +dd bs=1M status=none if=/dev/urandom of=$v1/file1 count=1 +dd bs=1M status=none if=/dev/urandom of=$v1/file2 count=1 +dd bs=1M status=none if=$v1/file1 of=$v2/file3 +dd bs=1M status=none if=$v1/file1 of=$v2/file4 + status=none doesn't work on my copy of dd, so don't use this, just do dd $seqres.full 21 or to /dev/null. +$BTRFS_UTIL_PROG subvolume snapshot -r $v2 $v3 + +# identical, multiple volumes +dedup $v1/file1 $v2/file3 + +# not identical, same volume +! $ESAME $((2**20)) $v1/file1 0 $v1/file2 0 + +# identical, second file on a frozen volume +dedup $v1/file1 $v3/file4 + +_scratch_unmount +_check_scratch_fs +status=0 +exit diff --git a/tests/btrfs/313.out b/tests/btrfs/313.out new file mode 100644 index 000..eabe6be --- /dev/null +++ b/tests/btrfs/313.out @@ -0,0 +1,25 @@ +QA output created by 313 +Silence is golden +Create subvolume 'sdir/v1' +Create subvolume 'sdir/v2' +Create a readonly snapshot of 'sdir/v2' in 'sdir/v3' +Files /dev/fd/63 and /dev/fd/62 differ +Deduping 2 total files +(0, 1048576): sdir/v1/file1 +(0, 1048576): sdir/v2/file3 +1 files asked to be deduped +i: 0, status: 0, bytes_deduped: 1048576 +1048576 total bytes deduped in this operation +Deduping 2 total files +(0, 1048576): sdir/v1/file1 +(0, 1048576): sdir/v1/file2 +1 files asked to be deduped +i: 0, status: 1, bytes_deduped: 0 +0 total bytes deduped in this operation +Files /dev/fd/63 and /dev/fd/62 differ +Deduping 2 total files +(0, 1048576): sdir/v1/file1 +(0, 1048576): sdir/v3/file4 +1 files asked to be deduped +i: 0, status: 0, bytes_deduped: 1048576 +1048576 total bytes deduped in this operation diff --git a/tests/btrfs/group b/tests/btrfs/group index bc6c256..4c868c8 100644 --- a/tests/btrfs/group +++ b/tests/btrfs/group @@ -7,5 +7,6 @@ 264 auto 265 auto 276 auto rw metadata 284 auto 307 auto quick
Re: Why does btrfs benchmark so badly in this case?
On Aug 8, 2013, at 2:23 PM, John Williams jwilliams4...@gmail.com wrote: So I guess the reason that ZFS does well with that workload is that ZFS is using smaller blocks, maybe just 512B ? Likely. It uses a variable block size. I wonder how common these type of non-4K aligned workloads are. Apparently, people with such workloads should avoid btrfs, but maybe these types of workloads are very rare? I can't directly answer the question, but all of the typical file systems on OS X, Linux, and Windows default to 4KB block sizes for many years now, baked in at creation time. On OS X, the block size varies automatically with respect to volume size at fs creation time (it goes to 8KB block sizes above 2TB, and scales up to 1MB block sizes), but still isn't ever less than 4KB unless manually created this way. So I'd think such workloads are rare. I also don't know if any common use fs has an optimization whereby just the modified sector(s) is overwritten, rather than all sectors making up the file system block being modified. Chris Murphy-- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Why does btrfs benchmark so badly in this case?
I also don't know if any common use fs has an optimization whereby just the modified sector(s) is overwritten, rather than all sectors making up the file system block being modified. Most of them do. The generic direct io path allows sector sized dio. The very first bit of do_blockdev_direct_IO() is testing first for file system block size alignment then for block device sector size alignment. You can see this easily with dd conv=notrunc oflags=direct and blktrace. # blockdev --getss /dev/sda 512 # blockdev --getbsz /dev/sda 4096 # blktrace -d /dev/sda -a issue -o - | blkparse -i - $ dd if=/dev/zero of=file bs=4096 count=1 oflag=direct conv=notrunc 8,03 1435.957320002 17941 D WS 137297704 + 8 [dd] $ dd if=/dev/zero of=file bs=512 count=1 oflag=direct conv=notrunc 8,01431.405641362 17940 D WS 137297704 + 1 [dd] - z -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] Btrfs: set default max_inline to 8KiB instead of 8MiB
8MiB is way too large and likely set by mistake. This is not a significant issue as in practice the max amount of data added to an inline extent is also limited by the page cache and btree leaf sizes. Signed-off-by: Filipe David Borba Manana fdman...@gmail.com --- fs/btrfs/disk-io.c |2 +- fs/btrfs/disk-io.h |2 ++ fs/btrfs/super.c |2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5de9ad7..aff37bd 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2189,7 +2189,7 @@ int open_ctree(struct super_block *sb, atomic_set(fs_info-defrag_running, 0); atomic64_set(fs_info-tree_mod_seq, 0); fs_info-sb = sb; - fs_info-max_inline = 8192 * 1024; + fs_info-max_inline = BTRFS_DEFAULT_MAX_INLINE; fs_info-metadata_ratio = 0; fs_info-defrag_inodes = RB_ROOT; fs_info-free_chunk_space = 0; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index b71acd6e..e76c1a2 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -25,6 +25,8 @@ #define BTRFS_SUPER_MIRROR_MAX 3 #define BTRFS_SUPER_MIRROR_SHIFT 12 +#define BTRFS_DEFAULT_MAX_INLINE 8192 + enum { BTRFS_WQ_ENDIO_DATA = 0, BTRFS_WQ_ENDIO_METADATA = 1, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 1967903..7359a9e 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -941,7 +941,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) seq_puts(seq, ,nodatacow); if (btrfs_test_opt(root, NOBARRIER)) seq_puts(seq, ,nobarrier); - if (info-max_inline != 8192 * 1024) + if (info-max_inline != BTRFS_DEFAULT_MAX_INLINE) seq_printf(seq, ,max_inline=%llu, (unsigned long long)info-max_inline); if (info-alloc_start != 0) -- 1.7.9.5 -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 6/7] btrfs: cleanup: removed unused 'btrfs_reada_detach'
On Thu, Aug 08, 2013 at 09:11:01PM +0200, Arne Jansen wrote: On 08/08/13 19:46, Zach Brown wrote: even though the function is currently unused, I'm hesitating to remove it as it's part of the reada-API and might be handy for anyone going to use the API in the future. I agree. As replied here, http://www.mail-archive.com/linux-btrfs@vger.kernel.org/msg24047.html please keep the function. If we're keeping score, put me down for being in favour of removing dead untested code. git ressurection is easy. It's not really untested, it has been in use some time ago. But of course there's a chance that some changes broke it. Yes, git ressurection is easy. To inform potential users, you might just leave a comment like this: /* * There has been a function once to detach from a running reada. * If you need such functionality, just revert the commit that * added this comment. */ And please write the exact commit sha1 instead of 'the commit' :) I've used the _detach function when prototyping readdir readahead, that did not bring the speedup as expected so more work is needed, that's why I'm concerned about removing it. But, if Arne is ok with that, so be it. david -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Btrfs: set default max_inline to 8KiB instead of 8MiB
On Thu, Aug 08, 2013 at 10:45:48PM +0100, Filipe David Borba Manana wrote: 8MiB is way too large and likely set by mistake. This is not a significant issue as in practice the max amount of data added to an inline extent is also limited by the page cache and btree leaf sizes. Signed-off-by: Filipe David Borba Manana fdman...@gmail.com Reviewed-by: David Sterba dste...@suse.cz -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH RFC] Btrfs: add support for persistent mount options
On Wed, Aug 07, 2013 at 03:46:20PM +0200, Martin Steigerwald wrote: Because really, the motivation sounds like it's primarily for significant on-disk format changes controlled by mount options. I understand that motivation more than being able to persist something like noatime. For a hotplug-able SSD having noatime stored persistently IMHO makes a lot of sense as well. I agree, and we can let btrfs understand noatime (or ro) even if they get processed by vfs layer. -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [v2 2/8] Btrfs-progs: add missing man information for btrfs-debug-tree
Thanks for updating the license. One comment that's applicable to all patches: On Wed, Aug 07, 2013 at 01:54:03PM +0800, Wang Shilong wrote: +.SH AVAILABILITY +.B btrfs-debug-tree +is part of btrfs-progs. Btrfs is currently under heavy development, +and not suitable for any uses other than benchmarking and review. That's (arguably) not true anymore and I'd rather see it removed or updated closer to current status. +Please refer to the btrfs wiki http://btrfs.wiki.kernel.org for +further details. -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH RFC] Btrfs: track compression algorithm on inodes
On Wed, Aug 07, 2013 at 12:29:44PM +0100, Filipe David Borba Manana wrote: Currently the compression settings (algorithm and force mode) need to be specified at mount time in order to have newly created files compressed. [...] I think we should take the top-down approach and start with UI how to set these attributes, then think where to store the information (existing strucutures, xattrs). Tweaking compressin per-file is desirable, but with your patch it's required to set it via a mount option and that's not very practical (only via remount, root required). david -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH RFC] Btrfs: add support for persistent mount options
On Wed, Aug 07, 2013 at 12:33:09PM +0100, Filipe David Manana wrote: Thanks, I missed to find that before. The implementation is very different from the one I proposed. That's one of the fundaental questions how to store the information: inside existing structures, via xattrs, under new tree items. Each one has pros and cons. Designing and merging the properties feature takes time, but we want to tune simple things now. The wiki project mentions ‘tune2fs’ as an example, but the project details are not always accurate about how to do the things, it’s more like ideas what to do. If you’re going to work on that, please claim the project on the wiki, and possibly write more details abou the design. I will. The project is titled as persistent mount options, are you willing to take the more general per-object properties task? IMHO there's not much difference, the UI should be the same, just that it implements per-fs or per-subvolume properties like mount options. The rest of the object properties has to be collected and agreed on. I'm sure there's community knowledge of what's desired, so it's a matter of writing it down and bikeshe^Wagreement on the naming syntax. david -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[patch v2 1/2] Btrfs: fix possible memory leak in find_parent_nodes()
The origin code dealt with 'ref' as following steps: |-list_del(ref-list) |-some operations |-kfree(ref) If operations failed, it would goto label 'out' without freeing this 'ref'. and then memory leak would happen.Just move list_del() after kfree() will fix the problem. Signed-off-by: Wang Shilong wangsl.f...@cn.fujitsu.com Reviewed-by: Miao Xie mi...@cn.fujitsu.com --- V1-V2: add explanations to changelog --- fs/btrfs/backref.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 68048d6..7b55c95 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -911,7 +911,6 @@ again: while (!list_empty(prefs)) { ref = list_first_entry(prefs, struct __prelim_ref, list); - list_del(ref-list); WARN_ON(ref-count 0); if (ref-count ref-root_id ref-parent == 0) { /* no parent == root of tree */ @@ -956,6 +955,7 @@ again: eie-next = ref-inode_list; } } + list_del(ref-list); kfree(ref); } -- 1.8.0.1 -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[patch v2 2/2] Btrfs: allocate prelim_ref with a slab allocater
struct __prelim_ref is allocated and freed frequently when walking backref tree, using slab allocater can not only speed up allocating but also detect memory leaks. Signed-off-by: Wang Shilong wangsl.f...@cn.fujitsu.com Reviewed-by: Miao Xie mi...@cn.fujitsu.com --- V1-V2: 1.fix a missing allocating case that should be used by kmem_cache_alloc() spotted by Jan Schmidt 2.rename prelim_ref to btrfs_prelim_ref addressed by David --- fs/btrfs/backref.c | 33 +++-- fs/btrfs/backref.h | 2 ++ fs/btrfs/super.c | 8 3 files changed, 37 insertions(+), 6 deletions(-) diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c index 7b55c95..b352d15 100644 --- a/fs/btrfs/backref.c +++ b/fs/btrfs/backref.c @@ -119,6 +119,26 @@ struct __prelim_ref { u64 wanted_disk_byte; }; +static struct kmem_cache *btrfs_prelim_ref_cache; + +int __init btrfs_prelim_ref_init(void) +{ + btrfs_prelim_ref_cache = kmem_cache_create(btrfs_prelim_ref, + sizeof(struct __prelim_ref), + 0, + SLAB_RECLAIM_ACCOUNT | SLAB_MEM_SPREAD, + NULL); + if (!btrfs_prelim_ref_cache) + return -ENOMEM; + return 0; +} + +void btrfs_prelim_ref_exit(void) +{ + if (btrfs_prelim_ref_cache) + kmem_cache_destroy(btrfs_prelim_ref_cache); +} + /* * the rules for all callers of this function are: * - obtaining the parent is the goal @@ -165,7 +185,7 @@ static int __add_prelim_ref(struct list_head *head, u64 root_id, { struct __prelim_ref *ref; - ref = kmalloc(sizeof(*ref), gfp_mask); + ref = kmem_cache_alloc(btrfs_prelim_ref_cache, gfp_mask); if (!ref) return -ENOMEM; @@ -369,7 +389,8 @@ static int __resolve_indirect_refs(struct btrfs_fs_info *fs_info, /* additional parents require new refs being added here */ while ((node = ulist_next(parents, uiter))) { - new_ref = kmalloc(sizeof(*new_ref), GFP_NOFS); + new_ref = kmem_cache_alloc(btrfs_prelim_ref_cache, + GFP_NOFS); if (!new_ref) { ret = -ENOMEM; goto out; @@ -493,7 +514,7 @@ static void __merge_refs(struct list_head *head, int mode) ref1-count += ref2-count; list_del(ref2-list); - kfree(ref2); + kmem_cache_free(btrfs_prelim_ref_cache, ref2); } } @@ -956,7 +977,7 @@ again: } } list_del(ref-list); - kfree(ref); + kmem_cache_free(btrfs_prelim_ref_cache, ref); } out: @@ -964,13 +985,13 @@ out: while (!list_empty(prefs)) { ref = list_first_entry(prefs, struct __prelim_ref, list); list_del(ref-list); - kfree(ref); + kmem_cache_free(btrfs_prelim_ref_cache, ref); } while (!list_empty(prefs_delayed)) { ref = list_first_entry(prefs_delayed, struct __prelim_ref, list); list_del(ref-list); - kfree(ref); + kmem_cache_free(btrfs_prelim_ref_cache, ref); } return ret; diff --git a/fs/btrfs/backref.h b/fs/btrfs/backref.h index 8f2e767..a910b27 100644 --- a/fs/btrfs/backref.h +++ b/fs/btrfs/backref.h @@ -72,4 +72,6 @@ int btrfs_find_one_extref(struct btrfs_root *root, u64 inode_objectid, struct btrfs_inode_extref **ret_extref, u64 *found_off); +int __init btrfs_prelim_ref_init(void); +void btrfs_prelim_ref_exit(void); #endif diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 1967903..812ab3d 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -56,6 +56,7 @@ #include rcu-string.h #include dev-replace.h #include free-space-cache.h +#include backref.h #define CREATE_TRACE_POINTS #include trace/events/btrfs.h @@ -1800,6 +1801,10 @@ static int __init init_btrfs_fs(void) if (err) goto free_auto_defrag; + err = btrfs_prelim_ref_init(); + if (err) + goto free_prelim_ref; + err = btrfs_interface_init(); if (err) goto free_delayed_ref; @@ -1817,6 +1822,8 @@ static int __init init_btrfs_fs(void) unregister_ioctl: btrfs_interface_exit(); +free_prelim_ref: + btrfs_prelim_ref_exit(); free_delayed_ref: btrfs_delayed_ref_exit(); free_auto_defrag: @@ -1843,6 +1850,7 @@ static void __exit exit_btrfs_fs(void) btrfs_delayed_ref_exit(); btrfs_auto_defrag_exit(); btrfs_delayed_inode_exit(); +
btrfs qgroup destroy - ERROR: unable to create quota group: Device or resource busy
I'm using qgroups and have created a few hundreds of subvolumes in the past. It seems that btrfs automatically assigns a qgroup to newly created snapshot/subvolume, but does not destroy the qgroup when the subvolume is deleted. So I've tried to destroy the unused qgroups, with mixed success. I was able to destroy most of them, but some are still failing, i.e.: # btrfs qgroup destroy 4494 /mnt/lxc1 ERROR: unable to create quota group: Device or resource busy Note the negative number here, but I also have qgroups with both positive numbers, which I'm not able to destroy as well: # btrfs qgroup show /mnt/lxc1 | grep 4494 0/4494 839516160 -69632 qgroup 4494 is not used by any subvolume: # btrfs sub list /mnt/lxc1 | grep 4494 I did run btrfs quota rescan for this filesystem, hoping it will fix the problem, but it didn't. Any advice? -- Tomasz Chmielewski http://wpkg.org -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] Btrfs: set default max_inline to 8KiB instead of 8MiB
On thu, 8 Aug 2013 22:45:48 +0100, Filipe David Borba Manana wrote: 8MiB is way too large and likely set by mistake. This is not a significant issue as in practice the max amount of data added to an inline extent is also limited by the page cache and btree leaf sizes. I don't think 8KB is a reasonable value of the default max inline size because it makes no sense on the machine whose page size is 4KB. I think 4KB is a reasonable value, because we may mount the fs on the machines with the different page size in the future, in order to avoid the compatible problem, we should use the min page size as the max inline size. Thanks Miao Signed-off-by: Filipe David Borba Manana fdman...@gmail.com --- fs/btrfs/disk-io.c |2 +- fs/btrfs/disk-io.h |2 ++ fs/btrfs/super.c |2 +- 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 5de9ad7..aff37bd 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2189,7 +2189,7 @@ int open_ctree(struct super_block *sb, atomic_set(fs_info-defrag_running, 0); atomic64_set(fs_info-tree_mod_seq, 0); fs_info-sb = sb; - fs_info-max_inline = 8192 * 1024; + fs_info-max_inline = BTRFS_DEFAULT_MAX_INLINE; fs_info-metadata_ratio = 0; fs_info-defrag_inodes = RB_ROOT; fs_info-free_chunk_space = 0; diff --git a/fs/btrfs/disk-io.h b/fs/btrfs/disk-io.h index b71acd6e..e76c1a2 100644 --- a/fs/btrfs/disk-io.h +++ b/fs/btrfs/disk-io.h @@ -25,6 +25,8 @@ #define BTRFS_SUPER_MIRROR_MAX3 #define BTRFS_SUPER_MIRROR_SHIFT 12 +#define BTRFS_DEFAULT_MAX_INLINE 8192 + enum { BTRFS_WQ_ENDIO_DATA = 0, BTRFS_WQ_ENDIO_METADATA = 1, diff --git a/fs/btrfs/super.c b/fs/btrfs/super.c index 1967903..7359a9e 100644 --- a/fs/btrfs/super.c +++ b/fs/btrfs/super.c @@ -941,7 +941,7 @@ static int btrfs_show_options(struct seq_file *seq, struct dentry *dentry) seq_puts(seq, ,nodatacow); if (btrfs_test_opt(root, NOBARRIER)) seq_puts(seq, ,nobarrier); - if (info-max_inline != 8192 * 1024) + if (info-max_inline != BTRFS_DEFAULT_MAX_INLINE) seq_printf(seq, ,max_inline=%llu, (unsigned long long)info-max_inline); if (info-alloc_start != 0) -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: btrfs qgroup destroy - ERROR: unable to create quota group: Device or resource busy
Hello, On 08/09/2013 01:39 PM, Tomasz Chmielewski wrote: I'm using qgroups and have created a few hundreds of subvolumes in the past. It seems that btrfs automatically assigns a qgroup to newly created snapshot/subvolume, but does not destroy the qgroup when the subvolume is deleted. This should be implemented. And will soon. So I've tried to destroy the unused qgroups, with mixed success. I was able to destroy most of them, but some are still failing, i.e.: # btrfs qgroup destroy 4494 /mnt/lxc1 ERROR: unable to create quota group: Device or resource busy Just remove qgroup(4494)'s parent qgroup. then it can be removed. Anyway, i think this is unnecessary. Thanks, Wang Note the negative number here, but I also have qgroups with both positive numbers, which I'm not able to destroy as well: # btrfs qgroup show /mnt/lxc1 | grep 4494 0/4494 839516160 -69632 qgroup 4494 is not used by any subvolume: # btrfs sub list /mnt/lxc1 | grep 4494 I did run btrfs quota rescan for this filesystem, hoping it will fix the problem, but it didn't. Any advice? -- To unsubscribe from this list: send the line unsubscribe linux-btrfs in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html