Re: [PATCH] btrfs: remove old tree_root dirent processing in btrfs_real_readdir()

2016-11-04 Thread kbuild test robot
Hi Jeff,

[auto build test WARNING on btrfs/next]
[also build test WARNING on v4.9-rc3 next-20161028]
[if your patch is applied to the wrong git tree, please drop us a note to help 
improve the system]

url:
https://github.com/0day-ci/linux/commits/jeffm-suse-com/btrfs-remove-old-tree_root-dirent-processing-in-btrfs_real_readdir/20161105-104432
base:   https://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs.git 
next
config: i386-randconfig-b0-11051048 (attached as .config)
compiler: gcc-5 (Debian 5.4.1-2) 5.4.1 20160904
reproduce:
# save the attached .config to linux build tree
make ARCH=i386 

All warnings (new ones prefixed by >>):

   fs/btrfs/inode.c: In function 'btrfs_real_readdir':
>> fs/btrfs/inode.c:5754:6: warning: unused variable 'di_len' 
>> [-Wunused-variable]
 u32 di_len;
 ^
>> fs/btrfs/inode.c:5753:6: warning: unused variable 'di_total' 
>> [-Wunused-variable]
 u32 di_total;
 ^
>> fs/btrfs/inode.c:5752:6: warning: unused variable 'di_cur' 
>> [-Wunused-variable]
 u32 di_cur;
 ^

vim +/di_len +5754 fs/btrfs/inode.c

16cdcec7 Miao Xie2011-04-22  5746   struct list_head del_list;
39279cc3 Chris Mason 2007-06-12  5747   int ret;
5f39d397 Chris Mason 2007-10-15  5748   struct extent_buffer *leaf;
39279cc3 Chris Mason 2007-06-12  5749   int slot;
39279cc3 Chris Mason 2007-06-12  5750   unsigned char d_type;
39279cc3 Chris Mason 2007-06-12  5751   int over = 0;
39279cc3 Chris Mason 2007-06-12 @5752   u32 di_cur;
39279cc3 Chris Mason 2007-06-12 @5753   u32 di_total;
39279cc3 Chris Mason 2007-06-12 @5754   u32 di_len;
5f39d397 Chris Mason 2007-10-15  5755   char tmp_name[32];
5f39d397 Chris Mason 2007-10-15  5756   char *name_ptr;
5f39d397 Chris Mason 2007-10-15  5757   int name_len;

:: The code at line 5754 was first introduced by commit
:: 39279cc3d2704cfbf9c35dcb5bdd392159ae4625 Btrfs: split up super.c

:: TO: Chris Mason 
:: CC: David Woodhouse 

---
0-DAY kernel test infrastructure                Open Source Technology Center
https://lists.01.org/pipermail/kbuild-all   Intel Corporation


.config.gz
Description: application/gzip


[PATCH] btrfs: remove old tree_root dirent processing in btrfs_real_readdir()

2016-11-04 Thread jeffm
From: Jeff Mahoney 

Commit 3de4586c527 (Btrfs: Allow subvolumes and snapshots anywhere
in the directory tree) introduced the current system of placing
snapshots in the directory tree.  It also introduced the behavior of
creating the snapshot and then creating the directory entries for it.

We've kept this code around for compatibility reasons, but it turns
out that no file systems with the old tree_root based snapshots can
be mounted on newer (>= 2009) kernels anyway.  About a month after the
above commit, commit 2a7108ad89e (Btrfs: rev the disk format for the
inode compat and csum selection changes) landed, changing the superblock
magic number.

As a result, we know that we'll never encounter tree_root-based dirents
or have to deal with skipping our own snapshot dirents.  Since that
also means that we're now only iterating over DIR_INDEX items, which only
contain one directory entry per leaf item, we don't need to loop over
the leaf item contents anymore either.

Signed-off-by: Jeff Mahoney 
---
 fs/btrfs/inode.c | 115 ++-
 1 file changed, 37 insertions(+), 78 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 2b790bd..74f5a92 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -5808,17 +5808,13 @@ static int btrfs_real_readdir(struct file *file, struct 
dir_context *ctx)
u32 di_cur;
u32 di_total;
u32 di_len;
-   int key_type = BTRFS_DIR_INDEX_KEY;
char tmp_name[32];
char *name_ptr;
int name_len;
int is_curr = 0;/* ctx->pos points to the current index? */
bool emitted;
bool put = false;
-
-   /* FIXME, use a real flag for deciding about the key type */
-   if (root->fs_info->tree_root == root)
-   key_type = BTRFS_DIR_ITEM_KEY;
+   struct btrfs_key location;
 
if (!dir_emit_dots(file, ctx))
return 0;
@@ -5829,14 +5825,11 @@ static int btrfs_real_readdir(struct file *file, struct 
dir_context *ctx)
 
path->reada = READA_FORWARD;
 
-   if (key_type == BTRFS_DIR_INDEX_KEY) {
-   INIT_LIST_HEAD(&ins_list);
-   INIT_LIST_HEAD(&del_list);
-   put = btrfs_readdir_get_delayed_items(inode, &ins_list,
- &del_list);
-   }
+   INIT_LIST_HEAD(&ins_list);
+   INIT_LIST_HEAD(&del_list);
+   put = btrfs_readdir_get_delayed_items(inode, &ins_list, &del_list);
 
-   key.type = key_type;
+   key.type = BTRFS_DIR_INDEX_KEY;
key.offset = ctx->pos;
key.objectid = btrfs_ino(inode);
 
@@ -5862,85 +5855,53 @@ static int btrfs_real_readdir(struct file *file, struct 
dir_context *ctx)
 
if (found_key.objectid != key.objectid)
break;
-   if (found_key.type != key_type)
+   if (found_key.type != BTRFS_DIR_INDEX_KEY)
break;
if (found_key.offset < ctx->pos)
goto next;
-   if (key_type == BTRFS_DIR_INDEX_KEY &&
-   btrfs_should_delete_dir_index(&del_list,
- found_key.offset))
+   if (btrfs_should_delete_dir_index(&del_list, found_key.offset))
goto next;
 
ctx->pos = found_key.offset;
is_curr = 1;
 
di = btrfs_item_ptr(leaf, slot, struct btrfs_dir_item);
-   di_cur = 0;
-   di_total = btrfs_item_size(leaf, item);
-
-   while (di_cur < di_total) {
-   struct btrfs_key location;
-
-   if (verify_dir_item(root, leaf, di))
-   break;
+   if (verify_dir_item(root, leaf, di))
+   goto next;
 
-   name_len = btrfs_dir_name_len(leaf, di);
-   if (name_len <= sizeof(tmp_name)) {
-   name_ptr = tmp_name;
-   } else {
-   name_ptr = kmalloc(name_len, GFP_KERNEL);
-   if (!name_ptr) {
-   ret = -ENOMEM;
-   goto err;
-   }
+   name_len = btrfs_dir_name_len(leaf, di);
+   if (name_len <= sizeof(tmp_name)) {
+   name_ptr = tmp_name;
+   } else {
+   name_ptr = kmalloc(name_len, GFP_KERNEL);
+   if (!name_ptr) {
+   ret = -ENOMEM;
+   goto err;
}
-   read_extent_buffer(leaf, name_ptr,
-  (unsigned long)(di + 1), name_len);
-
-   d_type = btrfs_filetype_table[btrfs_dir_type(leaf, di)];
-   

Re: [Bug 186671] New: OOM on system with just rsync running 32GB of ram 30GB of pagecache

2016-11-04 Thread Vlastimil Babka
On 11/04/2016 03:13 PM, E V wrote:
> After the system panic'd yesterday I booted back into 4.8.4 and
> restarted the rsync's. I'm away on vacation next week, so when I get
> back I'll get rc4 or rc5 and try again. In the mean time here's data
> from the system running 4.8.4 without problems for about a day. I'm
> not familiar with xxd and didn't see a -e option, so used -E:
> xxd -E -g8 -c8 /proc/kpagecount | cut -d" " -f2 | sort | uniq -c
> 8258633 
>  216440 0100

The lack of -e means it's big endian, which is not a big issue. So here
most of memory is free, some pages have just one pin, and only
relatively few have more. The vmstats also doesn't show anything bad, so
we'll have to wait if something appears within the week, or after you
try 4.9 again. Thanks.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] Btrfs: adjust len of writes if following a preallocated extent

2016-11-04 Thread Liu Bo
If we have

|0--hole--4095||4096--preallocate--12287|

instead of using preallocated space, an 8K direct write will just
create a new 8K extent and it'll end up with

|0--new extent--8191||8192--preallocate--12287|

It's because we find a hole em and then go to create a new 8K
extent directly without adjusting @len.

Signed-off-by: Liu Bo 
---
 fs/btrfs/inode.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 2b790bd..48e9356 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -7783,10 +7783,12 @@ static int btrfs_get_blocks_direct(struct inode *inode, 
sector_t iblock,
}
 
/*
-* this will cow the extent, reset the len in case we changed
-* it above
+* this will cow the extent, if em is within [start, len], then
+* probably we've found a preallocated/existing extent, let's
+* give it a chance to use preallocated space.
 */
-   len = bh_result->b_size;
+   len = min_t(u64, bh_result->b_size, em->len - (start - em->start));
+   len = ALIGN(len, root->sectorsize);
free_extent_map(em);
em = btrfs_new_extent_direct(inode, start, len);
if (IS_ERR(em)) {
-- 
2.5.5

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: btrfs check --repair: ERROR: cannot read chunk root

2016-11-04 Thread Marc MERLIN
On Fri, Nov 04, 2016 at 02:00:43PM +0500, Roman Mamedov wrote:
> On Fri, 4 Nov 2016 01:01:13 -0700
> Marc MERLIN  wrote:
> 
> > Basically I have this:
> > sde8:64   0   3.7T  0 
> > └─sde1 8:65   0   3.7T  0 
> >   └─md59:50  14.6T  0 
> > └─bcache0252:00  14.6T  0 
> >   └─crypt_bcache0 (dm-0) 253:00  14.6T  0 
> > 
> > I'll try dd'ing the md5 directly now, but that's going to take another 2 
> > days :(
> > 
> > That said, given that almost half the device is not readable from user space
> > for some reason, that would explain why btrfs check is failing. Obviously it
> > can't do its job if it can't read blocks.
> 
> I don't see anything to support the notion that "half is unreadable", maybe
> just a 512-byte sector is unreadable -- but that would be enough to make
> regular dd bail out -- which is why you should be using dd_rescue for this,
> not regular dd. Assuming you just want to copy over as much data as possible,
> and not simply test if dd fails or not (but in any case dd_rescue at least
> would not fail instantly and would tell you precise count of how much is
> unreadable).

Thanks for the plug on ddrescue, I have used it to rescue drives in the
past.
Here, however, everything after the 8.8TB mark, is unreadable, so there
is nothing to skip.

Because the underlying drives are fine, I'm not entirely sure where the
issue is although it has to be on the mdadm side and not related to
btrfs.

And of course the mdadm array shows clean, and I have already disabled
the mdadm per drive bad block (mis-)feature which probably is
responsible for all the problems I've had here.
myth:~# mdadm --examine-badblocks /dev/sd[defgh]1
No bad-blocks list configured on /dev/sdd1
No bad-blocks list configured on /dev/sde1
No bad-blocks list configured on /dev/sdf1
No bad-blocks list configured on /dev/sdg1
No bad-blocks list configured on /dev/sdh1

I'm also still perplexed as to why despite the read error I'm getting,
absolutely nothing is logged in the kernel :-/

I'll pursue that further and post a summary on the thread here if I find
something interesting.

Marc
-- 
"A mouse is a device used to point at the xterm you want to type in" - A.S.R.
Microsoft is to operating systems 
   what McDonalds is to gourmet cooking
Home page: http://marc.merlins.org/ | PGP 1024R/763BE901
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[GIT PULL] Btrfs

2016-11-04 Thread Chris Mason
Hi Linus,

My for-linus-4.9 branch:

git://git.kernel.org/pub/scm/linux/kernel/git/mason/linux-btrfs.git 
for-linus-4.9

Has some fixes that Dave Sterba collected.  We held off on these last 
week because I was focused on the memory corruption testing.

I had asked you about pulling this directly from Dave, and you can skip
my merge commit here:

git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux.git for-4.9-rc3

I'll probably have the merge commit when I nail down a fix for the crash 
Dave Jones triggered with trinity, but wanted to let you pick.

Wang Xiaoguang (3) commits (+10/-2):
 btrfs: pass correct args to btrfs_async_run_delayed_refs() (+2/-2)
 btrfs: fix WARNING in btrfs_select_ref_head() (+3/-0)
 btrfs: make file clone aware of fatal signals (+5/-0)

Dan Carpenter (1) commits (+4/-4):
 Btrfs: remove some no-op casts

Liu Bo (1) commits (+8/-1):
 Btrfs: kill BUG_ON in do_relocation

Goldwyn Rodrigues (1) commits (+7/-2):
 btrfs: qgroup: Prevent qgroup->reserved from going subzero

Total: (6) commits (+29/-9)

  fs/btrfs/extent-tree.c |  3 +++
  fs/btrfs/extent_io.c   |  8 
  fs/btrfs/inode.c   | 13 +
  fs/btrfs/ioctl.c   |  5 +
  fs/btrfs/relocation.c  |  9 -
  5 files changed, 29 insertions(+), 9 deletions(-)
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Identifying reflink / CoW files

2016-11-04 Thread Saint Germain
On Thu, 3 Nov 2016 01:17:07 -0400, Zygo Blaxell
 wrote :

> On Thu, Oct 27, 2016 at 01:30:11PM +0200, Saint Germain wrote:
> > Hello,
> > 
> > Following the previous discussion:
> > https://www.spinics.net/lists/linux-btrfs/msg19075.html
> > 
> > I would be interested in finding a way to reliably identify
> > reflink / CoW files in order to use deduplication programs (like
> > fdupes, jdupes, rmlint) efficiently.
> > 
> > Using FIEMAP doesn't seem to be reliable according to this
> > discussion on rmlint:
> > https://github.com/sahib/rmlint/issues/132#issuecomment-157665154
> 
> Inline extents have no physical address (FIEMAP returns 0 in that
> field). You can't dedup them and each file can have only one, so if
> you see the FIEMAP_EXTENT_INLINE bit set, you can just skip
> processing the entire file immediately.
> 
> You can create a separate non-inline extent in a temporary file then
> use dedup to replace _both_ copies of the original inline extent.
> Or don't bother, as the savings are negligible.
> 
> > Is there another way that deduplication programs can easily use ?
> 
> The problem is that it's not files that are reflinked--individual
> extents are.  "reflink file copy" really just means "a file whose
> extents are 100% shared with another file." It's possible for files
> on btrfs to have any percentage of shared extents from 0 to 100% in
> increments of the host page size.  It's also possible for the blocks
> to be shared with different extent boundaries.
> 
> The quality of the result therefore depends on the amount of effort
> put into measuring it.  If you look for the first non-hole extent in
> each file and use its physical address as a physical file identifier,
> then you get a fast reflink detector function that has a high risk of
> false positives.  If you map out two files and compare physical
> addresses block by block, you get a slow function with a low risk of
> false positives (but maybe a small risk of false negatives too).
> 
> If your dedup program only does full-file reflink copies then the
> first extent physical address method is sufficient.  If your program
> does block- or extent-level dedup then it shouldn't be using files in
> its data model at all, except where necessary to provide a mechanism
> to access the physical blocks through the POSIX filesystem API.
> 
> FIEMAP will tell you about all the extents (physical address for
> extents that have them, zero for other extent types).  It's also slow
> and has assorted accuracy problems especially with compressed files.
> Any user can run FIEMAP, and it uses only standard structure arrays.
> 
> SEARCH_V2 is root-only and requires parsing variable-length binary
> btrfs data encoding, but it's faster than FIEMAP and gives more
> accurate results on compressed files.
> 

As the dedup program only does full-file reflink, the first extent
physical address method can be used as a fast first check to identify
potential files.

But how to implement the second check in order to have 0% risk of false
positive ?
Because you said that mapping out two files and comparing the physical
addresses block by block also has a low risk of false positives.

Thank you very much for the detailed explanation !
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [Bug 186671] New: OOM on system with just rsync running 32GB of ram 30GB of pagecache

2016-11-04 Thread E V
After the system panic'd yesterday I booted back into 4.8.4 and
restarted the rsync's. I'm away on vacation next week, so when I get
back I'll get rc4 or rc5 and try again. In the mean time here's data
from the system running 4.8.4 without problems for about a day. I'm
not familiar with xxd and didn't see a -e option, so used -E:
xxd -E -g8 -c8 /proc/kpagecount | cut -d" " -f2 | sort | uniq -c
8258633 
 216440 0100
   5576 0200
592 0300
195 0400
184 0500
171 0600
 70 0700
  3 0800
 17 0900
 48 0a00
 78 0b00
 33 0c00
 23 0d00
 18 0e00
  3 0f00
  5 1000
  2 1100
  7 1200
  5 1300
  2 1400
 36 1500
 10 1600
  6 1700
  3 1800
  8 1900
  4 1a00
  7 1b00
  4 1c00
  5 1d00
  3 1e00
 18 1f00
  9 2000
  9 2100
  9 2200
 19 2300
 13 2400
  6 2500
 13 2600
 13 2700
  3 2800
 16 2900
  7 2a00
 21 2b00
 33 2c00
 19 2d00
 54 2e00
 29 2f00
 72 3000
 27 3100
 102635 81ff

cat /proc/vmstat
nr_free_pages 106970
nr_zone_inactive_anon 110034
nr_zone_active_anon 108424
nr_zone_inactive_file 350017
nr_zone_active_file 2158161
nr_zone_unevictable 0
nr_zone_write_pending 114
nr_mlock 0
nr_slab_reclaimable 4962990
nr_slab_unreclaimable 415089
nr_page_table_pages 2149
nr_kernel_stack 6176
nr_bounce 0
numa_hit 403780590
numa_miss 176970926
numa_foreign 176970926
numa_interleave 19415
numa_local 403780590
numa_other 0
nr_free_cma 0
nr_inactive_anon 110034
nr_active_anon 108424
nr_inactive_file 350017
nr_active_file 2158161
nr_unevictable 0
nr_isolated_anon 0
nr_isolated_file 0
nr_pages_scanned 0
workingset_refault 1443060
workingset_activate 558143
workingset_nodereclaim 6879280
nr_anon_pages 216243
nr_mapped 6462
nr_file_pages 2510544
nr_dirty 114
nr_writeback 0
nr_writeback_temp 0
nr_shmem 2179
nr_shmem_hugepages 0
nr_shmem_pmdmapped 0
nr_anon_transparent_hugepages 0
nr_unstable 0
nr_vmscan_write 1127
nr_vmscan_immediate_reclaim 19056
nr_dirtied 254716641
nr_written 254532248
nr_dirty_threshold 383652
nr_dirty_background_threshold 50612
pgpgin 21962903
pgpgout 1024651087
pswpin 214
pswpout 1127
pgalloc_dma 0
pgalloc_dma32 87690791
pgalloc_normal 806119097
pgalloc_movable 0
allocstall_dma 0
allocstall_dma32 0
allocstall_normal 210
allocstall_movable 0
pgskip_dma 0
pgskip_dma32 0
pgskip_normal 0
pgskip_movable 0
pgfree 894694404
pgactivate 5513535
pgdeactivate 7989719
pgfault 4748538
pgmajfault 2528
pglazyfreed 0
pgrefill 7999038
pgsteal_kswapd 504125672
pgsteal_direct 36130
pgscan_kswapd 504479233
pgscan_direct 36142
pgscan_direct_throttle 0
zone_reclaim_failed 0
pginodesteal 1074
slabs_scanned 61625344
kswapd_inodesteal 1956613
kswapd_low_wmark_hit_quickly 49386
kswapd_high_wmark_hit_quickly 79880
pageoutrun 211656
pgrotated 203832
drop_pagecache 0
drop_slab 0
pgmigrate_success 684523
pgmigrate_fail 1189249
compact_migrate_scanned 94848219
compact_free_scanned 2329620072
compact_isolated 2648057
compact_stall 38
compact_fail 0
compact_success 38
compact_daemon_wake 9682
htlb_buddy_alloc_success 0
htlb_buddy_alloc_fail 0
unevictable_pgs_culled 12473
unevictable_pgs_scanned 0
unevictable_pgs_rescued 11979
unevictable_pgs_mlocked 14556
unevictable_pgs_munlocked 14556
unevictable_pgs_cleared 0
unevictable_pgs_stranded 0
thp_fault_alloc 0
thp_fault_fallback 0
thp_collapse_alloc 0
thp_collapse_alloc_failed 0
thp_file_alloc 0
thp_file_mapped 0
thp_split_page 0
thp_split_page_failed 0
thp_deferred_split_page 0
thp_split_pmd 0
thp_zero_page_alloc 0
thp_zero_page_alloc_failed 0

On Thu, Nov 3, 2016 at 7:58 PM, Vlastimil Babka  wrote:
> On 11/03/2016 07:53 PM, Andrew Morton wrote:
>>
>> (switched to email.  Please respond via emailed reply-to-all, not via the
>> bugzilla web interface).
>
> +CC also btrfs just in case it's a problem in page reclaim there
>
>> On Wed, 02 Nov 2016 13:02:39 + bugzilla-dae...@bugzilla.kernel.org wrote:
>>
>>> https://bugzilla.kernel.org/show_bug.cgi?id=186671
>>>
>>> Bug ID: 186671
>>>Summary: OOM on system with just rsync running 32GB of ram 30GB
>>> of pagecache
>>>Product: Memory Management
>>>Version: 2.5
>>> Kernel Version: 4.9-rc3
>>>   Hardware: x86-64
>>> OS: Linux
>>>   Tree: Mainline
>>>  

Re: btrfs check --repair: ERROR: cannot read chunk root

2016-11-04 Thread Roman Mamedov
On Fri, 4 Nov 2016 01:01:13 -0700
Marc MERLIN  wrote:

> Basically I have this:
> sde8:64   0   3.7T  0 
> └─sde1 8:65   0   3.7T  0 
>   └─md59:50  14.6T  0 
> └─bcache0252:00  14.6T  0 
>   └─crypt_bcache0 (dm-0) 253:00  14.6T  0 
> 
> I'll try dd'ing the md5 directly now, but that's going to take another 2 days 
> :(
> 
> That said, given that almost half the device is not readable from user space
> for some reason, that would explain why btrfs check is failing. Obviously it
> can't do its job if it can't read blocks.

I don't see anything to support the notion that "half is unreadable", maybe
just a 512-byte sector is unreadable -- but that would be enough to make
regular dd bail out -- which is why you should be using dd_rescue for this,
not regular dd. Assuming you just want to copy over as much data as possible,
and not simply test if dd fails or not (but in any case dd_rescue at least
would not fail instantly and would tell you precise count of how much is
unreadable).

There is "GNU ddrescue" and "dd_rescue", I liked the first one better, but
they both work on a similar principle.

Also, didn't you recently have issues with bad block lists on mdadm? This
mysterious "unreadable and nothing in dmesg" does appear to be a continuation
of that.

-- 
With respect,
Roman
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] btrfs-progs: send-test: add checking of clone-src option

2016-11-04 Thread Tsutomu Itoh
Check the size of the send stream produced with the clone-src (-c) option.

Signed-off-by: Tsutomu Itoh 
---
 tests/misc-tests/016-send-clone-src/test.sh | 54 +
 1 file changed, 54 insertions(+)
 create mode 100755 tests/misc-tests/016-send-clone-src/test.sh

diff --git a/tests/misc-tests/016-send-clone-src/test.sh 
b/tests/misc-tests/016-send-clone-src/test.sh
new file mode 100755
index 000..16d
--- /dev/null
+++ b/tests/misc-tests/016-send-clone-src/test.sh
@@ -0,0 +1,54 @@
+#!/bin/bash
+#
+# test for sending stream size of clone-src option
+
+source $TOP/tests/common
+
+check_prereq mkfs.btrfs
+check_prereq btrfs
+
+setup_root_helper
+prepare_test_dev 2g
+
+run_check $TOP/mkfs.btrfs -f $IMAGE
+run_check_mount_test_dev
+
+here=`pwd`
+cd "$TEST_MNT" || _fail "cannot chdir to TEST_MNT"
+
+run_check $SUDO_HELPER btrfs subvolume create subv-parent1
+run_check $SUDO_HELPER dd if=/dev/urandom of=subv-parent1/file1_1 bs=1M 
count=10
+run_check $SUDO_HELPER btrfs subvolume snapshot -r subv-parent1 subv-snap1_1
+run_check $SUDO_HELPER dd if=/dev/urandom of=subv-parent1/file1_2 bs=1M 
count=10
+run_check $SUDO_HELPER btrfs subvolume snapshot -r subv-parent1 subv-snap1_2
+run_check $SUDO_HELPER dd if=/dev/urandom of=subv-parent1/file1_3 bs=1M 
count=10
+run_check $SUDO_HELPER btrfs subvolume snapshot -r subv-parent1 subv-snap1_3
+
+run_check $SUDO_HELPER btrfs subvolume create subv-parent2
+run_check $SUDO_HELPER dd if=/dev/urandom of=subv-parent2/file2_1 bs=1M 
count=10
+run_check $SUDO_HELPER btrfs subvolume snapshot -r subv-parent2 subv-snap2_1
+run_check $SUDO_HELPER dd if=/dev/urandom of=subv-parent2/file2_2 bs=1M 
count=10
+run_check $SUDO_HELPER btrfs subvolume snapshot -r subv-parent2 subv-snap2_2
+run_check $SUDO_HELPER dd if=/dev/urandom of=subv-parent2/file2_3 bs=1M 
count=10
+run_check $SUDO_HELPER btrfs subvolume snapshot -r subv-parent2 subv-snap2_3
+
+run_check $SUDO_HELPER btrfs send -f "$here"/send.stream.before \
+   -c subv-snap1_1 -c subv-snap2_1 subv-snap1_[23] subv-snap2_[23]
+
+run_check $SUDO_HELPER $TOP/btrfs send -f "$here"/send.stream.after \
+   -c subv-snap1_1 -c subv-snap2_1 subv-snap1_[23] subv-snap2_[23]
+
+before_size=`ls -l "$here"/send.stream.before | awk '{print $5}'`
+after_size=`ls -l "$here"/send.stream.after | awk '{print $5}'`
+
+if [ $before_size -lt $after_size ]; then
+   run_check ls -l "$here"/send.stream.*
+   _fail "sending stream size is bigger than old stream"
+fi
+
+run_check rm -f "$here"/send.stream.*
+
+cd "$here" || _fail "cannot chdir back to test directory"
+
+run_check_umount_test_dev
+
-- 
2.9.3
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v2 3/3] btrfs-progs: send: fix handling of -c option

2016-11-04 Thread Tsutomu Itoh
When two or more -c options are specified, btrfs send cannot find a
suitable parent, so the output stream is bigger than the correct one.

[before]
# btrfs send -f /tmp/data1 -c Snap0 -c ../SnapX Snap[12] ../SnapY
At subvol Snap1
At subvol Snap2
At subvol ../SnapY
# ls -l /tmp/data1
-rw------- 1 root root 3153 Oct 19 10:37 /tmp/data1
#

[after]
# btrfs send -f /tmp/data1 -c Snap0 -c ../SnapX Snap[12] ../SnapY
At subvol Snap1
At subvol Snap2
At subvol ../SnapY
# ls -l /tmp/data1
-rw------- 1 root root 1492 Oct 19 10:39 /tmp/data1
#

Signed-off-by: Tsutomu Itoh 
---
v2: make helper functions
---
 cmds-send.c | 58 ++
 1 file changed, 38 insertions(+), 20 deletions(-)

diff --git a/cmds-send.c b/cmds-send.c
index 2a8a697..f831e99 100644
--- a/cmds-send.c
+++ b/cmds-send.c
@@ -411,6 +411,36 @@ out:
return ret;
 }
 
+static int set_root_info(struct btrfs_send *s, char *subvol, u64 *root_id)
+{
+   int ret;
+
+   ret = init_root_path(s, subvol);
+   if (ret < 0)
+   goto out;
+
+   ret = get_root_id(s, subvol_strip_mountpoint(s->root_path, subvol),
+   root_id);
+   if (ret < 0) {
+   error("cannot resolve rootid for %s", subvol);
+   goto out;
+   }
+
+out:
+   return ret;
+}
+
+static void free_send_info(struct btrfs_send *s)
+{
+   if (s->mnt_fd >= 0) {
+   close(s->mnt_fd);
+   s->mnt_fd = -1;
+   }
+   free(s->root_path);
+   s->root_path = NULL;
+   subvol_uuid_search_finit(&s->sus);
+}
+
 int cmd_send(int argc, char **argv)
 {
char *subvol = NULL;
@@ -460,18 +490,10 @@ int cmd_send(int argc, char **argv)
goto out;
}
 
-   ret = init_root_path(&send, subvol);
+   ret = set_root_info(&send, subvol, &root_id);
if (ret < 0)
goto out;
 
-   ret = get_root_id(&send,
-   subvol_strip_mountpoint(send.root_path, subvol),
-   &root_id);
-   if (ret < 0) {
-   error("cannot resolve rootid for %s", subvol);
-   goto out;
-   }
-
ret = is_subvol_ro(&send, subvol);
if (ret < 0)
goto out;
@@ -486,15 +508,9 @@ int cmd_send(int argc, char **argv)
error("cannot add clone source: %s", 
strerror(-ret));
goto out;
}
-   subvol_uuid_search_finit(&send.sus);
free(subvol);
subvol = NULL;
-   if (send.mnt_fd >= 0) {
-   close(send.mnt_fd);
-   send.mnt_fd = -1;
-   }
-   free(send.root_path);
-   send.root_path = NULL;
+   free_send_info(&send);
full_send = 0;
break;
case 'f':
@@ -651,6 +667,10 @@ int cmd_send(int argc, char **argv)
}
 
if (!full_send && root_id) {
+   ret = set_root_info(&send, subvol, &root_id);
+   if (ret < 0)
+   goto out;
+
ret = find_good_parent(&send, root_id, &parent_root_id);
if (ret < 0) {
error("parent determination failed for %lld",
@@ -680,6 +700,7 @@ int cmd_send(int argc, char **argv)
error("cannot add clone source: %s", 
strerror(-ret));
goto out;
}
+   free_send_info(&send);
}
}
 
@@ -689,10 +710,7 @@ out:
free(subvol);
free(snapshot_parent);
free(send.clone_sources);
-   if (send.mnt_fd >= 0)
-   close(send.mnt_fd);
-   free(send.root_path);
-   subvol_uuid_search_finit(&send.sus);
+   free_send_info(&send);
return !!ret;
 }
 
-- 
2.9.3
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: btrfs check --repair: ERROR: cannot read chunk root

2016-11-04 Thread Marc MERLIN
On Mon, Oct 31, 2016 at 09:21:40PM -0700, Marc MERLIN wrote:
> On Tue, Nov 01, 2016 at 12:13:38PM +0800, Qu Wenruo wrote:
> > Would you try to locate the range where we start to fail to read?
> > 
> > I still think the root problem is we failed to read the device in user
> > space.
>  
> Understood.
> 
> I'll run this then:
> myth:~# dd if=/dev/mapper/crypt_bcache0 of=/dev/null bs=1M &
> [2] 21108
> myth:~# while :; do killall -USR1 dd; sleep 1200; done
> 275+0 records in
> 274+0 records out
> 287309824 bytes (287 MB) copied, 7.20248 s, 39.9 MB/s
> 
> This will take a while to run, I'll report back on how far it goes.

Well, turns out you were right. My array is 14TB and dd was only able to
copy 8.8TB out of it.

I wonder if it's a bug with bcache and source devices that are too big?

8782434271232 bytes (8.8 TB) copied, 214809 s, 40.9 MB/s
dd: reading `/dev/mapper/crypt_bcache0': Invalid argument
8388608+0 records in
8388608+0 records out
8796093022208 bytes (8.8 TB) copied, 215197 s, 40.9 MB/s
[2]+  Exit 1  dd if=/dev/mapper/crypt_bcache0 of=/dev/null bs=1M

What's vexing is that absolutely nothing has been logged in the kernel dmesg
buffer about this read error.

Basically I have this:
sde8:64   0   3.7T  0 
└─sde1 8:65   0   3.7T  0 
  └─md59:50  14.6T  0 
└─bcache0252:00  14.6T  0 
  └─crypt_bcache0 (dm-0) 253:00  14.6T  0 

I'll try dd'ing the md5 directly now, but that's going to take another 2 days :(

That said, given that almost half the device is not readable from user space
for some reason, that would explain why btrfs check is failing. Obviously it
can't do its job if it can't read blocks.

I'll report back on what I find out with this problem but if you have
suggestions on what to look for, let me know :)

Thanks.
Marc
-- 
"A mouse is a device used to point at the xterm you want to type in" - A.S.R.
Microsoft is to operating systems 
   what McDonalds is to gourmet cooking
Home page: http://marc.merlins.org/  
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH] btrfs: make block group flags in balance printks human-readable

2016-11-04 Thread Adam Borowski
They're not even documented anywhere, leaving users with no recourse but
to RTFS.  It's no big burden to output the bitfield as words.

Also, display unknown flags as hex.

Signed-off-by: Adam Borowski 
---
 fs/btrfs/relocation.c | 34 --
 1 file changed, 32 insertions(+), 2 deletions(-)

diff --git a/fs/btrfs/relocation.c b/fs/btrfs/relocation.c
index 0ec8ffa..388216f 100644
--- a/fs/btrfs/relocation.c
+++ b/fs/btrfs/relocation.c
@@ -4326,6 +4326,34 @@ static struct reloc_control *alloc_reloc_control(struct 
btrfs_fs_info *fs_info)
 }
 
 /*
+ * explain bit flags, prefixed by a '|' that'll be dropped
+ */
+static void describe_block_group_flags(char *buf, u64 flags)
+{
+   if (!flags)
+   *buf += sprintf(buf, "|NONE");
+   else {
+#define DESCRIBE_FLAG(f, d) \
+   if (flags & BTRFS_BLOCK_GROUP_##f) { \
+   buf += sprintf(buf, "|%s", d); \
+   flags &= ~BTRFS_BLOCK_GROUP_##f; \
+   }
+   DESCRIBE_FLAG(DATA, "data");
+   DESCRIBE_FLAG(SYSTEM,   "system");
+   DESCRIBE_FLAG(METADATA, "metadata");
+   DESCRIBE_FLAG(RAID0,"raid0");
+   DESCRIBE_FLAG(RAID1,"raid1");
+   DESCRIBE_FLAG(DUP,  "dup");
+   DESCRIBE_FLAG(RAID10,   "raid10");
+   DESCRIBE_FLAG(RAID5,"raid5");
+   DESCRIBE_FLAG(RAID6,"raid6");
+   if (flags)
+   buf += sprintf(buf, "|0x%llx", flags);
+   }
+   *buf = 0;
+}
+
+/*
  * function to relocate all extents in a block group.
  */
 int btrfs_relocate_block_group(struct btrfs_root *extent_root, u64 group_start)
@@ -4337,6 +4365,7 @@ int btrfs_relocate_block_group(struct btrfs_root 
*extent_root, u64 group_start)
int ret;
int rw = 0;
int err = 0;
+   char flags_str[128];
 
rc = alloc_reloc_control(fs_info);
if (!rc)
@@ -4381,9 +4410,10 @@ int btrfs_relocate_block_group(struct btrfs_root 
*extent_root, u64 group_start)
goto out;
}
 
+   describe_block_group_flags(flags_str, rc->block_group->flags);
btrfs_info(extent_root->fs_info,
-  "relocating block group %llu flags %llu",
-  rc->block_group->key.objectid, rc->block_group->flags);
+  "relocating block group %llu flags %s",
+  rc->block_group->key.objectid, flags_str+1);
 
btrfs_wait_block_group_reservations(rc->block_group);
btrfs_wait_nocow_writers(rc->block_group);
-- 
2.10.2

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH V2] btrfs: Remove some dead code

2016-11-04 Thread Christophe JAILLET
'btrfs_iget()' can not return NULL, so this test can be removed.

Signed-off-by: Christophe JAILLET 
---
V1 --> v2: fix the patch description
---
 fs/btrfs/free-space-cache.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/fs/btrfs/free-space-cache.c b/fs/btrfs/free-space-cache.c
index e4b48f377d3a..afd8b0c10acd 100644
--- a/fs/btrfs/free-space-cache.c
+++ b/fs/btrfs/free-space-cache.c
@@ -75,8 +75,6 @@ static struct inode *__lookup_free_space_inode(struct 
btrfs_root *root,
btrfs_release_path(path);
 
inode = btrfs_iget(root->fs_info->sb, &location, root, NULL);
-   if (!inode)
-   return ERR_PTR(-ENOENT);
if (IS_ERR(inode))
return inode;
if (is_bad_inode(inode)) {
-- 
2.9.3

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html