[PATCH] btrfs-progs: doc: correct the destination of btrfs-receive

2016-06-13 Thread Satoru Takeuchi

We can set not only a btrfs mount point but also any path belonging to
a btrfs mount point as btrfs-receive's destination.

Signed-off-by: Satoru Takeuchi 
---
 Documentation/btrfs-receive.asciidoc | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Documentation/btrfs-receive.asciidoc 
b/Documentation/btrfs-receive.asciidoc
index fbbded2..e246603 100644
--- a/Documentation/btrfs-receive.asciidoc
+++ b/Documentation/btrfs-receive.asciidoc
@@ -7,14 +7,14 @@ btrfs-receive - receive subvolumes from send stream

 SYNOPSIS
 
-*btrfs receive* [options] <mount>
+*btrfs receive* [options] <path>

 DESCRIPTION
 ---

 Receive a stream of changes and replicate one or more subvolumes that were
 previously used with *btrfs send* The received subvolumes are stored to
-'mount'.
+'path'.

 *btrfs receive* will fail int the following cases:

@@ -37,7 +37,7 @@ by default, btrfs receive uses standard input to receive the 
stream,
 use this option to read from a file instead

 -C|--chroot::
-confine the process to 'mount' using `chroot`(1)
+confine the process to 'path' using `chroot`(1)

 -e::
 terminate after receiving an 'end cmd' marker in the stream.
--
2.5.5
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH v4] fstests: btrfs: add test for qgroup handle extent de-reference

2016-06-13 Thread Lu Fengqi
Test if qgroup can handle extent de-reference during reallocation.
"extent de-reference" means that reducing an extent's reference count
or freeing an extent.
Although current qgroup can handle it, we still need to prevent any
regression which may break current qgroup.

Signed-off-by: Lu Fengqi 
---
v4: print the message out if grep finds the message
v3: remove unnecessary parameters of mkfs
v2: use btrfsck check for inconsistencies
---
 common/rc   |  4 +--
 tests/btrfs/028 | 95 +
 tests/btrfs/028.out |  2 ++
 tests/btrfs/group   |  1 +
 4 files changed, 100 insertions(+), 2 deletions(-)
 create mode 100755 tests/btrfs/028
 create mode 100644 tests/btrfs/028.out

diff --git a/common/rc b/common/rc
index 51092a0..650d198 100644
--- a/common/rc
+++ b/common/rc
@@ -3284,9 +3284,9 @@ _btrfs_get_profile_configs()
 # stress btrfs by running balance operation in a loop
 _btrfs_stress_balance()
 {
-   local btrfs_mnt=$1
+   local options=$@
while true; do
-   $BTRFS_UTIL_PROG balance start $btrfs_mnt
+   $BTRFS_UTIL_PROG balance start $options
done
 }
 
diff --git a/tests/btrfs/028 b/tests/btrfs/028
new file mode 100755
index 000..1425609
--- /dev/null
+++ b/tests/btrfs/028
@@ -0,0 +1,95 @@
+#! /bin/bash
+# FS QA Test 028
+#
+# Test if qgroup can handle extent de-reference during reallocation.
+# "extent de-reference" means that reducing an extent's reference count
+# or freeing an extent.
+# Although current qgroup can handle it, we still need to prevent any
+# regression which may break current qgroup.
+#
+#---
+# Copyright (c) 2016 Fujitsu. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#---
+#
+
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1   # failure is the default!
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+_cleanup()
+{
+   cd /
+   rm -f $tmp.*
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+
+# remove previous $seqres.full before test
+rm -f $seqres.full
+
+# real QA test starts here
+_supported_fs btrfs
+_supported_os Linux
+_require_scratch
+
+_scratch_mkfs
+_scratch_mount
+
+_run_btrfs_util_prog quota enable $SCRATCH_MNT
+_run_btrfs_util_prog quota rescan -w $SCRATCH_MNT
+
+# Increase the probability of generating de-refer extent, and decrease
+# other.
+args=`_scale_fsstress_args -z \
+   -f write=10 -f unlink=10 \
+   -f creat=10 -f fsync=10 \
+   -f fsync=10 -n 10 -p 2 \
+   -d $SCRATCH_MNT/stress_dir`
+echo "Run fsstress $args" >>$seqres.full
+$FSSTRESS_PROG $args >/dev/null 2>&1 &
+fsstress_pid=$!
+
+echo "Start balance" >>$seqres.full
+_btrfs_stress_balance -d $SCRATCH_MNT >/dev/null 2>&1 &
+balance_pid=$!
+
+# 30s is enough to trigger bug
+sleep $((30*$TIME_FACTOR))
+kill $fsstress_pid $balance_pid
+wait
+
+# kill _btrfs_stress_balance can't end balance, so call btrfs balance cancel
+# to cancel running or paused balance.
+$BTRFS_UTIL_PROG balance cancel $SCRATCH_MNT &> /dev/null
+
+_run_btrfs_util_prog filesystem sync $SCRATCH_MNT
+
+_scratch_unmount
+
+# generate a qgroup report and look for inconsistent groups
+$BTRFS_UTIL_PROG check --qgroup-report $SCRATCH_DEV 2>&1 | \
+   grep -E "Counts for qgroup.*are different"
+echo "Silence is golden"
+status=0
+
+exit
diff --git a/tests/btrfs/028.out b/tests/btrfs/028.out
new file mode 100644
index 000..2615f73
--- /dev/null
+++ b/tests/btrfs/028.out
@@ -0,0 +1,2 @@
+QA output created by 028
+Silence is golden
diff --git a/tests/btrfs/group b/tests/btrfs/group
index da0e27f..35ecf59 100644
--- a/tests/btrfs/group
+++ b/tests/btrfs/group
@@ -30,6 +30,7 @@
 025 auto quick send clone
 026 auto quick compress prealloc
 027 auto replace
+028 auto qgroup balance
 029 auto quick clone
 030 auto quick send
 031 auto quick subvol clone
-- 
2.5.5



--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v3] fstests: btrfs: add test for qgroup handle extent de-reference

2016-06-13 Thread Lu Fengqi

At 06/13/2016 05:49 PM, Filipe Manana wrote:

On Mon, Jun 13, 2016 at 9:06 AM, Lu Fengqi  wrote:

At 06/13/2016 03:29 PM, Lu Fengqi wrote:


At 06/13/2016 11:04 AM, Eryu Guan wrote:


On Mon, Jun 13, 2016 at 10:10:50AM +0800, Lu Fengqi wrote:


Test if qgroup can handle extent de-reference during reallocation.
"extent de-reference" means that reducing an extent's reference count
or freeing an extent.
Although current qgroup can handle it, we still need to prevent any
regression which may break current qgroup.

Signed-off-by: Lu Fengqi 
---
 common/rc   |  4 +--
 tests/btrfs/028 | 98
+
 tests/btrfs/028.out |  2 ++
 tests/btrfs/group   |  1 +
 4 files changed, 103 insertions(+), 2 deletions(-)
 create mode 100755 tests/btrfs/028
 create mode 100644 tests/btrfs/028.out

diff --git a/common/rc b/common/rc
index 51092a0..650d198 100644
--- a/common/rc
+++ b/common/rc
@@ -3284,9 +3284,9 @@ _btrfs_get_profile_configs()
 # stress btrfs by running balance operation in a loop
 _btrfs_stress_balance()
 {
-local btrfs_mnt=$1
+local options=$@
 while true; do
-$BTRFS_UTIL_PROG balance start $btrfs_mnt
+$BTRFS_UTIL_PROG balance start $options
 done
 }

diff --git a/tests/btrfs/028 b/tests/btrfs/028
new file mode 100755
index 000..8cea49a
--- /dev/null
+++ b/tests/btrfs/028
@@ -0,0 +1,98 @@
+#! /bin/bash
+# FS QA Test 028
+#
+# Test if qgroup can handle extent de-reference during reallocation.
+# "extent de-reference" means that reducing an extent's reference count
+# or freeing an extent.
+# Although current qgroup can handle it, we still need to prevent any
+# regression which may break current qgroup.
+#

+#---

+# Copyright (c) 2016 Fujitsu. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

+#---

+#
+
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1# failure is the default!
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+_cleanup()
+{
+cd /
+rm -f $tmp.*
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+
+# remove previous $seqres.full before test
+rm -f $seqres.full
+
+# real QA test starts here
+_supported_fs btrfs
+_supported_os Linux
+_require_scratch
+
+_scratch_mkfs
+_scratch_mount
+
+_run_btrfs_util_prog quota enable $SCRATCH_MNT
+_run_btrfs_util_prog quota rescan -w $SCRATCH_MNT
+
+# Increase the probability of generating de-refer extent, and decrease
+# other.
+args=`_scale_fsstress_args -z \
+-f write=10 -f unlink=10 \
+-f creat=10 -f fsync=10 \
+-f fsync=10 -n 10 -p 2 \
+-d $SCRATCH_MNT/stress_dir`
+echo "Run fsstress $args" >>$seqres.full
+$FSSTRESS_PROG $args >/dev/null 2>&1 &
+fsstress_pid=$!
+
+echo "Start balance" >>$seqres.full
+_btrfs_stress_balance -d $SCRATCH_MNT >/dev/null 2>&1 &
+balance_pid=$!
+
+# 30s is enough to trigger bug
+sleep $((30*$TIME_FACTOR))
+kill $fsstress_pid $balance_pid
+wait
+
+# kill _btrfs_stress_balance can't end balance, so call btrfs
balance cancel
+# to cancel running or paused balance.
+$BTRFS_UTIL_PROG balance cancel $SCRATCH_MNT &> /dev/null
+
+_run_btrfs_util_prog filesystem sync $SCRATCH_MNT
+
+_scratch_unmount
+
+# generate a qgroup report and look for inconsistent groups
+$BTRFS_UTIL_PROG check --qgroup-report $SCRATCH_DEV 2>&1 | \
+grep -q -E "Counts for qgroup.*are different"
+if [ $? -ne 0 ]; then
+echo "Silence is golden"
+# success, all done
+status=0
+fi



I'm testing with 4.7-rc1 kernel and btrfs-progs v4.4, this test fails,
which means btrfs check finds inconsistent groups. But according to your
commit log, current kernel should pass the test. So is the failure
expected?

Also, just grep for different qgroup counts and print the message out if
grep finds the message, so it breaks golden image on error and we know
something really goes wrong. Right now test fails just because of
missing "Silence is golden", which is unclear why it fails:

 @@ -1,2 +1 @@
  QA output created by 028
 -Silence is golden

Do the following instead:

$BTRFS_UTIL_PROG check ... | grep -E "..."
echo "Silence is golden"
status=0
 

Re: [PATCH v3] fstests: btrfs: add test for qgroup handle extent de-reference

2016-06-13 Thread Qu Wenruo



At 06/13/2016 05:49 PM, Filipe Manana wrote:

On Mon, Jun 13, 2016 at 9:06 AM, Lu Fengqi  wrote:

At 06/13/2016 03:29 PM, Lu Fengqi wrote:


At 06/13/2016 11:04 AM, Eryu Guan wrote:


On Mon, Jun 13, 2016 at 10:10:50AM +0800, Lu Fengqi wrote:


Test if qgroup can handle extent de-reference during reallocation.
"extent de-reference" means that reducing an extent's reference count
or freeing an extent.
Although current qgroup can handle it, we still need to prevent any
regression which may break current qgroup.

Signed-off-by: Lu Fengqi 
---
 common/rc   |  4 +--
 tests/btrfs/028 | 98
+
 tests/btrfs/028.out |  2 ++
 tests/btrfs/group   |  1 +
 4 files changed, 103 insertions(+), 2 deletions(-)
 create mode 100755 tests/btrfs/028
 create mode 100644 tests/btrfs/028.out

diff --git a/common/rc b/common/rc
index 51092a0..650d198 100644
--- a/common/rc
+++ b/common/rc
@@ -3284,9 +3284,9 @@ _btrfs_get_profile_configs()
 # stress btrfs by running balance operation in a loop
 _btrfs_stress_balance()
 {
-local btrfs_mnt=$1
+local options=$@
 while true; do
-$BTRFS_UTIL_PROG balance start $btrfs_mnt
+$BTRFS_UTIL_PROG balance start $options
 done
 }

diff --git a/tests/btrfs/028 b/tests/btrfs/028
new file mode 100755
index 000..8cea49a
--- /dev/null
+++ b/tests/btrfs/028
@@ -0,0 +1,98 @@
+#! /bin/bash
+# FS QA Test 028
+#
+# Test if qgroup can handle extent de-reference during reallocation.
+# "extent de-reference" means that reducing an extent's reference count
+# or freeing an extent.
+# Although current qgroup can handle it, we still need to prevent any
+# regression which may break current qgroup.
+#

+#---

+# Copyright (c) 2016 Fujitsu. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

+#---

+#
+
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1# failure is the default!
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+_cleanup()
+{
+cd /
+rm -f $tmp.*
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+
+# remove previous $seqres.full before test
+rm -f $seqres.full
+
+# real QA test starts here
+_supported_fs btrfs
+_supported_os Linux
+_require_scratch
+
+_scratch_mkfs
+_scratch_mount
+
+_run_btrfs_util_prog quota enable $SCRATCH_MNT
+_run_btrfs_util_prog quota rescan -w $SCRATCH_MNT
+
+# Increase the probability of generating de-refer extent, and decrease
+# other.
+args=`_scale_fsstress_args -z \
+-f write=10 -f unlink=10 \
+-f creat=10 -f fsync=10 \
+-f fsync=10 -n 10 -p 2 \
+-d $SCRATCH_MNT/stress_dir`
+echo "Run fsstress $args" >>$seqres.full
+$FSSTRESS_PROG $args >/dev/null 2>&1 &
+fsstress_pid=$!
+
+echo "Start balance" >>$seqres.full
+_btrfs_stress_balance -d $SCRATCH_MNT >/dev/null 2>&1 &
+balance_pid=$!
+
+# 30s is enough to trigger bug
+sleep $((30*$TIME_FACTOR))
+kill $fsstress_pid $balance_pid
+wait
+
+# kill _btrfs_stress_balance can't end balance, so call btrfs
balance cancel
+# to cancel running or paused balance.
+$BTRFS_UTIL_PROG balance cancel $SCRATCH_MNT &> /dev/null
+
+_run_btrfs_util_prog filesystem sync $SCRATCH_MNT
+
+_scratch_unmount
+
+# generate a qgroup report and look for inconsistent groups
+$BTRFS_UTIL_PROG check --qgroup-report $SCRATCH_DEV 2>&1 | \
+grep -q -E "Counts for qgroup.*are different"
+if [ $? -ne 0 ]; then
+echo "Silence is golden"
+# success, all done
+status=0
+fi



I'm testing with 4.7-rc1 kernel and btrfs-progs v4.4, this test fails,
which means btrfs check finds inconsistent groups. But according to your
commit log, current kernel should pass the test. So is the failure
expected?

Also, just grep for different qgroup counts and print the message out if
grep finds the message, so it breaks golden image on error and we know
something really goes wrong. Right now test fails just because of
missing "Silence is golden", which is unclear why it fails:

 @@ -1,2 +1 @@
  QA output created by 028
 -Silence is golden

Do the following instead:

$BTRFS_UTIL_PROG check ... | grep -E "..."
echo "Silence is golden"

Re: [PATCH 2/2] btrfs: prefix fsid to all trace events

2016-06-13 Thread Jeff Mahoney
On 6/13/16 11:48 AM, David Sterba wrote:
> On Thu, Jun 09, 2016 at 07:48:01PM -0400, je...@suse.com wrote:
>> +#define TP_printk_btrfs(fmt, args...) \
>> +TP_printk("%pU: " fmt, __entry->fsid, args)
> 
> So it's identified by the UUID. As there's no previous format of the
> filesystem identifier (unlike the syslog messages), I'm ok with using
> the UUID.

Buried in the patch is actually the single use of a previous format for
trace events that I've converted to use the macros.  It's the
btrfs_space_reservation that Josef added in 2012[1].

I wouldn't have minded a shorter identifier but given how prevalent its
usage is internal to the FS, its exported interfaces, and tools
surrounding it, it seems like it's the only real choice.

-Jeff

[1] 8c2a3ca20f6 (Btrfs: space leak tracepoints)

-- 
Jeff Mahoney
SUSE Labs



signature.asc
Description: OpenPGP digital signature


Re: [PATCH 2/2] btrfs: prefix fsid to all trace events

2016-06-13 Thread David Sterba
On Thu, Jun 09, 2016 at 07:48:01PM -0400, je...@suse.com wrote:
> +#define TP_printk_btrfs(fmt, args...) \
> + TP_printk("%pU: " fmt, __entry->fsid, args)

So it's identified by the UUID. As there's no previous format of the
filesystem identifier (unlike the syslog messages), I'm ok with using
the UUID.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH v2] Btrfs: fix eb memory leak due to readpage failure

2016-06-13 Thread David Sterba
On Fri, Jun 03, 2016 at 12:08:38PM -0700, Liu Bo wrote:
> eb->io_pages is set in read_extent_buffer_pages().
> 
> In case of readpage failure, for pages that have been added to bio,
> it calls bio_endio and later readpage_io_failed_hook() does the work.
> 
> When this eb's page (couldn't be the 1st page) fails to add itself to bio
> due to failure in merge_bio(), it cannot decrease eb->io_pages via bio_endio,
>  and ends up with a memory leak eventually.
> 
> This lets __do_readpage propagate errors to callers and adds the
>  'atomic_dec(&eb->io_pages)'.
> 
> Signed-off-by: Liu Bo 

I'm adding this to for-next, but a review is needed if this is supposed
to go to 4.7.
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: [PATCH 1/1] btrfs: Use correct format specifier

2016-06-13 Thread David Sterba
On Sat, Jun 11, 2016 at 06:11:10PM +0200, Heinrich Schuchardt wrote:
> Component mirror_num of struct btrfsic_block is defined
> as unsigned int. Use %u as format specifier.
> 
> Signed-off-by: Heinrich Schuchardt 
Reviewed-by: David Sterba 
--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Cannot balance FS (No space left on device)

2016-06-13 Thread Austin S. Hemmelgarn

On 2016-06-10 18:39, Hans van Kranenburg wrote:

On 06/11/2016 12:10 AM, ojab // wrote:

On Fri, Jun 10, 2016 at 9:56 PM, Hans van Kranenburg
 wrote:

You can work around it by either adding two disks (like Henk said),
or by
temporarily converting some chunks to single. Just enough to get some
free
space on the first two disks to get a balance going that can fill the
third
one. You don't have to convert all of your data or metadata to single!

Something like:

btrfs balance start -v -dconvert=single,limit=10 /mnt/xxx/


Unfortunately it fails even if I set limit=1:

$ sudo btrfs balance start -v -dconvert=single,limit=1 /mnt/xxx/
Dumping filters: flags 0x1, state 0x0, force is off
  DATA (flags 0x120): converting, target=281474976710656, soft is
off, limit=1
ERROR: error during balancing '/mnt/xxx/': No space left on device
There may be more info in syslog - try dmesg | tail


Ah, apparently the balance operation *always* wants to allocate some new
empty space before starting to look more closely at the task you give it...
No, that's not exactly true.  It seems to be a rather common fallacy 
right now that balance repacks data into existing chunks, which is 
absolutely false.  What a balance does is to send everything selected by 
the filters through the allocator again, and specifically prevent any 
existing chunks from being used to satisfy the allocation.  When you 
have 5 data chunks that are 20% used and run 'balance -dlimit=20', it 
doesn't pack that all into the first chunk, it allocates a new chunk, 
and then packs it all into that, then frees all the other chunks.  This 
behavior is actually a pretty important property when adding or removing 
devices or converting between profiles, because it's what forces things 
into the new configuration of the filesystem.


In an ideal situation, the limit filters should make it repack into 
existing chunks when specified alone, but currently that's not how it 
works, and I kind of doubt that that will ever be how it works.

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Replacing drives with larger ones in a 4 drive raid1

2016-06-13 Thread Austin S. Hemmelgarn

On 2016-06-12 06:35, boli wrote:

It has now been doing "btrfs device delete missing /mnt" for about 90 hours.

These 90 hours seem like a rather long time, given that a rebalance/convert 
from 4-disk-raid5 to 4-disk-raid1 took about 20 hours months ago, and a scrub 
takes about 7 hours (4-disk-raid1).

OTOH the filesystem will be rather full with only 3 of 4 disks available, so I do expect 
it to take somewhat "longer than usual".

Would anyone venture a guess as to how long it might take?


It's done now, and took close to 99 hours to rebalance 8.1 TB of data from a 
4x6TB raid1 (12 TB capacity) with 1 drive missing onto the remaining 3x6TB 
raid1 (9 TB capacity).

Now I made sure quotas were off, then started a screen to fill the new 8 TB 
disk with zeros, detached it and checked iotop to get a rough estimate on 
how long it will take (I'm aware it will become slower in time).

After that I'll add this 8 TB disk to the btrfs raid1 (for yet another 
rebalance).

The next 3 disks will be replaced with "btrfs replace", so only one rebalance 
each is needed.

I assume each "btrfs replace" would do a full rebalance, and thus assign chunks 
according to the normal strategy of choosing the two drives with the most free space, 
which in this case would be a chunk to the new drive, and a mirrored chunk to that 
existing 3 drive with most free space.
Replace doesn't need to do a balance, it's largely just a block level 
copy of the device being replaced, but with some special handling so 
that the filesystem is consistent throughout the whole operation.  This 
is most of why it's so much more efficient than add/delete.


What I'm wondering is this:
If the goal is to replace 4x 6TB drive (raid1) with 4x 8TB drive (still raid1), 
is there a way to remove one 6 TB drive at a time, recreate its exact contents 
from the other 3 drives onto a new 8 TB drive, without doing a full rebalance? 
That is: without writing any substantial amount of data onto the remaining 3 
drives.
The most efficient way of converting the array online without adding any 
more disks than you have to begin with is:

1. Delete one device from the array with device delete.
2. Physically switch the now unused device with one of the new devices.
3. Use btrfs replace to replace one of the devices in the array with the 
newly connected device (and make sure to resize to the full size of the 
new device).
4. Repeat from step 2 until you aren't using any of the old devices in 
the array.
5. You should have one old device left unused, physically switch it for 
a new device.
6. Use btrfs device add to add the new device to the array, then run a 
full balance.


This will result in only two balances being needed (one implicit in the 
device delete, and the explicit final one to restripe across the full 
array), and will result in the absolute minimum possible data transfer.

--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


Re: Allocator behaviour during device delete

2016-06-13 Thread Austin S. Hemmelgarn

On 2016-06-10 15:26, Henk Slager wrote:

On Thu, Jun 9, 2016 at 3:54 PM, Brendan Hide  wrote:



On 06/09/2016 03:07 PM, Austin S. Hemmelgarn wrote:


On 2016-06-09 08:34, Brendan Hide wrote:


Hey, all

I noticed this odd behaviour while migrating from a 1TB spindle to SSD
(in this case on a LUKS-encrypted 200GB partition) - and am curious if
this behaviour I've noted below is expected or known. I figure it is a
bug. Depending on the situation, it *could* be severe. In my case it was
simply annoying.

---
Steps

After having added the new device (btrfs dev add), I deleted the old
device (btrfs dev del)

Then, whilst waiting for that to complete, I started a watch of "btrfs
fi show /". Note that the below is very close to the output at the time
- but is not actually copy/pasted from the output.


Label: 'tricky-root'  uuid: bcbe47a5-bd3f-497a-816b-decb4f822c42
Total devices 2 FS bytes used 115.03GiB
devid1 size 0.00GiB used 298.06GiB path /dev/sda2
devid2 size 200.88GiB used 0.00GiB path
/dev/mapper/cryptroot




devid1 is the old disk while devid2 is the new SSD

After a few minutes, I saw that the numbers have changed - but that the
SSD still had no data:


Label: 'tricky-root'  uuid: bcbe47a5-bd3f-497a-816b-decb4f822c42
Total devices 2 FS bytes used 115.03GiB
devid1 size 0.00GiB used 284.06GiB path /dev/sda2
devid2 size 200.88GiB used 0.00GiB path
/dev/mapper/cryptroot



The "FS bytes used" amount was changing a lot - but mostly stayed near
the original total, which is expected since there was very little
happening other than the "migration".

I'm not certain of the exact point where it started using the new disk's
space. I figure that may have been helpful to pinpoint. :-/


OK, I'm pretty sure I know what was going on in this case.  Your
assumption that device delete uses the balance code is correct, and that
is why you see what's happening happening.  There are two key bits that
are missing though:
1. Balance will never allocate chunks when it doesn't need to.


In relation to discussions w.r.t. enospc and device full of chunks, I
say this 1. statement and I see different behavior with kernel 4.6.0
tools 4.5.3
On an idle fs with some fragmentation, I did balance -dusage=5, it
completes successfully and leaves a new empty chunk (highest vaddr).
Then balance -dusage=6, does 2 chunks with that usage level:
- the zero filled last chunk is replaced with a new empty chunk (higher vaddr)
- the 2 usage=6 chunks are gone
- one chunk with the lowest vaddr saw its usage increase from 47 to 60
- several metadata chunks have changed slightly in usage

It could be a 2-step datamove, but from just the states before and
after balance I can't prove that.

I should have been more clear about this, I meant:
Balance will never allocate chunks if there's no data to move from the 
one it's balancing, or if it already has allocated a chunk which isn't yet 
full.


IOW, If a chunk is empty, it won't trigger a new allocation to balance 
just that chunk, and if the data in a chunk will all fit in the free 
space in a chunk that's already been allocated by this balance run, it 
will get packed there instead of triggering a new allocation.


What balance actually does is send everything selected by the filters 
through the allocator again.  Using the convert filters makes balance 
tell the allocator to start using that profile for new allocations, 
doing a device delete tells the allocator to not use that device and 
then runs balance.  This ends up being most of why balance is useful at 
all, because it has the net effect of defragmenting free space, which in 
turn can free up empty chunks.



2. The space usage listed in fi show is how much space is allocated to
chunks, not how much is used in those chunks.

In this case, based on what you've said, you had a lot of empty or
mostly empty chunks.  As a result of this, the device delete was both
copying data, and consolidating free space.  If you have a lot of empty
or mostly empty chunks, it's not unusual for a device delete to look
like this until you start hitting chunks that have actual data in them.
The primary point of this behavior is that it makes it possible to
directly switch to a smaller device without having to run a balance and
then a resize before replacing the device, and then resize again
afterwards.



Thanks, Austin. Your explanation is along the lines of my thinking though.

The new disk should have had *some* data written to it at that point, as it
started out at over 600GiB in allocation (should have probably mentioned
that already). Consolidating or not, I would consider data being written to
the old disk to be a bug, even if it is considered minor.

I'll set up a reproducible test later today to prove/disprove the theory. :)


--
To unsubscribe from this list: send the line "unsubscribe linux-btrfs" in
the body of a message to majord...@vger.kernel.org
More majordomo 

Re: [PATCH v3] fstests: btrfs: add test for qgroup handle extent de-reference

2016-06-13 Thread Filipe Manana
On Mon, Jun 13, 2016 at 9:06 AM, Lu Fengqi  wrote:
> At 06/13/2016 03:29 PM, Lu Fengqi wrote:
>>
>> At 06/13/2016 11:04 AM, Eryu Guan wrote:
>>>
>>> On Mon, Jun 13, 2016 at 10:10:50AM +0800, Lu Fengqi wrote:

 Test if qgroup can handle extent de-reference during reallocation.
 "extent de-reference" means that reducing an extent's reference count
 or freeing an extent.
 Although current qgroup can handle it, we still need to prevent any
 regression which may break current qgroup.

 Signed-off-by: Lu Fengqi 
 ---
  common/rc   |  4 +--
  tests/btrfs/028 | 98
 +
  tests/btrfs/028.out |  2 ++
  tests/btrfs/group   |  1 +
  4 files changed, 103 insertions(+), 2 deletions(-)
  create mode 100755 tests/btrfs/028
  create mode 100644 tests/btrfs/028.out

 diff --git a/common/rc b/common/rc
 index 51092a0..650d198 100644
 --- a/common/rc
 +++ b/common/rc
 @@ -3284,9 +3284,9 @@ _btrfs_get_profile_configs()
  # stress btrfs by running balance operation in a loop
  _btrfs_stress_balance()
  {
 -local btrfs_mnt=$1
 +local options=$@
  while true; do
 -$BTRFS_UTIL_PROG balance start $btrfs_mnt
 +$BTRFS_UTIL_PROG balance start $options
  done
  }

 diff --git a/tests/btrfs/028 b/tests/btrfs/028
 new file mode 100755
 index 000..8cea49a
 --- /dev/null
 +++ b/tests/btrfs/028
 @@ -0,0 +1,98 @@
 +#! /bin/bash
 +# FS QA Test 028
 +#
 +# Test if qgroup can handle extent de-reference during reallocation.
 +# "extent de-reference" means that reducing an extent's reference count
 +# or freeing an extent.
 +# Although current qgroup can handle it, we still need to prevent any
 +# regression which may break current qgroup.
 +#

 +#---

 +# Copyright (c) 2016 Fujitsu. All Rights Reserved.
 +#
 +# This program is free software; you can redistribute it and/or
 +# modify it under the terms of the GNU General Public License as
 +# published by the Free Software Foundation.
 +#
 +# This program is distributed in the hope that it would be useful,
 +# but WITHOUT ANY WARRANTY; without even the implied warranty of
 +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 +# GNU General Public License for more details.
 +#
 +# You should have received a copy of the GNU General Public License
 +# along with this program; if not, write the Free Software Foundation,
 +# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA

 +#---

 +#
 +
 +seq=`basename $0`
 +seqres=$RESULT_DIR/$seq
 +echo "QA output created by $seq"
 +
 +here=`pwd`
 +tmp=/tmp/$$
 +status=1# failure is the default!
 +trap "_cleanup; exit \$status" 0 1 2 3 15
 +
 +_cleanup()
 +{
 +cd /
 +rm -f $tmp.*
 +}
 +
 +# get standard environment, filters and checks
 +. ./common/rc
 +. ./common/filter
 +
 +# remove previous $seqres.full before test
 +rm -f $seqres.full
 +
 +# real QA test starts here
 +_supported_fs btrfs
 +_supported_os Linux
 +_require_scratch
 +
 +_scratch_mkfs
 +_scratch_mount
 +
 +_run_btrfs_util_prog quota enable $SCRATCH_MNT
 +_run_btrfs_util_prog quota rescan -w $SCRATCH_MNT
 +
 +# Increase the probability of generating de-refer extent, and decrease
 +# other.
 +args=`_scale_fsstress_args -z \
 +-f write=10 -f unlink=10 \
 +-f creat=10 -f fsync=10 \
 +-f fsync=10 -n 10 -p 2 \
 +-d $SCRATCH_MNT/stress_dir`
 +echo "Run fsstress $args" >>$seqres.full
 +$FSSTRESS_PROG $args >/dev/null 2>&1 &
 +fsstress_pid=$!
 +
 +echo "Start balance" >>$seqres.full
 +_btrfs_stress_balance -d $SCRATCH_MNT >/dev/null 2>&1 &
 +balance_pid=$!
 +
 +# 30s is enough to trigger bug
 +sleep $((30*$TIME_FACTOR))
 +kill $fsstress_pid $balance_pid
 +wait
 +
 +# kill _btrfs_stress_balance can't end balance, so call btrfs
 balance cancel
 +# to cancel running or paused balance.
 +$BTRFS_UTIL_PROG balance cancel $SCRATCH_MNT &> /dev/null
 +
 +_run_btrfs_util_prog filesystem sync $SCRATCH_MNT
 +
 +_scratch_unmount
 +
 +# generate a qgroup report and look for inconsistent groups
 +$BTRFS_UTIL_PROG check --qgroup-report $SCRATCH_DEV 2>&1 | \
 +grep -q -E "Counts for qgroup.*are different"
 +if [ $? -ne 0 ]; then
 +echo "Silence is golden"
 +# success, all done
 +status=0
 +fi
>>>
>>>
>>> I'm 

Re: [PATCH v3] fstests: btrfs: add test for qgroup handle extent de-reference

2016-06-13 Thread Lu Fengqi

At 06/13/2016 03:29 PM, Lu Fengqi wrote:

At 06/13/2016 11:04 AM, Eryu Guan wrote:

On Mon, Jun 13, 2016 at 10:10:50AM +0800, Lu Fengqi wrote:

Test if qgroup can handle extent de-reference during reallocation.
"extent de-reference" means that reducing an extent's reference count
or freeing an extent.
Although current qgroup can handle it, we still need to prevent any
regression which may break current qgroup.

Signed-off-by: Lu Fengqi 
---
 common/rc   |  4 +--
 tests/btrfs/028 | 98
+
 tests/btrfs/028.out |  2 ++
 tests/btrfs/group   |  1 +
 4 files changed, 103 insertions(+), 2 deletions(-)
 create mode 100755 tests/btrfs/028
 create mode 100644 tests/btrfs/028.out

diff --git a/common/rc b/common/rc
index 51092a0..650d198 100644
--- a/common/rc
+++ b/common/rc
@@ -3284,9 +3284,9 @@ _btrfs_get_profile_configs()
 # stress btrfs by running balance operation in a loop
 _btrfs_stress_balance()
 {
-local btrfs_mnt=$1
+local options=$@
 while true; do
-$BTRFS_UTIL_PROG balance start $btrfs_mnt
+$BTRFS_UTIL_PROG balance start $options
 done
 }

diff --git a/tests/btrfs/028 b/tests/btrfs/028
new file mode 100755
index 000..8cea49a
--- /dev/null
+++ b/tests/btrfs/028
@@ -0,0 +1,98 @@
+#! /bin/bash
+# FS QA Test 028
+#
+# Test if qgroup can handle extent de-reference during reallocation.
+# "extent de-reference" means that reducing an extent's reference count
+# or freeing an extent.
+# Although current qgroup can handle it, we still need to prevent any
+# regression which may break current qgroup.
+#
+#---

+# Copyright (c) 2016 Fujitsu. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#---

+#
+
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1# failure is the default!
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+_cleanup()
+{
+cd /
+rm -f $tmp.*
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+
+# remove previous $seqres.full before test
+rm -f $seqres.full
+
+# real QA test starts here
+_supported_fs btrfs
+_supported_os Linux
+_require_scratch
+
+_scratch_mkfs
+_scratch_mount
+
+_run_btrfs_util_prog quota enable $SCRATCH_MNT
+_run_btrfs_util_prog quota rescan -w $SCRATCH_MNT
+
+# Increase the probability of generating de-refer extent, and decrease
+# other.
+args=`_scale_fsstress_args -z \
+-f write=10 -f unlink=10 \
+-f creat=10 -f fsync=10 \
+-f fsync=10 -n 10 -p 2 \
+-d $SCRATCH_MNT/stress_dir`
+echo "Run fsstress $args" >>$seqres.full
+$FSSTRESS_PROG $args >/dev/null 2>&1 &
+fsstress_pid=$!
+
+echo "Start balance" >>$seqres.full
+_btrfs_stress_balance -d $SCRATCH_MNT >/dev/null 2>&1 &
+balance_pid=$!
+
+# 30s is enough to trigger bug
+sleep $((30*$TIME_FACTOR))
+kill $fsstress_pid $balance_pid
+wait
+
+# kill _btrfs_stress_balance can't end balance, so call btrfs balance cancel
+# to cancel running or paused balance.
+$BTRFS_UTIL_PROG balance cancel $SCRATCH_MNT &> /dev/null
+
+_run_btrfs_util_prog filesystem sync $SCRATCH_MNT
+
+_scratch_unmount
+
+# generate a qgroup report and look for inconsistent groups
+$BTRFS_UTIL_PROG check --qgroup-report $SCRATCH_DEV 2>&1 | \
+grep -q -E "Counts for qgroup.*are different"
+if [ $? -ne 0 ]; then
+echo "Silence is golden"
+# success, all done
+status=0
+fi


I'm testing with a 4.7-rc1 kernel and btrfs-progs v4.4, and this test
fails, which means btrfs check finds inconsistent groups. But according
to your commit log, the current kernel should pass the test. So is the
failure expected?

Also, just grep for the differing qgroup counts and let grep print the
message when it finds one, so that an error breaks the golden image and
we know something really went wrong. Right now the test fails only
because "Silence is golden" is missing, which makes it unclear why it
failed:

 @@ -1,2 +1 @@
  QA output created by 028
 -Silence is golden

Do the following instead:

$BTRFS_UTIL_PROG check ... | grep -E "..."
echo "Silence is golden"
status=0
exit

And we see this on failure:

 @@ -1,2 +1,3 @@
  QA output created by 028
 +Counts for qgroup id: 5 are different
  

Re: [PATCH v3] fstests: btrfs: add test for qgroup handle extent de-reference

2016-06-13 Thread Lu Fengqi

At 06/13/2016 11:04 AM, Eryu Guan wrote:

On Mon, Jun 13, 2016 at 10:10:50AM +0800, Lu Fengqi wrote:

Test if qgroup can handle extent de-reference during reallocation.
"Extent de-reference" means reducing an extent's reference count
or freeing an extent.
Although current qgroup can handle it, we still need to prevent any
regression which may break current qgroup.

Signed-off-by: Lu Fengqi 
---
 common/rc   |  4 +--
 tests/btrfs/028 | 98 +
 tests/btrfs/028.out |  2 ++
 tests/btrfs/group   |  1 +
 4 files changed, 103 insertions(+), 2 deletions(-)
 create mode 100755 tests/btrfs/028
 create mode 100644 tests/btrfs/028.out

diff --git a/common/rc b/common/rc
index 51092a0..650d198 100644
--- a/common/rc
+++ b/common/rc
@@ -3284,9 +3284,9 @@ _btrfs_get_profile_configs()
 # stress btrfs by running balance operation in a loop
 _btrfs_stress_balance()
 {
-   local btrfs_mnt=$1
+   local options=$@
while true; do
-   $BTRFS_UTIL_PROG balance start $btrfs_mnt
+   $BTRFS_UTIL_PROG balance start $options
done
 }

diff --git a/tests/btrfs/028 b/tests/btrfs/028
new file mode 100755
index 000..8cea49a
--- /dev/null
+++ b/tests/btrfs/028
@@ -0,0 +1,98 @@
+#! /bin/bash
+# FS QA Test 028
+#
+# Test if qgroup can handle extent de-reference during reallocation.
+# "extent de-reference" means that reducing an extent's reference count
+# or freeing an extent.
+# Although current qgroup can handle it, we still need to prevent any
+# regression which may break current qgroup.
+#
+#---
+# Copyright (c) 2016 Fujitsu. All Rights Reserved.
+#
+# This program is free software; you can redistribute it and/or
+# modify it under the terms of the GNU General Public License as
+# published by the Free Software Foundation.
+#
+# This program is distributed in the hope that it would be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write the Free Software Foundation,
+# Inc.,  51 Franklin St, Fifth Floor, Boston, MA  02110-1301  USA
+#---
+#
+
+seq=`basename $0`
+seqres=$RESULT_DIR/$seq
+echo "QA output created by $seq"
+
+here=`pwd`
+tmp=/tmp/$$
+status=1   # failure is the default!
+trap "_cleanup; exit \$status" 0 1 2 3 15
+
+_cleanup()
+{
+   cd /
+   rm -f $tmp.*
+}
+
+# get standard environment, filters and checks
+. ./common/rc
+. ./common/filter
+
+# remove previous $seqres.full before test
+rm -f $seqres.full
+
+# real QA test starts here
+_supported_fs btrfs
+_supported_os Linux
+_require_scratch
+
+_scratch_mkfs
+_scratch_mount
+
+_run_btrfs_util_prog quota enable $SCRATCH_MNT
+_run_btrfs_util_prog quota rescan -w $SCRATCH_MNT
+
+# Increase the probability of generating de-refer extent, and decrease
+# other.
+args=`_scale_fsstress_args -z \
+   -f write=10 -f unlink=10 \
+   -f creat=10 -f fsync=10 \
+   -f fsync=10 -n 10 -p 2 \
+   -d $SCRATCH_MNT/stress_dir`
+echo "Run fsstress $args" >>$seqres.full
+$FSSTRESS_PROG $args >/dev/null 2>&1 &
+fsstress_pid=$!
+
+echo "Start balance" >>$seqres.full
+_btrfs_stress_balance -d $SCRATCH_MNT >/dev/null 2>&1 &
+balance_pid=$!
+
+# 30s is enough to trigger bug
+sleep $((30*$TIME_FACTOR))
+kill $fsstress_pid $balance_pid
+wait
+
+# kill _btrfs_stress_balance can't end balance, so call btrfs balance cancel
+# to cancel running or paused balance.
+$BTRFS_UTIL_PROG balance cancel $SCRATCH_MNT &> /dev/null
+
+_run_btrfs_util_prog filesystem sync $SCRATCH_MNT
+
+_scratch_unmount
+
+# generate a qgroup report and look for inconsistent groups
+$BTRFS_UTIL_PROG check --qgroup-report $SCRATCH_DEV 2>&1 | \
+   grep -q -E "Counts for qgroup.*are different"
+if [ $? -ne 0 ]; then
+   echo "Silence is golden"
+   # success, all done
+   status=0
+fi


I'm testing with a 4.7-rc1 kernel and btrfs-progs v4.4, and this test
fails, which means btrfs check finds inconsistent groups. But according
to your commit log, the current kernel should pass the test. So is the
failure expected?

Also, just grep for the differing qgroup counts and let grep print the
message when it finds one, so that an error breaks the golden image and
we know something really went wrong. Right now the test fails only
because "Silence is golden" is missing, which makes it unclear why it
failed:

 @@ -1,2 +1 @@
  QA output created by 028
 -Silence is golden

Do the following instead:

$BTRFS_UTIL_PROG check ... | grep -E "..."
echo "Silence is golden"
status=0
exit

And we see this on failure:

 @@ -1,2 +1,3 @@
  QA output created by 028
 +Counts